1/* load.c --- parsing a 'dumpfile'-formatted stream.
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23
24#include "svn_private_config.h"
25#include "svn_hash.h"
26#include "svn_pools.h"
27#include "svn_error.h"
28#include "svn_fs.h"
29#include "svn_repos.h"
30#include "svn_string.h"
31#include "svn_path.h"
32#include "svn_props.h"
33#include "repos.h"
34#include "svn_private_config.h"
35#include "svn_mergeinfo.h"
36#include "svn_checksum.h"
37#include "svn_subst.h"
38#include "svn_ctype.h"
39
40#include <apr_lib.h>
41
42#include "private/svn_dep_compat.h"
43#include "private/svn_mergeinfo_private.h"
44
45/*----------------------------------------------------------------------*/
46
47/** The parser and related helper funcs **/
48
49
50static svn_error_t *
51stream_ran_dry(void)
52{
53  return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
54                          _("Premature end of content data in dumpstream"));
55}
56
57static svn_error_t *
58stream_malformed(void)
59{
60  return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
61                          _("Dumpstream data appears to be malformed"));
62}
63
64/* Allocate a new hash *HEADERS in POOL, and read a series of
65   RFC822-style headers from STREAM.  Duplicate each header's name and
66   value into POOL and store in hash as a const char * ==> const char *.
67
68   The headers are assumed to be terminated by a single blank line,
69   which will be permanently sucked from the stream and tossed.
70
71   If the caller has already read in the first header line, it should
72   be passed in as FIRST_HEADER.  If not, pass NULL instead.
73 */
74static svn_error_t *
75read_header_block(svn_stream_t *stream,
76                  svn_stringbuf_t *first_header,
77                  apr_hash_t **headers,
78                  apr_pool_t *pool)
79{
80  *headers = apr_hash_make(pool);
81
82  while (1)
83    {
84      svn_stringbuf_t *header_str;
85      const char *name, *value;
86      svn_boolean_t eof;
87      apr_size_t i = 0;
88
89      if (first_header != NULL)
90        {
91          header_str = first_header;
92          first_header = NULL;  /* so we never visit this block again. */
93          eof = FALSE;
94        }
95
96      else
97        /* Read the next line into a stringbuf. */
98        SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool));
99
100      if (svn_stringbuf_isempty(header_str))
101        break;    /* end of header block */
102      else if (eof)
103        return stream_ran_dry();
104
105      /* Find the next colon in the stringbuf. */
106      while (header_str->data[i] != ':')
107        {
108          if (header_str->data[i] == '\0')
109            return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
110                                     _("Dump stream contains a malformed "
111                                       "header (with no ':') at '%.20s'"),
112                                     header_str->data);
113          i++;
114        }
115      /* Create a 'name' string and point to it. */
116      header_str->data[i] = '\0';
117      name = header_str->data;
118
119      /* Skip over the NULL byte and the space following it.  */
120      i += 2;
121      if (i > header_str->len)
122        return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
123                                 _("Dump stream contains a malformed "
124                                   "header (with no value) at '%.20s'"),
125                                 header_str->data);
126
127      /* Point to the 'value' string. */
128      value = header_str->data + i;
129
130      /* Store name/value in hash. */
131      svn_hash_sets(*headers, name, value);
132    }
133
134  return SVN_NO_ERROR;
135}
136
137
138/* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM.
139   Also read a newline from STREAM and increase *ACTUAL_LEN by the total
140   number of bytes read from STREAM.  */
141static svn_error_t *
142read_key_or_val(char **pbuf,
143                svn_filesize_t *actual_length,
144                svn_stream_t *stream,
145                apr_size_t len,
146                apr_pool_t *pool)
147{
148  char *buf = apr_pcalloc(pool, len + 1);
149  apr_size_t numread;
150  char c;
151
152  numread = len;
153  SVN_ERR(svn_stream_read(stream, buf, &numread));
154  *actual_length += numread;
155  if (numread != len)
156    return svn_error_trace(stream_ran_dry());
157  buf[len] = '\0';
158
159  /* Suck up extra newline after key data */
160  numread = 1;
161  SVN_ERR(svn_stream_read(stream, &c, &numread));
162  *actual_length += numread;
163  if (numread != 1)
164    return svn_error_trace(stream_ran_dry());
165  if (c != '\n')
166    return svn_error_trace(stream_malformed());
167
168  *pbuf = buf;
169  return SVN_NO_ERROR;
170}
171
172
173/* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an
174   encoded Subversion properties hash, and making multiple calls to
175   PARSE_FNS->set_*_property on RECORD_BATON (depending on the value
176   of IS_NODE.)
177
178   Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM.
179   If an error is returned, the value of *ACTUAL_LENGTH is undefined.
180
181   Use POOL for all allocations.  */
182static svn_error_t *
183parse_property_block(svn_stream_t *stream,
184                     svn_filesize_t content_length,
185                     const svn_repos_parse_fns3_t *parse_fns,
186                     void *record_baton,
187                     void *parse_baton,
188                     svn_boolean_t is_node,
189                     svn_filesize_t *actual_length,
190                     apr_pool_t *pool)
191{
192  svn_stringbuf_t *strbuf;
193  apr_pool_t *proppool = svn_pool_create(pool);
194
195  *actual_length = 0;
196  while (content_length != *actual_length)
197    {
198      char *buf;  /* a pointer into the stringbuf's data */
199      svn_boolean_t eof;
200
201      svn_pool_clear(proppool);
202
203      /* Read a key length line.  (Actually, it might be PROPS_END). */
204      SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
205
206      if (eof)
207        {
208          /* We could just use stream_ran_dry() or stream_malformed(),
209             but better to give a non-generic property block error. */
210          return svn_error_create
211            (SVN_ERR_STREAM_MALFORMED_DATA, NULL,
212             _("Incomplete or unterminated property block"));
213        }
214
215      *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */
216      buf = strbuf->data;
217
218      if (! strcmp(buf, "PROPS-END"))
219        break; /* no more properties. */
220
221      else if ((buf[0] == 'K') && (buf[1] == ' '))
222        {
223          char *keybuf;
224          apr_uint64_t len;
225
226          SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
227          SVN_ERR(read_key_or_val(&keybuf, actual_length,
228                                  stream, (apr_size_t)len, proppool));
229
230          /* Read a val length line */
231          SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
232          if (eof)
233            return stream_ran_dry();
234
235          *actual_length += (strbuf->len + 1); /* +1 because we read \n too */
236          buf = strbuf->data;
237
238          if ((buf[0] == 'V') && (buf[1] == ' '))
239            {
240              svn_string_t propstring;
241              char *valbuf;
242              apr_int64_t val;
243
244              SVN_ERR(svn_cstring_atoi64(&val, buf + 2));
245              propstring.len = (apr_size_t)val;
246              SVN_ERR(read_key_or_val(&valbuf, actual_length,
247                                      stream, propstring.len, proppool));
248              propstring.data = valbuf;
249
250              /* Now, send the property pair to the vtable! */
251              if (is_node)
252                {
253                  SVN_ERR(parse_fns->set_node_property(record_baton,
254                                                       keybuf,
255                                                       &propstring));
256                }
257              else
258                {
259                  SVN_ERR(parse_fns->set_revision_property(record_baton,
260                                                           keybuf,
261                                                           &propstring));
262                }
263            }
264          else
265            return stream_malformed(); /* didn't find expected 'V' line */
266        }
267      else if ((buf[0] == 'D') && (buf[1] == ' '))
268        {
269          char *keybuf;
270          apr_uint64_t len;
271
272          SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
273          SVN_ERR(read_key_or_val(&keybuf, actual_length,
274                                  stream, (apr_size_t)len, proppool));
275
276          /* We don't expect these in revision properties, and if we see
277             one when we don't have a delete_node_property callback,
278             then we're seeing a v3 feature in a v2 dump. */
279          if (!is_node || !parse_fns->delete_node_property)
280            return stream_malformed();
281
282          SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf));
283        }
284      else
285        return stream_malformed(); /* didn't find expected 'K' line */
286
287    } /* while (1) */
288
289  svn_pool_destroy(proppool);
290  return SVN_NO_ERROR;
291}
292
293
294/* Read CONTENT_LENGTH bytes from STREAM, and use
295   PARSE_FNS->set_fulltext to push those bytes as replace fulltext for
296   a node.  Use BUFFER/BUFLEN to push the fulltext in "chunks".
297
298   Use POOL for all allocations.  */
299static svn_error_t *
300parse_text_block(svn_stream_t *stream,
301                 svn_filesize_t content_length,
302                 svn_boolean_t is_delta,
303                 const svn_repos_parse_fns3_t *parse_fns,
304                 void *record_baton,
305                 char *buffer,
306                 apr_size_t buflen,
307                 apr_pool_t *pool)
308{
309  svn_stream_t *text_stream = NULL;
310  apr_size_t num_to_read, rlen, wlen;
311
312  if (is_delta)
313    {
314      svn_txdelta_window_handler_t wh;
315      void *whb;
316
317      SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton));
318      if (wh)
319        text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool);
320    }
321  else
322    {
323      /* Get a stream to which we can push the data. */
324      SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton));
325    }
326
327  /* If there are no contents to read, just write an empty buffer
328     through our callback. */
329  if (content_length == 0)
330    {
331      wlen = 0;
332      if (text_stream)
333        SVN_ERR(svn_stream_write(text_stream, "", &wlen));
334    }
335
336  /* Regardless of whether or not we have a sink for our data, we
337     need to read it. */
338  while (content_length)
339    {
340      if (content_length >= (svn_filesize_t)buflen)
341        rlen = buflen;
342      else
343        rlen = (apr_size_t) content_length;
344
345      num_to_read = rlen;
346      SVN_ERR(svn_stream_read(stream, buffer, &rlen));
347      content_length -= rlen;
348      if (rlen != num_to_read)
349        return stream_ran_dry();
350
351      if (text_stream)
352        {
353          /* write however many bytes you read. */
354          wlen = rlen;
355          SVN_ERR(svn_stream_write(text_stream, buffer, &wlen));
356          if (wlen != rlen)
357            {
358              /* Uh oh, didn't write as many bytes as we read. */
359              return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL,
360                                      _("Unexpected EOF writing contents"));
361            }
362        }
363    }
364
365  /* If we opened a stream, we must close it. */
366  if (text_stream)
367    SVN_ERR(svn_stream_close(text_stream));
368
369  return SVN_NO_ERROR;
370}
371
372
373
374/* Parse VERSIONSTRING and verify that we support the dumpfile format
375   version number, setting *VERSION appropriately. */
376static svn_error_t *
377parse_format_version(int *version,
378                     const char *versionstring)
379{
380  static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1;
381  const char *p = strchr(versionstring, ':');
382  int value;
383
384  if (p == NULL
385      || p != (versionstring + magic_len)
386      || strncmp(versionstring,
387                 SVN_REPOS_DUMPFILE_MAGIC_HEADER,
388                 magic_len))
389    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
390                             _("Malformed dumpfile header '%s'"),
391                             versionstring);
392
393  SVN_ERR(svn_cstring_atoi(&value, p + 1));
394
395  if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION)
396    return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
397                             _("Unsupported dumpfile version: %d"),
398                             value);
399
400  *version = value;
401  return SVN_NO_ERROR;
402}
403
404
405
406/*----------------------------------------------------------------------*/
407
408/** The public routines **/
409
/* Parse the dumpfile-formatted STREAM, reporting everything found in it
   through the PARSE_FNS vtable.  PARSE_BATON is passed to the callbacks
   that take a parse baton (magic_header_record, new_revision_record,
   uuid_record).

   The first line of STREAM must be the format-version header; it is
   validated by parse_format_version() and, if PARSE_FNS provides a
   magic_header_record callback, reported through it.  The rest of the
   stream is then processed one record at a time until end of stream:

     - a revision record closes any open revision (close_revision) and
       opens a new one (new_revision_record);
     - a node record is opened with new_node_record and closed with
       close_node once its content has been handled;
     - a UUID record is reported through uuid_record;
     - a repeated format header only updates the local version number;
     - anything else yields SVN_ERR_STREAM_MALFORMED_DATA.

   Property content is handed to parse_property_block() and text content
   to parse_text_block().  If DELTAS_ARE_TEXT is true, text content is
   always delivered as fulltext, even when the record's
   SVN_REPOS_DUMPFILE_TEXT_DELTA header is "true".

   If CANCEL_FUNC is non-NULL it is called with CANCEL_BATON at the top
   of every iteration of the record loop, and any error it returns ends
   the parse.

   POOL is used for the chunk buffer and as the parent of the per-line,
   per-revision and per-node subpools.  Those subpools are destroyed
   explicitly only on the successful return path; on an error return
   they are left alone (presumably to be cleaned up together with
   POOL). */
svn_error_t *
svn_repos_parse_dumpstream3(svn_stream_t *stream,
                            const svn_repos_parse_fns3_t *parse_fns,
                            void *parse_baton,
                            svn_boolean_t deltas_are_text,
                            svn_cancel_func_t cancel_func,
                            void *cancel_baton,
                            apr_pool_t *pool)
{
  svn_boolean_t eof;
  svn_stringbuf_t *linebuf;
  void *rev_baton = NULL;   /* non-NULL while a revision is open */
  /* Scratch buffer shared by every text block and by the code that
     skips unclaimed content bytes. */
  char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
  apr_size_t buflen = SVN__STREAM_CHUNK_SIZE;
  apr_pool_t *linepool = svn_pool_create(pool);  /* cleared per iteration */
  apr_pool_t *revpool = svn_pool_create(pool);   /* cleared per revision */
  apr_pool_t *nodepool = svn_pool_create(pool);  /* cleared per node */
  int version;

  SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
  if (eof)
    return stream_ran_dry();

  /* The first two lines of the stream are the dumpfile-format version
     number, and a blank line.  To preserve backward compatibility,
     don't assume the existence of newer parser-vtable functions. */
  SVN_ERR(parse_format_version(&version, linebuf->data));
  if (parse_fns->magic_header_record != NULL)
    SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool));

  /* A dumpfile "record" is defined to be a header-block of
     rfc822-style headers, possibly followed by a content-block.

       - A header-block is always terminated by a single blank line (\n\n)

       - We know whether the record has a content-block by looking for
         a 'Content-length:' header.  The content-block will always be
         of a specific length, plus an extra newline.

     Once a record is fully sucked from the stream, an indeterminate
     number of blank lines (or lines that begin with whitespace) may
     follow before the next record (or the end of the stream.)
  */

  while (1)
    {
      apr_hash_t *headers;
      void *node_baton;   /* only assigned (and only used) for node records */
      svn_boolean_t found_node = FALSE;
      svn_boolean_t old_v1_with_cl = FALSE;
      const char *content_length;
      const char *prop_cl;
      const char *text_cl;
      const char *value;
      /* Set by parse_property_block(); only read in the old-v1 branch,
         which is reached only after a property block was parsed. */
      svn_filesize_t actual_prop_length;

      /* Clear our per-line pool. */
      svn_pool_clear(linepool);

      /* Check for cancellation. */
      if (cancel_func)
        SVN_ERR(cancel_func(cancel_baton));

      /* Keep reading blank lines until we discover a new record, or until
         the stream runs out. */
      SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));

      if (eof)
        {
          if (svn_stringbuf_isempty(linebuf))
            break;   /* end of stream, go home. */
          else
            return stream_ran_dry();
        }

      if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0])))
        continue; /* empty line ... loop */

      /*** Found the beginning of a new record. ***/

      /* The last line we read better be a header of some sort.
         Read the whole header-block into a hash.  The hash and its
         strings live in LINEPOOL, so they are only valid for this
         iteration. */
      SVN_ERR(read_header_block(stream, linebuf, &headers, linepool));

      /*** Handle the various header blocks. ***/

      /* Is this a revision record? */
      if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER))
        {
          /* If we already have a rev_baton open, we need to close it
             and clear the per-revision subpool. */
          if (rev_baton != NULL)
            {
              SVN_ERR(parse_fns->close_revision(rev_baton));
              svn_pool_clear(revpool);
            }

          SVN_ERR(parse_fns->new_revision_record(&rev_baton,
                                                 headers, parse_baton,
                                                 revpool));
        }
      /* Or is this, perhaps, a node record? */
      else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH))
        {
          /* NOTE(review): REV_BATON is passed as-is, so it is NULL here
             if no revision record has been seen yet. */
          SVN_ERR(parse_fns->new_node_record(&node_baton,
                                             headers,
                                             rev_baton,
                                             nodepool));
          found_node = TRUE;
        }
      /* Or is this the repos UUID? */
      else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID)))
        {
          SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool));
        }
      /* Or perhaps a dumpfile format? */
      /* ### TODO: use parse_format_version */
      else if ((value = svn_hash_gets(headers,
                                      SVN_REPOS_DUMPFILE_MAGIC_HEADER)))
        {
          /* ### someday, switch modes of operation here. */
          SVN_ERR(svn_cstring_atoi(&version, value));
        }
      /* Or is this bogosity?! */
      else
        {
          /* What the heck is this record?!? */
          return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
                                  _("Unrecognized record type in stream"));
        }

      /* Need 3 values below to determine v1 dump type

         Old (pre 0.14?) v1 dumps don't have Prop-content-length
         and Text-content-length fields, but always have a properties
         block in a block with Content-Length > 0 */

      content_length = svn_hash_gets(headers,
                                     SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
      prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
      text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
      old_v1_with_cl =
        version == 1 && content_length && ! prop_cl && ! text_cl;

      /* Is there a props content-block to parse? */
      if (prop_cl || old_v1_with_cl)
        {
          const char *delta = svn_hash_gets(headers,
                                            SVN_REPOS_DUMPFILE_PROP_DELTA);
          svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0);

          /* First, remove all node properties, unless this is a delta
             property block. */
          if (found_node && !is_delta)
            SVN_ERR(parse_fns->remove_node_props(node_baton));

          /* For an old-v1 record there is no separate property length,
             so the whole Content-length is offered to the property
             parser, which stops at PROPS-END. */
          SVN_ERR(parse_property_block
                  (stream,
                   svn__atoui64(prop_cl ? prop_cl : content_length),
                   parse_fns,
                   found_node ? node_baton : rev_baton,
                   parse_baton,
                   found_node,
                   &actual_prop_length,
                   found_node ? nodepool : revpool));
        }

      /* Is there a text content-block to parse? */
      if (text_cl)
        {
          const char *delta = svn_hash_gets(headers,
                                            SVN_REPOS_DUMPFILE_TEXT_DELTA);
          svn_boolean_t is_delta = FALSE;
          /* When the caller asked for deltas to be treated as text, the
             delta header is ignored and IS_DELTA stays FALSE. */
          if (! deltas_are_text)
            is_delta = (delta && strcmp(delta, "true") == 0);

          SVN_ERR(parse_text_block(stream,
                                   svn__atoui64(text_cl),
                                   is_delta,
                                   parse_fns,
                                   found_node ? node_baton : rev_baton,
                                   buffer,
                                   buflen,
                                   found_node ? nodepool : revpool));
        }
      else if (old_v1_with_cl)
        {
          /* An old-v1 block with a Content-length might have a text block.
             If the property block did not consume all the bytes of the
             Content-length, then it clearly does have a text block.
             If not, then we must deduce whether we have an *empty* text
             block or an *absent* text block.  The rules are:
             - "Node-kind: file" blocks have an empty (i.e. present, but
               zero-length) text block, since they represent a file
               modification.  Note that file-copied-text-unmodified blocks
               have no Content-length - even if they should have contained
               a modified property block, the pre-0.14 dumper forgets to
               dump the modified properties.
             - If it is not a file node, then it is a revision or directory,
               and so has an absent text block.
          */
          const char *node_kind;
          /* Bytes of the content block left over after the properties. */
          svn_filesize_t cl_value = svn__atoui64(content_length)
                                    - actual_prop_length;

          if (cl_value ||
              ((node_kind = svn_hash_gets(headers,
                                          SVN_REPOS_DUMPFILE_NODE_KIND))
               && strcmp(node_kind, "file") == 0)
             )
            SVN_ERR(parse_text_block(stream,
                                     cl_value,
                                     FALSE,
                                     parse_fns,
                                     found_node ? node_baton : rev_baton,
                                     buffer,
                                     buflen,
                                     found_node ? nodepool : revpool));
        }

      /* if we have a content-length header, did we read all of it?
         in case of an old v1, we *always* read all of it, because
         text-content-length == content-length - prop-content-length
      */
      if (content_length && ! old_v1_with_cl)
        {
          apr_size_t rlen, num_to_read;
          /* Bytes declared by Content-length but not claimed by the
             property or text sub-blocks. */
          svn_filesize_t remaining =
            svn__atoui64(content_length) -
            (prop_cl ? svn__atoui64(prop_cl) : 0) -
            (text_cl ? svn__atoui64(text_cl) : 0);


          if (remaining < 0)
            return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
                                    _("Sum of subblock sizes larger than "
                                      "total block content length"));

          /* Consume remaining bytes in this content block */
          while (remaining > 0)
            {
              if (remaining >= (svn_filesize_t)buflen)
                rlen = buflen;
              else
                rlen = (apr_size_t) remaining;

              num_to_read = rlen;
              SVN_ERR(svn_stream_read(stream, buffer, &rlen));
              remaining -= rlen;
              if (rlen != num_to_read)
                return stream_ran_dry();
            }
        }

      /* If we just finished processing a node record, we need to
         close the node record and clear the per-node subpool. */
      if (found_node)
        {
          SVN_ERR(parse_fns->close_node(node_baton));
          svn_pool_clear(nodepool);
        }

      /*** End of processing for one record. ***/

    } /* end of stream */

  /* Close out whatever revision we're in. */
  if (rev_baton != NULL)
    SVN_ERR(parse_fns->close_revision(rev_baton));

  svn_pool_destroy(linepool);
  svn_pool_destroy(revpool);
  svn_pool_destroy(nodepool);
  return SVN_NO_ERROR;
}
685