parse-diff.c revision 299742
155682Smarkm/*
2233294Sstas * parse-diff.c: functions for parsing diff files
355682Smarkm *
455682Smarkm * ====================================================================
555682Smarkm *    Licensed to the Apache Software Foundation (ASF) under one
655682Smarkm *    or more contributor license agreements.  See the NOTICE file
755682Smarkm *    distributed with this work for additional information
855682Smarkm *    regarding copyright ownership.  The ASF licenses this file
955682Smarkm *    to you under the Apache License, Version 2.0 (the
1055682Smarkm *    "License"); you may not use this file except in compliance
1155682Smarkm *    with the License.  You may obtain a copy of the License at
1255682Smarkm *
1355682Smarkm *      http://www.apache.org/licenses/LICENSE-2.0
1472445Sassar *
1555682Smarkm *    Unless required by applicable law or agreed to in writing,
1655682Smarkm *    software distributed under the License is distributed on an
1755682Smarkm *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18178825Sdfr *    KIND, either express or implied.  See the License for the
1955682Smarkm *    specific language governing permissions and limitations
2055682Smarkm *    under the License.
2155682Smarkm * ====================================================================
2255682Smarkm */
2355682Smarkm
2455682Smarkm#include <stdlib.h>
2555682Smarkm#include <stddef.h>
2655682Smarkm#include <string.h>
2755682Smarkm
2855682Smarkm#include "svn_hash.h"
2955682Smarkm#include "svn_types.h"
3055682Smarkm#include "svn_error.h"
3155682Smarkm#include "svn_io.h"
3255682Smarkm#include "svn_pools.h"
3355682Smarkm#include "svn_props.h"
3455682Smarkm#include "svn_string.h"
3555682Smarkm#include "svn_utf.h"
3655682Smarkm#include "svn_dirent_uri.h"
3755682Smarkm#include "svn_diff.h"
3855682Smarkm#include "svn_ctype.h"
3955682Smarkm#include "svn_mergeinfo.h"
4055682Smarkm
4155682Smarkm#include "private/svn_eol_private.h"
4255682Smarkm#include "private/svn_dep_compat.h"
4355682Smarkm#include "private/svn_sorts_private.h"
4455682Smarkm
/* Helper macro for readability: evaluates to non-zero when the
 * NUL-terminated string STR begins with the NUL-terminated string START.
 * START is expanded twice, so it must not have side effects. */
#define starts_with(str, start)  \
  (strncmp((str), (start), strlen(start)) == 0)

/* Like strlen() but for string literals, computed with sizeof.
 * The empty literal pasted in front of STR makes any argument that is not
 * a string literal (for instance a char pointer, for which sizeof would
 * yield the pointer size) fail to compile instead of giving a wrong
 * length. */
#define STRLEN_LITERAL(str) (sizeof("" str) - 1)
5155682Smarkm
5255682Smarkm/* This struct describes a range within a file, as well as the
5355682Smarkm * current cursor position within the range. All numbers are in bytes. */
5455682Smarkmstruct svn_diff__hunk_range {
5555682Smarkm  apr_off_t start;
5655682Smarkm  apr_off_t end;
5755682Smarkm  apr_off_t current;
5855682Smarkm};
5955682Smarkm
6055682Smarkmstruct svn_diff_hunk_t {
6155682Smarkm  /* The patch this hunk belongs to. */
6255682Smarkm  svn_patch_t *patch;
6355682Smarkm
6455682Smarkm  /* APR file handle to the patch file this hunk came from. */
6572445Sassar  apr_file_t *apr_file;
6655682Smarkm
6755682Smarkm  /* Ranges used to keep track of this hunk's texts positions within
6855682Smarkm   * the patch file. */
6955682Smarkm  struct svn_diff__hunk_range diff_text_range;
7055682Smarkm  struct svn_diff__hunk_range original_text_range;
7155682Smarkm  struct svn_diff__hunk_range modified_text_range;
7255682Smarkm
7355682Smarkm  /* Hunk ranges as they appeared in the patch file.
7455682Smarkm   * All numbers are lines, not bytes. */
7555682Smarkm  svn_linenum_t original_start;
7655682Smarkm  svn_linenum_t original_length;
77233294Sstas  svn_linenum_t modified_start;
7855682Smarkm  svn_linenum_t modified_length;
7955682Smarkm
80  /* Number of lines of leading and trailing hunk context. */
81  svn_linenum_t leading_context;
82  svn_linenum_t trailing_context;
83};
84
85void
86svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
87{
88  hunk->diff_text_range.current = hunk->diff_text_range.start;
89}
90
91void
92svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
93{
94  if (hunk->patch->reverse)
95    hunk->modified_text_range.current = hunk->modified_text_range.start;
96  else
97    hunk->original_text_range.current = hunk->original_text_range.start;
98}
99
100void
101svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
102{
103  if (hunk->patch->reverse)
104    hunk->original_text_range.current = hunk->original_text_range.start;
105  else
106    hunk->modified_text_range.current = hunk->modified_text_range.start;
107}
108
109svn_linenum_t
110svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
111{
112  return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
113}
114
115svn_linenum_t
116svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
117{
118  return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
119}
120
121svn_linenum_t
122svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
123{
124  return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
125}
126
127svn_linenum_t
128svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
129{
130  return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
131}
132
133svn_linenum_t
134svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
135{
136  return hunk->leading_context;
137}
138
139svn_linenum_t
140svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
141{
142  return hunk->trailing_context;
143}
144
145/* Try to parse a positive number from a decimal number encoded
146 * in the string NUMBER. Return parsed number in OFFSET, and return
147 * TRUE if parsing was successful. */
148static svn_boolean_t
149parse_offset(svn_linenum_t *offset, const char *number)
150{
151  svn_error_t *err;
152  apr_uint64_t val;
153
154  err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
155  if (err)
156    {
157      svn_error_clear(err);
158      return FALSE;
159    }
160
161  *offset = (svn_linenum_t)val;
162
163  return TRUE;
164}
165
166/* Try to parse a hunk range specification from the string RANGE.
167 * Return parsed information in *START and *LENGTH, and return TRUE
168 * if the range parsed correctly. Note: This function may modify the
169 * input value RANGE. */
170static svn_boolean_t
171parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
172{
173  char *comma;
174
175  if (*range == 0)
176    return FALSE;
177
178  comma = strstr(range, ",");
179  if (comma)
180    {
181      if (strlen(comma + 1) > 0)
182        {
183          /* Try to parse the length. */
184          if (! parse_offset(length, comma + 1))
185            return FALSE;
186
187          /* Snip off the end of the string,
188           * so we can comfortably parse the line
189           * number the hunk starts at. */
190          *comma = '\0';
191        }
192       else
193         /* A comma but no length? */
194         return FALSE;
195    }
196  else
197    {
198      *length = 1;
199    }
200
201  /* Try to parse the line number the hunk starts at. */
202  return parse_offset(start, range);
203}
204
205/* Try to parse a hunk header in string HEADER, putting parsed information
206 * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
207 * character string used to delimit the hunk header.
208 * Do all allocations in POOL. */
209static svn_boolean_t
210parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
211                  const char *atat, apr_pool_t *pool)
212{
213  const char *p;
214  const char *start;
215  svn_stringbuf_t *range;
216
217  p = header + strlen(atat);
218  if (*p != ' ')
219    /* No. */
220    return FALSE;
221  p++;
222  if (*p != '-')
223    /* Nah... */
224    return FALSE;
225  /* OK, this may be worth allocating some memory for... */
226  range = svn_stringbuf_create_ensure(31, pool);
227  start = ++p;
228  while (*p && *p != ' ')
229    {
230      p++;
231    }
232
233  if (*p != ' ')
234    /* No no no... */
235    return FALSE;
236
237  svn_stringbuf_appendbytes(range, start, p - start);
238
239  /* Try to parse the first range. */
240  if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
241    return FALSE;
242
243  /* Clear the stringbuf so we can reuse it for the second range. */
244  svn_stringbuf_setempty(range);
245  p++;
246  if (*p != '+')
247    /* Eeek! */
248    return FALSE;
249  /* OK, this may be worth copying... */
250  start = ++p;
251  while (*p && *p != ' ')
252    {
253      p++;
254    }
255  if (*p != ' ')
256    /* No no no... */
257    return FALSE;
258
259  svn_stringbuf_appendbytes(range, start, p - start);
260
261  /* Check for trailing @@ */
262  p++;
263  if (! starts_with(p, atat))
264    return FALSE;
265
266  /* There may be stuff like C-function names after the trailing @@,
267   * but we ignore that. */
268
269  /* Try to parse the second range. */
270  if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
271    return FALSE;
272
273  /* Hunk header is good. */
274  return TRUE;
275}
276
277/* Read a line of original or modified hunk text from the specified
278 * RANGE within FILE. FILE is expected to contain unidiff text.
279 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
280 * Any lines commencing with the VERBOTEN character are discarded.
281 * VERBOTEN should be '+' or '-', depending on which form of hunk text
282 * is being read.
283 *
284 * All other parameters are as in svn_diff_hunk_readline_original_text()
285 * and svn_diff_hunk_readline_modified_text().
286 */
287static svn_error_t *
288hunk_readline_original_or_modified(apr_file_t *file,
289                                   struct svn_diff__hunk_range *range,
290                                   svn_stringbuf_t **stringbuf,
291                                   const char **eol,
292                                   svn_boolean_t *eof,
293                                   char verboten,
294                                   apr_pool_t *result_pool,
295                                   apr_pool_t *scratch_pool)
296{
297  apr_size_t max_len;
298  svn_boolean_t filtered;
299  apr_off_t pos;
300  svn_stringbuf_t *str;
301
302  if (range->current >= range->end)
303    {
304      /* We're past the range. Indicate that no bytes can be read. */
305      *eof = TRUE;
306      if (eol)
307        *eol = NULL;
308      *stringbuf = svn_stringbuf_create_empty(result_pool);
309      return SVN_NO_ERROR;
310    }
311
312  pos = 0;
313  SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos,  scratch_pool));
314  SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
315  do
316    {
317      max_len = range->end - range->current;
318      SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
319                                   result_pool, scratch_pool));
320      range->current = 0;
321      SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool));
322      filtered = (str->data[0] == verboten || str->data[0] == '\\');
323    }
324  while (filtered && ! *eof);
325
326  if (filtered)
327    {
328      /* EOF, return an empty string. */
329      *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
330    }
331  else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
332    {
333      /* Shave off leading unidiff symbols. */
334      *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
335    }
336  else
337    {
338      /* Return the line as-is. */
339      *stringbuf = svn_stringbuf_dup(str, result_pool);
340    }
341
342  SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
343
344  return SVN_NO_ERROR;
345}
346
347svn_error_t *
348svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
349                                     svn_stringbuf_t **stringbuf,
350                                     const char **eol,
351                                     svn_boolean_t *eof,
352                                     apr_pool_t *result_pool,
353                                     apr_pool_t *scratch_pool)
354{
355  return svn_error_trace(
356    hunk_readline_original_or_modified(hunk->apr_file,
357                                       hunk->patch->reverse ?
358                                         &hunk->modified_text_range :
359                                         &hunk->original_text_range,
360                                       stringbuf, eol, eof,
361                                       hunk->patch->reverse ? '-' : '+',
362                                       result_pool, scratch_pool));
363}
364
365svn_error_t *
366svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
367                                     svn_stringbuf_t **stringbuf,
368                                     const char **eol,
369                                     svn_boolean_t *eof,
370                                     apr_pool_t *result_pool,
371                                     apr_pool_t *scratch_pool)
372{
373  return svn_error_trace(
374    hunk_readline_original_or_modified(hunk->apr_file,
375                                       hunk->patch->reverse ?
376                                         &hunk->original_text_range :
377                                         &hunk->modified_text_range,
378                                       stringbuf, eol, eof,
379                                       hunk->patch->reverse ? '+' : '-',
380                                       result_pool, scratch_pool));
381}
382
383svn_error_t *
384svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
385                                 svn_stringbuf_t **stringbuf,
386                                 const char **eol,
387                                 svn_boolean_t *eof,
388                                 apr_pool_t *result_pool,
389                                 apr_pool_t *scratch_pool)
390{
391  svn_stringbuf_t *line;
392  apr_size_t max_len;
393  apr_off_t pos;
394
395  if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
396    {
397      /* We're past the range. Indicate that no bytes can be read. */
398      *eof = TRUE;
399      if (eol)
400        *eol = NULL;
401      *stringbuf = svn_stringbuf_create_empty(result_pool);
402      return SVN_NO_ERROR;
403    }
404
405  pos = 0;
406  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool));
407  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
408                           &hunk->diff_text_range.current, scratch_pool));
409  max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
410  SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
411                               result_pool,
412                   scratch_pool));
413  hunk->diff_text_range.current = 0;
414  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR,
415                           &hunk->diff_text_range.current, scratch_pool));
416  SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
417
418  if (hunk->patch->reverse)
419    {
420      if (line->data[0] == '+')
421        line->data[0] = '-';
422      else if (line->data[0] == '-')
423        line->data[0] = '+';
424    }
425
426  *stringbuf = line;
427
428  return SVN_NO_ERROR;
429}
430
431/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
432 * Allocate *PROP_NAME in RESULT_POOL.
433 * Set *PROP_NAME to NULL if no valid property name was found. */
434static svn_error_t *
435parse_prop_name(const char **prop_name, const char *header,
436                const char *indicator, apr_pool_t *result_pool)
437{
438  SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
439                                  header + strlen(indicator),
440                                  result_pool));
441  if (**prop_name == '\0')
442    *prop_name = NULL;
443  else if (! svn_prop_name_is_valid(*prop_name))
444    {
445      svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
446      svn_stringbuf_strip_whitespace(buf);
447      *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
448    }
449
450  return SVN_NO_ERROR;
451}
452
453
454/* A helper function to parse svn:mergeinfo diffs.
455 *
456 * These diffs use a special pretty-print format, for instance:
457 *
458 * Added: svn:mergeinfo
459 * ## -0,0 +0,1 ##
460 *   Merged /trunk:r2-3
461 *
462 * The hunk header has the following format:
463 * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
464 *
465 * At this point, the number of reverse merges has already been
466 * parsed into HUNK->ORIGINAL_LENGTH, and the number of forward
467 * merges has been parsed into HUNK->MODIFIED_LENGTH.
468 *
469 * The header is followed by a list of mergeinfo, one path per line.
470 * This function parses such lines. Lines describing reverse merges
471 * appear first, and then all lines describing forward merges appear.
472 *
473 * Parts of the line are affected by i18n. The words 'Merged'
474 * and 'Reverse-merged' can appear in any language and at any
475 * position within the line. We can only assume that a leading
476 * '/' starts the merge source path, the path is followed by
477 * ":r", which in turn is followed by a mergeinfo revision range,
478 *  which is terminated by whitespace or end-of-string.
479 *
480 * If the current line meets the above criteria and we're able
481 * to parse valid mergeinfo from it, the resulting mergeinfo
482 * is added to patch->mergeinfo or patch->reverse_mergeinfo,
483 * and we proceed to the next line.
484 */
485static svn_error_t *
486parse_mergeinfo(svn_boolean_t *found_mergeinfo,
487                svn_stringbuf_t *line,
488                svn_diff_hunk_t *hunk,
489                svn_patch_t *patch,
490                apr_pool_t *result_pool,
491                apr_pool_t *scratch_pool)
492{
493  char *slash = strchr(line->data, '/');
494  char *colon = strrchr(line->data, ':');
495
496  *found_mergeinfo = FALSE;
497
498  if (slash && colon && colon[1] == 'r' && slash < colon)
499    {
500      svn_stringbuf_t *input;
501      svn_mergeinfo_t mergeinfo = NULL;
502      char *s;
503      svn_error_t *err;
504
505      input = svn_stringbuf_create_ensure(line->len, scratch_pool);
506
507      /* Copy the merge source path + colon */
508      s = slash;
509      while (s <= colon)
510        {
511          svn_stringbuf_appendbyte(input, *s);
512          s++;
513        }
514
515      /* skip 'r' after colon */
516      s++;
517
518      /* Copy the revision range. */
519      while (s < line->data + line->len)
520        {
521          if (svn_ctype_isspace(*s))
522            break;
523          svn_stringbuf_appendbyte(input, *s);
524          s++;
525        }
526
527      err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
528      if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
529        {
530          svn_error_clear(err);
531          mergeinfo = NULL;
532        }
533      else
534        SVN_ERR(err);
535
536      if (mergeinfo)
537        {
538          if (hunk->original_length > 0) /* reverse merges */
539            {
540              if (patch->reverse)
541                {
542                  if (patch->mergeinfo == NULL)
543                    patch->mergeinfo = mergeinfo;
544                  else
545                    SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
546                                                 mergeinfo,
547                                                 result_pool,
548                                                 scratch_pool));
549                }
550              else
551                {
552                  if (patch->reverse_mergeinfo == NULL)
553                    patch->reverse_mergeinfo = mergeinfo;
554                  else
555                    SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
556                                                 mergeinfo,
557                                                 result_pool,
558                                                 scratch_pool));
559                }
560              hunk->original_length--;
561            }
562          else if (hunk->modified_length > 0) /* forward merges */
563            {
564              if (patch->reverse)
565                {
566                  if (patch->reverse_mergeinfo == NULL)
567                    patch->reverse_mergeinfo = mergeinfo;
568                  else
569                    SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
570                                                 mergeinfo,
571                                                 result_pool,
572                                                 scratch_pool));
573                }
574              else
575                {
576                  if (patch->mergeinfo == NULL)
577                    patch->mergeinfo = mergeinfo;
578                  else
579                    SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
580                                                 mergeinfo,
581                                                 result_pool,
582                                                 scratch_pool));
583                }
584              hunk->modified_length--;
585            }
586
587          *found_mergeinfo = TRUE;
588        }
589    }
590
591  return SVN_NO_ERROR;
592}
593
594/* Return the next *HUNK from a PATCH in APR_FILE.
595 * If no hunk can be found, set *HUNK to NULL.
596 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
597 * is the first belonging to a certain property, then PROP_NAME and
598 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
599 * NULL.  If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
600 * treated as context lines.  Allocate results in RESULT_POOL.
601 * Use SCRATCH_POOL for all other allocations. */
602static svn_error_t *
603parse_next_hunk(svn_diff_hunk_t **hunk,
604                svn_boolean_t *is_property,
605                const char **prop_name,
606                svn_diff_operation_kind_t *prop_operation,
607                svn_patch_t *patch,
608                apr_file_t *apr_file,
609                svn_boolean_t ignore_whitespace,
610                apr_pool_t *result_pool,
611                apr_pool_t *scratch_pool)
612{
613  static const char * const minus = "--- ";
614  static const char * const text_atat = "@@";
615  static const char * const prop_atat = "##";
616  svn_stringbuf_t *line;
617  svn_boolean_t eof, in_hunk, hunk_seen;
618  apr_off_t pos, last_line;
619  apr_off_t start, end;
620  apr_off_t original_end;
621  apr_off_t modified_end;
622  svn_linenum_t original_lines;
623  svn_linenum_t modified_lines;
624  svn_linenum_t leading_context;
625  svn_linenum_t trailing_context;
626  svn_boolean_t changed_line_seen;
627  enum {
628    noise_line,
629    original_line,
630    modified_line,
631    context_line
632  } last_line_type;
633  apr_pool_t *iterpool;
634
635  *prop_operation = svn_diff_op_unchanged;
636
637  /* We only set this if we have a property hunk header. */
638  *prop_name = NULL;
639  *is_property = FALSE;
640
641  if (apr_file_eof(apr_file) == APR_EOF)
642    {
643      /* No more hunks here. */
644      *hunk = NULL;
645      return SVN_NO_ERROR;
646    }
647
648  in_hunk = FALSE;
649  hunk_seen = FALSE;
650  leading_context = 0;
651  trailing_context = 0;
652  changed_line_seen = FALSE;
653  original_end = 0;
654  modified_end = 0;
655  *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
656
657  /* Get current seek position -- APR has no ftell() :( */
658  pos = 0;
659  SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool));
660
661  /* Start out assuming noise. */
662  last_line_type = noise_line;
663
664  iterpool = svn_pool_create(scratch_pool);
665  do
666    {
667
668      svn_pool_clear(iterpool);
669
670      /* Remember the current line's offset, and read the line. */
671      last_line = pos;
672      SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
673                                   iterpool, iterpool));
674
675      /* Update line offset for next iteration. */
676      pos = 0;
677      SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool));
678
679      /* Lines starting with a backslash indicate a missing EOL:
680       * "\ No newline at end of file" or "end of property". */
681      if (line->data[0] == '\\')
682        {
683          if (in_hunk)
684            {
685              char eolbuf[2];
686              apr_size_t len;
687              apr_off_t off;
688              apr_off_t hunk_text_end;
689
690              /* Comment terminates the hunk text and says the hunk text
691               * has no trailing EOL. Snip off trailing EOL which is part
692               * of the patch file but not part of the hunk text. */
693              off = last_line - 2;
694              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
695              len = sizeof(eolbuf);
696              SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
697                                             &eof, iterpool));
698              if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
699                hunk_text_end = last_line - 2;
700              else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
701                hunk_text_end = last_line - 1;
702              else
703                hunk_text_end = last_line;
704
705              if (last_line_type == original_line && original_end == 0)
706                original_end = hunk_text_end;
707              else if (last_line_type == modified_line && modified_end == 0)
708                modified_end = hunk_text_end;
709              else if (last_line_type == context_line)
710                {
711                  if (original_end == 0)
712                    original_end = hunk_text_end;
713                  if (modified_end == 0)
714                    modified_end = hunk_text_end;
715                }
716
717              SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
718            }
719
720          continue;
721        }
722
723      if (in_hunk && *is_property && *prop_name &&
724          strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
725        {
726          svn_boolean_t found_mergeinfo;
727
728          SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch,
729                                  result_pool, iterpool));
730          if (found_mergeinfo)
731            continue; /* Proceed to the next line in the patch. */
732        }
733
734      if (in_hunk)
735        {
736          char c;
737          static const char add = '+';
738          static const char del = '-';
739
740          if (! hunk_seen)
741            {
742              /* We're reading the first line of the hunk, so the start
743               * of the line just read is the hunk text's byte offset. */
744              start = last_line;
745            }
746
747          c = line->data[0];
748          if (original_lines > 0 && modified_lines > 0 &&
749              ((c == ' ')
750               /* Tolerate chopped leading spaces on empty lines. */
751               || (! eof && line->len == 0)
752               /* Maybe tolerate chopped leading spaces on non-empty lines. */
753               || (ignore_whitespace && c != del && c != add)))
754            {
755              /* It's a "context" line in the hunk. */
756              hunk_seen = TRUE;
757              original_lines--;
758              modified_lines--;
759              if (changed_line_seen)
760                trailing_context++;
761              else
762                leading_context++;
763              last_line_type = context_line;
764            }
765          else if (original_lines > 0 && c == del)
766            {
767              /* It's a "deleted" line in the hunk. */
768              hunk_seen = TRUE;
769              changed_line_seen = TRUE;
770
771              /* A hunk may have context in the middle. We only want
772                 trailing lines of context. */
773              if (trailing_context > 0)
774                trailing_context = 0;
775
776              original_lines--;
777              last_line_type = original_line;
778            }
779          else if (modified_lines > 0 && c == add)
780            {
781              /* It's an "added" line in the hunk. */
782              hunk_seen = TRUE;
783              changed_line_seen = TRUE;
784
785              /* A hunk may have context in the middle. We only want
786                 trailing lines of context. */
787              if (trailing_context > 0)
788                trailing_context = 0;
789
790              modified_lines--;
791              last_line_type = modified_line;
792            }
793          else
794            {
795              if (eof)
796                {
797                  /* The hunk ends at EOF. */
798                  end = pos;
799                }
800              else
801                {
802                  /* The start of the current line marks the first byte
803                   * after the hunk text. */
804                  end = last_line;
805                }
806
807              if (original_end == 0)
808                original_end = end;
809              if (modified_end == 0)
810                modified_end = end;
811              break; /* Hunk was empty or has been read. */
812            }
813        }
814      else
815        {
816          if (starts_with(line->data, text_atat))
817            {
818              /* Looks like we have a hunk header, try to rip it apart. */
819              in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
820                                          iterpool);
821              if (in_hunk)
822                {
823                  original_lines = (*hunk)->original_length;
824                  modified_lines = (*hunk)->modified_length;
825                  *is_property = FALSE;
826                }
827              }
828          else if (starts_with(line->data, prop_atat))
829            {
830              /* Looks like we have a property hunk header, try to rip it
831               * apart. */
832              in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
833                                          iterpool);
834              if (in_hunk)
835                {
836                  original_lines = (*hunk)->original_length;
837                  modified_lines = (*hunk)->modified_length;
838                  *is_property = TRUE;
839                }
840            }
841          else if (starts_with(line->data, "Added: "))
842            {
843              SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
844                                      result_pool));
845              if (*prop_name)
846                *prop_operation = svn_diff_op_added;
847            }
848          else if (starts_with(line->data, "Deleted: "))
849            {
850              SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
851                                      result_pool));
852              if (*prop_name)
853                *prop_operation = svn_diff_op_deleted;
854            }
855          else if (starts_with(line->data, "Modified: "))
856            {
857              SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
858                                      result_pool));
859              if (*prop_name)
860                *prop_operation = svn_diff_op_modified;
861            }
862          else if (starts_with(line->data, minus)
863                   || starts_with(line->data, "diff --git "))
864            /* This could be a header of another patch. Bail out. */
865            break;
866        }
867    }
868  /* Check for the line length since a file may not have a newline at the
869   * end and we depend upon the last line to be an empty one. */
870  while (! eof || line->len > 0);
871  svn_pool_destroy(iterpool);
872
873  if (! eof)
874    /* Rewind to the start of the line just read, so subsequent calls
875     * to this function or svn_diff_parse_next_patch() don't end
876     * up skipping the line -- it may contain a patch or hunk header. */
877    SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
878
879  if (hunk_seen && start < end)
880    {
881      (*hunk)->patch = patch;
882      (*hunk)->apr_file = apr_file;
883      (*hunk)->leading_context = leading_context;
884      (*hunk)->trailing_context = trailing_context;
885      (*hunk)->diff_text_range.start = start;
886      (*hunk)->diff_text_range.current = start;
887      (*hunk)->diff_text_range.end = end;
888      (*hunk)->original_text_range.start = start;
889      (*hunk)->original_text_range.current = start;
890      (*hunk)->original_text_range.end = original_end;
891      (*hunk)->modified_text_range.start = start;
892      (*hunk)->modified_text_range.current = start;
893      (*hunk)->modified_text_range.end = modified_end;
894    }
895  else
896    /* Something went wrong, just discard the result. */
897    *hunk = NULL;
898
899  return SVN_NO_ERROR;
900}
901
902/* Compare function for sorting hunks after parsing.
903 * We sort hunks by their original line offset. */
904static int
905compare_hunks(const void *a, const void *b)
906{
907  const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
908  const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
909
910  if (ha->original_start < hb->original_start)
911    return -1;
912  if (ha->original_start > hb->original_start)
913    return 1;
914  return 0;
915}
916
/* Possible states of the diff header parser.
 * The typical input line that leads to each state is quoted next to it. */
enum parse_state
{
  /* Initial state; no header line has been recognized yet. */
  state_start,

  /* "diff --git" */
  state_git_diff_seen,

  /* A tree operation, rather than a content change. */
  state_git_tree_seen,

  /* "--- /dev/null" or "--- a/" */
  state_git_minus_seen,

  /* "+++ /dev/null" or "+++ a/" */
  state_git_plus_seen,

  /* "rename from foo.c" */
  state_move_from_seen,

  /* "copy from foo.c" */
  state_copy_from_seen,

  /* "--- foo.c" */
  state_minus_seen,

  /* Valid start of a regular unidiff header. */
  state_unidiff_found,

  /* Valid start of a --git diff header. */
  state_git_header_found
};
931
932/* Data type describing a valid state transition of the parser. */
933struct transition
934{
935  const char *expected_input;
936  enum parse_state required_state;
937
938  /* A callback called upon each parser state transition. */
939  svn_error_t *(*fn)(enum parse_state *new_state, char *input,
940                     svn_patch_t *patch, apr_pool_t *result_pool,
941                     apr_pool_t *scratch_pool);
942};
943
944/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
945static svn_error_t *
946grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
947              apr_pool_t *scratch_pool)
948{
949  const char *utf8_path;
950  const char *canon_path;
951
952  /* Grab the filename and encode it in UTF-8. */
953  /* TODO: Allow specifying the patch file's encoding.
954   *       For now, we assume its encoding is native. */
955  /* ### This can fail if the filename cannot be represented in the current
956   * ### locale's encoding. */
957  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
958                                  line,
959                                  scratch_pool));
960
961  /* Canonicalize the path name. */
962  canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
963
964  *file_name = apr_pstrdup(result_pool, canon_path);
965
966  return SVN_NO_ERROR;
967}
968
969/* Parse the '--- ' line of a regular unidiff. */
970static svn_error_t *
971diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
972           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
973{
974  /* If we can find a tab, it separates the filename from
975   * the rest of the line which we can discard. */
976  char *tab = strchr(line, '\t');
977  if (tab)
978    *tab = '\0';
979
980  SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
981                        result_pool, scratch_pool));
982
983  *new_state = state_minus_seen;
984
985  return SVN_NO_ERROR;
986}
987
988/* Parse the '+++ ' line of a regular unidiff. */
989static svn_error_t *
990diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
991           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
992{
993  /* If we can find a tab, it separates the filename from
994   * the rest of the line which we can discard. */
995  char *tab = strchr(line, '\t');
996  if (tab)
997    *tab = '\0';
998
999  SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1000                        result_pool, scratch_pool));
1001
1002  *new_state = state_unidiff_found;
1003
1004  return SVN_NO_ERROR;
1005}
1006
1007/* Parse the first line of a git extended unidiff. */
1008static svn_error_t *
1009git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1010          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1011{
1012  const char *old_path_start;
1013  char *old_path_end;
1014  const char *new_path_start;
1015  const char *new_path_end;
1016  char *new_path_marker;
1017  const char *old_path_marker;
1018
1019  /* ### Add handling of escaped paths
1020   * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1021   *
1022   * TAB, LF, double quote and backslash characters in pathnames are
1023   * represented as \t, \n, \" and \\, respectively. If there is need for
1024   * such substitution then the whole pathname is put in double quotes.
1025   */
1026
1027  /* Our line should look like this: 'diff --git a/path b/path'.
1028   *
1029   * If we find any deviations from that format, we return with state reset
1030   * to start.
1031   */
1032  old_path_marker = strstr(line, " a/");
1033
1034  if (! old_path_marker)
1035    {
1036      *new_state = state_start;
1037      return SVN_NO_ERROR;
1038    }
1039
1040  if (! *(old_path_marker + 3))
1041    {
1042      *new_state = state_start;
1043      return SVN_NO_ERROR;
1044    }
1045
1046  new_path_marker = strstr(old_path_marker, " b/");
1047
1048  if (! new_path_marker)
1049    {
1050      *new_state = state_start;
1051      return SVN_NO_ERROR;
1052    }
1053
1054  if (! *(new_path_marker + 3))
1055    {
1056      *new_state = state_start;
1057      return SVN_NO_ERROR;
1058    }
1059
1060  /* By now, we know that we have a line on the form '--git diff a/.+ b/.+'
1061   * We only need the filenames when we have deleted or added empty
1062   * files. In those cases the old_path and new_path is identical on the
1063   * 'diff --git' line.  For all other cases we fetch the filenames from
1064   * other header lines. */
1065  old_path_start = line + STRLEN_LITERAL("diff --git a/");
1066  new_path_end = line + strlen(line);
1067  new_path_start = old_path_start;
1068
1069  while (TRUE)
1070    {
1071      ptrdiff_t len_old;
1072      ptrdiff_t len_new;
1073
1074      new_path_marker = strstr(new_path_start, " b/");
1075
1076      /* No new path marker, bail out. */
1077      if (! new_path_marker)
1078        break;
1079
1080      old_path_end = new_path_marker;
1081      new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1082
1083      /* No path after the marker. */
1084      if (! *new_path_start)
1085        break;
1086
1087      len_old = old_path_end - old_path_start;
1088      len_new = new_path_end - new_path_start;
1089
1090      /* Are the paths before and after the " b/" marker the same? */
1091      if (len_old == len_new
1092          && ! strncmp(old_path_start, new_path_start, len_old))
1093        {
1094          *old_path_end = '\0';
1095          SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1096                                result_pool, scratch_pool));
1097
1098          SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1099                                result_pool, scratch_pool));
1100          break;
1101        }
1102    }
1103
1104  /* We assume that the path is only modified until we've found a 'tree'
1105   * header */
1106  patch->operation = svn_diff_op_modified;
1107
1108  *new_state = state_git_diff_seen;
1109  return SVN_NO_ERROR;
1110}
1111
1112/* Parse the '--- ' line of a git extended unidiff. */
1113static svn_error_t *
1114git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1115          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1116{
1117  /* If we can find a tab, it separates the filename from
1118   * the rest of the line which we can discard. */
1119  char *tab = strchr(line, '\t');
1120  if (tab)
1121    *tab = '\0';
1122
1123  if (starts_with(line, "--- /dev/null"))
1124    SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1125                          result_pool, scratch_pool));
1126  else
1127    SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1128                          result_pool, scratch_pool));
1129
1130  *new_state = state_git_minus_seen;
1131  return SVN_NO_ERROR;
1132}
1133
1134/* Parse the '+++ ' line of a git extended unidiff. */
1135static svn_error_t *
1136git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1137          apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1138{
1139  /* If we can find a tab, it separates the filename from
1140   * the rest of the line which we can discard. */
1141  char *tab = strchr(line, '\t');
1142  if (tab)
1143    *tab = '\0';
1144
1145  if (starts_with(line, "+++ /dev/null"))
1146    SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1147                          result_pool, scratch_pool));
1148  else
1149    SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1150                          result_pool, scratch_pool));
1151
1152  *new_state = state_git_header_found;
1153  return SVN_NO_ERROR;
1154}
1155
1156/* Parse the 'rename from ' line of a git extended unidiff. */
1157static svn_error_t *
1158git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1159              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1160{
1161  SVN_ERR(grab_filename(&patch->old_filename,
1162                        line + STRLEN_LITERAL("rename from "),
1163                        result_pool, scratch_pool));
1164
1165  *new_state = state_move_from_seen;
1166  return SVN_NO_ERROR;
1167}
1168
1169/* Parse the 'rename to ' line of a git extended unidiff. */
1170static svn_error_t *
1171git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1172            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1173{
1174  SVN_ERR(grab_filename(&patch->new_filename,
1175                        line + STRLEN_LITERAL("rename to "),
1176                        result_pool, scratch_pool));
1177
1178  patch->operation = svn_diff_op_moved;
1179
1180  *new_state = state_git_tree_seen;
1181  return SVN_NO_ERROR;
1182}
1183
1184/* Parse the 'copy from ' line of a git extended unidiff. */
1185static svn_error_t *
1186git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1187              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1188{
1189  SVN_ERR(grab_filename(&patch->old_filename,
1190                        line + STRLEN_LITERAL("copy from "),
1191                        result_pool, scratch_pool));
1192
1193  *new_state = state_copy_from_seen;
1194  return SVN_NO_ERROR;
1195}
1196
1197/* Parse the 'copy to ' line of a git extended unidiff. */
1198static svn_error_t *
1199git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1200            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1201{
1202  SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1203                        result_pool, scratch_pool));
1204
1205  patch->operation = svn_diff_op_copied;
1206
1207  *new_state = state_git_tree_seen;
1208  return SVN_NO_ERROR;
1209}
1210
1211/* Parse the 'new file ' line of a git extended unidiff. */
1212static svn_error_t *
1213git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1214             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1215{
1216  patch->operation = svn_diff_op_added;
1217
1218  /* Filename already retrieved from diff --git header. */
1219
1220  *new_state = state_git_tree_seen;
1221  return SVN_NO_ERROR;
1222}
1223
1224/* Parse the 'deleted file ' line of a git extended unidiff. */
1225static svn_error_t *
1226git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1227                 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1228{
1229  patch->operation = svn_diff_op_deleted;
1230
1231  /* Filename already retrieved from diff --git header. */
1232
1233  *new_state = state_git_tree_seen;
1234  return SVN_NO_ERROR;
1235}
1236
1237/* Add a HUNK associated with the property PROP_NAME to PATCH. */
1238static svn_error_t *
1239add_property_hunk(svn_patch_t *patch, const char *prop_name,
1240                  svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1241                  apr_pool_t *result_pool)
1242{
1243  svn_prop_patch_t *prop_patch;
1244
1245  prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1246
1247  if (! prop_patch)
1248    {
1249      prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1250      prop_patch->name = prop_name;
1251      prop_patch->operation = operation;
1252      prop_patch->hunks = apr_array_make(result_pool, 1,
1253                                         sizeof(svn_diff_hunk_t *));
1254
1255      svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1256    }
1257
1258  APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1259
1260  return SVN_NO_ERROR;
1261}
1262
1263struct svn_patch_file_t
1264{
1265  /* The APR file handle to the patch file. */
1266  apr_file_t *apr_file;
1267
1268  /* The file offset at which the next patch is expected. */
1269  apr_off_t next_patch_offset;
1270};
1271
1272svn_error_t *
1273svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1274                         const char *local_abspath,
1275                         apr_pool_t *result_pool)
1276{
1277  svn_patch_file_t *p;
1278
1279  p = apr_palloc(result_pool, sizeof(*p));
1280  SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1281                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1282                           result_pool));
1283  p->next_patch_offset = 0;
1284  *patch_file = p;
1285
1286  return SVN_NO_ERROR;
1287}
1288
1289/* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1290 * Parsing stops if no valid next hunk can be found.
1291 * If IGNORE_WHITESPACE is TRUE, lines without
1292 * leading spaces will be treated as context lines.
1293 * Allocate results in RESULT_POOL.
1294 * Use SCRATCH_POOL for temporary allocations. */
1295static svn_error_t *
1296parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1297            svn_boolean_t ignore_whitespace,
1298            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1299{
1300  svn_diff_hunk_t *hunk;
1301  svn_boolean_t is_property;
1302  const char *last_prop_name;
1303  const char *prop_name;
1304  svn_diff_operation_kind_t prop_operation;
1305  apr_pool_t *iterpool;
1306
1307  last_prop_name = NULL;
1308
1309  patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1310  patch->prop_patches = apr_hash_make(result_pool);
1311  iterpool = svn_pool_create(scratch_pool);
1312  do
1313    {
1314      svn_pool_clear(iterpool);
1315
1316      SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
1317                              patch, apr_file, ignore_whitespace, result_pool,
1318                              iterpool));
1319
1320      if (hunk && is_property)
1321        {
1322          if (! prop_name)
1323            prop_name = last_prop_name;
1324          else
1325            last_prop_name = prop_name;
1326
1327          /* Skip svn:mergeinfo properties.
1328           * Mergeinfo data cannot be represented as a hunk and
1329           * is therefore stored in PATCH itself. */
1330          if (strcmp(prop_name, SVN_PROP_MERGEINFO) == 0)
1331            continue;
1332
1333          SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
1334                                    result_pool));
1335        }
1336      else if (hunk)
1337        {
1338          APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
1339          last_prop_name = NULL;
1340        }
1341
1342    }
1343  while (hunk);
1344  svn_pool_destroy(iterpool);
1345
1346  return SVN_NO_ERROR;
1347}
1348
1349/* State machine for the diff header parser.
1350 * Expected Input   Required state          Function to call */
1351static struct transition transitions[] =
1352{
1353  {"--- ",          state_start,            diff_minus},
1354  {"+++ ",          state_minus_seen,       diff_plus},
1355  {"diff --git",    state_start,            git_start},
1356  {"--- a/",        state_git_diff_seen,    git_minus},
1357  {"--- a/",        state_git_tree_seen,    git_minus},
1358  {"--- /dev/null", state_git_tree_seen,    git_minus},
1359  {"+++ b/",        state_git_minus_seen,   git_plus},
1360  {"+++ /dev/null", state_git_minus_seen,   git_plus},
1361  {"rename from ",  state_git_diff_seen,    git_move_from},
1362  {"rename to ",    state_move_from_seen,   git_move_to},
1363  {"copy from ",    state_git_diff_seen,    git_copy_from},
1364  {"copy to ",      state_copy_from_seen,   git_copy_to},
1365  {"new file ",     state_git_diff_seen,    git_new_file},
1366  {"deleted file ", state_git_diff_seen,    git_deleted_file},
1367};
1368
1369svn_error_t *
1370svn_diff_parse_next_patch(svn_patch_t **patch_p,
1371                          svn_patch_file_t *patch_file,
1372                          svn_boolean_t reverse,
1373                          svn_boolean_t ignore_whitespace,
1374                          apr_pool_t *result_pool,
1375                          apr_pool_t *scratch_pool)
1376{
1377  apr_off_t pos, last_line;
1378  svn_boolean_t eof;
1379  svn_boolean_t line_after_tree_header_read = FALSE;
1380  apr_pool_t *iterpool;
1381  svn_patch_t *patch;
1382  enum parse_state state = state_start;
1383
1384  if (apr_file_eof(patch_file->apr_file) == APR_EOF)
1385    {
1386      /* No more patches here. */
1387      *patch_p = NULL;
1388      return SVN_NO_ERROR;
1389    }
1390
1391  patch = apr_pcalloc(result_pool, sizeof(*patch));
1392
1393  pos = patch_file->next_patch_offset;
1394  SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
1395
1396  iterpool = svn_pool_create(scratch_pool);
1397  do
1398    {
1399      svn_stringbuf_t *line;
1400      svn_boolean_t valid_header_line = FALSE;
1401      int i;
1402
1403      svn_pool_clear(iterpool);
1404
1405      /* Remember the current line's offset, and read the line. */
1406      last_line = pos;
1407      SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
1408                                   APR_SIZE_MAX, iterpool, iterpool));
1409
1410      if (! eof)
1411        {
1412          /* Update line offset for next iteration. */
1413          pos = 0;
1414          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos,
1415                                   iterpool));
1416        }
1417
1418      /* Run the state machine. */
1419      for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
1420        {
1421          if (starts_with(line->data, transitions[i].expected_input)
1422              && state == transitions[i].required_state)
1423            {
1424              SVN_ERR(transitions[i].fn(&state, line->data, patch,
1425                                        result_pool, iterpool));
1426              valid_header_line = TRUE;
1427              break;
1428            }
1429        }
1430
1431      if (state == state_unidiff_found || state == state_git_header_found)
1432        {
1433          /* We have a valid diff header, yay! */
1434          break;
1435        }
1436      else if (state == state_git_tree_seen && line_after_tree_header_read)
1437        {
1438          /* git patches can contain an index line after the file mode line */
1439          if (!starts_with(line->data, "index "))
1440          {
1441            /* We have a valid diff header for a patch with only tree changes.
1442             * Rewind to the start of the line just read, so subsequent calls
1443             * to this function don't end up skipping the line -- it may
1444             * contain a patch. */
1445            SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
1446                    scratch_pool));
1447            break;
1448          }
1449        }
1450      else if (state == state_git_tree_seen)
1451        {
1452          line_after_tree_header_read = TRUE;
1453        }
1454      else if (! valid_header_line && state != state_start
1455               && state != state_git_diff_seen
1456               && !starts_with(line->data, "index "))
1457        {
1458          /* We've encountered an invalid diff header.
1459           *
1460           * Rewind to the start of the line just read - it may be a new
1461           * header that begins there. */
1462          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
1463                                   scratch_pool));
1464          state = state_start;
1465        }
1466
1467    }
1468  while (! eof);
1469
1470  patch->reverse = reverse;
1471  if (reverse)
1472    {
1473      const char *temp;
1474      temp = patch->old_filename;
1475      patch->old_filename = patch->new_filename;
1476      patch->new_filename = temp;
1477    }
1478
1479  if (patch->old_filename == NULL || patch->new_filename == NULL)
1480    {
1481      /* Something went wrong, just discard the result. */
1482      patch = NULL;
1483    }
1484  else
1485    SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
1486                        result_pool, iterpool));
1487
1488  svn_pool_destroy(iterpool);
1489
1490  patch_file->next_patch_offset = 0;
1491  SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR,
1492                           &patch_file->next_patch_offset, scratch_pool));
1493
1494  if (patch)
1495    {
1496      /* Usually, hunks appear in the patch sorted by their original line
1497       * offset. But just in case they weren't parsed in this order for
1498       * some reason, we sort them so that our caller can assume that hunks
1499       * are sorted as if parsed from a usual patch. */
1500      svn_sort__array(patch->hunks, compare_hunks);
1501    }
1502
1503  *patch_p = patch;
1504  return SVN_NO_ERROR;
1505}
1506
1507svn_error_t *
1508svn_diff_close_patch_file(svn_patch_file_t *patch_file,
1509                          apr_pool_t *scratch_pool)
1510{
1511  return svn_error_trace(svn_io_file_close(patch_file->apr_file,
1512                                           scratch_pool));
1513}
1514