diff_file.c revision 262253
1/*
2 * diff_file.c :  routines for doing diffs on files
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_pools.h>
27#include <apr_general.h>
28#include <apr_file_io.h>
29#include <apr_file_info.h>
30#include <apr_time.h>
31#include <apr_mmap.h>
32#include <apr_getopt.h>
33
34#include "svn_error.h"
35#include "svn_diff.h"
36#include "svn_types.h"
37#include "svn_string.h"
38#include "svn_subst.h"
39#include "svn_io.h"
40#include "svn_utf.h"
41#include "svn_pools.h"
42#include "diff.h"
43#include "svn_private_config.h"
44#include "svn_path.h"
45#include "svn_ctype.h"
46
47#include "private/svn_utf_private.h"
48#include "private/svn_eol_private.h"
49#include "private/svn_dep_compat.h"
50#include "private/svn_adler32.h"
51#include "private/svn_diff_private.h"
52
53/* A token, i.e. a line read from a file. */
54typedef struct svn_diff__file_token_t
55{
56  /* Next token in free list. */
57  struct svn_diff__file_token_t *next;
58  svn_diff_datasource_e datasource;
59  /* Offset in the datasource. */
60  apr_off_t offset;
61  /* Offset of the normalized token (may skip leading whitespace) */
62  apr_off_t norm_offset;
63  /* Total length - before normalization. */
64  apr_off_t raw_length;
65  /* Total length - after normalization. */
66  apr_off_t length;
67} svn_diff__file_token_t;
68
69
70typedef struct svn_diff__file_baton_t
71{
72  const svn_diff_file_options_t *options;
73
74  struct file_info {
75    const char *path;  /* path to this file, absolute or relative to CWD */
76
77    /* All the following fields are active while this datasource is open */
78    apr_file_t *file;  /* handle of this file */
79    apr_off_t size;    /* total raw size in bytes of this file */
80
81    /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
82    int chunk;     /* the current chunk number, zero-based */
83    char *buffer;  /* a buffer containing the current chunk */
84    char *curp;    /* current position in the current chunk */
85    char *endp;    /* next memory address after the current chunk */
86
87    svn_diff__normalize_state_t normalize_state;
88
89    /* Where the identical suffix starts in this datasource */
90    int suffix_start_chunk;
91    apr_off_t suffix_offset_in_chunk;
92  } files[4];
93
94  /* List of free tokens that may be reused. */
95  svn_diff__file_token_t *tokens;
96
97  apr_pool_t *pool;
98} svn_diff__file_baton_t;
99
100static int
101datasource_to_index(svn_diff_datasource_e datasource)
102{
103  switch (datasource)
104    {
105    case svn_diff_datasource_original:
106      return 0;
107
108    case svn_diff_datasource_modified:
109      return 1;
110
111    case svn_diff_datasource_latest:
112      return 2;
113
114    case svn_diff_datasource_ancestor:
115      return 3;
116    }
117
118  return -1;
119}
120
/* Files are read in chunks of 128k.  There is no support for this number
 * whatsoever.  If there is a number someone comes up with that has some
 * argumentation, let's use that.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Byte offset at which chunk number CHUNK starts.  The chunk number is
 * widened to apr_off_t before shifting: chunk numbers are kept in plain
 * ints, and shifting an int would overflow (undefined behaviour) as soon
 * as the resulting offset reaches 2GB. */
#define chunk_to_offset(chunk) ((apr_off_t)(chunk) << CHUNK_SHIFT)
/* Number of the chunk that contains byte offset OFFSET. */
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
/* Position of byte offset OFFSET relative to the start of its chunk. */
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
134
135
136/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
137 * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
138 */
139static APR_INLINE svn_error_t *
140read_chunk(apr_file_t *file, const char *path,
141           char *buffer, apr_off_t length,
142           apr_off_t offset, apr_pool_t *pool)
143{
144  /* XXX: The final offset may not be the one we asked for.
145   * XXX: Check.
146   */
147  SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
148  return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
149                                NULL, NULL, pool);
150}
151
152
153/* Map or read a file at PATH. *BUFFER will point to the file
154 * contents; if the file was mapped, *FILE and *MM will contain the
155 * mmap context; otherwise they will be NULL.  SIZE will contain the
156 * file size.  Allocate from POOL.
157 */
158#if APR_HAS_MMAP
159#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
160#define MMAP_T_ARG(NAME)   &(NAME),
161#else
162#define MMAP_T_PARAM(NAME)
163#define MMAP_T_ARG(NAME)
164#endif
165
166static svn_error_t *
167map_or_read_file(apr_file_t **file,
168                 MMAP_T_PARAM(mm)
169                 char **buffer, apr_size_t *size_p,
170                 const char *path, apr_pool_t *pool)
171{
172  apr_finfo_t finfo;
173  apr_status_t rv;
174  apr_size_t size;
175
176  *buffer = NULL;
177
178  SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
179  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
180
181  if (finfo.size > APR_SIZE_MAX)
182    {
183      return svn_error_createf(APR_ENOMEM, NULL,
184                               _("File '%s' is too large to be read in "
185                                 "to memory"), path);
186    }
187
188  size = (apr_size_t) finfo.size;
189#if APR_HAS_MMAP
190  if (size > APR_MMAP_THRESHOLD)
191    {
192      rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
193      if (rv == APR_SUCCESS)
194        {
195          *buffer = (*mm)->mm;
196        }
197      else
198        {
199          /* Clear *MM because output parameters are undefined on error. */
200          *mm = NULL;
201        }
202
203      /* On failure we just fall through and try reading the file into
204       * memory instead.
205       */
206    }
207#endif /* APR_HAS_MMAP */
208
209   if (*buffer == NULL && size > 0)
210    {
211      *buffer = apr_palloc(pool, size);
212
213      SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
214
215      /* Since we have the entire contents of the file we can
216       * close it now.
217       */
218      SVN_ERR(svn_io_file_close(*file, pool));
219
220      *file = NULL;
221    }
222
223  *size_p = size;
224
225  return SVN_NO_ERROR;
226}
227
228
/* Advance the curp pointer of each of the FILES_LEN entries of ALL_FILES by
 * one byte.  As long as curp stays inside the current chunk this is a plain
 * pointer increment; otherwise increment_chunk() takes over, which moves a
 * file that sits before its beginning back to its first byte, loads the
 * next chunk and points curp at its start, or sets curp equal to endp to
 * signal EOF.  Errors are propagated with SVN_ERR, so this macro may return
 * from the enclosing function. */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__idx = 0;                                           \
                                                                             \
    while (svn_macro__idx < (files_len))                                     \
      {                                                                      \
        struct file_info *svn_macro__file = &(all_files)[svn_macro__idx];    \
                                                                             \
        if (svn_macro__file->curp < svn_macro__file->endp - 1)               \
          svn_macro__file->curp++;                                           \
        else                                                                 \
          SVN_ERR(increment_chunk(svn_macro__file, (pool)));                 \
        svn_macro__idx++;                                                    \
      }                                                                      \
  } while (0)
246
247
/* Move the curp pointer of each of the FILES_LEN entries of ALL_FILES back
 * by one byte.  While curp is past the start of the chunk buffer this is a
 * plain pointer decrement; otherwise decrement_chunk() takes over, which
 * loads the previous chunk and points curp at its last byte, or sets chunk
 * to -1 to signal BOF when the beginning of the file has been reached.
 * Errors are propagated with SVN_ERR, so this macro may return from the
 * enclosing function. */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__idx = 0;                                           \
                                                                             \
    while (svn_macro__idx < (files_len))                                     \
      {                                                                      \
        struct file_info *svn_macro__file = &(all_files)[svn_macro__idx];    \
                                                                             \
        if (svn_macro__file->curp > svn_macro__file->buffer)                 \
          svn_macro__file->curp--;                                           \
        else                                                                 \
          SVN_ERR(decrement_chunk(svn_macro__file, (pool)));                 \
        svn_macro__idx++;                                                    \
      }                                                                      \
  } while (0)
264
265
266static svn_error_t *
267increment_chunk(struct file_info *file, apr_pool_t *pool)
268{
269  apr_off_t length;
270  apr_off_t last_chunk = offset_to_chunk(file->size);
271
272  if (file->chunk == -1)
273    {
274      /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
275      file->chunk = 0;
276      file->curp = file->buffer;
277    }
278  else if (file->chunk == last_chunk)
279    {
280      /* We are at the last chunk. Indicate EOF by setting curp == endp. */
281      file->curp = file->endp;
282    }
283  else
284    {
285      /* There are still chunks left. Read next chunk and reset pointers. */
286      file->chunk++;
287      length = file->chunk == last_chunk ?
288        offset_in_chunk(file->size) : CHUNK_SIZE;
289      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
290                         length, chunk_to_offset(file->chunk),
291                         pool));
292      file->endp = file->buffer + length;
293      file->curp = file->buffer;
294    }
295
296  return SVN_NO_ERROR;
297}
298
299
300static svn_error_t *
301decrement_chunk(struct file_info *file, apr_pool_t *pool)
302{
303  if (file->chunk == 0)
304    {
305      /* We are already at the first chunk. Indicate BOF (Beginning Of File)
306         by setting chunk = -1 and curp = endp - 1. Both conditions are
307         important. They help the increment step to catch the BOF situation
308         in an efficient way. */
309      file->chunk--;
310      file->curp = file->endp - 1;
311    }
312  else
313    {
314      /* Read previous chunk and reset pointers. */
315      file->chunk--;
316      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
317                         CHUNK_SIZE, chunk_to_offset(file->chunk),
318                         pool));
319      file->endp = file->buffer + CHUNK_SIZE;
320      file->curp = file->endp - 1;
321    }
322
323  return SVN_NO_ERROR;
324}
325
326
327/* Check whether one of the FILEs has its pointers 'before' the beginning of
328 * the file (this can happen while scanning backwards). This is the case if
329 * one of them has chunk == -1. */
330static svn_boolean_t
331is_one_at_bof(struct file_info file[], apr_size_t file_len)
332{
333  apr_size_t i;
334
335  for (i = 0; i < file_len; i++)
336    if (file[i].chunk == -1)
337      return TRUE;
338
339  return FALSE;
340}
341
342/* Check whether one of the FILEs has its pointers at EOF (this is the case if
343 * one of them has curp == endp (this can only happen at the last chunk)) */
344static svn_boolean_t
345is_one_at_eof(struct file_info file[], apr_size_t file_len)
346{
347  apr_size_t i;
348
349  for (i = 0; i < file_len; i++)
350    if (file[i].curp == file[i].endp)
351      return TRUE;
352
353  return FALSE;
354}
355
/* Quickly tell whether the machine word CHUNK contains an eol character.
 * (Mainly copy-n-paste from eol.c#svn_eol__find_eol_start.)
 *
 * The word is XORed with each of the two masks, then bit 7 of every byte
 * that is non-zero after the XOR gets set.  If some byte has bit 7 clear
 * in either test word, the result is TRUE.
 * NOTE(review): SVN__R_MASK / SVN__N_MASK presumably hold '\r' resp. '\n'
 * replicated into every byte -- confirm in private/svn_eol_private.h.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  apr_uintptr_t cr_bits = chunk ^ SVN__R_MASK;
  apr_uintptr_t lf_bits = chunk ^ SVN__N_MASK;
  apr_uintptr_t high_bits;

  cr_bits |= (cr_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
  lf_bits |= (lf_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

  high_bits = cr_bits & lf_bits & SVN__BIT_7_SET;
  if (high_bits == SVN__BIT_7_SET)
    return FALSE;

  return TRUE;
}
#endif
372
373/* Find the prefix which is identical between all elements of the FILE array.
374 * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
375 * set to TRUE if one of the FILEs reached its end while scanning prefix,
376 * i.e. at least one file consisted entirely of prefix.  Otherwise,
377 * REACHED_ONE_EOF is set to FALSE.
378 *
379 * After this function is finished, the buffers, chunks, curp's and endp's
380 * of the FILEs are set to point at the first byte after the prefix. */
381static svn_error_t *
382find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
383                      struct file_info file[], apr_size_t file_len,
384                      apr_pool_t *pool)
385{
386  svn_boolean_t had_cr = FALSE;
387  svn_boolean_t is_match;
388  apr_off_t lines = 0;
389  apr_size_t i;
390
391  *reached_one_eof = FALSE;
392
393  for (i = 1, is_match = TRUE; i < file_len; i++)
394    is_match = is_match && *file[0].curp == *file[i].curp;
395  while (is_match)
396    {
397#if SVN_UNALIGNED_ACCESS_IS_OK
398      apr_ssize_t max_delta, delta;
399#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
400
401      /* ### TODO: see if we can take advantage of
402         diff options like ignore_eol_style or ignore_space. */
403      /* check for eol, and count */
404      if (*file[0].curp == '\r')
405        {
406          lines++;
407          had_cr = TRUE;
408        }
409      else if (*file[0].curp == '\n' && !had_cr)
410        {
411          lines++;
412        }
413      else
414        {
415          had_cr = FALSE;
416        }
417
418      INCREMENT_POINTERS(file, file_len, pool);
419
420#if SVN_UNALIGNED_ACCESS_IS_OK
421
422      /* Try to advance as far as possible with machine-word granularity.
423       * Determine how far we may advance with chunky ops without reaching
424       * endp for any of the files.
425       * Signedness is important here if curp gets close to endp.
426       */
427      max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
428      for (i = 1; i < file_len; i++)
429        {
430          delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
431          if (delta < max_delta)
432            max_delta = delta;
433        }
434
435      is_match = TRUE;
436      for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
437        {
438          apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
439          if (contains_eol(chunk))
440            break;
441
442          for (i = 1; i < file_len; i++)
443            if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
444              {
445                is_match = FALSE;
446                break;
447              }
448
449          if (! is_match)
450            break;
451        }
452
453      if (delta /* > 0*/)
454        {
455          /* We either found a mismatch or an EOL at or shortly behind curp+delta
456           * or we cannot proceed with chunky ops without exceeding endp.
457           * In any way, everything up to curp + delta is equal and not an EOL.
458           */
459          for (i = 0; i < file_len; i++)
460            file[i].curp += delta;
461
462          /* Skipped data without EOL markers, so last char was not a CR. */
463          had_cr = FALSE;
464        }
465#endif
466
467      *reached_one_eof = is_one_at_eof(file, file_len);
468      if (*reached_one_eof)
469        break;
470      else
471        for (i = 1, is_match = TRUE; i < file_len; i++)
472          is_match = is_match && *file[0].curp == *file[i].curp;
473    }
474
475  if (had_cr)
476    {
477      /* Check if we ended in the middle of a \r\n for one file, but \r for
478         another. If so, back up one byte, so the next loop will back up
479         the entire line. Also decrement lines, since we counted one
480         too many for the \r. */
481      svn_boolean_t ended_at_nonmatching_newline = FALSE;
482      for (i = 0; i < file_len; i++)
483        if (file[i].curp < file[i].endp)
484          ended_at_nonmatching_newline = ended_at_nonmatching_newline
485                                         || *file[i].curp == '\n';
486      if (ended_at_nonmatching_newline)
487        {
488          lines--;
489          DECREMENT_POINTERS(file, file_len, pool);
490        }
491    }
492
493  /* Back up one byte, so we point at the last identical byte */
494  DECREMENT_POINTERS(file, file_len, pool);
495
496  /* Back up to the last eol sequence (\n, \r\n or \r) */
497  while (!is_one_at_bof(file, file_len) &&
498         *file[0].curp != '\n' && *file[0].curp != '\r')
499    DECREMENT_POINTERS(file, file_len, pool);
500
501  /* Slide one byte forward, to point past the eol sequence */
502  INCREMENT_POINTERS(file, file_len, pool);
503
504  *prefix_lines = lines;
505
506  return SVN_NO_ERROR;
507}
508
509
/* How many identical suffix lines are left attached to the middle section.
 * These lines are not eliminated as suffix, so the token parsing and lcs
 * steps still get to see them.  The main purpose is backward compatibility
 * with the previous diff (and blame) output: when several diff solutions
 * exist, our lcs algorithm prefers taking common lines from the start
 * rather than from the end, and handing back some suffix lines gives it
 * enough wiggle room to find the exact same diff as before.
 *
 * The number 50 is more or less arbitrary, based on some real-world tests
 * with big files (and then doubling the required number to be on the safe
 * side).  This has a negligible effect on the power of the optimization. */
/* If you change this number, update test_identical_suffix() in
 * diff-diff3-test.c */
#if !defined(SUFFIX_LINES_TO_KEEP)
#define SUFFIX_LINES_TO_KEEP 50
#endif
525
526/* Find the suffix which is identical between all elements of the FILE array.
527 * Return the number of suffix lines in SUFFIX_LINES.
528 *
529 * Before this function is called the FILEs' pointers and chunks should be
530 * positioned right after the identical prefix (which is the case after
531 * find_identical_prefix), so we can determine where suffix scanning should
532 * ultimately stop. */
533static svn_error_t *
534find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
535                      apr_size_t file_len, apr_pool_t *pool)
536{
537  struct file_info file_for_suffix[4] = { { 0 }  };
538  apr_off_t length[4];
539  apr_off_t suffix_min_chunk0;
540  apr_off_t suffix_min_offset0;
541  apr_off_t min_file_size;
542  int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
543  svn_boolean_t is_match;
544  apr_off_t lines = 0;
545  svn_boolean_t had_cr;
546  svn_boolean_t had_nl;
547  apr_size_t i;
548
549  /* Initialize file_for_suffix[].
550     Read last chunk, position curp at last byte. */
551  for (i = 0; i < file_len; i++)
552    {
553      file_for_suffix[i].path = file[i].path;
554      file_for_suffix[i].file = file[i].file;
555      file_for_suffix[i].size = file[i].size;
556      file_for_suffix[i].chunk =
557        (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
558      length[i] = offset_in_chunk(file_for_suffix[i].size);
559      if (length[i] == 0)
560        {
561          /* last chunk is an empty chunk -> start at next-to-last chunk */
562          file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
563          length[i] = CHUNK_SIZE;
564        }
565
566      if (file_for_suffix[i].chunk == file[i].chunk)
567        {
568          /* Prefix ended in last chunk, so we can reuse the prefix buffer */
569          file_for_suffix[i].buffer = file[i].buffer;
570        }
571      else
572        {
573          /* There is at least more than 1 chunk,
574             so allocate full chunk size buffer */
575          file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
576          SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path,
577                             file_for_suffix[i].buffer, length[i],
578                             chunk_to_offset(file_for_suffix[i].chunk),
579                             pool));
580        }
581      file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
582      file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
583    }
584
585  /* Get the chunk and pointer offset (for file[0]) at which we should stop
586     scanning backward for the identical suffix, i.e. when we reach prefix. */
587  suffix_min_chunk0 = file[0].chunk;
588  suffix_min_offset0 = file[0].curp - file[0].buffer;
589
590  /* Compensate if other files are smaller than file[0] */
591  for (i = 1, min_file_size = file[0].size; i < file_len; i++)
592    if (file[i].size < min_file_size)
593      min_file_size = file[i].size;
594  if (file[0].size > min_file_size)
595    {
596      suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
597      suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
598    }
599
600  /* Scan backwards until mismatch or until we reach the prefix. */
601  for (i = 1, is_match = TRUE; i < file_len; i++)
602    is_match = is_match
603               && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
604  if (is_match && *file_for_suffix[0].curp != '\r'
605               && *file_for_suffix[0].curp != '\n')
606    /* Count an extra line for the last line not ending in an eol. */
607    lines++;
608
609  had_nl = FALSE;
610  while (is_match)
611    {
612      svn_boolean_t reached_prefix;
613#if SVN_UNALIGNED_ACCESS_IS_OK
614      /* Initialize the minimum pointer positions. */
615      const char *min_curp[4];
616      svn_boolean_t can_read_word;
617#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
618
619      /* ### TODO: see if we can take advantage of
620         diff options like ignore_eol_style or ignore_space. */
621      /* check for eol, and count */
622      if (*file_for_suffix[0].curp == '\n')
623        {
624          lines++;
625          had_nl = TRUE;
626        }
627      else if (*file_for_suffix[0].curp == '\r' && !had_nl)
628        {
629          lines++;
630        }
631      else
632        {
633          had_nl = FALSE;
634        }
635
636      DECREMENT_POINTERS(file_for_suffix, file_len, pool);
637
638#if SVN_UNALIGNED_ACCESS_IS_OK
639      for (i = 0; i < file_len; i++)
640        min_curp[i] = file_for_suffix[i].buffer;
641
642      /* If we are in the same chunk that contains the last part of the common
643         prefix, use the min_curp[0] pointer to make sure we don't get a
644         suffix that overlaps the already determined common prefix. */
645      if (file_for_suffix[0].chunk == suffix_min_chunk0)
646        min_curp[0] += suffix_min_offset0;
647
648      /* Scan quickly by reading with machine-word granularity. */
649      for (i = 0, can_read_word = TRUE; i < file_len; i++)
650        can_read_word = can_read_word
651                        && (  (file_for_suffix[i].curp + 1
652                                 - sizeof(apr_uintptr_t))
653                            > min_curp[i]);
654      while (can_read_word)
655        {
656          apr_uintptr_t chunk;
657
658          /* For each file curp is positioned at the current byte, but we
659             want to examine the current byte and the ones before the current
660             location as one machine word. */
661
662          chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663                                             - sizeof(apr_uintptr_t));
664          if (contains_eol(chunk))
665            break;
666
667          for (i = 1, is_match = TRUE; i < file_len; i++)
668            is_match = is_match
669                       && (   chunk
670                           == *(const apr_uintptr_t *)
671                                    (file_for_suffix[i].curp + 1
672                                       - sizeof(apr_uintptr_t)));
673
674          if (! is_match)
675            break;
676
677          for (i = 0; i < file_len; i++)
678            {
679              file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
680              can_read_word = can_read_word
681                              && (  (file_for_suffix[i].curp + 1
682                                       - sizeof(apr_uintptr_t))
683                                  > min_curp[i]);
684            }
685
686          /* We skipped some bytes, so there are no closing EOLs */
687          had_nl = FALSE;
688          had_cr = FALSE;
689        }
690
691      /* The > min_curp[i] check leaves at least one final byte for checking
692         in the non block optimized case below. */
693#endif
694
695      reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
696                       && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
697                          == suffix_min_offset0;
698      if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
699        break;
700
701      is_match = TRUE;
702      for (i = 1; i < file_len; i++)
703        is_match = is_match
704                   && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
705    }
706
707  /* Slide one byte forward, to point at the first byte of identical suffix */
708  INCREMENT_POINTERS(file_for_suffix, file_len, pool);
709
710  /* Slide forward until we find an eol sequence to add the rest of the line
711     we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
712     one file reaches its end. */
713  do
714    {
715      had_cr = FALSE;
716      while (!is_one_at_eof(file_for_suffix, file_len)
717             && *file_for_suffix[0].curp != '\n'
718             && *file_for_suffix[0].curp != '\r')
719        INCREMENT_POINTERS(file_for_suffix, file_len, pool);
720
721      /* Slide one or two more bytes, to point past the eol. */
722      if (!is_one_at_eof(file_for_suffix, file_len)
723          && *file_for_suffix[0].curp == '\r')
724        {
725          lines--;
726          had_cr = TRUE;
727          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
728        }
729      if (!is_one_at_eof(file_for_suffix, file_len)
730          && *file_for_suffix[0].curp == '\n')
731        {
732          if (!had_cr)
733            lines--;
734          INCREMENT_POINTERS(file_for_suffix, file_len, pool);
735        }
736    }
737  while (!is_one_at_eof(file_for_suffix, file_len)
738         && suffix_lines_to_keep--);
739
740  if (is_one_at_eof(file_for_suffix, file_len))
741    lines = 0;
742
743  /* Save the final suffix information in the original file_info */
744  for (i = 0; i < file_len; i++)
745    {
746      file[i].suffix_start_chunk = file_for_suffix[i].chunk;
747      file[i].suffix_offset_in_chunk =
748        file_for_suffix[i].curp - file_for_suffix[i].buffer;
749    }
750
751  *suffix_lines = lines;
752
753  return SVN_NO_ERROR;
754}
755
756
757/* Let FILE stand for the array of file_info struct elements of BATON->files
758 * that are indexed by the elements of the DATASOURCE array.
759 * BATON's type is (svn_diff__file_baton_t *).
760 *
761 * For each file in the FILE array, open the file at FILE.path; initialize
762 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
763 * buffer and read the first chunk.  Then find the prefix and suffix lines
764 * which are identical between all the files.  Return the number of identical
765 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
766 * SUFFIX_LINES.
767 *
768 * Finding the identical prefix and suffix allows us to exclude those from the
769 * rest of the diff algorithm, which increases performance by reducing the
770 * problem space.
771 *
772 * Implements svn_diff_fns2_t::datasources_open. */
773static svn_error_t *
774datasources_open(void *baton,
775                 apr_off_t *prefix_lines,
776                 apr_off_t *suffix_lines,
777                 const svn_diff_datasource_e *datasources,
778                 apr_size_t datasources_len)
779{
780  svn_diff__file_baton_t *file_baton = baton;
781  struct file_info files[4];
782  apr_finfo_t finfo[4];
783  apr_off_t length[4];
784#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
785  svn_boolean_t reached_one_eof;
786#endif
787  apr_size_t i;
788
789  /* Make sure prefix_lines and suffix_lines are set correctly, even if we
790   * exit early because one of the files is empty. */
791  *prefix_lines = 0;
792  *suffix_lines = 0;
793
794  /* Open datasources and read first chunk */
795  for (i = 0; i < datasources_len; i++)
796    {
797      struct file_info *file
798          = &file_baton->files[datasource_to_index(datasources[i])];
799      SVN_ERR(svn_io_file_open(&file->file, file->path,
800                               APR_READ, APR_OS_DEFAULT, file_baton->pool));
801      SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
802                                   file->file, file_baton->pool));
803      file->size = finfo[i].size;
804      length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
805      file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
806      SVN_ERR(read_chunk(file->file, file->path, file->buffer,
807                         length[i], 0, file_baton->pool));
808      file->endp = file->buffer + length[i];
809      file->curp = file->buffer;
810      /* Set suffix_start_chunk to a guard value, so if suffix scanning is
811       * skipped because one of the files is empty, or because of
812       * reached_one_eof, we can still easily check for the suffix during
813       * token reading (datasource_get_next_token). */
814      file->suffix_start_chunk = -1;
815
816      files[i] = *file;
817    }
818
819  for (i = 0; i < datasources_len; i++)
820    if (length[i] == 0)
821      /* There will not be any identical prefix/suffix, so we're done. */
822      return SVN_NO_ERROR;
823
824#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
825
826  SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
827                                files, datasources_len, file_baton->pool));
828
829  if (!reached_one_eof)
830    /* No file consisted totally of identical prefix,
831     * so there may be some identical suffix.  */
832    SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
833                                  file_baton->pool));
834
835#endif
836
837  /* Copy local results back to baton. */
838  for (i = 0; i < datasources_len; i++)
839    file_baton->files[datasource_to_index(datasources[i])] = files[i];
840
841  return SVN_NO_ERROR;
842}
843
844
845/* Implements svn_diff_fns2_t::datasource_close */
846static svn_error_t *
847datasource_close(void *baton, svn_diff_datasource_e datasource)
848{
849  /* Do nothing.  The compare_token function needs previous datasources
850   * to stay available until all datasources are processed.
851   */
852
853  return SVN_NO_ERROR;
854}
855
856/* Implements svn_diff_fns2_t::datasource_get_next_token */
857static svn_error_t *
858datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
859                          svn_diff_datasource_e datasource)
860{
861  svn_diff__file_baton_t *file_baton = baton;
862  svn_diff__file_token_t *file_token;
863  struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
864  char *endp;
865  char *curp;
866  char *eol;
867  apr_off_t last_chunk;
868  apr_off_t length;
869  apr_uint32_t h = 0;
870  /* Did the last chunk end in a CR character? */
871  svn_boolean_t had_cr = FALSE;
872
873  *token = NULL;
874
875  curp = file->curp;
876  endp = file->endp;
877
878  last_chunk = offset_to_chunk(file->size);
879
880  /* Are we already at the end of a chunk? */
881  if (curp == endp)
882    {
883      /* Are we at EOF */
884      if (last_chunk == file->chunk)
885        return SVN_NO_ERROR; /* EOF */
886
887      /* Or right before an identical suffix in the next chunk? */
888      if (file->chunk + 1 == file->suffix_start_chunk
889          && file->suffix_offset_in_chunk == 0)
890        return SVN_NO_ERROR;
891    }
892
893  /* Stop when we encounter the identical suffix. If suffix scanning was not
894   * performed, suffix_start_chunk will be -1, so this condition will never
895   * be true. */
896  if (file->chunk == file->suffix_start_chunk
897      && (curp - file->buffer) == file->suffix_offset_in_chunk)
898    return SVN_NO_ERROR;
899
900  /* Allocate a new token, or fetch one from the "reusable tokens" list. */
901  file_token = file_baton->tokens;
902  if (file_token)
903    {
904      file_baton->tokens = file_token->next;
905    }
906  else
907    {
908      file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
909    }
910
911  file_token->datasource = datasource;
912  file_token->offset = chunk_to_offset(file->chunk)
913                       + (curp - file->buffer);
914  file_token->norm_offset = file_token->offset;
915  file_token->raw_length = 0;
916  file_token->length = 0;
917
918  while (1)
919    {
920      eol = svn_eol__find_eol_start(curp, endp - curp);
921      if (eol)
922        {
923          had_cr = (*eol == '\r');
924          eol++;
925          /* If we have the whole eol sequence in the chunk... */
926          if (!(had_cr && eol == endp))
927            {
928              /* Also skip past the '\n' in an '\r\n' sequence. */
929              if (had_cr && *eol == '\n')
930                eol++;
931              break;
932            }
933        }
934
935      if (file->chunk == last_chunk)
936        {
937          eol = endp;
938          break;
939        }
940
941      length = endp - curp;
942      file_token->raw_length += length;
943      {
944        char *c = curp;
945
946        svn_diff__normalize_buffer(&c, &length,
947                                   &file->normalize_state,
948                                   curp, file_baton->options);
949        if (file_token->length == 0)
950          {
951            /* When we are reading the first part of the token, move the
952               normalized offset past leading ignored characters, if any. */
953            file_token->norm_offset += (c - curp);
954          }
955        file_token->length += length;
956        h = svn__adler32(h, c, length);
957      }
958
959      curp = endp = file->buffer;
960      file->chunk++;
961      length = file->chunk == last_chunk ?
962        offset_in_chunk(file->size) : CHUNK_SIZE;
963      endp += length;
964      file->endp = endp;
965
966      /* Issue #4283: Normally we should have checked for reaching the skipped
967         suffix here, but because we assume that a suffix always starts on a
968         line and token boundary we rely on catching the suffix earlier in this
969         function.
970
971         When changing things here, make sure the whitespace settings are
972         applied, or we mught not reach the exact suffix boundary as token
973         boundary. */
974      SVN_ERR(read_chunk(file->file, file->path,
975                         curp, length,
976                         chunk_to_offset(file->chunk),
977                         file_baton->pool));
978
979      /* If the last chunk ended in a CR, we're done. */
980      if (had_cr)
981        {
982          eol = curp;
983          if (*curp == '\n')
984            ++eol;
985          break;
986        }
987    }
988
989  length = eol - curp;
990  file_token->raw_length += length;
991  file->curp = eol;
992
993  /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
994   * with a spurious empty token.  Avoid returning it.
995   * Note that we use the unnormalized length; we don't want a line containing
996   * only spaces (and no trailing newline) to appear like a non-existent
997   * line. */
998  if (file_token->raw_length > 0)
999    {
1000      char *c = curp;
1001      svn_diff__normalize_buffer(&c, &length,
1002                                 &file->normalize_state,
1003                                 curp, file_baton->options);
1004      if (file_token->length == 0)
1005        {
1006          /* When we are reading the first part of the token, move the
1007             normalized offset past leading ignored characters, if any. */
1008          file_token->norm_offset += (c - curp);
1009        }
1010
1011      file_token->length += length;
1012
1013      *hash = svn__adler32(h, c, length);
1014      *token = file_token;
1015    }
1016
1017  return SVN_NO_ERROR;
1018}
1019
1020#define COMPARE_CHUNK_SIZE 4096
1021
1022/* Implements svn_diff_fns2_t::token_compare */
1023static svn_error_t *
1024token_compare(void *baton, void *token1, void *token2, int *compare)
1025{
1026  svn_diff__file_baton_t *file_baton = baton;
1027  svn_diff__file_token_t *file_token[2];
1028  char buffer[2][COMPARE_CHUNK_SIZE];
1029  char *bufp[2];
1030  apr_off_t offset[2];
1031  struct file_info *file[2];
1032  apr_off_t length[2];
1033  apr_off_t total_length;
1034  /* How much is left to read of each token from the file. */
1035  apr_off_t raw_length[2];
1036  int i;
1037  svn_diff__normalize_state_t state[2];
1038
1039  file_token[0] = token1;
1040  file_token[1] = token2;
1041  if (file_token[0]->length < file_token[1]->length)
1042    {
1043      *compare = -1;
1044      return SVN_NO_ERROR;
1045    }
1046
1047  if (file_token[0]->length > file_token[1]->length)
1048    {
1049      *compare = 1;
1050      return SVN_NO_ERROR;
1051    }
1052
1053  total_length = file_token[0]->length;
1054  if (total_length == 0)
1055    {
1056      *compare = 0;
1057      return SVN_NO_ERROR;
1058    }
1059
1060  for (i = 0; i < 2; ++i)
1061    {
1062      int idx = datasource_to_index(file_token[i]->datasource);
1063
1064      file[i] = &file_baton->files[idx];
1065      offset[i] = file_token[i]->norm_offset;
1066      state[i] = svn_diff__normalize_state_normal;
1067
1068      if (offset_to_chunk(offset[i]) == file[i]->chunk)
1069        {
1070          /* If the start of the token is in memory, the entire token is
1071           * in memory.
1072           */
1073          bufp[i] = file[i]->buffer;
1074          bufp[i] += offset_in_chunk(offset[i]);
1075
1076          length[i] = total_length;
1077          raw_length[i] = 0;
1078        }
1079      else
1080        {
1081          apr_off_t skipped;
1082
1083          length[i] = 0;
1084
1085          /* When we skipped the first part of the token via the whitespace
1086             normalization we must reduce the raw length of the token */
1087          skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1088
1089          raw_length[i] = file_token[i]->raw_length - skipped;
1090        }
1091    }
1092
1093  do
1094    {
1095      apr_off_t len;
1096      for (i = 0; i < 2; i++)
1097        {
1098          if (length[i] == 0)
1099            {
1100              /* Error if raw_length is 0, that's an unexpected change
1101               * of the file that can happen when ingoring whitespace
1102               * and that can lead to an infinite loop. */
1103              if (raw_length[i] == 0)
1104                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1105                                         NULL,
1106                                         _("The file '%s' changed unexpectedly"
1107                                           " during diff"),
1108                                         file[i]->path);
1109
1110              /* Read a chunk from disk into a buffer */
1111              bufp[i] = buffer[i];
1112              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1113                COMPARE_CHUNK_SIZE : raw_length[i];
1114
1115              SVN_ERR(read_chunk(file[i]->file,
1116                                 file[i]->path,
1117                                 bufp[i], length[i], offset[i],
1118                                 file_baton->pool));
1119              offset[i] += length[i];
1120              raw_length[i] -= length[i];
1121              /* bufp[i] gets reset to buffer[i] before reading each chunk,
1122                 so, overwriting it isn't a problem */
1123              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1124                                         bufp[i], file_baton->options);
1125
1126              /* assert(length[i] == file_token[i]->length); */
1127            }
1128        }
1129
1130      len = length[0] > length[1] ? length[1] : length[0];
1131
1132      /* Compare two chunks (that could be entire tokens if they both reside
1133       * in memory).
1134       */
1135      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1136      if (*compare != 0)
1137        return SVN_NO_ERROR;
1138
1139      total_length -= len;
1140      length[0] -= len;
1141      length[1] -= len;
1142      bufp[0] += len;
1143      bufp[1] += len;
1144    }
1145  while(total_length > 0);
1146
1147  *compare = 0;
1148  return SVN_NO_ERROR;
1149}
1150
1151
1152/* Implements svn_diff_fns2_t::token_discard */
1153static void
1154token_discard(void *baton, void *token)
1155{
1156  svn_diff__file_baton_t *file_baton = baton;
1157  svn_diff__file_token_t *file_token = token;
1158
1159  /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1160  file_token->next = file_baton->tokens;
1161  file_baton->tokens = file_token;
1162}
1163
1164
1165/* Implements svn_diff_fns2_t::token_discard_all */
1166static void
1167token_discard_all(void *baton)
1168{
1169  svn_diff__file_baton_t *file_baton = baton;
1170
1171  /* Discard all memory in use by the tokens, and close all open files. */
1172  svn_pool_clear(file_baton->pool);
1173}
1174
1175
/* The svn_diff_fns2_t callback table that the svn_diff_file_diff*_2()
 * functions in this file hand to the svn_diff_diff*_2() routines.  The
 * initializers are positional, so they must stay in the member order of
 * svn_diff_fns2_t. */
1176static const svn_diff_fns2_t svn_diff__file_vtable =
1177{
1178  datasources_open,
1179  datasource_close,
1180  datasource_get_next_token,
1181  token_compare,
1182  token_discard,
1183  token_discard_all
1184};
1185
1186/* Id for the --ignore-eol-style option, which doesn't have a short name. */
/* NOTE(review): 256 is presumably chosen because it cannot collide with a
 * single-character option id -- confirm before adding further ids. */
1187#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1188
1189/* Options supported by svn_diff_file_options_parse(). */
/* Each entry gives the long option name, the id that
 * svn_diff_file_options_parse() switches on, a has-argument flag and a
 * description (see APR's apr_getopt_option_t).  The flag is 0 and the
 * description NULL for every entry.  The all-NULL/zero entry terminates
 * the table. */
1190static const apr_getopt_option_t diff_options[] =
1191{
  /* Selects svn_diff_file_ignore_space_change, unless a space option has
   * already been set. */
1192  { "ignore-space-change", 'b', 0, NULL },
  /* Selects svn_diff_file_ignore_space_all. */
1193  { "ignore-all-space", 'w', 0, NULL },
  /* Sets ignore_eol_style. */
1194  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  /* Sets show_c_function. */
1195  { "show-c-function", 'p', 0, NULL },
1196  /* ### For compatibility; we don't support the argument to -u, because
1197   * ### we don't have optional argument support. */
  /* Accepted by the parser, but it has no effect on the options. */
1198  { "unified", 'u', 0, NULL },
1199  { NULL, 0, 0, NULL }
1200};
1201
1202svn_diff_file_options_t *
1203svn_diff_file_options_create(apr_pool_t *pool)
1204{
1205  return apr_pcalloc(pool, sizeof(svn_diff_file_options_t));
1206}
1207
1208/* A baton for use with opt_parsing_error_func(). */
1209struct opt_parsing_error_baton_t
1210{
  /* Receives the error built from the most recent message reported by
   * apr_getopt_long(); stays NULL while no message has been reported. */
1211  svn_error_t *err;
  /* Pool in which the message text is formatted. */
1212  apr_pool_t *pool;
1213};
1214
1215/* Store an error message from apr_getopt_long().  Set BATON->err to a new
1216 * error with a message generated from FMT and the remaining arguments.
1217 * Implements apr_getopt_err_fn_t. */
1218static void
1219opt_parsing_error_func(void *baton,
1220                       const char *fmt, ...)
1221{
1222  struct opt_parsing_error_baton_t *b = baton;
1223  const char *message;
1224  va_list ap;
1225
1226  va_start(ap, fmt);
1227  message = apr_pvsprintf(b->pool, fmt, ap);
1228  va_end(ap);
1229
1230  /* Skip leading ": " (if present, which it always is in known cases). */
1231  if (strncmp(message, ": ", 2) == 0)
1232    message += 2;
1233
1234  b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1235}
1236
1237svn_error_t *
1238svn_diff_file_options_parse(svn_diff_file_options_t *options,
1239                            const apr_array_header_t *args,
1240                            apr_pool_t *pool)
1241{
1242  apr_getopt_t *os;
1243  struct opt_parsing_error_baton_t opt_parsing_error_baton;
1244  /* Make room for each option (starting at index 1) plus trailing NULL. */
1245  const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1246
1247  opt_parsing_error_baton.err = NULL;
1248  opt_parsing_error_baton.pool = pool;
1249
1250  argv[0] = "";
1251  memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts);
1252  argv[args->nelts + 1] = NULL;
1253
1254  apr_getopt_init(&os, pool, args->nelts + 1, argv);
1255
1256  /* Capture any error message from apr_getopt_long().  This will typically
1257   * say which option is wrong, which we would not otherwise know. */
1258  os->errfn = opt_parsing_error_func;
1259  os->errarg = &opt_parsing_error_baton;
1260
1261  while (1)
1262    {
1263      const char *opt_arg;
1264      int opt_id;
1265      apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1266
1267      if (APR_STATUS_IS_EOF(err))
1268        break;
1269      if (err)
1270        /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1271         * it always will produce one, but never mind if it doesn't.  Avoid
1272         * using the message associated with the return code ERR, because
1273         * it refers to the "command line" which may be misleading here. */
1274        return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1275                                opt_parsing_error_baton.err,
1276                                _("Error in options to internal diff"));
1277
1278      switch (opt_id)
1279        {
1280        case 'b':
1281          /* -w takes precedence over -b. */
1282          if (! options->ignore_space)
1283            options->ignore_space = svn_diff_file_ignore_space_change;
1284          break;
1285        case 'w':
1286          options->ignore_space = svn_diff_file_ignore_space_all;
1287          break;
1288        case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1289          options->ignore_eol_style = TRUE;
1290          break;
1291        case 'p':
1292          options->show_c_function = TRUE;
1293          break;
1294        default:
1295          break;
1296        }
1297    }
1298
1299  /* Check for spurious arguments. */
1300  if (os->ind < os->argc)
1301    return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1302                             _("Invalid argument '%s' in diff options"),
1303                             os->argv[os->ind]);
1304
1305  return SVN_NO_ERROR;
1306}
1307
1308svn_error_t *
1309svn_diff_file_diff_2(svn_diff_t **diff,
1310                     const char *original,
1311                     const char *modified,
1312                     const svn_diff_file_options_t *options,
1313                     apr_pool_t *pool)
1314{
1315  svn_diff__file_baton_t baton = { 0 };
1316
1317  baton.options = options;
1318  baton.files[0].path = original;
1319  baton.files[1].path = modified;
1320  baton.pool = svn_pool_create(pool);
1321
1322  SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1323
1324  svn_pool_destroy(baton.pool);
1325  return SVN_NO_ERROR;
1326}
1327
1328svn_error_t *
1329svn_diff_file_diff3_2(svn_diff_t **diff,
1330                      const char *original,
1331                      const char *modified,
1332                      const char *latest,
1333                      const svn_diff_file_options_t *options,
1334                      apr_pool_t *pool)
1335{
1336  svn_diff__file_baton_t baton = { 0 };
1337
1338  baton.options = options;
1339  baton.files[0].path = original;
1340  baton.files[1].path = modified;
1341  baton.files[2].path = latest;
1342  baton.pool = svn_pool_create(pool);
1343
1344  SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1345
1346  svn_pool_destroy(baton.pool);
1347  return SVN_NO_ERROR;
1348}
1349
1350svn_error_t *
1351svn_diff_file_diff4_2(svn_diff_t **diff,
1352                      const char *original,
1353                      const char *modified,
1354                      const char *latest,
1355                      const char *ancestor,
1356                      const svn_diff_file_options_t *options,
1357                      apr_pool_t *pool)
1358{
1359  svn_diff__file_baton_t baton = { 0 };
1360
1361  baton.options = options;
1362  baton.files[0].path = original;
1363  baton.files[1].path = modified;
1364  baton.files[2].path = latest;
1365  baton.files[3].path = ancestor;
1366  baton.pool = svn_pool_create(pool);
1367
1368  SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1369
1370  svn_pool_destroy(baton.pool);
1371  return SVN_NO_ERROR;
1372}
1373
1374
1375/** Display unified context diffs **/
1376
1377/* Maximum length of the extra context to show when show_c_function is set.
1378 * GNU diff uses 40, let's be brave and use 50 instead. */
1379#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
/* State shared by the unified-diff output helpers in this file.  In the
 * two-element arrays, index 0 belongs to the original file and index 1 to
 * the modified file. */
1380typedef struct svn_diff__file_output_baton_t
1381{
  /* Stream that flushed hunks (header followed by hunk text) are written
   * to. */
1382  svn_stream_t *output_stream;
  /* Encoding handed to the helpers that produce the hunk header and the
   * no-newline marker. */
1383  const char *header_encoding;
1384
1385  /* Cached markers, in header_encoding. */
  /* One of them is put in front of every line added to the hunk,
   * according to the line's type (context, delete or insert). */
1386  const char *context_str;
1387  const char *delete_str;
1388  const char *insert_str;
1389
  /* NOTE(review): presumably the paths of the two files; not referenced by
   * the output helpers visible here -- confirm against the caller. */
1390  const char *path[2];
  /* The files that lines are read from. */
1391  apr_file_t *file[2];
1392
  /* Number of lines consumed from each file so far.  It is also advanced
   * when a line is requested at EOF. */
1393  apr_off_t   current_line[2];
1394
  /* Read buffer per file, the number of unconsumed bytes left in it, and
   * the position of the first unconsumed byte. */
1395  char        buffer[2][4096];
1396  apr_size_t  length[2];
1397  char       *curp[2];
1398
  /* Zero-based first line and number of lines of the pending hunk, per
   * file, and the hunk text collected so far. */
1399  apr_off_t   hunk_start[2];
1400  apr_off_t   hunk_length[2];
1401  svn_stringbuf_t *hunk;
1402
1403  /* Should we emit C functions in the unified diff header */
1404  svn_boolean_t show_c_function;
1405  /* Extra strings to skip over if we match. */
  /* Used as a list of glob patterns: a line matching one of them is not
   * picked up as extra context. */
1406  apr_array_header_t *extra_skip_match;
1407  /* "Context" to append to the @@ line when the show_c_function option
1408   * is set. */
1409  svn_stringbuf_t *extra_context;
1410  /* Extra context for the current hunk. */
  /* At most SVN_DIFF__EXTRA_CONTEXT_LENGTH bytes are copied in; the final
   * byte is never written after zero-initialization, which keeps the
   * array 0-terminated. */
1411  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1412
  /* Pool handed to the I/O and formatting helpers. */
1413  apr_pool_t *pool;
1414} svn_diff__file_output_baton_t;
1415
/* Tells output_unified_line() what to do with the line it reads. */
1416typedef enum svn_diff__file_output_unified_type_e
1417{
  /* Consume the line without adding it to the hunk. */
1418  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; it counts towards the hunk
   * length of both files. */
1419  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; it counts towards the hunk
   * length of the original file. */
1420  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; it counts towards the hunk
   * length of the modified file. */
1421  svn_diff__file_output_unified_insert
1422} svn_diff__file_output_unified_type_e;
1423
1424
1425static svn_error_t *
1426output_unified_line(svn_diff__file_output_baton_t *baton,
1427                    svn_diff__file_output_unified_type_e type, int idx)
1428{
1429  char *curp;
1430  char *eol;
1431  apr_size_t length;
1432  svn_error_t *err;
1433  svn_boolean_t bytes_processed = FALSE;
1434  svn_boolean_t had_cr = FALSE;
1435  /* Are we collecting extra context? */
1436  svn_boolean_t collect_extra = FALSE;
1437
1438  length = baton->length[idx];
1439  curp = baton->curp[idx];
1440
1441  /* Lazily update the current line even if we're at EOF.
1442   * This way we fake output of context at EOF
1443   */
1444  baton->current_line[idx]++;
1445
1446  if (length == 0 && apr_file_eof(baton->file[idx]))
1447    {
1448      return SVN_NO_ERROR;
1449    }
1450
1451  do
1452    {
1453      if (length > 0)
1454        {
1455          if (!bytes_processed)
1456            {
1457              switch (type)
1458                {
1459                case svn_diff__file_output_unified_context:
1460                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1461                  baton->hunk_length[0]++;
1462                  baton->hunk_length[1]++;
1463                  break;
1464                case svn_diff__file_output_unified_delete:
1465                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1466                  baton->hunk_length[0]++;
1467                  break;
1468                case svn_diff__file_output_unified_insert:
1469                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1470                  baton->hunk_length[1]++;
1471                  break;
1472                default:
1473                  break;
1474                }
1475
1476              if (baton->show_c_function
1477                  && (type == svn_diff__file_output_unified_skip
1478                      || type == svn_diff__file_output_unified_context)
1479                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1480                  && !svn_cstring_match_glob_list(curp,
1481                                                  baton->extra_skip_match))
1482                {
1483                  svn_stringbuf_setempty(baton->extra_context);
1484                  collect_extra = TRUE;
1485                }
1486            }
1487
1488          eol = svn_eol__find_eol_start(curp, length);
1489
1490          if (eol != NULL)
1491            {
1492              apr_size_t len;
1493
1494              had_cr = (*eol == '\r');
1495              eol++;
1496              len = (apr_size_t)(eol - curp);
1497
1498              if (! had_cr || len < length)
1499                {
1500                  if (had_cr && *eol == '\n')
1501                    {
1502                      ++eol;
1503                      ++len;
1504                    }
1505
1506                  length -= len;
1507
1508                  if (type != svn_diff__file_output_unified_skip)
1509                    {
1510                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
1511                    }
1512                  if (collect_extra)
1513                    {
1514                      svn_stringbuf_appendbytes(baton->extra_context,
1515                                                curp, len);
1516                    }
1517
1518                  baton->curp[idx] = eol;
1519                  baton->length[idx] = length;
1520
1521                  err = SVN_NO_ERROR;
1522
1523                  break;
1524                }
1525            }
1526
1527          if (type != svn_diff__file_output_unified_skip)
1528            {
1529              svn_stringbuf_appendbytes(baton->hunk, curp, length);
1530            }
1531
1532          if (collect_extra)
1533            {
1534              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1535            }
1536
1537          bytes_processed = TRUE;
1538        }
1539
1540      curp = baton->buffer[idx];
1541      length = sizeof(baton->buffer[idx]);
1542
1543      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1544
1545      /* If the last chunk ended with a CR, we look for an LF at the start
1546         of this chunk. */
1547      if (had_cr)
1548        {
1549          if (! err && length > 0 && *curp == '\n')
1550            {
1551              if (type != svn_diff__file_output_unified_skip)
1552                {
1553                  svn_stringbuf_appendbyte(baton->hunk, *curp);
1554                }
1555              /* We don't append the LF to extra_context, since it would
1556               * just be stripped anyway. */
1557              ++curp;
1558              --length;
1559            }
1560
1561          baton->curp[idx] = curp;
1562          baton->length[idx] = length;
1563
1564          break;
1565        }
1566    }
1567  while (! err);
1568
1569  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1570    return err;
1571
1572  if (err && APR_STATUS_IS_EOF(err->apr_err))
1573    {
1574      svn_error_clear(err);
1575      /* Special case if we reach the end of file AND the last line is in the
1576         changed range AND the file doesn't end with a newline */
1577      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1578          && ! had_cr)
1579        {
1580          SVN_ERR(svn_diff__unified_append_no_newline_msg(
1581                    baton->hunk, baton->header_encoding, baton->pool));
1582        }
1583
1584      baton->length[idx] = 0;
1585    }
1586
1587  return SVN_NO_ERROR;
1588}
1589
1590static APR_INLINE svn_error_t *
1591output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1592                          int source,
1593                          svn_diff__file_output_unified_type_e type,
1594                          apr_off_t until)
1595{
1596  while (output_baton->current_line[source] < until)
1597    {
1598      SVN_ERR(output_unified_line(output_baton, type, source));
1599    }
1600  return SVN_NO_ERROR;
1601}
1602
1603static svn_error_t *
1604output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1605{
1606  apr_off_t target_line;
1607  apr_size_t hunk_len;
1608  apr_off_t old_start;
1609  apr_off_t new_start;
1610
1611  if (svn_stringbuf_isempty(baton->hunk))
1612    {
1613      /* Nothing to flush */
1614      return SVN_NO_ERROR;
1615    }
1616
1617  target_line = baton->hunk_start[0] + baton->hunk_length[0]
1618                + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1619
1620  /* Add trailing context to the hunk */
1621  SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1622                                    svn_diff__file_output_unified_context,
1623                                    target_line));
1624
1625  old_start = baton->hunk_start[0];
1626  new_start = baton->hunk_start[1];
1627
1628  /* If the file is non-empty, convert the line indexes from
1629     zero based to one based */
1630  if (baton->hunk_length[0])
1631    old_start++;
1632  if (baton->hunk_length[1])
1633    new_start++;
1634
1635  /* Write the hunk header */
1636  SVN_ERR(svn_diff__unified_write_hunk_header(
1637            baton->output_stream, baton->header_encoding, "@@",
1638            old_start, baton->hunk_length[0],
1639            new_start, baton->hunk_length[1],
1640            baton->hunk_extra_context,
1641            baton->pool));
1642
1643  /* Output the hunk content */
1644  hunk_len = baton->hunk->len;
1645  SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1646                           &hunk_len));
1647
1648  /* Prepare for the next hunk */
1649  baton->hunk_length[0] = 0;
1650  baton->hunk_length[1] = 0;
1651  baton->hunk_start[0] = 0;
1652  baton->hunk_start[1] = 0;
1653  svn_stringbuf_setempty(baton->hunk);
1654
1655  return SVN_NO_ERROR;
1656}
1657
1658static svn_error_t *
1659output_unified_diff_modified(void *baton,
1660  apr_off_t original_start, apr_off_t original_length,
1661  apr_off_t modified_start, apr_off_t modified_length,
1662  apr_off_t latest_start, apr_off_t latest_length)
1663{
1664  svn_diff__file_output_baton_t *output_baton = baton;
1665  apr_off_t context_prefix_length;
1666  apr_off_t prev_context_end;
1667  svn_boolean_t init_hunk = FALSE;
1668
1669  if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
1670    context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1671  else
1672    context_prefix_length = original_start;
1673
1674  /* Calculate where the previous hunk will end if we would write it now
1675     (including the necessary context at the end) */
1676  if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1677    {
1678      prev_context_end = output_baton->hunk_start[0]
1679                         + output_baton->hunk_length[0]
1680                         + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1681    }
1682  else
1683    {
1684      prev_context_end = -1;
1685
1686      if (output_baton->hunk_start[0] == 0
1687          && (original_length > 0 || modified_length > 0))
1688        init_hunk = TRUE;
1689    }
1690
1691  /* If the changed range is far enough from the previous range, flush the current
1692     hunk. */
1693  {
1694    apr_off_t new_hunk_start = (original_start - context_prefix_length);
1695
1696    if (output_baton->current_line[0] < new_hunk_start
1697          && prev_context_end <= new_hunk_start)
1698      {
1699        SVN_ERR(output_unified_flush_hunk(output_baton));
1700        init_hunk = TRUE;
1701      }
1702    else if (output_baton->hunk_length[0] > 0
1703             || output_baton->hunk_length[1] > 0)
1704      {
1705        /* We extend the current hunk */
1706
1707
1708        /* Original: Output the context preceding the changed range */
1709        SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1710                                          svn_diff__file_output_unified_context,
1711                                          original_start));
1712      }
1713  }
1714
1715  /* Original: Skip lines until we are at the beginning of the context we want
1716     to display */
1717  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1718                                    svn_diff__file_output_unified_skip,
1719                                    original_start - context_prefix_length));
1720
1721  /* Note that the above skip stores data for the show_c_function support below */
1722
1723  if (init_hunk)
1724    {
1725      SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1726                     && output_baton->hunk_length[1] == 0);
1727
1728      output_baton->hunk_start[0] = original_start - context_prefix_length;
1729      output_baton->hunk_start[1] = modified_start - context_prefix_length;
1730    }
1731
1732  if (init_hunk && output_baton->show_c_function)
1733    {
1734      apr_size_t p;
1735      const char *invalid_character;
1736
1737      /* Save the extra context for later use.
1738       * Note that the last byte of the hunk_extra_context array is never
1739       * touched after it is zero-initialized, so the array is always
1740       * 0-terminated. */
1741      strncpy(output_baton->hunk_extra_context,
1742              output_baton->extra_context->data,
1743              SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1744      /* Trim whitespace at the end, most notably to get rid of any
1745       * newline characters. */
1746      p = strlen(output_baton->hunk_extra_context);
1747      while (p > 0
1748             && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1749        {
1750          output_baton->hunk_extra_context[--p] = '\0';
1751        }
1752      invalid_character =
1753        svn_utf__last_valid(output_baton->hunk_extra_context,
1754                            SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1755      for (p = invalid_character - output_baton->hunk_extra_context;
1756           p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1757        {
1758          output_baton->hunk_extra_context[p] = '\0';
1759        }
1760    }
1761
1762  /* Modified: Skip lines until we are at the start of the changed range */
1763  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1764                                    svn_diff__file_output_unified_skip,
1765                                    modified_start));
1766
1767  /* Original: Output the context preceding the changed range */
1768  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1769                                    svn_diff__file_output_unified_context,
1770                                    original_start));
1771
1772  /* Both: Output the changed range */
1773  SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1774                                    svn_diff__file_output_unified_delete,
1775                                    original_start + original_length));
1776  SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1777                                    svn_diff__file_output_unified_insert,
1778                                    modified_start + modified_length));
1779
1780  return SVN_NO_ERROR;
1781}
1782
1783/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1784static svn_error_t *
1785output_unified_default_hdr(const char **header, const char *path,
1786                           apr_pool_t *pool)
1787{
1788  apr_finfo_t file_info;
1789  apr_time_exp_t exploded_time;
1790  char time_buffer[64];
1791  apr_size_t time_len;
1792  const char *utf8_timestr;
1793
1794  SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1795  apr_time_exp_lt(&exploded_time, file_info.mtime);
1796
1797  apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1798  /* Order of date components can be different in different languages */
1799               _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1800
1801  SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1802
1803  *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1804
1805  return SVN_NO_ERROR;
1806}
1807
1808static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1809{
1810  NULL, /* output_common */
1811  output_unified_diff_modified,
1812  NULL, /* output_diff_latest */
1813  NULL, /* output_diff_common */
1814  NULL  /* output_conflict */
1815};
1816
1817svn_error_t *
1818svn_diff_file_output_unified3(svn_stream_t *output_stream,
1819                              svn_diff_t *diff,
1820                              const char *original_path,
1821                              const char *modified_path,
1822                              const char *original_header,
1823                              const char *modified_header,
1824                              const char *header_encoding,
1825                              const char *relative_to_dir,
1826                              svn_boolean_t show_c_function,
1827                              apr_pool_t *pool)
1828{
1829  if (svn_diff_contains_diffs(diff))
1830    {
1831      svn_diff__file_output_baton_t baton;
1832      int i;
1833
1834      memset(&baton, 0, sizeof(baton));
1835      baton.output_stream = output_stream;
1836      baton.pool = pool;
1837      baton.header_encoding = header_encoding;
1838      baton.path[0] = original_path;
1839      baton.path[1] = modified_path;
1840      baton.hunk = svn_stringbuf_create_empty(pool);
1841      baton.show_c_function = show_c_function;
1842      baton.extra_context = svn_stringbuf_create_empty(pool);
1843
1844      if (show_c_function)
1845        {
1846          baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1847
1848          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1849          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1850          APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1851        }
1852
1853      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1854                                            header_encoding, pool));
1855      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1856                                            header_encoding, pool));
1857      SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1858                                            header_encoding, pool));
1859
1860      if (relative_to_dir)
1861        {
1862          /* Possibly adjust the "original" and "modified" paths shown in
1863             the output (see issue #2723). */
1864          const char *child_path;
1865
1866          if (! original_header)
1867            {
1868              child_path = svn_dirent_is_child(relative_to_dir,
1869                                               original_path, pool);
1870              if (child_path)
1871                original_path = child_path;
1872              else
1873                return svn_error_createf(
1874                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1875                                   _("Path '%s' must be inside "
1876                                     "the directory '%s'"),
1877                                   svn_dirent_local_style(original_path, pool),
1878                                   svn_dirent_local_style(relative_to_dir,
1879                                                          pool));
1880            }
1881
1882          if (! modified_header)
1883            {
1884              child_path = svn_dirent_is_child(relative_to_dir,
1885                                               modified_path, pool);
1886              if (child_path)
1887                modified_path = child_path;
1888              else
1889                return svn_error_createf(
1890                                   SVN_ERR_BAD_RELATIVE_PATH, NULL,
1891                                   _("Path '%s' must be inside "
1892                                     "the directory '%s'"),
1893                                   svn_dirent_local_style(modified_path, pool),
1894                                   svn_dirent_local_style(relative_to_dir,
1895                                                          pool));
1896            }
1897        }
1898
1899      for (i = 0; i < 2; i++)
1900        {
1901          SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1902                                   APR_READ, APR_OS_DEFAULT, pool));
1903        }
1904
1905      if (original_header == NULL)
1906        {
1907          SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1908                                             pool));
1909        }
1910
1911      if (modified_header == NULL)
1912        {
1913          SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1914                                             pool));
1915        }
1916
1917      SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1918                                             original_header, modified_header,
1919                                             pool));
1920
1921      SVN_ERR(svn_diff_output(diff, &baton,
1922                              &svn_diff__file_output_unified_vtable));
1923      SVN_ERR(output_unified_flush_hunk(&baton));
1924
1925      for (i = 0; i < 2; i++)
1926        {
1927          SVN_ERR(svn_io_file_close(baton.file[i], pool));
1928        }
1929    }
1930
1931  return SVN_NO_ERROR;
1932}
1933
1934
1935/** Display diff3 **/
1936
1937/* A stream to remember *leading* context.  Note that this stream does
1938   *not* copy the data that it is remembering; it just saves
1939   *pointers! */
1940typedef struct context_saver_t {
1941  svn_stream_t *stream;
1942  const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1943  apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1944  apr_size_t next_slot;
1945  apr_size_t total_written;
1946} context_saver_t;
1947
1948
1949static svn_error_t *
1950context_saver_stream_write(void *baton,
1951                           const char *data,
1952                           apr_size_t *len)
1953{
1954  context_saver_t *cs = baton;
1955  cs->data[cs->next_slot] = data;
1956  cs->len[cs->next_slot] = *len;
1957  cs->next_slot = (cs->next_slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1958  cs->total_written++;
1959  return SVN_NO_ERROR;
1960}
1961
1962typedef struct svn_diff3__file_output_baton_t
1963{
1964  svn_stream_t *output_stream;
1965
1966  const char *path[3];
1967
1968  apr_off_t   current_line[3];
1969
1970  char       *buffer[3];
1971  char       *endp[3];
1972  char       *curp[3];
1973
1974  /* The following four members are in the encoding used for the output. */
1975  const char *conflict_modified;
1976  const char *conflict_original;
1977  const char *conflict_separator;
1978  const char *conflict_latest;
1979
1980  const char *marker_eol;
1981
1982  svn_diff_conflict_display_style_t conflict_style;
1983
1984  /* The rest of the fields are for
1985     svn_diff_conflict_display_only_conflicts only.  Note that for
1986     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
1987     (soon after a conflict) a "trailing context stream", never the
1988     actual output stream.*/
1989  /* The actual output stream. */
1990  svn_stream_t *real_output_stream;
1991  context_saver_t *context_saver;
1992  /* Used to allocate context_saver and trailing context streams, and
1993     for some printfs. */
1994  apr_pool_t *pool;
1995} svn_diff3__file_output_baton_t;
1996
1997static svn_error_t *
1998flush_context_saver(context_saver_t *cs,
1999                    svn_stream_t *output_stream)
2000{
2001  int i;
2002  for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++)
2003    {
2004      apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2005      if (cs->data[slot])
2006        {
2007          apr_size_t len = cs->len[slot];
2008          SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2009        }
2010    }
2011  return SVN_NO_ERROR;
2012}
2013
2014static void
2015make_context_saver(svn_diff3__file_output_baton_t *fob)
2016{
2017  context_saver_t *cs;
2018
2019  svn_pool_clear(fob->pool);
2020  cs = apr_pcalloc(fob->pool, sizeof(*cs));
2021  cs->stream = svn_stream_empty(fob->pool);
2022  svn_stream_set_baton(cs->stream, cs);
2023  svn_stream_set_write(cs->stream, context_saver_stream_write);
2024  fob->context_saver = cs;
2025  fob->output_stream = cs->stream;
2026}
2027
2028
2029/* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to
2030   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2031   a context_saver; used for *trailing* context. */
2032
2033struct trailing_context_printer {
2034  apr_size_t lines_to_print;
2035  svn_diff3__file_output_baton_t *fob;
2036};
2037
2038
2039
2040static svn_error_t *
2041trailing_context_printer_write(void *baton,
2042                               const char *data,
2043                               apr_size_t *len)
2044{
2045  struct trailing_context_printer *tcp = baton;
2046  SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2047  SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2048  tcp->lines_to_print--;
2049  if (tcp->lines_to_print == 0)
2050    make_context_saver(tcp->fob);
2051  return SVN_NO_ERROR;
2052}
2053
2054
2055static void
2056make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2057{
2058  struct trailing_context_printer *tcp;
2059  svn_stream_t *s;
2060
2061  svn_pool_clear(btn->pool);
2062
2063  tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2064  tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2065  tcp->fob = btn;
2066  s = svn_stream_empty(btn->pool);
2067  svn_stream_set_baton(s, tcp);
2068  svn_stream_set_write(s, trailing_context_printer_write);
2069  btn->output_stream = s;
2070}
2071
2072
2073
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,

  /* Write the line to the baton's output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2079
2080
2081static svn_error_t *
2082output_line(svn_diff3__file_output_baton_t *baton,
2083            svn_diff3__file_output_type_e type, int idx)
2084{
2085  char *curp;
2086  char *endp;
2087  char *eol;
2088  apr_size_t len;
2089
2090  curp = baton->curp[idx];
2091  endp = baton->endp[idx];
2092
2093  /* Lazily update the current line even if we're at EOF.
2094   */
2095  baton->current_line[idx]++;
2096
2097  if (curp == endp)
2098    return SVN_NO_ERROR;
2099
2100  eol = svn_eol__find_eol_start(curp, endp - curp);
2101  if (!eol)
2102    eol = endp;
2103  else
2104    {
2105      svn_boolean_t had_cr = (*eol == '\r');
2106      eol++;
2107      if (had_cr && eol != endp && *eol == '\n')
2108        eol++;
2109    }
2110
2111  if (type != svn_diff3__file_output_skip)
2112    {
2113      len = eol - curp;
2114      /* Note that the trailing context printer assumes that
2115         svn_stream_write is called exactly once per line. */
2116      SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2117    }
2118
2119  baton->curp[idx] = eol;
2120
2121  return SVN_NO_ERROR;
2122}
2123
2124static svn_error_t *
2125output_marker_eol(svn_diff3__file_output_baton_t *btn)
2126{
2127  return svn_stream_puts(btn->output_stream, btn->marker_eol);
2128}
2129
2130static svn_error_t *
2131output_hunk(void *baton, int idx, apr_off_t target_line,
2132            apr_off_t target_length)
2133{
2134  svn_diff3__file_output_baton_t *output_baton = baton;
2135
2136  /* Skip lines until we are at the start of the changed range */
2137  while (output_baton->current_line[idx] < target_line)
2138    {
2139      SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2140    }
2141
2142  target_line += target_length;
2143
2144  while (output_baton->current_line[idx] < target_line)
2145    {
2146      SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2147    }
2148
2149  return SVN_NO_ERROR;
2150}
2151
2152static svn_error_t *
2153output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2154              apr_off_t modified_start, apr_off_t modified_length,
2155              apr_off_t latest_start, apr_off_t latest_length)
2156{
2157  return output_hunk(baton, 1, modified_start, modified_length);
2158}
2159
2160static svn_error_t *
2161output_diff_modified(void *baton,
2162                     apr_off_t original_start, apr_off_t original_length,
2163                     apr_off_t modified_start, apr_off_t modified_length,
2164                     apr_off_t latest_start, apr_off_t latest_length)
2165{
2166  return output_hunk(baton, 1, modified_start, modified_length);
2167}
2168
2169static svn_error_t *
2170output_diff_latest(void *baton,
2171                   apr_off_t original_start, apr_off_t original_length,
2172                   apr_off_t modified_start, apr_off_t modified_length,
2173                   apr_off_t latest_start, apr_off_t latest_length)
2174{
2175  return output_hunk(baton, 2, latest_start, latest_length);
2176}
2177
2178static svn_error_t *
2179output_conflict(void *baton,
2180                apr_off_t original_start, apr_off_t original_length,
2181                apr_off_t modified_start, apr_off_t modified_length,
2182                apr_off_t latest_start, apr_off_t latest_length,
2183                svn_diff_t *diff);
2184
2185static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2186{
2187  output_common,
2188  output_diff_modified,
2189  output_diff_latest,
2190  output_diff_modified, /* output_diff_common */
2191  output_conflict
2192};
2193
2194
2195
2196static svn_error_t *
2197output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2198                             apr_off_t original_start,
2199                             apr_off_t original_length,
2200                             apr_off_t modified_start,
2201                             apr_off_t modified_length,
2202                             apr_off_t latest_start,
2203                             apr_off_t latest_length)
2204{
2205  /* Are we currently saving starting context (as opposed to printing
2206     trailing context)?  If so, flush it. */
2207  if (btn->output_stream == btn->context_saver->stream)
2208    {
2209      if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
2210        SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2211      SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2212    }
2213
2214  /* Print to the real output stream. */
2215  btn->output_stream = btn->real_output_stream;
2216
2217  /* Output the conflict itself. */
2218  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2219                            (modified_length == 1
2220                             ? "%s (%" APR_OFF_T_FMT ")"
2221                             : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2222                            btn->conflict_modified,
2223                            modified_start + 1, modified_length));
2224  SVN_ERR(output_marker_eol(btn));
2225  SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2226
2227  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2228                            (original_length == 1
2229                             ? "%s (%" APR_OFF_T_FMT ")"
2230                             : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2231                            btn->conflict_original,
2232                            original_start + 1, original_length));
2233  SVN_ERR(output_marker_eol(btn));
2234  SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2235
2236  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2237                            "%s%s", btn->conflict_separator, btn->marker_eol));
2238  SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2239  SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2240                            (latest_length == 1
2241                             ? "%s (%" APR_OFF_T_FMT ")"
2242                             : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2243                            btn->conflict_latest,
2244                            latest_start + 1, latest_length));
2245  SVN_ERR(output_marker_eol(btn));
2246
2247  /* Go into print-trailing-context mode instead. */
2248  make_trailing_context_printer(btn);
2249
2250  return SVN_NO_ERROR;
2251}
2252
2253
2254static svn_error_t *
2255output_conflict(void *baton,
2256                apr_off_t original_start, apr_off_t original_length,
2257                apr_off_t modified_start, apr_off_t modified_length,
2258                apr_off_t latest_start, apr_off_t latest_length,
2259                svn_diff_t *diff)
2260{
2261  svn_diff3__file_output_baton_t *file_baton = baton;
2262
2263  svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2264
2265  if (style == svn_diff_conflict_display_only_conflicts)
2266    return output_conflict_with_context(file_baton,
2267                                        original_start, original_length,
2268                                        modified_start, modified_length,
2269                                        latest_start, latest_length);
2270
2271  if (style == svn_diff_conflict_display_resolved_modified_latest)
2272    {
2273      if (diff)
2274        return svn_diff_output(diff, baton,
2275                               &svn_diff3__file_output_vtable);
2276      else
2277        style = svn_diff_conflict_display_modified_latest;
2278    }
2279
2280  if (style == svn_diff_conflict_display_modified_latest ||
2281      style == svn_diff_conflict_display_modified_original_latest)
2282    {
2283      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2284                               file_baton->conflict_modified));
2285      SVN_ERR(output_marker_eol(file_baton));
2286
2287      SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2288
2289      if (style == svn_diff_conflict_display_modified_original_latest)
2290        {
2291          SVN_ERR(svn_stream_puts(file_baton->output_stream,
2292                                   file_baton->conflict_original));
2293          SVN_ERR(output_marker_eol(file_baton));
2294          SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2295        }
2296
2297      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2298                              file_baton->conflict_separator));
2299      SVN_ERR(output_marker_eol(file_baton));
2300
2301      SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2302
2303      SVN_ERR(svn_stream_puts(file_baton->output_stream,
2304                              file_baton->conflict_latest));
2305      SVN_ERR(output_marker_eol(file_baton));
2306    }
2307  else if (style == svn_diff_conflict_display_modified)
2308    SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2309  else if (style == svn_diff_conflict_display_latest)
2310    SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2311  else /* unknown style */
2312    SVN_ERR_MALFUNCTION();
2313
2314  return SVN_NO_ERROR;
2315}
2316
2317svn_error_t *
2318svn_diff_file_output_merge2(svn_stream_t *output_stream,
2319                            svn_diff_t *diff,
2320                            const char *original_path,
2321                            const char *modified_path,
2322                            const char *latest_path,
2323                            const char *conflict_original,
2324                            const char *conflict_modified,
2325                            const char *conflict_latest,
2326                            const char *conflict_separator,
2327                            svn_diff_conflict_display_style_t style,
2328                            apr_pool_t *pool)
2329{
2330  svn_diff3__file_output_baton_t baton;
2331  apr_file_t *file[3];
2332  int idx;
2333#if APR_HAS_MMAP
2334  apr_mmap_t *mm[3] = { 0 };
2335#endif /* APR_HAS_MMAP */
2336  const char *eol;
2337  svn_boolean_t conflicts_only =
2338    (style == svn_diff_conflict_display_only_conflicts);
2339
2340  memset(&baton, 0, sizeof(baton));
2341  if (conflicts_only)
2342    {
2343      baton.pool = svn_pool_create(pool);
2344      make_context_saver(&baton);
2345      baton.real_output_stream = output_stream;
2346    }
2347  else
2348    baton.output_stream = output_stream;
2349  baton.path[0] = original_path;
2350  baton.path[1] = modified_path;
2351  baton.path[2] = latest_path;
2352  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2353                                    conflict_modified ? conflict_modified
2354                                    : apr_psprintf(pool, "<<<<<<< %s",
2355                                                   modified_path),
2356                                    pool));
2357  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2358                                    conflict_original ? conflict_original
2359                                    : apr_psprintf(pool, "||||||| %s",
2360                                                   original_path),
2361                                    pool));
2362  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2363                                    conflict_separator ? conflict_separator
2364                                    : "=======", pool));
2365  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2366                                    conflict_latest ? conflict_latest
2367                                    : apr_psprintf(pool, ">>>>>>> %s",
2368                                                   latest_path),
2369                                    pool));
2370
2371  baton.conflict_style = style;
2372
2373  for (idx = 0; idx < 3; idx++)
2374    {
2375      apr_size_t size;
2376
2377      SVN_ERR(map_or_read_file(&file[idx],
2378                               MMAP_T_ARG(mm[idx])
2379                               &baton.buffer[idx], &size,
2380                               baton.path[idx], pool));
2381
2382      baton.curp[idx] = baton.buffer[idx];
2383      baton.endp[idx] = baton.buffer[idx];
2384
2385      if (baton.endp[idx])
2386        baton.endp[idx] += size;
2387    }
2388
2389  /* Check what eol marker we should use for conflict markers.
2390     We use the eol marker of the modified file and fall back on the
2391     platform's eol marker if that file doesn't contain any newlines. */
2392  eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2393                            NULL);
2394  if (! eol)
2395    eol = APR_EOL_STR;
2396  baton.marker_eol = eol;
2397
2398  SVN_ERR(svn_diff_output(diff, &baton,
2399                          &svn_diff3__file_output_vtable));
2400
2401  for (idx = 0; idx < 3; idx++)
2402    {
2403#if APR_HAS_MMAP
2404      if (mm[idx])
2405        {
2406          apr_status_t rv = apr_mmap_delete(mm[idx]);
2407          if (rv != APR_SUCCESS)
2408            {
2409              return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2410                                        baton.path[idx]);
2411            }
2412        }
2413#endif /* APR_HAS_MMAP */
2414
2415      if (file[idx])
2416        {
2417          SVN_ERR(svn_io_file_close(file[idx], pool));
2418        }
2419    }
2420
2421  if (conflicts_only)
2422    svn_pool_destroy(baton.pool);
2423
2424  return SVN_NO_ERROR;
2425}
2426
2427