1/*
2 * util.c :  routines for doing diffs
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_general.h>
27
28#include "svn_hash.h"
29#include "svn_pools.h"
30#include "svn_dirent_uri.h"
31#include "svn_props.h"
32#include "svn_mergeinfo.h"
33#include "svn_error.h"
34#include "svn_diff.h"
35#include "svn_types.h"
36#include "svn_ctype.h"
37#include "svn_utf.h"
38#include "svn_version.h"
39
40#include "private/svn_diff_private.h"
41#include "diff.h"
42
43#include "svn_private_config.h"
44
45
46svn_boolean_t
47svn_diff_contains_conflicts(svn_diff_t *diff)
48{
49  while (diff != NULL)
50    {
51      if (diff->type == svn_diff__type_conflict)
52        {
53          return TRUE;
54        }
55
56      diff = diff->next;
57    }
58
59  return FALSE;
60}
61
62svn_boolean_t
63svn_diff_contains_diffs(svn_diff_t *diff)
64{
65  while (diff != NULL)
66    {
67      if (diff->type != svn_diff__type_common)
68        {
69          return TRUE;
70        }
71
72      diff = diff->next;
73    }
74
75  return FALSE;
76}
77
78svn_error_t *
79svn_diff_output(svn_diff_t *diff,
80                void *output_baton,
81                const svn_diff_output_fns_t *vtable)
82{
83  svn_error_t *(*output_fn)(void *,
84                            apr_off_t, apr_off_t,
85                            apr_off_t, apr_off_t,
86                            apr_off_t, apr_off_t);
87
88  while (diff != NULL)
89    {
90      switch (diff->type)
91        {
92        case svn_diff__type_common:
93          output_fn = vtable->output_common;
94          break;
95
96        case svn_diff__type_diff_common:
97          output_fn = vtable->output_diff_common;
98          break;
99
100        case svn_diff__type_diff_modified:
101          output_fn = vtable->output_diff_modified;
102          break;
103
104        case svn_diff__type_diff_latest:
105          output_fn = vtable->output_diff_latest;
106          break;
107
108        case svn_diff__type_conflict:
109          output_fn = NULL;
110          if (vtable->output_conflict != NULL)
111            {
112              SVN_ERR(vtable->output_conflict(output_baton,
113                               diff->original_start, diff->original_length,
114                               diff->modified_start, diff->modified_length,
115                               diff->latest_start, diff->latest_length,
116                               diff->resolved_diff));
117            }
118          break;
119
120        default:
121          output_fn = NULL;
122          break;
123        }
124
125      if (output_fn != NULL)
126        {
127          SVN_ERR(output_fn(output_baton,
128                            diff->original_start, diff->original_length,
129                            diff->modified_start, diff->modified_length,
130                            diff->latest_start, diff->latest_length));
131        }
132
133      diff = diff->next;
134    }
135
136  return SVN_NO_ERROR;
137}
138
139
/* Normalize the *LENGTHP bytes starting at BUF according to OPTS,
   continuing from the normalization state in *STATEP.

   If OPTS requests neither whitespace nor eol-style normalization, set
   *TGT to BUF and return immediately; *LENGTHP and *STATEP are left
   untouched in that case.

   Otherwise, on return:
     - *TGT points to the normalized data.  This is either a position
       inside BUF (when nothing had to be copied) or the buffer *TGT
       pointed to on entry (when data had to be copied or rewritten).
     - *LENGTHP is the length of the normalized data.
     - *STATEP is the state after the last byte of BUF.

   Every input byte produces at most one output byte, so the normalized
   data is never longer than the input.
   NOTE(review): the buffer *TGT points to on entry presumably has to hold
   at least *LENGTHP bytes; confirm against the callers.

   Normalization rules applied per byte:
     - '\r' is emitted as '\n' when OPTS->ignore_eol_style is set.
     - '\n' directly following a '\r' is dropped when
       OPTS->ignore_eol_style is set.
     - Other whitespace, when OPTS->ignore_space is not
       svn_diff_file_ignore_space_none: in
       svn_diff_file_ignore_space_change mode the first character of a
       whitespace run is emitted as ' ' and the rest are dropped; in any
       other mode all of them are dropped.
     - Everything else is copied unchanged. */
void
svn_diff__normalize_buffer(char **tgt,
                           apr_off_t *lengthp,
                           svn_diff__normalize_state_t *statep,
                           const char *buf,
                           const svn_diff_file_options_t *opts)
{
  /* Variables for looping through BUF */
  const char *curp, *endp;

  /* Variable to record normalizing state */
  svn_diff__normalize_state_t state = *statep;

  /* Variables to track what needs copying into the target buffer */
  const char *start = buf;
  apr_size_t include_len = 0;
  svn_boolean_t last_skipped = FALSE; /* makes sure we set 'start' */

  /* Variable to record the state of the target buffer */
  char *tgt_newend = *tgt;

  /* If this is a noop, then just get out of here.
     Note that *LENGTHP and *STATEP are not modified on this path. */
  if (! opts->ignore_space && ! opts->ignore_eol_style)
    {
      *tgt = (char *)buf;
      return;
    }


  /* It took me forever to get this routine right,
     so here are my thoughts:

    Below, we loop through the data, doing 2 things:

     - Normalizing
     - Copying other data

     The routine tries its hardest *not* to copy data, but instead
     to return a pointer into already normalized existing data.

     To this end, a block of 'other data' shouldn't be copied when found,
     but only as soon as it can't be returned in-place.

     On a character level, there are 3 possible operations:

     - Skip the character (don't include in the normalized data)
     - Include the character (do include in the normalized data)
     - Include as another character
       This is essentially the same as skipping the current character
       and inserting a given character in the output data.

    The macros below (SKIP, INCLUDE and INCLUDE_AS) are defined to
    handle the character based operations.  The macros themselves
    collect character level data into blocks.

    At all times the START, INCLUDE_LEN and CURP variables designate
    an included and a skipped block like this:

      [ start, start + include_len ) [ start + include_len, curp )
             INCLUDED                        EXCLUDED

    When the routine flips from skipping to including, the last
    included block has to be flushed to the output buffer.
  */

  /* Going from including to skipping; only schedules the current
     included section for flushing.
     Also, simply chop off the character if it's the first in the buffer,
     so we can possibly just return the remainder of the buffer */
#define SKIP             \
  do {                   \
    if (start == curp)   \
       ++start;          \
    last_skipped = TRUE; \
  } while (0)

  /* Add the current character to the included block.  If the previous
     character was skipped, first flush the pending included block so
     that a new block starts at CURP. */
#define INCLUDE                \
  do {                         \
    if (last_skipped)          \
      COPY_INCLUDED_SECTION;   \
    ++include_len;             \
    last_skipped = FALSE;      \
  } while (0)

  /* Flush the pending included block (if any) to the target buffer and
     restart block tracking at CURP.  memmove() is used for the copy, so
     source and destination are allowed to overlap. */
#define COPY_INCLUDED_SECTION                     \
  do {                                            \
    if (include_len > 0)                          \
      {                                           \
         memmove(tgt_newend, start, include_len); \
         tgt_newend += include_len;               \
         include_len = 0;                         \
      }                                           \
    start = curp;                                 \
  } while (0)

  /* Include the current character as character X.
     If the current character already *is* X, add it to the
     currently included region, increasing chances for consecutive
     fully normalized blocks. */
#define INCLUDE_AS(x)          \
  do {                         \
    if (*curp == (x))          \
      INCLUDE;                 \
    else                       \
      {                        \
        INSERT((x));           \
        SKIP;                  \
      }                        \
  } while (0)

  /* Insert character X in the output buffer */
#define INSERT(x)              \
  do {                         \
    COPY_INCLUDED_SECTION;     \
    *tgt_newend++ = (x);       \
  } while (0)

  for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
    {
      switch (*curp)
        {
        case '\r':
          /* With eol-style normalization a CR is emitted as LF. */
          if (opts->ignore_eol_style)
            INCLUDE_AS('\n');
          else
            INCLUDE;
          state = svn_diff__normalize_state_cr;
          break;

        case '\n':
          /* An LF directly after a CR is dropped when normalizing eol
             style: the CR case above emits the '\n' for the pair. */
          if (state == svn_diff__normalize_state_cr
              && opts->ignore_eol_style)
            SKIP;
          else
            INCLUDE;
          state = svn_diff__normalize_state_normal;
          break;

        default:
          if (svn_ctype_isspace(*curp)
              && opts->ignore_space != svn_diff_file_ignore_space_none)
            {
              /* Whitespace but not '\r' or '\n' */
              if (state != svn_diff__normalize_state_whitespace
                  && opts->ignore_space
                     == svn_diff_file_ignore_space_change)
                /*### If we can postpone insertion of the space
                  until the next non-whitespace character,
                  we have a potential of reducing the number of copies:
                  If this space is followed by more spaces,
                  this will cause a block-copy.
                  If the next non-space block is considered normalized
                  *and* preceded by a space, we can take advantage of that. */
                /* Note, the above optimization applies to 90% of the source
                   lines in our own code, since it (generally) doesn't use
                   more than one space per blank section, except for the
                   beginning of a line. */
                INCLUDE_AS(' ');
              else
                SKIP;
              state = svn_diff__normalize_state_whitespace;
            }
          else
            {
              /* Non-whitespace character, or whitespace character in
                 svn_diff_file_ignore_space_none mode. */
              INCLUDE;
              state = svn_diff__normalize_state_normal;
            }
        }
    }

  /* If we're not in whitespace, flush the last chunk of data.
   * Note that this will work correctly when this is the last chunk of the
   * file:
   * * If there is an eol, it will either have been output when we entered
   *   the state_cr, or it will be output now.
   * * If there is no eol and we're not in whitespace, then we just output
   *   everything below.
   * * If there's no eol and we are in whitespace, we want to ignore
   *   whitespace unconditionally. */

  if (*tgt == tgt_newend)
    {
      /* we haven't copied any data into *tgt and our chunk consists
         only of one block of (already normalized) data.
         Just return the block. */
      *tgt = (char *)start;
      *lengthp = include_len;
    }
  else
    {
      /* Some data was already written to the target buffer: append the
         pending included block and report the total length written. */
      COPY_INCLUDED_SECTION;
      *lengthp = tgt_newend - *tgt;
    }

  /* Hand the final state back so the caller can pass it in again. */
  *statep = state;

#undef SKIP
#undef INCLUDE
#undef INCLUDE_AS
#undef INSERT
#undef COPY_INCLUDED_SECTION
}
344
345svn_error_t *
346svn_diff__unified_append_no_newline_msg(svn_stringbuf_t *stringbuf,
347                                        const char *header_encoding,
348                                        apr_pool_t *scratch_pool)
349{
350  const char *out_str;
351
352  SVN_ERR(svn_utf_cstring_from_utf8_ex2(
353            &out_str,
354            APR_EOL_STR
355            SVN_DIFF__NO_NEWLINE_AT_END_OF_FILE APR_EOL_STR,
356            header_encoding, scratch_pool));
357  svn_stringbuf_appendcstr(stringbuf, out_str);
358  return SVN_NO_ERROR;
359}
360
361svn_error_t *
362svn_diff__unified_write_hunk_header(svn_stream_t *output_stream,
363                                    const char *header_encoding,
364                                    const char *hunk_delimiter,
365                                    apr_off_t old_start,
366                                    apr_off_t old_length,
367                                    apr_off_t new_start,
368                                    apr_off_t new_length,
369                                    const char *hunk_extra_context,
370                                    apr_pool_t *scratch_pool)
371{
372  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
373                                      scratch_pool,
374                                      "%s -%" APR_OFF_T_FMT,
375                                      hunk_delimiter, old_start));
376  /* If the hunk length is 1, suppress the number of lines in the hunk
377   * (it is 1 implicitly) */
378  if (old_length != 1)
379    {
380      SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
381                                          scratch_pool,
382                                          ",%" APR_OFF_T_FMT, old_length));
383    }
384
385  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
386                                      scratch_pool,
387                                      " +%" APR_OFF_T_FMT, new_start));
388  if (new_length != 1)
389    {
390      SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
391                                          scratch_pool,
392                                          ",%" APR_OFF_T_FMT, new_length));
393    }
394
395  if (hunk_extra_context == NULL)
396      hunk_extra_context = "";
397  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
398                                      scratch_pool,
399                                      " %s%s%s" APR_EOL_STR,
400                                      hunk_delimiter,
401                                      hunk_extra_context[0] ? " " : "",
402                                      hunk_extra_context));
403  return SVN_NO_ERROR;
404}
405
406svn_error_t *
407svn_diff__unidiff_write_header(svn_stream_t *output_stream,
408                               const char *header_encoding,
409                               const char *old_header,
410                               const char *new_header,
411                               apr_pool_t *scratch_pool)
412{
413  SVN_ERR(svn_stream_printf_from_utf8(output_stream, header_encoding,
414                                      scratch_pool,
415                                      "--- %s" APR_EOL_STR
416                                      "+++ %s" APR_EOL_STR,
417                                      old_header,
418                                      new_header));
419  return SVN_NO_ERROR;
420}
421
422/* A helper function for display_prop_diffs.  Output the differences between
423   the mergeinfo stored in ORIG_MERGEINFO_VAL and NEW_MERGEINFO_VAL in a
424   human-readable form to OUTSTREAM, using ENCODING.  Use POOL for temporary
425   allocations. */
426static svn_error_t *
427display_mergeinfo_diff(const char *old_mergeinfo_val,
428                       const char *new_mergeinfo_val,
429                       const char *encoding,
430                       svn_stream_t *outstream,
431                       apr_pool_t *pool)
432{
433  apr_hash_t *old_mergeinfo_hash, *new_mergeinfo_hash, *added, *deleted;
434  apr_pool_t *iterpool = svn_pool_create(pool);
435  apr_hash_index_t *hi;
436
437  if (old_mergeinfo_val)
438    SVN_ERR(svn_mergeinfo_parse(&old_mergeinfo_hash, old_mergeinfo_val, pool));
439  else
440    old_mergeinfo_hash = NULL;
441
442  if (new_mergeinfo_val)
443    SVN_ERR(svn_mergeinfo_parse(&new_mergeinfo_hash, new_mergeinfo_val, pool));
444  else
445    new_mergeinfo_hash = NULL;
446
447  SVN_ERR(svn_mergeinfo_diff2(&deleted, &added, old_mergeinfo_hash,
448                              new_mergeinfo_hash,
449                              TRUE, pool, pool));
450
451  for (hi = apr_hash_first(pool, deleted);
452       hi; hi = apr_hash_next(hi))
453    {
454      const char *from_path = svn__apr_hash_index_key(hi);
455      svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
456      svn_string_t *merge_revstr;
457
458      svn_pool_clear(iterpool);
459      SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
460                                      iterpool));
461
462      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
463                                          _("   Reverse-merged %s:r%s%s"),
464                                          from_path, merge_revstr->data,
465                                          APR_EOL_STR));
466    }
467
468  for (hi = apr_hash_first(pool, added);
469       hi; hi = apr_hash_next(hi))
470    {
471      const char *from_path = svn__apr_hash_index_key(hi);
472      svn_rangelist_t *merge_revarray = svn__apr_hash_index_val(hi);
473      svn_string_t *merge_revstr;
474
475      svn_pool_clear(iterpool);
476      SVN_ERR(svn_rangelist_to_string(&merge_revstr, merge_revarray,
477                                      iterpool));
478
479      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
480                                          _("   Merged %s:r%s%s"),
481                                          from_path, merge_revstr->data,
482                                          APR_EOL_STR));
483    }
484
485  svn_pool_destroy(iterpool);
486  return SVN_NO_ERROR;
487}
488
489svn_error_t *
490svn_diff__display_prop_diffs(svn_stream_t *outstream,
491                             const char *encoding,
492                             const apr_array_header_t *propchanges,
493                             apr_hash_t *original_props,
494                             svn_boolean_t pretty_print_mergeinfo,
495                             apr_pool_t *pool)
496{
497  apr_pool_t *iterpool = svn_pool_create(pool);
498  int i;
499
500  for (i = 0; i < propchanges->nelts; i++)
501    {
502      const char *action;
503      const svn_string_t *original_value;
504      const svn_prop_t *propchange
505        = &APR_ARRAY_IDX(propchanges, i, svn_prop_t);
506
507      if (original_props)
508        original_value = svn_hash_gets(original_props, propchange->name);
509      else
510        original_value = NULL;
511
512      /* If the property doesn't exist on either side, or if it exists
513         with the same value, skip it.  This can happen if the client is
514         hitting an old mod_dav_svn server that doesn't understand the
515         "send-all" REPORT style. */
516      if ((! (original_value || propchange->value))
517          || (original_value && propchange->value
518              && svn_string_compare(original_value, propchange->value)))
519        continue;
520
521      svn_pool_clear(iterpool);
522
523      if (! original_value)
524        action = "Added";
525      else if (! propchange->value)
526        action = "Deleted";
527      else
528        action = "Modified";
529      SVN_ERR(svn_stream_printf_from_utf8(outstream, encoding, iterpool,
530                                          "%s: %s%s", action,
531                                          propchange->name, APR_EOL_STR));
532
533      if (pretty_print_mergeinfo
534          && strcmp(propchange->name, SVN_PROP_MERGEINFO) == 0)
535        {
536          const char *orig = original_value ? original_value->data : NULL;
537          const char *val = propchange->value ? propchange->value->data : NULL;
538          svn_error_t *err = display_mergeinfo_diff(orig, val, encoding,
539                                                    outstream, iterpool);
540
541          /* Issue #3896: If we can't pretty-print mergeinfo differences
542             because invalid mergeinfo is present, then don't let the diff
543             fail, just print the diff as any other property. */
544          if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
545            {
546              svn_error_clear(err);
547            }
548          else
549            {
550              SVN_ERR(err);
551              continue;
552            }
553        }
554
555      {
556        svn_diff_t *diff;
557        svn_diff_file_options_t options = { 0 };
558        const svn_string_t *orig
559          = original_value ? original_value
560                           : svn_string_create_empty(iterpool);
561        const svn_string_t *val
562          = propchange->value ? propchange->value
563                              : svn_string_create_empty(iterpool);
564
565        SVN_ERR(svn_diff_mem_string_diff(&diff, orig, val, &options,
566                                         iterpool));
567
568        /* UNIX patch will try to apply a diff even if the diff header
569         * is missing. It tries to be helpful by asking the user for a
570         * target filename when it can't determine the target filename
571         * from the diff header. But there usually are no files which
572         * UNIX patch could apply the property diff to, so we use "##"
573         * instead of "@@" as the default hunk delimiter for property diffs.
574         * We also supress the diff header. */
575        SVN_ERR(svn_diff_mem_string_output_unified2(
576                  outstream, diff, FALSE /* no header */, "##", NULL, NULL,
577                  encoding, orig, val, iterpool));
578      }
579    }
580  svn_pool_destroy(iterpool);
581
582  return SVN_NO_ERROR;
583}
584
585
/* Return the library version number.
   The whole function body, including the return statement, is supplied
   by the SVN_VERSION_BODY macro. */
const svn_version_t *
svn_diff_version(void)
{
  SVN_VERSION_BODY;
}
592