1/*
2 * diff.c :  routines for doing diffs
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <apr.h>
26#include <apr_pools.h>
27#include <apr_general.h>
28
29#include "svn_pools.h"
30#include "svn_error.h"
31#include "svn_diff.h"
32#include "svn_types.h"
33
34#include "diff.h"
35
36/*
37 * Variance adjustment rules:
38 *
39 * See notes/variance-adjusted-patching.html
40 *
41 * ###: Expand this comment to contain the full set of adjustment
42 * ###: rules instead of pointing to a webpage.
43 */
44
45/*
46 * In the text below consider the following:
47 *
48 * O     = Original
49 * M     = Modified
50 * L     = Latest
51 * A     = Ancestor
52 * X:Y   = diff between X and Y
53 * X:Y:Z = 3-way diff between X, Y and Z
54 * P     = O:L, possibly adjusted
55 *
56 * diff4 -- Variance adjusted diff algorithm
57 *
58 * 1. Create a diff O:L and call that P.
59 *
60 * 2. Morph P into a 3-way diff by performing the following
61 *    transformation: O:L -> O:O:L.
62 *
63 * 3. Create a diff A:O.
64 *
65 * 4. Using A:O...
66 *
67 * #. Using M:A...
68 *
69 * #. Resolve conflicts...
70 *
71
72   1. Out-range added line: decrement the line numbers in every hunk in P
73      that comes after the addition. This undoes the effect of the add, since
74      the add never happened in D.
75
76   2. Out-range deleted line: increment the line numbers in every hunk in P
77      that comes after the deletion. This undoes the effect of the deletion,
78      since the deletion never happened in D.
79
80   3. Out-range edited line: do nothing. Out-range edits are irrelevant to P.
81
82   4. Added line in context range in P: remove the corresponding line from
83      the context, optionally replacing it with new context based on that
84      region in M, and adjust line numbers and mappings appropriately.
85
86   5. Added line in affected text range in P: this is a dependency problem
87      -- part of the change T:18-T:19 depends on changes introduced to T after
88      B branched. There are several possible behaviors, depending on what the
89      user wants. One is to generate an informative error, stating that
90      T:18-T:19 depends on some other change (T:N-T:M, where N>=8, M<=18,
91      and M-N == 1); the exact revisions can be discovered automatically using
92      the same process as "cvs annotate", though it may take some time to do
93      so. Another option is to include the change in P, as an insertion of the
94      "after" version of the text, and adjust line numbers and mappings
95      accordingly. (And if all this isn't sounding a lot like a directory
96      merge algorithm, try drinking more of the Kool-Aid.) A third option is
97      to include it as an insertion, but with metadata (such as CVS-style
98      conflict markers) indicating that the line attempting to be patched
99      does not exist in B.
100
101   6. Deleted line that is in-range in P: request another universe -- this
102      situation can't happen in ours.
103
104   7. In-range edited line: reverse that edit in the "before" version of the
105      corresponding line in the appropriate hunk in P, to obtain the version of
106      the line that will be found in B when P is applied.
107*/
108
109
110static void
111adjust_diff(svn_diff_t *diff, svn_diff_t *adjust)
112{
113  svn_diff_t *hunk;
114  apr_off_t range_start;
115  apr_off_t range_end;
116  apr_off_t adjustment;
117
118  for (; adjust; adjust = adjust->next)
119    {
120      range_start = adjust->modified_start;
121      range_end = range_start + adjust->modified_length;
122      adjustment = adjust->original_length - adjust->modified_length;
123
124      /* No change in line count, so no modifications. [3, 7] */
125      if (adjustment == 0)
126        continue;
127
128      for (hunk = diff; hunk; hunk = hunk->next)
129        {
130          /* Changes are in the range before this hunk.  Adjust the start
131           * of the hunk. [1, 2]
132           */
133          if (hunk->modified_start >= range_end)
134            {
135              hunk->modified_start += adjustment;
136              continue;
137            }
138
139          /* Changes are in the range beyond this hunk.  No adjustments
140           * needed. [1, 2]
141           */
142          if (hunk->modified_start + hunk->modified_length <= range_start)
143            continue;
144
145          /* From here on changes are in the range of this hunk. */
146
147          /* This is a context hunk.  Adjust the length. [4]
148           */
149          if (hunk->type == svn_diff__type_diff_modified)
150            {
151              hunk->modified_length += adjustment;
152              continue;
153            }
154
155          /* Mark as conflicted. This happens in the reverse case when a line
156           * is added in range and in the forward case when a line is deleted
157           * in range. [5 (reverse), 6 (forward)]
158           */
159          if (adjustment < 0)
160              hunk->type = svn_diff__type_conflict;
161
162          /* Adjust the length of this hunk (reverse the change). [5, 6] */
163          hunk->modified_length -= adjustment;
164        }
165    }
166}
167
168svn_error_t *
169svn_diff_diff4_2(svn_diff_t **diff,
170                 void *diff_baton,
171                 const svn_diff_fns2_t *vtable,
172                 apr_pool_t *pool)
173{
174  svn_diff__tree_t *tree;
175  svn_diff__position_t *position_list[4];
176  svn_diff__token_index_t num_tokens;
177  svn_diff__token_index_t *token_counts[4];
178  svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
179                                        svn_diff_datasource_modified,
180                                        svn_diff_datasource_latest,
181                                        svn_diff_datasource_ancestor};
182  svn_diff__lcs_t *lcs_ol;
183  svn_diff__lcs_t *lcs_adjust;
184  svn_diff_t *diff_ol;
185  svn_diff_t *diff_adjust;
186  svn_diff_t *hunk;
187  apr_pool_t *subpool;
188  apr_pool_t *subpool2;
189  apr_pool_t *subpool3;
190  apr_off_t prefix_lines = 0;
191  apr_off_t suffix_lines = 0;
192
193  *diff = NULL;
194
195  subpool = svn_pool_create(pool);
196  subpool2 = svn_pool_create(subpool);
197  subpool3 = svn_pool_create(subpool2);
198
199  svn_diff__tree_create(&tree, subpool3);
200
201  SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
202                                   datasource, 4));
203
204  SVN_ERR(svn_diff__get_tokens(&position_list[0],
205                               tree,
206                               diff_baton, vtable,
207                               svn_diff_datasource_original,
208                               prefix_lines,
209                               subpool2));
210
211  SVN_ERR(svn_diff__get_tokens(&position_list[1],
212                               tree,
213                               diff_baton, vtable,
214                               svn_diff_datasource_modified,
215                               prefix_lines,
216                               subpool));
217
218  SVN_ERR(svn_diff__get_tokens(&position_list[2],
219                               tree,
220                               diff_baton, vtable,
221                               svn_diff_datasource_latest,
222                               prefix_lines,
223                               subpool));
224
225  SVN_ERR(svn_diff__get_tokens(&position_list[3],
226                               tree,
227                               diff_baton, vtable,
228                               svn_diff_datasource_ancestor,
229                               prefix_lines,
230                               subpool2));
231
232  num_tokens = svn_diff__get_node_count(tree);
233
234  /* Get rid of the tokens, we don't need them to calc the diff */
235  if (vtable->token_discard_all != NULL)
236    vtable->token_discard_all(diff_baton);
237
238  /* We don't need the nodes in the tree either anymore, nor the tree itself */
239  svn_pool_clear(subpool3);
240
241  token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
242                                               subpool);
243  token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
244                                               subpool);
245  token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
246                                               subpool);
247  token_counts[3] = svn_diff__get_token_counts(position_list[3], num_tokens,
248                                               subpool);
249
250  /* Get the lcs for original - latest */
251  lcs_ol = svn_diff__lcs(position_list[0], position_list[2],
252                         token_counts[0], token_counts[2],
253                         num_tokens, prefix_lines,
254                         suffix_lines, subpool3);
255  diff_ol = svn_diff__diff(lcs_ol, 1, 1, TRUE, pool);
256
257  svn_pool_clear(subpool3);
258
259  for (hunk = diff_ol; hunk; hunk = hunk->next)
260    {
261      hunk->latest_start = hunk->modified_start;
262      hunk->latest_length = hunk->modified_length;
263      hunk->modified_start = hunk->original_start;
264      hunk->modified_length = hunk->original_length;
265
266      if (hunk->type == svn_diff__type_diff_modified)
267          hunk->type = svn_diff__type_diff_latest;
268      else
269          hunk->type = svn_diff__type_diff_modified;
270    }
271
272  /* Get the lcs for common ancestor - original
273   * Do reverse adjustements
274   */
275  lcs_adjust = svn_diff__lcs(position_list[3], position_list[2],
276                             token_counts[3], token_counts[2],
277                             num_tokens, prefix_lines,
278                             suffix_lines, subpool3);
279  diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
280  adjust_diff(diff_ol, diff_adjust);
281
282  svn_pool_clear(subpool3);
283
284  /* Get the lcs for modified - common ancestor
285   * Do forward adjustments
286   */
287  lcs_adjust = svn_diff__lcs(position_list[1], position_list[3],
288                             token_counts[1], token_counts[3],
289                             num_tokens, prefix_lines,
290                             suffix_lines, subpool3);
291  diff_adjust = svn_diff__diff(lcs_adjust, 1, 1, FALSE, subpool3);
292  adjust_diff(diff_ol, diff_adjust);
293
294  /* Get rid of the position lists for original and ancestor, and delete
295   * our scratchpool.
296   */
297  svn_pool_destroy(subpool2);
298
299  /* Now we try and resolve the conflicts we encountered */
300  for (hunk = diff_ol; hunk; hunk = hunk->next)
301    {
302      if (hunk->type == svn_diff__type_conflict)
303        {
304          svn_diff__resolve_conflict(hunk, &position_list[1],
305                                     &position_list[2], num_tokens, pool);
306        }
307    }
308
309  svn_pool_destroy(subpool);
310
311  *diff = diff_ol;
312
313  return SVN_NO_ERROR;
314}
315