diff_file.c revision 262253
1/* 2 * diff_file.c : routines for doing diffs on files 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24 25#include <apr.h> 26#include <apr_pools.h> 27#include <apr_general.h> 28#include <apr_file_io.h> 29#include <apr_file_info.h> 30#include <apr_time.h> 31#include <apr_mmap.h> 32#include <apr_getopt.h> 33 34#include "svn_error.h" 35#include "svn_diff.h" 36#include "svn_types.h" 37#include "svn_string.h" 38#include "svn_subst.h" 39#include "svn_io.h" 40#include "svn_utf.h" 41#include "svn_pools.h" 42#include "diff.h" 43#include "svn_private_config.h" 44#include "svn_path.h" 45#include "svn_ctype.h" 46 47#include "private/svn_utf_private.h" 48#include "private/svn_eol_private.h" 49#include "private/svn_dep_compat.h" 50#include "private/svn_adler32.h" 51#include "private/svn_diff_private.h" 52 53/* A token, i.e. a line read from a file. */ 54typedef struct svn_diff__file_token_t 55{ 56 /* Next token in free list. */ 57 struct svn_diff__file_token_t *next; 58 svn_diff_datasource_e datasource; 59 /* Offset in the datasource. */ 60 apr_off_t offset; 61 /* Offset of the normalized token (may skip leading whitespace) */ 62 apr_off_t norm_offset; 63 /* Total length - before normalization. */ 64 apr_off_t raw_length; 65 /* Total length - after normalization. */ 66 apr_off_t length; 67} svn_diff__file_token_t; 68 69 70typedef struct svn_diff__file_baton_t 71{ 72 const svn_diff_file_options_t *options; 73 74 struct file_info { 75 const char *path; /* path to this file, absolute or relative to CWD */ 76 77 /* All the following fields are active while this datasource is open */ 78 apr_file_t *file; /* handle of this file */ 79 apr_off_t size; /* total raw size in bytes of this file */ 80 81 /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */ 82 int chunk; /* the current chunk number, zero-based */ 83 char *buffer; /* a buffer containing the current chunk */ 84 char *curp; /* current position in the current chunk */ 85 char *endp; /* next memory address after the current chunk */ 86 87 svn_diff__normalize_state_t normalize_state; 88 89 /* Where the identical suffix starts in this datasource */ 90 int suffix_start_chunk; 91 apr_off_t suffix_offset_in_chunk; 92 } files[4]; 93 94 /* List of free tokens that may be reused. */ 95 svn_diff__file_token_t *tokens; 96 97 apr_pool_t *pool; 98} svn_diff__file_baton_t; 99 100static int 101datasource_to_index(svn_diff_datasource_e datasource) 102{ 103 switch (datasource) 104 { 105 case svn_diff_datasource_original: 106 return 0; 107 108 case svn_diff_datasource_modified: 109 return 1; 110 111 case svn_diff_datasource_latest: 112 return 2; 113 114 case svn_diff_datasource_ancestor: 115 return 3; 116 } 117 118 return -1; 119} 120 121/* Files are read in chunks of 128k. There is no support for this number 122 * whatsoever. If there is a number someone comes up with that has some 123 * argumentation, let's use that. 124 */ 125/* If you change this number, update test_norm_offset(), 126 * test_identical_suffix() and and test_token_compare() in diff-diff3-test.c. 127 */ 128#define CHUNK_SHIFT 17 129#define CHUNK_SIZE (1 << CHUNK_SHIFT) 130 131#define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT) 132#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT) 133#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1)) 134 135 136/* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for 137 * *LENGTH. The actual bytes read are stored in *LENGTH on return. 138 */ 139static APR_INLINE svn_error_t * 140read_chunk(apr_file_t *file, const char *path, 141 char *buffer, apr_off_t length, 142 apr_off_t offset, apr_pool_t *pool) 143{ 144 /* XXX: The final offset may not be the one we asked for. 145 * XXX: Check. 146 */ 147 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool)); 148 return svn_io_file_read_full2(file, buffer, (apr_size_t) length, 149 NULL, NULL, pool); 150} 151 152 153/* Map or read a file at PATH. *BUFFER will point to the file 154 * contents; if the file was mapped, *FILE and *MM will contain the 155 * mmap context; otherwise they will be NULL. SIZE will contain the 156 * file size. Allocate from POOL. 157 */ 158#if APR_HAS_MMAP 159#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME, 160#define MMAP_T_ARG(NAME) &(NAME), 161#else 162#define MMAP_T_PARAM(NAME) 163#define MMAP_T_ARG(NAME) 164#endif 165 166static svn_error_t * 167map_or_read_file(apr_file_t **file, 168 MMAP_T_PARAM(mm) 169 char **buffer, apr_size_t *size_p, 170 const char *path, apr_pool_t *pool) 171{ 172 apr_finfo_t finfo; 173 apr_status_t rv; 174 apr_size_t size; 175 176 *buffer = NULL; 177 178 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool)); 179 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool)); 180 181 if (finfo.size > APR_SIZE_MAX) 182 { 183 return svn_error_createf(APR_ENOMEM, NULL, 184 _("File '%s' is too large to be read in " 185 "to memory"), path); 186 } 187 188 size = (apr_size_t) finfo.size; 189#if APR_HAS_MMAP 190 if (size > APR_MMAP_THRESHOLD) 191 { 192 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool); 193 if (rv == APR_SUCCESS) 194 { 195 *buffer = (*mm)->mm; 196 } 197 else 198 { 199 /* Clear *MM because output parameters are undefined on error. */ 200 *mm = NULL; 201 } 202 203 /* On failure we just fall through and try reading the file into 204 * memory instead. 205 */ 206 } 207#endif /* APR_HAS_MMAP */ 208 209 if (*buffer == NULL && size > 0) 210 { 211 *buffer = apr_palloc(pool, size); 212 213 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool)); 214 215 /* Since we have the entire contents of the file we can 216 * close it now. 217 */ 218 SVN_ERR(svn_io_file_close(*file, pool)); 219 220 *file = NULL; 221 } 222 223 *size_p = size; 224 225 return SVN_NO_ERROR; 226} 227 228 229/* For all files in the FILE array, increment the curp pointer. If a file 230 * points before the beginning of file, let it point at the first byte again. 231 * If the end of the current chunk is reached, read the next chunk in the 232 * buffer and point curp to the start of the chunk. If EOF is reached, set 233 * curp equal to endp to indicate EOF. */ 234#define INCREMENT_POINTERS(all_files, files_len, pool) \ 235 do { \ 236 apr_size_t svn_macro__i; \ 237 \ 238 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 239 { \ 240 if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\ 241 (all_files)[svn_macro__i].curp++; \ 242 else \ 243 SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \ 244 } \ 245 } while (0) 246 247 248/* For all files in the FILE array, decrement the curp pointer. If the 249 * start of a chunk is reached, read the previous chunk in the buffer and 250 * point curp to the last byte of the chunk. If the beginning of a FILE is 251 * reached, set chunk to -1 to indicate BOF. */ 252#define DECREMENT_POINTERS(all_files, files_len, pool) \ 253 do { \ 254 apr_size_t svn_macro__i; \ 255 \ 256 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \ 257 { \ 258 if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \ 259 (all_files)[svn_macro__i].curp--; \ 260 else \ 261 SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \ 262 } \ 263 } while (0) 264 265 266static svn_error_t * 267increment_chunk(struct file_info *file, apr_pool_t *pool) 268{ 269 apr_off_t length; 270 apr_off_t last_chunk = offset_to_chunk(file->size); 271 272 if (file->chunk == -1) 273 { 274 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */ 275 file->chunk = 0; 276 file->curp = file->buffer; 277 } 278 else if (file->chunk == last_chunk) 279 { 280 /* We are at the last chunk. Indicate EOF by setting curp == endp. */ 281 file->curp = file->endp; 282 } 283 else 284 { 285 /* There are still chunks left. Read next chunk and reset pointers. */ 286 file->chunk++; 287 length = file->chunk == last_chunk ? 288 offset_in_chunk(file->size) : CHUNK_SIZE; 289 SVN_ERR(read_chunk(file->file, file->path, file->buffer, 290 length, chunk_to_offset(file->chunk), 291 pool)); 292 file->endp = file->buffer + length; 293 file->curp = file->buffer; 294 } 295 296 return SVN_NO_ERROR; 297} 298 299 300static svn_error_t * 301decrement_chunk(struct file_info *file, apr_pool_t *pool) 302{ 303 if (file->chunk == 0) 304 { 305 /* We are already at the first chunk. Indicate BOF (Beginning Of File) 306 by setting chunk = -1 and curp = endp - 1. Both conditions are 307 important. They help the increment step to catch the BOF situation 308 in an efficient way. */ 309 file->chunk--; 310 file->curp = file->endp - 1; 311 } 312 else 313 { 314 /* Read previous chunk and reset pointers. */ 315 file->chunk--; 316 SVN_ERR(read_chunk(file->file, file->path, file->buffer, 317 CHUNK_SIZE, chunk_to_offset(file->chunk), 318 pool)); 319 file->endp = file->buffer + CHUNK_SIZE; 320 file->curp = file->endp - 1; 321 } 322 323 return SVN_NO_ERROR; 324} 325 326 327/* Check whether one of the FILEs has its pointers 'before' the beginning of 328 * the file (this can happen while scanning backwards). This is the case if 329 * one of them has chunk == -1. */ 330static svn_boolean_t 331is_one_at_bof(struct file_info file[], apr_size_t file_len) 332{ 333 apr_size_t i; 334 335 for (i = 0; i < file_len; i++) 336 if (file[i].chunk == -1) 337 return TRUE; 338 339 return FALSE; 340} 341 342/* Check whether one of the FILEs has its pointers at EOF (this is the case if 343 * one of them has curp == endp (this can only happen at the last chunk)) */ 344static svn_boolean_t 345is_one_at_eof(struct file_info file[], apr_size_t file_len) 346{ 347 apr_size_t i; 348 349 for (i = 0; i < file_len; i++) 350 if (file[i].curp == file[i].endp) 351 return TRUE; 352 353 return FALSE; 354} 355 356/* Quickly determine whether there is a eol char in CHUNK. 357 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start). 358 */ 359 360#if SVN_UNALIGNED_ACCESS_IS_OK 361static svn_boolean_t contains_eol(apr_uintptr_t chunk) 362{ 363 apr_uintptr_t r_test = chunk ^ SVN__R_MASK; 364 apr_uintptr_t n_test = chunk ^ SVN__N_MASK; 365 366 r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; 367 n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET; 368 369 return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET; 370} 371#endif 372 373/* Find the prefix which is identical between all elements of the FILE array. 374 * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be 375 * set to TRUE if one of the FILEs reached its end while scanning prefix, 376 * i.e. at least one file consisted entirely of prefix. Otherwise, 377 * REACHED_ONE_EOF is set to FALSE. 378 * 379 * After this function is finished, the buffers, chunks, curp's and endp's 380 * of the FILEs are set to point at the first byte after the prefix. */ 381static svn_error_t * 382find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines, 383 struct file_info file[], apr_size_t file_len, 384 apr_pool_t *pool) 385{ 386 svn_boolean_t had_cr = FALSE; 387 svn_boolean_t is_match; 388 apr_off_t lines = 0; 389 apr_size_t i; 390 391 *reached_one_eof = FALSE; 392 393 for (i = 1, is_match = TRUE; i < file_len; i++) 394 is_match = is_match && *file[0].curp == *file[i].curp; 395 while (is_match) 396 { 397#if SVN_UNALIGNED_ACCESS_IS_OK 398 apr_ssize_t max_delta, delta; 399#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ 400 401 /* ### TODO: see if we can take advantage of 402 diff options like ignore_eol_style or ignore_space. */ 403 /* check for eol, and count */ 404 if (*file[0].curp == '\r') 405 { 406 lines++; 407 had_cr = TRUE; 408 } 409 else if (*file[0].curp == '\n' && !had_cr) 410 { 411 lines++; 412 } 413 else 414 { 415 had_cr = FALSE; 416 } 417 418 INCREMENT_POINTERS(file, file_len, pool); 419 420#if SVN_UNALIGNED_ACCESS_IS_OK 421 422 /* Try to advance as far as possible with machine-word granularity. 423 * Determine how far we may advance with chunky ops without reaching 424 * endp for any of the files. 425 * Signedness is important here if curp gets close to endp. 426 */ 427 max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t); 428 for (i = 1; i < file_len; i++) 429 { 430 delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t); 431 if (delta < max_delta) 432 max_delta = delta; 433 } 434 435 is_match = TRUE; 436 for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t)) 437 { 438 apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta); 439 if (contains_eol(chunk)) 440 break; 441 442 for (i = 1; i < file_len; i++) 443 if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta)) 444 { 445 is_match = FALSE; 446 break; 447 } 448 449 if (! is_match) 450 break; 451 } 452 453 if (delta /* > 0*/) 454 { 455 /* We either found a mismatch or an EOL at or shortly behind curp+delta 456 * or we cannot proceed with chunky ops without exceeding endp. 457 * In any way, everything up to curp + delta is equal and not an EOL. 458 */ 459 for (i = 0; i < file_len; i++) 460 file[i].curp += delta; 461 462 /* Skipped data without EOL markers, so last char was not a CR. */ 463 had_cr = FALSE; 464 } 465#endif 466 467 *reached_one_eof = is_one_at_eof(file, file_len); 468 if (*reached_one_eof) 469 break; 470 else 471 for (i = 1, is_match = TRUE; i < file_len; i++) 472 is_match = is_match && *file[0].curp == *file[i].curp; 473 } 474 475 if (had_cr) 476 { 477 /* Check if we ended in the middle of a \r\n for one file, but \r for 478 another. If so, back up one byte, so the next loop will back up 479 the entire line. Also decrement lines, since we counted one 480 too many for the \r. */ 481 svn_boolean_t ended_at_nonmatching_newline = FALSE; 482 for (i = 0; i < file_len; i++) 483 if (file[i].curp < file[i].endp) 484 ended_at_nonmatching_newline = ended_at_nonmatching_newline 485 || *file[i].curp == '\n'; 486 if (ended_at_nonmatching_newline) 487 { 488 lines--; 489 DECREMENT_POINTERS(file, file_len, pool); 490 } 491 } 492 493 /* Back up one byte, so we point at the last identical byte */ 494 DECREMENT_POINTERS(file, file_len, pool); 495 496 /* Back up to the last eol sequence (\n, \r\n or \r) */ 497 while (!is_one_at_bof(file, file_len) && 498 *file[0].curp != '\n' && *file[0].curp != '\r') 499 DECREMENT_POINTERS(file, file_len, pool); 500 501 /* Slide one byte forward, to point past the eol sequence */ 502 INCREMENT_POINTERS(file, file_len, pool); 503 504 *prefix_lines = lines; 505 506 return SVN_NO_ERROR; 507} 508 509 510/* The number of identical suffix lines to keep with the middle section. These 511 * lines are not eliminated as suffix, and can be picked up by the token 512 * parsing and lcs steps. This is mainly for backward compatibility with 513 * the previous diff (and blame) output (if there are multiple diff solutions, 514 * our lcs algorithm prefers taking common lines from the start, rather than 515 * from the end. By giving it back some suffix lines, we give it some wiggle 516 * room to find the exact same diff as before). 517 * 518 * The number 50 is more or less arbitrary, based on some real-world tests 519 * with big files (and then doubling the required number to be on the safe 520 * side). This has a negligible effect on the power of the optimization. */ 521/* If you change this number, update test_identical_suffix() in diff-diff3-test.c */ 522#ifndef SUFFIX_LINES_TO_KEEP 523#define SUFFIX_LINES_TO_KEEP 50 524#endif 525 526/* Find the suffix which is identical between all elements of the FILE array. 527 * Return the number of suffix lines in SUFFIX_LINES. 528 * 529 * Before this function is called the FILEs' pointers and chunks should be 530 * positioned right after the identical prefix (which is the case after 531 * find_identical_prefix), so we can determine where suffix scanning should 532 * ultimately stop. */ 533static svn_error_t * 534find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[], 535 apr_size_t file_len, apr_pool_t *pool) 536{ 537 struct file_info file_for_suffix[4] = { { 0 } }; 538 apr_off_t length[4]; 539 apr_off_t suffix_min_chunk0; 540 apr_off_t suffix_min_offset0; 541 apr_off_t min_file_size; 542 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP; 543 svn_boolean_t is_match; 544 apr_off_t lines = 0; 545 svn_boolean_t had_cr; 546 svn_boolean_t had_nl; 547 apr_size_t i; 548 549 /* Initialize file_for_suffix[]. 550 Read last chunk, position curp at last byte. */ 551 for (i = 0; i < file_len; i++) 552 { 553 file_for_suffix[i].path = file[i].path; 554 file_for_suffix[i].file = file[i].file; 555 file_for_suffix[i].size = file[i].size; 556 file_for_suffix[i].chunk = 557 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */ 558 length[i] = offset_in_chunk(file_for_suffix[i].size); 559 if (length[i] == 0) 560 { 561 /* last chunk is an empty chunk -> start at next-to-last chunk */ 562 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1; 563 length[i] = CHUNK_SIZE; 564 } 565 566 if (file_for_suffix[i].chunk == file[i].chunk) 567 { 568 /* Prefix ended in last chunk, so we can reuse the prefix buffer */ 569 file_for_suffix[i].buffer = file[i].buffer; 570 } 571 else 572 { 573 /* There is at least more than 1 chunk, 574 so allocate full chunk size buffer */ 575 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE); 576 SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path, 577 file_for_suffix[i].buffer, length[i], 578 chunk_to_offset(file_for_suffix[i].chunk), 579 pool)); 580 } 581 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i]; 582 file_for_suffix[i].curp = file_for_suffix[i].endp - 1; 583 } 584 585 /* Get the chunk and pointer offset (for file[0]) at which we should stop 586 scanning backward for the identical suffix, i.e. when we reach prefix. */ 587 suffix_min_chunk0 = file[0].chunk; 588 suffix_min_offset0 = file[0].curp - file[0].buffer; 589 590 /* Compensate if other files are smaller than file[0] */ 591 for (i = 1, min_file_size = file[0].size; i < file_len; i++) 592 if (file[i].size < min_file_size) 593 min_file_size = file[i].size; 594 if (file[0].size > min_file_size) 595 { 596 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE; 597 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE; 598 } 599 600 /* Scan backwards until mismatch or until we reach the prefix. */ 601 for (i = 1, is_match = TRUE; i < file_len; i++) 602 is_match = is_match 603 && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 604 if (is_match && *file_for_suffix[0].curp != '\r' 605 && *file_for_suffix[0].curp != '\n') 606 /* Count an extra line for the last line not ending in an eol. */ 607 lines++; 608 609 had_nl = FALSE; 610 while (is_match) 611 { 612 svn_boolean_t reached_prefix; 613#if SVN_UNALIGNED_ACCESS_IS_OK 614 /* Initialize the minimum pointer positions. */ 615 const char *min_curp[4]; 616 svn_boolean_t can_read_word; 617#endif /* SVN_UNALIGNED_ACCESS_IS_OK */ 618 619 /* ### TODO: see if we can take advantage of 620 diff options like ignore_eol_style or ignore_space. */ 621 /* check for eol, and count */ 622 if (*file_for_suffix[0].curp == '\n') 623 { 624 lines++; 625 had_nl = TRUE; 626 } 627 else if (*file_for_suffix[0].curp == '\r' && !had_nl) 628 { 629 lines++; 630 } 631 else 632 { 633 had_nl = FALSE; 634 } 635 636 DECREMENT_POINTERS(file_for_suffix, file_len, pool); 637 638#if SVN_UNALIGNED_ACCESS_IS_OK 639 for (i = 0; i < file_len; i++) 640 min_curp[i] = file_for_suffix[i].buffer; 641 642 /* If we are in the same chunk that contains the last part of the common 643 prefix, use the min_curp[0] pointer to make sure we don't get a 644 suffix that overlaps the already determined common prefix. */ 645 if (file_for_suffix[0].chunk == suffix_min_chunk0) 646 min_curp[0] += suffix_min_offset0; 647 648 /* Scan quickly by reading with machine-word granularity. */ 649 for (i = 0, can_read_word = TRUE; i < file_len; i++) 650 can_read_word = can_read_word 651 && ( (file_for_suffix[i].curp + 1 652 - sizeof(apr_uintptr_t)) 653 > min_curp[i]); 654 while (can_read_word) 655 { 656 apr_uintptr_t chunk; 657 658 /* For each file curp is positioned at the current byte, but we 659 want to examine the current byte and the ones before the current 660 location as one machine word. */ 661 662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1 663 - sizeof(apr_uintptr_t)); 664 if (contains_eol(chunk)) 665 break; 666 667 for (i = 1, is_match = TRUE; i < file_len; i++) 668 is_match = is_match 669 && ( chunk 670 == *(const apr_uintptr_t *) 671 (file_for_suffix[i].curp + 1 672 - sizeof(apr_uintptr_t))); 673 674 if (! is_match) 675 break; 676 677 for (i = 0; i < file_len; i++) 678 { 679 file_for_suffix[i].curp -= sizeof(apr_uintptr_t); 680 can_read_word = can_read_word 681 && ( (file_for_suffix[i].curp + 1 682 - sizeof(apr_uintptr_t)) 683 > min_curp[i]); 684 } 685 686 /* We skipped some bytes, so there are no closing EOLs */ 687 had_nl = FALSE; 688 had_cr = FALSE; 689 } 690 691 /* The > min_curp[i] check leaves at least one final byte for checking 692 in the non block optimized case below. */ 693#endif 694 695 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0 696 && (file_for_suffix[0].curp - file_for_suffix[0].buffer) 697 == suffix_min_offset0; 698 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len)) 699 break; 700 701 is_match = TRUE; 702 for (i = 1; i < file_len; i++) 703 is_match = is_match 704 && *file_for_suffix[0].curp == *file_for_suffix[i].curp; 705 } 706 707 /* Slide one byte forward, to point at the first byte of identical suffix */ 708 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 709 710 /* Slide forward until we find an eol sequence to add the rest of the line 711 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least 712 one file reaches its end. */ 713 do 714 { 715 had_cr = FALSE; 716 while (!is_one_at_eof(file_for_suffix, file_len) 717 && *file_for_suffix[0].curp != '\n' 718 && *file_for_suffix[0].curp != '\r') 719 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 720 721 /* Slide one or two more bytes, to point past the eol. */ 722 if (!is_one_at_eof(file_for_suffix, file_len) 723 && *file_for_suffix[0].curp == '\r') 724 { 725 lines--; 726 had_cr = TRUE; 727 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 728 } 729 if (!is_one_at_eof(file_for_suffix, file_len) 730 && *file_for_suffix[0].curp == '\n') 731 { 732 if (!had_cr) 733 lines--; 734 INCREMENT_POINTERS(file_for_suffix, file_len, pool); 735 } 736 } 737 while (!is_one_at_eof(file_for_suffix, file_len) 738 && suffix_lines_to_keep--); 739 740 if (is_one_at_eof(file_for_suffix, file_len)) 741 lines = 0; 742 743 /* Save the final suffix information in the original file_info */ 744 for (i = 0; i < file_len; i++) 745 { 746 file[i].suffix_start_chunk = file_for_suffix[i].chunk; 747 file[i].suffix_offset_in_chunk = 748 file_for_suffix[i].curp - file_for_suffix[i].buffer; 749 } 750 751 *suffix_lines = lines; 752 753 return SVN_NO_ERROR; 754} 755 756 757/* Let FILE stand for the array of file_info struct elements of BATON->files 758 * that are indexed by the elements of the DATASOURCE array. 759 * BATON's type is (svn_diff__file_baton_t *). 760 * 761 * For each file in the FILE array, open the file at FILE.path; initialize 762 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a 763 * buffer and read the first chunk. Then find the prefix and suffix lines 764 * which are identical between all the files. Return the number of identical 765 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in 766 * SUFFIX_LINES. 767 * 768 * Finding the identical prefix and suffix allows us to exclude those from the 769 * rest of the diff algorithm, which increases performance by reducing the 770 * problem space. 771 * 772 * Implements svn_diff_fns2_t::datasources_open. */ 773static svn_error_t * 774datasources_open(void *baton, 775 apr_off_t *prefix_lines, 776 apr_off_t *suffix_lines, 777 const svn_diff_datasource_e *datasources, 778 apr_size_t datasources_len) 779{ 780 svn_diff__file_baton_t *file_baton = baton; 781 struct file_info files[4]; 782 apr_finfo_t finfo[4]; 783 apr_off_t length[4]; 784#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING 785 svn_boolean_t reached_one_eof; 786#endif 787 apr_size_t i; 788 789 /* Make sure prefix_lines and suffix_lines are set correctly, even if we 790 * exit early because one of the files is empty. */ 791 *prefix_lines = 0; 792 *suffix_lines = 0; 793 794 /* Open datasources and read first chunk */ 795 for (i = 0; i < datasources_len; i++) 796 { 797 struct file_info *file 798 = &file_baton->files[datasource_to_index(datasources[i])]; 799 SVN_ERR(svn_io_file_open(&file->file, file->path, 800 APR_READ, APR_OS_DEFAULT, file_baton->pool)); 801 SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE, 802 file->file, file_baton->pool)); 803 file->size = finfo[i].size; 804 length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size; 805 file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]); 806 SVN_ERR(read_chunk(file->file, file->path, file->buffer, 807 length[i], 0, file_baton->pool)); 808 file->endp = file->buffer + length[i]; 809 file->curp = file->buffer; 810 /* Set suffix_start_chunk to a guard value, so if suffix scanning is 811 * skipped because one of the files is empty, or because of 812 * reached_one_eof, we can still easily check for the suffix during 813 * token reading (datasource_get_next_token). */ 814 file->suffix_start_chunk = -1; 815 816 files[i] = *file; 817 } 818 819 for (i = 0; i < datasources_len; i++) 820 if (length[i] == 0) 821 /* There will not be any identical prefix/suffix, so we're done. */ 822 return SVN_NO_ERROR; 823 824#ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING 825 826 SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines, 827 files, datasources_len, file_baton->pool)); 828 829 if (!reached_one_eof) 830 /* No file consisted totally of identical prefix, 831 * so there may be some identical suffix. */ 832 SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len, 833 file_baton->pool)); 834 835#endif 836 837 /* Copy local results back to baton. */ 838 for (i = 0; i < datasources_len; i++) 839 file_baton->files[datasource_to_index(datasources[i])] = files[i]; 840 841 return SVN_NO_ERROR; 842} 843 844 845/* Implements svn_diff_fns2_t::datasource_close */ 846static svn_error_t * 847datasource_close(void *baton, svn_diff_datasource_e datasource) 848{ 849 /* Do nothing. The compare_token function needs previous datasources 850 * to stay available until all datasources are processed. 851 */ 852 853 return SVN_NO_ERROR; 854} 855 856/* Implements svn_diff_fns2_t::datasource_get_next_token */ 857static svn_error_t * 858datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton, 859 svn_diff_datasource_e datasource) 860{ 861 svn_diff__file_baton_t *file_baton = baton; 862 svn_diff__file_token_t *file_token; 863 struct file_info *file = &file_baton->files[datasource_to_index(datasource)]; 864 char *endp; 865 char *curp; 866 char *eol; 867 apr_off_t last_chunk; 868 apr_off_t length; 869 apr_uint32_t h = 0; 870 /* Did the last chunk end in a CR character? */ 871 svn_boolean_t had_cr = FALSE; 872 873 *token = NULL; 874 875 curp = file->curp; 876 endp = file->endp; 877 878 last_chunk = offset_to_chunk(file->size); 879 880 /* Are we already at the end of a chunk? */ 881 if (curp == endp) 882 { 883 /* Are we at EOF */ 884 if (last_chunk == file->chunk) 885 return SVN_NO_ERROR; /* EOF */ 886 887 /* Or right before an identical suffix in the next chunk? */ 888 if (file->chunk + 1 == file->suffix_start_chunk 889 && file->suffix_offset_in_chunk == 0) 890 return SVN_NO_ERROR; 891 } 892 893 /* Stop when we encounter the identical suffix. If suffix scanning was not 894 * performed, suffix_start_chunk will be -1, so this condition will never 895 * be true. */ 896 if (file->chunk == file->suffix_start_chunk 897 && (curp - file->buffer) == file->suffix_offset_in_chunk) 898 return SVN_NO_ERROR; 899 900 /* Allocate a new token, or fetch one from the "reusable tokens" list. */ 901 file_token = file_baton->tokens; 902 if (file_token) 903 { 904 file_baton->tokens = file_token->next; 905 } 906 else 907 { 908 file_token = apr_palloc(file_baton->pool, sizeof(*file_token)); 909 } 910 911 file_token->datasource = datasource; 912 file_token->offset = chunk_to_offset(file->chunk) 913 + (curp - file->buffer); 914 file_token->norm_offset = file_token->offset; 915 file_token->raw_length = 0; 916 file_token->length = 0; 917 918 while (1) 919 { 920 eol = svn_eol__find_eol_start(curp, endp - curp); 921 if (eol) 922 { 923 had_cr = (*eol == '\r'); 924 eol++; 925 /* If we have the whole eol sequence in the chunk... */ 926 if (!(had_cr && eol == endp)) 927 { 928 /* Also skip past the '\n' in an '\r\n' sequence. */ 929 if (had_cr && *eol == '\n') 930 eol++; 931 break; 932 } 933 } 934 935 if (file->chunk == last_chunk) 936 { 937 eol = endp; 938 break; 939 } 940 941 length = endp - curp; 942 file_token->raw_length += length; 943 { 944 char *c = curp; 945 946 svn_diff__normalize_buffer(&c, &length, 947 &file->normalize_state, 948 curp, file_baton->options); 949 if (file_token->length == 0) 950 { 951 /* When we are reading the first part of the token, move the 952 normalized offset past leading ignored characters, if any. */ 953 file_token->norm_offset += (c - curp); 954 } 955 file_token->length += length; 956 h = svn__adler32(h, c, length); 957 } 958 959 curp = endp = file->buffer; 960 file->chunk++; 961 length = file->chunk == last_chunk ? 962 offset_in_chunk(file->size) : CHUNK_SIZE; 963 endp += length; 964 file->endp = endp; 965 966 /* Issue #4283: Normally we should have checked for reaching the skipped 967 suffix here, but because we assume that a suffix always starts on a 968 line and token boundary we rely on catching the suffix earlier in this 969 function. 970 971 When changing things here, make sure the whitespace settings are 972 applied, or we mught not reach the exact suffix boundary as token 973 boundary. */ 974 SVN_ERR(read_chunk(file->file, file->path, 975 curp, length, 976 chunk_to_offset(file->chunk), 977 file_baton->pool)); 978 979 /* If the last chunk ended in a CR, we're done. */ 980 if (had_cr) 981 { 982 eol = curp; 983 if (*curp == '\n') 984 ++eol; 985 break; 986 } 987 } 988 989 length = eol - curp; 990 file_token->raw_length += length; 991 file->curp = eol; 992 993 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up 994 * with a spurious empty token. Avoid returning it. 995 * Note that we use the unnormalized length; we don't want a line containing 996 * only spaces (and no trailing newline) to appear like a non-existent 997 * line. */ 998 if (file_token->raw_length > 0) 999 { 1000 char *c = curp; 1001 svn_diff__normalize_buffer(&c, &length, 1002 &file->normalize_state, 1003 curp, file_baton->options); 1004 if (file_token->length == 0) 1005 { 1006 /* When we are reading the first part of the token, move the 1007 normalized offset past leading ignored characters, if any. */ 1008 file_token->norm_offset += (c - curp); 1009 } 1010 1011 file_token->length += length; 1012 1013 *hash = svn__adler32(h, c, length); 1014 *token = file_token; 1015 } 1016 1017 return SVN_NO_ERROR; 1018} 1019 1020#define COMPARE_CHUNK_SIZE 4096 1021 1022/* Implements svn_diff_fns2_t::token_compare */ 1023static svn_error_t * 1024token_compare(void *baton, void *token1, void *token2, int *compare) 1025{ 1026 svn_diff__file_baton_t *file_baton = baton; 1027 svn_diff__file_token_t *file_token[2]; 1028 char buffer[2][COMPARE_CHUNK_SIZE]; 1029 char *bufp[2]; 1030 apr_off_t offset[2]; 1031 struct file_info *file[2]; 1032 apr_off_t length[2]; 1033 apr_off_t total_length; 1034 /* How much is left to read of each token from the file. */ 1035 apr_off_t raw_length[2]; 1036 int i; 1037 svn_diff__normalize_state_t state[2]; 1038 1039 file_token[0] = token1; 1040 file_token[1] = token2; 1041 if (file_token[0]->length < file_token[1]->length) 1042 { 1043 *compare = -1; 1044 return SVN_NO_ERROR; 1045 } 1046 1047 if (file_token[0]->length > file_token[1]->length) 1048 { 1049 *compare = 1; 1050 return SVN_NO_ERROR; 1051 } 1052 1053 total_length = file_token[0]->length; 1054 if (total_length == 0) 1055 { 1056 *compare = 0; 1057 return SVN_NO_ERROR; 1058 } 1059 1060 for (i = 0; i < 2; ++i) 1061 { 1062 int idx = datasource_to_index(file_token[i]->datasource); 1063 1064 file[i] = &file_baton->files[idx]; 1065 offset[i] = file_token[i]->norm_offset; 1066 state[i] = svn_diff__normalize_state_normal; 1067 1068 if (offset_to_chunk(offset[i]) == file[i]->chunk) 1069 { 1070 /* If the start of the token is in memory, the entire token is 1071 * in memory. 1072 */ 1073 bufp[i] = file[i]->buffer; 1074 bufp[i] += offset_in_chunk(offset[i]); 1075 1076 length[i] = total_length; 1077 raw_length[i] = 0; 1078 } 1079 else 1080 { 1081 apr_off_t skipped; 1082 1083 length[i] = 0; 1084 1085 /* When we skipped the first part of the token via the whitespace 1086 normalization we must reduce the raw length of the token */ 1087 skipped = (file_token[i]->norm_offset - file_token[i]->offset); 1088 1089 raw_length[i] = file_token[i]->raw_length - skipped; 1090 } 1091 } 1092 1093 do 1094 { 1095 apr_off_t len; 1096 for (i = 0; i < 2; i++) 1097 { 1098 if (length[i] == 0) 1099 { 1100 /* Error if raw_length is 0, that's an unexpected change 1101 * of the file that can happen when ingoring whitespace 1102 * and that can lead to an infinite loop. */ 1103 if (raw_length[i] == 0) 1104 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED, 1105 NULL, 1106 _("The file '%s' changed unexpectedly" 1107 " during diff"), 1108 file[i]->path); 1109 1110 /* Read a chunk from disk into a buffer */ 1111 bufp[i] = buffer[i]; 1112 length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ? 1113 COMPARE_CHUNK_SIZE : raw_length[i]; 1114 1115 SVN_ERR(read_chunk(file[i]->file, 1116 file[i]->path, 1117 bufp[i], length[i], offset[i], 1118 file_baton->pool)); 1119 offset[i] += length[i]; 1120 raw_length[i] -= length[i]; 1121 /* bufp[i] gets reset to buffer[i] before reading each chunk, 1122 so, overwriting it isn't a problem */ 1123 svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i], 1124 bufp[i], file_baton->options); 1125 1126 /* assert(length[i] == file_token[i]->length); */ 1127 } 1128 } 1129 1130 len = length[0] > length[1] ? length[1] : length[0]; 1131 1132 /* Compare two chunks (that could be entire tokens if they both reside 1133 * in memory). 1134 */ 1135 *compare = memcmp(bufp[0], bufp[1], (size_t) len); 1136 if (*compare != 0) 1137 return SVN_NO_ERROR; 1138 1139 total_length -= len; 1140 length[0] -= len; 1141 length[1] -= len; 1142 bufp[0] += len; 1143 bufp[1] += len; 1144 } 1145 while(total_length > 0); 1146 1147 *compare = 0; 1148 return SVN_NO_ERROR; 1149} 1150 1151 1152/* Implements svn_diff_fns2_t::token_discard */ 1153static void 1154token_discard(void *baton, void *token) 1155{ 1156 svn_diff__file_baton_t *file_baton = baton; 1157 svn_diff__file_token_t *file_token = token; 1158 1159 /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */ 1160 file_token->next = file_baton->tokens; 1161 file_baton->tokens = file_token; 1162} 1163 1164 1165/* Implements svn_diff_fns2_t::token_discard_all */ 1166static void 1167token_discard_all(void *baton) 1168{ 1169 svn_diff__file_baton_t *file_baton = baton; 1170 1171 /* Discard all memory in use by the tokens, and close all open files. */ 1172 svn_pool_clear(file_baton->pool); 1173} 1174 1175 1176static const svn_diff_fns2_t svn_diff__file_vtable = 1177{ 1178 datasources_open, 1179 datasource_close, 1180 datasource_get_next_token, 1181 token_compare, 1182 token_discard, 1183 token_discard_all 1184}; 1185 1186/* Id for the --ignore-eol-style option, which doesn't have a short name. */ 1187#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256 1188 1189/* Options supported by svn_diff_file_options_parse(). */ 1190static const apr_getopt_option_t diff_options[] = 1191{ 1192 { "ignore-space-change", 'b', 0, NULL }, 1193 { "ignore-all-space", 'w', 0, NULL }, 1194 { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL }, 1195 { "show-c-function", 'p', 0, NULL }, 1196 /* ### For compatibility; we don't support the argument to -u, because 1197 * ### we don't have optional argument support. */ 1198 { "unified", 'u', 0, NULL }, 1199 { NULL, 0, 0, NULL } 1200}; 1201 1202svn_diff_file_options_t * 1203svn_diff_file_options_create(apr_pool_t *pool) 1204{ 1205 return apr_pcalloc(pool, sizeof(svn_diff_file_options_t)); 1206} 1207 1208/* A baton for use with opt_parsing_error_func(). */ 1209struct opt_parsing_error_baton_t 1210{ 1211 svn_error_t *err; 1212 apr_pool_t *pool; 1213}; 1214 1215/* Store an error message from apr_getopt_long(). Set BATON->err to a new 1216 * error with a message generated from FMT and the remaining arguments. 1217 * Implements apr_getopt_err_fn_t. */ 1218static void 1219opt_parsing_error_func(void *baton, 1220 const char *fmt, ...) 1221{ 1222 struct opt_parsing_error_baton_t *b = baton; 1223 const char *message; 1224 va_list ap; 1225 1226 va_start(ap, fmt); 1227 message = apr_pvsprintf(b->pool, fmt, ap); 1228 va_end(ap); 1229 1230 /* Skip leading ": " (if present, which it always is in known cases). */ 1231 if (strncmp(message, ": ", 2) == 0) 1232 message += 2; 1233 1234 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message); 1235} 1236 1237svn_error_t * 1238svn_diff_file_options_parse(svn_diff_file_options_t *options, 1239 const apr_array_header_t *args, 1240 apr_pool_t *pool) 1241{ 1242 apr_getopt_t *os; 1243 struct opt_parsing_error_baton_t opt_parsing_error_baton; 1244 /* Make room for each option (starting at index 1) plus trailing NULL. */ 1245 const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2)); 1246 1247 opt_parsing_error_baton.err = NULL; 1248 opt_parsing_error_baton.pool = pool; 1249 1250 argv[0] = ""; 1251 memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts); 1252 argv[args->nelts + 1] = NULL; 1253 1254 apr_getopt_init(&os, pool, args->nelts + 1, argv); 1255 1256 /* Capture any error message from apr_getopt_long(). This will typically 1257 * say which option is wrong, which we would not otherwise know. */ 1258 os->errfn = opt_parsing_error_func; 1259 os->errarg = &opt_parsing_error_baton; 1260 1261 while (1) 1262 { 1263 const char *opt_arg; 1264 int opt_id; 1265 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg); 1266 1267 if (APR_STATUS_IS_EOF(err)) 1268 break; 1269 if (err) 1270 /* Wrap apr_getopt_long()'s error message. Its doc string implies 1271 * it always will produce one, but never mind if it doesn't. Avoid 1272 * using the message associated with the return code ERR, because 1273 * it refers to the "command line" which may be misleading here. */ 1274 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, 1275 opt_parsing_error_baton.err, 1276 _("Error in options to internal diff")); 1277 1278 switch (opt_id) 1279 { 1280 case 'b': 1281 /* -w takes precedence over -b. */ 1282 if (! options->ignore_space) 1283 options->ignore_space = svn_diff_file_ignore_space_change; 1284 break; 1285 case 'w': 1286 options->ignore_space = svn_diff_file_ignore_space_all; 1287 break; 1288 case SVN_DIFF__OPT_IGNORE_EOL_STYLE: 1289 options->ignore_eol_style = TRUE; 1290 break; 1291 case 'p': 1292 options->show_c_function = TRUE; 1293 break; 1294 default: 1295 break; 1296 } 1297 } 1298 1299 /* Check for spurious arguments. */ 1300 if (os->ind < os->argc) 1301 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL, 1302 _("Invalid argument '%s' in diff options"), 1303 os->argv[os->ind]); 1304 1305 return SVN_NO_ERROR; 1306} 1307 1308svn_error_t * 1309svn_diff_file_diff_2(svn_diff_t **diff, 1310 const char *original, 1311 const char *modified, 1312 const svn_diff_file_options_t *options, 1313 apr_pool_t *pool) 1314{ 1315 svn_diff__file_baton_t baton = { 0 }; 1316 1317 baton.options = options; 1318 baton.files[0].path = original; 1319 baton.files[1].path = modified; 1320 baton.pool = svn_pool_create(pool); 1321 1322 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool)); 1323 1324 svn_pool_destroy(baton.pool); 1325 return SVN_NO_ERROR; 1326} 1327 1328svn_error_t * 1329svn_diff_file_diff3_2(svn_diff_t **diff, 1330 const char *original, 1331 const char *modified, 1332 const char *latest, 1333 const svn_diff_file_options_t *options, 1334 apr_pool_t *pool) 1335{ 1336 svn_diff__file_baton_t baton = { 0 }; 1337 1338 baton.options = options; 1339 baton.files[0].path = original; 1340 baton.files[1].path = modified; 1341 baton.files[2].path = latest; 1342 baton.pool = svn_pool_create(pool); 1343 1344 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool)); 1345 1346 svn_pool_destroy(baton.pool); 1347 return SVN_NO_ERROR; 1348} 1349 1350svn_error_t * 1351svn_diff_file_diff4_2(svn_diff_t **diff, 1352 const char *original, 1353 const char *modified, 1354 const char *latest, 1355 const char *ancestor, 1356 const svn_diff_file_options_t *options, 1357 apr_pool_t *pool) 1358{ 1359 svn_diff__file_baton_t baton = { 0 }; 1360 1361 baton.options = options; 1362 baton.files[0].path = original; 1363 baton.files[1].path = modified; 1364 baton.files[2].path = latest; 1365 baton.files[3].path = ancestor; 1366 baton.pool = svn_pool_create(pool); 1367 1368 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool)); 1369 1370 svn_pool_destroy(baton.pool); 1371 return SVN_NO_ERROR; 1372} 1373 1374 1375/** Display unified context diffs **/ 1376 1377/* Maximum length of the extra context to show when show_c_function is set. 1378 * GNU diff uses 40, let's be brave and use 50 instead. */ 1379#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50 1380typedef struct svn_diff__file_output_baton_t 1381{ 1382 svn_stream_t *output_stream; 1383 const char *header_encoding; 1384 1385 /* Cached markers, in header_encoding. */ 1386 const char *context_str; 1387 const char *delete_str; 1388 const char *insert_str; 1389 1390 const char *path[2]; 1391 apr_file_t *file[2]; 1392 1393 apr_off_t current_line[2]; 1394 1395 char buffer[2][4096]; 1396 apr_size_t length[2]; 1397 char *curp[2]; 1398 1399 apr_off_t hunk_start[2]; 1400 apr_off_t hunk_length[2]; 1401 svn_stringbuf_t *hunk; 1402 1403 /* Should we emit C functions in the unified diff header */ 1404 svn_boolean_t show_c_function; 1405 /* Extra strings to skip over if we match. */ 1406 apr_array_header_t *extra_skip_match; 1407 /* "Context" to append to the @@ line when the show_c_function option 1408 * is set. */ 1409 svn_stringbuf_t *extra_context; 1410 /* Extra context for the current hunk. */ 1411 char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1]; 1412 1413 apr_pool_t *pool; 1414} svn_diff__file_output_baton_t; 1415 1416typedef enum svn_diff__file_output_unified_type_e 1417{ 1418 svn_diff__file_output_unified_skip, 1419 svn_diff__file_output_unified_context, 1420 svn_diff__file_output_unified_delete, 1421 svn_diff__file_output_unified_insert 1422} svn_diff__file_output_unified_type_e; 1423 1424 1425static svn_error_t * 1426output_unified_line(svn_diff__file_output_baton_t *baton, 1427 svn_diff__file_output_unified_type_e type, int idx) 1428{ 1429 char *curp; 1430 char *eol; 1431 apr_size_t length; 1432 svn_error_t *err; 1433 svn_boolean_t bytes_processed = FALSE; 1434 svn_boolean_t had_cr = FALSE; 1435 /* Are we collecting extra context? */ 1436 svn_boolean_t collect_extra = FALSE; 1437 1438 length = baton->length[idx]; 1439 curp = baton->curp[idx]; 1440 1441 /* Lazily update the current line even if we're at EOF. 1442 * This way we fake output of context at EOF 1443 */ 1444 baton->current_line[idx]++; 1445 1446 if (length == 0 && apr_file_eof(baton->file[idx])) 1447 { 1448 return SVN_NO_ERROR; 1449 } 1450 1451 do 1452 { 1453 if (length > 0) 1454 { 1455 if (!bytes_processed) 1456 { 1457 switch (type) 1458 { 1459 case svn_diff__file_output_unified_context: 1460 svn_stringbuf_appendcstr(baton->hunk, baton->context_str); 1461 baton->hunk_length[0]++; 1462 baton->hunk_length[1]++; 1463 break; 1464 case svn_diff__file_output_unified_delete: 1465 svn_stringbuf_appendcstr(baton->hunk, baton->delete_str); 1466 baton->hunk_length[0]++; 1467 break; 1468 case svn_diff__file_output_unified_insert: 1469 svn_stringbuf_appendcstr(baton->hunk, baton->insert_str); 1470 baton->hunk_length[1]++; 1471 break; 1472 default: 1473 break; 1474 } 1475 1476 if (baton->show_c_function 1477 && (type == svn_diff__file_output_unified_skip 1478 || type == svn_diff__file_output_unified_context) 1479 && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_') 1480 && !svn_cstring_match_glob_list(curp, 1481 baton->extra_skip_match)) 1482 { 1483 svn_stringbuf_setempty(baton->extra_context); 1484 collect_extra = TRUE; 1485 } 1486 } 1487 1488 eol = svn_eol__find_eol_start(curp, length); 1489 1490 if (eol != NULL) 1491 { 1492 apr_size_t len; 1493 1494 had_cr = (*eol == '\r'); 1495 eol++; 1496 len = (apr_size_t)(eol - curp); 1497 1498 if (! had_cr || len < length) 1499 { 1500 if (had_cr && *eol == '\n') 1501 { 1502 ++eol; 1503 ++len; 1504 } 1505 1506 length -= len; 1507 1508 if (type != svn_diff__file_output_unified_skip) 1509 { 1510 svn_stringbuf_appendbytes(baton->hunk, curp, len); 1511 } 1512 if (collect_extra) 1513 { 1514 svn_stringbuf_appendbytes(baton->extra_context, 1515 curp, len); 1516 } 1517 1518 baton->curp[idx] = eol; 1519 baton->length[idx] = length; 1520 1521 err = SVN_NO_ERROR; 1522 1523 break; 1524 } 1525 } 1526 1527 if (type != svn_diff__file_output_unified_skip) 1528 { 1529 svn_stringbuf_appendbytes(baton->hunk, curp, length); 1530 } 1531 1532 if (collect_extra) 1533 { 1534 svn_stringbuf_appendbytes(baton->extra_context, curp, length); 1535 } 1536 1537 bytes_processed = TRUE; 1538 } 1539 1540 curp = baton->buffer[idx]; 1541 length = sizeof(baton->buffer[idx]); 1542 1543 err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool); 1544 1545 /* If the last chunk ended with a CR, we look for an LF at the start 1546 of this chunk. */ 1547 if (had_cr) 1548 { 1549 if (! err && length > 0 && *curp == '\n') 1550 { 1551 if (type != svn_diff__file_output_unified_skip) 1552 { 1553 svn_stringbuf_appendbyte(baton->hunk, *curp); 1554 } 1555 /* We don't append the LF to extra_context, since it would 1556 * just be stripped anyway. */ 1557 ++curp; 1558 --length; 1559 } 1560 1561 baton->curp[idx] = curp; 1562 baton->length[idx] = length; 1563 1564 break; 1565 } 1566 } 1567 while (! err); 1568 1569 if (err && ! APR_STATUS_IS_EOF(err->apr_err)) 1570 return err; 1571 1572 if (err && APR_STATUS_IS_EOF(err->apr_err)) 1573 { 1574 svn_error_clear(err); 1575 /* Special case if we reach the end of file AND the last line is in the 1576 changed range AND the file doesn't end with a newline */ 1577 if (bytes_processed && (type != svn_diff__file_output_unified_skip) 1578 && ! had_cr) 1579 { 1580 SVN_ERR(svn_diff__unified_append_no_newline_msg( 1581 baton->hunk, baton->header_encoding, baton->pool)); 1582 } 1583 1584 baton->length[idx] = 0; 1585 } 1586 1587 return SVN_NO_ERROR; 1588} 1589 1590static APR_INLINE svn_error_t * 1591output_unified_diff_range(svn_diff__file_output_baton_t *output_baton, 1592 int source, 1593 svn_diff__file_output_unified_type_e type, 1594 apr_off_t until) 1595{ 1596 while (output_baton->current_line[source] < until) 1597 { 1598 SVN_ERR(output_unified_line(output_baton, type, source)); 1599 } 1600 return SVN_NO_ERROR; 1601} 1602 1603static svn_error_t * 1604output_unified_flush_hunk(svn_diff__file_output_baton_t *baton) 1605{ 1606 apr_off_t target_line; 1607 apr_size_t hunk_len; 1608 apr_off_t old_start; 1609 apr_off_t new_start; 1610 1611 if (svn_stringbuf_isempty(baton->hunk)) 1612 { 1613 /* Nothing to flush */ 1614 return SVN_NO_ERROR; 1615 } 1616 1617 target_line = baton->hunk_start[0] + baton->hunk_length[0] 1618 + SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1619 1620 /* Add trailing context to the hunk */ 1621 SVN_ERR(output_unified_diff_range(baton, 0 /* original */, 1622 svn_diff__file_output_unified_context, 1623 target_line)); 1624 1625 old_start = baton->hunk_start[0]; 1626 new_start = baton->hunk_start[1]; 1627 1628 /* If the file is non-empty, convert the line indexes from 1629 zero based to one based */ 1630 if (baton->hunk_length[0]) 1631 old_start++; 1632 if (baton->hunk_length[1]) 1633 new_start++; 1634 1635 /* Write the hunk header */ 1636 SVN_ERR(svn_diff__unified_write_hunk_header( 1637 baton->output_stream, baton->header_encoding, "@@", 1638 old_start, baton->hunk_length[0], 1639 new_start, baton->hunk_length[1], 1640 baton->hunk_extra_context, 1641 baton->pool)); 1642 1643 /* Output the hunk content */ 1644 hunk_len = baton->hunk->len; 1645 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data, 1646 &hunk_len)); 1647 1648 /* Prepare for the next hunk */ 1649 baton->hunk_length[0] = 0; 1650 baton->hunk_length[1] = 0; 1651 baton->hunk_start[0] = 0; 1652 baton->hunk_start[1] = 0; 1653 svn_stringbuf_setempty(baton->hunk); 1654 1655 return SVN_NO_ERROR; 1656} 1657 1658static svn_error_t * 1659output_unified_diff_modified(void *baton, 1660 apr_off_t original_start, apr_off_t original_length, 1661 apr_off_t modified_start, apr_off_t modified_length, 1662 apr_off_t latest_start, apr_off_t latest_length) 1663{ 1664 svn_diff__file_output_baton_t *output_baton = baton; 1665 apr_off_t context_prefix_length; 1666 apr_off_t prev_context_end; 1667 svn_boolean_t init_hunk = FALSE; 1668 1669 if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE) 1670 context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1671 else 1672 context_prefix_length = original_start; 1673 1674 /* Calculate where the previous hunk will end if we would write it now 1675 (including the necessary context at the end) */ 1676 if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0) 1677 { 1678 prev_context_end = output_baton->hunk_start[0] 1679 + output_baton->hunk_length[0] 1680 + SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1681 } 1682 else 1683 { 1684 prev_context_end = -1; 1685 1686 if (output_baton->hunk_start[0] == 0 1687 && (original_length > 0 || modified_length > 0)) 1688 init_hunk = TRUE; 1689 } 1690 1691 /* If the changed range is far enough from the previous range, flush the current 1692 hunk. */ 1693 { 1694 apr_off_t new_hunk_start = (original_start - context_prefix_length); 1695 1696 if (output_baton->current_line[0] < new_hunk_start 1697 && prev_context_end <= new_hunk_start) 1698 { 1699 SVN_ERR(output_unified_flush_hunk(output_baton)); 1700 init_hunk = TRUE; 1701 } 1702 else if (output_baton->hunk_length[0] > 0 1703 || output_baton->hunk_length[1] > 0) 1704 { 1705 /* We extend the current hunk */ 1706 1707 1708 /* Original: Output the context preceding the changed range */ 1709 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1710 svn_diff__file_output_unified_context, 1711 original_start)); 1712 } 1713 } 1714 1715 /* Original: Skip lines until we are at the beginning of the context we want 1716 to display */ 1717 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1718 svn_diff__file_output_unified_skip, 1719 original_start - context_prefix_length)); 1720 1721 /* Note that the above skip stores data for the show_c_function support below */ 1722 1723 if (init_hunk) 1724 { 1725 SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0 1726 && output_baton->hunk_length[1] == 0); 1727 1728 output_baton->hunk_start[0] = original_start - context_prefix_length; 1729 output_baton->hunk_start[1] = modified_start - context_prefix_length; 1730 } 1731 1732 if (init_hunk && output_baton->show_c_function) 1733 { 1734 apr_size_t p; 1735 const char *invalid_character; 1736 1737 /* Save the extra context for later use. 1738 * Note that the last byte of the hunk_extra_context array is never 1739 * touched after it is zero-initialized, so the array is always 1740 * 0-terminated. */ 1741 strncpy(output_baton->hunk_extra_context, 1742 output_baton->extra_context->data, 1743 SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1744 /* Trim whitespace at the end, most notably to get rid of any 1745 * newline characters. */ 1746 p = strlen(output_baton->hunk_extra_context); 1747 while (p > 0 1748 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1])) 1749 { 1750 output_baton->hunk_extra_context[--p] = '\0'; 1751 } 1752 invalid_character = 1753 svn_utf__last_valid(output_baton->hunk_extra_context, 1754 SVN_DIFF__EXTRA_CONTEXT_LENGTH); 1755 for (p = invalid_character - output_baton->hunk_extra_context; 1756 p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++) 1757 { 1758 output_baton->hunk_extra_context[p] = '\0'; 1759 } 1760 } 1761 1762 /* Modified: Skip lines until we are at the start of the changed range */ 1763 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1764 svn_diff__file_output_unified_skip, 1765 modified_start)); 1766 1767 /* Original: Output the context preceding the changed range */ 1768 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1769 svn_diff__file_output_unified_context, 1770 original_start)); 1771 1772 /* Both: Output the changed range */ 1773 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */, 1774 svn_diff__file_output_unified_delete, 1775 original_start + original_length)); 1776 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */, 1777 svn_diff__file_output_unified_insert, 1778 modified_start + modified_length)); 1779 1780 return SVN_NO_ERROR; 1781} 1782 1783/* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */ 1784static svn_error_t * 1785output_unified_default_hdr(const char **header, const char *path, 1786 apr_pool_t *pool) 1787{ 1788 apr_finfo_t file_info; 1789 apr_time_exp_t exploded_time; 1790 char time_buffer[64]; 1791 apr_size_t time_len; 1792 const char *utf8_timestr; 1793 1794 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool)); 1795 apr_time_exp_lt(&exploded_time, file_info.mtime); 1796 1797 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1, 1798 /* Order of date components can be different in different languages */ 1799 _("%a %b %e %H:%M:%S %Y"), &exploded_time); 1800 1801 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool)); 1802 1803 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr); 1804 1805 return SVN_NO_ERROR; 1806} 1807 1808static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable = 1809{ 1810 NULL, /* output_common */ 1811 output_unified_diff_modified, 1812 NULL, /* output_diff_latest */ 1813 NULL, /* output_diff_common */ 1814 NULL /* output_conflict */ 1815}; 1816 1817svn_error_t * 1818svn_diff_file_output_unified3(svn_stream_t *output_stream, 1819 svn_diff_t *diff, 1820 const char *original_path, 1821 const char *modified_path, 1822 const char *original_header, 1823 const char *modified_header, 1824 const char *header_encoding, 1825 const char *relative_to_dir, 1826 svn_boolean_t show_c_function, 1827 apr_pool_t *pool) 1828{ 1829 if (svn_diff_contains_diffs(diff)) 1830 { 1831 svn_diff__file_output_baton_t baton; 1832 int i; 1833 1834 memset(&baton, 0, sizeof(baton)); 1835 baton.output_stream = output_stream; 1836 baton.pool = pool; 1837 baton.header_encoding = header_encoding; 1838 baton.path[0] = original_path; 1839 baton.path[1] = modified_path; 1840 baton.hunk = svn_stringbuf_create_empty(pool); 1841 baton.show_c_function = show_c_function; 1842 baton.extra_context = svn_stringbuf_create_empty(pool); 1843 1844 if (show_c_function) 1845 { 1846 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **)); 1847 1848 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*"; 1849 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*"; 1850 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*"; 1851 } 1852 1853 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ", 1854 header_encoding, pool)); 1855 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-", 1856 header_encoding, pool)); 1857 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+", 1858 header_encoding, pool)); 1859 1860 if (relative_to_dir) 1861 { 1862 /* Possibly adjust the "original" and "modified" paths shown in 1863 the output (see issue #2723). */ 1864 const char *child_path; 1865 1866 if (! original_header) 1867 { 1868 child_path = svn_dirent_is_child(relative_to_dir, 1869 original_path, pool); 1870 if (child_path) 1871 original_path = child_path; 1872 else 1873 return svn_error_createf( 1874 SVN_ERR_BAD_RELATIVE_PATH, NULL, 1875 _("Path '%s' must be inside " 1876 "the directory '%s'"), 1877 svn_dirent_local_style(original_path, pool), 1878 svn_dirent_local_style(relative_to_dir, 1879 pool)); 1880 } 1881 1882 if (! modified_header) 1883 { 1884 child_path = svn_dirent_is_child(relative_to_dir, 1885 modified_path, pool); 1886 if (child_path) 1887 modified_path = child_path; 1888 else 1889 return svn_error_createf( 1890 SVN_ERR_BAD_RELATIVE_PATH, NULL, 1891 _("Path '%s' must be inside " 1892 "the directory '%s'"), 1893 svn_dirent_local_style(modified_path, pool), 1894 svn_dirent_local_style(relative_to_dir, 1895 pool)); 1896 } 1897 } 1898 1899 for (i = 0; i < 2; i++) 1900 { 1901 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i], 1902 APR_READ, APR_OS_DEFAULT, pool)); 1903 } 1904 1905 if (original_header == NULL) 1906 { 1907 SVN_ERR(output_unified_default_hdr(&original_header, original_path, 1908 pool)); 1909 } 1910 1911 if (modified_header == NULL) 1912 { 1913 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path, 1914 pool)); 1915 } 1916 1917 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding, 1918 original_header, modified_header, 1919 pool)); 1920 1921 SVN_ERR(svn_diff_output(diff, &baton, 1922 &svn_diff__file_output_unified_vtable)); 1923 SVN_ERR(output_unified_flush_hunk(&baton)); 1924 1925 for (i = 0; i < 2; i++) 1926 { 1927 SVN_ERR(svn_io_file_close(baton.file[i], pool)); 1928 } 1929 } 1930 1931 return SVN_NO_ERROR; 1932} 1933 1934 1935/** Display diff3 **/ 1936 1937/* A stream to remember *leading* context. Note that this stream does 1938 *not* copy the data that it is remembering; it just saves 1939 *pointers! */ 1940typedef struct context_saver_t { 1941 svn_stream_t *stream; 1942 const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE]; 1943 apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE]; 1944 apr_size_t next_slot; 1945 apr_size_t total_written; 1946} context_saver_t; 1947 1948 1949static svn_error_t * 1950context_saver_stream_write(void *baton, 1951 const char *data, 1952 apr_size_t *len) 1953{ 1954 context_saver_t *cs = baton; 1955 cs->data[cs->next_slot] = data; 1956 cs->len[cs->next_slot] = *len; 1957 cs->next_slot = (cs->next_slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE; 1958 cs->total_written++; 1959 return SVN_NO_ERROR; 1960} 1961 1962typedef struct svn_diff3__file_output_baton_t 1963{ 1964 svn_stream_t *output_stream; 1965 1966 const char *path[3]; 1967 1968 apr_off_t current_line[3]; 1969 1970 char *buffer[3]; 1971 char *endp[3]; 1972 char *curp[3]; 1973 1974 /* The following four members are in the encoding used for the output. */ 1975 const char *conflict_modified; 1976 const char *conflict_original; 1977 const char *conflict_separator; 1978 const char *conflict_latest; 1979 1980 const char *marker_eol; 1981 1982 svn_diff_conflict_display_style_t conflict_style; 1983 1984 /* The rest of the fields are for 1985 svn_diff_conflict_display_only_conflicts only. Note that for 1986 these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or 1987 (soon after a conflict) a "trailing context stream", never the 1988 actual output stream.*/ 1989 /* The actual output stream. */ 1990 svn_stream_t *real_output_stream; 1991 context_saver_t *context_saver; 1992 /* Used to allocate context_saver and trailing context streams, and 1993 for some printfs. */ 1994 apr_pool_t *pool; 1995} svn_diff3__file_output_baton_t; 1996 1997static svn_error_t * 1998flush_context_saver(context_saver_t *cs, 1999 svn_stream_t *output_stream) 2000{ 2001 int i; 2002 for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++) 2003 { 2004 apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE; 2005 if (cs->data[slot]) 2006 { 2007 apr_size_t len = cs->len[slot]; 2008 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len)); 2009 } 2010 } 2011 return SVN_NO_ERROR; 2012} 2013 2014static void 2015make_context_saver(svn_diff3__file_output_baton_t *fob) 2016{ 2017 context_saver_t *cs; 2018 2019 svn_pool_clear(fob->pool); 2020 cs = apr_pcalloc(fob->pool, sizeof(*cs)); 2021 cs->stream = svn_stream_empty(fob->pool); 2022 svn_stream_set_baton(cs->stream, cs); 2023 svn_stream_set_write(cs->stream, context_saver_stream_write); 2024 fob->context_saver = cs; 2025 fob->output_stream = cs->stream; 2026} 2027 2028 2029/* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to 2030 BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to 2031 a context_saver; used for *trailing* context. */ 2032 2033struct trailing_context_printer { 2034 apr_size_t lines_to_print; 2035 svn_diff3__file_output_baton_t *fob; 2036}; 2037 2038 2039 2040static svn_error_t * 2041trailing_context_printer_write(void *baton, 2042 const char *data, 2043 apr_size_t *len) 2044{ 2045 struct trailing_context_printer *tcp = baton; 2046 SVN_ERR_ASSERT(tcp->lines_to_print > 0); 2047 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len)); 2048 tcp->lines_to_print--; 2049 if (tcp->lines_to_print == 0) 2050 make_context_saver(tcp->fob); 2051 return SVN_NO_ERROR; 2052} 2053 2054 2055static void 2056make_trailing_context_printer(svn_diff3__file_output_baton_t *btn) 2057{ 2058 struct trailing_context_printer *tcp; 2059 svn_stream_t *s; 2060 2061 svn_pool_clear(btn->pool); 2062 2063 tcp = apr_pcalloc(btn->pool, sizeof(*tcp)); 2064 tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE; 2065 tcp->fob = btn; 2066 s = svn_stream_empty(btn->pool); 2067 svn_stream_set_baton(s, tcp); 2068 svn_stream_set_write(s, trailing_context_printer_write); 2069 btn->output_stream = s; 2070} 2071 2072 2073 2074typedef enum svn_diff3__file_output_type_e 2075{ 2076 svn_diff3__file_output_skip, 2077 svn_diff3__file_output_normal 2078} svn_diff3__file_output_type_e; 2079 2080 2081static svn_error_t * 2082output_line(svn_diff3__file_output_baton_t *baton, 2083 svn_diff3__file_output_type_e type, int idx) 2084{ 2085 char *curp; 2086 char *endp; 2087 char *eol; 2088 apr_size_t len; 2089 2090 curp = baton->curp[idx]; 2091 endp = baton->endp[idx]; 2092 2093 /* Lazily update the current line even if we're at EOF. 2094 */ 2095 baton->current_line[idx]++; 2096 2097 if (curp == endp) 2098 return SVN_NO_ERROR; 2099 2100 eol = svn_eol__find_eol_start(curp, endp - curp); 2101 if (!eol) 2102 eol = endp; 2103 else 2104 { 2105 svn_boolean_t had_cr = (*eol == '\r'); 2106 eol++; 2107 if (had_cr && eol != endp && *eol == '\n') 2108 eol++; 2109 } 2110 2111 if (type != svn_diff3__file_output_skip) 2112 { 2113 len = eol - curp; 2114 /* Note that the trailing context printer assumes that 2115 svn_stream_write is called exactly once per line. */ 2116 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len)); 2117 } 2118 2119 baton->curp[idx] = eol; 2120 2121 return SVN_NO_ERROR; 2122} 2123 2124static svn_error_t * 2125output_marker_eol(svn_diff3__file_output_baton_t *btn) 2126{ 2127 return svn_stream_puts(btn->output_stream, btn->marker_eol); 2128} 2129 2130static svn_error_t * 2131output_hunk(void *baton, int idx, apr_off_t target_line, 2132 apr_off_t target_length) 2133{ 2134 svn_diff3__file_output_baton_t *output_baton = baton; 2135 2136 /* Skip lines until we are at the start of the changed range */ 2137 while (output_baton->current_line[idx] < target_line) 2138 { 2139 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx)); 2140 } 2141 2142 target_line += target_length; 2143 2144 while (output_baton->current_line[idx] < target_line) 2145 { 2146 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx)); 2147 } 2148 2149 return SVN_NO_ERROR; 2150} 2151 2152static svn_error_t * 2153output_common(void *baton, apr_off_t original_start, apr_off_t original_length, 2154 apr_off_t modified_start, apr_off_t modified_length, 2155 apr_off_t latest_start, apr_off_t latest_length) 2156{ 2157 return output_hunk(baton, 1, modified_start, modified_length); 2158} 2159 2160static svn_error_t * 2161output_diff_modified(void *baton, 2162 apr_off_t original_start, apr_off_t original_length, 2163 apr_off_t modified_start, apr_off_t modified_length, 2164 apr_off_t latest_start, apr_off_t latest_length) 2165{ 2166 return output_hunk(baton, 1, modified_start, modified_length); 2167} 2168 2169static svn_error_t * 2170output_diff_latest(void *baton, 2171 apr_off_t original_start, apr_off_t original_length, 2172 apr_off_t modified_start, apr_off_t modified_length, 2173 apr_off_t latest_start, apr_off_t latest_length) 2174{ 2175 return output_hunk(baton, 2, latest_start, latest_length); 2176} 2177 2178static svn_error_t * 2179output_conflict(void *baton, 2180 apr_off_t original_start, apr_off_t original_length, 2181 apr_off_t modified_start, apr_off_t modified_length, 2182 apr_off_t latest_start, apr_off_t latest_length, 2183 svn_diff_t *diff); 2184 2185static const svn_diff_output_fns_t svn_diff3__file_output_vtable = 2186{ 2187 output_common, 2188 output_diff_modified, 2189 output_diff_latest, 2190 output_diff_modified, /* output_diff_common */ 2191 output_conflict 2192}; 2193 2194 2195 2196static svn_error_t * 2197output_conflict_with_context(svn_diff3__file_output_baton_t *btn, 2198 apr_off_t original_start, 2199 apr_off_t original_length, 2200 apr_off_t modified_start, 2201 apr_off_t modified_length, 2202 apr_off_t latest_start, 2203 apr_off_t latest_length) 2204{ 2205 /* Are we currently saving starting context (as opposed to printing 2206 trailing context)? If so, flush it. */ 2207 if (btn->output_stream == btn->context_saver->stream) 2208 { 2209 if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE) 2210 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n")); 2211 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream)); 2212 } 2213 2214 /* Print to the real output stream. */ 2215 btn->output_stream = btn->real_output_stream; 2216 2217 /* Output the conflict itself. */ 2218 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2219 (modified_length == 1 2220 ? "%s (%" APR_OFF_T_FMT ")" 2221 : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"), 2222 btn->conflict_modified, 2223 modified_start + 1, modified_length)); 2224 SVN_ERR(output_marker_eol(btn)); 2225 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length)); 2226 2227 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2228 (original_length == 1 2229 ? "%s (%" APR_OFF_T_FMT ")" 2230 : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"), 2231 btn->conflict_original, 2232 original_start + 1, original_length)); 2233 SVN_ERR(output_marker_eol(btn)); 2234 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length)); 2235 2236 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2237 "%s%s", btn->conflict_separator, btn->marker_eol)); 2238 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length)); 2239 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool, 2240 (latest_length == 1 2241 ? "%s (%" APR_OFF_T_FMT ")" 2242 : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"), 2243 btn->conflict_latest, 2244 latest_start + 1, latest_length)); 2245 SVN_ERR(output_marker_eol(btn)); 2246 2247 /* Go into print-trailing-context mode instead. */ 2248 make_trailing_context_printer(btn); 2249 2250 return SVN_NO_ERROR; 2251} 2252 2253 2254static svn_error_t * 2255output_conflict(void *baton, 2256 apr_off_t original_start, apr_off_t original_length, 2257 apr_off_t modified_start, apr_off_t modified_length, 2258 apr_off_t latest_start, apr_off_t latest_length, 2259 svn_diff_t *diff) 2260{ 2261 svn_diff3__file_output_baton_t *file_baton = baton; 2262 2263 svn_diff_conflict_display_style_t style = file_baton->conflict_style; 2264 2265 if (style == svn_diff_conflict_display_only_conflicts) 2266 return output_conflict_with_context(file_baton, 2267 original_start, original_length, 2268 modified_start, modified_length, 2269 latest_start, latest_length); 2270 2271 if (style == svn_diff_conflict_display_resolved_modified_latest) 2272 { 2273 if (diff) 2274 return svn_diff_output(diff, baton, 2275 &svn_diff3__file_output_vtable); 2276 else 2277 style = svn_diff_conflict_display_modified_latest; 2278 } 2279 2280 if (style == svn_diff_conflict_display_modified_latest || 2281 style == svn_diff_conflict_display_modified_original_latest) 2282 { 2283 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2284 file_baton->conflict_modified)); 2285 SVN_ERR(output_marker_eol(file_baton)); 2286 2287 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2288 2289 if (style == svn_diff_conflict_display_modified_original_latest) 2290 { 2291 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2292 file_baton->conflict_original)); 2293 SVN_ERR(output_marker_eol(file_baton)); 2294 SVN_ERR(output_hunk(baton, 0, original_start, original_length)); 2295 } 2296 2297 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2298 file_baton->conflict_separator)); 2299 SVN_ERR(output_marker_eol(file_baton)); 2300 2301 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2302 2303 SVN_ERR(svn_stream_puts(file_baton->output_stream, 2304 file_baton->conflict_latest)); 2305 SVN_ERR(output_marker_eol(file_baton)); 2306 } 2307 else if (style == svn_diff_conflict_display_modified) 2308 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length)); 2309 else if (style == svn_diff_conflict_display_latest) 2310 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length)); 2311 else /* unknown style */ 2312 SVN_ERR_MALFUNCTION(); 2313 2314 return SVN_NO_ERROR; 2315} 2316 2317svn_error_t * 2318svn_diff_file_output_merge2(svn_stream_t *output_stream, 2319 svn_diff_t *diff, 2320 const char *original_path, 2321 const char *modified_path, 2322 const char *latest_path, 2323 const char *conflict_original, 2324 const char *conflict_modified, 2325 const char *conflict_latest, 2326 const char *conflict_separator, 2327 svn_diff_conflict_display_style_t style, 2328 apr_pool_t *pool) 2329{ 2330 svn_diff3__file_output_baton_t baton; 2331 apr_file_t *file[3]; 2332 int idx; 2333#if APR_HAS_MMAP 2334 apr_mmap_t *mm[3] = { 0 }; 2335#endif /* APR_HAS_MMAP */ 2336 const char *eol; 2337 svn_boolean_t conflicts_only = 2338 (style == svn_diff_conflict_display_only_conflicts); 2339 2340 memset(&baton, 0, sizeof(baton)); 2341 if (conflicts_only) 2342 { 2343 baton.pool = svn_pool_create(pool); 2344 make_context_saver(&baton); 2345 baton.real_output_stream = output_stream; 2346 } 2347 else 2348 baton.output_stream = output_stream; 2349 baton.path[0] = original_path; 2350 baton.path[1] = modified_path; 2351 baton.path[2] = latest_path; 2352 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified, 2353 conflict_modified ? conflict_modified 2354 : apr_psprintf(pool, "<<<<<<< %s", 2355 modified_path), 2356 pool)); 2357 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original, 2358 conflict_original ? conflict_original 2359 : apr_psprintf(pool, "||||||| %s", 2360 original_path), 2361 pool)); 2362 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator, 2363 conflict_separator ? conflict_separator 2364 : "=======", pool)); 2365 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest, 2366 conflict_latest ? conflict_latest 2367 : apr_psprintf(pool, ">>>>>>> %s", 2368 latest_path), 2369 pool)); 2370 2371 baton.conflict_style = style; 2372 2373 for (idx = 0; idx < 3; idx++) 2374 { 2375 apr_size_t size; 2376 2377 SVN_ERR(map_or_read_file(&file[idx], 2378 MMAP_T_ARG(mm[idx]) 2379 &baton.buffer[idx], &size, 2380 baton.path[idx], pool)); 2381 2382 baton.curp[idx] = baton.buffer[idx]; 2383 baton.endp[idx] = baton.buffer[idx]; 2384 2385 if (baton.endp[idx]) 2386 baton.endp[idx] += size; 2387 } 2388 2389 /* Check what eol marker we should use for conflict markers. 2390 We use the eol marker of the modified file and fall back on the 2391 platform's eol marker if that file doesn't contain any newlines. */ 2392 eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1], 2393 NULL); 2394 if (! eol) 2395 eol = APR_EOL_STR; 2396 baton.marker_eol = eol; 2397 2398 SVN_ERR(svn_diff_output(diff, &baton, 2399 &svn_diff3__file_output_vtable)); 2400 2401 for (idx = 0; idx < 3; idx++) 2402 { 2403#if APR_HAS_MMAP 2404 if (mm[idx]) 2405 { 2406 apr_status_t rv = apr_mmap_delete(mm[idx]); 2407 if (rv != APR_SUCCESS) 2408 { 2409 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"), 2410 baton.path[idx]); 2411 } 2412 } 2413#endif /* APR_HAS_MMAP */ 2414 2415 if (file[idx]) 2416 { 2417 SVN_ERR(svn_io_file_close(file[idx], pool)); 2418 } 2419 } 2420 2421 if (conflicts_only) 2422 svn_pool_destroy(baton.pool); 2423 2424 return SVN_NO_ERROR; 2425} 2426 2427