parse-diff.c revision 299742
/*
 * parse-diff.c: functions for parsing diff files
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
2155682Smarkm * ==================================================================== 2255682Smarkm */ 2355682Smarkm 2455682Smarkm#include <stdlib.h> 2555682Smarkm#include <stddef.h> 2655682Smarkm#include <string.h> 2755682Smarkm 2855682Smarkm#include "svn_hash.h" 2955682Smarkm#include "svn_types.h" 3055682Smarkm#include "svn_error.h" 3155682Smarkm#include "svn_io.h" 3255682Smarkm#include "svn_pools.h" 3355682Smarkm#include "svn_props.h" 3455682Smarkm#include "svn_string.h" 3555682Smarkm#include "svn_utf.h" 3655682Smarkm#include "svn_dirent_uri.h" 3755682Smarkm#include "svn_diff.h" 3855682Smarkm#include "svn_ctype.h" 3955682Smarkm#include "svn_mergeinfo.h" 4055682Smarkm 4155682Smarkm#include "private/svn_eol_private.h" 4255682Smarkm#include "private/svn_dep_compat.h" 4355682Smarkm#include "private/svn_sorts_private.h" 4455682Smarkm 4555682Smarkm/* Helper macro for readability */ 4655682Smarkm#define starts_with(str, start) \ 4755682Smarkm (strncmp((str), (start), strlen(start)) == 0) 4855682Smarkm 4955682Smarkm/* Like strlen() but for string literals. */ 5055682Smarkm#define STRLEN_LITERAL(str) (sizeof(str) - 1) 5155682Smarkm 5255682Smarkm/* This struct describes a range within a file, as well as the 5355682Smarkm * current cursor position within the range. All numbers are in bytes. */ 5455682Smarkmstruct svn_diff__hunk_range { 5555682Smarkm apr_off_t start; 5655682Smarkm apr_off_t end; 5755682Smarkm apr_off_t current; 5855682Smarkm}; 5955682Smarkm 6055682Smarkmstruct svn_diff_hunk_t { 6155682Smarkm /* The patch this hunk belongs to. */ 6255682Smarkm svn_patch_t *patch; 6355682Smarkm 6455682Smarkm /* APR file handle to the patch file this hunk came from. */ 6572445Sassar apr_file_t *apr_file; 6655682Smarkm 6755682Smarkm /* Ranges used to keep track of this hunk's texts positions within 6855682Smarkm * the patch file. 
*/ 6955682Smarkm struct svn_diff__hunk_range diff_text_range; 7055682Smarkm struct svn_diff__hunk_range original_text_range; 7155682Smarkm struct svn_diff__hunk_range modified_text_range; 7255682Smarkm 7355682Smarkm /* Hunk ranges as they appeared in the patch file. 7455682Smarkm * All numbers are lines, not bytes. */ 7555682Smarkm svn_linenum_t original_start; 7655682Smarkm svn_linenum_t original_length; 77233294Sstas svn_linenum_t modified_start; 7855682Smarkm svn_linenum_t modified_length; 7955682Smarkm 80 /* Number of lines of leading and trailing hunk context. */ 81 svn_linenum_t leading_context; 82 svn_linenum_t trailing_context; 83}; 84 85void 86svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk) 87{ 88 hunk->diff_text_range.current = hunk->diff_text_range.start; 89} 90 91void 92svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk) 93{ 94 if (hunk->patch->reverse) 95 hunk->modified_text_range.current = hunk->modified_text_range.start; 96 else 97 hunk->original_text_range.current = hunk->original_text_range.start; 98} 99 100void 101svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk) 102{ 103 if (hunk->patch->reverse) 104 hunk->original_text_range.current = hunk->original_text_range.start; 105 else 106 hunk->modified_text_range.current = hunk->modified_text_range.start; 107} 108 109svn_linenum_t 110svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk) 111{ 112 return hunk->patch->reverse ? hunk->modified_start : hunk->original_start; 113} 114 115svn_linenum_t 116svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk) 117{ 118 return hunk->patch->reverse ? hunk->modified_length : hunk->original_length; 119} 120 121svn_linenum_t 122svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk) 123{ 124 return hunk->patch->reverse ? hunk->original_start : hunk->modified_start; 125} 126 127svn_linenum_t 128svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk) 129{ 130 return hunk->patch->reverse ? 
hunk->original_length : hunk->modified_length; 131} 132 133svn_linenum_t 134svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk) 135{ 136 return hunk->leading_context; 137} 138 139svn_linenum_t 140svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk) 141{ 142 return hunk->trailing_context; 143} 144 145/* Try to parse a positive number from a decimal number encoded 146 * in the string NUMBER. Return parsed number in OFFSET, and return 147 * TRUE if parsing was successful. */ 148static svn_boolean_t 149parse_offset(svn_linenum_t *offset, const char *number) 150{ 151 svn_error_t *err; 152 apr_uint64_t val; 153 154 err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10); 155 if (err) 156 { 157 svn_error_clear(err); 158 return FALSE; 159 } 160 161 *offset = (svn_linenum_t)val; 162 163 return TRUE; 164} 165 166/* Try to parse a hunk range specification from the string RANGE. 167 * Return parsed information in *START and *LENGTH, and return TRUE 168 * if the range parsed correctly. Note: This function may modify the 169 * input value RANGE. */ 170static svn_boolean_t 171parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range) 172{ 173 char *comma; 174 175 if (*range == 0) 176 return FALSE; 177 178 comma = strstr(range, ","); 179 if (comma) 180 { 181 if (strlen(comma + 1) > 0) 182 { 183 /* Try to parse the length. */ 184 if (! parse_offset(length, comma + 1)) 185 return FALSE; 186 187 /* Snip off the end of the string, 188 * so we can comfortably parse the line 189 * number the hunk starts at. */ 190 *comma = '\0'; 191 } 192 else 193 /* A comma but no length? */ 194 return FALSE; 195 } 196 else 197 { 198 *length = 1; 199 } 200 201 /* Try to parse the line number the hunk starts at. */ 202 return parse_offset(start, range); 203} 204 205/* Try to parse a hunk header in string HEADER, putting parsed information 206 * into HUNK. Return TRUE if the header parsed correctly. 
ATAT is the 207 * character string used to delimit the hunk header. 208 * Do all allocations in POOL. */ 209static svn_boolean_t 210parse_hunk_header(const char *header, svn_diff_hunk_t *hunk, 211 const char *atat, apr_pool_t *pool) 212{ 213 const char *p; 214 const char *start; 215 svn_stringbuf_t *range; 216 217 p = header + strlen(atat); 218 if (*p != ' ') 219 /* No. */ 220 return FALSE; 221 p++; 222 if (*p != '-') 223 /* Nah... */ 224 return FALSE; 225 /* OK, this may be worth allocating some memory for... */ 226 range = svn_stringbuf_create_ensure(31, pool); 227 start = ++p; 228 while (*p && *p != ' ') 229 { 230 p++; 231 } 232 233 if (*p != ' ') 234 /* No no no... */ 235 return FALSE; 236 237 svn_stringbuf_appendbytes(range, start, p - start); 238 239 /* Try to parse the first range. */ 240 if (! parse_range(&hunk->original_start, &hunk->original_length, range->data)) 241 return FALSE; 242 243 /* Clear the stringbuf so we can reuse it for the second range. */ 244 svn_stringbuf_setempty(range); 245 p++; 246 if (*p != '+') 247 /* Eeek! */ 248 return FALSE; 249 /* OK, this may be worth copying... */ 250 start = ++p; 251 while (*p && *p != ' ') 252 { 253 p++; 254 } 255 if (*p != ' ') 256 /* No no no... */ 257 return FALSE; 258 259 svn_stringbuf_appendbytes(range, start, p - start); 260 261 /* Check for trailing @@ */ 262 p++; 263 if (! starts_with(p, atat)) 264 return FALSE; 265 266 /* There may be stuff like C-function names after the trailing @@, 267 * but we ignore that. */ 268 269 /* Try to parse the second range. */ 270 if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data)) 271 return FALSE; 272 273 /* Hunk header is good. */ 274 return TRUE; 275} 276 277/* Read a line of original or modified hunk text from the specified 278 * RANGE within FILE. FILE is expected to contain unidiff text. 279 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line, 280 * Any lines commencing with the VERBOTEN character are discarded. 
281 * VERBOTEN should be '+' or '-', depending on which form of hunk text 282 * is being read. 283 * 284 * All other parameters are as in svn_diff_hunk_readline_original_text() 285 * and svn_diff_hunk_readline_modified_text(). 286 */ 287static svn_error_t * 288hunk_readline_original_or_modified(apr_file_t *file, 289 struct svn_diff__hunk_range *range, 290 svn_stringbuf_t **stringbuf, 291 const char **eol, 292 svn_boolean_t *eof, 293 char verboten, 294 apr_pool_t *result_pool, 295 apr_pool_t *scratch_pool) 296{ 297 apr_size_t max_len; 298 svn_boolean_t filtered; 299 apr_off_t pos; 300 svn_stringbuf_t *str; 301 302 if (range->current >= range->end) 303 { 304 /* We're past the range. Indicate that no bytes can be read. */ 305 *eof = TRUE; 306 if (eol) 307 *eol = NULL; 308 *stringbuf = svn_stringbuf_create_empty(result_pool); 309 return SVN_NO_ERROR; 310 } 311 312 pos = 0; 313 SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, scratch_pool)); 314 SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool)); 315 do 316 { 317 max_len = range->end - range->current; 318 SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len, 319 result_pool, scratch_pool)); 320 range->current = 0; 321 SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool)); 322 filtered = (str->data[0] == verboten || str->data[0] == '\\'); 323 } 324 while (filtered && ! *eof); 325 326 if (filtered) 327 { 328 /* EOF, return an empty string. */ 329 *stringbuf = svn_stringbuf_create_ensure(0, result_pool); 330 } 331 else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ') 332 { 333 /* Shave off leading unidiff symbols. */ 334 *stringbuf = svn_stringbuf_create(str->data + 1, result_pool); 335 } 336 else 337 { 338 /* Return the line as-is. 
*/ 339 *stringbuf = svn_stringbuf_dup(str, result_pool); 340 } 341 342 SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool)); 343 344 return SVN_NO_ERROR; 345} 346 347svn_error_t * 348svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk, 349 svn_stringbuf_t **stringbuf, 350 const char **eol, 351 svn_boolean_t *eof, 352 apr_pool_t *result_pool, 353 apr_pool_t *scratch_pool) 354{ 355 return svn_error_trace( 356 hunk_readline_original_or_modified(hunk->apr_file, 357 hunk->patch->reverse ? 358 &hunk->modified_text_range : 359 &hunk->original_text_range, 360 stringbuf, eol, eof, 361 hunk->patch->reverse ? '-' : '+', 362 result_pool, scratch_pool)); 363} 364 365svn_error_t * 366svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk, 367 svn_stringbuf_t **stringbuf, 368 const char **eol, 369 svn_boolean_t *eof, 370 apr_pool_t *result_pool, 371 apr_pool_t *scratch_pool) 372{ 373 return svn_error_trace( 374 hunk_readline_original_or_modified(hunk->apr_file, 375 hunk->patch->reverse ? 376 &hunk->original_text_range : 377 &hunk->modified_text_range, 378 stringbuf, eol, eof, 379 hunk->patch->reverse ? '+' : '-', 380 result_pool, scratch_pool)); 381} 382 383svn_error_t * 384svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk, 385 svn_stringbuf_t **stringbuf, 386 const char **eol, 387 svn_boolean_t *eof, 388 apr_pool_t *result_pool, 389 apr_pool_t *scratch_pool) 390{ 391 svn_stringbuf_t *line; 392 apr_size_t max_len; 393 apr_off_t pos; 394 395 if (hunk->diff_text_range.current >= hunk->diff_text_range.end) 396 { 397 /* We're past the range. Indicate that no bytes can be read. 
*/ 398 *eof = TRUE; 399 if (eol) 400 *eol = NULL; 401 *stringbuf = svn_stringbuf_create_empty(result_pool); 402 return SVN_NO_ERROR; 403 } 404 405 pos = 0; 406 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool)); 407 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, 408 &hunk->diff_text_range.current, scratch_pool)); 409 max_len = hunk->diff_text_range.end - hunk->diff_text_range.current; 410 SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len, 411 result_pool, 412 scratch_pool)); 413 hunk->diff_text_range.current = 0; 414 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, 415 &hunk->diff_text_range.current, scratch_pool)); 416 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool)); 417 418 if (hunk->patch->reverse) 419 { 420 if (line->data[0] == '+') 421 line->data[0] = '-'; 422 else if (line->data[0] == '-') 423 line->data[0] = '+'; 424 } 425 426 *stringbuf = line; 427 428 return SVN_NO_ERROR; 429} 430 431/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line. 432 * Allocate *PROP_NAME in RESULT_POOL. 433 * Set *PROP_NAME to NULL if no valid property name was found. */ 434static svn_error_t * 435parse_prop_name(const char **prop_name, const char *header, 436 const char *indicator, apr_pool_t *result_pool) 437{ 438 SVN_ERR(svn_utf_cstring_to_utf8(prop_name, 439 header + strlen(indicator), 440 result_pool)); 441 if (**prop_name == '\0') 442 *prop_name = NULL; 443 else if (! svn_prop_name_is_valid(*prop_name)) 444 { 445 svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool); 446 svn_stringbuf_strip_whitespace(buf); 447 *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL); 448 } 449 450 return SVN_NO_ERROR; 451} 452 453 454/* A helper function to parse svn:mergeinfo diffs. 
 *
 * These diffs use a special pretty-print format, for instance:
 *
 *   Added: svn:mergeinfo
 *   ## -0,0 +0,1 ##
 *      Merged /trunk:r2-3
 *
 * The hunk header has the following format:
 *   ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
 *
 * At this point, the number of reverse merges has already been
 * parsed into HUNK->ORIGINAL_LENGTH, and the number of forward
 * merges has been parsed into HUNK->MODIFIED_LENGTH.
 *
 * The header is followed by a list of mergeinfo, one path per line.
 * This function parses such lines. Lines describing reverse merges
 * appear first, and then all lines describing forward merges appear.
 *
 * Parts of the line are affected by i18n. The words 'Merged'
 * and 'Reverse-merged' can appear in any language and at any
 * position within the line. We can only assume that a leading
 * '/' starts the merge source path, the path is followed by
 * ":r", which in turn is followed by a mergeinfo revision range,
 * which is terminated by whitespace or end-of-string.
 *
 * If the current line meets the above criteria and we're able
 * to parse valid mergeinfo from it, the resulting mergeinfo
 * is added to patch->mergeinfo or patch->reverse_mergeinfo,
 * and we proceed to the next line.
484 */ 485static svn_error_t * 486parse_mergeinfo(svn_boolean_t *found_mergeinfo, 487 svn_stringbuf_t *line, 488 svn_diff_hunk_t *hunk, 489 svn_patch_t *patch, 490 apr_pool_t *result_pool, 491 apr_pool_t *scratch_pool) 492{ 493 char *slash = strchr(line->data, '/'); 494 char *colon = strrchr(line->data, ':'); 495 496 *found_mergeinfo = FALSE; 497 498 if (slash && colon && colon[1] == 'r' && slash < colon) 499 { 500 svn_stringbuf_t *input; 501 svn_mergeinfo_t mergeinfo = NULL; 502 char *s; 503 svn_error_t *err; 504 505 input = svn_stringbuf_create_ensure(line->len, scratch_pool); 506 507 /* Copy the merge source path + colon */ 508 s = slash; 509 while (s <= colon) 510 { 511 svn_stringbuf_appendbyte(input, *s); 512 s++; 513 } 514 515 /* skip 'r' after colon */ 516 s++; 517 518 /* Copy the revision range. */ 519 while (s < line->data + line->len) 520 { 521 if (svn_ctype_isspace(*s)) 522 break; 523 svn_stringbuf_appendbyte(input, *s); 524 s++; 525 } 526 527 err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool); 528 if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR) 529 { 530 svn_error_clear(err); 531 mergeinfo = NULL; 532 } 533 else 534 SVN_ERR(err); 535 536 if (mergeinfo) 537 { 538 if (hunk->original_length > 0) /* reverse merges */ 539 { 540 if (patch->reverse) 541 { 542 if (patch->mergeinfo == NULL) 543 patch->mergeinfo = mergeinfo; 544 else 545 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo, 546 mergeinfo, 547 result_pool, 548 scratch_pool)); 549 } 550 else 551 { 552 if (patch->reverse_mergeinfo == NULL) 553 patch->reverse_mergeinfo = mergeinfo; 554 else 555 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo, 556 mergeinfo, 557 result_pool, 558 scratch_pool)); 559 } 560 hunk->original_length--; 561 } 562 else if (hunk->modified_length > 0) /* forward merges */ 563 { 564 if (patch->reverse) 565 { 566 if (patch->reverse_mergeinfo == NULL) 567 patch->reverse_mergeinfo = mergeinfo; 568 else 569 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo, 
570 mergeinfo, 571 result_pool, 572 scratch_pool)); 573 } 574 else 575 { 576 if (patch->mergeinfo == NULL) 577 patch->mergeinfo = mergeinfo; 578 else 579 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo, 580 mergeinfo, 581 result_pool, 582 scratch_pool)); 583 } 584 hunk->modified_length--; 585 } 586 587 *found_mergeinfo = TRUE; 588 } 589 } 590 591 return SVN_NO_ERROR; 592} 593 594/* Return the next *HUNK from a PATCH in APR_FILE. 595 * If no hunk can be found, set *HUNK to NULL. 596 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK 597 * is the first belonging to a certain property, then PROP_NAME and 598 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be 599 * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be 600 * treated as context lines. Allocate results in RESULT_POOL. 601 * Use SCRATCH_POOL for all other allocations. */ 602static svn_error_t * 603parse_next_hunk(svn_diff_hunk_t **hunk, 604 svn_boolean_t *is_property, 605 const char **prop_name, 606 svn_diff_operation_kind_t *prop_operation, 607 svn_patch_t *patch, 608 apr_file_t *apr_file, 609 svn_boolean_t ignore_whitespace, 610 apr_pool_t *result_pool, 611 apr_pool_t *scratch_pool) 612{ 613 static const char * const minus = "--- "; 614 static const char * const text_atat = "@@"; 615 static const char * const prop_atat = "##"; 616 svn_stringbuf_t *line; 617 svn_boolean_t eof, in_hunk, hunk_seen; 618 apr_off_t pos, last_line; 619 apr_off_t start, end; 620 apr_off_t original_end; 621 apr_off_t modified_end; 622 svn_linenum_t original_lines; 623 svn_linenum_t modified_lines; 624 svn_linenum_t leading_context; 625 svn_linenum_t trailing_context; 626 svn_boolean_t changed_line_seen; 627 enum { 628 noise_line, 629 original_line, 630 modified_line, 631 context_line 632 } last_line_type; 633 apr_pool_t *iterpool; 634 635 *prop_operation = svn_diff_op_unchanged; 636 637 /* We only set this if we have a property hunk header. 
*/ 638 *prop_name = NULL; 639 *is_property = FALSE; 640 641 if (apr_file_eof(apr_file) == APR_EOF) 642 { 643 /* No more hunks here. */ 644 *hunk = NULL; 645 return SVN_NO_ERROR; 646 } 647 648 in_hunk = FALSE; 649 hunk_seen = FALSE; 650 leading_context = 0; 651 trailing_context = 0; 652 changed_line_seen = FALSE; 653 original_end = 0; 654 modified_end = 0; 655 *hunk = apr_pcalloc(result_pool, sizeof(**hunk)); 656 657 /* Get current seek position -- APR has no ftell() :( */ 658 pos = 0; 659 SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool)); 660 661 /* Start out assuming noise. */ 662 last_line_type = noise_line; 663 664 iterpool = svn_pool_create(scratch_pool); 665 do 666 { 667 668 svn_pool_clear(iterpool); 669 670 /* Remember the current line's offset, and read the line. */ 671 last_line = pos; 672 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX, 673 iterpool, iterpool)); 674 675 /* Update line offset for next iteration. */ 676 pos = 0; 677 SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool)); 678 679 /* Lines starting with a backslash indicate a missing EOL: 680 * "\ No newline at end of file" or "end of property". */ 681 if (line->data[0] == '\\') 682 { 683 if (in_hunk) 684 { 685 char eolbuf[2]; 686 apr_size_t len; 687 apr_off_t off; 688 apr_off_t hunk_text_end; 689 690 /* Comment terminates the hunk text and says the hunk text 691 * has no trailing EOL. Snip off trailing EOL which is part 692 * of the patch file but not part of the hunk text. 
*/ 693 off = last_line - 2; 694 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool)); 695 len = sizeof(eolbuf); 696 SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len, 697 &eof, iterpool)); 698 if (eolbuf[0] == '\r' && eolbuf[1] == '\n') 699 hunk_text_end = last_line - 2; 700 else if (eolbuf[1] == '\n' || eolbuf[1] == '\r') 701 hunk_text_end = last_line - 1; 702 else 703 hunk_text_end = last_line; 704 705 if (last_line_type == original_line && original_end == 0) 706 original_end = hunk_text_end; 707 else if (last_line_type == modified_line && modified_end == 0) 708 modified_end = hunk_text_end; 709 else if (last_line_type == context_line) 710 { 711 if (original_end == 0) 712 original_end = hunk_text_end; 713 if (modified_end == 0) 714 modified_end = hunk_text_end; 715 } 716 717 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool)); 718 } 719 720 continue; 721 } 722 723 if (in_hunk && *is_property && *prop_name && 724 strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0) 725 { 726 svn_boolean_t found_mergeinfo; 727 728 SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch, 729 result_pool, iterpool)); 730 if (found_mergeinfo) 731 continue; /* Proceed to the next line in the patch. */ 732 } 733 734 if (in_hunk) 735 { 736 char c; 737 static const char add = '+'; 738 static const char del = '-'; 739 740 if (! hunk_seen) 741 { 742 /* We're reading the first line of the hunk, so the start 743 * of the line just read is the hunk text's byte offset. */ 744 start = last_line; 745 } 746 747 c = line->data[0]; 748 if (original_lines > 0 && modified_lines > 0 && 749 ((c == ' ') 750 /* Tolerate chopped leading spaces on empty lines. */ 751 || (! eof && line->len == 0) 752 /* Maybe tolerate chopped leading spaces on non-empty lines. */ 753 || (ignore_whitespace && c != del && c != add))) 754 { 755 /* It's a "context" line in the hunk. 
*/ 756 hunk_seen = TRUE; 757 original_lines--; 758 modified_lines--; 759 if (changed_line_seen) 760 trailing_context++; 761 else 762 leading_context++; 763 last_line_type = context_line; 764 } 765 else if (original_lines > 0 && c == del) 766 { 767 /* It's a "deleted" line in the hunk. */ 768 hunk_seen = TRUE; 769 changed_line_seen = TRUE; 770 771 /* A hunk may have context in the middle. We only want 772 trailing lines of context. */ 773 if (trailing_context > 0) 774 trailing_context = 0; 775 776 original_lines--; 777 last_line_type = original_line; 778 } 779 else if (modified_lines > 0 && c == add) 780 { 781 /* It's an "added" line in the hunk. */ 782 hunk_seen = TRUE; 783 changed_line_seen = TRUE; 784 785 /* A hunk may have context in the middle. We only want 786 trailing lines of context. */ 787 if (trailing_context > 0) 788 trailing_context = 0; 789 790 modified_lines--; 791 last_line_type = modified_line; 792 } 793 else 794 { 795 if (eof) 796 { 797 /* The hunk ends at EOF. */ 798 end = pos; 799 } 800 else 801 { 802 /* The start of the current line marks the first byte 803 * after the hunk text. */ 804 end = last_line; 805 } 806 807 if (original_end == 0) 808 original_end = end; 809 if (modified_end == 0) 810 modified_end = end; 811 break; /* Hunk was empty or has been read. */ 812 } 813 } 814 else 815 { 816 if (starts_with(line->data, text_atat)) 817 { 818 /* Looks like we have a hunk header, try to rip it apart. */ 819 in_hunk = parse_hunk_header(line->data, *hunk, text_atat, 820 iterpool); 821 if (in_hunk) 822 { 823 original_lines = (*hunk)->original_length; 824 modified_lines = (*hunk)->modified_length; 825 *is_property = FALSE; 826 } 827 } 828 else if (starts_with(line->data, prop_atat)) 829 { 830 /* Looks like we have a property hunk header, try to rip it 831 * apart. 
*/ 832 in_hunk = parse_hunk_header(line->data, *hunk, prop_atat, 833 iterpool); 834 if (in_hunk) 835 { 836 original_lines = (*hunk)->original_length; 837 modified_lines = (*hunk)->modified_length; 838 *is_property = TRUE; 839 } 840 } 841 else if (starts_with(line->data, "Added: ")) 842 { 843 SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ", 844 result_pool)); 845 if (*prop_name) 846 *prop_operation = svn_diff_op_added; 847 } 848 else if (starts_with(line->data, "Deleted: ")) 849 { 850 SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ", 851 result_pool)); 852 if (*prop_name) 853 *prop_operation = svn_diff_op_deleted; 854 } 855 else if (starts_with(line->data, "Modified: ")) 856 { 857 SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ", 858 result_pool)); 859 if (*prop_name) 860 *prop_operation = svn_diff_op_modified; 861 } 862 else if (starts_with(line->data, minus) 863 || starts_with(line->data, "diff --git ")) 864 /* This could be a header of another patch. Bail out. */ 865 break; 866 } 867 } 868 /* Check for the line length since a file may not have a newline at the 869 * end and we depend upon the last line to be an empty one. */ 870 while (! eof || line->len > 0); 871 svn_pool_destroy(iterpool); 872 873 if (! eof) 874 /* Rewind to the start of the line just read, so subsequent calls 875 * to this function or svn_diff_parse_next_patch() don't end 876 * up skipping the line -- it may contain a patch or hunk header. 
*/ 877 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool)); 878 879 if (hunk_seen && start < end) 880 { 881 (*hunk)->patch = patch; 882 (*hunk)->apr_file = apr_file; 883 (*hunk)->leading_context = leading_context; 884 (*hunk)->trailing_context = trailing_context; 885 (*hunk)->diff_text_range.start = start; 886 (*hunk)->diff_text_range.current = start; 887 (*hunk)->diff_text_range.end = end; 888 (*hunk)->original_text_range.start = start; 889 (*hunk)->original_text_range.current = start; 890 (*hunk)->original_text_range.end = original_end; 891 (*hunk)->modified_text_range.start = start; 892 (*hunk)->modified_text_range.current = start; 893 (*hunk)->modified_text_range.end = modified_end; 894 } 895 else 896 /* Something went wrong, just discard the result. */ 897 *hunk = NULL; 898 899 return SVN_NO_ERROR; 900} 901 902/* Compare function for sorting hunks after parsing. 903 * We sort hunks by their original line offset. */ 904static int 905compare_hunks(const void *a, const void *b) 906{ 907 const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a); 908 const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b); 909 910 if (ha->original_start < hb->original_start) 911 return -1; 912 if (ha->original_start > hb->original_start) 913 return 1; 914 return 0; 915} 916 917/* Possible states of the diff header parser. 
*/ 918enum parse_state 919{ 920 state_start, /* initial */ 921 state_git_diff_seen, /* diff --git */ 922 state_git_tree_seen, /* a tree operation, rather then content change */ 923 state_git_minus_seen, /* --- /dev/null; or --- a/ */ 924 state_git_plus_seen, /* +++ /dev/null; or +++ a/ */ 925 state_move_from_seen, /* rename from foo.c */ 926 state_copy_from_seen, /* copy from foo.c */ 927 state_minus_seen, /* --- foo.c */ 928 state_unidiff_found, /* valid start of a regular unidiff header */ 929 state_git_header_found /* valid start of a --git diff header */ 930}; 931 932/* Data type describing a valid state transition of the parser. */ 933struct transition 934{ 935 const char *expected_input; 936 enum parse_state required_state; 937 938 /* A callback called upon each parser state transition. */ 939 svn_error_t *(*fn)(enum parse_state *new_state, char *input, 940 svn_patch_t *patch, apr_pool_t *result_pool, 941 apr_pool_t *scratch_pool); 942}; 943 944/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */ 945static svn_error_t * 946grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool, 947 apr_pool_t *scratch_pool) 948{ 949 const char *utf8_path; 950 const char *canon_path; 951 952 /* Grab the filename and encode it in UTF-8. */ 953 /* TODO: Allow specifying the patch file's encoding. 954 * For now, we assume its encoding is native. */ 955 /* ### This can fail if the filename cannot be represented in the current 956 * ### locale's encoding. */ 957 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path, 958 line, 959 scratch_pool)); 960 961 /* Canonicalize the path name. */ 962 canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool); 963 964 *file_name = apr_pstrdup(result_pool, canon_path); 965 966 return SVN_NO_ERROR; 967} 968 969/* Parse the '--- ' line of a regular unidiff. 
*/ 970static svn_error_t * 971diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 972 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 973{ 974 /* If we can find a tab, it separates the filename from 975 * the rest of the line which we can discard. */ 976 char *tab = strchr(line, '\t'); 977 if (tab) 978 *tab = '\0'; 979 980 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "), 981 result_pool, scratch_pool)); 982 983 *new_state = state_minus_seen; 984 985 return SVN_NO_ERROR; 986} 987 988/* Parse the '+++ ' line of a regular unidiff. */ 989static svn_error_t * 990diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 991 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 992{ 993 /* If we can find a tab, it separates the filename from 994 * the rest of the line which we can discard. */ 995 char *tab = strchr(line, '\t'); 996 if (tab) 997 *tab = '\0'; 998 999 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "), 1000 result_pool, scratch_pool)); 1001 1002 *new_state = state_unidiff_found; 1003 1004 return SVN_NO_ERROR; 1005} 1006 1007/* Parse the first line of a git extended unidiff. */ 1008static svn_error_t * 1009git_start(enum parse_state *new_state, char *line, svn_patch_t *patch, 1010 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1011{ 1012 const char *old_path_start; 1013 char *old_path_end; 1014 const char *new_path_start; 1015 const char *new_path_end; 1016 char *new_path_marker; 1017 const char *old_path_marker; 1018 1019 /* ### Add handling of escaped paths 1020 * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html: 1021 * 1022 * TAB, LF, double quote and backslash characters in pathnames are 1023 * represented as \t, \n, \" and \\, respectively. If there is need for 1024 * such substitution then the whole pathname is put in double quotes. 1025 */ 1026 1027 /* Our line should look like this: 'diff --git a/path b/path'. 
1028 * 1029 * If we find any deviations from that format, we return with state reset 1030 * to start. 1031 */ 1032 old_path_marker = strstr(line, " a/"); 1033 1034 if (! old_path_marker) 1035 { 1036 *new_state = state_start; 1037 return SVN_NO_ERROR; 1038 } 1039 1040 if (! *(old_path_marker + 3)) 1041 { 1042 *new_state = state_start; 1043 return SVN_NO_ERROR; 1044 } 1045 1046 new_path_marker = strstr(old_path_marker, " b/"); 1047 1048 if (! new_path_marker) 1049 { 1050 *new_state = state_start; 1051 return SVN_NO_ERROR; 1052 } 1053 1054 if (! *(new_path_marker + 3)) 1055 { 1056 *new_state = state_start; 1057 return SVN_NO_ERROR; 1058 } 1059 1060 /* By now, we know that we have a line on the form '--git diff a/.+ b/.+' 1061 * We only need the filenames when we have deleted or added empty 1062 * files. In those cases the old_path and new_path is identical on the 1063 * 'diff --git' line. For all other cases we fetch the filenames from 1064 * other header lines. */ 1065 old_path_start = line + STRLEN_LITERAL("diff --git a/"); 1066 new_path_end = line + strlen(line); 1067 new_path_start = old_path_start; 1068 1069 while (TRUE) 1070 { 1071 ptrdiff_t len_old; 1072 ptrdiff_t len_new; 1073 1074 new_path_marker = strstr(new_path_start, " b/"); 1075 1076 /* No new path marker, bail out. */ 1077 if (! new_path_marker) 1078 break; 1079 1080 old_path_end = new_path_marker; 1081 new_path_start = new_path_marker + STRLEN_LITERAL(" b/"); 1082 1083 /* No path after the marker. */ 1084 if (! *new_path_start) 1085 break; 1086 1087 len_old = old_path_end - old_path_start; 1088 len_new = new_path_end - new_path_start; 1089 1090 /* Are the paths before and after the " b/" marker the same? */ 1091 if (len_old == len_new 1092 && ! 
strncmp(old_path_start, new_path_start, len_old)) 1093 { 1094 *old_path_end = '\0'; 1095 SVN_ERR(grab_filename(&patch->old_filename, old_path_start, 1096 result_pool, scratch_pool)); 1097 1098 SVN_ERR(grab_filename(&patch->new_filename, new_path_start, 1099 result_pool, scratch_pool)); 1100 break; 1101 } 1102 } 1103 1104 /* We assume that the path is only modified until we've found a 'tree' 1105 * header */ 1106 patch->operation = svn_diff_op_modified; 1107 1108 *new_state = state_git_diff_seen; 1109 return SVN_NO_ERROR; 1110} 1111 1112/* Parse the '--- ' line of a git extended unidiff. */ 1113static svn_error_t * 1114git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1115 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1116{ 1117 /* If we can find a tab, it separates the filename from 1118 * the rest of the line which we can discard. */ 1119 char *tab = strchr(line, '\t'); 1120 if (tab) 1121 *tab = '\0'; 1122 1123 if (starts_with(line, "--- /dev/null")) 1124 SVN_ERR(grab_filename(&patch->old_filename, "/dev/null", 1125 result_pool, scratch_pool)); 1126 else 1127 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"), 1128 result_pool, scratch_pool)); 1129 1130 *new_state = state_git_minus_seen; 1131 return SVN_NO_ERROR; 1132} 1133 1134/* Parse the '+++ ' line of a git extended unidiff. */ 1135static svn_error_t * 1136git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1137 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1138{ 1139 /* If we can find a tab, it separates the filename from 1140 * the rest of the line which we can discard. 
*/ 1141 char *tab = strchr(line, '\t'); 1142 if (tab) 1143 *tab = '\0'; 1144 1145 if (starts_with(line, "+++ /dev/null")) 1146 SVN_ERR(grab_filename(&patch->new_filename, "/dev/null", 1147 result_pool, scratch_pool)); 1148 else 1149 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"), 1150 result_pool, scratch_pool)); 1151 1152 *new_state = state_git_header_found; 1153 return SVN_NO_ERROR; 1154} 1155 1156/* Parse the 'rename from ' line of a git extended unidiff. */ 1157static svn_error_t * 1158git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1159 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1160{ 1161 SVN_ERR(grab_filename(&patch->old_filename, 1162 line + STRLEN_LITERAL("rename from "), 1163 result_pool, scratch_pool)); 1164 1165 *new_state = state_move_from_seen; 1166 return SVN_NO_ERROR; 1167} 1168 1169/* Parse the 'rename to ' line of a git extended unidiff. */ 1170static svn_error_t * 1171git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1172 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1173{ 1174 SVN_ERR(grab_filename(&patch->new_filename, 1175 line + STRLEN_LITERAL("rename to "), 1176 result_pool, scratch_pool)); 1177 1178 patch->operation = svn_diff_op_moved; 1179 1180 *new_state = state_git_tree_seen; 1181 return SVN_NO_ERROR; 1182} 1183 1184/* Parse the 'copy from ' line of a git extended unidiff. */ 1185static svn_error_t * 1186git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1187 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1188{ 1189 SVN_ERR(grab_filename(&patch->old_filename, 1190 line + STRLEN_LITERAL("copy from "), 1191 result_pool, scratch_pool)); 1192 1193 *new_state = state_copy_from_seen; 1194 return SVN_NO_ERROR; 1195} 1196 1197/* Parse the 'copy to ' line of a git extended unidiff. 
*/ 1198static svn_error_t * 1199git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1200 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1201{ 1202 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "), 1203 result_pool, scratch_pool)); 1204 1205 patch->operation = svn_diff_op_copied; 1206 1207 *new_state = state_git_tree_seen; 1208 return SVN_NO_ERROR; 1209} 1210 1211/* Parse the 'new file ' line of a git extended unidiff. */ 1212static svn_error_t * 1213git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1214 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1215{ 1216 patch->operation = svn_diff_op_added; 1217 1218 /* Filename already retrieved from diff --git header. */ 1219 1220 *new_state = state_git_tree_seen; 1221 return SVN_NO_ERROR; 1222} 1223 1224/* Parse the 'deleted file ' line of a git extended unidiff. */ 1225static svn_error_t * 1226git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1227 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1228{ 1229 patch->operation = svn_diff_op_deleted; 1230 1231 /* Filename already retrieved from diff --git header. */ 1232 1233 *new_state = state_git_tree_seen; 1234 return SVN_NO_ERROR; 1235} 1236 1237/* Add a HUNK associated with the property PROP_NAME to PATCH. */ 1238static svn_error_t * 1239add_property_hunk(svn_patch_t *patch, const char *prop_name, 1240 svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation, 1241 apr_pool_t *result_pool) 1242{ 1243 svn_prop_patch_t *prop_patch; 1244 1245 prop_patch = svn_hash_gets(patch->prop_patches, prop_name); 1246 1247 if (! 
prop_patch) 1248 { 1249 prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t)); 1250 prop_patch->name = prop_name; 1251 prop_patch->operation = operation; 1252 prop_patch->hunks = apr_array_make(result_pool, 1, 1253 sizeof(svn_diff_hunk_t *)); 1254 1255 svn_hash_sets(patch->prop_patches, prop_name, prop_patch); 1256 } 1257 1258 APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk; 1259 1260 return SVN_NO_ERROR; 1261} 1262 1263struct svn_patch_file_t 1264{ 1265 /* The APR file handle to the patch file. */ 1266 apr_file_t *apr_file; 1267 1268 /* The file offset at which the next patch is expected. */ 1269 apr_off_t next_patch_offset; 1270}; 1271 1272svn_error_t * 1273svn_diff_open_patch_file(svn_patch_file_t **patch_file, 1274 const char *local_abspath, 1275 apr_pool_t *result_pool) 1276{ 1277 svn_patch_file_t *p; 1278 1279 p = apr_palloc(result_pool, sizeof(*p)); 1280 SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath, 1281 APR_READ | APR_BUFFERED, APR_OS_DEFAULT, 1282 result_pool)); 1283 p->next_patch_offset = 0; 1284 *patch_file = p; 1285 1286 return SVN_NO_ERROR; 1287} 1288 1289/* Parse hunks from APR_FILE and store them in PATCH->HUNKS. 1290 * Parsing stops if no valid next hunk can be found. 1291 * If IGNORE_WHITESPACE is TRUE, lines without 1292 * leading spaces will be treated as context lines. 1293 * Allocate results in RESULT_POOL. 1294 * Use SCRATCH_POOL for temporary allocations. 
*/ 1295static svn_error_t * 1296parse_hunks(svn_patch_t *patch, apr_file_t *apr_file, 1297 svn_boolean_t ignore_whitespace, 1298 apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1299{ 1300 svn_diff_hunk_t *hunk; 1301 svn_boolean_t is_property; 1302 const char *last_prop_name; 1303 const char *prop_name; 1304 svn_diff_operation_kind_t prop_operation; 1305 apr_pool_t *iterpool; 1306 1307 last_prop_name = NULL; 1308 1309 patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *)); 1310 patch->prop_patches = apr_hash_make(result_pool); 1311 iterpool = svn_pool_create(scratch_pool); 1312 do 1313 { 1314 svn_pool_clear(iterpool); 1315 1316 SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation, 1317 patch, apr_file, ignore_whitespace, result_pool, 1318 iterpool)); 1319 1320 if (hunk && is_property) 1321 { 1322 if (! prop_name) 1323 prop_name = last_prop_name; 1324 else 1325 last_prop_name = prop_name; 1326 1327 /* Skip svn:mergeinfo properties. 1328 * Mergeinfo data cannot be represented as a hunk and 1329 * is therefore stored in PATCH itself. */ 1330 if (strcmp(prop_name, SVN_PROP_MERGEINFO) == 0) 1331 continue; 1332 1333 SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation, 1334 result_pool)); 1335 } 1336 else if (hunk) 1337 { 1338 APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk; 1339 last_prop_name = NULL; 1340 } 1341 1342 } 1343 while (hunk); 1344 svn_pool_destroy(iterpool); 1345 1346 return SVN_NO_ERROR; 1347} 1348 1349/* State machine for the diff header parser. 
1350 * Expected Input Required state Function to call */ 1351static struct transition transitions[] = 1352{ 1353 {"--- ", state_start, diff_minus}, 1354 {"+++ ", state_minus_seen, diff_plus}, 1355 {"diff --git", state_start, git_start}, 1356 {"--- a/", state_git_diff_seen, git_minus}, 1357 {"--- a/", state_git_tree_seen, git_minus}, 1358 {"--- /dev/null", state_git_tree_seen, git_minus}, 1359 {"+++ b/", state_git_minus_seen, git_plus}, 1360 {"+++ /dev/null", state_git_minus_seen, git_plus}, 1361 {"rename from ", state_git_diff_seen, git_move_from}, 1362 {"rename to ", state_move_from_seen, git_move_to}, 1363 {"copy from ", state_git_diff_seen, git_copy_from}, 1364 {"copy to ", state_copy_from_seen, git_copy_to}, 1365 {"new file ", state_git_diff_seen, git_new_file}, 1366 {"deleted file ", state_git_diff_seen, git_deleted_file}, 1367}; 1368 1369svn_error_t * 1370svn_diff_parse_next_patch(svn_patch_t **patch_p, 1371 svn_patch_file_t *patch_file, 1372 svn_boolean_t reverse, 1373 svn_boolean_t ignore_whitespace, 1374 apr_pool_t *result_pool, 1375 apr_pool_t *scratch_pool) 1376{ 1377 apr_off_t pos, last_line; 1378 svn_boolean_t eof; 1379 svn_boolean_t line_after_tree_header_read = FALSE; 1380 apr_pool_t *iterpool; 1381 svn_patch_t *patch; 1382 enum parse_state state = state_start; 1383 1384 if (apr_file_eof(patch_file->apr_file) == APR_EOF) 1385 { 1386 /* No more patches here. */ 1387 *patch_p = NULL; 1388 return SVN_NO_ERROR; 1389 } 1390 1391 patch = apr_pcalloc(result_pool, sizeof(*patch)); 1392 1393 pos = patch_file->next_patch_offset; 1394 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool)); 1395 1396 iterpool = svn_pool_create(scratch_pool); 1397 do 1398 { 1399 svn_stringbuf_t *line; 1400 svn_boolean_t valid_header_line = FALSE; 1401 int i; 1402 1403 svn_pool_clear(iterpool); 1404 1405 /* Remember the current line's offset, and read the line. 
*/ 1406 last_line = pos; 1407 SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof, 1408 APR_SIZE_MAX, iterpool, iterpool)); 1409 1410 if (! eof) 1411 { 1412 /* Update line offset for next iteration. */ 1413 pos = 0; 1414 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos, 1415 iterpool)); 1416 } 1417 1418 /* Run the state machine. */ 1419 for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++) 1420 { 1421 if (starts_with(line->data, transitions[i].expected_input) 1422 && state == transitions[i].required_state) 1423 { 1424 SVN_ERR(transitions[i].fn(&state, line->data, patch, 1425 result_pool, iterpool)); 1426 valid_header_line = TRUE; 1427 break; 1428 } 1429 } 1430 1431 if (state == state_unidiff_found || state == state_git_header_found) 1432 { 1433 /* We have a valid diff header, yay! */ 1434 break; 1435 } 1436 else if (state == state_git_tree_seen && line_after_tree_header_read) 1437 { 1438 /* git patches can contain an index line after the file mode line */ 1439 if (!starts_with(line->data, "index ")) 1440 { 1441 /* We have a valid diff header for a patch with only tree changes. 1442 * Rewind to the start of the line just read, so subsequent calls 1443 * to this function don't end up skipping the line -- it may 1444 * contain a patch. */ 1445 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 1446 scratch_pool)); 1447 break; 1448 } 1449 } 1450 else if (state == state_git_tree_seen) 1451 { 1452 line_after_tree_header_read = TRUE; 1453 } 1454 else if (! valid_header_line && state != state_start 1455 && state != state_git_diff_seen 1456 && !starts_with(line->data, "index ")) 1457 { 1458 /* We've encountered an invalid diff header. 1459 * 1460 * Rewind to the start of the line just read - it may be a new 1461 * header that begins there. */ 1462 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 1463 scratch_pool)); 1464 state = state_start; 1465 } 1466 1467 } 1468 while (! 
eof); 1469 1470 patch->reverse = reverse; 1471 if (reverse) 1472 { 1473 const char *temp; 1474 temp = patch->old_filename; 1475 patch->old_filename = patch->new_filename; 1476 patch->new_filename = temp; 1477 } 1478 1479 if (patch->old_filename == NULL || patch->new_filename == NULL) 1480 { 1481 /* Something went wrong, just discard the result. */ 1482 patch = NULL; 1483 } 1484 else 1485 SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace, 1486 result_pool, iterpool)); 1487 1488 svn_pool_destroy(iterpool); 1489 1490 patch_file->next_patch_offset = 0; 1491 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, 1492 &patch_file->next_patch_offset, scratch_pool)); 1493 1494 if (patch) 1495 { 1496 /* Usually, hunks appear in the patch sorted by their original line 1497 * offset. But just in case they weren't parsed in this order for 1498 * some reason, we sort them so that our caller can assume that hunks 1499 * are sorted as if parsed from a usual patch. */ 1500 svn_sort__array(patch->hunks, compare_hunks); 1501 } 1502 1503 *patch_p = patch; 1504 return SVN_NO_ERROR; 1505} 1506 1507svn_error_t * 1508svn_diff_close_patch_file(svn_patch_file_t *patch_file, 1509 apr_pool_t *scratch_pool) 1510{ 1511 return svn_error_trace(svn_io_file_close(patch_file->apr_file, 1512 scratch_pool)); 1513} 1514