1251881Speter/* 2251881Speter * parse-diff.c: functions for parsing diff files 3251881Speter * 4251881Speter * ==================================================================== 5251881Speter * Licensed to the Apache Software Foundation (ASF) under one 6251881Speter * or more contributor license agreements. See the NOTICE file 7251881Speter * distributed with this work for additional information 8251881Speter * regarding copyright ownership. The ASF licenses this file 9251881Speter * to you under the Apache License, Version 2.0 (the 10251881Speter * "License"); you may not use this file except in compliance 11251881Speter * with the License. You may obtain a copy of the License at 12251881Speter * 13251881Speter * http://www.apache.org/licenses/LICENSE-2.0 14251881Speter * 15251881Speter * Unless required by applicable law or agreed to in writing, 16251881Speter * software distributed under the License is distributed on an 17251881Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18251881Speter * KIND, either express or implied. See the License for the 19251881Speter * specific language governing permissions and limitations 20251881Speter * under the License. 21251881Speter * ==================================================================== 22251881Speter */ 23251881Speter 24251881Speter#include <stdlib.h> 25251881Speter#include <stddef.h> 26251881Speter#include <string.h> 27251881Speter 28251881Speter#include "svn_hash.h" 29251881Speter#include "svn_types.h" 30251881Speter#include "svn_error.h" 31251881Speter#include "svn_io.h" 32251881Speter#include "svn_pools.h" 33251881Speter#include "svn_props.h" 34251881Speter#include "svn_string.h" 35251881Speter#include "svn_utf.h" 36251881Speter#include "svn_dirent_uri.h" 37251881Speter#include "svn_diff.h" 38251881Speter 39251881Speter#include "private/svn_eol_private.h" 40251881Speter#include "private/svn_dep_compat.h" 41251881Speter 42251881Speter/* Helper macro for readability */ 43251881Speter#define starts_with(str, start) \ 44251881Speter (strncmp((str), (start), strlen(start)) == 0) 45251881Speter 46251881Speter/* Like strlen() but for string literals. */ 47251881Speter#define STRLEN_LITERAL(str) (sizeof(str) - 1) 48251881Speter 49251881Speter/* This struct describes a range within a file, as well as the 50251881Speter * current cursor position within the range. All numbers are in bytes. */ 51251881Speterstruct svn_diff__hunk_range { 52251881Speter apr_off_t start; 53251881Speter apr_off_t end; 54251881Speter apr_off_t current; 55251881Speter}; 56251881Speter 57251881Speterstruct svn_diff_hunk_t { 58251881Speter /* The patch this hunk belongs to. */ 59251881Speter svn_patch_t *patch; 60251881Speter 61251881Speter /* APR file handle to the patch file this hunk came from. */ 62251881Speter apr_file_t *apr_file; 63251881Speter 64251881Speter /* Ranges used to keep track of this hunk's texts positions within 65251881Speter * the patch file. */ 66251881Speter struct svn_diff__hunk_range diff_text_range; 67251881Speter struct svn_diff__hunk_range original_text_range; 68251881Speter struct svn_diff__hunk_range modified_text_range; 69251881Speter 70251881Speter /* Hunk ranges as they appeared in the patch file. 71251881Speter * All numbers are lines, not bytes. */ 72251881Speter svn_linenum_t original_start; 73251881Speter svn_linenum_t original_length; 74251881Speter svn_linenum_t modified_start; 75251881Speter svn_linenum_t modified_length; 76251881Speter 77251881Speter /* Number of lines of leading and trailing hunk context. */ 78251881Speter svn_linenum_t leading_context; 79251881Speter svn_linenum_t trailing_context; 80251881Speter}; 81251881Speter 82251881Spetervoid 83251881Spetersvn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk) 84251881Speter{ 85251881Speter hunk->diff_text_range.current = hunk->diff_text_range.start; 86251881Speter} 87251881Speter 88251881Spetervoid 89251881Spetersvn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk) 90251881Speter{ 91251881Speter if (hunk->patch->reverse) 92251881Speter hunk->modified_text_range.current = hunk->modified_text_range.start; 93251881Speter else 94251881Speter hunk->original_text_range.current = hunk->original_text_range.start; 95251881Speter} 96251881Speter 97251881Spetervoid 98251881Spetersvn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk) 99251881Speter{ 100251881Speter if (hunk->patch->reverse) 101251881Speter hunk->original_text_range.current = hunk->original_text_range.start; 102251881Speter else 103251881Speter hunk->modified_text_range.current = hunk->modified_text_range.start; 104251881Speter} 105251881Speter 106251881Spetersvn_linenum_t 107251881Spetersvn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk) 108251881Speter{ 109251881Speter return hunk->patch->reverse ? hunk->modified_start : hunk->original_start; 110251881Speter} 111251881Speter 112251881Spetersvn_linenum_t 113251881Spetersvn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk) 114251881Speter{ 115251881Speter return hunk->patch->reverse ? hunk->modified_length : hunk->original_length; 116251881Speter} 117251881Speter 118251881Spetersvn_linenum_t 119251881Spetersvn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk) 120251881Speter{ 121251881Speter return hunk->patch->reverse ? hunk->original_start : hunk->modified_start; 122251881Speter} 123251881Speter 124251881Spetersvn_linenum_t 125251881Spetersvn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk) 126251881Speter{ 127251881Speter return hunk->patch->reverse ? hunk->original_length : hunk->modified_length; 128251881Speter} 129251881Speter 130251881Spetersvn_linenum_t 131251881Spetersvn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk) 132251881Speter{ 133251881Speter return hunk->leading_context; 134251881Speter} 135251881Speter 136251881Spetersvn_linenum_t 137251881Spetersvn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk) 138251881Speter{ 139251881Speter return hunk->trailing_context; 140251881Speter} 141251881Speter 142251881Speter/* Try to parse a positive number from a decimal number encoded 143251881Speter * in the string NUMBER. Return parsed number in OFFSET, and return 144251881Speter * TRUE if parsing was successful. */ 145251881Speterstatic svn_boolean_t 146251881Speterparse_offset(svn_linenum_t *offset, const char *number) 147251881Speter{ 148251881Speter svn_error_t *err; 149251881Speter apr_uint64_t val; 150251881Speter 151251881Speter err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10); 152251881Speter if (err) 153251881Speter { 154251881Speter svn_error_clear(err); 155251881Speter return FALSE; 156251881Speter } 157251881Speter 158251881Speter *offset = (svn_linenum_t)val; 159251881Speter 160251881Speter return TRUE; 161251881Speter} 162251881Speter 163251881Speter/* Try to parse a hunk range specification from the string RANGE. 164251881Speter * Return parsed information in *START and *LENGTH, and return TRUE 165251881Speter * if the range parsed correctly. Note: This function may modify the 166251881Speter * input value RANGE. */ 167251881Speterstatic svn_boolean_t 168251881Speterparse_range(svn_linenum_t *start, svn_linenum_t *length, char *range) 169251881Speter{ 170251881Speter char *comma; 171251881Speter 172251881Speter if (*range == 0) 173251881Speter return FALSE; 174251881Speter 175251881Speter comma = strstr(range, ","); 176251881Speter if (comma) 177251881Speter { 178251881Speter if (strlen(comma + 1) > 0) 179251881Speter { 180251881Speter /* Try to parse the length. */ 181251881Speter if (! parse_offset(length, comma + 1)) 182251881Speter return FALSE; 183251881Speter 184251881Speter /* Snip off the end of the string, 185251881Speter * so we can comfortably parse the line 186251881Speter * number the hunk starts at. */ 187251881Speter *comma = '\0'; 188251881Speter } 189251881Speter else 190251881Speter /* A comma but no length? */ 191251881Speter return FALSE; 192251881Speter } 193251881Speter else 194251881Speter { 195251881Speter *length = 1; 196251881Speter } 197251881Speter 198251881Speter /* Try to parse the line number the hunk starts at. */ 199251881Speter return parse_offset(start, range); 200251881Speter} 201251881Speter 202251881Speter/* Try to parse a hunk header in string HEADER, putting parsed information 203251881Speter * into HUNK. Return TRUE if the header parsed correctly. ATAT is the 204251881Speter * character string used to delimit the hunk header. 205251881Speter * Do all allocations in POOL. */ 206251881Speterstatic svn_boolean_t 207251881Speterparse_hunk_header(const char *header, svn_diff_hunk_t *hunk, 208251881Speter const char *atat, apr_pool_t *pool) 209251881Speter{ 210251881Speter const char *p; 211251881Speter const char *start; 212251881Speter svn_stringbuf_t *range; 213251881Speter 214251881Speter p = header + strlen(atat); 215251881Speter if (*p != ' ') 216251881Speter /* No. */ 217251881Speter return FALSE; 218251881Speter p++; 219251881Speter if (*p != '-') 220251881Speter /* Nah... */ 221251881Speter return FALSE; 222251881Speter /* OK, this may be worth allocating some memory for... */ 223251881Speter range = svn_stringbuf_create_ensure(31, pool); 224251881Speter start = ++p; 225251881Speter while (*p && *p != ' ') 226251881Speter { 227251881Speter p++; 228251881Speter } 229251881Speter 230251881Speter if (*p != ' ') 231251881Speter /* No no no... */ 232251881Speter return FALSE; 233251881Speter 234251881Speter svn_stringbuf_appendbytes(range, start, p - start); 235251881Speter 236251881Speter /* Try to parse the first range. */ 237251881Speter if (! parse_range(&hunk->original_start, &hunk->original_length, range->data)) 238251881Speter return FALSE; 239251881Speter 240251881Speter /* Clear the stringbuf so we can reuse it for the second range. */ 241251881Speter svn_stringbuf_setempty(range); 242251881Speter p++; 243251881Speter if (*p != '+') 244251881Speter /* Eeek! */ 245251881Speter return FALSE; 246251881Speter /* OK, this may be worth copying... */ 247251881Speter start = ++p; 248251881Speter while (*p && *p != ' ') 249251881Speter { 250251881Speter p++; 251251881Speter } 252251881Speter if (*p != ' ') 253251881Speter /* No no no... */ 254251881Speter return FALSE; 255251881Speter 256251881Speter svn_stringbuf_appendbytes(range, start, p - start); 257251881Speter 258251881Speter /* Check for trailing @@ */ 259251881Speter p++; 260251881Speter if (! starts_with(p, atat)) 261251881Speter return FALSE; 262251881Speter 263251881Speter /* There may be stuff like C-function names after the trailing @@, 264251881Speter * but we ignore that. */ 265251881Speter 266251881Speter /* Try to parse the second range. */ 267251881Speter if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data)) 268251881Speter return FALSE; 269251881Speter 270251881Speter /* Hunk header is good. */ 271251881Speter return TRUE; 272251881Speter} 273251881Speter 274251881Speter/* Read a line of original or modified hunk text from the specified 275251881Speter * RANGE within FILE. FILE is expected to contain unidiff text. 276251881Speter * Leading unidiff symbols ('+', '-', and ' ') are removed from the line, 277251881Speter * Any lines commencing with the VERBOTEN character are discarded. 278251881Speter * VERBOTEN should be '+' or '-', depending on which form of hunk text 279251881Speter * is being read. 280251881Speter * 281251881Speter * All other parameters are as in svn_diff_hunk_readline_original_text() 282251881Speter * and svn_diff_hunk_readline_modified_text(). 283251881Speter */ 284251881Speterstatic svn_error_t * 285251881Speterhunk_readline_original_or_modified(apr_file_t *file, 286251881Speter struct svn_diff__hunk_range *range, 287251881Speter svn_stringbuf_t **stringbuf, 288251881Speter const char **eol, 289251881Speter svn_boolean_t *eof, 290251881Speter char verboten, 291251881Speter apr_pool_t *result_pool, 292251881Speter apr_pool_t *scratch_pool) 293251881Speter{ 294251881Speter apr_size_t max_len; 295251881Speter svn_boolean_t filtered; 296251881Speter apr_off_t pos; 297251881Speter svn_stringbuf_t *str; 298251881Speter 299251881Speter if (range->current >= range->end) 300251881Speter { 301251881Speter /* We're past the range. Indicate that no bytes can be read. */ 302251881Speter *eof = TRUE; 303251881Speter if (eol) 304251881Speter *eol = NULL; 305251881Speter *stringbuf = svn_stringbuf_create_empty(result_pool); 306251881Speter return SVN_NO_ERROR; 307251881Speter } 308251881Speter 309251881Speter pos = 0; 310251881Speter SVN_ERR(svn_io_file_seek(file, APR_CUR, &pos, scratch_pool)); 311251881Speter SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool)); 312251881Speter do 313251881Speter { 314251881Speter max_len = range->end - range->current; 315251881Speter SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len, 316251881Speter result_pool, scratch_pool)); 317251881Speter range->current = 0; 318251881Speter SVN_ERR(svn_io_file_seek(file, APR_CUR, &range->current, scratch_pool)); 319251881Speter filtered = (str->data[0] == verboten || str->data[0] == '\\'); 320251881Speter } 321251881Speter while (filtered && ! *eof); 322251881Speter 323251881Speter if (filtered) 324251881Speter { 325251881Speter /* EOF, return an empty string. */ 326251881Speter *stringbuf = svn_stringbuf_create_ensure(0, result_pool); 327251881Speter } 328251881Speter else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ') 329251881Speter { 330251881Speter /* Shave off leading unidiff symbols. */ 331251881Speter *stringbuf = svn_stringbuf_create(str->data + 1, result_pool); 332251881Speter } 333251881Speter else 334251881Speter { 335251881Speter /* Return the line as-is. */ 336251881Speter *stringbuf = svn_stringbuf_dup(str, result_pool); 337251881Speter } 338251881Speter 339251881Speter SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool)); 340251881Speter 341251881Speter return SVN_NO_ERROR; 342251881Speter} 343251881Speter 344251881Spetersvn_error_t * 345251881Spetersvn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk, 346251881Speter svn_stringbuf_t **stringbuf, 347251881Speter const char **eol, 348251881Speter svn_boolean_t *eof, 349251881Speter apr_pool_t *result_pool, 350251881Speter apr_pool_t *scratch_pool) 351251881Speter{ 352251881Speter return svn_error_trace( 353251881Speter hunk_readline_original_or_modified(hunk->apr_file, 354251881Speter hunk->patch->reverse ? 355251881Speter &hunk->modified_text_range : 356251881Speter &hunk->original_text_range, 357251881Speter stringbuf, eol, eof, 358251881Speter hunk->patch->reverse ? '-' : '+', 359251881Speter result_pool, scratch_pool)); 360251881Speter} 361251881Speter 362251881Spetersvn_error_t * 363251881Spetersvn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk, 364251881Speter svn_stringbuf_t **stringbuf, 365251881Speter const char **eol, 366251881Speter svn_boolean_t *eof, 367251881Speter apr_pool_t *result_pool, 368251881Speter apr_pool_t *scratch_pool) 369251881Speter{ 370251881Speter return svn_error_trace( 371251881Speter hunk_readline_original_or_modified(hunk->apr_file, 372251881Speter hunk->patch->reverse ? 373251881Speter &hunk->original_text_range : 374251881Speter &hunk->modified_text_range, 375251881Speter stringbuf, eol, eof, 376251881Speter hunk->patch->reverse ? '+' : '-', 377251881Speter result_pool, scratch_pool)); 378251881Speter} 379251881Speter 380251881Spetersvn_error_t * 381251881Spetersvn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk, 382251881Speter svn_stringbuf_t **stringbuf, 383251881Speter const char **eol, 384251881Speter svn_boolean_t *eof, 385251881Speter apr_pool_t *result_pool, 386251881Speter apr_pool_t *scratch_pool) 387251881Speter{ 388251881Speter svn_diff_hunk_t dummy; 389251881Speter svn_stringbuf_t *line; 390251881Speter apr_size_t max_len; 391251881Speter apr_off_t pos; 392251881Speter 393251881Speter if (hunk->diff_text_range.current >= hunk->diff_text_range.end) 394251881Speter { 395251881Speter /* We're past the range. Indicate that no bytes can be read. */ 396251881Speter *eof = TRUE; 397251881Speter if (eol) 398251881Speter *eol = NULL; 399251881Speter *stringbuf = svn_stringbuf_create_empty(result_pool); 400251881Speter return SVN_NO_ERROR; 401251881Speter } 402251881Speter 403251881Speter pos = 0; 404251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, &pos, scratch_pool)); 405251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, 406251881Speter &hunk->diff_text_range.current, scratch_pool)); 407251881Speter max_len = hunk->diff_text_range.end - hunk->diff_text_range.current; 408251881Speter SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len, 409251881Speter result_pool, 410251881Speter scratch_pool)); 411251881Speter hunk->diff_text_range.current = 0; 412251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_CUR, 413251881Speter &hunk->diff_text_range.current, scratch_pool)); 414251881Speter SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool)); 415251881Speter 416251881Speter if (hunk->patch->reverse) 417251881Speter { 418251881Speter if (parse_hunk_header(line->data, &dummy, "@@", scratch_pool)) 419251881Speter { 420251881Speter /* Line is a hunk header, reverse it. */ 421251881Speter line = svn_stringbuf_createf(result_pool, 422251881Speter "@@ -%lu,%lu +%lu,%lu @@", 423251881Speter hunk->modified_start, 424251881Speter hunk->modified_length, 425251881Speter hunk->original_start, 426251881Speter hunk->original_length); 427251881Speter } 428251881Speter else if (parse_hunk_header(line->data, &dummy, "##", scratch_pool)) 429251881Speter { 430251881Speter /* Line is a hunk header, reverse it. */ 431251881Speter line = svn_stringbuf_createf(result_pool, 432251881Speter "## -%lu,%lu +%lu,%lu ##", 433251881Speter hunk->modified_start, 434251881Speter hunk->modified_length, 435251881Speter hunk->original_start, 436251881Speter hunk->original_length); 437251881Speter } 438251881Speter else 439251881Speter { 440251881Speter if (line->data[0] == '+') 441251881Speter line->data[0] = '-'; 442251881Speter else if (line->data[0] == '-') 443251881Speter line->data[0] = '+'; 444251881Speter } 445251881Speter } 446251881Speter 447251881Speter *stringbuf = line; 448251881Speter 449251881Speter return SVN_NO_ERROR; 450251881Speter} 451251881Speter 452251881Speter/* Parse *PROP_NAME from HEADER as the part after the INDICATOR line. 453251881Speter * Allocate *PROP_NAME in RESULT_POOL. 454251881Speter * Set *PROP_NAME to NULL if no valid property name was found. */ 455251881Speterstatic svn_error_t * 456251881Speterparse_prop_name(const char **prop_name, const char *header, 457251881Speter const char *indicator, apr_pool_t *result_pool) 458251881Speter{ 459251881Speter SVN_ERR(svn_utf_cstring_to_utf8(prop_name, 460251881Speter header + strlen(indicator), 461251881Speter result_pool)); 462251881Speter if (**prop_name == '\0') 463251881Speter *prop_name = NULL; 464251881Speter else if (! svn_prop_name_is_valid(*prop_name)) 465251881Speter { 466251881Speter svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool); 467251881Speter svn_stringbuf_strip_whitespace(buf); 468251881Speter *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL); 469251881Speter } 470251881Speter 471251881Speter return SVN_NO_ERROR; 472251881Speter} 473251881Speter 474251881Speter/* Return the next *HUNK from a PATCH in APR_FILE. 475251881Speter * If no hunk can be found, set *HUNK to NULL. 476251881Speter * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK 477251881Speter * is the first belonging to a certain property, then PROP_NAME and 478251881Speter * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be 479251881Speter * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be 480251881Speter * treated as context lines. Allocate results in RESULT_POOL. 481251881Speter * Use SCRATCH_POOL for all other allocations. */ 482251881Speterstatic svn_error_t * 483251881Speterparse_next_hunk(svn_diff_hunk_t **hunk, 484251881Speter svn_boolean_t *is_property, 485251881Speter const char **prop_name, 486251881Speter svn_diff_operation_kind_t *prop_operation, 487251881Speter svn_patch_t *patch, 488251881Speter apr_file_t *apr_file, 489251881Speter svn_boolean_t ignore_whitespace, 490251881Speter apr_pool_t *result_pool, 491251881Speter apr_pool_t *scratch_pool) 492251881Speter{ 493251881Speter static const char * const minus = "--- "; 494251881Speter static const char * const text_atat = "@@"; 495251881Speter static const char * const prop_atat = "##"; 496251881Speter svn_stringbuf_t *line; 497251881Speter svn_boolean_t eof, in_hunk, hunk_seen; 498251881Speter apr_off_t pos, last_line; 499251881Speter apr_off_t start, end; 500251881Speter apr_off_t original_end; 501251881Speter apr_off_t modified_end; 502251881Speter svn_linenum_t original_lines; 503251881Speter svn_linenum_t modified_lines; 504251881Speter svn_linenum_t leading_context; 505251881Speter svn_linenum_t trailing_context; 506251881Speter svn_boolean_t changed_line_seen; 507251881Speter enum { 508251881Speter noise_line, 509251881Speter original_line, 510251881Speter modified_line, 511251881Speter context_line 512251881Speter } last_line_type; 513251881Speter apr_pool_t *iterpool; 514251881Speter 515251881Speter *prop_operation = svn_diff_op_unchanged; 516251881Speter 517251881Speter /* We only set this if we have a property hunk header. */ 518251881Speter *prop_name = NULL; 519251881Speter *is_property = FALSE; 520251881Speter 521251881Speter if (apr_file_eof(apr_file) == APR_EOF) 522251881Speter { 523251881Speter /* No more hunks here. */ 524251881Speter *hunk = NULL; 525251881Speter return SVN_NO_ERROR; 526251881Speter } 527251881Speter 528251881Speter in_hunk = FALSE; 529251881Speter hunk_seen = FALSE; 530251881Speter leading_context = 0; 531251881Speter trailing_context = 0; 532251881Speter changed_line_seen = FALSE; 533251881Speter original_end = 0; 534251881Speter modified_end = 0; 535251881Speter *hunk = apr_pcalloc(result_pool, sizeof(**hunk)); 536251881Speter 537251881Speter /* Get current seek position -- APR has no ftell() :( */ 538251881Speter pos = 0; 539251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, scratch_pool)); 540251881Speter 541251881Speter /* Start out assuming noise. */ 542251881Speter last_line_type = noise_line; 543251881Speter 544251881Speter iterpool = svn_pool_create(scratch_pool); 545251881Speter do 546251881Speter { 547251881Speter 548251881Speter svn_pool_clear(iterpool); 549251881Speter 550251881Speter /* Remember the current line's offset, and read the line. */ 551251881Speter last_line = pos; 552251881Speter SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX, 553251881Speter iterpool, iterpool)); 554251881Speter 555251881Speter /* Update line offset for next iteration. */ 556251881Speter pos = 0; 557251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_CUR, &pos, iterpool)); 558251881Speter 559251881Speter /* Lines starting with a backslash indicate a missing EOL: 560251881Speter * "\ No newline at end of file" or "end of property". */ 561251881Speter if (line->data[0] == '\\') 562251881Speter { 563251881Speter if (in_hunk) 564251881Speter { 565251881Speter char eolbuf[2]; 566251881Speter apr_size_t len; 567251881Speter apr_off_t off; 568251881Speter apr_off_t hunk_text_end; 569251881Speter 570251881Speter /* Comment terminates the hunk text and says the hunk text 571251881Speter * has no trailing EOL. Snip off trailing EOL which is part 572251881Speter * of the patch file but not part of the hunk text. */ 573251881Speter off = last_line - 2; 574251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool)); 575251881Speter len = sizeof(eolbuf); 576251881Speter SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len, 577251881Speter &eof, iterpool)); 578251881Speter if (eolbuf[0] == '\r' && eolbuf[1] == '\n') 579251881Speter hunk_text_end = last_line - 2; 580251881Speter else if (eolbuf[1] == '\n' || eolbuf[1] == '\r') 581251881Speter hunk_text_end = last_line - 1; 582251881Speter else 583251881Speter hunk_text_end = last_line; 584251881Speter 585251881Speter if (last_line_type == original_line && original_end == 0) 586251881Speter original_end = hunk_text_end; 587251881Speter else if (last_line_type == modified_line && modified_end == 0) 588251881Speter modified_end = hunk_text_end; 589251881Speter else if (last_line_type == context_line) 590251881Speter { 591251881Speter if (original_end == 0) 592251881Speter original_end = hunk_text_end; 593251881Speter if (modified_end == 0) 594251881Speter modified_end = hunk_text_end; 595251881Speter } 596251881Speter 597251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool)); 598251881Speter } 599251881Speter 600251881Speter continue; 601251881Speter } 602251881Speter 603251881Speter if (in_hunk) 604251881Speter { 605251881Speter char c; 606251881Speter static const char add = '+'; 607251881Speter static const char del = '-'; 608251881Speter 609251881Speter if (! hunk_seen) 610251881Speter { 611251881Speter /* We're reading the first line of the hunk, so the start 612251881Speter * of the line just read is the hunk text's byte offset. */ 613251881Speter start = last_line; 614251881Speter } 615251881Speter 616251881Speter c = line->data[0]; 617251881Speter if (original_lines > 0 && modified_lines > 0 && 618251881Speter ((c == ' ') 619251881Speter /* Tolerate chopped leading spaces on empty lines. */ 620251881Speter || (! eof && line->len == 0) 621251881Speter /* Maybe tolerate chopped leading spaces on non-empty lines. */ 622251881Speter || (ignore_whitespace && c != del && c != add))) 623251881Speter { 624251881Speter /* It's a "context" line in the hunk. */ 625251881Speter hunk_seen = TRUE; 626251881Speter original_lines--; 627251881Speter modified_lines--; 628251881Speter if (changed_line_seen) 629251881Speter trailing_context++; 630251881Speter else 631251881Speter leading_context++; 632251881Speter last_line_type = context_line; 633251881Speter } 634251881Speter else if (original_lines > 0 && c == del) 635251881Speter { 636251881Speter /* It's a "deleted" line in the hunk. */ 637251881Speter hunk_seen = TRUE; 638251881Speter changed_line_seen = TRUE; 639251881Speter 640251881Speter /* A hunk may have context in the middle. We only want 641251881Speter trailing lines of context. */ 642251881Speter if (trailing_context > 0) 643251881Speter trailing_context = 0; 644251881Speter 645251881Speter original_lines--; 646251881Speter last_line_type = original_line; 647251881Speter } 648251881Speter else if (modified_lines > 0 && c == add) 649251881Speter { 650251881Speter /* It's an "added" line in the hunk. */ 651251881Speter hunk_seen = TRUE; 652251881Speter changed_line_seen = TRUE; 653251881Speter 654251881Speter /* A hunk may have context in the middle. We only want 655251881Speter trailing lines of context. */ 656251881Speter if (trailing_context > 0) 657251881Speter trailing_context = 0; 658251881Speter 659251881Speter modified_lines--; 660251881Speter last_line_type = modified_line; 661251881Speter } 662251881Speter else 663251881Speter { 664251881Speter if (eof) 665251881Speter { 666251881Speter /* The hunk ends at EOF. */ 667251881Speter end = pos; 668251881Speter } 669251881Speter else 670251881Speter { 671251881Speter /* The start of the current line marks the first byte 672251881Speter * after the hunk text. */ 673251881Speter end = last_line; 674251881Speter } 675251881Speter 676251881Speter if (original_end == 0) 677251881Speter original_end = end; 678251881Speter if (modified_end == 0) 679251881Speter modified_end = end; 680251881Speter break; /* Hunk was empty or has been read. */ 681251881Speter } 682251881Speter } 683251881Speter else 684251881Speter { 685251881Speter if (starts_with(line->data, text_atat)) 686251881Speter { 687251881Speter /* Looks like we have a hunk header, try to rip it apart. */ 688251881Speter in_hunk = parse_hunk_header(line->data, *hunk, text_atat, 689251881Speter iterpool); 690251881Speter if (in_hunk) 691251881Speter { 692251881Speter original_lines = (*hunk)->original_length; 693251881Speter modified_lines = (*hunk)->modified_length; 694251881Speter *is_property = FALSE; 695251881Speter } 696251881Speter } 697251881Speter else if (starts_with(line->data, prop_atat)) 698251881Speter { 699251881Speter /* Looks like we have a property hunk header, try to rip it 700251881Speter * apart. */ 701251881Speter in_hunk = parse_hunk_header(line->data, *hunk, prop_atat, 702251881Speter iterpool); 703251881Speter if (in_hunk) 704251881Speter { 705251881Speter original_lines = (*hunk)->original_length; 706251881Speter modified_lines = (*hunk)->modified_length; 707251881Speter *is_property = TRUE; 708251881Speter } 709251881Speter } 710251881Speter else if (starts_with(line->data, "Added: ")) 711251881Speter { 712251881Speter SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ", 713251881Speter result_pool)); 714251881Speter if (*prop_name) 715251881Speter *prop_operation = svn_diff_op_added; 716251881Speter } 717251881Speter else if (starts_with(line->data, "Deleted: ")) 718251881Speter { 719251881Speter SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ", 720251881Speter result_pool)); 721251881Speter if (*prop_name) 722251881Speter *prop_operation = svn_diff_op_deleted; 723251881Speter } 724251881Speter else if (starts_with(line->data, "Modified: ")) 725251881Speter { 726251881Speter SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ", 727251881Speter result_pool)); 728251881Speter if (*prop_name) 729251881Speter *prop_operation = svn_diff_op_modified; 730251881Speter } 731251881Speter else if (starts_with(line->data, minus) 732251881Speter || starts_with(line->data, "diff --git ")) 733251881Speter /* This could be a header of another patch. Bail out. */ 734251881Speter break; 735251881Speter } 736251881Speter } 737251881Speter /* Check for the line length since a file may not have a newline at the 738251881Speter * end and we depend upon the last line to be an empty one. */ 739251881Speter while (! eof || line->len > 0); 740251881Speter svn_pool_destroy(iterpool); 741251881Speter 742251881Speter if (! eof) 743251881Speter /* Rewind to the start of the line just read, so subsequent calls 744251881Speter * to this function or svn_diff_parse_next_patch() don't end 745251881Speter * up skipping the line -- it may contain a patch or hunk header. */ 746251881Speter SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool)); 747251881Speter 748251881Speter if (hunk_seen && start < end) 749251881Speter { 750251881Speter (*hunk)->patch = patch; 751251881Speter (*hunk)->apr_file = apr_file; 752251881Speter (*hunk)->leading_context = leading_context; 753251881Speter (*hunk)->trailing_context = trailing_context; 754251881Speter (*hunk)->diff_text_range.start = start; 755251881Speter (*hunk)->diff_text_range.current = start; 756251881Speter (*hunk)->diff_text_range.end = end; 757251881Speter (*hunk)->original_text_range.start = start; 758251881Speter (*hunk)->original_text_range.current = start; 759251881Speter (*hunk)->original_text_range.end = original_end; 760251881Speter (*hunk)->modified_text_range.start = start; 761251881Speter (*hunk)->modified_text_range.current = start; 762251881Speter (*hunk)->modified_text_range.end = modified_end; 763251881Speter } 764251881Speter else 765251881Speter /* Something went wrong, just discard the result. */ 766251881Speter *hunk = NULL; 767251881Speter 768251881Speter return SVN_NO_ERROR; 769251881Speter} 770251881Speter 771251881Speter/* Compare function for sorting hunks after parsing. 772251881Speter * We sort hunks by their original line offset. */ 773251881Speterstatic int 774251881Spetercompare_hunks(const void *a, const void *b) 775251881Speter{ 776251881Speter const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a); 777251881Speter const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b); 778251881Speter 779251881Speter if (ha->original_start < hb->original_start) 780251881Speter return -1; 781251881Speter if (ha->original_start > hb->original_start) 782251881Speter return 1; 783251881Speter return 0; 784251881Speter} 785251881Speter 786251881Speter/* Possible states of the diff header parser. */ 787251881Speterenum parse_state 788251881Speter{ 789251881Speter state_start, /* initial */ 790251881Speter state_git_diff_seen, /* diff --git */ 791251881Speter state_git_tree_seen, /* a tree operation, rather then content change */ 792251881Speter state_git_minus_seen, /* --- /dev/null; or --- a/ */ 793251881Speter state_git_plus_seen, /* +++ /dev/null; or +++ a/ */ 794251881Speter state_move_from_seen, /* rename from foo.c */ 795251881Speter state_copy_from_seen, /* copy from foo.c */ 796251881Speter state_minus_seen, /* --- foo.c */ 797251881Speter state_unidiff_found, /* valid start of a regular unidiff header */ 798251881Speter state_git_header_found /* valid start of a --git diff header */ 799251881Speter}; 800251881Speter 801251881Speter/* Data type describing a valid state transition of the parser. */ 802251881Speterstruct transition 803251881Speter{ 804251881Speter const char *expected_input; 805251881Speter enum parse_state required_state; 806251881Speter 807251881Speter /* A callback called upon each parser state transition. */ 808251881Speter svn_error_t *(*fn)(enum parse_state *new_state, char *input, 809251881Speter svn_patch_t *patch, apr_pool_t *result_pool, 810251881Speter apr_pool_t *scratch_pool); 811251881Speter}; 812251881Speter 813251881Speter/* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */ 814251881Speterstatic svn_error_t * 815251881Spetergrab_filename(const char **file_name, const char *line, apr_pool_t *result_pool, 816251881Speter apr_pool_t *scratch_pool) 817251881Speter{ 818251881Speter const char *utf8_path; 819251881Speter const char *canon_path; 820251881Speter 821251881Speter /* Grab the filename and encode it in UTF-8. */ 822251881Speter /* TODO: Allow specifying the patch file's encoding. 823251881Speter * For now, we assume its encoding is native. */ 824251881Speter /* ### This can fail if the filename cannot be represented in the current 825251881Speter * ### locale's encoding. */ 826251881Speter SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path, 827251881Speter line, 828251881Speter scratch_pool)); 829251881Speter 830251881Speter /* Canonicalize the path name. */ 831251881Speter canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool); 832251881Speter 833251881Speter *file_name = apr_pstrdup(result_pool, canon_path); 834251881Speter 835251881Speter return SVN_NO_ERROR; 836251881Speter} 837251881Speter 838251881Speter/* Parse the '--- ' line of a regular unidiff. */ 839251881Speterstatic svn_error_t * 840251881Speterdiff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 841251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 842251881Speter{ 843251881Speter /* If we can find a tab, it separates the filename from 844251881Speter * the rest of the line which we can discard. */ 845251881Speter char *tab = strchr(line, '\t'); 846251881Speter if (tab) 847251881Speter *tab = '\0'; 848251881Speter 849251881Speter SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "), 850251881Speter result_pool, scratch_pool)); 851251881Speter 852251881Speter *new_state = state_minus_seen; 853251881Speter 854251881Speter return SVN_NO_ERROR; 855251881Speter} 856251881Speter 857251881Speter/* Parse the '+++ ' line of a regular unidiff. */ 858251881Speterstatic svn_error_t * 859251881Speterdiff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 860251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 861251881Speter{ 862251881Speter /* If we can find a tab, it separates the filename from 863251881Speter * the rest of the line which we can discard. */ 864251881Speter char *tab = strchr(line, '\t'); 865251881Speter if (tab) 866251881Speter *tab = '\0'; 867251881Speter 868251881Speter SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "), 869251881Speter result_pool, scratch_pool)); 870251881Speter 871251881Speter *new_state = state_unidiff_found; 872251881Speter 873251881Speter return SVN_NO_ERROR; 874251881Speter} 875251881Speter 876251881Speter/* Parse the first line of a git extended unidiff. */ 877251881Speterstatic svn_error_t * 878251881Spetergit_start(enum parse_state *new_state, char *line, svn_patch_t *patch, 879251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 880251881Speter{ 881251881Speter const char *old_path_start; 882251881Speter char *old_path_end; 883251881Speter const char *new_path_start; 884251881Speter const char *new_path_end; 885251881Speter char *new_path_marker; 886251881Speter const char *old_path_marker; 887251881Speter 888251881Speter /* ### Add handling of escaped paths 889251881Speter * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html: 890251881Speter * 891251881Speter * TAB, LF, double quote and backslash characters in pathnames are 892251881Speter * represented as \t, \n, \" and \\, respectively. If there is need for 893251881Speter * such substitution then the whole pathname is put in double quotes. 894251881Speter */ 895251881Speter 896251881Speter /* Our line should look like this: 'diff --git a/path b/path'. 897251881Speter * 898251881Speter * If we find any deviations from that format, we return with state reset 899251881Speter * to start. 900251881Speter */ 901251881Speter old_path_marker = strstr(line, " a/"); 902251881Speter 903251881Speter if (! old_path_marker) 904251881Speter { 905251881Speter *new_state = state_start; 906251881Speter return SVN_NO_ERROR; 907251881Speter } 908251881Speter 909251881Speter if (! *(old_path_marker + 3)) 910251881Speter { 911251881Speter *new_state = state_start; 912251881Speter return SVN_NO_ERROR; 913251881Speter } 914251881Speter 915251881Speter new_path_marker = strstr(old_path_marker, " b/"); 916251881Speter 917251881Speter if (! new_path_marker) 918251881Speter { 919251881Speter *new_state = state_start; 920251881Speter return SVN_NO_ERROR; 921251881Speter } 922251881Speter 923251881Speter if (! *(new_path_marker + 3)) 924251881Speter { 925251881Speter *new_state = state_start; 926251881Speter return SVN_NO_ERROR; 927251881Speter } 928251881Speter 929251881Speter /* By now, we know that we have a line on the form '--git diff a/.+ b/.+' 930251881Speter * We only need the filenames when we have deleted or added empty 931251881Speter * files. In those cases the old_path and new_path is identical on the 932251881Speter * 'diff --git' line. For all other cases we fetch the filenames from 933251881Speter * other header lines. */ 934251881Speter old_path_start = line + STRLEN_LITERAL("diff --git a/"); 935251881Speter new_path_end = line + strlen(line); 936251881Speter new_path_start = old_path_start; 937251881Speter 938251881Speter while (TRUE) 939251881Speter { 940251881Speter ptrdiff_t len_old; 941251881Speter ptrdiff_t len_new; 942251881Speter 943251881Speter new_path_marker = strstr(new_path_start, " b/"); 944251881Speter 945251881Speter /* No new path marker, bail out. */ 946251881Speter if (! new_path_marker) 947251881Speter break; 948251881Speter 949251881Speter old_path_end = new_path_marker; 950251881Speter new_path_start = new_path_marker + STRLEN_LITERAL(" b/"); 951251881Speter 952251881Speter /* No path after the marker. */ 953251881Speter if (! *new_path_start) 954251881Speter break; 955251881Speter 956251881Speter len_old = old_path_end - old_path_start; 957251881Speter len_new = new_path_end - new_path_start; 958251881Speter 959251881Speter /* Are the paths before and after the " b/" marker the same? */ 960251881Speter if (len_old == len_new 961251881Speter && ! strncmp(old_path_start, new_path_start, len_old)) 962251881Speter { 963251881Speter *old_path_end = '\0'; 964251881Speter SVN_ERR(grab_filename(&patch->old_filename, old_path_start, 965251881Speter result_pool, scratch_pool)); 966251881Speter 967251881Speter SVN_ERR(grab_filename(&patch->new_filename, new_path_start, 968251881Speter result_pool, scratch_pool)); 969251881Speter break; 970251881Speter } 971251881Speter } 972251881Speter 973251881Speter /* We assume that the path is only modified until we've found a 'tree' 974251881Speter * header */ 975251881Speter patch->operation = svn_diff_op_modified; 976251881Speter 977251881Speter *new_state = state_git_diff_seen; 978251881Speter return SVN_NO_ERROR; 979251881Speter} 980251881Speter 981251881Speter/* Parse the '--- ' line of a git extended unidiff. */ 982251881Speterstatic svn_error_t * 983251881Spetergit_minus(enum parse_state *new_state, char *line, svn_patch_t *patch, 984251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 985251881Speter{ 986251881Speter /* If we can find a tab, it separates the filename from 987251881Speter * the rest of the line which we can discard. */ 988251881Speter char *tab = strchr(line, '\t'); 989251881Speter if (tab) 990251881Speter *tab = '\0'; 991251881Speter 992251881Speter if (starts_with(line, "--- /dev/null")) 993251881Speter SVN_ERR(grab_filename(&patch->old_filename, "/dev/null", 994251881Speter result_pool, scratch_pool)); 995251881Speter else 996251881Speter SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"), 997251881Speter result_pool, scratch_pool)); 998251881Speter 999251881Speter *new_state = state_git_minus_seen; 1000251881Speter return SVN_NO_ERROR; 1001251881Speter} 1002251881Speter 1003251881Speter/* Parse the '+++ ' line of a git extended unidiff. */ 1004251881Speterstatic svn_error_t * 1005251881Spetergit_plus(enum parse_state *new_state, char *line, svn_patch_t *patch, 1006251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1007251881Speter{ 1008251881Speter /* If we can find a tab, it separates the filename from 1009251881Speter * the rest of the line which we can discard. */ 1010251881Speter char *tab = strchr(line, '\t'); 1011251881Speter if (tab) 1012251881Speter *tab = '\0'; 1013251881Speter 1014251881Speter if (starts_with(line, "+++ /dev/null")) 1015251881Speter SVN_ERR(grab_filename(&patch->new_filename, "/dev/null", 1016251881Speter result_pool, scratch_pool)); 1017251881Speter else 1018251881Speter SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"), 1019251881Speter result_pool, scratch_pool)); 1020251881Speter 1021251881Speter *new_state = state_git_header_found; 1022251881Speter return SVN_NO_ERROR; 1023251881Speter} 1024251881Speter 1025251881Speter/* Parse the 'rename from ' line of a git extended unidiff. */ 1026251881Speterstatic svn_error_t * 1027251881Spetergit_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1028251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1029251881Speter{ 1030251881Speter SVN_ERR(grab_filename(&patch->old_filename, 1031251881Speter line + STRLEN_LITERAL("rename from "), 1032251881Speter result_pool, scratch_pool)); 1033251881Speter 1034251881Speter *new_state = state_move_from_seen; 1035251881Speter return SVN_NO_ERROR; 1036251881Speter} 1037251881Speter 1038251881Speter/* Parse the 'rename to ' line of a git extended unidiff. */ 1039251881Speterstatic svn_error_t * 1040251881Spetergit_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1041251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1042251881Speter{ 1043251881Speter SVN_ERR(grab_filename(&patch->new_filename, 1044251881Speter line + STRLEN_LITERAL("rename to "), 1045251881Speter result_pool, scratch_pool)); 1046251881Speter 1047251881Speter patch->operation = svn_diff_op_moved; 1048251881Speter 1049251881Speter *new_state = state_git_tree_seen; 1050251881Speter return SVN_NO_ERROR; 1051251881Speter} 1052251881Speter 1053251881Speter/* Parse the 'copy from ' line of a git extended unidiff. */ 1054251881Speterstatic svn_error_t * 1055251881Spetergit_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch, 1056251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1057251881Speter{ 1058251881Speter SVN_ERR(grab_filename(&patch->old_filename, 1059251881Speter line + STRLEN_LITERAL("copy from "), 1060251881Speter result_pool, scratch_pool)); 1061251881Speter 1062251881Speter *new_state = state_copy_from_seen; 1063251881Speter return SVN_NO_ERROR; 1064251881Speter} 1065251881Speter 1066251881Speter/* Parse the 'copy to ' line of a git extended unidiff. */ 1067251881Speterstatic svn_error_t * 1068251881Spetergit_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch, 1069251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1070251881Speter{ 1071251881Speter SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "), 1072251881Speter result_pool, scratch_pool)); 1073251881Speter 1074251881Speter patch->operation = svn_diff_op_copied; 1075251881Speter 1076251881Speter *new_state = state_git_tree_seen; 1077251881Speter return SVN_NO_ERROR; 1078251881Speter} 1079251881Speter 1080251881Speter/* Parse the 'new file ' line of a git extended unidiff. */ 1081251881Speterstatic svn_error_t * 1082251881Spetergit_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1083251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1084251881Speter{ 1085251881Speter patch->operation = svn_diff_op_added; 1086251881Speter 1087251881Speter /* Filename already retrieved from diff --git header. */ 1088251881Speter 1089251881Speter *new_state = state_git_tree_seen; 1090251881Speter return SVN_NO_ERROR; 1091251881Speter} 1092251881Speter 1093251881Speter/* Parse the 'deleted file ' line of a git extended unidiff. */ 1094251881Speterstatic svn_error_t * 1095251881Spetergit_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch, 1096251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1097251881Speter{ 1098251881Speter patch->operation = svn_diff_op_deleted; 1099251881Speter 1100251881Speter /* Filename already retrieved from diff --git header. */ 1101251881Speter 1102251881Speter *new_state = state_git_tree_seen; 1103251881Speter return SVN_NO_ERROR; 1104251881Speter} 1105251881Speter 1106251881Speter/* Add a HUNK associated with the property PROP_NAME to PATCH. */ 1107251881Speterstatic svn_error_t * 1108251881Speteradd_property_hunk(svn_patch_t *patch, const char *prop_name, 1109251881Speter svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation, 1110251881Speter apr_pool_t *result_pool) 1111251881Speter{ 1112251881Speter svn_prop_patch_t *prop_patch; 1113251881Speter 1114251881Speter prop_patch = svn_hash_gets(patch->prop_patches, prop_name); 1115251881Speter 1116251881Speter if (! prop_patch) 1117251881Speter { 1118251881Speter prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t)); 1119251881Speter prop_patch->name = prop_name; 1120251881Speter prop_patch->operation = operation; 1121251881Speter prop_patch->hunks = apr_array_make(result_pool, 1, 1122251881Speter sizeof(svn_diff_hunk_t *)); 1123251881Speter 1124251881Speter svn_hash_sets(patch->prop_patches, prop_name, prop_patch); 1125251881Speter } 1126251881Speter 1127251881Speter APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk; 1128251881Speter 1129251881Speter return SVN_NO_ERROR; 1130251881Speter} 1131251881Speter 1132251881Speterstruct svn_patch_file_t 1133251881Speter{ 1134251881Speter /* The APR file handle to the patch file. */ 1135251881Speter apr_file_t *apr_file; 1136251881Speter 1137251881Speter /* The file offset at which the next patch is expected. */ 1138251881Speter apr_off_t next_patch_offset; 1139251881Speter}; 1140251881Speter 1141251881Spetersvn_error_t * 1142251881Spetersvn_diff_open_patch_file(svn_patch_file_t **patch_file, 1143251881Speter const char *local_abspath, 1144251881Speter apr_pool_t *result_pool) 1145251881Speter{ 1146251881Speter svn_patch_file_t *p; 1147251881Speter 1148251881Speter p = apr_palloc(result_pool, sizeof(*p)); 1149251881Speter SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath, 1150251881Speter APR_READ | APR_BUFFERED, APR_OS_DEFAULT, 1151251881Speter result_pool)); 1152251881Speter p->next_patch_offset = 0; 1153251881Speter *patch_file = p; 1154251881Speter 1155251881Speter return SVN_NO_ERROR; 1156251881Speter} 1157251881Speter 1158251881Speter/* Parse hunks from APR_FILE and store them in PATCH->HUNKS. 1159251881Speter * Parsing stops if no valid next hunk can be found. 1160251881Speter * If IGNORE_WHITESPACE is TRUE, lines without 1161251881Speter * leading spaces will be treated as context lines. 1162251881Speter * Allocate results in RESULT_POOL. 1163251881Speter * Use SCRATCH_POOL for temporary allocations. */ 1164251881Speterstatic svn_error_t * 1165251881Speterparse_hunks(svn_patch_t *patch, apr_file_t *apr_file, 1166251881Speter svn_boolean_t ignore_whitespace, 1167251881Speter apr_pool_t *result_pool, apr_pool_t *scratch_pool) 1168251881Speter{ 1169251881Speter svn_diff_hunk_t *hunk; 1170251881Speter svn_boolean_t is_property; 1171251881Speter const char *last_prop_name; 1172251881Speter const char *prop_name; 1173251881Speter svn_diff_operation_kind_t prop_operation; 1174251881Speter apr_pool_t *iterpool; 1175251881Speter 1176251881Speter last_prop_name = NULL; 1177251881Speter 1178251881Speter patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *)); 1179251881Speter patch->prop_patches = apr_hash_make(result_pool); 1180251881Speter iterpool = svn_pool_create(scratch_pool); 1181251881Speter do 1182251881Speter { 1183251881Speter svn_pool_clear(iterpool); 1184251881Speter 1185251881Speter SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation, 1186251881Speter patch, apr_file, ignore_whitespace, result_pool, 1187251881Speter iterpool)); 1188251881Speter 1189251881Speter if (hunk && is_property) 1190251881Speter { 1191251881Speter if (! prop_name) 1192251881Speter prop_name = last_prop_name; 1193251881Speter else 1194251881Speter last_prop_name = prop_name; 1195251881Speter SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation, 1196251881Speter result_pool)); 1197251881Speter } 1198251881Speter else if (hunk) 1199251881Speter { 1200251881Speter APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk; 1201251881Speter last_prop_name = NULL; 1202251881Speter } 1203251881Speter 1204251881Speter } 1205251881Speter while (hunk); 1206251881Speter svn_pool_destroy(iterpool); 1207251881Speter 1208251881Speter return SVN_NO_ERROR; 1209251881Speter} 1210251881Speter 1211251881Speter/* State machine for the diff header parser. 1212251881Speter * Expected Input Required state Function to call */ 1213251881Speterstatic struct transition transitions[] = 1214251881Speter{ 1215251881Speter {"--- ", state_start, diff_minus}, 1216251881Speter {"+++ ", state_minus_seen, diff_plus}, 1217251881Speter {"diff --git", state_start, git_start}, 1218251881Speter {"--- a/", state_git_diff_seen, git_minus}, 1219251881Speter {"--- a/", state_git_tree_seen, git_minus}, 1220251881Speter {"--- /dev/null", state_git_tree_seen, git_minus}, 1221251881Speter {"+++ b/", state_git_minus_seen, git_plus}, 1222251881Speter {"+++ /dev/null", state_git_minus_seen, git_plus}, 1223251881Speter {"rename from ", state_git_diff_seen, git_move_from}, 1224251881Speter {"rename to ", state_move_from_seen, git_move_to}, 1225251881Speter {"copy from ", state_git_diff_seen, git_copy_from}, 1226251881Speter {"copy to ", state_copy_from_seen, git_copy_to}, 1227251881Speter {"new file ", state_git_diff_seen, git_new_file}, 1228251881Speter {"deleted file ", state_git_diff_seen, git_deleted_file}, 1229251881Speter}; 1230251881Speter 1231251881Spetersvn_error_t * 1232251881Spetersvn_diff_parse_next_patch(svn_patch_t **patch, 1233251881Speter svn_patch_file_t *patch_file, 1234251881Speter svn_boolean_t reverse, 1235251881Speter svn_boolean_t ignore_whitespace, 1236251881Speter apr_pool_t *result_pool, 1237251881Speter apr_pool_t *scratch_pool) 1238251881Speter{ 1239251881Speter apr_off_t pos, last_line; 1240251881Speter svn_boolean_t eof; 1241251881Speter svn_boolean_t line_after_tree_header_read = FALSE; 1242251881Speter apr_pool_t *iterpool; 1243251881Speter enum parse_state state = state_start; 1244251881Speter 1245251881Speter if (apr_file_eof(patch_file->apr_file) == APR_EOF) 1246251881Speter { 1247251881Speter /* No more patches here. */ 1248251881Speter *patch = NULL; 1249251881Speter return SVN_NO_ERROR; 1250251881Speter } 1251251881Speter 1252251881Speter *patch = apr_pcalloc(result_pool, sizeof(**patch)); 1253251881Speter 1254251881Speter pos = patch_file->next_patch_offset; 1255251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool)); 1256251881Speter 1257251881Speter iterpool = svn_pool_create(scratch_pool); 1258251881Speter do 1259251881Speter { 1260251881Speter svn_stringbuf_t *line; 1261251881Speter svn_boolean_t valid_header_line = FALSE; 1262251881Speter int i; 1263251881Speter 1264251881Speter svn_pool_clear(iterpool); 1265251881Speter 1266251881Speter /* Remember the current line's offset, and read the line. */ 1267251881Speter last_line = pos; 1268251881Speter SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof, 1269251881Speter APR_SIZE_MAX, iterpool, iterpool)); 1270251881Speter 1271251881Speter if (! eof) 1272251881Speter { 1273251881Speter /* Update line offset for next iteration. */ 1274251881Speter pos = 0; 1275251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, &pos, 1276251881Speter iterpool)); 1277251881Speter } 1278251881Speter 1279251881Speter /* Run the state machine. */ 1280251881Speter for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++) 1281251881Speter { 1282251881Speter if (starts_with(line->data, transitions[i].expected_input) 1283251881Speter && state == transitions[i].required_state) 1284251881Speter { 1285251881Speter SVN_ERR(transitions[i].fn(&state, line->data, *patch, 1286251881Speter result_pool, iterpool)); 1287251881Speter valid_header_line = TRUE; 1288251881Speter break; 1289251881Speter } 1290251881Speter } 1291251881Speter 1292251881Speter if (state == state_unidiff_found || state == state_git_header_found) 1293251881Speter { 1294251881Speter /* We have a valid diff header, yay! */ 1295251881Speter break; 1296251881Speter } 1297251881Speter else if (state == state_git_tree_seen && line_after_tree_header_read) 1298251881Speter { 1299251881Speter /* git patches can contain an index line after the file mode line */ 1300251881Speter if (!starts_with(line->data, "index ")) 1301251881Speter { 1302251881Speter /* We have a valid diff header for a patch with only tree changes. 1303251881Speter * Rewind to the start of the line just read, so subsequent calls 1304251881Speter * to this function don't end up skipping the line -- it may 1305251881Speter * contain a patch. */ 1306251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 1307251881Speter scratch_pool)); 1308251881Speter break; 1309251881Speter } 1310251881Speter } 1311251881Speter else if (state == state_git_tree_seen) 1312251881Speter { 1313251881Speter line_after_tree_header_read = TRUE; 1314251881Speter } 1315251881Speter else if (! valid_header_line && state != state_start 1316251881Speter && !starts_with(line->data, "index ")) 1317251881Speter { 1318251881Speter /* We've encountered an invalid diff header. 1319251881Speter * 1320251881Speter * Rewind to the start of the line just read - it may be a new 1321251881Speter * header that begins there. */ 1322251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line, 1323251881Speter scratch_pool)); 1324251881Speter state = state_start; 1325251881Speter } 1326251881Speter 1327251881Speter } 1328251881Speter while (! eof); 1329251881Speter 1330251881Speter (*patch)->reverse = reverse; 1331251881Speter if (reverse) 1332251881Speter { 1333251881Speter const char *temp; 1334251881Speter temp = (*patch)->old_filename; 1335251881Speter (*patch)->old_filename = (*patch)->new_filename; 1336251881Speter (*patch)->new_filename = temp; 1337251881Speter } 1338251881Speter 1339251881Speter if ((*patch)->old_filename == NULL || (*patch)->new_filename == NULL) 1340251881Speter { 1341251881Speter /* Something went wrong, just discard the result. */ 1342251881Speter *patch = NULL; 1343251881Speter } 1344251881Speter else 1345251881Speter SVN_ERR(parse_hunks(*patch, patch_file->apr_file, ignore_whitespace, 1346251881Speter result_pool, iterpool)); 1347251881Speter 1348251881Speter svn_pool_destroy(iterpool); 1349251881Speter 1350251881Speter patch_file->next_patch_offset = 0; 1351251881Speter SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_CUR, 1352251881Speter &patch_file->next_patch_offset, scratch_pool)); 1353251881Speter 1354251881Speter if (*patch) 1355251881Speter { 1356251881Speter /* Usually, hunks appear in the patch sorted by their original line 1357251881Speter * offset. But just in case they weren't parsed in this order for 1358251881Speter * some reason, we sort them so that our caller can assume that hunks 1359251881Speter * are sorted as if parsed from a usual patch. */ 1360251881Speter qsort((*patch)->hunks->elts, (*patch)->hunks->nelts, 1361251881Speter (*patch)->hunks->elt_size, compare_hunks); 1362251881Speter } 1363251881Speter 1364251881Speter return SVN_NO_ERROR; 1365251881Speter} 1366251881Speter 1367251881Spetersvn_error_t * 1368251881Spetersvn_diff_close_patch_file(svn_patch_file_t *patch_file, 1369251881Speter apr_pool_t *scratch_pool) 1370251881Speter{ 1371251881Speter return svn_error_trace(svn_io_file_close(patch_file->apr_file, 1372251881Speter scratch_pool)); 1373251881Speter} 1374