1/* dump.c --- writing filesystem contents into a portable 'dumpfile' format. 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23 24#include <stdarg.h> 25 26#include "svn_private_config.h" 27#include "svn_pools.h" 28#include "svn_error.h" 29#include "svn_fs.h" 30#include "svn_hash.h" 31#include "svn_iter.h" 32#include "svn_repos.h" 33#include "svn_string.h" 34#include "svn_dirent_uri.h" 35#include "svn_path.h" 36#include "svn_time.h" 37#include "svn_checksum.h" 38#include "svn_props.h" 39#include "svn_sorts.h" 40 41#include "private/svn_repos_private.h" 42#include "private/svn_mergeinfo_private.h" 43#include "private/svn_fs_private.h" 44#include "private/svn_sorts_private.h" 45#include "private/svn_utf_private.h" 46#include "private/svn_cache.h" 47#include "private/svn_fspath.h" 48 49#define ARE_VALID_COPY_ARGS(p,r) ((p) && SVN_IS_VALID_REVNUM(r)) 50 51/*----------------------------------------------------------------------*/ 52 53 54/* To be able to check whether a path exists in the current revision 55 (as changes come in), we need to track the relevant tree changes. 56 57 In particular, we remember deletions, additions and copies including 58 their copy-from info. Since the dump performs a pre-order tree walk, 59 we only need to store the data for the stack of parent folders. 60 61 The problem that we are trying to solve is that the dump receives 62 transforming operations whose validity depends on previous operations 63 in the same revision but cannot be checked against the final state 64 as stored in the repository as that is the state *after* we applied 65 the respective tree changes. 66 67 Note that the tracker functions don't perform any sanity or validity 68 checks. Those higher-level tests have to be done in the calling code. 69 However, there is no way to corrupt the data structure using the 70 provided functions. 71 */ 72 73/* Single entry in the path tracker. Not all levels along the path 74 hierarchy do need to have an instance of this struct but only those 75 that got changed by a tree modification. 76 77 Please note that the path info in this struct is stored in re-usable 78 stringbuf objects such that we don't need to allocate more memory than 79 the longest path we encounter. 80 */ 81typedef struct path_tracker_entry_t 82{ 83 /* path in the current tree */ 84 svn_stringbuf_t *path; 85 86 /* copy-from path (must be empty if COPYFROM_REV is SVN_INVALID_REVNUM) */ 87 svn_stringbuf_t *copyfrom_path; 88 89 /* copy-from revision (SVN_INVALID_REVNUM for additions / replacements 90 that don't copy history, i.e. with no sub-tree) */ 91 svn_revnum_t copyfrom_rev; 92 93 /* if FALSE, PATH has been deleted */ 94 svn_boolean_t exists; 95} path_tracker_entry_t; 96 97/* Tracks all tree modifications above the current path. 98 */ 99typedef struct path_tracker_t 100{ 101 /* Container for all relevant tree changes in depth order. 102 May contain more entries than DEPTH to allow for reusing memory. 103 Only entries 0 .. DEPTH-1 are valid. 104 */ 105 apr_array_header_t *stack; 106 107 /* Number of relevant entries in STACK. May be 0 */ 108 int depth; 109 110 /* Revision that we current track. If DEPTH is 0, paths are exist in 111 REVISION exactly when they exist in REVISION-1. This applies only 112 to the current state of our tree walk. 113 */ 114 svn_revnum_t revision; 115 116 /* Allocate container entries here. */ 117 apr_pool_t *pool; 118} path_tracker_t; 119 120/* Return a new path tracker object for REVISION, allocated in POOL. 121 */ 122static path_tracker_t * 123tracker_create(svn_revnum_t revision, 124 apr_pool_t *pool) 125{ 126 path_tracker_t *result = apr_pcalloc(pool, sizeof(*result)); 127 result->stack = apr_array_make(pool, 16, sizeof(path_tracker_entry_t)); 128 result->revision = revision; 129 result->pool = pool; 130 131 return result; 132} 133 134/* Remove all entries from TRACKER that are not relevant to PATH anymore. 135 * If ALLOW_EXACT_MATCH is FALSE, keep only entries that pertain to 136 * parent folders but not to PATH itself. 137 * 138 * This internal function implicitly updates the tracker state during the 139 * tree by removing "past" entries. Other functions will add entries when 140 * we encounter a new tree change. 141 */ 142static void 143tracker_trim(path_tracker_t *tracker, 144 const char *path, 145 svn_boolean_t allow_exact_match) 146{ 147 /* remove everything that is unrelated to PATH. 148 Note that TRACKER->STACK is depth-ordered, 149 i.e. stack[N] is a (maybe indirect) parent of stack[N+1] 150 for N+1 < DEPTH. 151 */ 152 for (; tracker->depth; --tracker->depth) 153 { 154 path_tracker_entry_t *parent = &APR_ARRAY_IDX(tracker->stack, 155 tracker->depth - 1, 156 path_tracker_entry_t); 157 const char *rel_path 158 = svn_dirent_skip_ancestor(parent->path->data, path); 159 160 /* always keep parents. Keep exact matches when allowed. */ 161 if (rel_path && (allow_exact_match || *rel_path != '\0')) 162 break; 163 } 164} 165 166/* Using TRACKER, check what path at what revision in the repository must 167 be checked to decide that whether PATH exists. Return the info in 168 *ORIG_PATH and *ORIG_REV, respectively. 169 170 If the path is known to not exist, *ORIG_PATH will be NULL and *ORIG_REV 171 will be SVN_INVALID_REVNUM. If *ORIG_REV is SVN_INVALID_REVNUM, PATH 172 has just been added in the revision currently being tracked. 173 174 Use POOL for allocations. Note that *ORIG_PATH may be allocated in POOL, 175 a reference to internal data with the same lifetime as TRACKER or just 176 PATH. 177 */ 178static void 179tracker_lookup(const char **orig_path, 180 svn_revnum_t *orig_rev, 181 path_tracker_t *tracker, 182 const char *path, 183 apr_pool_t *pool) 184{ 185 tracker_trim(tracker, path, TRUE); 186 if (tracker->depth == 0) 187 { 188 /* no tree changes -> paths are the same as in the previous rev. */ 189 *orig_path = path; 190 *orig_rev = tracker->revision - 1; 191 } 192 else 193 { 194 path_tracker_entry_t *parent = &APR_ARRAY_IDX(tracker->stack, 195 tracker->depth - 1, 196 path_tracker_entry_t); 197 if (parent->exists) 198 { 199 const char *rel_path 200 = svn_dirent_skip_ancestor(parent->path->data, path); 201 202 if (parent->copyfrom_rev != SVN_INVALID_REVNUM) 203 { 204 /* parent is a copy with history. Translate path. */ 205 *orig_path = svn_dirent_join(parent->copyfrom_path->data, 206 rel_path, pool); 207 *orig_rev = parent->copyfrom_rev; 208 } 209 else if (*rel_path == '\0') 210 { 211 /* added in this revision with no history */ 212 *orig_path = path; 213 *orig_rev = tracker->revision; 214 } 215 else 216 { 217 /* parent got added but not this path */ 218 *orig_path = NULL; 219 *orig_rev = SVN_INVALID_REVNUM; 220 } 221 } 222 else 223 { 224 /* (maybe parent) path has been deleted */ 225 *orig_path = NULL; 226 *orig_rev = SVN_INVALID_REVNUM; 227 } 228 } 229} 230 231/* Return a reference to the stack entry in TRACKER for PATH. If no 232 suitable entry exists, add one. Implicitly updates the tracked tree 233 location. 234 235 Only the PATH member of the result is being updated. All other members 236 will have undefined values. 237 */ 238static path_tracker_entry_t * 239tracker_add_entry(path_tracker_t *tracker, 240 const char *path) 241{ 242 path_tracker_entry_t *entry; 243 tracker_trim(tracker, path, FALSE); 244 245 if (tracker->depth == tracker->stack->nelts) 246 { 247 entry = apr_array_push(tracker->stack); 248 entry->path = svn_stringbuf_create_empty(tracker->pool); 249 entry->copyfrom_path = svn_stringbuf_create_empty(tracker->pool); 250 } 251 else 252 { 253 entry = &APR_ARRAY_IDX(tracker->stack, tracker->depth, 254 path_tracker_entry_t); 255 } 256 257 svn_stringbuf_set(entry->path, path); 258 ++tracker->depth; 259 260 return entry; 261} 262 263/* Update the TRACKER with a copy from COPYFROM_PATH@COPYFROM_REV to 264 PATH in the tracked revision. 265 */ 266static void 267tracker_path_copy(path_tracker_t *tracker, 268 const char *path, 269 const char *copyfrom_path, 270 svn_revnum_t copyfrom_rev) 271{ 272 path_tracker_entry_t *entry = tracker_add_entry(tracker, path); 273 274 svn_stringbuf_set(entry->copyfrom_path, copyfrom_path); 275 entry->copyfrom_rev = copyfrom_rev; 276 entry->exists = TRUE; 277} 278 279/* Update the TRACKER with a plain addition of PATH (without history). 280 */ 281static void 282tracker_path_add(path_tracker_t *tracker, 283 const char *path) 284{ 285 path_tracker_entry_t *entry = tracker_add_entry(tracker, path); 286 287 svn_stringbuf_setempty(entry->copyfrom_path); 288 entry->copyfrom_rev = SVN_INVALID_REVNUM; 289 entry->exists = TRUE; 290} 291 292/* Update the TRACKER with a replacement of PATH with a plain addition 293 (without history). 294 */ 295static void 296tracker_path_replace(path_tracker_t *tracker, 297 const char *path) 298{ 299 /* this will implicitly purge all previous sub-tree info from STACK. 300 Thus, no need to tack the deletion explicitly. */ 301 tracker_path_add(tracker, path); 302} 303 304/* Update the TRACKER with a deletion of PATH. 305 */ 306static void 307tracker_path_delete(path_tracker_t *tracker, 308 const char *path) 309{ 310 path_tracker_entry_t *entry = tracker_add_entry(tracker, path); 311 312 svn_stringbuf_setempty(entry->copyfrom_path); 313 entry->copyfrom_rev = SVN_INVALID_REVNUM; 314 entry->exists = FALSE; 315} 316 317 318/* Compute the delta between OLDROOT/OLDPATH and NEWROOT/NEWPATH and 319 store it into a new temporary file *TEMPFILE. OLDROOT may be NULL, 320 in which case the delta will be computed against an empty file, as 321 per the svn_fs_get_file_delta_stream docstring. Record the length 322 of the temporary file in *LEN, and rewind the file before 323 returning. */ 324static svn_error_t * 325store_delta(apr_file_t **tempfile, svn_filesize_t *len, 326 svn_fs_root_t *oldroot, const char *oldpath, 327 svn_fs_root_t *newroot, const char *newpath, apr_pool_t *pool) 328{ 329 svn_stream_t *temp_stream; 330 apr_off_t offset; 331 svn_txdelta_stream_t *delta_stream; 332 svn_txdelta_window_handler_t wh; 333 void *whb; 334 335 /* Create a temporary file and open a stream to it. Note that we need 336 the file handle in order to rewind it. */ 337 SVN_ERR(svn_io_open_unique_file3(tempfile, NULL, NULL, 338 svn_io_file_del_on_pool_cleanup, 339 pool, pool)); 340 temp_stream = svn_stream_from_aprfile2(*tempfile, TRUE, pool); 341 342 /* Compute the delta and send it to the temporary file. */ 343 SVN_ERR(svn_fs_get_file_delta_stream(&delta_stream, oldroot, oldpath, 344 newroot, newpath, pool)); 345 svn_txdelta_to_svndiff3(&wh, &whb, temp_stream, 0, 346 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool); 347 SVN_ERR(svn_txdelta_send_txstream(delta_stream, wh, whb, pool)); 348 349 /* Get the length of the temporary file and rewind it. */ 350 SVN_ERR(svn_io_file_get_offset(&offset, *tempfile, pool)); 351 *len = offset; 352 offset = 0; 353 return svn_io_file_seek(*tempfile, APR_SET, &offset, pool); 354} 355 356 357/* Send a notification of type #svn_repos_notify_warning, subtype WARNING, 358 with message WARNING_FMT formatted with the remaining variable arguments. 359 Send it by calling NOTIFY_FUNC (if not null) with NOTIFY_BATON. 360 */ 361__attribute__((format(printf, 5, 6))) 362static void 363notify_warning(apr_pool_t *scratch_pool, 364 svn_repos_notify_func_t notify_func, 365 void *notify_baton, 366 svn_repos_notify_warning_t warning, 367 const char *warning_fmt, 368 ...) 369{ 370 va_list va; 371 svn_repos_notify_t *notify; 372 373 if (notify_func == NULL) 374 return; 375 376 notify = svn_repos_notify_create(svn_repos_notify_warning, scratch_pool); 377 notify->warning = warning; 378 va_start(va, warning_fmt); 379 notify->warning_str = apr_pvsprintf(scratch_pool, warning_fmt, va); 380 va_end(va); 381 382 notify_func(notify_baton, notify, scratch_pool); 383} 384 385 386/*----------------------------------------------------------------------*/ 387 388/* Write to STREAM the header in HEADERS named KEY, if present. 389 */ 390static svn_error_t * 391write_header(svn_stream_t *stream, 392 apr_hash_t *headers, 393 const char *key, 394 apr_pool_t *scratch_pool) 395{ 396 const char *val = svn_hash_gets(headers, key); 397 398 if (val) 399 { 400 SVN_ERR(svn_stream_printf(stream, scratch_pool, 401 "%s: %s\n", key, val)); 402 } 403 return SVN_NO_ERROR; 404} 405 406/* Write headers, in arbitrary order. 407 * ### TODO: use a stable order 408 * ### Modifies HEADERS. 409 */ 410static svn_error_t * 411write_revision_headers(svn_stream_t *stream, 412 apr_hash_t *headers, 413 apr_pool_t *scratch_pool) 414{ 415 const char **h; 416 apr_hash_index_t *hi; 417 418 static const char *revision_headers_order[] = 419 { 420 SVN_REPOS_DUMPFILE_REVISION_NUMBER, /* must be first */ 421 NULL 422 }; 423 424 /* Write some headers in a given order */ 425 for (h = revision_headers_order; *h; h++) 426 { 427 SVN_ERR(write_header(stream, headers, *h, scratch_pool)); 428 svn_hash_sets(headers, *h, NULL); 429 } 430 431 /* Write any and all remaining headers except Content-length. 432 * ### TODO: use a stable order 433 */ 434 for (hi = apr_hash_first(scratch_pool, headers); hi; hi = apr_hash_next(hi)) 435 { 436 const char *key = apr_hash_this_key(hi); 437 438 if (strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH) != 0) 439 SVN_ERR(write_header(stream, headers, key, scratch_pool)); 440 } 441 442 /* Content-length must be last */ 443 SVN_ERR(write_header(stream, headers, SVN_REPOS_DUMPFILE_CONTENT_LENGTH, 444 scratch_pool)); 445 446 return SVN_NO_ERROR; 447} 448 449/* A header entry: the element type of the apr_array_header_t which is 450 * the real type of svn_repos__dumpfile_headers_t. 451 */ 452typedef struct svn_repos__dumpfile_header_entry_t { 453 const char *key, *val; 454} svn_repos__dumpfile_header_entry_t; 455 456svn_repos__dumpfile_headers_t * 457svn_repos__dumpfile_headers_create(apr_pool_t *pool) 458{ 459 svn_repos__dumpfile_headers_t *headers 460 = apr_array_make(pool, 5, sizeof(svn_repos__dumpfile_header_entry_t)); 461 462 return headers; 463} 464 465void 466svn_repos__dumpfile_header_push(svn_repos__dumpfile_headers_t *headers, 467 const char *key, 468 const char *val) 469{ 470 svn_repos__dumpfile_header_entry_t *h 471 = &APR_ARRAY_PUSH(headers, svn_repos__dumpfile_header_entry_t); 472 473 h->key = apr_pstrdup(headers->pool, key); 474 h->val = apr_pstrdup(headers->pool, val); 475} 476 477void 478svn_repos__dumpfile_header_pushf(svn_repos__dumpfile_headers_t *headers, 479 const char *key, 480 const char *val_fmt, 481 ...) 482{ 483 va_list ap; 484 svn_repos__dumpfile_header_entry_t *h 485 = &APR_ARRAY_PUSH(headers, svn_repos__dumpfile_header_entry_t); 486 487 h->key = apr_pstrdup(headers->pool, key); 488 va_start(ap, val_fmt); 489 h->val = apr_pvsprintf(headers->pool, val_fmt, ap); 490 va_end(ap); 491} 492 493svn_error_t * 494svn_repos__dump_headers(svn_stream_t *stream, 495 svn_repos__dumpfile_headers_t *headers, 496 apr_pool_t *scratch_pool) 497{ 498 int i; 499 500 for (i = 0; i < headers->nelts; i++) 501 { 502 svn_repos__dumpfile_header_entry_t *h 503 = &APR_ARRAY_IDX(headers, i, svn_repos__dumpfile_header_entry_t); 504 505 SVN_ERR(svn_stream_printf(stream, scratch_pool, 506 "%s: %s\n", h->key, h->val)); 507 } 508 509 /* End of headers */ 510 SVN_ERR(svn_stream_puts(stream, "\n")); 511 512 return SVN_NO_ERROR; 513} 514 515svn_error_t * 516svn_repos__dump_magic_header_record(svn_stream_t *dump_stream, 517 int version, 518 apr_pool_t *pool) 519{ 520 SVN_ERR(svn_stream_printf(dump_stream, pool, 521 SVN_REPOS_DUMPFILE_MAGIC_HEADER ": %d\n\n", 522 version)); 523 return SVN_NO_ERROR; 524} 525 526svn_error_t * 527svn_repos__dump_uuid_header_record(svn_stream_t *dump_stream, 528 const char *uuid, 529 apr_pool_t *pool) 530{ 531 if (uuid) 532 { 533 SVN_ERR(svn_stream_printf(dump_stream, pool, SVN_REPOS_DUMPFILE_UUID 534 ": %s\n\n", uuid)); 535 } 536 return SVN_NO_ERROR; 537} 538 539svn_error_t * 540svn_repos__dump_revision_record(svn_stream_t *dump_stream, 541 svn_revnum_t revision, 542 apr_hash_t *extra_headers, 543 apr_hash_t *revprops, 544 svn_boolean_t props_section_always, 545 apr_pool_t *scratch_pool) 546{ 547 svn_stringbuf_t *propstring = NULL; 548 apr_hash_t *headers; 549 550 if (extra_headers) 551 headers = apr_hash_copy(scratch_pool, extra_headers); 552 else 553 headers = apr_hash_make(scratch_pool); 554 555 /* ### someday write a revision-content-checksum */ 556 557 svn_hash_sets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER, 558 apr_psprintf(scratch_pool, "%ld", revision)); 559 560 if (apr_hash_count(revprops) || props_section_always) 561 { 562 svn_stream_t *propstream; 563 564 propstring = svn_stringbuf_create_empty(scratch_pool); 565 propstream = svn_stream_from_stringbuf(propstring, scratch_pool); 566 SVN_ERR(svn_hash_write2(revprops, propstream, "PROPS-END", scratch_pool)); 567 SVN_ERR(svn_stream_close(propstream)); 568 569 svn_hash_sets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH, 570 apr_psprintf(scratch_pool, 571 "%" APR_SIZE_T_FMT, propstring->len)); 572 } 573 574 if (propstring) 575 { 576 /* Write out a regular Content-length header for the benefit of 577 non-Subversion RFC-822 parsers. */ 578 svn_hash_sets(headers, SVN_REPOS_DUMPFILE_CONTENT_LENGTH, 579 apr_psprintf(scratch_pool, 580 "%" APR_SIZE_T_FMT, propstring->len)); 581 } 582 583 SVN_ERR(write_revision_headers(dump_stream, headers, scratch_pool)); 584 585 /* End of headers */ 586 SVN_ERR(svn_stream_puts(dump_stream, "\n")); 587 588 /* Property data. */ 589 if (propstring) 590 { 591 SVN_ERR(svn_stream_write(dump_stream, propstring->data, &propstring->len)); 592 } 593 594 /* put an end to revision */ 595 SVN_ERR(svn_stream_puts(dump_stream, "\n")); 596 597 return SVN_NO_ERROR; 598} 599 600svn_error_t * 601svn_repos__dump_node_record(svn_stream_t *dump_stream, 602 svn_repos__dumpfile_headers_t *headers, 603 svn_stringbuf_t *props_str, 604 svn_boolean_t has_text, 605 svn_filesize_t text_content_length, 606 svn_boolean_t content_length_always, 607 apr_pool_t *scratch_pool) 608{ 609 svn_filesize_t content_length = 0; 610 611 /* add content-length headers */ 612 if (props_str) 613 { 614 svn_repos__dumpfile_header_pushf( 615 headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH, 616 "%" APR_SIZE_T_FMT, props_str->len); 617 content_length += props_str->len; 618 } 619 if (has_text) 620 { 621 svn_repos__dumpfile_header_pushf( 622 headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH, 623 "%" SVN_FILESIZE_T_FMT, text_content_length); 624 content_length += text_content_length; 625 } 626 if (content_length_always || props_str || has_text) 627 { 628 svn_repos__dumpfile_header_pushf( 629 headers, SVN_REPOS_DUMPFILE_CONTENT_LENGTH, 630 "%" SVN_FILESIZE_T_FMT, content_length); 631 } 632 633 /* write the headers */ 634 SVN_ERR(svn_repos__dump_headers(dump_stream, headers, scratch_pool)); 635 636 /* write the props */ 637 if (props_str) 638 { 639 SVN_ERR(svn_stream_write(dump_stream, props_str->data, &props_str->len)); 640 } 641 return SVN_NO_ERROR; 642} 643 644/*----------------------------------------------------------------------*/ 645 646/** An editor which dumps node-data in 'dumpfile format' to a file. **/ 647 648/* Look, mom! No file batons! */ 649 650struct edit_baton 651{ 652 /* The relpath which implicitly prepends all full paths coming into 653 this editor. This will almost always be "". */ 654 const char *path; 655 656 /* The stream to dump to. */ 657 svn_stream_t *stream; 658 659 /* Send feedback here, if non-NULL */ 660 svn_repos_notify_func_t notify_func; 661 void *notify_baton; 662 663 /* The fs revision root, so we can read the contents of paths. */ 664 svn_fs_root_t *fs_root; 665 svn_revnum_t current_rev; 666 667 /* The fs, so we can grab historic information if needed. */ 668 svn_fs_t *fs; 669 670 /* True if dumped nodes should output deltas instead of full text. */ 671 svn_boolean_t use_deltas; 672 673 /* True if this "dump" is in fact a verify. */ 674 svn_boolean_t verify; 675 676 /* True if checking UCS normalization during a verify. */ 677 svn_boolean_t check_normalization; 678 679 /* The first revision dumped in this dumpstream. */ 680 svn_revnum_t oldest_dumped_rev; 681 682 /* If not NULL, set to true if any references to revisions older than 683 OLDEST_DUMPED_REV were found in the dumpstream. */ 684 svn_boolean_t *found_old_reference; 685 686 /* If not NULL, set to true if any mergeinfo was dumped which contains 687 revisions older than OLDEST_DUMPED_REV. */ 688 svn_boolean_t *found_old_mergeinfo; 689 690 /* Structure allows us to verify the paths currently being dumped. 691 If NULL, validity checks are being skipped. */ 692 path_tracker_t *path_tracker; 693}; 694 695struct dir_baton 696{ 697 struct edit_baton *edit_baton; 698 699 /* has this directory been written to the output stream? */ 700 svn_boolean_t written_out; 701 702 /* the repository relpath associated with this directory */ 703 const char *path; 704 705 /* The comparison repository relpath and revision of this directory. 706 If both of these are valid, use them as a source against which to 707 compare the directory instead of the default comparison source of 708 PATH in the previous revision. */ 709 const char *cmp_path; 710 svn_revnum_t cmp_rev; 711 712 /* hash of paths that need to be deleted, though some -might- be 713 replaced. maps const char * paths to this dir_baton. (they're 714 full paths, because that's what the editor driver gives us. but 715 really, they're all within this directory.) */ 716 apr_hash_t *deleted_entries; 717 718 /* A flag indicating that new entries have been added to this 719 directory in this revision. Used to optimize detection of UCS 720 representation collisions; we will only check for that in 721 revisions where new names appear in the directory. */ 722 svn_boolean_t check_name_collision; 723 724 /* pool to be used for deleting the hash items */ 725 apr_pool_t *pool; 726}; 727 728 729/* Make a directory baton to represent the directory was path 730 (relative to EDIT_BATON's path) is PATH. 731 732 CMP_PATH/CMP_REV are the path/revision against which this directory 733 should be compared for changes. If either is omitted (NULL for the 734 path, SVN_INVALID_REVNUM for the rev), just compare this directory 735 PATH against itself in the previous revision. 736 737 PB is the directory baton of this directory's parent, 738 or NULL if this is the top-level directory of the edit. 739 740 Perform all allocations in POOL. */ 741static struct svn_error_t * 742make_dir_baton(struct dir_baton **dbp, 743 const char *path, 744 const char *cmp_path, 745 svn_revnum_t cmp_rev, 746 void *edit_baton, 747 struct dir_baton *pb, 748 apr_pool_t *pool) 749{ 750 struct edit_baton *eb = edit_baton; 751 struct dir_baton *new_db = apr_pcalloc(pool, sizeof(*new_db)); 752 const char *full_path, *canonicalized_path; 753 754 /* A path relative to nothing? I don't think so. */ 755 SVN_ERR_ASSERT(!path || pb); 756 757 /* Construct the full path of this node. */ 758 if (pb) 759 full_path = svn_relpath_join(eb->path, path, pool); 760 else 761 full_path = apr_pstrdup(pool, eb->path); 762 763 /* Remove leading slashes from copyfrom paths. */ 764 if (cmp_path) 765 { 766 SVN_ERR(svn_relpath_canonicalize_safe(&canonicalized_path, NULL, 767 cmp_path, pool, pool)); 768 cmp_path = canonicalized_path; 769 } 770 771 new_db->edit_baton = eb; 772 new_db->path = full_path; 773 new_db->cmp_path = cmp_path; 774 new_db->cmp_rev = cmp_rev; 775 new_db->written_out = FALSE; 776 new_db->deleted_entries = apr_hash_make(pool); 777 new_db->check_name_collision = FALSE; 778 new_db->pool = pool; 779 780 *dbp = new_db; 781 return SVN_NO_ERROR; 782} 783 784static svn_error_t * 785fetch_kind_func(svn_node_kind_t *kind, 786 void *baton, 787 const char *path, 788 svn_revnum_t base_revision, 789 apr_pool_t *scratch_pool); 790 791/* Return an error when PATH in REVISION does not exist or is of a 792 different kind than EXPECTED_KIND. If the latter is svn_node_unknown, 793 skip that check. Use EB for context information. If REVISION is the 794 current revision, use EB's path tracker to follow renames, deletions, 795 etc. 796 797 Use SCRATCH_POOL for temporary allocations. 798 No-op if EB's path tracker has not been initialized. 799 */ 800static svn_error_t * 801node_must_exist(struct edit_baton *eb, 802 const char *path, 803 svn_revnum_t revision, 804 svn_node_kind_t expected_kind, 805 apr_pool_t *scratch_pool) 806{ 807 svn_node_kind_t kind = svn_node_none; 808 809 /* in case the caller is trying something stupid ... */ 810 if (eb->path_tracker == NULL) 811 return SVN_NO_ERROR; 812 813 /* paths pertaining to the revision currently being processed must 814 be translated / checked using our path tracker. */ 815 if (revision == eb->path_tracker->revision) 816 tracker_lookup(&path, &revision, eb->path_tracker, path, scratch_pool); 817 818 /* determine the node type (default: no such node) */ 819 if (path) 820 SVN_ERR(fetch_kind_func(&kind, eb, path, revision, scratch_pool)); 821 822 /* check results */ 823 if (kind == svn_node_none) 824 return svn_error_createf(SVN_ERR_FS_NOT_FOUND, NULL, 825 _("Path '%s' not found in r%ld."), 826 path, revision); 827 828 if (expected_kind != kind && expected_kind != svn_node_unknown) 829 return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL, 830 _("Unexpected node kind %d for '%s' at r%ld. " 831 "Expected kind was %d."), 832 kind, path, revision, expected_kind); 833 834 return SVN_NO_ERROR; 835} 836 837/* Return an error when PATH exists in REVISION. Use EB for context 838 information. If REVISION is the current revision, use EB's path 839 tracker to follow renames, deletions, etc. 840 841 Use SCRATCH_POOL for temporary allocations. 842 No-op if EB's path tracker has not been initialized. 843 */ 844static svn_error_t * 845node_must_not_exist(struct edit_baton *eb, 846 const char *path, 847 svn_revnum_t revision, 848 apr_pool_t *scratch_pool) 849{ 850 svn_node_kind_t kind = svn_node_none; 851 852 /* in case the caller is trying something stupid ... */ 853 if (eb->path_tracker == NULL) 854 return SVN_NO_ERROR; 855 856 /* paths pertaining to the revision currently being processed must 857 be translated / checked using our path tracker. */ 858 if (revision == eb->path_tracker->revision) 859 tracker_lookup(&path, &revision, eb->path_tracker, path, scratch_pool); 860 861 /* determine the node type (default: no such node) */ 862 if (path) 863 SVN_ERR(fetch_kind_func(&kind, eb, path, revision, scratch_pool)); 864 865 /* check results */ 866 if (kind != svn_node_none) 867 return svn_error_createf(SVN_ERR_FS_ALREADY_EXISTS, NULL, 868 _("Path '%s' exists in r%ld."), 869 path, revision); 870 871 return SVN_NO_ERROR; 872} 873 874/* If the mergeinfo in MERGEINFO_STR refers to any revisions older than 875 * OLDEST_DUMPED_REV, issue a warning and set *FOUND_OLD_MERGEINFO to TRUE, 876 * otherwise leave *FOUND_OLD_MERGEINFO unchanged. 877 */ 878static svn_error_t * 879verify_mergeinfo_revisions(svn_boolean_t *found_old_mergeinfo, 880 const char *mergeinfo_str, 881 svn_revnum_t oldest_dumped_rev, 882 svn_repos_notify_func_t notify_func, 883 void *notify_baton, 884 apr_pool_t *pool) 885{ 886 svn_mergeinfo_t mergeinfo, old_mergeinfo; 887 888 SVN_ERR(svn_mergeinfo_parse(&mergeinfo, mergeinfo_str, pool)); 889 SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges( 890 &old_mergeinfo, mergeinfo, 891 oldest_dumped_rev - 1, 0, 892 TRUE, pool, pool)); 893 894 if (apr_hash_count(old_mergeinfo)) 895 { 896 notify_warning(pool, notify_func, notify_baton, 897 svn_repos_notify_warning_found_old_mergeinfo, 898 _("Mergeinfo referencing revision(s) prior " 899 "to the oldest dumped revision (r%ld). " 900 "Loading this dump may result in invalid " 901 "mergeinfo."), 902 oldest_dumped_rev); 903 904 if (found_old_mergeinfo) 905 *found_old_mergeinfo = TRUE; 906 } 907 908 return SVN_NO_ERROR; 909} 910 911/* Unique string pointers used by verify_mergeinfo_normalization() 912 and check_name_collision() */ 913static const char normalized_unique[] = "normalized_unique"; 914static const char normalized_collision[] = "normalized_collision"; 915 916 917/* Baton for extract_mergeinfo_paths */ 918struct extract_mergeinfo_paths_baton 919{ 920 apr_hash_t *result; 921 svn_boolean_t normalize; 922 svn_membuf_t buffer; 923}; 924 925/* Hash iterator that uniquifies all keys into a single hash table, 926 optionally normalizing them first. */ 927static svn_error_t * 928extract_mergeinfo_paths(void *baton, const void *key, apr_ssize_t klen, 929 void *val, apr_pool_t *iterpool) 930{ 931 struct extract_mergeinfo_paths_baton *const xb = baton; 932 if (xb->normalize) 933 { 934 const char *normkey; 935 SVN_ERR(svn_utf__normalize(&normkey, key, klen, &xb->buffer)); 936 svn_hash_sets(xb->result, 937 apr_pstrdup(xb->buffer.pool, normkey), 938 normalized_unique); 939 } 940 else 941 apr_hash_set(xb->result, 942 apr_pmemdup(xb->buffer.pool, key, klen + 1), klen, 943 normalized_unique); 944 return SVN_NO_ERROR; 945} 946 947/* Baton for filter_mergeinfo_paths */ 948struct filter_mergeinfo_paths_baton 949{ 950 apr_hash_t *paths; 951}; 952 953/* Compare two sets of denormalized paths from mergeinfo entries, 954 removing duplicates. */ 955static svn_error_t * 956filter_mergeinfo_paths(void *baton, const void *key, apr_ssize_t klen, 957 void *val, apr_pool_t *iterpool) 958{ 959 struct filter_mergeinfo_paths_baton *const fb = baton; 960 961 if (apr_hash_get(fb->paths, key, klen)) 962 apr_hash_set(fb->paths, key, klen, NULL); 963 964 return SVN_NO_ERROR; 965} 966 967/* Baton used by the check_mergeinfo_normalization hash iterator. */ 968struct verify_mergeinfo_normalization_baton 969{ 970 const char* path; 971 apr_hash_t *normalized_paths; 972 svn_membuf_t buffer; 973 svn_repos_notify_func_t notify_func; 974 void *notify_baton; 975}; 976 977/* Hash iterator that verifies normalization and collision of paths in 978 an svn:mergeinfo property. */ 979static svn_error_t * 980verify_mergeinfo_normalization(void *baton, const void *key, apr_ssize_t klen, 981 void *val, apr_pool_t *iterpool) 982{ 983 struct verify_mergeinfo_normalization_baton *const vb = baton; 984 985 const char *const path = key; 986 const char *normpath; 987 const char *found; 988 989 SVN_ERR(svn_utf__normalize(&normpath, path, klen, &vb->buffer)); 990 found = svn_hash_gets(vb->normalized_paths, normpath); 991 if (!found) 992 svn_hash_sets(vb->normalized_paths, 993 apr_pstrdup(vb->buffer.pool, normpath), 994 normalized_unique); 995 else if (found == normalized_collision) 996 /* Skip already reported collision */; 997 else 998 { 999 /* Report path collision in mergeinfo */ 1000 svn_hash_sets(vb->normalized_paths, 1001 apr_pstrdup(vb->buffer.pool, normpath), 1002 normalized_collision); 1003 1004 notify_warning(iterpool, vb->notify_func, vb->notify_baton, 1005 svn_repos_notify_warning_mergeinfo_collision, 1006 _("Duplicate representation of path '%s'" 1007 " in %s property of '%s'"), 1008 normpath, SVN_PROP_MERGEINFO, vb->path); 1009 } 1010 return SVN_NO_ERROR; 1011} 1012 1013/* Check UCS normalization of mergeinfo for PATH. NEW_MERGEINFO is the 1014 svn:mergeinfo property value being set; OLD_MERGEINFO is the 1015 previous property value, which may be NULL. Only the paths that 1016 were added in are checked, including collision checks. This 1017 minimizes the number of notifications we generate for a given 1018 mergeinfo property. */ 1019static svn_error_t * 1020check_mergeinfo_normalization(const char *path, 1021 const char *new_mergeinfo, 1022 const char *old_mergeinfo, 1023 svn_repos_notify_func_t notify_func, 1024 void *notify_baton, 1025 apr_pool_t *pool) 1026{ 1027 svn_mergeinfo_t mergeinfo; 1028 apr_hash_t *normalized_paths; 1029 apr_hash_t *added_paths; 1030 struct extract_mergeinfo_paths_baton extract_baton; 1031 struct verify_mergeinfo_normalization_baton verify_baton; 1032 1033 SVN_ERR(svn_mergeinfo_parse(&mergeinfo, new_mergeinfo, pool)); 1034 1035 extract_baton.result = apr_hash_make(pool); 1036 extract_baton.normalize = FALSE; 1037 svn_membuf__create(&extract_baton.buffer, 0, pool); 1038 SVN_ERR(svn_iter_apr_hash(NULL, mergeinfo, 1039 extract_mergeinfo_paths, 1040 &extract_baton, pool)); 1041 added_paths = extract_baton.result; 1042 1043 if (old_mergeinfo) 1044 { 1045 struct filter_mergeinfo_paths_baton filter_baton; 1046 svn_mergeinfo_t oldinfo; 1047 1048 extract_baton.result = apr_hash_make(pool); 1049 extract_baton.normalize = TRUE; 1050 SVN_ERR(svn_mergeinfo_parse(&oldinfo, old_mergeinfo, pool)); 1051 SVN_ERR(svn_iter_apr_hash(NULL, oldinfo, 1052 extract_mergeinfo_paths, 1053 &extract_baton, pool)); 1054 normalized_paths = extract_baton.result; 1055 1056 filter_baton.paths = added_paths; 1057 SVN_ERR(svn_iter_apr_hash(NULL, oldinfo, 1058 filter_mergeinfo_paths, 1059 &filter_baton, pool)); 1060 } 1061 else 1062 normalized_paths = apr_hash_make(pool); 1063 1064 verify_baton.path = path; 1065 verify_baton.normalized_paths = normalized_paths; 1066 verify_baton.buffer = extract_baton.buffer; 1067 verify_baton.notify_func = notify_func; 1068 verify_baton.notify_baton = notify_baton; 1069 SVN_ERR(svn_iter_apr_hash(NULL, added_paths, 1070 verify_mergeinfo_normalization, 1071 &verify_baton, pool)); 1072 1073 return SVN_NO_ERROR; 1074} 1075 1076 1077/* A special case of dump_node(), for a delete record. 1078 * 1079 * The only thing special about this version is it only writes one blank 1080 * line, not two, after the headers. Why? Historical precedent for the 1081 * case where a delete record is used as part of a (delete + add-with-history) 1082 * in implementing a replacement. 1083 * 1084 * Also it doesn't do a path-tracker check. 1085 */ 1086static svn_error_t * 1087dump_node_delete(svn_stream_t *stream, 1088 const char *node_relpath, 1089 apr_pool_t *pool) 1090{ 1091 svn_repos__dumpfile_headers_t *headers 1092 = svn_repos__dumpfile_headers_create(pool); 1093 1094 /* Node-path: ... */ 1095 svn_repos__dumpfile_header_push( 1096 headers, SVN_REPOS_DUMPFILE_NODE_PATH, node_relpath); 1097 1098 /* Node-action: delete */ 1099 svn_repos__dumpfile_header_push( 1100 headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "delete"); 1101 1102 SVN_ERR(svn_repos__dump_headers(stream, headers, pool)); 1103 return SVN_NO_ERROR; 1104} 1105 1106/* This helper is the main "meat" of the editor -- it does all the 1107 work of writing a node record. 1108 1109 Write out a node record for PATH of type KIND under EB->FS_ROOT. 1110 ACTION describes what is happening to the node (see enum svn_node_action). 1111 Write record to writable EB->STREAM. 1112 1113 If the node was itself copied, IS_COPY is TRUE and the 1114 path/revision of the copy source are in CMP_PATH/CMP_REV. If 1115 IS_COPY is FALSE, yet CMP_PATH/CMP_REV are valid, this node is part 1116 of a copied subtree. 1117 */ 1118static svn_error_t * 1119dump_node(struct edit_baton *eb, 1120 const char *path, 1121 svn_node_kind_t kind, 1122 enum svn_node_action action, 1123 svn_boolean_t is_copy, 1124 const char *cmp_path, 1125 svn_revnum_t cmp_rev, 1126 apr_pool_t *pool) 1127{ 1128 svn_stringbuf_t *propstring; 1129 apr_size_t len; 1130 svn_boolean_t must_dump_text = FALSE, must_dump_props = FALSE; 1131 const char *compare_path = path; 1132 svn_revnum_t compare_rev = eb->current_rev - 1; 1133 svn_fs_root_t *compare_root = NULL; 1134 apr_file_t *delta_file = NULL; 1135 svn_repos__dumpfile_headers_t *headers 1136 = svn_repos__dumpfile_headers_create(pool); 1137 svn_filesize_t textlen; 1138 1139 /* Maybe validate the path. */ 1140 if (eb->verify || eb->notify_func) 1141 { 1142 svn_error_t *err = svn_fs__path_valid(path, pool); 1143 1144 if (err) 1145 { 1146 if (eb->notify_func) 1147 { 1148 char errbuf[512]; /* ### svn_strerror() magic number */ 1149 1150 notify_warning(pool, eb->notify_func, eb->notify_baton, 1151 svn_repos_notify_warning_invalid_fspath, 1152 _("E%06d: While validating fspath '%s': %s"), 1153 err->apr_err, path, 1154 svn_err_best_message(err, errbuf, sizeof(errbuf))); 1155 } 1156 1157 /* Return the error in addition to notifying about it. */ 1158 if (eb->verify) 1159 return svn_error_trace(err); 1160 else 1161 svn_error_clear(err); 1162 } 1163 } 1164 1165 /* Write out metadata headers for this file node. */ 1166 svn_repos__dumpfile_header_push( 1167 headers, SVN_REPOS_DUMPFILE_NODE_PATH, path); 1168 if (kind == svn_node_file) 1169 svn_repos__dumpfile_header_push( 1170 headers, SVN_REPOS_DUMPFILE_NODE_KIND, "file"); 1171 else if (kind == svn_node_dir) 1172 svn_repos__dumpfile_header_push( 1173 headers, SVN_REPOS_DUMPFILE_NODE_KIND, "dir"); 1174 1175 /* Remove leading slashes from copyfrom paths. */ 1176 if (cmp_path) 1177 { 1178 const char *canonicalized_path; 1179 SVN_ERR(svn_relpath_canonicalize_safe(&canonicalized_path, NULL, 1180 cmp_path, pool, pool)); 1181 cmp_path = canonicalized_path; 1182 } 1183 1184 /* Validate the comparison path/rev. */ 1185 if (ARE_VALID_COPY_ARGS(cmp_path, cmp_rev)) 1186 { 1187 compare_path = cmp_path; 1188 compare_rev = cmp_rev; 1189 } 1190 1191 switch (action) 1192 { 1193 case svn_node_action_change: 1194 if (eb->path_tracker) 1195 SVN_ERR_W(node_must_exist(eb, path, eb->current_rev, kind, pool), 1196 apr_psprintf(pool, _("Change invalid path '%s' in r%ld"), 1197 path, eb->current_rev)); 1198 1199 svn_repos__dumpfile_header_push( 1200 headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "change"); 1201 1202 /* either the text or props changed, or possibly both. */ 1203 SVN_ERR(svn_fs_revision_root(&compare_root, 1204 svn_fs_root_fs(eb->fs_root), 1205 compare_rev, pool)); 1206 1207 SVN_ERR(svn_fs_props_changed(&must_dump_props, 1208 compare_root, compare_path, 1209 eb->fs_root, path, pool)); 1210 if (kind == svn_node_file) 1211 SVN_ERR(svn_fs_contents_changed(&must_dump_text, 1212 compare_root, compare_path, 1213 eb->fs_root, path, pool)); 1214 break; 1215 1216 case svn_node_action_delete: 1217 if (eb->path_tracker) 1218 { 1219 SVN_ERR_W(node_must_exist(eb, path, eb->current_rev, kind, pool), 1220 apr_psprintf(pool, _("Deleting invalid path '%s' in r%ld"), 1221 path, eb->current_rev)); 1222 tracker_path_delete(eb->path_tracker, path); 1223 } 1224 1225 svn_repos__dumpfile_header_push( 1226 headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "delete"); 1227 1228 /* we can leave this routine quietly now, don't need to dump 1229 any content. */ 1230 must_dump_text = FALSE; 1231 must_dump_props = FALSE; 1232 break; 1233 1234 case svn_node_action_replace: 1235 if (eb->path_tracker) 1236 SVN_ERR_W(node_must_exist(eb, path, eb->current_rev, 1237 svn_node_unknown, pool), 1238 apr_psprintf(pool, 1239 _("Replacing non-existent path '%s' in r%ld"), 1240 path, eb->current_rev)); 1241 1242 if (! is_copy) 1243 { 1244 if (eb->path_tracker) 1245 tracker_path_replace(eb->path_tracker, path); 1246 1247 /* a simple delete+add, implied by a single 'replace' action. */ 1248 svn_repos__dumpfile_header_push( 1249 headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "replace"); 1250 1251 /* definitely need to dump all content for a replace. */ 1252 if (kind == svn_node_file) 1253 must_dump_text = TRUE; 1254 must_dump_props = TRUE; 1255 break; 1256 } 1257 else 1258 { 1259 /* more complex: delete original, then add-with-history. */ 1260 /* ### Why not write a 'replace' record? Don't know. */ 1261 1262 if (eb->path_tracker) 1263 { 1264 tracker_path_delete(eb->path_tracker, path); 1265 } 1266 1267 /* ### Unusually, we end this 'delete' node record with only a single 1268 blank line after the header block -- no extra blank line. */ 1269 SVN_ERR(dump_node_delete(eb->stream, path, pool)); 1270 1271 /* The remaining action is a non-replacing add-with-history */ 1272 /* action = svn_node_action_add; */ 1273 } 1274 /* FALL THROUGH to 'add' */ 1275 1276 case svn_node_action_add: 1277 if (eb->path_tracker) 1278 SVN_ERR_W(node_must_not_exist(eb, path, eb->current_rev, pool), 1279 apr_psprintf(pool, 1280 _("Adding already existing path '%s' in r%ld"), 1281 path, eb->current_rev)); 1282 1283 svn_repos__dumpfile_header_push( 1284 headers, SVN_REPOS_DUMPFILE_NODE_ACTION, "add"); 1285 1286 if (! is_copy) 1287 { 1288 if (eb->path_tracker) 1289 tracker_path_add(eb->path_tracker, path); 1290 1291 /* Dump all contents for a simple 'add'. */ 1292 if (kind == svn_node_file) 1293 must_dump_text = TRUE; 1294 must_dump_props = TRUE; 1295 } 1296 else 1297 { 1298 if (eb->path_tracker) 1299 { 1300 SVN_ERR_W(node_must_exist(eb, compare_path, compare_rev, 1301 kind, pool), 1302 apr_psprintf(pool, 1303 _("Copying from invalid path to " 1304 "'%s' in r%ld"), 1305 path, eb->current_rev)); 1306 tracker_path_copy(eb->path_tracker, path, compare_path, 1307 compare_rev); 1308 } 1309 1310 if (!eb->verify && cmp_rev < eb->oldest_dumped_rev 1311 && eb->notify_func) 1312 { 1313 notify_warning(pool, eb->notify_func, eb->notify_baton, 1314 svn_repos_notify_warning_found_old_reference, 1315 _("Referencing data in revision %ld," 1316 " which is older than the oldest" 1317 " dumped revision (r%ld). Loading this dump" 1318 " into an empty repository" 1319 " will fail."), 1320 cmp_rev, eb->oldest_dumped_rev); 1321 if (eb->found_old_reference) 1322 *eb->found_old_reference = TRUE; 1323 } 1324 1325 svn_repos__dumpfile_header_pushf( 1326 headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV, "%ld", cmp_rev); 1327 svn_repos__dumpfile_header_push( 1328 headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH, cmp_path); 1329 1330 SVN_ERR(svn_fs_revision_root(&compare_root, 1331 svn_fs_root_fs(eb->fs_root), 1332 compare_rev, pool)); 1333 1334 /* Need to decide if the copied node had any extra textual or 1335 property mods as well. */ 1336 SVN_ERR(svn_fs_props_changed(&must_dump_props, 1337 compare_root, compare_path, 1338 eb->fs_root, path, pool)); 1339 if (kind == svn_node_file) 1340 { 1341 svn_checksum_t *checksum; 1342 const char *hex_digest; 1343 SVN_ERR(svn_fs_contents_changed(&must_dump_text, 1344 compare_root, compare_path, 1345 eb->fs_root, path, pool)); 1346 1347 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5, 1348 compare_root, compare_path, 1349 FALSE, pool)); 1350 hex_digest = svn_checksum_to_cstring(checksum, pool); 1351 if (hex_digest) 1352 svn_repos__dumpfile_header_push( 1353 headers, SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_MD5, hex_digest); 1354 1355 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1, 1356 compare_root, compare_path, 1357 FALSE, pool)); 1358 hex_digest = svn_checksum_to_cstring(checksum, pool); 1359 if (hex_digest) 1360 svn_repos__dumpfile_header_push( 1361 headers, SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_SHA1, hex_digest); 1362 } 1363 } 1364 break; 1365 } 1366 1367 if ((! must_dump_text) && (! must_dump_props)) 1368 { 1369 /* If we're not supposed to dump text or props, so be it, we can 1370 just go home. However, if either one needs to be dumped, 1371 then our dumpstream format demands that at a *minimum*, we 1372 see a lone "PROPS-END" as a divider between text and props 1373 content within the content-block. */ 1374 SVN_ERR(svn_repos__dump_headers(eb->stream, headers, pool)); 1375 len = 1; 1376 return svn_stream_write(eb->stream, "\n", &len); /* ### needed? */ 1377 } 1378 1379 /*** Start prepping content to dump... ***/ 1380 1381 /* If we are supposed to dump properties, write out a property 1382 length header and generate a stringbuf that contains those 1383 property values here. */ 1384 if (must_dump_props) 1385 { 1386 apr_hash_t *prophash, *oldhash = NULL; 1387 svn_stream_t *propstream; 1388 1389 SVN_ERR(svn_fs_node_proplist(&prophash, eb->fs_root, path, pool)); 1390 1391 /* If this is a partial dump, then issue a warning if we dump mergeinfo 1392 properties that refer to revisions older than the first revision 1393 dumped. */ 1394 if (!eb->verify && eb->notify_func && eb->oldest_dumped_rev > 1) 1395 { 1396 svn_string_t *mergeinfo_str = svn_hash_gets(prophash, 1397 SVN_PROP_MERGEINFO); 1398 if (mergeinfo_str) 1399 { 1400 /* An error in verifying the mergeinfo must not prevent dumping 1401 the data. Ignore any such error. */ 1402 svn_error_clear(verify_mergeinfo_revisions( 1403 eb->found_old_mergeinfo, 1404 mergeinfo_str->data, eb->oldest_dumped_rev, 1405 eb->notify_func, eb->notify_baton, 1406 pool)); 1407 } 1408 } 1409 1410 /* If we're checking UCS normalization, also parse any changed 1411 mergeinfo and warn about denormalized paths and name 1412 collisions there. */ 1413 if (eb->verify && eb->check_normalization && eb->notify_func) 1414 { 1415 /* N.B.: This hash lookup happens only once; the conditions 1416 for verifying historic mergeinfo references and checking 1417 UCS normalization are mutually exclusive. */ 1418 svn_string_t *mergeinfo_str = svn_hash_gets(prophash, 1419 SVN_PROP_MERGEINFO); 1420 if (mergeinfo_str) 1421 { 1422 svn_string_t *oldinfo_str = NULL; 1423 if (compare_root) 1424 { 1425 SVN_ERR(svn_fs_node_proplist(&oldhash, 1426 compare_root, compare_path, 1427 pool)); 1428 oldinfo_str = svn_hash_gets(oldhash, SVN_PROP_MERGEINFO); 1429 } 1430 SVN_ERR(check_mergeinfo_normalization( 1431 path, mergeinfo_str->data, 1432 (oldinfo_str ? oldinfo_str->data : NULL), 1433 eb->notify_func, eb->notify_baton, pool)); 1434 } 1435 } 1436 1437 if (eb->use_deltas && compare_root) 1438 { 1439 /* Fetch the old property hash to diff against and output a header 1440 saying that our property contents are a delta. */ 1441 if (!oldhash) /* May have been set for normalization check */ 1442 SVN_ERR(svn_fs_node_proplist(&oldhash, compare_root, compare_path, 1443 pool)); 1444 svn_repos__dumpfile_header_push( 1445 headers, SVN_REPOS_DUMPFILE_PROP_DELTA, "true"); 1446 } 1447 else 1448 oldhash = apr_hash_make(pool); 1449 propstring = svn_stringbuf_create_ensure(0, pool); 1450 propstream = svn_stream_from_stringbuf(propstring, pool); 1451 SVN_ERR(svn_hash_write_incremental(prophash, oldhash, propstream, 1452 "PROPS-END", pool)); 1453 SVN_ERR(svn_stream_close(propstream)); 1454 } 1455 1456 /* If we are supposed to dump text, write out a text length header 1457 here, and an MD5 checksum (if available). */ 1458 if (must_dump_text && (kind == svn_node_file)) 1459 { 1460 svn_checksum_t *checksum; 1461 const char *hex_digest; 1462 1463 if (eb->use_deltas) 1464 { 1465 /* Compute the text delta now and write it into a temporary 1466 file, so that we can find its length. Output a header 1467 saying our text contents are a delta. */ 1468 SVN_ERR(store_delta(&delta_file, &textlen, compare_root, 1469 compare_path, eb->fs_root, path, pool)); 1470 svn_repos__dumpfile_header_push( 1471 headers, SVN_REPOS_DUMPFILE_TEXT_DELTA, "true"); 1472 1473 if (compare_root) 1474 { 1475 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5, 1476 compare_root, compare_path, 1477 FALSE, pool)); 1478 hex_digest = svn_checksum_to_cstring(checksum, pool); 1479 if (hex_digest) 1480 svn_repos__dumpfile_header_push( 1481 headers, SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_MD5, hex_digest); 1482 1483 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1, 1484 compare_root, compare_path, 1485 FALSE, pool)); 1486 hex_digest = svn_checksum_to_cstring(checksum, pool); 1487 if (hex_digest) 1488 svn_repos__dumpfile_header_push( 1489 headers, SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_SHA1, hex_digest); 1490 } 1491 } 1492 else 1493 { 1494 /* Just fetch the length of the file. */ 1495 SVN_ERR(svn_fs_file_length(&textlen, eb->fs_root, path, pool)); 1496 } 1497 1498 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5, 1499 eb->fs_root, path, FALSE, pool)); 1500 hex_digest = svn_checksum_to_cstring(checksum, pool); 1501 if (hex_digest) 1502 svn_repos__dumpfile_header_push( 1503 headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_MD5, hex_digest); 1504 1505 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1, 1506 eb->fs_root, path, FALSE, pool)); 1507 hex_digest = svn_checksum_to_cstring(checksum, pool); 1508 if (hex_digest) 1509 svn_repos__dumpfile_header_push( 1510 headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_SHA1, hex_digest); 1511 } 1512 1513 /* 'Content-length:' is the last header before we dump the content, 1514 and is the sum of the text and prop contents lengths. We write 1515 this only for the benefit of non-Subversion RFC-822 parsers. */ 1516 SVN_ERR(svn_repos__dump_node_record(eb->stream, headers, 1517 must_dump_props ? propstring : NULL, 1518 must_dump_text, 1519 must_dump_text ? textlen : 0, 1520 TRUE /*content_length_always*/, 1521 pool)); 1522 1523 /* Dump text content */ 1524 if (must_dump_text && (kind == svn_node_file)) 1525 { 1526 svn_stream_t *contents; 1527 1528 if (delta_file) 1529 { 1530 /* Make sure to close the underlying file when the stream is 1531 closed. */ 1532 contents = svn_stream_from_aprfile2(delta_file, FALSE, pool); 1533 } 1534 else 1535 SVN_ERR(svn_fs_file_contents(&contents, eb->fs_root, path, pool)); 1536 1537 SVN_ERR(svn_stream_copy3(contents, svn_stream_disown(eb->stream, pool), 1538 NULL, NULL, pool)); 1539 } 1540 1541 len = 2; 1542 return svn_stream_write(eb->stream, "\n\n", &len); /* ### needed? */ 1543} 1544 1545 1546static svn_error_t * 1547open_root(void *edit_baton, 1548 svn_revnum_t base_revision, 1549 apr_pool_t *pool, 1550 void **root_baton) 1551{ 1552 return svn_error_trace(make_dir_baton((struct dir_baton **)root_baton, 1553 NULL, NULL, SVN_INVALID_REVNUM, 1554 edit_baton, NULL, pool)); 1555} 1556 1557 1558static svn_error_t * 1559delete_entry(const char *path, 1560 svn_revnum_t revision, 1561 void *parent_baton, 1562 apr_pool_t *pool) 1563{ 1564 struct dir_baton *pb = parent_baton; 1565 const char *mypath = apr_pstrdup(pb->pool, path); 1566 1567 /* remember this path needs to be deleted. */ 1568 svn_hash_sets(pb->deleted_entries, mypath, pb); 1569 1570 return SVN_NO_ERROR; 1571} 1572 1573 1574static svn_error_t * 1575add_directory(const char *path, 1576 void *parent_baton, 1577 const char *copyfrom_path, 1578 svn_revnum_t copyfrom_rev, 1579 apr_pool_t *pool, 1580 void **child_baton) 1581{ 1582 struct dir_baton *pb = parent_baton; 1583 struct edit_baton *eb = pb->edit_baton; 1584 void *was_deleted; 1585 svn_boolean_t is_copy = FALSE; 1586 struct dir_baton *new_db; 1587 1588 SVN_ERR(make_dir_baton(&new_db, path, copyfrom_path, copyfrom_rev, eb, 1589 pb, pool)); 1590 1591 /* This might be a replacement -- is the path already deleted? */ 1592 was_deleted = svn_hash_gets(pb->deleted_entries, path); 1593 1594 /* Detect an add-with-history. */ 1595 is_copy = ARE_VALID_COPY_ARGS(copyfrom_path, copyfrom_rev); 1596 1597 /* Dump the node. */ 1598 SVN_ERR(dump_node(eb, path, 1599 svn_node_dir, 1600 was_deleted ? svn_node_action_replace : svn_node_action_add, 1601 is_copy, 1602 is_copy ? copyfrom_path : NULL, 1603 is_copy ? copyfrom_rev : SVN_INVALID_REVNUM, 1604 pool)); 1605 1606 if (was_deleted) 1607 /* Delete the path, it's now been dumped. */ 1608 svn_hash_sets(pb->deleted_entries, path, NULL); 1609 1610 /* Check for normalized name clashes, but only if this is actually a 1611 new name in the parent, not a replacement. */ 1612 if (!was_deleted && eb->verify && eb->check_normalization && eb->notify_func) 1613 { 1614 pb->check_name_collision = TRUE; 1615 } 1616 1617 new_db->written_out = TRUE; 1618 1619 *child_baton = new_db; 1620 return SVN_NO_ERROR; 1621} 1622 1623 1624static svn_error_t * 1625open_directory(const char *path, 1626 void *parent_baton, 1627 svn_revnum_t base_revision, 1628 apr_pool_t *pool, 1629 void **child_baton) 1630{ 1631 struct dir_baton *pb = parent_baton; 1632 struct edit_baton *eb = pb->edit_baton; 1633 struct dir_baton *new_db; 1634 const char *cmp_path = NULL; 1635 svn_revnum_t cmp_rev = SVN_INVALID_REVNUM; 1636 1637 /* If the parent directory has explicit comparison path and rev, 1638 record the same for this one. */ 1639 if (ARE_VALID_COPY_ARGS(pb->cmp_path, pb->cmp_rev)) 1640 { 1641 cmp_path = svn_relpath_join(pb->cmp_path, 1642 svn_relpath_basename(path, pool), pool); 1643 cmp_rev = pb->cmp_rev; 1644 } 1645 1646 SVN_ERR(make_dir_baton(&new_db, path, cmp_path, cmp_rev, eb, pb, pool)); 1647 *child_baton = new_db; 1648 return SVN_NO_ERROR; 1649} 1650 1651 1652static svn_error_t * 1653close_directory(void *dir_baton, 1654 apr_pool_t *pool) 1655{ 1656 struct dir_baton *db = dir_baton; 1657 struct edit_baton *eb = db->edit_baton; 1658 apr_pool_t *subpool = svn_pool_create(pool); 1659 int i; 1660 apr_array_header_t *sorted_entries; 1661 1662 /* Sort entries lexically instead of as paths. Even though the entries 1663 * are full paths they're all in the same directory (see comment in struct 1664 * dir_baton definition). So we really want to sort by basename, in which 1665 * case the lexical sort function is more efficient. */ 1666 sorted_entries = svn_sort__hash(db->deleted_entries, 1667 svn_sort_compare_items_lexically, pool); 1668 for (i = 0; i < sorted_entries->nelts; i++) 1669 { 1670 const char *path = APR_ARRAY_IDX(sorted_entries, i, 1671 svn_sort__item_t).key; 1672 1673 svn_pool_clear(subpool); 1674 1675 /* By sending 'svn_node_unknown', the Node-kind: header simply won't 1676 be written out. No big deal at all, really. The loader 1677 shouldn't care. */ 1678 SVN_ERR(dump_node(eb, path, 1679 svn_node_unknown, svn_node_action_delete, 1680 FALSE, NULL, SVN_INVALID_REVNUM, subpool)); 1681 } 1682 1683 svn_pool_destroy(subpool); 1684 return SVN_NO_ERROR; 1685} 1686 1687 1688static svn_error_t * 1689add_file(const char *path, 1690 void *parent_baton, 1691 const char *copyfrom_path, 1692 svn_revnum_t copyfrom_rev, 1693 apr_pool_t *pool, 1694 void **file_baton) 1695{ 1696 struct dir_baton *pb = parent_baton; 1697 struct edit_baton *eb = pb->edit_baton; 1698 void *was_deleted; 1699 svn_boolean_t is_copy = FALSE; 1700 1701 /* This might be a replacement -- is the path already deleted? */ 1702 was_deleted = svn_hash_gets(pb->deleted_entries, path); 1703 1704 /* Detect add-with-history. */ 1705 is_copy = ARE_VALID_COPY_ARGS(copyfrom_path, copyfrom_rev); 1706 1707 /* Dump the node. */ 1708 SVN_ERR(dump_node(eb, path, 1709 svn_node_file, 1710 was_deleted ? svn_node_action_replace : svn_node_action_add, 1711 is_copy, 1712 is_copy ? copyfrom_path : NULL, 1713 is_copy ? copyfrom_rev : SVN_INVALID_REVNUM, 1714 pool)); 1715 1716 if (was_deleted) 1717 /* delete the path, it's now been dumped. */ 1718 svn_hash_sets(pb->deleted_entries, path, NULL); 1719 1720 /* Check for normalized name clashes, but only if this is actually a 1721 new name in the parent, not a replacement. */ 1722 if (!was_deleted && eb->verify && eb->check_normalization && eb->notify_func) 1723 { 1724 pb->check_name_collision = TRUE; 1725 } 1726 1727 *file_baton = NULL; /* muhahahaha */ 1728 return SVN_NO_ERROR; 1729} 1730 1731 1732static svn_error_t * 1733open_file(const char *path, 1734 void *parent_baton, 1735 svn_revnum_t ancestor_revision, 1736 apr_pool_t *pool, 1737 void **file_baton) 1738{ 1739 struct dir_baton *pb = parent_baton; 1740 struct edit_baton *eb = pb->edit_baton; 1741 const char *cmp_path = NULL; 1742 svn_revnum_t cmp_rev = SVN_INVALID_REVNUM; 1743 1744 /* If the parent directory has explicit comparison path and rev, 1745 record the same for this one. */ 1746 if (ARE_VALID_COPY_ARGS(pb->cmp_path, pb->cmp_rev)) 1747 { 1748 cmp_path = svn_relpath_join(pb->cmp_path, 1749 svn_relpath_basename(path, pool), pool); 1750 cmp_rev = pb->cmp_rev; 1751 } 1752 1753 SVN_ERR(dump_node(eb, path, 1754 svn_node_file, svn_node_action_change, 1755 FALSE, cmp_path, cmp_rev, pool)); 1756 1757 *file_baton = NULL; /* muhahahaha again */ 1758 return SVN_NO_ERROR; 1759} 1760 1761 1762static svn_error_t * 1763change_dir_prop(void *parent_baton, 1764 const char *name, 1765 const svn_string_t *value, 1766 apr_pool_t *pool) 1767{ 1768 struct dir_baton *db = parent_baton; 1769 struct edit_baton *eb = db->edit_baton; 1770 1771 /* This function is what distinguishes between a directory that is 1772 opened to merely get somewhere, vs. one that is opened because it 1773 *actually* changed by itself. 1774 1775 Instead of recording the prop changes here, we just use this method 1776 to trigger writing the node; dump_node() finds all the changes. */ 1777 if (! db->written_out) 1778 { 1779 SVN_ERR(dump_node(eb, db->path, 1780 svn_node_dir, svn_node_action_change, 1781 /* ### We pass is_copy=FALSE; this might be wrong 1782 but the parameter isn't used when action=change. */ 1783 FALSE, db->cmp_path, db->cmp_rev, pool)); 1784 db->written_out = TRUE; 1785 } 1786 return SVN_NO_ERROR; 1787} 1788 1789static svn_error_t * 1790fetch_props_func(apr_hash_t **props, 1791 void *baton, 1792 const char *path, 1793 svn_revnum_t base_revision, 1794 apr_pool_t *result_pool, 1795 apr_pool_t *scratch_pool) 1796{ 1797 struct edit_baton *eb = baton; 1798 svn_error_t *err; 1799 svn_fs_root_t *fs_root; 1800 1801 if (!SVN_IS_VALID_REVNUM(base_revision)) 1802 base_revision = eb->current_rev - 1; 1803 1804 SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool)); 1805 1806 err = svn_fs_node_proplist(props, fs_root, path, result_pool); 1807 if (err && err->apr_err == SVN_ERR_FS_NOT_FOUND) 1808 { 1809 svn_error_clear(err); 1810 *props = apr_hash_make(result_pool); 1811 return SVN_NO_ERROR; 1812 } 1813 else if (err) 1814 return svn_error_trace(err); 1815 1816 return SVN_NO_ERROR; 1817} 1818 1819static svn_error_t * 1820fetch_kind_func(svn_node_kind_t *kind, 1821 void *baton, 1822 const char *path, 1823 svn_revnum_t base_revision, 1824 apr_pool_t *scratch_pool) 1825{ 1826 struct edit_baton *eb = baton; 1827 svn_fs_root_t *fs_root; 1828 1829 if (!SVN_IS_VALID_REVNUM(base_revision)) 1830 base_revision = eb->current_rev - 1; 1831 1832 SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool)); 1833 1834 SVN_ERR(svn_fs_check_path(kind, fs_root, path, scratch_pool)); 1835 1836 return SVN_NO_ERROR; 1837} 1838 1839static svn_error_t * 1840fetch_base_func(const char **filename, 1841 void *baton, 1842 const char *path, 1843 svn_revnum_t base_revision, 1844 apr_pool_t *result_pool, 1845 apr_pool_t *scratch_pool) 1846{ 1847 struct edit_baton *eb = baton; 1848 svn_stream_t *contents; 1849 svn_stream_t *file_stream; 1850 const char *tmp_filename; 1851 svn_error_t *err; 1852 svn_fs_root_t *fs_root; 1853 1854 if (!SVN_IS_VALID_REVNUM(base_revision)) 1855 base_revision = eb->current_rev - 1; 1856 1857 SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool)); 1858 1859 err = svn_fs_file_contents(&contents, fs_root, path, scratch_pool); 1860 if (err && err->apr_err == SVN_ERR_FS_NOT_FOUND) 1861 { 1862 svn_error_clear(err); 1863 *filename = NULL; 1864 return SVN_NO_ERROR; 1865 } 1866 else if (err) 1867 return svn_error_trace(err); 1868 SVN_ERR(svn_stream_open_unique(&file_stream, &tmp_filename, NULL, 1869 svn_io_file_del_on_pool_cleanup, 1870 scratch_pool, scratch_pool)); 1871 SVN_ERR(svn_stream_copy3(contents, file_stream, NULL, NULL, scratch_pool)); 1872 1873 *filename = apr_pstrdup(result_pool, tmp_filename); 1874 1875 return SVN_NO_ERROR; 1876} 1877 1878 1879static svn_error_t * 1880get_dump_editor(const svn_delta_editor_t **editor, 1881 void **edit_baton, 1882 svn_fs_t *fs, 1883 svn_revnum_t to_rev, 1884 const char *root_path, 1885 svn_stream_t *stream, 1886 svn_boolean_t *found_old_reference, 1887 svn_boolean_t *found_old_mergeinfo, 1888 svn_error_t *(*custom_close_directory)(void *dir_baton, 1889 apr_pool_t *scratch_pool), 1890 svn_repos_notify_func_t notify_func, 1891 void *notify_baton, 1892 svn_revnum_t oldest_dumped_rev, 1893 svn_boolean_t use_deltas, 1894 svn_boolean_t verify, 1895 svn_boolean_t check_normalization, 1896 apr_pool_t *pool) 1897{ 1898 /* Allocate an edit baton to be stored in every directory baton. 1899 Set it up for the directory baton we create here, which is the 1900 root baton. */ 1901 struct edit_baton *eb = apr_pcalloc(pool, sizeof(*eb)); 1902 svn_delta_editor_t *dump_editor = svn_delta_default_editor(pool); 1903 svn_delta_shim_callbacks_t *shim_callbacks = 1904 svn_delta_shim_callbacks_default(pool); 1905 1906 /* Set up the edit baton. */ 1907 eb->stream = stream; 1908 eb->notify_func = notify_func; 1909 eb->notify_baton = notify_baton; 1910 eb->oldest_dumped_rev = oldest_dumped_rev; 1911 eb->path = apr_pstrdup(pool, root_path); 1912 SVN_ERR(svn_fs_revision_root(&(eb->fs_root), fs, to_rev, pool)); 1913 eb->fs = fs; 1914 eb->current_rev = to_rev; 1915 eb->use_deltas = use_deltas; 1916 eb->verify = verify; 1917 eb->check_normalization = check_normalization; 1918 eb->found_old_reference = found_old_reference; 1919 eb->found_old_mergeinfo = found_old_mergeinfo; 1920 1921 /* In non-verification mode, we will allow anything to be dumped because 1922 it might be an incremental dump with possible manual intervention. 1923 Also, this might be the last resort when it comes to data recovery. 1924 1925 Else, make sure that all paths exists at their respective revisions. 1926 */ 1927 eb->path_tracker = verify ? tracker_create(to_rev, pool) : NULL; 1928 1929 /* Set up the editor. */ 1930 dump_editor->open_root = open_root; 1931 dump_editor->delete_entry = delete_entry; 1932 dump_editor->add_directory = add_directory; 1933 dump_editor->open_directory = open_directory; 1934 if (custom_close_directory) 1935 dump_editor->close_directory = custom_close_directory; 1936 else 1937 dump_editor->close_directory = close_directory; 1938 dump_editor->change_dir_prop = change_dir_prop; 1939 dump_editor->add_file = add_file; 1940 dump_editor->open_file = open_file; 1941 1942 *edit_baton = eb; 1943 *editor = dump_editor; 1944 1945 shim_callbacks->fetch_kind_func = fetch_kind_func; 1946 shim_callbacks->fetch_props_func = fetch_props_func; 1947 shim_callbacks->fetch_base_func = fetch_base_func; 1948 shim_callbacks->fetch_baton = eb; 1949 1950 SVN_ERR(svn_editor__insert_shims(editor, edit_baton, *editor, *edit_baton, 1951 NULL, NULL, shim_callbacks, pool, pool)); 1952 1953 return SVN_NO_ERROR; 1954} 1955 1956/*----------------------------------------------------------------------*/ 1957 1958/** The main dumping routine, svn_repos_dump_fs. **/ 1959 1960 1961/* Helper for svn_repos_dump_fs. 1962 1963 Write a revision record of REV in REPOS to writable STREAM, using POOL. 1964 Dump revision properties as well if INCLUDE_REVPROPS has been set. 1965 AUTHZ_FUNC and AUTHZ_BATON are passed directly to the repos layer. 1966 */ 1967static svn_error_t * 1968write_revision_record(svn_stream_t *stream, 1969 svn_repos_t *repos, 1970 svn_revnum_t rev, 1971 svn_boolean_t include_revprops, 1972 svn_repos_authz_func_t authz_func, 1973 void *authz_baton, 1974 apr_pool_t *pool) 1975{ 1976 apr_hash_t *props; 1977 1978 if (include_revprops) 1979 { 1980 SVN_ERR(svn_repos_fs_revision_proplist(&props, repos, rev, 1981 authz_func, authz_baton, pool)); 1982 } 1983 else 1984 { 1985 /* Although we won't use it, we still need this container for the 1986 call below. */ 1987 props = apr_hash_make(pool); 1988 } 1989 1990 SVN_ERR(svn_repos__dump_revision_record(stream, rev, NULL, props, 1991 include_revprops, 1992 pool)); 1993 return SVN_NO_ERROR; 1994} 1995 1996/* Baton for dump_filter_authz_func(). */ 1997typedef struct dump_filter_baton_t 1998{ 1999 svn_repos_dump_filter_func_t filter_func; 2000 void *filter_baton; 2001} dump_filter_baton_t; 2002 2003/* Implements svn_repos_authz_func_t. */ 2004static svn_error_t * 2005dump_filter_authz_func(svn_boolean_t *allowed, 2006 svn_fs_root_t *root, 2007 const char *path, 2008 void *baton, 2009 apr_pool_t *pool) 2010{ 2011 dump_filter_baton_t *b = baton; 2012 2013 /* For some nodes (e.g. files under copied directory) PATH may be 2014 * non-canonical (missing leading '/'). Canonicalize PATH before 2015 * passing it to FILTER_FUNC. */ 2016 path = svn_fspath__canonicalize(path, pool); 2017 2018 return svn_error_trace(b->filter_func(allowed, root, path, b->filter_baton, 2019 pool)); 2020} 2021 2022 2023 2024/* The main dumper. */ 2025svn_error_t * 2026svn_repos_dump_fs4(svn_repos_t *repos, 2027 svn_stream_t *stream, 2028 svn_revnum_t start_rev, 2029 svn_revnum_t end_rev, 2030 svn_boolean_t incremental, 2031 svn_boolean_t use_deltas, 2032 svn_boolean_t include_revprops, 2033 svn_boolean_t include_changes, 2034 svn_repos_notify_func_t notify_func, 2035 void *notify_baton, 2036 svn_repos_dump_filter_func_t filter_func, 2037 void *filter_baton, 2038 svn_cancel_func_t cancel_func, 2039 void *cancel_baton, 2040 apr_pool_t *pool) 2041{ 2042 const svn_delta_editor_t *dump_editor; 2043 void *dump_edit_baton = NULL; 2044 svn_revnum_t rev; 2045 svn_fs_t *fs = svn_repos_fs(repos); 2046 apr_pool_t *iterpool = svn_pool_create(pool); 2047 svn_revnum_t youngest; 2048 const char *uuid; 2049 int version; 2050 svn_boolean_t found_old_reference = FALSE; 2051 svn_boolean_t found_old_mergeinfo = FALSE; 2052 svn_repos_notify_t *notify; 2053 svn_repos_authz_func_t authz_func; 2054 dump_filter_baton_t authz_baton = {0}; 2055 2056 /* Make sure we catch up on the latest revprop changes. This is the only 2057 * time we will refresh the revprop data in this query. */ 2058 SVN_ERR(svn_fs_refresh_revision_props(fs, pool)); 2059 2060 /* Determine the current youngest revision of the filesystem. */ 2061 SVN_ERR(svn_fs_youngest_rev(&youngest, fs, pool)); 2062 2063 /* Use default vals if necessary. */ 2064 if (! SVN_IS_VALID_REVNUM(start_rev)) 2065 start_rev = 0; 2066 if (! SVN_IS_VALID_REVNUM(end_rev)) 2067 end_rev = youngest; 2068 if (! stream) 2069 stream = svn_stream_empty(pool); 2070 2071 /* Validate the revisions. */ 2072 if (start_rev > end_rev) 2073 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL, 2074 _("Start revision %ld" 2075 " is greater than end revision %ld"), 2076 start_rev, end_rev); 2077 if (end_rev > youngest) 2078 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL, 2079 _("End revision %ld is invalid " 2080 "(youngest revision is %ld)"), 2081 end_rev, youngest); 2082 2083 /* We use read authz callback to implement dump filtering. If there is no 2084 * read access for some node, it will be excluded from dump as well as 2085 * references to it (e.g. copy source). */ 2086 if (filter_func) 2087 { 2088 authz_func = dump_filter_authz_func; 2089 authz_baton.filter_func = filter_func; 2090 authz_baton.filter_baton = filter_baton; 2091 } 2092 else 2093 { 2094 authz_func = NULL; 2095 } 2096 2097 /* Write out the UUID. */ 2098 SVN_ERR(svn_fs_get_uuid(fs, &uuid, pool)); 2099 2100 /* If we're not using deltas, use the previous version, for 2101 compatibility with svn 1.0.x. */ 2102 version = SVN_REPOS_DUMPFILE_FORMAT_VERSION; 2103 if (!use_deltas) 2104 version--; 2105 2106 /* Write out "general" metadata for the dumpfile. In this case, a 2107 magic header followed by a dumpfile format version. */ 2108 SVN_ERR(svn_repos__dump_magic_header_record(stream, version, pool)); 2109 SVN_ERR(svn_repos__dump_uuid_header_record(stream, uuid, pool)); 2110 2111 /* Create a notify object that we can reuse in the loop. */ 2112 if (notify_func) 2113 notify = svn_repos_notify_create(svn_repos_notify_dump_rev_end, 2114 pool); 2115 2116 /* Main loop: we're going to dump revision REV. */ 2117 for (rev = start_rev; rev <= end_rev; rev++) 2118 { 2119 svn_fs_root_t *to_root; 2120 svn_boolean_t use_deltas_for_rev; 2121 2122 svn_pool_clear(iterpool); 2123 2124 /* Check for cancellation. */ 2125 if (cancel_func) 2126 SVN_ERR(cancel_func(cancel_baton)); 2127 2128 /* Write the revision record. */ 2129 SVN_ERR(write_revision_record(stream, repos, rev, include_revprops, 2130 authz_func, &authz_baton, iterpool)); 2131 2132 /* When dumping revision 0, we just write out the revision record. 2133 The parser might want to use its properties. 2134 If we don't want revision changes at all, skip in any case. */ 2135 if (rev == 0 || !include_changes) 2136 goto loop_end; 2137 2138 /* Fetch the editor which dumps nodes to a file. Regardless of 2139 what we've been told, don't use deltas for the first rev of a 2140 non-incremental dump. */ 2141 use_deltas_for_rev = use_deltas && (incremental || rev != start_rev); 2142 SVN_ERR(get_dump_editor(&dump_editor, &dump_edit_baton, fs, rev, 2143 "", stream, &found_old_reference, 2144 &found_old_mergeinfo, NULL, 2145 notify_func, notify_baton, 2146 start_rev, use_deltas_for_rev, FALSE, FALSE, 2147 iterpool)); 2148 2149 /* Drive the editor in one way or another. */ 2150 SVN_ERR(svn_fs_revision_root(&to_root, fs, rev, iterpool)); 2151 2152 /* If this is the first revision of a non-incremental dump, 2153 we're in for a full tree dump. Otherwise, we want to simply 2154 replay the revision. */ 2155 if ((rev == start_rev) && (! incremental)) 2156 { 2157 /* Compare against revision 0, so everything appears to be added. */ 2158 svn_fs_root_t *from_root; 2159 SVN_ERR(svn_fs_revision_root(&from_root, fs, 0, iterpool)); 2160 SVN_ERR(svn_repos_dir_delta2(from_root, "", "", 2161 to_root, "", 2162 dump_editor, dump_edit_baton, 2163 authz_func, &authz_baton, 2164 FALSE, /* don't send text-deltas */ 2165 svn_depth_infinity, 2166 FALSE, /* don't send entry props */ 2167 FALSE, /* don't ignore ancestry */ 2168 iterpool)); 2169 } 2170 else 2171 { 2172 /* The normal case: compare consecutive revs. */ 2173 SVN_ERR(svn_repos_replay2(to_root, "", SVN_INVALID_REVNUM, FALSE, 2174 dump_editor, dump_edit_baton, 2175 authz_func, &authz_baton, iterpool)); 2176 2177 /* While our editor close_edit implementation is a no-op, we still 2178 do this for completeness. */ 2179 SVN_ERR(dump_editor->close_edit(dump_edit_baton, iterpool)); 2180 } 2181 2182 loop_end: 2183 if (notify_func) 2184 { 2185 notify->revision = rev; 2186 notify_func(notify_baton, notify, iterpool); 2187 } 2188 } 2189 2190 if (notify_func) 2191 { 2192 /* Did we issue any warnings about references to revisions older than 2193 the oldest dumped revision? If so, then issue a final generic 2194 warning, since the inline warnings already issued might easily be 2195 missed. */ 2196 2197 notify = svn_repos_notify_create(svn_repos_notify_dump_end, iterpool); 2198 notify_func(notify_baton, notify, iterpool); 2199 2200 if (found_old_reference) 2201 { 2202 notify_warning(iterpool, notify_func, notify_baton, 2203 svn_repos_notify_warning_found_old_reference, 2204 _("The range of revisions dumped " 2205 "contained references to " 2206 "copy sources outside that " 2207 "range.")); 2208 } 2209 2210 /* Ditto if we issued any warnings about old revisions referenced 2211 in dumped mergeinfo. */ 2212 if (found_old_mergeinfo) 2213 { 2214 notify_warning(iterpool, notify_func, notify_baton, 2215 svn_repos_notify_warning_found_old_mergeinfo, 2216 _("The range of revisions dumped " 2217 "contained mergeinfo " 2218 "which reference revisions outside " 2219 "that range.")); 2220 } 2221 } 2222 2223 svn_pool_destroy(iterpool); 2224 2225 return SVN_NO_ERROR; 2226} 2227 2228 2229/*----------------------------------------------------------------------*/ 2230 2231/* verify, based on dump */ 2232 2233 2234/* Creating a new revision that changes /A/B/E/bravo means creating new 2235 directory listings for /, /A, /A/B, and /A/B/E in the new revision, with 2236 each entry not changed in the new revision a link back to the entry in a 2237 previous revision. svn_repos_replay()ing a revision does not verify that 2238 those links are correct. 2239 2240 For paths actually changed in the revision we verify, we get directory 2241 contents or file length twice: once in the dump editor, and once here. 2242 We could create a new verify baton, store in it the changed paths, and 2243 skip those here, but that means building an entire wrapper editor and 2244 managing two levels of batons. The impact from checking these entries 2245 twice should be minimal, while the code to avoid it is not. 2246*/ 2247 2248static svn_error_t * 2249verify_directory_entry(void *baton, const void *key, apr_ssize_t klen, 2250 void *val, apr_pool_t *pool) 2251{ 2252 struct dir_baton *db = baton; 2253 svn_fs_dirent_t *dirent = (svn_fs_dirent_t *)val; 2254 char *path; 2255 svn_boolean_t right_kind; 2256 2257 path = svn_relpath_join(db->path, (const char *)key, pool); 2258 2259 /* since we can't access the directory entries directly by their ID, 2260 we need to navigate from the FS_ROOT to them (relatively expensive 2261 because we may start at a never rev than the last change to node). 2262 We check that the node kind stored in the noderev matches the dir 2263 entry. This also ensures that all entries point to valid noderevs. 2264 */ 2265 switch (dirent->kind) { 2266 case svn_node_dir: 2267 SVN_ERR(svn_fs_is_dir(&right_kind, db->edit_baton->fs_root, path, pool)); 2268 if (!right_kind) 2269 return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL, 2270 _("Node '%s' is not a directory."), 2271 path); 2272 2273 break; 2274 case svn_node_file: 2275 SVN_ERR(svn_fs_is_file(&right_kind, db->edit_baton->fs_root, path, pool)); 2276 if (!right_kind) 2277 return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL, 2278 _("Node '%s' is not a file."), 2279 path); 2280 break; 2281 default: 2282 return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL, 2283 _("Unexpected node kind %d for '%s'"), 2284 dirent->kind, path); 2285 } 2286 2287 return SVN_NO_ERROR; 2288} 2289 2290/* Baton used by the check_name_collision hash iterator. */ 2291struct check_name_collision_baton 2292{ 2293 struct dir_baton *dir_baton; 2294 apr_hash_t *normalized; 2295 svn_membuf_t buffer; 2296}; 2297 2298/* Scan the directory and report all entry names that differ only in 2299 Unicode character representation. */ 2300static svn_error_t * 2301check_name_collision(void *baton, const void *key, apr_ssize_t klen, 2302 void *val, apr_pool_t *iterpool) 2303{ 2304 struct check_name_collision_baton *const cb = baton; 2305 const char *name; 2306 const char *found; 2307 2308 SVN_ERR(svn_utf__normalize(&name, key, klen, &cb->buffer)); 2309 2310 found = svn_hash_gets(cb->normalized, name); 2311 if (!found) 2312 svn_hash_sets(cb->normalized, apr_pstrdup(cb->buffer.pool, name), 2313 normalized_unique); 2314 else if (found == normalized_collision) 2315 /* Skip already reported collision */; 2316 else 2317 { 2318 struct dir_baton *const db = cb->dir_baton; 2319 struct edit_baton *const eb = db->edit_baton; 2320 const char* normpath; 2321 2322 svn_hash_sets(cb->normalized, apr_pstrdup(cb->buffer.pool, name), 2323 normalized_collision); 2324 2325 SVN_ERR(svn_utf__normalize( 2326 &normpath, svn_relpath_join(db->path, name, iterpool), 2327 SVN_UTF__UNKNOWN_LENGTH, &cb->buffer)); 2328 notify_warning(iterpool, eb->notify_func, eb->notify_baton, 2329 svn_repos_notify_warning_name_collision, 2330 _("Duplicate representation of path '%s'"), normpath); 2331 } 2332 return SVN_NO_ERROR; 2333} 2334 2335 2336static svn_error_t * 2337verify_close_directory(void *dir_baton, apr_pool_t *pool) 2338{ 2339 struct dir_baton *db = dir_baton; 2340 apr_hash_t *dirents; 2341 SVN_ERR(svn_fs_dir_entries(&dirents, db->edit_baton->fs_root, 2342 db->path, pool)); 2343 SVN_ERR(svn_iter_apr_hash(NULL, dirents, verify_directory_entry, 2344 dir_baton, pool)); 2345 2346 if (db->check_name_collision) 2347 { 2348 struct check_name_collision_baton check_baton; 2349 check_baton.dir_baton = db; 2350 check_baton.normalized = apr_hash_make(pool); 2351 svn_membuf__create(&check_baton.buffer, 0, pool); 2352 SVN_ERR(svn_iter_apr_hash(NULL, dirents, check_name_collision, 2353 &check_baton, pool)); 2354 } 2355 2356 return close_directory(dir_baton, pool); 2357} 2358 2359/* Verify revision REV in file system FS. */ 2360static svn_error_t * 2361verify_one_revision(svn_fs_t *fs, 2362 svn_revnum_t rev, 2363 svn_repos_notify_func_t notify_func, 2364 void *notify_baton, 2365 svn_revnum_t start_rev, 2366 svn_boolean_t check_normalization, 2367 svn_cancel_func_t cancel_func, 2368 void *cancel_baton, 2369 apr_pool_t *scratch_pool) 2370{ 2371 const svn_delta_editor_t *dump_editor; 2372 void *dump_edit_baton; 2373 svn_fs_root_t *to_root; 2374 apr_hash_t *props; 2375 const svn_delta_editor_t *cancel_editor; 2376 void *cancel_edit_baton; 2377 2378 /* Get cancellable dump editor, but with our close_directory handler.*/ 2379 SVN_ERR(get_dump_editor(&dump_editor, &dump_edit_baton, 2380 fs, rev, "", 2381 svn_stream_empty(scratch_pool), 2382 NULL, NULL, 2383 verify_close_directory, 2384 notify_func, notify_baton, 2385 start_rev, 2386 FALSE, TRUE, /* use_deltas, verify */ 2387 check_normalization, 2388 scratch_pool)); 2389 SVN_ERR(svn_delta_get_cancellation_editor(cancel_func, cancel_baton, 2390 dump_editor, dump_edit_baton, 2391 &cancel_editor, 2392 &cancel_edit_baton, 2393 scratch_pool)); 2394 SVN_ERR(svn_fs_revision_root(&to_root, fs, rev, scratch_pool)); 2395 SVN_ERR(svn_fs_verify_root(to_root, scratch_pool)); 2396 SVN_ERR(svn_repos_replay2(to_root, "", SVN_INVALID_REVNUM, FALSE, 2397 cancel_editor, cancel_edit_baton, 2398 NULL, NULL, scratch_pool)); 2399 2400 /* While our editor close_edit implementation is a no-op, we still 2401 do this for completeness. */ 2402 SVN_ERR(cancel_editor->close_edit(cancel_edit_baton, scratch_pool)); 2403 2404 SVN_ERR(svn_fs_revision_proplist2(&props, fs, rev, FALSE, scratch_pool, 2405 scratch_pool)); 2406 2407 return SVN_NO_ERROR; 2408} 2409 2410/* Baton type used for forwarding notifications from FS API to REPOS API. */ 2411struct verify_fs_notify_func_baton_t 2412{ 2413 /* notification function to call (must not be NULL) */ 2414 svn_repos_notify_func_t notify_func; 2415 2416 /* baton to use for it */ 2417 void *notify_baton; 2418 2419 /* type of notification to send (we will simply plug in the revision) */ 2420 svn_repos_notify_t *notify; 2421}; 2422 2423/* Forward the notification to BATON. */ 2424static void 2425verify_fs_notify_func(svn_revnum_t revision, 2426 void *baton, 2427 apr_pool_t *pool) 2428{ 2429 struct verify_fs_notify_func_baton_t *notify_baton = baton; 2430 2431 notify_baton->notify->revision = revision; 2432 notify_baton->notify_func(notify_baton->notify_baton, 2433 notify_baton->notify, pool); 2434} 2435 2436static svn_error_t * 2437report_error(svn_revnum_t revision, 2438 svn_error_t *verify_err, 2439 svn_repos_verify_callback_t verify_callback, 2440 void *verify_baton, 2441 apr_pool_t *pool) 2442{ 2443 if (verify_callback) 2444 { 2445 svn_error_t *cb_err; 2446 2447 /* The caller provided us with a callback, so make him responsible 2448 for what's going to happen with the error. */ 2449 cb_err = verify_callback(verify_baton, revision, verify_err, pool); 2450 svn_error_clear(verify_err); 2451 SVN_ERR(cb_err); 2452 2453 return SVN_NO_ERROR; 2454 } 2455 else 2456 { 2457 /* No callback -- no second guessing. Just return the error. */ 2458 return svn_error_trace(verify_err); 2459 } 2460} 2461 2462svn_error_t * 2463svn_repos_verify_fs3(svn_repos_t *repos, 2464 svn_revnum_t start_rev, 2465 svn_revnum_t end_rev, 2466 svn_boolean_t check_normalization, 2467 svn_boolean_t metadata_only, 2468 svn_repos_notify_func_t notify_func, 2469 void *notify_baton, 2470 svn_repos_verify_callback_t verify_callback, 2471 void *verify_baton, 2472 svn_cancel_func_t cancel_func, 2473 void *cancel_baton, 2474 apr_pool_t *pool) 2475{ 2476 svn_fs_t *fs = svn_repos_fs(repos); 2477 svn_revnum_t youngest; 2478 svn_revnum_t rev; 2479 apr_pool_t *iterpool = svn_pool_create(pool); 2480 svn_repos_notify_t *notify; 2481 svn_fs_progress_notify_func_t verify_notify = NULL; 2482 struct verify_fs_notify_func_baton_t *verify_notify_baton = NULL; 2483 svn_error_t *err; 2484 2485 /* Make sure we catch up on the latest revprop changes. This is the only 2486 * time we will refresh the revprop data in this query. */ 2487 SVN_ERR(svn_fs_refresh_revision_props(fs, pool)); 2488 2489 /* Determine the current youngest revision of the filesystem. */ 2490 SVN_ERR(svn_fs_youngest_rev(&youngest, fs, pool)); 2491 2492 /* Use default vals if necessary. */ 2493 if (! SVN_IS_VALID_REVNUM(start_rev)) 2494 start_rev = 0; 2495 if (! SVN_IS_VALID_REVNUM(end_rev)) 2496 end_rev = youngest; 2497 2498 /* Validate the revisions. */ 2499 if (start_rev > end_rev) 2500 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL, 2501 _("Start revision %ld" 2502 " is greater than end revision %ld"), 2503 start_rev, end_rev); 2504 if (end_rev > youngest) 2505 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL, 2506 _("End revision %ld is invalid " 2507 "(youngest revision is %ld)"), 2508 end_rev, youngest); 2509 2510 /* Create a notify object that we can reuse within the loop and a 2511 forwarding structure for notifications from inside svn_fs_verify(). */ 2512 if (notify_func) 2513 { 2514 notify = svn_repos_notify_create(svn_repos_notify_verify_rev_end, pool); 2515 2516 verify_notify = verify_fs_notify_func; 2517 verify_notify_baton = apr_palloc(pool, sizeof(*verify_notify_baton)); 2518 verify_notify_baton->notify_func = notify_func; 2519 verify_notify_baton->notify_baton = notify_baton; 2520 verify_notify_baton->notify 2521 = svn_repos_notify_create(svn_repos_notify_verify_rev_structure, pool); 2522 } 2523 2524 /* Verify global metadata and backend-specific data first. */ 2525 err = svn_fs_verify(svn_fs_path(fs, pool), svn_fs_config(fs, pool), 2526 start_rev, end_rev, 2527 verify_notify, verify_notify_baton, 2528 cancel_func, cancel_baton, pool); 2529 2530 if (err && err->apr_err == SVN_ERR_CANCELLED) 2531 { 2532 return svn_error_trace(err); 2533 } 2534 else if (err) 2535 { 2536 SVN_ERR(report_error(SVN_INVALID_REVNUM, err, verify_callback, 2537 verify_baton, iterpool)); 2538 } 2539 2540 if (!metadata_only) 2541 for (rev = start_rev; rev <= end_rev; rev++) 2542 { 2543 svn_pool_clear(iterpool); 2544 2545 /* Wrapper function to catch the possible errors. */ 2546 err = verify_one_revision(fs, rev, notify_func, notify_baton, 2547 start_rev, check_normalization, 2548 cancel_func, cancel_baton, 2549 iterpool); 2550 2551 if (err && err->apr_err == SVN_ERR_CANCELLED) 2552 { 2553 return svn_error_trace(err); 2554 } 2555 else if (err) 2556 { 2557 SVN_ERR(report_error(rev, err, verify_callback, verify_baton, 2558 iterpool)); 2559 } 2560 else if (notify_func) 2561 { 2562 /* Tell the caller that we're done with this revision. */ 2563 notify->revision = rev; 2564 notify_func(notify_baton, notify, iterpool); 2565 } 2566 } 2567 2568 /* We're done. */ 2569 if (notify_func) 2570 { 2571 notify = svn_repos_notify_create(svn_repos_notify_verify_end, iterpool); 2572 notify_func(notify_baton, notify, iterpool); 2573 } 2574 2575 svn_pool_destroy(iterpool); 2576 2577 return SVN_NO_ERROR; 2578} 2579