pack.c revision 299742
1/* pack.c --- FSFS shard packing functionality 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22#include <assert.h> 23#include <string.h> 24 25#include "svn_pools.h" 26#include "svn_dirent_uri.h" 27#include "svn_sorts.h" 28#include "private/svn_temp_serializer.h" 29#include "private/svn_sorts_private.h" 30#include "private/svn_subr_private.h" 31#include "private/svn_string_private.h" 32#include "private/svn_io_private.h" 33 34#include "fs_fs.h" 35#include "pack.h" 36#include "util.h" 37#include "id.h" 38#include "index.h" 39#include "low_level.h" 40#include "revprops.h" 41#include "transaction.h" 42 43#include "../libsvn_fs/fs-loader.h" 44 45#include "svn_private_config.h" 46#include "temp_serializer.h" 47 48/* Logical addressing packing logic: 49 * 50 * We pack files on a pack file basis (e.g. 1000 revs) without changing 51 * existing pack files nor the revision files outside the range to pack. 52 * 53 * First, we will scan the revision file indexes to determine the number 54 * of items to "place" (i.e. determine their optimal position within the 55 * future pack file). For each item, we will need a constant amount of 56 * memory to track it. A MAX_MEM parameter sets a limit to the number of 57 * items we may place in one go. That means, we may not be able to add 58 * all revisions at once. Instead, we will run the placement for a subset 59 * of revisions at a time. The very unlikely worst case will simply append 60 * all revision data with just a little reshuffling inside each revision. 61 * 62 * In a second step, we read all revisions in the selected range, build 63 * the item tracking information and copy the items themselves from the 64 * revision files to temporary files. The latter serve as buckets for a 65 * very coarse bucket presort: Separate change lists, file properties, 66 * directory properties and noderevs + representations from one another. 67 * 68 * The third step will determine an optimized placement for the items in 69 * each of the 4 buckets separately. The first three will simply order 70 * their items by revision, starting with the newest once. Placing rep 71 * and noderev items is a more elaborate process documented in the code. 72 * 73 * In short, we store items in the following order: 74 * - changed paths lists 75 * - node property 76 * - directory properties 77 * - directory representations corresponding noderevs, lexical path order 78 * with special treatment of "trunk" and "branches" 79 * - same for file representations 80 * 81 * Step 4 copies the items from the temporary buckets into the final 82 * pack file and writes the temporary index files. 83 * 84 * Finally, after the last range of revisions, create the final indexes. 85 */ 86 87/* Maximum amount of memory we allocate for placement information during 88 * the pack process. 89 */ 90#define DEFAULT_MAX_MEM (64 * 1024 * 1024) 91 92/* Data structure describing a node change at PATH, REVISION. 93 * We will sort these instances by PATH and NODE_ID such that we can combine 94 * similar nodes in the same reps container and store containers in path 95 * major order. 96 */ 97typedef struct path_order_t 98{ 99 /* changed path */ 100 svn_prefix_string__t *path; 101 102 /* node ID for this PATH in REVISION */ 103 svn_fs_fs__id_part_t node_id; 104 105 /* when this change happened */ 106 svn_revnum_t revision; 107 108 /* noderev predecessor count */ 109 int predecessor_count; 110 111 /* this is a directory node */ 112 svn_boolean_t is_dir; 113 114 /* length of the expanded representation content */ 115 apr_int64_t expanded_size; 116 117 /* item ID of the noderev linked to the change. May be (0, 0). */ 118 svn_fs_fs__id_part_t noderev_id; 119 120 /* item ID of the representation containing the new data. May be (0, 0). */ 121 svn_fs_fs__id_part_t rep_id; 122} path_order_t; 123 124/* Represents a reference from item FROM to item TO. FROM may be a noderev 125 * or rep_id while TO is (currently) always a representation. We will sort 126 * them by TO which allows us to collect all dependent items. 127 */ 128typedef struct reference_t 129{ 130 svn_fs_fs__id_part_t to; 131 svn_fs_fs__id_part_t from; 132} reference_t; 133 134/* This structure keeps track of all the temporary data and status that 135 * needs to be kept around during the creation of one pack file. After 136 * each revision range (in case we can't process all revs at once due to 137 * memory restrictions), parts of the data will get re-initialized. 138 */ 139typedef struct pack_context_t 140{ 141 /* file system that we operate on */ 142 svn_fs_t *fs; 143 144 /* cancel function to invoke at regular intervals. May be NULL */ 145 svn_cancel_func_t cancel_func; 146 147 /* baton to pass to CANCEL_FUNC */ 148 void *cancel_baton; 149 150 /* first revision in the shard (and future pack file) */ 151 svn_revnum_t shard_rev; 152 153 /* first revision in the range to process (>= SHARD_REV) */ 154 svn_revnum_t start_rev; 155 156 /* first revision after the range to process (<= SHARD_END_REV) */ 157 svn_revnum_t end_rev; 158 159 /* first revision after the current shard */ 160 svn_revnum_t shard_end_rev; 161 162 /* log-to-phys proto index for the whole pack file */ 163 apr_file_t *proto_l2p_index; 164 165 /* phys-to-log proto index for the whole pack file */ 166 apr_file_t *proto_p2l_index; 167 168 /* full shard directory path (containing the unpacked revisions) */ 169 const char *shard_dir; 170 171 /* full packed shard directory path (containing the pack file + indexes) */ 172 const char *pack_file_dir; 173 174 /* full pack file path (including PACK_FILE_DIR) */ 175 const char *pack_file_path; 176 177 /* current write position (i.e. file length) in the pack file */ 178 apr_off_t pack_offset; 179 180 /* the pack file to ultimately write all data to */ 181 apr_file_t *pack_file; 182 183 /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists. 184 * Will be filled in phase 2 and be cleared after each revision range. */ 185 apr_array_header_t *changes; 186 187 /* temp file receiving all change list items (referenced by CHANGES). 188 * Will be filled in phase 2 and be cleared after each revision range. */ 189 apr_file_t *changes_file; 190 191 /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties. 192 * Will be filled in phase 2 and be cleared after each revision range. */ 193 apr_array_header_t *file_props; 194 195 /* temp file receiving all file prop items (referenced by FILE_PROPS). 196 * Will be filled in phase 2 and be cleared after each revision range.*/ 197 apr_file_t *file_props_file; 198 199 /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties. 200 * Will be filled in phase 2 and be cleared after each revision range. */ 201 apr_array_header_t *dir_props; 202 203 /* temp file receiving all directory prop items (referenced by DIR_PROPS). 204 * Will be filled in phase 2 and be cleared after each revision range.*/ 205 apr_file_t *dir_props_file; 206 207 /* container for all PATH members in PATH_ORDER. */ 208 svn_prefix_tree__t *paths; 209 210 /* array of path_order_t *. Will be filled in phase 2 and be cleared 211 * after each revision range. Sorted by PATH, NODE_ID. */ 212 apr_array_header_t *path_order; 213 214 /* array of reference_t* linking representations to their delta bases. 215 * Will be filled in phase 2 and be cleared after each revision range. 216 * It will be sorted by the FROM members (for rep->base rep lookup). */ 217 apr_array_header_t *references; 218 219 /* array of svn_fs_fs__p2l_entry_t*. Will be filled in phase 2 and be 220 * cleared after each revision range. During phase 3, we will set items 221 * to NULL that we already processed. */ 222 apr_array_header_t *reps; 223 224 /* array of int, marking for each revision, the which offset their items 225 * begin in REPS. Will be filled in phase 2 and be cleared after 226 * each revision range. */ 227 apr_array_header_t *rev_offsets; 228 229 /* temp file receiving all items referenced by REPS. 230 * Will be filled in phase 2 and be cleared after each revision range.*/ 231 apr_file_t *reps_file; 232 233 /* pool used for temporary data structures that will be cleaned up when 234 * the next range of revisions is being processed */ 235 apr_pool_t *info_pool; 236} pack_context_t; 237 238/* Create and initialize a new pack context for packing shard SHARD_REV in 239 * SHARD_DIR into PACK_FILE_DIR within filesystem FS. Allocate it in POOL 240 * and return the structure in *CONTEXT. 241 * 242 * Limit the number of items being copied per iteration to MAX_ITEMS. 243 * Set CANCEL_FUNC and CANCEL_BATON as well. 244 */ 245static svn_error_t * 246initialize_pack_context(pack_context_t *context, 247 svn_fs_t *fs, 248 const char *pack_file_dir, 249 const char *shard_dir, 250 svn_revnum_t shard_rev, 251 int max_items, 252 svn_cancel_func_t cancel_func, 253 void *cancel_baton, 254 apr_pool_t *pool) 255{ 256 fs_fs_data_t *ffd = fs->fsap_data; 257 const char *temp_dir; 258 int max_revs = MIN(ffd->max_files_per_dir, max_items); 259 260 SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT); 261 SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0); 262 263 /* where we will place our various temp files */ 264 SVN_ERR(svn_io_temp_dir(&temp_dir, pool)); 265 266 /* store parameters */ 267 context->fs = fs; 268 context->cancel_func = cancel_func; 269 context->cancel_baton = cancel_baton; 270 271 context->shard_rev = shard_rev; 272 context->start_rev = shard_rev; 273 context->end_rev = shard_rev; 274 context->shard_end_rev = shard_rev + ffd->max_files_per_dir; 275 276 /* Create the new directory and pack file. */ 277 context->shard_dir = shard_dir; 278 context->pack_file_dir = pack_file_dir; 279 context->pack_file_path 280 = svn_dirent_join(pack_file_dir, PATH_PACKED, pool); 281 SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path, 282 APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL 283 | APR_CREATE, APR_OS_DEFAULT, pool)); 284 285 /* Proto index files */ 286 SVN_ERR(svn_fs_fs__l2p_proto_index_open( 287 &context->proto_l2p_index, 288 svn_dirent_join(pack_file_dir, 289 PATH_INDEX PATH_EXT_L2P_INDEX, 290 pool), 291 pool)); 292 SVN_ERR(svn_fs_fs__p2l_proto_index_open( 293 &context->proto_p2l_index, 294 svn_dirent_join(pack_file_dir, 295 PATH_INDEX PATH_EXT_P2L_INDEX, 296 pool), 297 pool)); 298 299 /* item buckets: one item info array and one temp file per bucket */ 300 context->changes = apr_array_make(pool, max_items, 301 sizeof(svn_fs_fs__p2l_entry_t *)); 302 SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir, 303 svn_io_file_del_on_close, pool, pool)); 304 context->file_props = apr_array_make(pool, max_items, 305 sizeof(svn_fs_fs__p2l_entry_t *)); 306 SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir, 307 svn_io_file_del_on_close, pool, pool)); 308 context->dir_props = apr_array_make(pool, max_items, 309 sizeof(svn_fs_fs__p2l_entry_t *)); 310 SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir, 311 svn_io_file_del_on_close, pool, pool)); 312 313 /* noderev and representation item bucket */ 314 context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int)); 315 context->path_order = apr_array_make(pool, max_items, 316 sizeof(path_order_t *)); 317 context->references = apr_array_make(pool, max_items, 318 sizeof(reference_t *)); 319 context->reps = apr_array_make(pool, max_items, 320 sizeof(svn_fs_fs__p2l_entry_t *)); 321 SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir, 322 svn_io_file_del_on_close, pool, pool)); 323 324 /* the pool used for temp structures */ 325 context->info_pool = svn_pool_create(pool); 326 context->paths = svn_prefix_tree__create(context->info_pool); 327 328 return SVN_NO_ERROR; 329} 330 331/* Clean up / free all revision range specific data and files in CONTEXT. 332 * Use POOL for temporary allocations. 333 */ 334static svn_error_t * 335reset_pack_context(pack_context_t *context, 336 apr_pool_t *pool) 337{ 338 apr_array_clear(context->changes); 339 SVN_ERR(svn_io_file_trunc(context->changes_file, 0, pool)); 340 apr_array_clear(context->file_props); 341 SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, pool)); 342 apr_array_clear(context->dir_props); 343 SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, pool)); 344 345 apr_array_clear(context->rev_offsets); 346 apr_array_clear(context->path_order); 347 apr_array_clear(context->references); 348 apr_array_clear(context->reps); 349 SVN_ERR(svn_io_file_trunc(context->reps_file, 0, pool)); 350 351 svn_pool_clear(context->info_pool); 352 353 return SVN_NO_ERROR; 354} 355 356/* Call this after the last revision range. It will finalize all index files 357 * for CONTEXT and close any open files. Use POOL for temporary allocations. 358 */ 359static svn_error_t * 360close_pack_context(pack_context_t *context, 361 apr_pool_t *pool) 362{ 363 const char *proto_l2p_index_path; 364 const char *proto_p2l_index_path; 365 366 /* need the file names for the actual index creation call further down */ 367 SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path, 368 context->proto_l2p_index, pool)); 369 SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path, 370 context->proto_p2l_index, pool)); 371 372 /* finalize proto index files */ 373 SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool)); 374 SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool)); 375 376 /* Append the actual index data to the pack file. */ 377 SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file, 378 proto_l2p_index_path, 379 proto_p2l_index_path, 380 context->shard_rev, 381 pool)); 382 383 /* remove proto index files */ 384 SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool)); 385 SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool)); 386 387 /* Ensure that packed file is written to disk.*/ 388 SVN_ERR(svn_io_file_flush_to_disk(context->pack_file, pool)); 389 SVN_ERR(svn_io_file_close(context->pack_file, pool)); 390 391 return SVN_NO_ERROR; 392} 393 394/* Efficiently copy SIZE bytes from SOURCE to DEST. Invoke the CANCEL_FUNC 395 * from CONTEXT at regular intervals. Use POOL for allocations. 396 */ 397static svn_error_t * 398copy_file_data(pack_context_t *context, 399 apr_file_t *dest, 400 apr_file_t *source, 401 apr_off_t size, 402 apr_pool_t *pool) 403{ 404 /* most non-representation items will be small. Minimize the buffer 405 * and infrastructure overhead in that case. */ 406 enum { STACK_BUFFER_SIZE = 1024 }; 407 408 if (size < STACK_BUFFER_SIZE) 409 { 410 /* copy small data using a fixed-size buffer on stack */ 411 char buffer[STACK_BUFFER_SIZE]; 412 SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size, 413 NULL, NULL, pool)); 414 SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size, 415 NULL, pool)); 416 } 417 else 418 { 419 /* use streaming copies for larger data blocks. That may require 420 * the allocation of larger buffers and we should make sure that 421 * this extra memory is released asap. */ 422 fs_fs_data_t *ffd = context->fs->fsap_data; 423 apr_pool_t *copypool = svn_pool_create(pool); 424 char *buffer = apr_palloc(copypool, ffd->block_size); 425 426 while (size) 427 { 428 apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size)); 429 if (context->cancel_func) 430 SVN_ERR(context->cancel_func(context->cancel_baton)); 431 432 SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy, 433 NULL, NULL, pool)); 434 SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy, 435 NULL, pool)); 436 437 size -= to_copy; 438 } 439 440 svn_pool_destroy(copypool); 441 } 442 443 return SVN_NO_ERROR; 444} 445 446/* Writes SIZE bytes, all 0, to DEST. Uses POOL for allocations. 447 */ 448static svn_error_t * 449write_null_bytes(apr_file_t *dest, 450 apr_off_t size, 451 apr_pool_t *pool) 452{ 453 /* Have a collection of high-quality, easy to access NUL bytes handy. */ 454 enum { BUFFER_SIZE = 1024 }; 455 static const char buffer[BUFFER_SIZE] = { 0 }; 456 457 /* copy SIZE of them into the file's buffer */ 458 while (size) 459 { 460 apr_size_t to_write = MIN(size, BUFFER_SIZE); 461 SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool)); 462 size -= to_write; 463 } 464 465 return SVN_NO_ERROR; 466} 467 468/* Copy the "simple" item (changed paths list or property representation) 469 * from the current position in REV_FILE to TEMP_FILE using CONTEXT. Add 470 * a copy of ENTRY to ENTRIES but with an updated offset value that points 471 * to the copy destination in TEMP_FILE. Use POOL for allocations. 472 */ 473static svn_error_t * 474copy_item_to_temp(pack_context_t *context, 475 apr_array_header_t *entries, 476 apr_file_t *temp_file, 477 apr_file_t *rev_file, 478 svn_fs_fs__p2l_entry_t *entry, 479 apr_pool_t *pool) 480{ 481 svn_fs_fs__p2l_entry_t *new_entry 482 = apr_pmemdup(context->info_pool, entry, sizeof(*entry)); 483 484 SVN_ERR(svn_fs_fs__get_file_offset(&new_entry->offset, temp_file, pool)); 485 APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry; 486 487 SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool)); 488 489 return SVN_NO_ERROR; 490} 491 492/* Return the offset within CONTEXT->REPS that corresponds to item 493 * ITEM_INDEX in REVISION. 494 */ 495static int 496get_item_array_index(pack_context_t *context, 497 svn_revnum_t revision, 498 apr_int64_t item_index) 499{ 500 assert(revision >= context->start_rev); 501 return (int)item_index + APR_ARRAY_IDX(context->rev_offsets, 502 revision - context->start_rev, 503 int); 504} 505 506/* Write INFO to the correct position in CONTEXT->REP_INFOS. The latter 507 * may need auto-expanding. Overwriting an array element is not allowed. 508 */ 509static void 510add_item_rep_mapping(pack_context_t *context, 511 svn_fs_fs__p2l_entry_t *entry) 512{ 513 int idx; 514 515 /* index of INFO */ 516 idx = get_item_array_index(context, 517 entry->item.revision, 518 entry->item.number); 519 520 /* make sure the index exists in the array */ 521 while (context->reps->nelts <= idx) 522 APR_ARRAY_PUSH(context->reps, void *) = NULL; 523 524 /* set the element. If there is already an entry, there are probably 525 * two items claiming to be the same -> bail out */ 526 assert(!APR_ARRAY_IDX(context->reps, idx, void *)); 527 APR_ARRAY_IDX(context->reps, idx, void *) = entry; 528} 529 530/* Return the P2L entry from CONTEXT->REPS for the given ID. If there is 531 * none (or not anymore), return NULL. If RESET has been specified, set 532 * the array entry to NULL after returning the entry. 533 */ 534static svn_fs_fs__p2l_entry_t * 535get_item(pack_context_t *context, 536 const svn_fs_fs__id_part_t *id, 537 svn_boolean_t reset) 538{ 539 svn_fs_fs__p2l_entry_t *result = NULL; 540 if (id->number && id->revision >= context->start_rev) 541 { 542 int idx = get_item_array_index(context, id->revision, id->number); 543 if (context->reps->nelts > idx) 544 { 545 result = APR_ARRAY_IDX(context->reps, idx, void *); 546 if (result && reset) 547 APR_ARRAY_IDX(context->reps, idx, void *) = NULL; 548 } 549 } 550 551 return result; 552} 553 554/* Copy representation item identified by ENTRY from the current position 555 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by 556 * our placement algorithm to CONTEXT. Use POOL for temporary allocations. 557 */ 558static svn_error_t * 559copy_rep_to_temp(pack_context_t *context, 560 apr_file_t *rev_file, 561 svn_fs_fs__p2l_entry_t *entry, 562 apr_pool_t *pool) 563{ 564 svn_fs_fs__rep_header_t *rep_header; 565 svn_stream_t *stream; 566 apr_off_t source_offset = entry->offset; 567 568 /* create a copy of ENTRY, make it point to the copy destination and 569 * store it in CONTEXT */ 570 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry)); 571 SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file, pool)); 572 add_item_rep_mapping(context, entry); 573 574 /* read & parse the representation header */ 575 stream = svn_stream_from_aprfile2(rev_file, TRUE, pool); 576 SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool)); 577 svn_stream_close(stream); 578 579 /* if the representation is a delta against some other rep, link the two */ 580 if ( rep_header->type == svn_fs_fs__rep_delta 581 && rep_header->base_revision >= context->start_rev) 582 { 583 reference_t *reference = apr_pcalloc(context->info_pool, 584 sizeof(*reference)); 585 reference->from = entry->item; 586 reference->to.revision = rep_header->base_revision; 587 reference->to.number = rep_header->base_item_index; 588 APR_ARRAY_PUSH(context->references, reference_t *) = reference; 589 } 590 591 /* copy the whole rep (including header!) to our temp file */ 592 SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool)); 593 SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size, 594 pool)); 595 596 return SVN_NO_ERROR; 597} 598 599/* Directories first, dirs / files sorted by name in reverse lexical order. 600 * This maximizes the chance of two items being located close to one another 601 * in *all* pack files independent of their change order. It also groups 602 * multi-project repos nicely according to their sub-projects. The reverse 603 * order aspect gives "trunk" preference over "tags" and "branches", so 604 * trunk-related items are more likely to be contiguous. 605 */ 606static int 607compare_dir_entries_format7(const svn_sort__item_t *a, 608 const svn_sort__item_t *b) 609{ 610 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value; 611 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value; 612 613 if (lhs->kind != rhs->kind) 614 return lhs->kind == svn_node_dir ? -1 : 1; 615 616 return strcmp(lhs->name, rhs->name); 617} 618 619/* Directories entries sorted by revision (decreasing - to max cache hits) 620 * and offset (increasing - to max benefit from APR file buffering). 621 */ 622static int 623compare_dir_entries_format6(const svn_sort__item_t *a, 624 const svn_sort__item_t *b) 625{ 626 const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value; 627 const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value; 628 629 const svn_fs_fs__id_part_t *lhs_rev_item 630 = svn_fs_fs__id_rev_item(lhs->id); 631 const svn_fs_fs__id_part_t *rhs_rev_item 632 = svn_fs_fs__id_rev_item(rhs->id); 633 634 /* decreasing ("reverse") order on revs */ 635 if (lhs_rev_item->revision != rhs_rev_item->revision) 636 return lhs_rev_item->revision > rhs_rev_item->revision ? -1 : 1; 637 638 /* increasing order on offsets */ 639 if (lhs_rev_item->number != rhs_rev_item->number) 640 return lhs_rev_item->number > rhs_rev_item->number ? 1 : -1; 641 642 return 0; 643} 644 645apr_array_header_t * 646svn_fs_fs__order_dir_entries(svn_fs_t *fs, 647 apr_hash_t *directory, 648 apr_pool_t *result_pool, 649 apr_pool_t *scratch_pool) 650{ 651 apr_array_header_t *ordered 652 = svn_sort__hash(directory, 653 svn_fs_fs__use_log_addressing(fs) 654 ? compare_dir_entries_format7 655 : compare_dir_entries_format6, 656 scratch_pool); 657 658 apr_array_header_t *result 659 = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *)); 660 661 int i; 662 for (i = 0; i < ordered->nelts; ++i) 663 APR_ARRAY_PUSH(result, svn_fs_dirent_t *) 664 = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value; 665 666 return result; 667} 668 669/* Return a duplicate of the the ORIGINAL path and with special sub-strins 670 * (e.g. "trunk") modified in such a way that have a lower lexicographic 671 * value than any other "normal" file name. 672 */ 673static const char * 674tweak_path_for_ordering(const char *original, 675 apr_pool_t *pool) 676{ 677 /* We may add further special cases as needed. */ 678 enum {SPECIAL_COUNT = 2}; 679 static const char *special[SPECIAL_COUNT] = {"trunk", "branch"}; 680 char *pos; 681 char *path = apr_pstrdup(pool, original); 682 int i; 683 684 /* Replace the first char of any "special" sub-string we find by 685 * a control char, i.e. '\1' .. '\31'. In the rare event that this 686 * would clash with existing paths, no data will be lost but merely 687 * the node ordering will be sub-optimal. 688 */ 689 for (i = 0; i < SPECIAL_COUNT; ++i) 690 for (pos = strstr(path, special[i]); 691 pos; 692 pos = strstr(pos + 1, special[i])) 693 { 694 *pos = (char)(i + '\1'); 695 } 696 697 return path; 698} 699 700/* Copy node revision item identified by ENTRY from the current position 701 * in REV_FILE into CONTEXT->REPS_FILE. Add all tracking into needed by 702 * our placement algorithm to CONTEXT. Use POOL for temporary allocations. 703 */ 704static svn_error_t * 705copy_node_to_temp(pack_context_t *context, 706 apr_file_t *rev_file, 707 svn_fs_fs__p2l_entry_t *entry, 708 apr_pool_t *pool) 709{ 710 path_order_t *path_order = apr_pcalloc(context->info_pool, 711 sizeof(*path_order)); 712 node_revision_t *noderev; 713 const char *sort_path; 714 svn_stream_t *stream; 715 apr_off_t source_offset = entry->offset; 716 717 /* read & parse noderev */ 718 stream = svn_stream_from_aprfile2(rev_file, TRUE, pool); 719 SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool, pool)); 720 svn_stream_close(stream); 721 722 /* create a copy of ENTRY, make it point to the copy destination and 723 * store it in CONTEXT */ 724 entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry)); 725 SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file, 726 pool)); 727 add_item_rep_mapping(context, entry); 728 729 /* copy the noderev to our temp file */ 730 SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool)); 731 SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size, 732 pool)); 733 734 /* if the node has a data representation, make that the node's "base". 735 * This will (often) cause the noderev to be placed right in front of 736 * its data representation. */ 737 738 if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev) 739 { 740 path_order->rep_id.revision = noderev->data_rep->revision; 741 path_order->rep_id.number = noderev->data_rep->item_index; 742 path_order->expanded_size = noderev->data_rep->expanded_size 743 ? noderev->data_rep->expanded_size 744 : noderev->data_rep->size; 745 } 746 747 /* Sort path is the key used for ordering noderevs and associated reps. 748 * It will not be stored in the final pack file. */ 749 sort_path = tweak_path_for_ordering(noderev->created_path, pool); 750 path_order->path = svn_prefix_string__create(context->paths, sort_path); 751 path_order->node_id = *svn_fs_fs__id_node_id(noderev->id); 752 path_order->revision = svn_fs_fs__id_rev(noderev->id); 753 path_order->predecessor_count = noderev->predecessor_count; 754 path_order->is_dir = noderev->kind == svn_node_dir; 755 path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id); 756 APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order; 757 758 return SVN_NO_ERROR; 759} 760 761/* implements compare_fn_t. Bring all directories in front of the files 762 and sort descendingly by PATH, NODE_ID and REVISION. 763 */ 764static int 765compare_path_order(const path_order_t * const * lhs_p, 766 const path_order_t * const * rhs_p) 767{ 768 const path_order_t * lhs = *lhs_p; 769 const path_order_t * rhs = *rhs_p; 770 771 /* cluster all directories */ 772 int diff = rhs->is_dir - lhs->is_dir; 773 if (diff) 774 return diff; 775 776 /* lexicographic order on path and node (i.e. latest first) */ 777 diff = svn_prefix_string__compare(lhs->path, rhs->path); 778 if (diff) 779 return diff; 780 781 /* reverse order on node (i.e. latest first) */ 782 diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id); 783 if (diff) 784 return diff; 785 786 /* reverse order on revision (i.e. latest first) */ 787 if (lhs->revision != rhs->revision) 788 return lhs->revision < rhs->revision ? 1 : -1; 789 790 return 0; 791} 792 793/* implements compare_fn_t. Sort ascendingly by FROM, TO. 794 */ 795static int 796compare_references(const reference_t * const * lhs_p, 797 const reference_t * const * rhs_p) 798{ 799 const reference_t * lhs = *lhs_p; 800 const reference_t * rhs = *rhs_p; 801 802 int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from); 803 return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to); 804} 805 806/* implements compare_fn_t. Assume ascending order by FROM. 807 */ 808static int 809compare_ref_to_item(const reference_t * const * lhs_p, 810 const svn_fs_fs__id_part_t * rhs_p) 811{ 812 return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p); 813} 814 815/* implements compare_fn_t. Finds the DIR / FILE boundary. 816 */ 817static int 818compare_is_dir(const path_order_t * const * lhs_p, 819 const void *unused) 820{ 821 return (*lhs_p)->is_dir ? -1 : 0; 822} 823 824/* Look for the least significant bit set in VALUE and return the smallest 825 * number with the same property, i.e. the largest power of 2 that is a 826 * factor in VALUE. */ 827static int 828roundness(int value) 829{ 830 return value ? value - (value & (value - 1)) : INT_MAX; 831} 832 833/* Order a range of data collected in CONTEXT such that we can place them 834 * in the desired order. The input is taken from *PATH_ORDER, offsets FIRST 835 * to LAST and then written in the final order to the same range in *TEMP. 836 */ 837static void 838sort_reps_range(pack_context_t *context, 839 const path_order_t **path_order, 840 const path_order_t **temp, 841 int first, 842 int last) 843{ 844 const svn_prefix_string__t *path; 845 int i, dest, best; 846 svn_fs_fs__id_part_t rep_id; 847 fs_fs_data_t *ffd = context->fs->fsap_data; 848 849 /* The logic below would fail for empty ranges. */ 850 if (first == last) 851 return; 852 853 /* Re-order noderevs like this: 854 * 855 * (1) Most likely to be referenced by future pack files, in path order. 856 * (2) highest revision rep per path + dependency chain 857 * (3) Remaining reps in path, rev order 858 * 859 * We simply pick & chose from the existing path, rev order. 860 */ 861 dest = first; 862 path = path_order[first]->path; 863 best = first; 864 865 /* (1) For each path, pick the "roundest" representation and put it in 866 * front of all other nodes in the pack file. The "roundest" rep is 867 * the one most likely to be referenced from future pack files, i.e. we 868 * concentrate those potential "foreign link targets" in one section of 869 * the pack file. 870 * 871 * And we only apply this to reps outside the linear deltification 872 * sections because references *into* linear deltification ranges are 873 * much less likely. 874 */ 875 for (i = first; i < last; ++i) 876 { 877 /* Investigated all nodes for the current path? */ 878 if (svn_prefix_string__compare(path, path_order[i]->path)) 879 { 880 /* next path */ 881 path = path_order[i]->path; 882 883 /* Pick roundest non-linear deltified node. */ 884 if (roundness(path_order[best]->predecessor_count) 885 >= ffd->max_linear_deltification) 886 { 887 temp[dest++] = path_order[best]; 888 path_order[best] = NULL; 889 best = i; 890 } 891 } 892 893 /* next entry */ 894 if ( roundness(path_order[best]->predecessor_count) 895 < roundness(path_order[i]->predecessor_count)) 896 best = i; 897 } 898 899 /* Treat the last path the same as all others. */ 900 if (roundness(path_order[best]->predecessor_count) 901 >= ffd->max_linear_deltification) 902 { 903 temp[dest++] = path_order[best]; 904 path_order[best] = NULL; 905 } 906 907 /* (2) For each (remaining) path, pick the nodes along the delta chain 908 * for the highest revision. Due to our ordering, this is the first 909 * node we encounter for any path. 910 * 911 * Most references that don't hit a delta base picked in (1), will 912 * access HEAD of the respective path. Keeping all its dependency chain 913 * in one place turns reconstruction into a linear scan of minimal length. 914 */ 915 for (i = first; i < last; ++i) 916 if (path_order[i]) 917 { 918 /* This is the first path we still have to handle. */ 919 path = path_order[i]->path; 920 rep_id = path_order[i]->rep_id; 921 break; 922 } 923 924 for (i = first; i < last; ++i) 925 if (path_order[i]) 926 { 927 /* New path? */ 928 if (svn_prefix_string__compare(path, path_order[i]->path)) 929 { 930 path = path_order[i]->path; 931 rep_id = path_order[i]->rep_id; 932 } 933 934 /* Pick nodes along the deltification chain. Skip side-branches. */ 935 if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id)) 936 { 937 reference_t **reference; 938 939 temp[dest++] = path_order[i]; 940 path_order[i] = NULL; 941 942 reference = svn_sort__array_lookup(context->references, 943 &rep_id, NULL, 944 (int (*)(const void *, const void *))compare_ref_to_item); 945 if (reference) 946 rep_id = (*reference)->to; 947 } 948 } 949 950 /* (3) All remaining nodes in path, rev order. Linear deltification 951 * makes HEAD delta chains from (2) cover all or most of their deltas 952 * in a given pack file. So, this is just a few remnants that we put 953 * at the end of the pack file. 954 */ 955 for (i = first; i < last; ++i) 956 if (path_order[i]) 957 temp[dest++] = path_order[i]; 958 959 /* We now know the final ordering. */ 960 assert(dest == last); 961} 962 963/* Order the data collected in CONTEXT such that we can place them in the 964 * desired order. 965 */ 966static void 967sort_reps(pack_context_t *context) 968{ 969 apr_pool_t *temp_pool; 970 const path_order_t **temp, **path_order; 971 int i, count, dir_count; 972 973 /* We will later assume that there is at least one node / path. 974 */ 975 if (context->path_order->nelts == 0) 976 { 977 assert(context->references->nelts == 0); 978 return; 979 } 980 981 /* Sort containers by path and IDs, respectively. 982 */ 983 svn_sort__array(context->path_order, 984 (int (*)(const void *, const void *))compare_path_order); 985 svn_sort__array(context->references, 986 (int (*)(const void *, const void *))compare_references); 987 988 /* Directories are already in front; sort directories section and files 989 * section separately but use the same heuristics (see sub-function). 990 */ 991 temp_pool = svn_pool_create(context->info_pool); 992 count = context->path_order->nelts; 993 temp = apr_pcalloc(temp_pool, count * sizeof(*temp)); 994 path_order = (void *)context->path_order->elts; 995 996 /* Find the boundary between DIR and FILE section. */ 997 dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL, 998 (int (*)(const void *, const void *))compare_is_dir); 999 1000 /* Sort those sub-sections separately. */ 1001 sort_reps_range(context, path_order, temp, 0, dir_count); 1002 sort_reps_range(context, path_order, temp, dir_count, count); 1003 1004 /* We now know the final ordering. */ 1005 for (i = 0; i < count; ++i) 1006 path_order[i] = temp[i]; 1007 1008 svn_pool_destroy(temp_pool); 1009} 1010 1011/* implements compare_fn_t. Place LHS before RHS, if the latter is older. 1012 */ 1013static int 1014compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs, 1015 const svn_fs_fs__p2l_entry_t * const * rhs) 1016{ 1017 assert(*lhs != *rhs); 1018 1019 if ((*lhs)->item.revision == (*rhs)->item.revision) 1020 return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1; 1021 1022 return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1; 1023} 1024 1025/* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age. Place the latest 1026 * items first. 1027 */ 1028static void 1029sort_items(apr_array_header_t *entries) 1030{ 1031 svn_sort__array(entries, 1032 (int (*)(const void *, const void *))compare_p2l_info); 1033} 1034 1035/* Return the remaining unused bytes in the current block in CONTEXT's 1036 * pack file. 1037 */ 1038static apr_ssize_t 1039get_block_left(pack_context_t *context) 1040{ 1041 fs_fs_data_t *ffd = context->fs->fsap_data; 1042 return ffd->block_size - (context->pack_offset % ffd->block_size); 1043} 1044 1045/* To prevent items from overlapping a block boundary, we will usually 1046 * put them into the next block and top up the old one with NUL bytes. 1047 * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does 1048 * not fit into the current block and the padding is short enough. 1049 * Use POOL for allocations. 1050 */ 1051static svn_error_t * 1052auto_pad_block(pack_context_t *context, 1053 apr_off_t to_add, 1054 apr_pool_t *pool) 1055{ 1056 fs_fs_data_t *ffd = context->fs->fsap_data; 1057 1058 /* This is the maximum number of bytes "wasted" that way per block. 1059 * Larger items will cross the block boundaries. */ 1060 const apr_off_t max_padding = MAX(ffd->block_size / 50, 512); 1061 1062 /* Is wasted space small enough to align the current item to the next 1063 * block? */ 1064 apr_off_t padding = get_block_left(context); 1065 1066 if (padding < to_add && padding < max_padding) 1067 { 1068 /* Yes. To up with NUL bytes and don't forget to create 1069 * an P2L index entry marking this section as unused. */ 1070 svn_fs_fs__p2l_entry_t null_entry; 1071 1072 null_entry.offset = context->pack_offset; 1073 null_entry.size = padding; 1074 null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED; 1075 null_entry.item.revision = SVN_INVALID_REVNUM; 1076 null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED; 1077 null_entry.fnv1_checksum = 0; 1078 1079 SVN_ERR(write_null_bytes(context->pack_file, padding, pool)); 1080 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry( 1081 context->proto_p2l_index, &null_entry, pool)); 1082 context->pack_offset += padding; 1083 } 1084 1085 return SVN_NO_ERROR; 1086} 1087 1088/* Read the contents of ITEM, if not empty, from TEMP_FILE and write it 1089 * to CONTEXT->PACK_FILE. Use POOL for allocations. 1090 */ 1091static svn_error_t * 1092store_item(pack_context_t *context, 1093 apr_file_t *temp_file, 1094 svn_fs_fs__p2l_entry_t *item, 1095 apr_pool_t *pool) 1096{ 1097 apr_off_t safety_margin; 1098 1099 /* skip empty entries */ 1100 if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED) 1101 return SVN_NO_ERROR; 1102 1103 /* If the next item does not fit into the current block, auto-pad it. 1104 Take special care of textual noderevs since their parsers may 1105 prefetch up to 80 bytes and we don't want them to cross block 1106 boundaries. */ 1107 safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV 1108 ? SVN__LINE_CHUNK_SIZE 1109 : 0; 1110 SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool)); 1111 1112 /* select the item in the source file and copy it into the target 1113 * pack file */ 1114 SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &item->offset, pool)); 1115 SVN_ERR(copy_file_data(context, context->pack_file, temp_file, 1116 item->size, pool)); 1117 1118 /* write index entry and update current position */ 1119 item->offset = context->pack_offset; 1120 context->pack_offset += item->size; 1121 1122 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index, 1123 item, pool)); 1124 1125 APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item; 1126 1127 return SVN_NO_ERROR; 1128} 1129 1130/* Read the contents of the non-empty items in ITEMS from TEMP_FILE and 1131 * write them to CONTEXT->PACK_FILE. Use POOL for allocations. 1132 */ 1133static svn_error_t * 1134store_items(pack_context_t *context, 1135 apr_file_t *temp_file, 1136 apr_array_header_t *items, 1137 apr_pool_t *pool) 1138{ 1139 int i; 1140 apr_pool_t *iterpool = svn_pool_create(pool); 1141 1142 /* copy all items in strict order */ 1143 for (i = 0; i < items->nelts; ++i) 1144 { 1145 svn_pool_clear(iterpool); 1146 SVN_ERR(store_item(context, temp_file, 1147 APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *), 1148 iterpool)); 1149 } 1150 1151 svn_pool_destroy(iterpool); 1152 1153 return SVN_NO_ERROR; 1154} 1155 1156/* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements 1157 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE. 1158 * Use POOL for temporary allocations. 1159 */ 1160static svn_error_t * 1161copy_reps_from_temp(pack_context_t *context, 1162 apr_file_t *temp_file, 1163 apr_pool_t *pool) 1164{ 1165 apr_pool_t *iterpool = svn_pool_create(pool); 1166 apr_array_header_t *path_order = context->path_order; 1167 int i; 1168 1169 /* copy items in path order. */ 1170 for (i = 0; i < path_order->nelts; ++i) 1171 { 1172 path_order_t *current_path; 1173 svn_fs_fs__p2l_entry_t *node_part; 1174 svn_fs_fs__p2l_entry_t *rep_part; 1175 1176 svn_pool_clear(iterpool); 1177 1178 current_path = APR_ARRAY_IDX(path_order, i, path_order_t *); 1179 node_part = get_item(context, ¤t_path->noderev_id, TRUE); 1180 rep_part = get_item(context, ¤t_path->rep_id, TRUE); 1181 1182 if (node_part) 1183 SVN_ERR(store_item(context, temp_file, node_part, iterpool)); 1184 if (rep_part) 1185 SVN_ERR(store_item(context, temp_file, rep_part, iterpool)); 1186 } 1187 1188 svn_pool_destroy(iterpool); 1189 1190 return SVN_NO_ERROR; 1191} 1192 1193/* implements compare_fn_t. Place LHS before RHS, if the latter belongs to 1194 * a newer revision. 1195 */ 1196static int 1197compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p, 1198 const svn_fs_fs__p2l_entry_t * const * rhs_p) 1199{ 1200 const svn_fs_fs__p2l_entry_t * lhs = *lhs_p; 1201 const svn_fs_fs__p2l_entry_t * rhs = *rhs_p; 1202 1203 if (lhs->item.revision == rhs->item.revision) 1204 return 0; 1205 1206 return lhs->item.revision < rhs->item.revision ? -1 : 1; 1207} 1208 1209/* Write the log-to-phys proto index file for CONTEXT and use POOL for 1210 * temporary allocations. All items in all buckets must have been placed 1211 * by now. 1212 */ 1213static svn_error_t * 1214write_l2p_index(pack_context_t *context, 1215 apr_pool_t *pool) 1216{ 1217 apr_pool_t *iterpool = svn_pool_create(pool); 1218 svn_revnum_t prev_rev = SVN_INVALID_REVNUM; 1219 int i, dest; 1220 1221 /* eliminate empty entries from CONTEXT->REPS */ 1222 for (i = 0, dest = 0; i < context->reps->nelts; ++i) 1223 { 1224 svn_fs_fs__p2l_entry_t *entry 1225 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *); 1226 if (entry) 1227 APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *) 1228 = entry; 1229 } 1230 context->reps->nelts = dest; 1231 1232 /* we need to write the l2p index revision by revision */ 1233 svn_sort__array(context->reps, 1234 (int (*)(const void *, const void *))compare_p2l_info_rev); 1235 1236 /* write index entries */ 1237 for (i = 0; i < context->reps->nelts; ++i) 1238 { 1239 svn_fs_fs__p2l_entry_t *p2l_entry 1240 = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *); 1241 if (p2l_entry == NULL) 1242 continue; 1243 1244 /* next revision? */ 1245 if (prev_rev != p2l_entry->item.revision) 1246 { 1247 prev_rev = p2l_entry->item.revision; 1248 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision( 1249 context->proto_l2p_index, iterpool)); 1250 } 1251 1252 /* add entry */ 1253 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index, 1254 p2l_entry->offset, 1255 p2l_entry->item.number, 1256 iterpool)); 1257 1258 /* keep memory usage in check */ 1259 if (i % 256 == 0) 1260 svn_pool_clear(iterpool); 1261 } 1262 1263 svn_pool_destroy(iterpool); 1264 1265 return SVN_NO_ERROR; 1266} 1267 1268/* Pack the current revision range of CONTEXT, i.e. this covers phases 2 1269 * to 4. Use POOL for allocations. 1270 */ 1271static svn_error_t * 1272pack_range(pack_context_t *context, 1273 apr_pool_t *pool) 1274{ 1275 fs_fs_data_t *ffd = context->fs->fsap_data; 1276 apr_pool_t *revpool = svn_pool_create(pool); 1277 apr_pool_t *iterpool = svn_pool_create(pool); 1278 apr_pool_t *iterpool2 = svn_pool_create(pool); 1279 1280 /* Phase 2: Copy items into various buckets and build tracking info */ 1281 svn_revnum_t revision; 1282 for (revision = context->start_rev; revision < context->end_rev; ++revision) 1283 { 1284 apr_off_t offset = 0; 1285 svn_fs_fs__revision_file_t *rev_file; 1286 1287 svn_pool_clear(revpool); 1288 1289 /* Get the rev file dimensions (mainly index locations). */ 1290 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs, 1291 revision, revpool, iterpool)); 1292 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); 1293 1294 /* store the indirect array index */ 1295 APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts; 1296 1297 /* read the phys-to-log index file until we covered the whole rev file. 1298 * That index contains enough info to build both target indexes from it. */ 1299 while (offset < rev_file->l2p_offset) 1300 { 1301 /* read one cluster */ 1302 int i; 1303 apr_array_header_t *entries; 1304 1305 svn_pool_clear(iterpool); 1306 1307 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, 1308 rev_file, revision, offset, 1309 ffd->p2l_page_size, iterpool, 1310 iterpool)); 1311 1312 for (i = 0; i < entries->nelts; ++i) 1313 { 1314 svn_fs_fs__p2l_entry_t *entry 1315 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); 1316 1317 /* skip first entry if that was duplicated due crossing a 1318 cluster boundary */ 1319 if (offset > entry->offset) 1320 continue; 1321 1322 svn_pool_clear(iterpool2); 1323 1324 /* process entry while inside the rev file */ 1325 offset = entry->offset; 1326 if (offset < rev_file->l2p_offset) 1327 { 1328 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset, 1329 iterpool2)); 1330 1331 if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES) 1332 SVN_ERR(copy_item_to_temp(context, 1333 context->changes, 1334 context->changes_file, 1335 rev_file->file, entry, 1336 iterpool2)); 1337 else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS) 1338 SVN_ERR(copy_item_to_temp(context, 1339 context->file_props, 1340 context->file_props_file, 1341 rev_file->file, entry, 1342 iterpool2)); 1343 else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS) 1344 SVN_ERR(copy_item_to_temp(context, 1345 context->dir_props, 1346 context->dir_props_file, 1347 rev_file->file, entry, 1348 iterpool2)); 1349 else if ( entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP 1350 || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP) 1351 SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry, 1352 iterpool2)); 1353 else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV) 1354 SVN_ERR(copy_node_to_temp(context, rev_file->file, entry, 1355 iterpool2)); 1356 else 1357 SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED); 1358 1359 offset += entry->size; 1360 } 1361 } 1362 1363 if (context->cancel_func) 1364 SVN_ERR(context->cancel_func(context->cancel_baton)); 1365 } 1366 } 1367 1368 svn_pool_destroy(iterpool2); 1369 svn_pool_destroy(iterpool); 1370 1371 /* phase 3: placement. 1372 * Use "newest first" placement for simple items. */ 1373 sort_items(context->changes); 1374 sort_items(context->file_props); 1375 sort_items(context->dir_props); 1376 1377 /* follow dependencies recursively for noderevs and data representations */ 1378 sort_reps(context); 1379 1380 /* phase 4: copy bucket data to pack file. Write P2L index. */ 1381 SVN_ERR(store_items(context, context->changes_file, context->changes, 1382 revpool)); 1383 svn_pool_clear(revpool); 1384 SVN_ERR(store_items(context, context->file_props_file, context->file_props, 1385 revpool)); 1386 svn_pool_clear(revpool); 1387 SVN_ERR(store_items(context, context->dir_props_file, context->dir_props, 1388 revpool)); 1389 svn_pool_clear(revpool); 1390 SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool)); 1391 svn_pool_clear(revpool); 1392 1393 /* write L2P index as well (now that we know all target offsets) */ 1394 SVN_ERR(write_l2p_index(context, revpool)); 1395 1396 svn_pool_destroy(revpool); 1397 1398 return SVN_NO_ERROR; 1399} 1400 1401/* Append CONTEXT->START_REV to the context's pack file with no re-ordering. 1402 * This function will only be used for very large revisions (>>100k changes). 1403 * Use POOL for temporary allocations. 1404 */ 1405static svn_error_t * 1406append_revision(pack_context_t *context, 1407 apr_pool_t *pool) 1408{ 1409 fs_fs_data_t *ffd = context->fs->fsap_data; 1410 apr_off_t offset = 0; 1411 apr_pool_t *iterpool = svn_pool_create(pool); 1412 svn_fs_fs__revision_file_t *rev_file; 1413 apr_finfo_t finfo; 1414 1415 /* Get the size of the file. */ 1416 const char *path = svn_dirent_join(context->shard_dir, 1417 apr_psprintf(iterpool, "%ld", 1418 context->start_rev), 1419 pool); 1420 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, pool)); 1421 1422 /* Copy all the bits from the rev file to the end of the pack file. */ 1423 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs, 1424 context->start_rev, pool, 1425 iterpool)); 1426 SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file, 1427 finfo.size, iterpool)); 1428 1429 /* mark the start of a new revision */ 1430 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index, 1431 pool)); 1432 1433 /* read the phys-to-log index file until we covered the whole rev file. 1434 * That index contains enough info to build both target indexes from it. */ 1435 while (offset < finfo.size) 1436 { 1437 /* read one cluster */ 1438 int i; 1439 apr_array_header_t *entries; 1440 1441 svn_pool_clear(iterpool); 1442 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file, 1443 context->start_rev, offset, 1444 ffd->p2l_page_size, iterpool, 1445 iterpool)); 1446 1447 for (i = 0; i < entries->nelts; ++i) 1448 { 1449 svn_fs_fs__p2l_entry_t *entry 1450 = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t); 1451 1452 /* skip first entry if that was duplicated due crossing a 1453 cluster boundary */ 1454 if (offset > entry->offset) 1455 continue; 1456 1457 /* process entry while inside the rev file */ 1458 offset = entry->offset; 1459 if (offset < finfo.size) 1460 { 1461 entry->offset += context->pack_offset; 1462 offset += entry->size; 1463 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry( 1464 context->proto_l2p_index, entry->offset, 1465 entry->item.number, iterpool)); 1466 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry( 1467 context->proto_p2l_index, entry, iterpool)); 1468 } 1469 } 1470 } 1471 1472 svn_pool_destroy(iterpool); 1473 context->pack_offset += finfo.size; 1474 1475 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 1476 1477 return SVN_NO_ERROR; 1478} 1479 1480/* Logical addressing mode packing logic. 1481 * 1482 * Pack the revision shard starting at SHARD_REV in filesystem FS from 1483 * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations. Limit 1484 * the extra memory consumption to MAX_MEM bytes. CANCEL_FUNC and 1485 * CANCEL_BATON are what you think they are. 1486 */ 1487static svn_error_t * 1488pack_log_addressed(svn_fs_t *fs, 1489 const char *pack_file_dir, 1490 const char *shard_dir, 1491 svn_revnum_t shard_rev, 1492 apr_size_t max_mem, 1493 svn_cancel_func_t cancel_func, 1494 void *cancel_baton, 1495 apr_pool_t *pool) 1496{ 1497 enum 1498 { 1499 /* estimated amount of memory used to represent one item in memory 1500 * during rev file packing */ 1501 PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t)) 1502 + APR_ALIGN_DEFAULT(2 *sizeof(void*)) 1503 + APR_ALIGN_DEFAULT(sizeof(reference_t)) 1504 + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t)) 1505 + 6 * sizeof(void*) 1506 }; 1507 1508 int max_items; 1509 apr_array_header_t *max_ids; 1510 pack_context_t context = { 0 }; 1511 int i; 1512 apr_size_t item_count = 0; 1513 apr_pool_t *iterpool = svn_pool_create(pool); 1514 1515 /* Prevent integer overflow. We use apr arrays to process the items so 1516 * the maximum number of items is INT_MAX. */ 1517 { 1518 apr_size_t temp = max_mem / PER_ITEM_MEM; 1519 SVN_ERR_ASSERT(temp <= INT_MAX); 1520 max_items = (int)temp; 1521 } 1522 1523 /* set up a pack context */ 1524 SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir, 1525 shard_rev, max_items, cancel_func, 1526 cancel_baton, pool)); 1527 1528 /* phase 1: determine the size of the revisions to pack */ 1529 SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev, 1530 context.shard_end_rev - shard_rev, 1531 pool, pool)); 1532 1533 /* pack revisions in ranges that don't exceed MAX_MEM */ 1534 for (i = 0; i < max_ids->nelts; ++i) 1535 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items) 1536 { 1537 context.end_rev++; 1538 } 1539 else 1540 { 1541 svn_pool_clear(iterpool); 1542 1543 /* some unpacked revisions before this one? */ 1544 if (context.start_rev < context.end_rev) 1545 { 1546 /* pack them intelligently (might be just 1 rev but still ...) */ 1547 SVN_ERR(pack_range(&context, iterpool)); 1548 SVN_ERR(reset_pack_context(&context, iterpool)); 1549 item_count = 0; 1550 } 1551 1552 /* next revision range is to start with the current revision */ 1553 context.start_rev = i + context.shard_rev; 1554 context.end_rev = context.start_rev + 1; 1555 1556 /* if this is a very large revision, we must place it as is */ 1557 if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items) 1558 { 1559 SVN_ERR(append_revision(&context, iterpool)); 1560 context.start_rev++; 1561 } 1562 else 1563 item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t); 1564 } 1565 1566 /* non-empty revision range at the end? */ 1567 if (context.start_rev < context.end_rev) 1568 SVN_ERR(pack_range(&context, iterpool)); 1569 1570 /* last phase: finalize indexes and clean up */ 1571 SVN_ERR(reset_pack_context(&context, iterpool)); 1572 SVN_ERR(close_pack_context(&context, iterpool)); 1573 svn_pool_destroy(iterpool); 1574 1575 return SVN_NO_ERROR; 1576} 1577 1578/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file. 1579 Use POOL for temporary allocations. */ 1580svn_error_t * 1581svn_fs_fs__get_packed_offset(apr_off_t *rev_offset, 1582 svn_fs_t *fs, 1583 svn_revnum_t rev, 1584 apr_pool_t *pool) 1585{ 1586 fs_fs_data_t *ffd = fs->fsap_data; 1587 svn_stream_t *manifest_stream; 1588 svn_boolean_t is_cached; 1589 svn_revnum_t shard; 1590 apr_int64_t shard_pos; 1591 apr_array_header_t *manifest; 1592 apr_pool_t *iterpool; 1593 1594 shard = rev / ffd->max_files_per_dir; 1595 1596 /* position of the shard within the manifest */ 1597 shard_pos = rev % ffd->max_files_per_dir; 1598 1599 /* fetch exactly that element into *rev_offset, if the manifest is found 1600 in the cache */ 1601 SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached, 1602 ffd->packed_offset_cache, &shard, 1603 svn_fs_fs__get_sharded_offset, &shard_pos, 1604 pool)); 1605 1606 if (is_cached) 1607 return SVN_NO_ERROR; 1608 1609 /* Open the manifest file. */ 1610 SVN_ERR(svn_stream_open_readonly(&manifest_stream, 1611 svn_fs_fs__path_rev_packed(fs, rev, 1612 PATH_MANIFEST, 1613 pool), 1614 pool, pool)); 1615 1616 /* While we're here, let's just read the entire manifest file into an array, 1617 so we can cache the entire thing. */ 1618 iterpool = svn_pool_create(pool); 1619 manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t)); 1620 while (1) 1621 { 1622 svn_boolean_t eof; 1623 apr_int64_t val; 1624 1625 svn_pool_clear(iterpool); 1626 SVN_ERR(svn_fs_fs__read_number_from_stream(&val, &eof, manifest_stream, 1627 iterpool)); 1628 if (eof) 1629 break; 1630 1631 APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val; 1632 } 1633 svn_pool_destroy(iterpool); 1634 1635 *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir, 1636 apr_off_t); 1637 1638 /* Close up shop and cache the array. */ 1639 SVN_ERR(svn_stream_close(manifest_stream)); 1640 return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool); 1641} 1642 1643/* Packing logic for physical addresssing mode: 1644 * Simply concatenate all revision contents. 1645 * 1646 * Pack the revision shard starting at SHARD_REV containing exactly 1647 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR, 1648 * using POOL for allocations. CANCEL_FUNC and CANCEL_BATON are what you 1649 * think they are. 1650 */ 1651static svn_error_t * 1652pack_phys_addressed(const char *pack_file_dir, 1653 const char *shard_path, 1654 svn_revnum_t start_rev, 1655 int max_files_per_dir, 1656 svn_cancel_func_t cancel_func, 1657 void *cancel_baton, 1658 apr_pool_t *pool) 1659{ 1660 const char *pack_file_path, *manifest_file_path; 1661 apr_file_t *pack_file; 1662 apr_file_t *manifest_file; 1663 svn_stream_t *manifest_stream; 1664 svn_revnum_t end_rev, rev; 1665 apr_off_t next_offset; 1666 apr_pool_t *iterpool; 1667 1668 /* Some useful paths. */ 1669 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool); 1670 manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool); 1671 1672 /* Create the new directory and pack file. 1673 * Use unbuffered apr_file_t since we're going to write using 16kb 1674 * chunks. */ 1675 SVN_ERR(svn_io_file_open(&pack_file, pack_file_path, 1676 APR_WRITE | APR_CREATE | APR_EXCL, 1677 APR_OS_DEFAULT, pool)); 1678 1679 /* Create the manifest file. */ 1680 SVN_ERR(svn_io_file_open(&manifest_file, manifest_file_path, 1681 APR_WRITE | APR_BUFFERED | APR_CREATE | APR_EXCL, 1682 APR_OS_DEFAULT, pool)); 1683 manifest_stream = svn_stream_from_aprfile2(manifest_file, TRUE, pool); 1684 1685 end_rev = start_rev + max_files_per_dir - 1; 1686 next_offset = 0; 1687 iterpool = svn_pool_create(pool); 1688 1689 /* Iterate over the revisions in this shard, squashing them together. */ 1690 for (rev = start_rev; rev <= end_rev; rev++) 1691 { 1692 svn_stream_t *rev_stream; 1693 apr_finfo_t finfo; 1694 const char *path; 1695 1696 svn_pool_clear(iterpool); 1697 1698 /* Get the size of the file. */ 1699 path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev), 1700 iterpool); 1701 SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool)); 1702 1703 /* build manifest */ 1704 SVN_ERR(svn_stream_printf(manifest_stream, iterpool, 1705 "%" APR_OFF_T_FMT "\n", next_offset)); 1706 next_offset += finfo.size; 1707 1708 /* Copy all the bits from the rev file to the end of the pack file. */ 1709 SVN_ERR(svn_stream_open_readonly(&rev_stream, path, iterpool, iterpool)); 1710 SVN_ERR(svn_stream_copy3(rev_stream, 1711 svn_stream_from_aprfile2(pack_file, TRUE, pool), 1712 cancel_func, cancel_baton, iterpool)); 1713 } 1714 1715 /* Close stream over APR file. */ 1716 SVN_ERR(svn_stream_close(manifest_stream)); 1717 1718 /* Ensure that pack file is written to disk. */ 1719 SVN_ERR(svn_io_file_flush_to_disk(manifest_file, pool)); 1720 SVN_ERR(svn_io_file_close(manifest_file, pool)); 1721 1722 /* disallow write access to the manifest file */ 1723 SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool)); 1724 1725 /* Ensure that pack file is written to disk. */ 1726 SVN_ERR(svn_io_file_flush_to_disk(pack_file, pool)); 1727 SVN_ERR(svn_io_file_close(pack_file, pool)); 1728 1729 svn_pool_destroy(iterpool); 1730 1731 return SVN_NO_ERROR; 1732} 1733 1734/* In filesystem FS, pack the revision SHARD containing exactly 1735 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR, 1736 * using POOL for allocations. Try to limit the amount of temporary 1737 * memory needed to MAX_MEM bytes. CANCEL_FUNC and CANCEL_BATON are what 1738 * you think they are. 1739 * 1740 * If for some reason we detect a partial packing already performed, we 1741 * remove the pack file and start again. 1742 * 1743 * The actual packing will be done in a format-specific sub-function. 1744 */ 1745static svn_error_t * 1746pack_rev_shard(svn_fs_t *fs, 1747 const char *pack_file_dir, 1748 const char *shard_path, 1749 apr_int64_t shard, 1750 int max_files_per_dir, 1751 apr_size_t max_mem, 1752 svn_cancel_func_t cancel_func, 1753 void *cancel_baton, 1754 apr_pool_t *pool) 1755{ 1756 const char *pack_file_path; 1757 svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir); 1758 1759 /* Some useful paths. */ 1760 pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool); 1761 1762 /* Remove any existing pack file for this shard, since it is incomplete. */ 1763 SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton, 1764 pool)); 1765 1766 /* Create the new directory and pack file. */ 1767 SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool)); 1768 1769 /* Index information files */ 1770 if (svn_fs_fs__use_log_addressing(fs)) 1771 SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev, 1772 max_mem, cancel_func, cancel_baton, pool)); 1773 else 1774 SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev, 1775 max_files_per_dir, cancel_func, 1776 cancel_baton, pool)); 1777 1778 SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool)); 1779 SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool)); 1780 1781 return SVN_NO_ERROR; 1782} 1783 1784/* Baton struct used by pack_body(), pack_shard() and synced_pack_shard(). 1785 These calls are nested and for every level additional fields will be 1786 available. */ 1787struct pack_baton 1788{ 1789 /* Valid when entering pack_body(). */ 1790 svn_fs_t *fs; 1791 svn_fs_pack_notify_t notify_func; 1792 void *notify_baton; 1793 svn_cancel_func_t cancel_func; 1794 void *cancel_baton; 1795 1796 /* Additional entries valid when entering pack_shard(). */ 1797 const char *revs_dir; 1798 const char *revsprops_dir; 1799 apr_int64_t shard; 1800 1801 /* Additional entries valid when entering synced_pack_shard(). */ 1802 const char *rev_shard_path; 1803}; 1804 1805 1806/* Part of the pack process that requires global (write) synchronization. 1807 * We pack the revision properties of the shard described by BATON and 1808 * In the file system at FS_PATH, pack the SHARD in REVS_DIR and replace 1809 * the non-packed revprop & rev shard folder(s) with the packed ones. 1810 * The packed rev folder has been created prior to calling this function. 1811 */ 1812static svn_error_t * 1813synced_pack_shard(void *baton, 1814 apr_pool_t *pool) 1815{ 1816 struct pack_baton *pb = baton; 1817 fs_fs_data_t *ffd = pb->fs->fsap_data; 1818 const char *revprops_shard_path, *revprops_pack_file_dir; 1819 1820 /* if enabled, pack the revprops in an equivalent way */ 1821 if (pb->revsprops_dir) 1822 { 1823 revprops_pack_file_dir = svn_dirent_join(pb->revsprops_dir, 1824 apr_psprintf(pool, 1825 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, 1826 pb->shard), 1827 pool); 1828 revprops_shard_path = svn_dirent_join(pb->revsprops_dir, 1829 apr_psprintf(pool, "%" APR_INT64_T_FMT, pb->shard), 1830 pool); 1831 1832 SVN_ERR(svn_fs_fs__pack_revprops_shard(revprops_pack_file_dir, 1833 revprops_shard_path, 1834 pb->shard, 1835 ffd->max_files_per_dir, 1836 (int)(0.9*ffd->revprop_pack_size), 1837 ffd->compress_packed_revprops 1838 ? SVN__COMPRESSION_ZLIB_DEFAULT 1839 : SVN__COMPRESSION_NONE, 1840 pb->cancel_func, 1841 pb->cancel_baton, 1842 pool)); 1843 } 1844 1845 /* Update the min-unpacked-rev file to reflect our newly packed shard. */ 1846 SVN_ERR(svn_fs_fs__write_min_unpacked_rev(pb->fs, 1847 (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir), 1848 pool)); 1849 ffd->min_unpacked_rev 1850 = (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir); 1851 1852 /* Finally, remove the existing shard directories. 1853 * For revprops, clean up older obsolete shards as well as they might 1854 * have been left over from an interrupted FS upgrade. */ 1855 SVN_ERR(svn_io_remove_dir2(pb->rev_shard_path, TRUE, 1856 pb->cancel_func, pb->cancel_baton, pool)); 1857 if (pb->revsprops_dir) 1858 { 1859 svn_node_kind_t kind = svn_node_dir; 1860 apr_int64_t to_cleanup = pb->shard; 1861 do 1862 { 1863 SVN_ERR(svn_fs_fs__delete_revprops_shard(revprops_shard_path, 1864 to_cleanup, 1865 ffd->max_files_per_dir, 1866 pb->cancel_func, 1867 pb->cancel_baton, 1868 pool)); 1869 1870 /* If the previous shard exists, clean it up as well. 1871 Don't try to clean up shard 0 as it we can't tell quickly 1872 whether it actually needs cleaning up. */ 1873 revprops_shard_path = svn_dirent_join(pb->revsprops_dir, 1874 apr_psprintf(pool, "%" APR_INT64_T_FMT, --to_cleanup), 1875 pool); 1876 SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, pool)); 1877 } 1878 while (kind == svn_node_dir && to_cleanup > 0); 1879 } 1880 1881 return SVN_NO_ERROR; 1882} 1883 1884/* Pack the shard described by BATON. 1885 * 1886 * If for some reason we detect a partial packing already performed, 1887 * we remove the pack file and start again. 1888 */ 1889static svn_error_t * 1890pack_shard(struct pack_baton *baton, 1891 apr_pool_t *pool) 1892{ 1893 fs_fs_data_t *ffd = baton->fs->fsap_data; 1894 const char *rev_pack_file_dir; 1895 1896 /* Notify caller we're starting to pack this shard. */ 1897 if (baton->notify_func) 1898 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard, 1899 svn_fs_pack_notify_start, pool)); 1900 1901 /* Some useful paths. */ 1902 rev_pack_file_dir = svn_dirent_join(baton->revs_dir, 1903 apr_psprintf(pool, 1904 "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, 1905 baton->shard), 1906 pool); 1907 baton->rev_shard_path = svn_dirent_join(baton->revs_dir, 1908 apr_psprintf(pool, 1909 "%" APR_INT64_T_FMT, 1910 baton->shard), 1911 pool); 1912 1913 /* pack the revision content */ 1914 SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path, 1915 baton->shard, ffd->max_files_per_dir, 1916 DEFAULT_MAX_MEM, baton->cancel_func, 1917 baton->cancel_baton, pool)); 1918 1919 /* For newer repo formats, we only acquired the pack lock so far. 1920 Before modifying the repo state by switching over to the packed 1921 data, we need to acquire the global (write) lock. */ 1922 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT) 1923 SVN_ERR(svn_fs_fs__with_write_lock(baton->fs, synced_pack_shard, baton, 1924 pool)); 1925 else 1926 SVN_ERR(synced_pack_shard(baton, pool)); 1927 1928 /* Notify caller we're starting to pack this shard. */ 1929 if (baton->notify_func) 1930 SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard, 1931 svn_fs_pack_notify_end, pool)); 1932 1933 return SVN_NO_ERROR; 1934} 1935 1936/* The work-horse for svn_fs_fs__pack, called with the FS write lock. 1937 This implements the svn_fs_fs__with_write_lock() 'body' callback 1938 type. BATON is a 'struct pack_baton *'. 1939 1940 WARNING: if you add a call to this function, please note: 1941 The code currently assumes that any piece of code running with 1942 the write-lock set can rely on the ffd->min_unpacked_rev and 1943 ffd->min_unpacked_revprop caches to be up-to-date (and, by 1944 extension, on not having to use a retry when calling 1945 svn_fs_fs__path_rev_absolute() and friends). If you add a call 1946 to this function, consider whether you have to call 1947 svn_fs_fs__update_min_unpacked_rev(). 1948 See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith 1949 */ 1950static svn_error_t * 1951pack_body(void *baton, 1952 apr_pool_t *pool) 1953{ 1954 struct pack_baton *pb = baton; 1955 fs_fs_data_t *ffd = pb->fs->fsap_data; 1956 apr_int64_t completed_shards; 1957 svn_revnum_t youngest; 1958 apr_pool_t *iterpool; 1959 1960 /* If the repository isn't a new enough format, we don't support packing. 1961 Return a friendly error to that effect. */ 1962 if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT) 1963 return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL, 1964 _("FSFS format (%d) too old to pack; please upgrade the filesystem."), 1965 ffd->format); 1966 1967 /* If we aren't using sharding, we can't do any packing, so quit. */ 1968 if (!ffd->max_files_per_dir) 1969 return SVN_NO_ERROR; 1970 1971 SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs, 1972 pool)); 1973 1974 SVN_ERR(svn_fs_fs__youngest_rev(&youngest, pb->fs, pool)); 1975 completed_shards = (youngest + 1) / ffd->max_files_per_dir; 1976 1977 /* See if we've already completed all possible shards thus far. */ 1978 if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir)) 1979 return SVN_NO_ERROR; 1980 1981 pb->revs_dir = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool); 1982 if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT) 1983 pb->revsprops_dir = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR, 1984 pool); 1985 1986 iterpool = svn_pool_create(pool); 1987 for (pb->shard = ffd->min_unpacked_rev / ffd->max_files_per_dir; 1988 pb->shard < completed_shards; 1989 pb->shard++) 1990 { 1991 svn_pool_clear(iterpool); 1992 1993 if (pb->cancel_func) 1994 SVN_ERR(pb->cancel_func(pb->cancel_baton)); 1995 1996 SVN_ERR(pack_shard(pb, iterpool)); 1997 } 1998 1999 svn_pool_destroy(iterpool); 2000 return SVN_NO_ERROR; 2001} 2002 2003svn_error_t * 2004svn_fs_fs__pack(svn_fs_t *fs, 2005 svn_fs_pack_notify_t notify_func, 2006 void *notify_baton, 2007 svn_cancel_func_t cancel_func, 2008 void *cancel_baton, 2009 apr_pool_t *pool) 2010{ 2011 struct pack_baton pb = { 0 }; 2012 fs_fs_data_t *ffd = fs->fsap_data; 2013 svn_error_t *err; 2014 2015 pb.fs = fs; 2016 pb.notify_func = notify_func; 2017 pb.notify_baton = notify_baton; 2018 pb.cancel_func = cancel_func; 2019 pb.cancel_baton = cancel_baton; 2020 2021 if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT) 2022 { 2023 /* Newer repositories provide a pack operation specific lock. 2024 Acquire it to prevent concurrent packs. 2025 2026 Since the file lock's lifetime is bound to a pool, we create a 2027 separate subpool here to release the lock immediately after the 2028 operation finished. 2029 */ 2030 err = svn_fs_fs__with_pack_lock(fs, pack_body, &pb, pool); 2031 } 2032 else 2033 { 2034 /* Use the global write lock for older repos. */ 2035 err = svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool); 2036 } 2037 2038 return svn_error_trace(err); 2039} 2040