1/* cached_data.c --- cached (read) access to FSX data 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23#include "cached_data.h" 24 25#include <assert.h> 26 27#include "svn_hash.h" 28#include "svn_ctype.h" 29#include "svn_sorts.h" 30 31#include "private/svn_io_private.h" 32#include "private/svn_sorts_private.h" 33#include "private/svn_string_private.h" 34#include "private/svn_subr_private.h" 35#include "private/svn_temp_serializer.h" 36 37#include "fs_x.h" 38#include "low_level.h" 39#include "util.h" 40#include "pack.h" 41#include "temp_serializer.h" 42#include "index.h" 43#include "changes.h" 44#include "noderevs.h" 45#include "reps.h" 46 47#include "../libsvn_fs/fs-loader.h" 48#include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */ 49 50#include "svn_private_config.h" 51 52/* forward-declare. 
See implementation for the docstring */ 53static svn_error_t * 54block_read(void **result, 55 svn_fs_t *fs, 56 const svn_fs_x__id_t *id, 57 svn_fs_x__revision_file_t *revision_file, 58 void *baton, 59 apr_pool_t *result_pool, 60 apr_pool_t *scratch_pool); 61 62 63/* Defined this to enable access logging via dgb__log_access 64#define SVN_FS_X__LOG_ACCESS 65*/ 66 67/* When SVN_FS_X__LOG_ACCESS has been defined, write a line to console 68 * showing where ID is located in FS and use ITEM to show details on it's 69 * contents if not NULL. Use SCRATCH_POOL for temporary allocations. 70 */ 71static svn_error_t * 72dbg__log_access(svn_fs_t *fs, 73 const svn_fs_x__id_t *id, 74 void *item, 75 apr_uint32_t item_type, 76 apr_pool_t *scratch_pool) 77{ 78 /* no-op if this macro is not defined */ 79#ifdef SVN_FS_X__LOG_ACCESS 80 svn_fs_x__data_t *ffd = fs->fsap_data; 81 apr_off_t offset = -1; 82 apr_off_t end_offset = 0; 83 apr_uint32_t sub_item = 0; 84 svn_fs_x__p2l_entry_t *entry = NULL; 85 static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop", 86 "node ", "chgs ", "rep ", "c:", "n:", "r:"}; 87 const char *description = ""; 88 const char *type = types[item_type]; 89 const char *pack = ""; 90 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set); 91 92 /* determine rev / pack file offset */ 93 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, id, scratch_pool)); 94 95 /* constructing the pack file description */ 96 if (revision < ffd->min_unpacked_rev) 97 pack = apr_psprintf(scratch_pool, "%4ld|", 98 revision / ffd->max_files_per_dir); 99 100 /* construct description if possible */ 101 if (item_type == SVN_FS_X__ITEM_TYPE_NODEREV && item != NULL) 102 { 103 svn_fs_x__noderev_t *node = item; 104 const char *data_rep 105 = node->data_rep 106 ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT, 107 svn_fs_x__get_revnum(node->data_rep->id.change_set), 108 node->data_rep->id.number) 109 : ""; 110 const char *prop_rep 111 = node->prop_rep 112 ? 
apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT, 113 svn_fs_x__get_revnum(node->prop_rep->id.change_set), 114 node->prop_rep->id.number) 115 : ""; 116 description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)", 117 node->created_path, 118 node->predecessor_count, 119 data_rep, 120 prop_rep); 121 } 122 else if (item_type == SVN_FS_X__ITEM_TYPE_ANY_REP) 123 { 124 svn_fs_x__rep_header_t *header = item; 125 if (header == NULL) 126 description = " (txdelta window)"; 127 else if (header->type == svn_fs_x__rep_self_delta) 128 description = " DELTA"; 129 else 130 description = apr_psprintf(scratch_pool, 131 " DELTA against %ld/%" APR_UINT64_T_FMT, 132 header->base_revision, 133 header->base_item_index); 134 } 135 else if (item_type == SVN_FS_X__ITEM_TYPE_CHANGES && item != NULL) 136 { 137 apr_array_header_t *changes = item; 138 switch (changes->nelts) 139 { 140 case 0: description = " no change"; 141 break; 142 case 1: description = " 1 change"; 143 break; 144 default: description = apr_psprintf(scratch_pool, " %d changes", 145 changes->nelts); 146 } 147 } 148 149 /* reverse index lookup: get item description in ENTRY */ 150 SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, revision, offset, 151 scratch_pool)); 152 if (entry) 153 { 154 /* more details */ 155 end_offset = offset + entry->size; 156 type = types[entry->type]; 157 158 /* merge the sub-item number with the container type */ 159 if ( entry->type == SVN_FS_X__ITEM_TYPE_CHANGES_CONT 160 || entry->type == SVN_FS_X__ITEM_TYPE_NODEREVS_CONT 161 || entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT) 162 type = apr_psprintf(scratch_pool, "%s%-3d", type, sub_item); 163 } 164 165 /* line output */ 166 printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n", 167 pack, (long)(offset / ffd->block_size), 168 (long)(offset % ffd->block_size), 169 (long)(end_offset / ffd->block_size), 170 (long)(end_offset % ffd->block_size), 171 type, revision, id->number, description); 172 173#endif 174 175 return SVN_NO_ERROR; 
176} 177 178/* Open the revision file for the item given by ID in filesystem FS and 179 store the newly opened file in FILE. Seek to the item's location before 180 returning. 181 182 Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */ 183static svn_error_t * 184open_and_seek_revision(svn_fs_x__revision_file_t **file, 185 svn_fs_t *fs, 186 const svn_fs_x__id_t *id, 187 apr_pool_t *result_pool, 188 apr_pool_t *scratch_pool) 189{ 190 svn_fs_x__revision_file_t *rev_file; 191 apr_off_t offset = -1; 192 apr_uint32_t sub_item = 0; 193 svn_revnum_t rev = svn_fs_x__get_revnum(id->change_set); 194 195 SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool)); 196 197 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, rev, result_pool)); 198 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, id, 199 scratch_pool)); 200 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, offset)); 201 202 *file = rev_file; 203 204 return SVN_NO_ERROR; 205} 206 207/* Open the representation REP for a node-revision in filesystem FS, seek 208 to its position and store the newly opened file in FILE. 209 210 Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */ 211static svn_error_t * 212open_and_seek_transaction(svn_fs_x__revision_file_t **file, 213 svn_fs_t *fs, 214 svn_fs_x__representation_t *rep, 215 apr_pool_t *result_pool, 216 apr_pool_t *scratch_pool) 217{ 218 apr_off_t offset; 219 apr_uint32_t sub_item = 0; 220 apr_int64_t txn_id = svn_fs_x__get_txn_id(rep->id.change_set); 221 222 SVN_ERR(svn_fs_x__rev_file_open_proto_rev(file, fs, txn_id, result_pool, 223 scratch_pool)); 224 225 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, *file, &rep->id, 226 scratch_pool)); 227 SVN_ERR(svn_fs_x__rev_file_seek(*file, NULL, offset)); 228 229 return SVN_NO_ERROR; 230} 231 232/* Given a node-id ID, and a representation REP in filesystem FS, open 233 the correct file and seek to the correction location. Store this 234 file in *FILE_P. 
235 236 Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */ 237static svn_error_t * 238open_and_seek_representation(svn_fs_x__revision_file_t **file_p, 239 svn_fs_t *fs, 240 svn_fs_x__representation_t *rep, 241 apr_pool_t *result_pool, 242 apr_pool_t *scratch_pool) 243{ 244 if (svn_fs_x__is_revision(rep->id.change_set)) 245 return open_and_seek_revision(file_p, fs, &rep->id, result_pool, 246 scratch_pool); 247 else 248 return open_and_seek_transaction(file_p, fs, rep, result_pool, 249 scratch_pool); 250} 251 252 253 254static svn_error_t * 255err_dangling_id(svn_fs_t *fs, 256 const svn_fs_x__id_t *id) 257{ 258 svn_string_t *id_str = svn_fs_x__id_unparse(id, fs->pool); 259 return svn_error_createf 260 (SVN_ERR_FS_ID_NOT_FOUND, 0, 261 _("Reference to non-existent node '%s' in filesystem '%s'"), 262 id_str->data, fs->path); 263} 264 265/* Get the node-revision for the node ID in FS. 266 Set *NODEREV_P to the new node-revision structure, allocated in POOL. 267 See svn_fs_x__get_node_revision, which wraps this and adds another 268 error. */ 269static svn_error_t * 270get_node_revision_body(svn_fs_x__noderev_t **noderev_p, 271 svn_fs_t *fs, 272 const svn_fs_x__id_t *id, 273 apr_pool_t *result_pool, 274 apr_pool_t *scratch_pool) 275{ 276 svn_error_t *err; 277 svn_boolean_t is_cached = FALSE; 278 svn_fs_x__data_t *ffd = fs->fsap_data; 279 280 if (svn_fs_x__is_txn(id->change_set)) 281 { 282 apr_file_t *file; 283 svn_stream_t *stream; 284 285 /* This is a transaction node-rev. Its storage logic is very 286 different from that of rev / pack files. 
*/ 287 err = svn_io_file_open(&file, 288 svn_fs_x__path_txn_node_rev(fs, id, 289 scratch_pool, 290 scratch_pool), 291 APR_READ | APR_BUFFERED, APR_OS_DEFAULT, 292 scratch_pool); 293 if (err && APR_STATUS_IS_ENOENT(err->apr_err)) 294 { 295 svn_error_clear(err); 296 return svn_error_trace(err_dangling_id(fs, id)); 297 } 298 else if (err) 299 { 300 return svn_error_trace(err); 301 } 302 303 /* Be sure to close the file ASAP. */ 304 stream = svn_stream_from_aprfile2(file, FALSE, scratch_pool); 305 SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream, 306 result_pool, scratch_pool)); 307 } 308 else 309 { 310 svn_fs_x__revision_file_t *revision_file; 311 312 /* noderevs in rev / pack files can be cached */ 313 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set); 314 svn_fs_x__pair_cache_key_t key; 315 316 SVN_ERR(svn_fs_x__rev_file_init(&revision_file, fs, revision, 317 scratch_pool)); 318 319 /* First, try a noderevs container cache lookup. */ 320 if ( svn_fs_x__is_packed_rev(fs, revision) 321 && ffd->noderevs_container_cache) 322 { 323 apr_off_t offset; 324 apr_uint32_t sub_item; 325 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, revision_file, 326 id, scratch_pool)); 327 key.revision = svn_fs_x__packed_base_rev(fs, revision); 328 key.second = offset; 329 330 SVN_ERR(svn_cache__get_partial((void **)noderev_p, &is_cached, 331 ffd->noderevs_container_cache, &key, 332 svn_fs_x__noderevs_get_func, 333 &sub_item, result_pool)); 334 if (is_cached) 335 return SVN_NO_ERROR; 336 } 337 338 key.revision = revision; 339 key.second = id->number; 340 341 /* Not found or not applicable. Try a noderev cache lookup. 342 * If that succeeds, we are done here. */ 343 SVN_ERR(svn_cache__get((void **) noderev_p, 344 &is_cached, 345 ffd->node_revision_cache, 346 &key, 347 result_pool)); 348 if (is_cached) 349 return SVN_NO_ERROR; 350 351 /* block-read will parse the whole block and will also return 352 the one noderev that we need right now. 
*/ 353 SVN_ERR(block_read((void **)noderev_p, fs, 354 id, 355 revision_file, 356 NULL, 357 result_pool, 358 scratch_pool)); 359 SVN_ERR(svn_fs_x__close_revision_file(revision_file)); 360 } 361 362 return SVN_NO_ERROR; 363} 364 365svn_error_t * 366svn_fs_x__get_node_revision(svn_fs_x__noderev_t **noderev_p, 367 svn_fs_t *fs, 368 const svn_fs_x__id_t *id, 369 apr_pool_t *result_pool, 370 apr_pool_t *scratch_pool) 371{ 372 svn_error_t *err = get_node_revision_body(noderev_p, fs, id, 373 result_pool, scratch_pool); 374 if (err && err->apr_err == SVN_ERR_FS_CORRUPT) 375 { 376 svn_string_t *id_string = svn_fs_x__id_unparse(id, scratch_pool); 377 return svn_error_createf(SVN_ERR_FS_CORRUPT, err, 378 "Corrupt node-revision '%s'", 379 id_string->data); 380 } 381 382 SVN_ERR(dbg__log_access(fs, id, *noderev_p, 383 SVN_FS_X__ITEM_TYPE_NODEREV, scratch_pool)); 384 385 return svn_error_trace(err); 386} 387 388 389svn_error_t * 390svn_fs_x__get_mergeinfo_count(apr_int64_t *count, 391 svn_fs_t *fs, 392 const svn_fs_x__id_t *id, 393 apr_pool_t *scratch_pool) 394{ 395 svn_fs_x__noderev_t *noderev; 396 397 /* If we want a full acccess log, we need to provide full data and 398 cannot take shortcuts here. */ 399#if !defined(SVN_FS_X__LOG_ACCESS) 400 401 /* First, try a noderevs container cache lookup. */ 402 if (! 
svn_fs_x__is_txn(id->change_set)) 403 { 404 /* noderevs in rev / pack files can be cached */ 405 svn_fs_x__data_t *ffd = fs->fsap_data; 406 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set); 407 408 svn_fs_x__revision_file_t *rev_file; 409 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision, 410 scratch_pool)); 411 412 if ( svn_fs_x__is_packed_rev(fs, revision) 413 && ffd->noderevs_container_cache) 414 { 415 svn_fs_x__pair_cache_key_t key; 416 apr_off_t offset; 417 apr_uint32_t sub_item; 418 svn_boolean_t is_cached; 419 420 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, 421 id, scratch_pool)); 422 key.revision = svn_fs_x__packed_base_rev(fs, revision); 423 key.second = offset; 424 425 SVN_ERR(svn_cache__get_partial((void **)count, &is_cached, 426 ffd->noderevs_container_cache, &key, 427 svn_fs_x__mergeinfo_count_get_func, 428 &sub_item, scratch_pool)); 429 if (is_cached) 430 return SVN_NO_ERROR; 431 } 432 } 433#endif 434 435 /* fallback to the naive implementation handling all edge cases */ 436 SVN_ERR(svn_fs_x__get_node_revision(&noderev, fs, id, scratch_pool, 437 scratch_pool)); 438 *count = noderev->mergeinfo_count; 439 440 return SVN_NO_ERROR; 441} 442 443/* Describes a lazily opened rev / pack file. Instances will be shared 444 between multiple instances of rep_state_t. */ 445typedef struct shared_file_t 446{ 447 /* The opened file. NULL while file is not open, yet. */ 448 svn_fs_x__revision_file_t *rfile; 449 450 /* file system to open the file in */ 451 svn_fs_t *fs; 452 453 /* a revision contained in the FILE. Since this file may be shared, 454 that value may be different from REP_STATE_T->REVISION. */ 455 svn_revnum_t revision; 456 457 /* pool to use when creating the FILE. This guarantees that the file 458 remains open / valid beyond the respective local context that required 459 the file to be opened eventually. 
*/ 460 apr_pool_t *pool; 461} shared_file_t; 462 463/* Represents where in the current svndiff data block each 464 representation is. */ 465typedef struct rep_state_t 466{ 467 /* shared lazy-open rev/pack file structure */ 468 shared_file_t *sfile; 469 /* The txdelta window cache to use or NULL. */ 470 svn_cache__t *window_cache; 471 /* Caches un-deltified windows. May be NULL. */ 472 svn_cache__t *combined_cache; 473 /* ID addressing the representation */ 474 svn_fs_x__id_t rep_id; 475 /* length of the header at the start of the rep. 476 0 iff this is rep is stored in a container 477 (i.e. does not have a header) */ 478 apr_size_t header_size; 479 apr_off_t start; /* The starting offset for the raw 480 svndiff data minus header. 481 -1 if the offset is yet unknown. */ 482 /* sub-item index in case the rep is containered */ 483 apr_uint32_t sub_item; 484 apr_off_t current;/* The current offset relative to START. */ 485 apr_off_t size; /* The on-disk size of the representation. */ 486 int ver; /* If a delta, what svndiff version? 487 -1 for unknown delta version. */ 488 int chunk_index; /* number of the window to read */ 489} rep_state_t; 490 491/* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */ 492static svn_error_t* 493auto_open_shared_file(shared_file_t *file) 494{ 495 if (file->rfile == NULL) 496 SVN_ERR(svn_fs_x__rev_file_init(&file->rfile, file->fs, 497 file->revision, file->pool)); 498 499 return SVN_NO_ERROR; 500} 501 502/* Set RS->START to the begin of the representation raw in RS->SFILE->RFILE, 503 if that hasn't been done yet. Use SCRATCH_POOL for temporary allocations. 
504 */ 505static svn_error_t* 506auto_set_start_offset(rep_state_t *rs, 507 apr_pool_t *scratch_pool) 508{ 509 if (rs->start == -1) 510 { 511 SVN_ERR(svn_fs_x__item_offset(&rs->start, &rs->sub_item, 512 rs->sfile->fs, rs->sfile->rfile, 513 &rs->rep_id, scratch_pool)); 514 rs->start += rs->header_size; 515 } 516 517 return SVN_NO_ERROR; 518} 519 520/* Set RS->VER depending on what is found in the already open RS->FILE->FILE 521 if the diff version is still unknown. Use SCRATCH_POOL for temporary 522 allocations. 523 */ 524static svn_error_t* 525auto_read_diff_version(rep_state_t *rs, 526 apr_pool_t *scratch_pool) 527{ 528 if (rs->ver == -1) 529 { 530 char buf[4]; 531 SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, rs->start)); 532 SVN_ERR(svn_fs_x__rev_file_read(rs->sfile->rfile, buf, sizeof(buf))); 533 534 /* ### Layering violation */ 535 if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N'))) 536 return svn_error_create 537 (SVN_ERR_FS_CORRUPT, NULL, 538 _("Malformed svndiff data in representation")); 539 rs->ver = buf[3]; 540 541 rs->chunk_index = 0; 542 rs->current = 4; 543 } 544 545 return SVN_NO_ERROR; 546} 547 548/* See create_rep_state, which wraps this and adds another error. 
*/ 549static svn_error_t * 550create_rep_state_body(rep_state_t **rep_state, 551 svn_fs_x__rep_header_t **rep_header, 552 shared_file_t **shared_file, 553 svn_fs_x__representation_t *rep, 554 svn_fs_t *fs, 555 apr_pool_t *result_pool, 556 apr_pool_t *scratch_pool) 557{ 558 svn_fs_x__data_t *ffd = fs->fsap_data; 559 rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs)); 560 svn_fs_x__rep_header_t *rh; 561 svn_boolean_t is_cached = FALSE; 562 svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set); 563 apr_uint64_t estimated_window_storage; 564 565 /* If the hint is 566 * - given, 567 * - refers to a valid revision, 568 * - refers to a packed revision, 569 * - as does the rep we want to read, and 570 * - refers to the same pack file as the rep 571 * we can re-use the same, already open file object 572 */ 573 svn_boolean_t reuse_shared_file 574 = shared_file && *shared_file && (*shared_file)->rfile 575 && SVN_IS_VALID_REVNUM((*shared_file)->revision) 576 && (*shared_file)->revision < ffd->min_unpacked_rev 577 && revision < ffd->min_unpacked_rev 578 && ( ((*shared_file)->revision / ffd->max_files_per_dir) 579 == (revision / ffd->max_files_per_dir)); 580 581 svn_fs_x__representation_cache_key_t key = { 0 }; 582 key.revision = revision; 583 key.is_packed = revision < ffd->min_unpacked_rev; 584 key.item_index = rep->id.number; 585 586 /* continue constructing RS and RA */ 587 rs->size = rep->size; 588 rs->rep_id = rep->id; 589 rs->ver = -1; 590 rs->start = -1; 591 592 /* Very long files stored as self-delta will produce a huge number of 593 delta windows. Don't cache them lest we don't thrash the cache. 594 Since we don't know the depth of the delta chain, let's assume, the 595 whole contents get rewritten 3 times. 596 */ 597 estimated_window_storage 598 = 4 * ( (rep->expanded_size ? 
rep->expanded_size : rep->size) 599 + SVN_DELTA_WINDOW_SIZE); 600 estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX); 601 602 rs->window_cache = ffd->txdelta_window_cache 603 && svn_cache__is_cachable(ffd->txdelta_window_cache, 604 (apr_size_t)estimated_window_storage) 605 ? ffd->txdelta_window_cache 606 : NULL; 607 rs->combined_cache = ffd->combined_window_cache 608 && svn_cache__is_cachable(ffd->combined_window_cache, 609 (apr_size_t)estimated_window_storage) 610 ? ffd->combined_window_cache 611 : NULL; 612 613 /* cache lookup, i.e. skip reading the rep header if possible */ 614 if (SVN_IS_VALID_REVNUM(revision)) 615 SVN_ERR(svn_cache__get((void **) &rh, &is_cached, 616 ffd->rep_header_cache, &key, result_pool)); 617 618 /* initialize the (shared) FILE member in RS */ 619 if (reuse_shared_file) 620 { 621 rs->sfile = *shared_file; 622 } 623 else 624 { 625 shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file)); 626 file->revision = revision; 627 file->pool = result_pool; 628 file->fs = fs; 629 rs->sfile = file; 630 631 /* remember the current file, if suggested by the caller */ 632 if (shared_file) 633 *shared_file = file; 634 } 635 636 /* read rep header, if necessary */ 637 if (!is_cached) 638 { 639 svn_stream_t *stream; 640 641 /* we will need the on-disk location for non-txn reps */ 642 apr_off_t offset; 643 svn_boolean_t in_container = TRUE; 644 645 /* ensure file is open and navigate to the start of rep header */ 646 if (reuse_shared_file) 647 { 648 /* ... we can re-use the same, already open file object. 649 * This implies that we don't read from a txn. 650 */ 651 rs->sfile = *shared_file; 652 SVN_ERR(auto_open_shared_file(rs->sfile)); 653 } 654 else 655 { 656 /* otherwise, create a new file object. May or may not be 657 * an in-txn file. 
658 */ 659 SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep, 660 result_pool, scratch_pool)); 661 } 662 663 if (SVN_IS_VALID_REVNUM(revision)) 664 { 665 apr_uint32_t sub_item; 666 667 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, 668 rs->sfile->rfile, &rep->id, 669 scratch_pool)); 670 671 /* is rep stored in some star-deltified container? */ 672 if (sub_item == 0) 673 { 674 svn_fs_x__p2l_entry_t *entry; 675 SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rs->sfile->rfile, 676 revision, offset, 677 scratch_pool, scratch_pool)); 678 in_container = entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT; 679 } 680 681 if (in_container) 682 { 683 /* construct a container rep header */ 684 *rep_header = apr_pcalloc(result_pool, sizeof(**rep_header)); 685 (*rep_header)->type = svn_fs_x__rep_container; 686 687 /* exit to caller */ 688 *rep_state = rs; 689 return SVN_NO_ERROR; 690 } 691 692 SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, offset)); 693 } 694 695 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rs->sfile->rfile)); 696 SVN_ERR(svn_fs_x__read_rep_header(&rh, stream, 697 result_pool, scratch_pool)); 698 SVN_ERR(svn_fs_x__rev_file_offset(&rs->start, rs->sfile->rfile)); 699 700 /* populate the cache if appropriate */ 701 if (SVN_IS_VALID_REVNUM(revision)) 702 { 703 SVN_ERR(block_read(NULL, fs, &rs->rep_id, rs->sfile->rfile, NULL, 704 result_pool, scratch_pool)); 705 SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh, 706 scratch_pool)); 707 } 708 } 709 710 /* finalize */ 711 SVN_ERR(dbg__log_access(fs, &rs->rep_id, rh, SVN_FS_X__ITEM_TYPE_ANY_REP, 712 scratch_pool)); 713 714 rs->header_size = rh->header_size; 715 *rep_state = rs; 716 *rep_header = rh; 717 718 rs->chunk_index = 0; 719 720 /* skip "SVNx" diff marker */ 721 rs->current = 4; 722 723 return SVN_NO_ERROR; 724} 725 726/* Read the rep args for REP in filesystem FS and create a rep_state 727 for reading the representation. 
Return the rep_state in *REP_STATE 728 and the rep args in *REP_ARGS, both allocated in POOL. 729 730 When reading multiple reps, i.e. a skip delta chain, you may provide 731 non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first 732 call it should be a pointer to NULL.) The function will use this 733 variable to store the previous call results and tries to re-use it. 734 This may result in significant savings in I/O for packed files and 735 number of open file handles. 736 */ 737static svn_error_t * 738create_rep_state(rep_state_t **rep_state, 739 svn_fs_x__rep_header_t **rep_header, 740 shared_file_t **shared_file, 741 svn_fs_x__representation_t *rep, 742 svn_fs_t *fs, 743 apr_pool_t *result_pool, 744 apr_pool_t *scratch_pool) 745{ 746 svn_error_t *err = create_rep_state_body(rep_state, rep_header, 747 shared_file, rep, fs, 748 result_pool, scratch_pool); 749 if (err && err->apr_err == SVN_ERR_FS_CORRUPT) 750 { 751 /* ### This always returns "-1" for transaction reps, because 752 ### this particular bit of code doesn't know if the rep is 753 ### stored in the protorev or in the mutable area (for props 754 ### or dir contents). It is pretty rare for FSX to *read* 755 ### from the protorev file, though, so this is probably OK. 756 ### And anyone going to debug corruption errors is probably 757 ### going to jump straight to this comment anyway! */ 758 return svn_error_createf(SVN_ERR_FS_CORRUPT, err, 759 "Corrupt representation '%s'", 760 rep 761 ? svn_fs_x__unparse_representation 762 (rep, TRUE, scratch_pool, 763 scratch_pool)->data 764 : "(null)"); 765 } 766 /* ### Call representation_string() ? 
*/ 767 return svn_error_trace(err); 768} 769 770svn_error_t * 771svn_fs_x__check_rep(svn_fs_x__representation_t *rep, 772 svn_fs_t *fs, 773 apr_pool_t *scratch_pool) 774{ 775 apr_off_t offset; 776 apr_uint32_t sub_item; 777 svn_fs_x__p2l_entry_t *entry; 778 svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set); 779 780 svn_fs_x__revision_file_t *rev_file; 781 SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision, scratch_pool)); 782 783 /* Does REP->ID refer to an actual item? Which one is it? */ 784 SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, &rep->id, 785 scratch_pool)); 786 787 /* What is the type of that item? */ 788 SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rev_file, revision, offset, 789 scratch_pool, scratch_pool)); 790 791 /* Verify that we've got an item that is actually a representation. */ 792 if ( entry == NULL 793 || ( entry->type != SVN_FS_X__ITEM_TYPE_FILE_REP 794 && entry->type != SVN_FS_X__ITEM_TYPE_DIR_REP 795 && entry->type != SVN_FS_X__ITEM_TYPE_FILE_PROPS 796 && entry->type != SVN_FS_X__ITEM_TYPE_DIR_PROPS 797 && entry->type != SVN_FS_X__ITEM_TYPE_REPS_CONT)) 798 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 799 _("No representation found at offset %s " 800 "for item %s in revision %ld"), 801 apr_off_t_toa(scratch_pool, offset), 802 apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT, 803 rep->id.number), 804 revision); 805 806 return SVN_NO_ERROR; 807} 808 809/* . 810 Do any allocations in POOL. 
*/ 811svn_error_t * 812svn_fs_x__rep_chain_length(int *chain_length, 813 int *shard_count, 814 svn_fs_x__representation_t *rep, 815 svn_fs_t *fs, 816 apr_pool_t *scratch_pool) 817{ 818 svn_fs_x__data_t *ffd = fs->fsap_data; 819 svn_revnum_t shard_size = ffd->max_files_per_dir; 820 svn_boolean_t is_delta = FALSE; 821 int count = 0; 822 int shards = 1; 823 svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set); 824 svn_revnum_t last_shard = revision / shard_size; 825 826 /* Note that this iteration pool will be used in a non-standard way. 827 * To reuse open file handles between iterations (e.g. while within the 828 * same pack file), we only clear this pool once in a while instead of 829 * at the start of each iteration. */ 830 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 831 832 /* Check whether the length of the deltification chain is acceptable. 833 * Otherwise, shared reps may form a non-skipping delta chain in 834 * extreme cases. */ 835 svn_fs_x__representation_t base_rep = *rep; 836 837 /* re-use open files between iterations */ 838 shared_file_t *file_hint = NULL; 839 840 svn_fs_x__rep_header_t *header; 841 842 /* follow the delta chain towards the end but for at most 843 * MAX_CHAIN_LENGTH steps. */ 844 do 845 { 846 rep_state_t *rep_state; 847 revision = svn_fs_x__get_revnum(base_rep.id.change_set); 848 if (revision / shard_size != last_shard) 849 { 850 last_shard = revision / shard_size; 851 ++shards; 852 } 853 854 SVN_ERR(create_rep_state_body(&rep_state, 855 &header, 856 &file_hint, 857 &base_rep, 858 fs, 859 iterpool, 860 iterpool)); 861 862 base_rep.id.change_set 863 = svn_fs_x__change_set_by_rev(header->base_revision); 864 base_rep.id.number = header->base_item_index; 865 base_rep.size = header->base_length; 866 is_delta = header->type == svn_fs_x__rep_delta; 867 868 /* Clear it the ITERPOOL once in a while. Doing it too frequently 869 * renders the FILE_HINT ineffective. 
Doing too infrequently, may 870 * leave us with too many open file handles. 871 * 872 * Note that this is mostly about efficiency, with larger values 873 * being more efficient, and any non-zero value is legal here. When 874 * reading deltified contents, we may keep 10s of rev files open at 875 * the same time and the system has to cope with that. Thus, the 876 * limit of 16 chosen below is in the same ballpark. 877 */ 878 ++count; 879 if (count % 16 == 0) 880 { 881 file_hint = NULL; 882 svn_pool_clear(iterpool); 883 } 884 } 885 while (is_delta && base_rep.id.change_set); 886 887 *chain_length = count; 888 *shard_count = shards; 889 svn_pool_destroy(iterpool); 890 891 return SVN_NO_ERROR; 892} 893 894 895typedef struct rep_read_baton_t 896{ 897 /* The FS from which we're reading. */ 898 svn_fs_t *fs; 899 900 /* Representation to read. */ 901 svn_fs_x__representation_t rep; 902 903 /* If not NULL, this is the base for the first delta window in rs_list */ 904 svn_stringbuf_t *base_window; 905 906 /* The state of all prior delta representations. */ 907 apr_array_header_t *rs_list; 908 909 /* The plaintext state, if there is a plaintext. */ 910 rep_state_t *src_state; 911 912 /* The index of the current delta chunk, if we are reading a delta. */ 913 int chunk_index; 914 915 /* The buffer where we store undeltified data. */ 916 char *buf; 917 apr_size_t buf_pos; 918 apr_size_t buf_len; 919 920 /* A checksum context for summing the data read in order to verify it. 921 Note: we don't need to use the sha1 checksum because we're only doing 922 data verification, for which md5 is perfectly safe. */ 923 svn_checksum_ctx_t *md5_checksum_ctx; 924 925 svn_boolean_t checksum_finalized; 926 927 /* The stored checksum of the representation we are reading, its 928 length, and the amount we've read so far. Some of this 929 information is redundant with rs_list and src_state, but it's 930 convenient for the checksumming code to have it here. 
*/ 931 unsigned char md5_digest[APR_MD5_DIGESTSIZE]; 932 933 svn_filesize_t len; 934 svn_filesize_t off; 935 936 /* The key for the fulltext cache for this rep, if there is a 937 fulltext cache. */ 938 svn_fs_x__pair_cache_key_t fulltext_cache_key; 939 /* The text we've been reading, if we're going to cache it. */ 940 svn_stringbuf_t *current_fulltext; 941 942 /* If not NULL, attempt to read the data from this cache. 943 Once that lookup fails, reset it to NULL. */ 944 svn_cache__t *fulltext_cache; 945 946 /* Bytes delivered from the FULLTEXT_CACHE so far. If the next 947 lookup fails, we need to skip that much data from the reconstructed 948 window stream before we continue normal operation. */ 949 svn_filesize_t fulltext_delivered; 950 951 /* Used for temporary allocations during the read. */ 952 apr_pool_t *scratch_pool; 953 954 /* Pool used to store file handles and other data that is persistant 955 for the entire stream read. */ 956 apr_pool_t *filehandle_pool; 957} rep_read_baton_t; 958 959/* Set window key in *KEY to address the window described by RS. 960 For convenience, return the KEY. */ 961static svn_fs_x__window_cache_key_t * 962get_window_key(svn_fs_x__window_cache_key_t *key, 963 rep_state_t *rs) 964{ 965 svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set); 966 assert(revision <= APR_UINT32_MAX); 967 968 key->revision = (apr_uint32_t)revision; 969 key->item_index = rs->rep_id.number; 970 key->chunk_index = rs->chunk_index; 971 972 return key; 973} 974 975/* Read the WINDOW_P number CHUNK_INDEX for the representation given in 976 * rep state RS from the current FSX session's cache. This will be a 977 * no-op and IS_CACHED will be set to FALSE if no cache has been given. 978 * If a cache is available IS_CACHED will inform the caller about the 979 * success of the lookup. Allocations (of the window in particualar) will 980 * be made from POOL. 981 * 982 * If the information could be found, put RS to CHUNK_INDEX. 
983 */ 984 985/* Return data type for get_cached_window_sizes_func. 986 */ 987typedef struct window_sizes_t 988{ 989 /* length of the txdelta window in its on-disk format */ 990 svn_filesize_t packed_len; 991 992 /* expanded (and combined) window length */ 993 svn_filesize_t target_len; 994} window_sizes_t; 995 996/* Implements svn_cache__partial_getter_func_t extracting the packed 997 * and expanded window sizes from a cached window and return the size 998 * info as a window_sizes_t* in *OUT. 999 */ 1000static svn_error_t * 1001get_cached_window_sizes_func(void **out, 1002 const void *data, 1003 apr_size_t data_len, 1004 void *baton, 1005 apr_pool_t *pool) 1006{ 1007 const svn_fs_x__txdelta_cached_window_t *window = data; 1008 const svn_txdelta_window_t *txdelta_window 1009 = svn_temp_deserializer__ptr(window, (const void **)&window->window); 1010 1011 window_sizes_t *result = apr_palloc(pool, sizeof(*result)); 1012 result->packed_len = window->end_offset - window->start_offset; 1013 result->target_len = txdelta_window->tview_len; 1014 1015 *out = result; 1016 1017 return SVN_NO_ERROR; 1018} 1019 1020/* Read the WINDOW_P number CHUNK_INDEX for the representation given in 1021 * rep state RS from the current FSFS session's cache. This will be a 1022 * no-op and IS_CACHED will be set to FALSE if no cache has been given. 1023 * If a cache is available IS_CACHED will inform the caller about the 1024 * success of the lookup. Allocations of the window in will be made 1025 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations. 1026 * 1027 * If the information could be found, put RS to CHUNK_INDEX. 
1028 */ 1029static svn_error_t * 1030get_cached_window_sizes(window_sizes_t **sizes, 1031 rep_state_t *rs, 1032 svn_boolean_t *is_cached, 1033 apr_pool_t *pool) 1034{ 1035 svn_fs_x__window_cache_key_t key = { 0 }; 1036 SVN_ERR(svn_cache__get_partial((void **)sizes, 1037 is_cached, 1038 rs->window_cache, 1039 get_window_key(&key, rs), 1040 get_cached_window_sizes_func, 1041 NULL, 1042 pool)); 1043 1044 return SVN_NO_ERROR; 1045} 1046 1047static svn_error_t * 1048get_cached_window(svn_txdelta_window_t **window_p, 1049 rep_state_t *rs, 1050 int chunk_index, 1051 svn_boolean_t *is_cached, 1052 apr_pool_t *result_pool, 1053 apr_pool_t *scratch_pool) 1054{ 1055 /* ask the cache for the desired txdelta window */ 1056 svn_fs_x__txdelta_cached_window_t *cached_window; 1057 svn_fs_x__window_cache_key_t key = { 0 }; 1058 get_window_key(&key, rs); 1059 key.chunk_index = chunk_index; 1060 SVN_ERR(svn_cache__get((void **) &cached_window, 1061 is_cached, 1062 rs->window_cache, 1063 &key, 1064 result_pool)); 1065 1066 if (*is_cached) 1067 { 1068 /* found it. Pass it back to the caller. */ 1069 *window_p = cached_window->window; 1070 1071 /* manipulate the RS as if we just read the data */ 1072 rs->current = cached_window->end_offset; 1073 rs->chunk_index = chunk_index; 1074 } 1075 1076 return SVN_NO_ERROR; 1077} 1078 1079/* Store the WINDOW read for the rep state RS with the given START_OFFSET 1080 * within the pack / rev file in the current FSX session's cache. This 1081 * will be a no-op if no cache has been given. 1082 * Temporary allocations will be made from SCRATCH_POOL. 
*/ 1083static svn_error_t * 1084set_cached_window(svn_txdelta_window_t *window, 1085 rep_state_t *rs, 1086 apr_off_t start_offset, 1087 apr_pool_t *scratch_pool) 1088{ 1089 /* store the window and the first offset _past_ it */ 1090 svn_fs_x__txdelta_cached_window_t cached_window; 1091 svn_fs_x__window_cache_key_t key = {0}; 1092 1093 cached_window.window = window; 1094 cached_window.start_offset = start_offset - rs->start; 1095 cached_window.end_offset = rs->current; 1096 1097 /* but key it with the start offset because that is the known state 1098 * when we will look it up */ 1099 SVN_ERR(svn_cache__set(rs->window_cache, 1100 get_window_key(&key, rs), 1101 &cached_window, 1102 scratch_pool)); 1103 1104 return SVN_NO_ERROR; 1105} 1106 1107/* Read the WINDOW_P for the rep state RS from the current FSX session's 1108 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no 1109 * cache has been given. If a cache is available IS_CACHED will inform 1110 * the caller about the success of the lookup. Allocations (of the window 1111 * in particular) will be made from POOL. 1112 */ 1113static svn_error_t * 1114get_cached_combined_window(svn_stringbuf_t **window_p, 1115 rep_state_t *rs, 1116 svn_boolean_t *is_cached, 1117 apr_pool_t *pool) 1118{ 1119 /* ask the cache for the desired txdelta window */ 1120 svn_fs_x__window_cache_key_t key = { 0 }; 1121 return svn_cache__get((void **)window_p, 1122 is_cached, 1123 rs->combined_cache, 1124 get_window_key(&key, rs), 1125 pool); 1126} 1127 1128/* Store the WINDOW read for the rep state RS in the current FSX session's 1129 * cache. This will be a no-op if no cache has been given. 1130 * Temporary allocations will be made from SCRATCH_POOL. 
*/ 1131static svn_error_t * 1132set_cached_combined_window(svn_stringbuf_t *window, 1133 rep_state_t *rs, 1134 apr_pool_t *scratch_pool) 1135{ 1136 /* but key it with the start offset because that is the known state 1137 * when we will look it up */ 1138 svn_fs_x__window_cache_key_t key = { 0 }; 1139 return svn_cache__set(rs->combined_cache, 1140 get_window_key(&key, rs), 1141 window, 1142 scratch_pool); 1143} 1144 1145/* Build an array of rep_state structures in *LIST giving the delta 1146 reps from first_rep to a self-compressed rep. Set *SRC_STATE to 1147 the container rep we find at the end of the chain, or to NULL if 1148 the final delta representation is self-compressed. 1149 The representation to start from is designated by filesystem FS, id 1150 ID, and representation REP. 1151 Also, set *WINDOW_P to the base window content for *LIST, if it 1152 could be found in cache. Otherwise, *LIST will contain the base 1153 representation for the whole delta chain. 1154 */ 1155static svn_error_t * 1156build_rep_list(apr_array_header_t **list, 1157 svn_stringbuf_t **window_p, 1158 rep_state_t **src_state, 1159 svn_fs_t *fs, 1160 svn_fs_x__representation_t *first_rep, 1161 apr_pool_t *result_pool, 1162 apr_pool_t *scratch_pool) 1163{ 1164 svn_fs_x__representation_t rep; 1165 rep_state_t *rs = NULL; 1166 svn_fs_x__rep_header_t *rep_header; 1167 svn_boolean_t is_cached = FALSE; 1168 shared_file_t *shared_file = NULL; 1169 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1170 1171 *list = apr_array_make(result_pool, 1, sizeof(rep_state_t *)); 1172 rep = *first_rep; 1173 1174 /* for the top-level rep, we need the rep_args */ 1175 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, 1176 result_pool, iterpool)); 1177 1178 while (1) 1179 { 1180 svn_pool_clear(iterpool); 1181 1182 /* fetch state, if that has not been done already */ 1183 if (!rs) 1184 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, 1185 &rep, fs, result_pool, iterpool)); 1186 1187 
/* for txn reps and containered reps, there won't be a cached 1188 * combined window */ 1189 if (svn_fs_x__is_revision(rep.id.change_set) 1190 && rep_header->type != svn_fs_x__rep_container 1191 && rs->combined_cache) 1192 SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, 1193 result_pool)); 1194 1195 if (is_cached) 1196 { 1197 /* We already have a reconstructed window in our cache. 1198 Write a pseudo rep_state with the full length. */ 1199 rs->start = 0; 1200 rs->current = 0; 1201 rs->size = (*window_p)->len; 1202 *src_state = rs; 1203 break; 1204 } 1205 1206 if (rep_header->type == svn_fs_x__rep_container) 1207 { 1208 /* This is a container item, so just return the current rep_state. */ 1209 *src_state = rs; 1210 break; 1211 } 1212 1213 /* Push this rep onto the list. If it's self-compressed, we're done. */ 1214 APR_ARRAY_PUSH(*list, rep_state_t *) = rs; 1215 if (rep_header->type == svn_fs_x__rep_self_delta) 1216 { 1217 *src_state = NULL; 1218 break; 1219 } 1220 1221 rep.id.change_set 1222 = svn_fs_x__change_set_by_rev(rep_header->base_revision); 1223 rep.id.number = rep_header->base_item_index; 1224 rep.size = rep_header->base_length; 1225 1226 rs = NULL; 1227 } 1228 svn_pool_destroy(iterpool); 1229 1230 return SVN_NO_ERROR; 1231} 1232 1233 1234/* Create a rep_read_baton structure for node revision NODEREV in 1235 filesystem FS and store it in *RB_P. If FULLTEXT_CACHE_KEY is not 1236 NULL, it is the rep's key in the fulltext cache, and a stringbuf 1237 must be allocated to store the text. If rep is mutable, it must be 1238 refer to file contents. 1239 1240 Allocate the result in RESULT_POOL. This includes the pools within *RB_P. 
1241 */ 1242static svn_error_t * 1243rep_read_get_baton(rep_read_baton_t **rb_p, 1244 svn_fs_t *fs, 1245 svn_fs_x__representation_t *rep, 1246 svn_fs_x__pair_cache_key_t fulltext_cache_key, 1247 apr_pool_t *result_pool) 1248{ 1249 rep_read_baton_t *b; 1250 1251 b = apr_pcalloc(result_pool, sizeof(*b)); 1252 b->fs = fs; 1253 b->rep = *rep; 1254 b->base_window = NULL; 1255 b->chunk_index = 0; 1256 b->buf = NULL; 1257 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, 1258 result_pool); 1259 b->checksum_finalized = FALSE; 1260 memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest)); 1261 b->len = rep->expanded_size; 1262 b->off = 0; 1263 b->fulltext_cache_key = fulltext_cache_key; 1264 1265 /* Clearable sub-pools. Since they have to remain valid for as long as B 1266 lives, we can't take them from some scratch pool. The caller of this 1267 function will have no control over how those subpools will be used. */ 1268 b->scratch_pool = svn_pool_create(result_pool); 1269 b->filehandle_pool = svn_pool_create(result_pool); 1270 b->fulltext_cache = NULL; 1271 b->fulltext_delivered = 0; 1272 b->current_fulltext = NULL; 1273 1274 /* Save our output baton. */ 1275 *rb_p = b; 1276 1277 return SVN_NO_ERROR; 1278} 1279 1280/* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta 1281 window into *NWIN. 
*/ 1282static svn_error_t * 1283read_delta_window(svn_txdelta_window_t **nwin, int this_chunk, 1284 rep_state_t *rs, apr_pool_t *result_pool, 1285 apr_pool_t *scratch_pool) 1286{ 1287 svn_boolean_t is_cached; 1288 apr_off_t start_offset; 1289 apr_off_t end_offset; 1290 apr_pool_t *iterpool; 1291 svn_stream_t *stream; 1292 svn_fs_x__revision_file_t *file; 1293 svn_boolean_t cacheable = rs->chunk_index == 0 1294 && svn_fs_x__is_revision(rs->rep_id.change_set) 1295 && rs->window_cache; 1296 1297 SVN_ERR_ASSERT(rs->chunk_index <= this_chunk); 1298 1299 SVN_ERR(dbg__log_access(rs->sfile->fs, &rs->rep_id, NULL, 1300 SVN_FS_X__ITEM_TYPE_ANY_REP, scratch_pool)); 1301 1302 /* Read the next window. But first, try to find it in the cache. */ 1303 if (cacheable) 1304 { 1305 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached, 1306 result_pool, scratch_pool)); 1307 if (is_cached) 1308 return SVN_NO_ERROR; 1309 } 1310 1311 /* someone has to actually read the data from file. Open it */ 1312 SVN_ERR(auto_open_shared_file(rs->sfile)); 1313 file = rs->sfile->rfile; 1314 1315 /* invoke the 'block-read' feature for non-txn data. 1316 However, don't do that if we are in the middle of some representation, 1317 because the block is unlikely to contain other data. */ 1318 if (cacheable) 1319 { 1320 SVN_ERR(block_read(NULL, rs->sfile->fs, &rs->rep_id, file, NULL, 1321 result_pool, scratch_pool)); 1322 1323 /* reading the whole block probably also provided us with the 1324 desired txdelta window */ 1325 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached, 1326 result_pool, scratch_pool)); 1327 if (is_cached) 1328 return SVN_NO_ERROR; 1329 } 1330 1331 /* data is still not cached -> we need to read it. 1332 Make sure we have all the necessary info. */ 1333 SVN_ERR(auto_set_start_offset(rs, scratch_pool)); 1334 SVN_ERR(auto_read_diff_version(rs, scratch_pool)); 1335 1336 /* RS->FILE may be shared between RS instances -> make sure we point 1337 * to the right data. 
*/ 1338 start_offset = rs->start + rs->current; 1339 SVN_ERR(svn_fs_x__rev_file_seek(file, NULL, start_offset)); 1340 1341 /* Skip windows to reach the current chunk if we aren't there yet. */ 1342 iterpool = svn_pool_create(scratch_pool); 1343 while (rs->chunk_index < this_chunk) 1344 { 1345 apr_file_t *apr_file; 1346 svn_pool_clear(iterpool); 1347 1348 SVN_ERR(svn_fs_x__rev_file_get(&apr_file, file)); 1349 SVN_ERR(svn_txdelta_skip_svndiff_window(apr_file, rs->ver, iterpool)); 1350 rs->chunk_index++; 1351 SVN_ERR(svn_io_file_get_offset(&start_offset, apr_file, iterpool)); 1352 1353 rs->current = start_offset - rs->start; 1354 if (rs->current >= rs->size) 1355 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 1356 _("Reading one svndiff window read " 1357 "beyond the end of the " 1358 "representation")); 1359 } 1360 svn_pool_destroy(iterpool); 1361 1362 /* Actually read the next window. */ 1363 SVN_ERR(svn_fs_x__rev_file_stream(&stream, file)); 1364 SVN_ERR(svn_txdelta_read_svndiff_window(nwin, stream, rs->ver, 1365 result_pool)); 1366 SVN_ERR(svn_fs_x__rev_file_offset(&end_offset, file)); 1367 rs->current = end_offset - rs->start; 1368 if (rs->current > rs->size) 1369 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 1370 _("Reading one svndiff window read beyond " 1371 "the end of the representation")); 1372 1373 /* the window has not been cached before, thus cache it now 1374 * (if caching is used for them at all) */ 1375 if (cacheable) 1376 SVN_ERR(set_cached_window(*nwin, rs, start_offset, scratch_pool)); 1377 1378 return SVN_NO_ERROR; 1379} 1380 1381/* Read the whole representation RS and return it in *NWIN. 
*/ 1382static svn_error_t * 1383read_container_window(svn_stringbuf_t **nwin, 1384 rep_state_t *rs, 1385 apr_size_t size, 1386 apr_pool_t *result_pool, 1387 apr_pool_t *scratch_pool) 1388{ 1389 svn_fs_x__rep_extractor_t *extractor = NULL; 1390 svn_fs_t *fs = rs->sfile->fs; 1391 svn_fs_x__data_t *ffd = fs->fsap_data; 1392 svn_fs_x__pair_cache_key_t key; 1393 svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set); 1394 svn_boolean_t is_cached = FALSE; 1395 svn_fs_x__reps_baton_t baton; 1396 1397 SVN_ERR(auto_set_start_offset(rs, scratch_pool)); 1398 key.revision = svn_fs_x__packed_base_rev(fs, revision); 1399 key.second = rs->start; 1400 1401 /* already in cache? */ 1402 baton.fs = fs; 1403 baton.idx = rs->sub_item; 1404 1405 SVN_ERR(svn_cache__get_partial((void**)&extractor, &is_cached, 1406 ffd->reps_container_cache, &key, 1407 svn_fs_x__reps_get_func, &baton, 1408 result_pool)); 1409 1410 /* read from disk, if necessary */ 1411 if (extractor == NULL) 1412 { 1413 SVN_ERR(auto_open_shared_file(rs->sfile)); 1414 SVN_ERR(block_read((void **)&extractor, fs, &rs->rep_id, 1415 rs->sfile->rfile, NULL, 1416 result_pool, scratch_pool)); 1417 } 1418 1419 SVN_ERR(svn_fs_x__extractor_drive(nwin, extractor, rs->current, size, 1420 result_pool, scratch_pool)); 1421 1422 /* Update RS. */ 1423 rs->current += (apr_off_t)size; 1424 1425 return SVN_NO_ERROR; 1426} 1427 1428/* Get the undeltified window that is a result of combining all deltas 1429 from the current desired representation identified in *RB with its 1430 base representation. Store the window in *RESULT. */ 1431static svn_error_t * 1432get_combined_window(svn_stringbuf_t **result, 1433 rep_read_baton_t *rb) 1434{ 1435 apr_pool_t *pool, *new_pool, *window_pool; 1436 int i; 1437 apr_array_header_t *windows; 1438 svn_stringbuf_t *source, *buf = rb->base_window; 1439 rep_state_t *rs; 1440 apr_pool_t *iterpool; 1441 1442 /* Read all windows that we need to combine. 
This is fine because 1443 the size of each window is relatively small (100kB) and skip- 1444 delta limits the number of deltas in a chain to well under 100. 1445 Stop early if one of them does not depend on its predecessors. */ 1446 window_pool = svn_pool_create(rb->scratch_pool); 1447 windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *)); 1448 iterpool = svn_pool_create(rb->scratch_pool); 1449 for (i = 0; i < rb->rs_list->nelts; ++i) 1450 { 1451 svn_txdelta_window_t *window; 1452 1453 svn_pool_clear(iterpool); 1454 1455 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *); 1456 SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool, 1457 iterpool)); 1458 1459 APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window; 1460 if (window->src_ops == 0) 1461 { 1462 ++i; 1463 break; 1464 } 1465 } 1466 1467 /* Combine in the windows from the other delta reps. */ 1468 pool = svn_pool_create(rb->scratch_pool); 1469 for (--i; i >= 0; --i) 1470 { 1471 svn_txdelta_window_t *window; 1472 1473 svn_pool_clear(iterpool); 1474 1475 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *); 1476 window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *); 1477 1478 /* Maybe, we've got a start representation in a container. If we do, 1479 read as much data from it as the needed for the txdelta window's 1480 source view. 1481 Note that BUF / SOURCE may only be NULL in the first iteration. */ 1482 source = buf; 1483 if (source == NULL && rb->src_state != NULL) 1484 SVN_ERR(read_container_window(&source, rb->src_state, 1485 window->sview_len, pool, iterpool)); 1486 1487 /* Combine this window with the current one. */ 1488 new_pool = svn_pool_create(rb->scratch_pool); 1489 buf = svn_stringbuf_create_ensure(window->tview_len, new_pool); 1490 buf->len = window->tview_len; 1491 1492 svn_txdelta_apply_instructions(window, source ? 
source->data : NULL, 1493 buf->data, &buf->len); 1494 if (buf->len != window->tview_len) 1495 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 1496 _("svndiff window length is " 1497 "corrupt")); 1498 1499 /* Cache windows only if the whole rep content could be read as a 1500 single chunk. Only then will no other chunk need a deeper RS 1501 list than the cached chunk. */ 1502 if ( (rb->chunk_index == 0) && (rs->current == rs->size) 1503 && svn_fs_x__is_revision(rs->rep_id.change_set) 1504 && rs->combined_cache) 1505 SVN_ERR(set_cached_combined_window(buf, rs, new_pool)); 1506 1507 rs->chunk_index++; 1508 1509 /* Cycle pools so that we only need to hold three windows at a time. */ 1510 svn_pool_destroy(pool); 1511 pool = new_pool; 1512 } 1513 svn_pool_destroy(iterpool); 1514 1515 svn_pool_destroy(window_pool); 1516 1517 *result = buf; 1518 return SVN_NO_ERROR; 1519} 1520 1521/* Returns whether or not the expanded fulltext of the file is cachable 1522 * based on its size SIZE. The decision depends on the cache used by FFD. 1523 */ 1524static svn_boolean_t 1525fulltext_size_is_cachable(svn_fs_x__data_t *ffd, 1526 svn_filesize_t size) 1527{ 1528 return (size < APR_SIZE_MAX) 1529 && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size); 1530} 1531 1532/* Close method used on streams returned by read_representation(). 1533 */ 1534static svn_error_t * 1535rep_read_contents_close(void *baton) 1536{ 1537 rep_read_baton_t *rb = baton; 1538 1539 svn_pool_destroy(rb->scratch_pool); 1540 svn_pool_destroy(rb->filehandle_pool); 1541 1542 return SVN_NO_ERROR; 1543} 1544 1545/* Inialize the representation read state RS for the given REP_HEADER and 1546 * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS. 1547 * Allocate all sub-structures of RS in RESULT_POOL. 
1548 */ 1549static svn_error_t * 1550init_rep_state(rep_state_t *rs, 1551 svn_fs_x__rep_header_t *rep_header, 1552 svn_fs_t *fs, 1553 svn_fs_x__revision_file_t *rev_file, 1554 svn_fs_x__p2l_entry_t* entry, 1555 apr_pool_t *result_pool) 1556{ 1557 svn_fs_x__data_t *ffd = fs->fsap_data; 1558 shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file)); 1559 1560 /* this function does not apply to representation containers */ 1561 SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP 1562 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS); 1563 SVN_ERR_ASSERT(entry->item_count == 1); 1564 1565 shared_file->rfile = rev_file; 1566 shared_file->fs = fs; 1567 shared_file->revision = svn_fs_x__get_revnum(entry->items[0].change_set); 1568 shared_file->pool = result_pool; 1569 1570 rs->sfile = shared_file; 1571 rs->rep_id = entry->items[0]; 1572 rs->header_size = rep_header->header_size; 1573 rs->start = entry->offset + rs->header_size; 1574 rs->current = 4; 1575 rs->size = entry->size - rep_header->header_size - 7; 1576 rs->ver = 1; 1577 rs->chunk_index = 0; 1578 rs->window_cache = ffd->txdelta_window_cache; 1579 rs->combined_cache = ffd->combined_window_cache; 1580 1581 return SVN_NO_ERROR; 1582} 1583 1584/* Walk through all windows in the representation addressed by RS in FS 1585 * (excluding the delta bases) and put those not already cached into the 1586 * window caches. If MAX_OFFSET is not -1, don't read windows that start 1587 * at or beyond that offset. As a side effect, return the total sum of all 1588 * expanded window sizes in *FULLTEXT_LEN. 1589 * Use SCRATCH_POOL for temporary allocations. 
1590 */ 1591static svn_error_t * 1592cache_windows(svn_filesize_t *fulltext_len, 1593 svn_fs_t *fs, 1594 rep_state_t *rs, 1595 apr_off_t max_offset, 1596 apr_pool_t *scratch_pool) 1597{ 1598 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1599 *fulltext_len = 0; 1600 1601 while (rs->current < rs->size) 1602 { 1603 svn_boolean_t is_cached = FALSE; 1604 window_sizes_t *window_sizes; 1605 1606 svn_pool_clear(iterpool); 1607 if (max_offset != -1 && rs->start + rs->current >= max_offset) 1608 { 1609 svn_pool_destroy(iterpool); 1610 return SVN_NO_ERROR; 1611 } 1612 1613 /* efficiently skip windows that are still being cached instead 1614 * of fully decoding them */ 1615 SVN_ERR(get_cached_window_sizes(&window_sizes, rs, &is_cached, 1616 iterpool)); 1617 if (is_cached) 1618 { 1619 *fulltext_len += window_sizes->target_len; 1620 rs->current += window_sizes->packed_len; 1621 } 1622 else 1623 { 1624 svn_txdelta_window_t *window; 1625 svn_fs_x__revision_file_t *file = rs->sfile->rfile; 1626 svn_stream_t *stream; 1627 apr_off_t start_offset = rs->start + rs->current; 1628 apr_off_t end_offset; 1629 apr_off_t block_start; 1630 1631 /* navigate to & read the current window */ 1632 SVN_ERR(svn_fs_x__rev_file_stream(&stream, file)); 1633 SVN_ERR(svn_fs_x__rev_file_seek(file, &block_start, start_offset)); 1634 SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, rs->ver, 1635 iterpool)); 1636 1637 /* aggregate expanded window size */ 1638 *fulltext_len += window->tview_len; 1639 1640 /* determine on-disk window size */ 1641 SVN_ERR(svn_fs_x__rev_file_offset(&end_offset, rs->sfile->rfile)); 1642 rs->current = end_offset - rs->start; 1643 if (rs->current > rs->size) 1644 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, 1645 _("Reading one svndiff window read beyond " 1646 "the end of the representation")); 1647 1648 /* if the window has not been cached before, cache it now 1649 * (if caching is used for them at all) */ 1650 if (!is_cached) 1651 
SVN_ERR(set_cached_window(window, rs, start_offset, iterpool)); 1652 } 1653 1654 rs->chunk_index++; 1655 } 1656 1657 svn_pool_destroy(iterpool); 1658 1659 return SVN_NO_ERROR; 1660} 1661 1662/* Try to get the representation header identified by KEY from FS's cache. 1663 * If it has not been cached, read it from the current position in STREAM 1664 * and put it into the cache (if caching has been enabled for rep headers). 1665 * Return the result in *REP_HEADER. Use POOL for allocations. 1666 */ 1667static svn_error_t * 1668read_rep_header(svn_fs_x__rep_header_t **rep_header, 1669 svn_fs_t *fs, 1670 svn_fs_x__revision_file_t *file, 1671 svn_fs_x__representation_cache_key_t *key, 1672 apr_pool_t *pool) 1673{ 1674 svn_fs_x__data_t *ffd = fs->fsap_data; 1675 svn_stream_t *stream; 1676 svn_boolean_t is_cached = FALSE; 1677 1678 SVN_ERR(svn_cache__get((void**)rep_header, &is_cached, 1679 ffd->rep_header_cache, key, pool)); 1680 if (is_cached) 1681 return SVN_NO_ERROR; 1682 1683 SVN_ERR(svn_fs_x__rev_file_stream(&stream, file)); 1684 SVN_ERR(svn_fs_x__read_rep_header(rep_header, stream, pool, pool)); 1685 SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header, pool)); 1686 1687 return SVN_NO_ERROR; 1688} 1689 1690svn_error_t * 1691svn_fs_x__get_representation_length(svn_filesize_t *packed_len, 1692 svn_filesize_t *expanded_len, 1693 svn_fs_t *fs, 1694 svn_fs_x__revision_file_t *rev_file, 1695 svn_fs_x__p2l_entry_t* entry, 1696 apr_pool_t *scratch_pool) 1697{ 1698 svn_fs_x__representation_cache_key_t key = { 0 }; 1699 rep_state_t rs = { 0 }; 1700 svn_fs_x__rep_header_t *rep_header; 1701 1702 /* this function does not apply to representation containers */ 1703 SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP 1704 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS); 1705 SVN_ERR_ASSERT(entry->item_count == 1); 1706 1707 /* get / read the representation header */ 1708 key.revision = svn_fs_x__get_revnum(entry->items[0].change_set); 1709 key.is_packed = 
svn_fs_x__is_packed_rev(fs, key.revision); 1710 key.item_index = entry->items[0].number; 1711 SVN_ERR(read_rep_header(&rep_header, fs, rev_file, &key, scratch_pool)); 1712 1713 /* prepare representation reader state (rs) structure */ 1714 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry, 1715 scratch_pool)); 1716 1717 /* RS->SFILE may be shared between RS instances -> make sure we point 1718 * to the right data. */ 1719 *packed_len = rs.size; 1720 SVN_ERR(cache_windows(expanded_len, fs, &rs, -1, scratch_pool)); 1721 1722 return SVN_NO_ERROR; 1723} 1724 1725/* Return the next *LEN bytes of the rep from our plain / delta windows 1726 and store them in *BUF. */ 1727static svn_error_t * 1728get_contents_from_windows(rep_read_baton_t *rb, 1729 char *buf, 1730 apr_size_t *len) 1731{ 1732 apr_size_t copy_len, remaining = *len; 1733 char *cur = buf; 1734 rep_state_t *rs; 1735 1736 /* Special case for when there are no delta reps, only a 1737 containered text. */ 1738 if (rb->rs_list->nelts == 0 && rb->buf == NULL) 1739 { 1740 copy_len = remaining; 1741 rs = rb->src_state; 1742 1743 /* reps in containers don't have a header */ 1744 if (rs->header_size == 0 && rb->base_window == NULL) 1745 { 1746 /* RS->SIZE is unreliable here because it is based upon 1747 * the delta rep size _before_ putting the data into a 1748 * a container. */ 1749 SVN_ERR(read_container_window(&rb->base_window, rs, rb->len, 1750 rb->scratch_pool, rb->scratch_pool)); 1751 rs->current -= rb->base_window->len; 1752 } 1753 1754 if (rb->base_window != NULL) 1755 { 1756 /* We got the desired rep directly from the cache. 1757 This is where we need the pseudo rep_state created 1758 by build_rep_list(). 
*/ 1759 apr_size_t offset = (apr_size_t)rs->current; 1760 if (offset >= rb->base_window->len) 1761 copy_len = 0ul; 1762 else if (copy_len > rb->base_window->len - offset) 1763 copy_len = rb->base_window->len - offset; 1764 1765 memcpy (cur, rb->base_window->data + offset, copy_len); 1766 } 1767 1768 rs->current += copy_len; 1769 *len = copy_len; 1770 return SVN_NO_ERROR; 1771 } 1772 1773 while (remaining > 0) 1774 { 1775 /* If we have buffered data from a previous chunk, use that. */ 1776 if (rb->buf) 1777 { 1778 /* Determine how much to copy from the buffer. */ 1779 copy_len = rb->buf_len - rb->buf_pos; 1780 if (copy_len > remaining) 1781 copy_len = remaining; 1782 1783 /* Actually copy the data. */ 1784 memcpy(cur, rb->buf + rb->buf_pos, copy_len); 1785 rb->buf_pos += copy_len; 1786 cur += copy_len; 1787 remaining -= copy_len; 1788 1789 /* If the buffer is all used up, clear it and empty the 1790 local pool. */ 1791 if (rb->buf_pos == rb->buf_len) 1792 { 1793 svn_pool_clear(rb->scratch_pool); 1794 rb->buf = NULL; 1795 } 1796 } 1797 else 1798 { 1799 svn_stringbuf_t *sbuf = NULL; 1800 1801 rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *); 1802 if (rs->current == rs->size) 1803 break; 1804 1805 /* Get more buffered data by evaluating a chunk. */ 1806 SVN_ERR(get_combined_window(&sbuf, rb)); 1807 1808 rb->chunk_index++; 1809 rb->buf_len = sbuf->len; 1810 rb->buf = sbuf->data; 1811 rb->buf_pos = 0; 1812 } 1813 } 1814 1815 *len = cur - buf; 1816 1817 return SVN_NO_ERROR; 1818} 1819 1820/* Baton type for get_fulltext_partial. */ 1821typedef struct fulltext_baton_t 1822{ 1823 /* Target buffer to write to; of at least LEN bytes. */ 1824 char *buffer; 1825 1826 /* Offset within the respective fulltext at which we shall start to 1827 copy data into BUFFER. */ 1828 apr_size_t start; 1829 1830 /* Number of bytes to copy. The actual amount may be less in case 1831 the fulltext is short(er). */ 1832 apr_size_t len; 1833 1834 /* Number of bytes actually copied into BUFFER. 
*/ 1835 apr_size_t read; 1836} fulltext_baton_t; 1837 1838/* Implement svn_cache__partial_getter_func_t for fulltext caches. 1839 * From the fulltext in DATA, we copy the range specified by the 1840 * fulltext_baton_t* BATON into the buffer provided by that baton. 1841 * OUT and RESULT_POOL are not used. 1842 */ 1843static svn_error_t * 1844get_fulltext_partial(void **out, 1845 const void *data, 1846 apr_size_t data_len, 1847 void *baton, 1848 apr_pool_t *result_pool) 1849{ 1850 fulltext_baton_t *fulltext_baton = baton; 1851 1852 /* We cached the fulltext with an NUL appended to it. */ 1853 apr_size_t fulltext_len = data_len - 1; 1854 1855 /* Clip the copy range to what the fulltext size allows. */ 1856 apr_size_t start = MIN(fulltext_baton->start, fulltext_len); 1857 fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len); 1858 1859 /* Copy the data to the output buffer and be done. */ 1860 memcpy(fulltext_baton->buffer, (const char *)data + start, 1861 fulltext_baton->read); 1862 1863 return SVN_NO_ERROR; 1864} 1865 1866/* Find the fulltext specified in BATON in the fulltext cache given 1867 * as well by BATON. If that succeeds, set *CACHED to TRUE and copy 1868 * up to the next *LEN bytes into BUFFER. Set *LEN to the actual 1869 * number of bytes copied. 
1870 */ 1871static svn_error_t * 1872get_contents_from_fulltext(svn_boolean_t *cached, 1873 rep_read_baton_t *baton, 1874 char *buffer, 1875 apr_size_t *len) 1876{ 1877 void *dummy; 1878 fulltext_baton_t fulltext_baton; 1879 1880 SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered 1881 == baton->fulltext_delivered); 1882 fulltext_baton.buffer = buffer; 1883 fulltext_baton.start = (apr_size_t)baton->fulltext_delivered; 1884 fulltext_baton.len = *len; 1885 fulltext_baton.read = 0; 1886 1887 SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache, 1888 &baton->fulltext_cache_key, 1889 get_fulltext_partial, &fulltext_baton, 1890 baton->scratch_pool)); 1891 1892 if (*cached) 1893 { 1894 baton->fulltext_delivered += fulltext_baton.read; 1895 *len = fulltext_baton.read; 1896 } 1897 1898 return SVN_NO_ERROR; 1899} 1900 1901/* Determine the optimal size of a string buf that shall receive a 1902 * (full-) text of NEEDED bytes. 1903 * 1904 * The critical point is that those buffers may be very large and 1905 * can cause memory fragmentation. We apply simple heuristics to 1906 * make fragmentation less likely. 1907 */ 1908static apr_size_t 1909optimimal_allocation_size(apr_size_t needed) 1910{ 1911 /* For all allocations, assume some overhead that is shared between 1912 * OS memory managemnt, APR memory management and svn_stringbuf_t. */ 1913 const apr_size_t overhead = 0x400; 1914 apr_size_t optimal; 1915 1916 /* If an allocation size if safe for other ephemeral buffers, it should 1917 * be safe for ours. */ 1918 if (needed <= SVN__STREAM_CHUNK_SIZE) 1919 return needed; 1920 1921 /* Paranoia edge case: 1922 * Skip our heuristics if they created arithmetical overflow. 1923 * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */ 1924 if (needed >= APR_SIZE_MAX / 2 - overhead) 1925 return needed; 1926 1927 /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two. 
   * Since we know NEEDED to be larger than that, use it as the
   * starting point.
   *
   * Heuristics: Allocate a power-of-two number of bytes that fit
   *             NEEDED plus some OVERHEAD.  The APR allocator
   *             will round it up to the next full page size.
   */
  optimal = SVN__STREAM_CHUNK_SIZE;
  while (optimal - overhead < needed)
    optimal *= 2;

  /* This is above or equal to NEEDED. */
  return optimal - overhead;
}

/* After a fulltext cache lookup failure, we will continue to read from
 * combined delta or plain windows.  However, we must first make that data
 * stream in BATON catch up to the position LEN already delivered from the
 * fulltext cache.  Also, we need to store the reconstructed fulltext if we
 * want to cache it at the end.
 *
 * On success, BATON->OFF has been advanced by the number of bytes skipped
 * and the MD5 context in BATON has been fed with those bytes.
 */
static svn_error_t *
skip_contents(rep_read_baton_t *baton,
              svn_filesize_t len)
{
  svn_error_t *err = SVN_NO_ERROR;

  /* Do we want to cache the reconstructed fulltext? */
  if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
    {
      char *buffer;
      svn_filesize_t to_alloc = MAX(len, baton->len);

      /* This should only be happening if BATON->LEN and LEN are
       * cacheable, implying they fit into memory. */
      SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);

      /* Allocate the fulltext buffer. */
      baton->current_fulltext = svn_stringbuf_create_ensure(
                        optimimal_allocation_size((apr_size_t)to_alloc),
                        baton->filehandle_pool);

      /* Read LEN bytes from the window stream and store the data
       * in the fulltext buffer (will be filled by further reads later).
       * The buffer length is set up front; the loop below then fills
       * the first LEN bytes in place. */
      baton->current_fulltext->len = (apr_size_t)len;
      baton->current_fulltext->data[(apr_size_t)len] = 0;

      buffer = baton->current_fulltext->data;
      while (len > 0 && !err)
        {
          /* TO_READ is in/out: requested size in, delivered size out. */
          apr_size_t to_read = (apr_size_t)len;
          err = get_contents_from_windows(baton, buffer, &to_read);
          len -= to_read;
          buffer += to_read;
        }

      /* Make the MD5 calculation catch up with the data delivered
       * (we did not run MD5 on the data that we took from the cache). */
      if (!err)
        {
          SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
                                      baton->current_fulltext->data,
                                      baton->current_fulltext->len));
          baton->off += baton->current_fulltext->len;
        }
    }
  else if (len > 0)
    {
      /* Simply drain LEN bytes from the window stream, one chunk of at
       * most SVN__STREAM_CHUNK_SIZE bytes at a time. */
      apr_pool_t *subpool = svn_pool_create(baton->scratch_pool);
      char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);

      while (len > 0 && !err)
        {
          apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
                             ? SVN__STREAM_CHUNK_SIZE
                             : (apr_size_t)len;

          err = get_contents_from_windows(baton, buffer, &to_read);
          len -= to_read;

          /* Make the MD5 calculation catch up with the data delivered
           * (we did not run MD5 on the data that we took from the cache).
           * NOTE: an early return from here skips the explicit destruction
           * of SUBPOOL below. */
          if (!err)
            {
              SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
                                          buffer, to_read));
              baton->off += to_read;
            }
        }

      svn_pool_destroy(subpool);
    }

  return svn_error_trace(err);
}

/* BATON is of type `rep_read_baton_t'; read the next *LEN bytes of the
   representation and store them in *BUF.  Sum as we read and verify
   the MD5 sum at the end.

   Data is served from the fulltext cache until the first cache miss;
   from then on it is reconstructed from the window stream.  Once the
   last byte has been read and a fulltext buffer was being collected,
   that buffer is put into the fulltext cache. */
static svn_error_t *
rep_read_contents(void *baton,
                  char *buf,
                  apr_size_t *len)
{
  rep_read_baton_t *rb = baton;

  /* Get data from the fulltext cache for as long as we can. */
  if (rb->fulltext_cache)
    {
      svn_boolean_t cached;
      SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
      if (cached)
        return SVN_NO_ERROR;

      /* Cache miss.  From now on, we will never read from the fulltext
       * cache for this representation anymore. */
      rb->fulltext_cache = NULL;
    }

  /* No fulltext cache to help us.  We must read from the window stream. */
  if (!rb->rs_list)
    {
      /* Window stream not initialized, yet.  Do it now. */
      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
                             &rb->src_state, rb->fs, &rb->rep,
                             rb->filehandle_pool, rb->scratch_pool));

      /* In case we did read from the fulltext cache before, make the
       * window stream catch up.  Also, initialize the fulltext buffer
       * if we want to cache the fulltext at the end. */
      SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
    }

  /* Get the next block of data.
   * Keep in mind that the representation might be empty and leave us
   * already positioned at the end of the rep. */
  if (rb->off == rb->len)
    *len = 0;
  else
    SVN_ERR(get_contents_from_windows(rb, buf, len));

  /* Keep collecting the fulltext if we intend to cache it. */
  if (rb->current_fulltext)
    svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);

  /* Perform checksumming.  We want to check the checksum as soon as
     the last byte of data is read, in case the caller never performs
     a short read, but we don't want to finalize the MD5 context
     twice. */
  if (!rb->checksum_finalized)
    {
      SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
      rb->off += *len;
      if (rb->off == rb->len)
        {
          svn_checksum_t *md5_checksum;
          svn_checksum_t expected;
          expected.kind = svn_checksum_md5;
          expected.digest = rb->md5_digest;

          rb->checksum_finalized = TRUE;
          SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
                                     rb->scratch_pool));
          if (!svn_checksum_match(md5_checksum, &expected))
            return svn_error_create(SVN_ERR_FS_CORRUPT,
                    svn_checksum_mismatch_err(&expected, md5_checksum,
                        rb->scratch_pool,
                        _("Checksum mismatch while reading representation")),
                    NULL);
        }
    }

  /* All data read: hand the collected fulltext over to the cache. */
  if (rb->off == rb->len && rb->current_fulltext)
    {
      svn_fs_x__data_t *ffd = rb->fs->fsap_data;
      SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
                             rb->current_fulltext, rb->scratch_pool));
      rb->current_fulltext = NULL;
    }

  return SVN_NO_ERROR;
}

/* Set *CONTENTS_P to a read stream, allocated in RESULT_POOL, that
 * delivers the contents of REP in FS.  A NULL REP yields an empty stream.
 * Fulltext cache lookups and caching of the reconstructed fulltext are
 * enabled only if CACHE_FULLTEXT is set, REP belongs to a valid revision
 * and fulltext_size_is_cachable() accepts its expanded size.
 */
svn_error_t *
svn_fs_x__get_contents(svn_stream_t **contents_p,
                       svn_fs_t *fs,
                       svn_fs_x__representation_t *rep,
                       svn_boolean_t cache_fulltext,
                       apr_pool_t *result_pool)
{
  if (! rep)
    {
      *contents_p = svn_stream_empty(result_pool);
    }
  else
    {
      svn_fs_x__data_t *ffd = fs->fsap_data;
      svn_filesize_t len = rep->expanded_size;
      rep_read_baton_t *rb;
      svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);

      svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
      fulltext_cache_key.revision = revision;
      fulltext_cache_key.second = rep->id.number;

      /* Initialize the reader baton.  Some members may be added lazily
       * while reading from the stream */
      SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key,
                                 result_pool));

      /* Make the stream attempt fulltext cache lookups if the fulltext
       * is cacheable.  If it is not, then also don't try to buffer and
       * cache it. */
      if (   cache_fulltext
          && SVN_IS_VALID_REVNUM(revision)
          && fulltext_size_is_cachable(ffd, len))
        {
          rb->fulltext_cache = ffd->fulltext_cache;
        }
      else
        {
          /* This will also prevent the reconstructed fulltext from being
             put into the cache. */
          rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
        }

      *contents_p = svn_stream_create(rb, result_pool);
      svn_stream_set_read2(*contents_p, NULL /* only full read support */,
                           rep_read_contents);
      svn_stream_set_close(*contents_p, rep_read_contents_close);
    }

  return SVN_NO_ERROR;
}

/* Set *CONTENTS_P to a read stream, allocated in POOL, that delivers the
 * contents of REP whose representation header starts at OFFSET in FILE.
 * The fulltext cache is not used (the cache key carries an invalid
 * revision).  If the header does not describe a self-delta, the delta
 * base is located in FS through build_rep_list().
 */
svn_error_t *
svn_fs_x__get_contents_from_file(svn_stream_t **contents_p,
                                 svn_fs_t *fs,
                                 svn_fs_x__representation_t *rep,
                                 apr_file_t *file,
                                 apr_off_t offset,
                                 apr_pool_t *pool)
{
  rep_read_baton_t *rb;
  svn_fs_x__pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
  rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
  svn_fs_x__rep_header_t *rh;
  svn_stream_t *stream;

  /* Initialize the reader baton.  Some members may be added lazily
   * while reading from the stream. */
  SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));

  /* Continue constructing RS.  Leave caches as NULL. */
  rs->size = rep->size;
  rs->rep_id = rep->id;
  rs->ver = -1;
  rs->start = -1;

  /* Provide just enough file access info to allow for a basic read from
   * FILE but leave all index / footer info with empty values b/c FILE
   * probably is not a complete revision file. */
  rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
  rs->sfile->revision = SVN_INVALID_REVNUM;
  rs->sfile->pool = pool;
  rs->sfile->fs = fs;
  SVN_ERR(svn_fs_x__rev_file_wrap_temp(&rs->sfile->rfile, fs, file, pool));

  /* Read the rep header.  RS->START becomes the file position right
   * behind the header. */
  SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, offset));
  SVN_ERR(svn_fs_x__rev_file_stream(&stream, rs->sfile->rfile));
  SVN_ERR(svn_fs_x__read_rep_header(&rh, stream, pool, pool));
  SVN_ERR(svn_fs_x__rev_file_offset(&rs->start, rs->sfile->rfile));
  rs->header_size = rh->header_size;

  /* Log the access. */
  SVN_ERR(dbg__log_access(fs, &rep->id, rh,
                          SVN_FS_X__ITEM_TYPE_ANY_REP, pool));

  /* Build the representation list (delta chain). */
  if (rh->type == svn_fs_x__rep_self_delta)
    {
      /* No base representation: the chain consists of RS only. */
      rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
      APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
      rb->src_state = NULL;
    }
  else
    {
      svn_fs_x__representation_t next_rep = { 0 };

      /* skip "SVNx" diff marker */
      rs->current = 4;

      /* REP's base rep is inside a proper revision.
       * It can be reconstructed in the usual way. */
      next_rep.id.change_set = svn_fs_x__change_set_by_rev(rh->base_revision);
      next_rep.id.number = rh->base_item_index;
      next_rep.size = rh->base_length;

      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
                             &rb->src_state, rb->fs, &next_rep,
                             rb->filehandle_pool, rb->scratch_pool));

      /* Insert the access to REP as the first element of the delta chain. */
      SVN_ERR(svn_sort__array_insert2(rb->rs_list, &rs, 0));
    }

  /* Now, the baton is complete and we can assemble the stream around it. */
  *contents_p = svn_stream_create(rb, pool);
  svn_stream_set_read2(*contents_p, NULL /* only full read support */,
                       rep_read_contents);
  svn_stream_set_close(*contents_p, rep_read_contents_close);

  return SVN_NO_ERROR;
}

/* Baton for cache_access_wrapper.  Wraps the original parameters of
 * svn_fs_x__try_process_file_contents().
2246 */ 2247typedef struct cache_access_wrapper_baton_t 2248{ 2249 svn_fs_process_contents_func_t func; 2250 void* baton; 2251} cache_access_wrapper_baton_t; 2252 2253/* Wrapper to translate between svn_fs_process_contents_func_t and 2254 * svn_cache__partial_getter_func_t. 2255 */ 2256static svn_error_t * 2257cache_access_wrapper(void **out, 2258 const void *data, 2259 apr_size_t data_len, 2260 void *baton, 2261 apr_pool_t *pool) 2262{ 2263 cache_access_wrapper_baton_t *wrapper_baton = baton; 2264 2265 SVN_ERR(wrapper_baton->func((const unsigned char *)data, 2266 data_len - 1, /* cache adds terminating 0 */ 2267 wrapper_baton->baton, 2268 pool)); 2269 2270 /* non-NULL value to signal the calling cache that all went well */ 2271 *out = baton; 2272 2273 return SVN_NO_ERROR; 2274} 2275 2276svn_error_t * 2277svn_fs_x__try_process_file_contents(svn_boolean_t *success, 2278 svn_fs_t *fs, 2279 svn_fs_x__noderev_t *noderev, 2280 svn_fs_process_contents_func_t processor, 2281 void* baton, 2282 apr_pool_t *scratch_pool) 2283{ 2284 svn_fs_x__representation_t *rep = noderev->data_rep; 2285 if (rep) 2286 { 2287 svn_fs_x__data_t *ffd = fs->fsap_data; 2288 svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 }; 2289 2290 fulltext_cache_key.revision = svn_fs_x__get_revnum(rep->id.change_set); 2291 fulltext_cache_key.second = rep->id.number; 2292 if ( SVN_IS_VALID_REVNUM(fulltext_cache_key.revision) 2293 && fulltext_size_is_cachable(ffd, rep->expanded_size)) 2294 { 2295 cache_access_wrapper_baton_t wrapper_baton; 2296 void *dummy = NULL; 2297 2298 wrapper_baton.func = processor; 2299 wrapper_baton.baton = baton; 2300 return svn_cache__get_partial(&dummy, success, 2301 ffd->fulltext_cache, 2302 &fulltext_cache_key, 2303 cache_access_wrapper, 2304 &wrapper_baton, 2305 scratch_pool); 2306 } 2307 } 2308 2309 *success = FALSE; 2310 return SVN_NO_ERROR; 2311} 2312 2313/* Baton used when reading delta windows. 
*/ 2314typedef struct delta_read_baton_t 2315{ 2316 struct rep_state_t *rs; 2317 unsigned char md5_digest[APR_MD5_DIGESTSIZE]; 2318} delta_read_baton_t; 2319 2320/* This implements the svn_txdelta_next_window_fn_t interface. */ 2321static svn_error_t * 2322delta_read_next_window(svn_txdelta_window_t **window, 2323 void *baton, 2324 apr_pool_t *pool) 2325{ 2326 delta_read_baton_t *drb = baton; 2327 apr_pool_t *scratch_pool = svn_pool_create(pool); 2328 2329 *window = NULL; 2330 if (drb->rs->current < drb->rs->size) 2331 { 2332 SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool, 2333 scratch_pool)); 2334 drb->rs->chunk_index++; 2335 } 2336 2337 svn_pool_destroy(scratch_pool); 2338 2339 return SVN_NO_ERROR; 2340} 2341 2342/* This implements the svn_txdelta_md5_digest_fn_t interface. */ 2343static const unsigned char * 2344delta_read_md5_digest(void *baton) 2345{ 2346 delta_read_baton_t *drb = baton; 2347 return drb->md5_digest; 2348} 2349 2350/* Return a txdelta stream for on-disk representation REP_STATE 2351 * of TARGET. Allocate the result in RESULT_POOL. 2352 */ 2353static svn_txdelta_stream_t * 2354get_storaged_delta_stream(rep_state_t *rep_state, 2355 svn_fs_x__noderev_t *target, 2356 apr_pool_t *result_pool) 2357{ 2358 /* Create the delta read baton. 
*/ 2359 delta_read_baton_t *drb = apr_pcalloc(result_pool, sizeof(*drb)); 2360 drb->rs = rep_state; 2361 memcpy(drb->md5_digest, target->data_rep->md5_digest, 2362 sizeof(drb->md5_digest)); 2363 return svn_txdelta_stream_create(drb, delta_read_next_window, 2364 delta_read_md5_digest, result_pool); 2365} 2366 2367svn_error_t * 2368svn_fs_x__get_file_delta_stream(svn_txdelta_stream_t **stream_p, 2369 svn_fs_t *fs, 2370 svn_fs_x__noderev_t *source, 2371 svn_fs_x__noderev_t *target, 2372 apr_pool_t *result_pool, 2373 apr_pool_t *scratch_pool) 2374{ 2375 svn_stream_t *source_stream, *target_stream; 2376 rep_state_t *rep_state; 2377 svn_fs_x__rep_header_t *rep_header; 2378 2379 /* Try a shortcut: if the target is stored as a delta against the source, 2380 then just use that delta. However, prefer using the fulltext cache 2381 whenever that is available. */ 2382 if (target->data_rep && source) 2383 { 2384 /* Read target's base rep if any. */ 2385 SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL, 2386 target->data_rep, fs, result_pool, 2387 scratch_pool)); 2388 2389 /* Try a shortcut: if the target is stored as a delta against the source, 2390 then just use that delta. */ 2391 if (source && source->data_rep && target->data_rep) 2392 { 2393 /* If that matches source, then use this delta as is. 2394 Note that we want an actual delta here. E.g. a self-delta would 2395 not be good enough. */ 2396 if (rep_header->type == svn_fs_x__rep_delta 2397 && rep_header->base_revision 2398 == svn_fs_x__get_revnum(source->data_rep->id.change_set) 2399 && rep_header->base_item_index == source->data_rep->id.number) 2400 { 2401 *stream_p = get_storaged_delta_stream(rep_state, target, 2402 result_pool); 2403 return SVN_NO_ERROR; 2404 } 2405 } 2406 else if (!source) 2407 { 2408 /* We want a self-delta. There is a fair chance that TARGET got 2409 added in this revision and is already stored in the requested 2410 format. 
*/ 2411 if (rep_header->type == svn_fs_x__rep_self_delta) 2412 { 2413 *stream_p = get_storaged_delta_stream(rep_state, target, 2414 result_pool); 2415 return SVN_NO_ERROR; 2416 } 2417 } 2418 2419 /* Don't keep file handles open for longer than necessary. */ 2420 if (rep_state->sfile->rfile) 2421 { 2422 SVN_ERR(svn_fs_x__close_revision_file(rep_state->sfile->rfile)); 2423 rep_state->sfile->rfile = NULL; 2424 } 2425 } 2426 2427 /* Read both fulltexts and construct a delta. */ 2428 if (source) 2429 SVN_ERR(svn_fs_x__get_contents(&source_stream, fs, source->data_rep, 2430 TRUE, result_pool)); 2431 else 2432 source_stream = svn_stream_empty(result_pool); 2433 2434 SVN_ERR(svn_fs_x__get_contents(&target_stream, fs, target->data_rep, 2435 TRUE, result_pool)); 2436 2437 /* Because source and target stream will already verify their content, 2438 * there is no need to do this once more. In particular if the stream 2439 * content is being fetched from cache. */ 2440 svn_txdelta2(stream_p, source_stream, target_stream, FALSE, result_pool); 2441 2442 return SVN_NO_ERROR; 2443} 2444 2445/* Return TRUE when all svn_fs_x__dirent_t* in ENTRIES are already sorted 2446 by their respective name. */ 2447static svn_boolean_t 2448sorted(apr_array_header_t *entries) 2449{ 2450 int i; 2451 2452 const svn_fs_x__dirent_t * const *dirents = (const void *)entries->elts; 2453 for (i = 0; i < entries->nelts-1; ++i) 2454 if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0) 2455 return FALSE; 2456 2457 return TRUE; 2458} 2459 2460/* Compare the names of the two dirents given in **A and **B. */ 2461static int 2462compare_dirents(const void *a, 2463 const void *b) 2464{ 2465 const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a); 2466 const svn_fs_x__dirent_t *rhs = *((const svn_fs_x__dirent_t * const *) b); 2467 2468 return strcmp(lhs->name, rhs->name); 2469} 2470 2471/* Compare the name of the dirents given in **A with the C string in *B. 
*/ 2472static int 2473compare_dirent_name(const void *a, 2474 const void *b) 2475{ 2476 const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a); 2477 const char *rhs = b; 2478 2479 return strcmp(lhs->name, rhs); 2480} 2481 2482/* Into ENTRIES, parse all directories entries from the serialized form in 2483 * DATA. If INCREMENTAL is TRUE, read until the end of the STREAM and 2484 * update the data. ID is provided for nicer error messages. 2485 * 2486 * The contents of DATA will be shared with the items in ENTRIES, i.e. it 2487 * must not be modified afterwards and must remain valid as long as ENTRIES 2488 * is valid. Use SCRATCH_POOL for temporary allocations. 2489 */ 2490static svn_error_t * 2491parse_dir_entries(apr_array_header_t **entries_p, 2492 const svn_stringbuf_t *data, 2493 svn_boolean_t incremental, 2494 const svn_fs_x__id_t *id, 2495 apr_pool_t *result_pool, 2496 apr_pool_t *scratch_pool) 2497{ 2498 const apr_byte_t *p = (const apr_byte_t *)data->data; 2499 const apr_byte_t *end = p + data->len; 2500 apr_uint64_t count; 2501 apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL; 2502 apr_array_header_t *entries; 2503 2504 /* Construct the resulting container. */ 2505 p = svn__decode_uint(&count, p, end); 2506 if (count > INT_MAX) 2507 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2508 _("Directory for '%s' is too large"), 2509 svn_fs_x__id_unparse(id, scratch_pool)->data); 2510 2511 entries = apr_array_make(result_pool, (int)count, 2512 sizeof(svn_fs_x__dirent_t *)); 2513 2514 while (p != end) 2515 { 2516 apr_size_t len; 2517 svn_fs_x__dirent_t *dirent; 2518 dirent = apr_pcalloc(result_pool, sizeof(*dirent)); 2519 2520 /* The part of the serialized entry that is not the name will be 2521 * about 6 bytes or less. Since APR allocates with an 8 byte 2522 * alignment (4 bytes loss on average per string), simply using 2523 * the name string in DATA already gives us near-optimal memory 2524 * usage. 
*/ 2525 dirent->name = (const char *)p; 2526 len = strlen(dirent->name); 2527 p += len + 1; 2528 if (p == end) 2529 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2530 _("Directory entry missing kind in '%s'"), 2531 svn_fs_x__id_unparse(id, scratch_pool)->data); 2532 2533 dirent->kind = (svn_node_kind_t)*(p++); 2534 if (p == end) 2535 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2536 _("Directory entry missing change set in '%s'"), 2537 svn_fs_x__id_unparse(id, scratch_pool)->data); 2538 2539 p = svn__decode_int(&dirent->id.change_set, p, end); 2540 if (p == end) 2541 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2542 _("Directory entry missing item number in '%s'"), 2543 svn_fs_x__id_unparse(id, scratch_pool)->data); 2544 2545 p = svn__decode_uint(&dirent->id.number, p, end); 2546 2547 /* In incremental mode, update the hash; otherwise, write to the 2548 * final array. */ 2549 if (incremental) 2550 { 2551 /* Insertion / update or a deletion? */ 2552 if (svn_fs_x__id_used(&dirent->id)) 2553 apr_hash_set(hash, dirent->name, len, dirent); 2554 else 2555 apr_hash_set(hash, dirent->name, len, NULL); 2556 } 2557 else 2558 { 2559 APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = dirent; 2560 } 2561 } 2562 2563 if (incremental) 2564 { 2565 /* Convert container into a sorted array. */ 2566 apr_hash_index_t *hi; 2567 for (hi = apr_hash_first(scratch_pool, hash); hi; hi = apr_hash_next(hi)) 2568 APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = apr_hash_this_val(hi); 2569 2570 if (!sorted(entries)) 2571 svn_sort__array(entries, compare_dirents); 2572 } 2573 else 2574 { 2575 /* Check that we read the expected amount of entries. 
*/ 2576 if ((apr_uint64_t)entries->nelts != count) 2577 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, 2578 _("Directory length mismatch in '%s'"), 2579 svn_fs_x__id_unparse(id, scratch_pool)->data); 2580 } 2581 2582 *entries_p = entries; 2583 2584 return SVN_NO_ERROR; 2585} 2586 2587/* For directory NODEREV in FS, return the *FILESIZE of its in-txn 2588 * representation. If the directory representation is comitted data, 2589 * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries. 2590 */ 2591static svn_error_t * 2592get_txn_dir_info(svn_filesize_t *filesize, 2593 svn_fs_t *fs, 2594 svn_fs_x__noderev_t *noderev, 2595 apr_pool_t *scratch_pool) 2596{ 2597 if (noderev->data_rep 2598 && ! svn_fs_x__is_revision(noderev->data_rep->id.change_set)) 2599 { 2600 const svn_io_dirent2_t *dirent; 2601 const char *filename; 2602 2603 filename = svn_fs_x__path_txn_node_children(fs, &noderev->noderev_id, 2604 scratch_pool, scratch_pool); 2605 2606 SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE, 2607 scratch_pool, scratch_pool)); 2608 *filesize = dirent->filesize; 2609 } 2610 else 2611 { 2612 *filesize = SVN_INVALID_FILESIZE; 2613 } 2614 2615 return SVN_NO_ERROR; 2616} 2617 2618/* Fetch the contents of a directory into DIR. Values are stored 2619 as filename to string mappings; further conversion is necessary to 2620 convert them into svn_fs_x__dirent_t values. */ 2621static svn_error_t * 2622get_dir_contents(svn_fs_x__dir_data_t *dir, 2623 svn_fs_t *fs, 2624 svn_fs_x__noderev_t *noderev, 2625 apr_pool_t *result_pool, 2626 apr_pool_t *scratch_pool) 2627{ 2628 svn_stream_t *contents; 2629 const svn_fs_x__id_t *id = &noderev->noderev_id; 2630 apr_size_t len; 2631 svn_stringbuf_t *text; 2632 svn_boolean_t incremental; 2633 2634 /* Initialize the result. */ 2635 dir->txn_filesize = SVN_INVALID_FILESIZE; 2636 2637 /* Read dir contents - unless there is none in which case we are done. */ 2638 if (noderev->data_rep 2639 && ! 
svn_fs_x__is_revision(noderev->data_rep->id.change_set)) 2640 { 2641 /* Get location & current size of the directory representation. */ 2642 const char *filename; 2643 apr_file_t *file; 2644 2645 filename = svn_fs_x__path_txn_node_children(fs, id, scratch_pool, 2646 scratch_pool); 2647 2648 /* The representation is mutable. Read the old directory 2649 contents from the mutable children file, followed by the 2650 changes we've made in this transaction. */ 2651 SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED, 2652 APR_OS_DEFAULT, scratch_pool)); 2653 2654 /* Obtain txn children file size. */ 2655 SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool)); 2656 len = (apr_size_t)dir->txn_filesize; 2657 2658 /* Finally, provide stream access to FILE. */ 2659 contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool); 2660 incremental = TRUE; 2661 } 2662 else if (noderev->data_rep) 2663 { 2664 /* The representation is immutable. Read it normally. */ 2665 len = noderev->data_rep->expanded_size; 2666 SVN_ERR(svn_fs_x__get_contents(&contents, fs, noderev->data_rep, 2667 FALSE, scratch_pool)); 2668 incremental = FALSE; 2669 } 2670 else 2671 { 2672 /* Empty representation == empty directory. */ 2673 dir->entries = apr_array_make(result_pool, 0, 2674 sizeof(svn_fs_x__dirent_t *)); 2675 return SVN_NO_ERROR; 2676 } 2677 2678 /* Read the whole stream contents into a single buffer. 2679 * Due to our LEN hint, no allocation overhead occurs. 2680 * 2681 * Also, a large portion of TEXT will be file / dir names which we 2682 * directly reference from DIR->ENTRIES instead of copying them. 2683 * Hence, we need to use the RESULT_POOL here. 
*/ 2684 SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, result_pool)); 2685 SVN_ERR(svn_stream_close(contents)); 2686 2687 /* de-serialize hash */ 2688 SVN_ERR(parse_dir_entries(&dir->entries, text, incremental, id, 2689 result_pool, scratch_pool)); 2690 2691 return SVN_NO_ERROR; 2692} 2693 2694 2695/* Return the cache object in FS responsible to storing the directory the 2696 * NODEREV plus the corresponding pre-allocated *KEY. 2697 */ 2698static svn_cache__t * 2699locate_dir_cache(svn_fs_t *fs, 2700 svn_fs_x__id_t *key, 2701 svn_fs_x__noderev_t *noderev) 2702{ 2703 svn_fs_x__data_t *ffd = fs->fsap_data; 2704 2705 if (!noderev->data_rep) 2706 { 2707 /* no data rep -> empty directory. 2708 Use a key that does definitely not clash with non-NULL reps. */ 2709 key->change_set = SVN_FS_X__INVALID_CHANGE_SET; 2710 key->number = SVN_FS_X__ITEM_INDEX_UNUSED; 2711 } 2712 else if (svn_fs_x__is_txn(noderev->noderev_id.change_set)) 2713 { 2714 /* data in txns must be addressed by noderev ID since the 2715 representation has not been created, yet. */ 2716 *key = noderev->noderev_id; 2717 } 2718 else 2719 { 2720 /* committed data can use simple rev,item pairs */ 2721 *key = noderev->data_rep->id; 2722 } 2723 2724 return ffd->dir_cache; 2725} 2726 2727svn_error_t * 2728svn_fs_x__rep_contents_dir(apr_array_header_t **entries_p, 2729 svn_fs_t *fs, 2730 svn_fs_x__noderev_t *noderev, 2731 apr_pool_t *result_pool, 2732 apr_pool_t *scratch_pool) 2733{ 2734 svn_fs_x__id_t key; 2735 svn_fs_x__dir_data_t *dir; 2736 2737 /* find the cache we may use */ 2738 svn_cache__t *cache = locate_dir_cache(fs, &key, noderev); 2739 svn_boolean_t found; 2740 2741 SVN_ERR(svn_cache__get((void **)&dir, &found, cache, &key, result_pool)); 2742 if (found) 2743 { 2744 /* Verify that the cached dir info is not stale 2745 * (no-op for committed data). 
*/ 2746 svn_filesize_t filesize; 2747 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool)); 2748 2749 if (filesize == dir->txn_filesize) 2750 { 2751 /* Still valid. Done. */ 2752 *entries_p = dir->entries; 2753 return SVN_NO_ERROR; 2754 } 2755 } 2756 2757 /* Read in the directory contents. */ 2758 dir = apr_pcalloc(scratch_pool, sizeof(*dir)); 2759 SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool)); 2760 *entries_p = dir->entries; 2761 2762 /* Update the cache, if we are to use one. 2763 * 2764 * Don't even attempt to serialize very large directories; it would cause 2765 * an unnecessary memory allocation peak. 100 bytes/entry is about right. 2766 */ 2767 if (svn_cache__is_cachable(cache, 100 * dir->entries->nelts)) 2768 SVN_ERR(svn_cache__set(cache, &key, dir, scratch_pool)); 2769 2770 return SVN_NO_ERROR; 2771} 2772 2773svn_fs_x__dirent_t * 2774svn_fs_x__find_dir_entry(apr_array_header_t *entries, 2775 const char *name, 2776 int *hint) 2777{ 2778 svn_fs_x__dirent_t **result 2779 = svn_sort__array_lookup(entries, name, hint, compare_dirent_name); 2780 return result ? *result : NULL; 2781} 2782 2783svn_error_t * 2784svn_fs_x__rep_contents_dir_entry(svn_fs_x__dirent_t **dirent, 2785 svn_fs_t *fs, 2786 svn_fs_x__noderev_t *noderev, 2787 const char *name, 2788 apr_size_t *hint, 2789 apr_pool_t *result_pool, 2790 apr_pool_t *scratch_pool) 2791{ 2792 svn_boolean_t found = FALSE; 2793 2794 /* find the cache we may use */ 2795 svn_fs_x__id_t key; 2796 svn_cache__t *cache = locate_dir_cache(fs, &key, noderev); 2797 svn_fs_x__ede_baton_t baton; 2798 2799 svn_filesize_t filesize; 2800 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool)); 2801 2802 /* Cache lookup. 
*/ 2803 baton.hint = *hint; 2804 baton.name = name; 2805 baton.txn_filesize = filesize; 2806 2807 SVN_ERR(svn_cache__get_partial((void **)dirent, 2808 &found, 2809 cache, 2810 &key, 2811 svn_fs_x__extract_dir_entry, 2812 &baton, 2813 result_pool)); 2814 2815 /* Remember the new clue only if we found something at that spot. */ 2816 if (found) 2817 *hint = baton.hint; 2818 2819 /* fetch data from disk if we did not find it in the cache */ 2820 if (! found || baton.out_of_date) 2821 { 2822 svn_fs_x__dirent_t *entry; 2823 svn_fs_x__dirent_t *entry_copy = NULL; 2824 svn_fs_x__dir_data_t dir; 2825 2826 /* Read in the directory contents. */ 2827 SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool, 2828 scratch_pool)); 2829 2830 /* Update the cache, if we are to use one. 2831 * 2832 * Don't even attempt to serialize very large directories; it would 2833 * cause an unnecessary memory allocation peak. 150 bytes / entry is 2834 * about right. */ 2835 if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts)) 2836 SVN_ERR(svn_cache__set(cache, &key, &dir, scratch_pool)); 2837 2838 /* find desired entry and return a copy in POOL, if found */ 2839 entry = svn_fs_x__find_dir_entry(dir.entries, name, NULL); 2840 if (entry) 2841 { 2842 entry_copy = apr_pmemdup(result_pool, entry, sizeof(*entry_copy)); 2843 entry_copy->name = apr_pstrdup(result_pool, entry->name); 2844 } 2845 2846 *dirent = entry_copy; 2847 } 2848 2849 return SVN_NO_ERROR; 2850} 2851 2852svn_error_t * 2853svn_fs_x__get_proplist(apr_hash_t **proplist, 2854 svn_fs_t *fs, 2855 svn_fs_x__noderev_t *noderev, 2856 apr_pool_t *result_pool, 2857 apr_pool_t *scratch_pool) 2858{ 2859 svn_stream_t *stream; 2860 const svn_fs_x__id_t *noderev_id = &noderev->noderev_id; 2861 2862 if (noderev->prop_rep 2863 && !svn_fs_x__is_revision(noderev->prop_rep->id.change_set)) 2864 { 2865 svn_stringbuf_t *content; 2866 svn_string_t *as_string; 2867 const char *filename = svn_fs_x__path_txn_node_props(fs, noderev_id, 2868 
scratch_pool, 2869 scratch_pool); 2870 SVN_ERR(svn_stringbuf_from_file2(&content, filename, result_pool)); 2871 2872 as_string = svn_stringbuf__morph_into_string(content); 2873 SVN_ERR_W(svn_fs_x__parse_properties(proplist, as_string, result_pool), 2874 apr_psprintf(scratch_pool, 2875 "malformed property list for node-revision '%s' in '%s'", 2876 svn_fs_x__id_unparse(&noderev->noderev_id, 2877 scratch_pool)->data, 2878 filename)); 2879 } 2880 else if (noderev->prop_rep) 2881 { 2882 svn_fs_x__data_t *ffd = fs->fsap_data; 2883 svn_fs_x__representation_t *rep = noderev->prop_rep; 2884 svn_fs_x__pair_cache_key_t key = { 0 }; 2885 svn_string_t *content; 2886 svn_boolean_t is_cached; 2887 2888 key.revision = svn_fs_x__get_revnum(rep->id.change_set); 2889 key.second = rep->id.number; 2890 SVN_ERR(svn_cache__get((void **) proplist, &is_cached, 2891 ffd->properties_cache, &key, result_pool)); 2892 if (is_cached) 2893 return SVN_NO_ERROR; 2894 2895 SVN_ERR(svn_fs_x__get_contents(&stream, fs, rep, FALSE, scratch_pool)); 2896 SVN_ERR(svn_string_from_stream2(&content, stream, rep->expanded_size, 2897 result_pool)); 2898 2899 SVN_ERR_W(svn_fs_x__parse_properties(proplist, content, result_pool), 2900 apr_psprintf(scratch_pool, 2901 "malformed property list for node-revision '%s'", 2902 svn_fs_x__id_unparse(&noderev->noderev_id, 2903 scratch_pool)->data)); 2904 2905 SVN_ERR(svn_cache__set(ffd->properties_cache, &key, *proplist, 2906 scratch_pool)); 2907 } 2908 else 2909 { 2910 /* return an empty prop list if the node doesn't have any props */ 2911 *proplist = apr_hash_make(result_pool); 2912 } 2913 2914 return SVN_NO_ERROR; 2915} 2916 2917svn_error_t * 2918svn_fs_x__create_changes_context(svn_fs_x__changes_context_t **context, 2919 svn_fs_t *fs, 2920 svn_revnum_t rev, 2921 apr_pool_t *result_pool, 2922 apr_pool_t *scratch_pool) 2923{ 2924 svn_fs_x__changes_context_t *result = apr_pcalloc(result_pool, 2925 sizeof(*result)); 2926 result->fs = fs; 2927 result->revision = rev; 2928 
2929 SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool)); 2930 SVN_ERR(svn_fs_x__rev_file_init(&result->revision_file, fs, rev, 2931 result_pool)); 2932 2933 *context = result; 2934 return SVN_NO_ERROR; 2935} 2936 2937svn_error_t * 2938svn_fs_x__get_changes(apr_array_header_t **changes, 2939 svn_fs_x__changes_context_t *context, 2940 apr_pool_t *result_pool, 2941 apr_pool_t *scratch_pool) 2942{ 2943 svn_boolean_t found; 2944 svn_fs_x__data_t *ffd = context->fs->fsap_data; 2945 2946 svn_fs_x__id_t id; 2947 id.change_set = svn_fs_x__change_set_by_rev(context->revision); 2948 id.number = SVN_FS_X__ITEM_INDEX_CHANGES; 2949 2950 /* try cache lookup first */ 2951 2952 if (svn_fs_x__is_packed_rev(context->fs, context->revision)) 2953 { 2954 apr_off_t offset; 2955 svn_fs_x__pair_cache_key_t key; 2956 svn_fs_x__changes_get_list_baton_t baton; 2957 baton.start = (int)context->next; 2958 baton.eol = &context->eol; 2959 2960 SVN_ERR(svn_fs_x__item_offset(&offset, &baton.sub_item, context->fs, 2961 context->revision_file, 2962 &id, scratch_pool)); 2963 key.revision = svn_fs_x__packed_base_rev(context->fs, 2964 context->revision); 2965 key.second = offset; 2966 2967 SVN_ERR(svn_cache__get_partial((void **)changes, &found, 2968 ffd->changes_container_cache, &key, 2969 svn_fs_x__changes_get_list_func, 2970 &baton, result_pool)); 2971 } 2972 else 2973 { 2974 svn_fs_x__changes_list_t *changes_list; 2975 svn_fs_x__pair_cache_key_t key; 2976 key.revision = context->revision; 2977 key.second = context->next; 2978 2979 SVN_ERR(svn_cache__get((void **)&changes_list, &found, 2980 ffd->changes_cache, &key, result_pool)); 2981 2982 if (found) 2983 { 2984 /* Where to look next - if there is more data. */ 2985 context->eol = changes_list->eol; 2986 context->next_offset = changes_list->end_offset; 2987 2988 /* Return the block as a "proper" APR array. 
*/ 2989 (*changes) = apr_array_make(result_pool, 0, sizeof(void *)); 2990 (*changes)->elts = (char *)changes_list->changes; 2991 (*changes)->nelts = changes_list->count; 2992 (*changes)->nalloc = changes_list->count; 2993 } 2994 } 2995 2996 if (!found) 2997 { 2998 /* 'block-read' will also provide us with the desired data */ 2999 SVN_ERR(block_read((void **)changes, context->fs, &id, 3000 context->revision_file, context, 3001 result_pool, scratch_pool)); 3002 } 3003 3004 context->next += (*changes)->nelts; 3005 3006 SVN_ERR(dbg__log_access(context->fs, &id, *changes, 3007 SVN_FS_X__ITEM_TYPE_CHANGES, scratch_pool)); 3008 3009 return SVN_NO_ERROR; 3010} 3011 3012/* Fetch the representation data (header, txdelta / plain windows) 3013 * addressed by ENTRY->ITEM in FS and cache it under KEY. Read the data 3014 * from REV_FILE. If MAX_OFFSET is not -1, don't read windows that start 3015 * at or beyond that offset. Use SCRATCH_POOL for temporary allocations. 3016 */ 3017static svn_error_t * 3018block_read_contents(svn_fs_t *fs, 3019 svn_fs_x__revision_file_t *rev_file, 3020 svn_fs_x__p2l_entry_t* entry, 3021 svn_fs_x__pair_cache_key_t *key, 3022 apr_off_t max_offset, 3023 apr_pool_t *scratch_pool) 3024{ 3025 svn_fs_x__representation_cache_key_t header_key = { 0 }; 3026 rep_state_t rs = { 0 }; 3027 svn_filesize_t fulltext_len; 3028 svn_fs_x__rep_header_t *rep_header; 3029 3030 header_key.revision = (apr_int32_t)key->revision; 3031 header_key.is_packed = svn_fs_x__is_packed_rev(fs, header_key.revision); 3032 header_key.item_index = key->second; 3033 3034 SVN_ERR(read_rep_header(&rep_header, fs, rev_file, &header_key, 3035 scratch_pool)); 3036 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry, scratch_pool)); 3037 SVN_ERR(cache_windows(&fulltext_len, fs, &rs, max_offset, scratch_pool)); 3038 3039 return SVN_NO_ERROR; 3040} 3041 3042/* For the given REV_FILE in FS, in *STREAM return a stream covering the 3043 * item specified by ENTRY. 
Also, verify the item's content by low-level 3044 * checksum. Allocate the result in RESULT_POOL. 3045 */ 3046static svn_error_t * 3047read_item(svn_stream_t **stream, 3048 svn_fs_t *fs, 3049 svn_fs_x__revision_file_t *rev_file, 3050 svn_fs_x__p2l_entry_t* entry, 3051 apr_pool_t *result_pool) 3052{ 3053 apr_uint32_t digest; 3054 svn_checksum_t *expected, *actual; 3055 apr_uint32_t plain_digest; 3056 svn_stringbuf_t *text; 3057 3058 /* Read item into string buffer. */ 3059 text = svn_stringbuf_create_ensure(entry->size, result_pool); 3060 text->len = entry->size; 3061 text->data[text->len] = 0; 3062 SVN_ERR(svn_fs_x__rev_file_read(rev_file, text->data, text->len)); 3063 3064 /* Return (construct, calculate) stream and checksum. */ 3065 *stream = svn_stream_from_stringbuf(text, result_pool); 3066 digest = svn__fnv1a_32x4(text->data, text->len); 3067 3068 /* Checksums will match most of the time. */ 3069 if (entry->fnv1_checksum == digest) 3070 return SVN_NO_ERROR; 3071 3072 /* Construct proper checksum objects from their digests to allow for 3073 * nice error messages. */ 3074 plain_digest = htonl(entry->fnv1_checksum); 3075 expected = svn_checksum__from_digest_fnv1a_32x4( 3076 (const unsigned char *)&plain_digest, result_pool); 3077 plain_digest = htonl(digest); 3078 actual = svn_checksum__from_digest_fnv1a_32x4( 3079 (const unsigned char *)&plain_digest, result_pool); 3080 3081 /* Construct the full error message with all the info we have. */ 3082 return svn_checksum_mismatch_err(expected, actual, result_pool, 3083 _("Low-level checksum mismatch while reading\n" 3084 "%s bytes of meta data at offset %s "), 3085 apr_off_t_toa(result_pool, entry->size), 3086 apr_off_t_toa(result_pool, entry->offset)); 3087} 3088 3089/* If not already cached or if MUST_READ is set, read the changed paths 3090 * list addressed by ENTRY in FS and ret��rn it in *CHANGES. Cache the 3091 * result if caching is enabled. Read the data from REV_FILE. 
Trim the 3092 * data in *CHANGES to the range given by CONTEXT. Allocate *CHANGES in 3093 * RESUSLT_POOL and allocate temporaries in SCRATCH_POOL. 3094 */ 3095static svn_error_t * 3096block_read_changes(apr_array_header_t **changes, 3097 svn_fs_t *fs, 3098 svn_fs_x__revision_file_t *rev_file, 3099 svn_fs_x__p2l_entry_t* entry, 3100 svn_fs_x__changes_context_t *context, 3101 svn_boolean_t must_read, 3102 apr_pool_t *result_pool, 3103 apr_pool_t *scratch_pool) 3104{ 3105 svn_fs_x__data_t *ffd = fs->fsap_data; 3106 svn_stream_t *stream; 3107 svn_fs_x__pair_cache_key_t key; 3108 svn_fs_x__changes_list_t changes_list; 3109 3110 /* If we don't have to return any data, just read and cache the first 3111 block. This means we won't cache the remaining blocks from longer 3112 lists right away but only if they are actually needed. */ 3113 apr_size_t next = must_read ? context->next : 0; 3114 apr_size_t next_offset = must_read ? context->next_offset : 0; 3115 3116 /* we don't support containers, yet */ 3117 SVN_ERR_ASSERT(entry->item_count == 1); 3118 3119 /* The item to read / write. */ 3120 key.revision = svn_fs_x__get_revnum(entry->items[0].change_set); 3121 key.second = next; 3122 3123 /* already in cache? */ 3124 if (!must_read) 3125 { 3126 svn_boolean_t is_cached = FALSE; 3127 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key, 3128 scratch_pool)); 3129 if (is_cached) 3130 return SVN_NO_ERROR; 3131 } 3132 3133 /* Verify the whole list only once. We don't use the STREAM any further. */ 3134 if (!must_read || next == 0) 3135 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); 3136 3137 /* Seek to the block to read within the changes list. 
*/ 3138 SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, 3139 entry->offset + next_offset)); 3140 SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file)); 3141 3142 /* read changes from revision file */ 3143 SVN_ERR(svn_fs_x__read_changes(changes, stream, SVN_FS_X__CHANGES_BLOCK_SIZE, 3144 result_pool, scratch_pool)); 3145 3146 SVN_ERR(svn_fs_x__rev_file_offset(&changes_list.end_offset, rev_file)); 3147 changes_list.end_offset -= entry->offset; 3148 changes_list.start_offset = next_offset; 3149 changes_list.count = (*changes)->nelts; 3150 changes_list.changes = (svn_fs_x__change_t **)(*changes)->elts; 3151 changes_list.eol = (changes_list.count < SVN_FS_X__CHANGES_BLOCK_SIZE) 3152 || (changes_list.end_offset + 1 >= entry->size); 3153 3154 /* cache for future reference */ 3155 3156 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list, 3157 scratch_pool)); 3158 3159 /* Trim the result: 3160 * Remove the entries that already been reported. */ 3161 if (must_read) 3162 { 3163 context->next_offset = changes_list.end_offset; 3164 context->eol = changes_list.eol; 3165 } 3166 3167 return SVN_NO_ERROR; 3168} 3169 3170/* If not already cached or if MUST_READ is set, read the changed paths 3171 * list container addressed by ENTRY in FS. Return the changes list 3172 * identified by SUB_ITEM in *CHANGES, using CONTEXT to select a sub-range 3173 * within that list. Read the data from REV_FILE and cache the result. 3174 * 3175 * Allocate *CHANGES in RESUSLT_POOL and everything else in SCRATCH_POOL. 
3176 */ 3177static svn_error_t * 3178block_read_changes_container(apr_array_header_t **changes, 3179 svn_fs_t *fs, 3180 svn_fs_x__revision_file_t *rev_file, 3181 svn_fs_x__p2l_entry_t* entry, 3182 apr_uint32_t sub_item, 3183 svn_fs_x__changes_context_t *context, 3184 svn_boolean_t must_read, 3185 apr_pool_t *result_pool, 3186 apr_pool_t *scratch_pool) 3187{ 3188 svn_fs_x__data_t *ffd = fs->fsap_data; 3189 svn_fs_x__changes_t *container; 3190 svn_fs_x__pair_cache_key_t key; 3191 svn_stream_t *stream; 3192 svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set); 3193 3194 key.revision = svn_fs_x__packed_base_rev(fs, revision); 3195 key.second = entry->offset; 3196 3197 /* already in cache? */ 3198 if (!must_read) 3199 { 3200 svn_boolean_t is_cached = FALSE; 3201 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_container_cache, 3202 &key, scratch_pool)); 3203 if (is_cached) 3204 return SVN_NO_ERROR; 3205 } 3206 3207 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); 3208 3209 /* read changes from revision file */ 3210 3211 SVN_ERR(svn_fs_x__read_changes_container(&container, stream, scratch_pool, 3212 scratch_pool)); 3213 3214 /* extract requested data */ 3215 3216 if (must_read) 3217 SVN_ERR(svn_fs_x__changes_get_list(changes, container, sub_item, 3218 context, result_pool)); 3219 SVN_ERR(svn_cache__set(ffd->changes_container_cache, &key, container, 3220 scratch_pool)); 3221 3222 return SVN_NO_ERROR; 3223} 3224 3225/* If not already cached or if MUST_READ is set, read the node revision 3226 * addressed by ENTRY in FS and return it in *NODEREV_P. Cache the 3227 * result under KEY if caching is enabled. Read the data from REV_FILE. 3228 * Allocate *NODEREV_P in RESUSLT_POOL and allocate temporaries in 3229 * SCRATCH_POOL. 
3230 */ 3231static svn_error_t * 3232block_read_noderev(svn_fs_x__noderev_t **noderev_p, 3233 svn_fs_t *fs, 3234 svn_fs_x__revision_file_t *rev_file, 3235 svn_fs_x__p2l_entry_t* entry, 3236 svn_fs_x__pair_cache_key_t *key, 3237 svn_boolean_t must_read, 3238 apr_pool_t *result_pool, 3239 apr_pool_t *scratch_pool) 3240{ 3241 svn_fs_x__data_t *ffd = fs->fsap_data; 3242 svn_stream_t *stream; 3243 3244 /* we don't support containers, yet */ 3245 SVN_ERR_ASSERT(entry->item_count == 1); 3246 3247 /* already in cache? */ 3248 if (!must_read) 3249 { 3250 svn_boolean_t is_cached = FALSE; 3251 SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache, key, 3252 scratch_pool)); 3253 if (is_cached) 3254 return SVN_NO_ERROR; 3255 } 3256 3257 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); 3258 3259 /* read node rev from revision file */ 3260 3261 SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream, result_pool, 3262 scratch_pool)); 3263 SVN_ERR(svn_cache__set(ffd->node_revision_cache, key, *noderev_p, 3264 scratch_pool)); 3265 3266 return SVN_NO_ERROR; 3267} 3268 3269/* If not already cached or if MUST_READ is set, read the node revision 3270 * container addressed by ENTRY in FS. Return the item identified by 3271 * SUB_ITEM in *NODEREV_P. Read the data from REV_FILE and cache it. 3272 * Allocate *NODEREV_P in RESUSLT_POOL and allocate temporaries in 3273 * SCRATCH_POOL. 
3274 */ 3275static svn_error_t * 3276block_read_noderevs_container(svn_fs_x__noderev_t **noderev_p, 3277 svn_fs_t *fs, 3278 svn_fs_x__revision_file_t *rev_file, 3279 svn_fs_x__p2l_entry_t* entry, 3280 apr_uint32_t sub_item, 3281 svn_boolean_t must_read, 3282 apr_pool_t *result_pool, 3283 apr_pool_t *scratch_pool) 3284{ 3285 svn_fs_x__data_t *ffd = fs->fsap_data; 3286 svn_fs_x__noderevs_t *container; 3287 svn_stream_t *stream; 3288 svn_fs_x__pair_cache_key_t key; 3289 svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set); 3290 3291 key.revision = svn_fs_x__packed_base_rev(fs, revision); 3292 key.second = entry->offset; 3293 3294 /* already in cache? */ 3295 if (!must_read) 3296 { 3297 svn_boolean_t is_cached = FALSE; 3298 SVN_ERR(svn_cache__has_key(&is_cached, ffd->noderevs_container_cache, 3299 &key, scratch_pool)); 3300 if (is_cached) 3301 return SVN_NO_ERROR; 3302 } 3303 3304 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); 3305 3306 /* read noderevs from revision file */ 3307 SVN_ERR(svn_fs_x__read_noderevs_container(&container, stream, scratch_pool, 3308 scratch_pool)); 3309 3310 /* extract requested data */ 3311 if (must_read) 3312 SVN_ERR(svn_fs_x__noderevs_get(noderev_p, container, sub_item, 3313 result_pool)); 3314 3315 SVN_ERR(svn_cache__set(ffd->noderevs_container_cache, &key, container, 3316 scratch_pool)); 3317 3318 return SVN_NO_ERROR; 3319} 3320 3321/* If not already cached or if MUST_READ is set, read the representation 3322 * container addressed by ENTRY in FS. Return an extractor object for the 3323 * item identified by SUB_ITEM in *EXTRACTOR. Read the data from REV_FILE 3324 * and cache it. Allocate *EXTRACTOR in RESUSLT_POOL and all temporaries 3325 * in SCRATCH_POOL. 
3326 */ 3327static svn_error_t * 3328block_read_reps_container(svn_fs_x__rep_extractor_t **extractor, 3329 svn_fs_t *fs, 3330 svn_fs_x__revision_file_t *rev_file, 3331 svn_fs_x__p2l_entry_t* entry, 3332 apr_uint32_t sub_item, 3333 svn_boolean_t must_read, 3334 apr_pool_t *result_pool, 3335 apr_pool_t *scratch_pool) 3336{ 3337 svn_fs_x__data_t *ffd = fs->fsap_data; 3338 svn_fs_x__reps_t *container; 3339 svn_stream_t *stream; 3340 svn_fs_x__pair_cache_key_t key; 3341 svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set); 3342 3343 key.revision = svn_fs_x__packed_base_rev(fs, revision); 3344 key.second = entry->offset; 3345 3346 /* already in cache? */ 3347 if (!must_read) 3348 { 3349 svn_boolean_t is_cached = FALSE; 3350 SVN_ERR(svn_cache__has_key(&is_cached, ffd->reps_container_cache, 3351 &key, scratch_pool)); 3352 if (is_cached) 3353 return SVN_NO_ERROR; 3354 } 3355 3356 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool)); 3357 3358 /* read noderevs from revision file */ 3359 SVN_ERR(svn_fs_x__read_reps_container(&container, stream, result_pool, 3360 scratch_pool)); 3361 3362 /* extract requested data */ 3363 3364 if (must_read) 3365 SVN_ERR(svn_fs_x__reps_get(extractor, fs, container, sub_item, 3366 result_pool)); 3367 3368 SVN_ERR(svn_cache__set(ffd->reps_container_cache, &key, container, 3369 scratch_pool)); 3370 3371 return SVN_NO_ERROR; 3372} 3373 3374/* Read the whole (e.g. 64kB) block containing the item identified by ID in 3375 * FS and put all data into cache. If necessary and depending on heuristics, 3376 * neighboring blocks may also get read. The data is being read from 3377 * already open REVISION_FILE, which must be the correct rev / pack file 3378 * w.r.t. ID->CHANGE_SET. 3379 * 3380 * For noderevs and changed path lists, the item fetched can be allocated 3381 * RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL. 
3382 * The BATON is passed along to the extractor sub-functions and will be 3383 * used only when constructing the *RESULT. SCRATCH_POOL will be used for 3384 * all temporary allocations. 3385 */ 3386static svn_error_t * 3387block_read(void **result, 3388 svn_fs_t *fs, 3389 const svn_fs_x__id_t *id, 3390 svn_fs_x__revision_file_t *revision_file, 3391 void *baton, 3392 apr_pool_t *result_pool, 3393 apr_pool_t *scratch_pool) 3394{ 3395 svn_fs_x__data_t *ffd = fs->fsap_data; 3396 apr_off_t offset, wanted_offset = 0; 3397 apr_off_t block_start = 0; 3398 apr_uint32_t wanted_sub_item = 0; 3399 svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set); 3400 apr_array_header_t *entries; 3401 int run_count = 0; 3402 int i; 3403 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 3404 3405 /* don't try this on transaction protorev files */ 3406 SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision)); 3407 3408 /* index lookup: find the OFFSET of the item we *must* read plus (in the 3409 * "do-while" block) the list of items in the same block. */ 3410 SVN_ERR(svn_fs_x__item_offset(&wanted_offset, &wanted_sub_item, fs, 3411 revision_file, id, iterpool)); 3412 3413 offset = wanted_offset; 3414 do 3415 { 3416 /* fetch list of items in the block surrounding OFFSET */ 3417 SVN_ERR(svn_fs_x__rev_file_seek(revision_file, &block_start, offset)); 3418 SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, fs, revision_file, 3419 revision, block_start, 3420 ffd->block_size, scratch_pool, 3421 scratch_pool)); 3422 3423 /* read all items from the block */ 3424 for (i = 0; i < entries->nelts; ++i) 3425 { 3426 svn_boolean_t is_result, is_wanted; 3427 apr_pool_t *pool; 3428 3429 svn_fs_x__p2l_entry_t* entry 3430 = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t); 3431 3432 /* skip empty sections */ 3433 if (entry->type == SVN_FS_X__ITEM_TYPE_UNUSED) 3434 continue; 3435 3436 /* the item / container we were looking for? 
*/ 3437 is_wanted = entry->offset == wanted_offset 3438 && entry->item_count >= wanted_sub_item 3439 && svn_fs_x__id_eq(entry->items + wanted_sub_item, id); 3440 is_result = result && is_wanted; 3441 3442 /* select the pool that we want the item to be allocated in */ 3443 pool = is_result ? result_pool : iterpool; 3444 3445 /* handle all items that start within this block and are relatively 3446 * small (i.e. < block size). Always read the item we need to return. 3447 */ 3448 if (is_result || ( entry->offset >= block_start 3449 && entry->size < ffd->block_size)) 3450 { 3451 void *item = NULL; 3452 svn_fs_x__pair_cache_key_t key = { 0 }; 3453 key.revision = svn_fs_x__get_revnum(entry->items[0].change_set); 3454 key.second = entry->items[0].number; 3455 3456 SVN_ERR(svn_fs_x__rev_file_seek(revision_file, NULL, 3457 entry->offset)); 3458 switch (entry->type) 3459 { 3460 case SVN_FS_X__ITEM_TYPE_FILE_REP: 3461 case SVN_FS_X__ITEM_TYPE_DIR_REP: 3462 case SVN_FS_X__ITEM_TYPE_FILE_PROPS: 3463 case SVN_FS_X__ITEM_TYPE_DIR_PROPS: 3464 SVN_ERR(block_read_contents(fs, revision_file, 3465 entry, &key, 3466 is_wanted 3467 ? 
-1 3468 : block_start + ffd->block_size, 3469 iterpool)); 3470 break; 3471 3472 case SVN_FS_X__ITEM_TYPE_NODEREV: 3473 SVN_ERR(block_read_noderev((svn_fs_x__noderev_t **)&item, 3474 fs, revision_file, 3475 entry, &key, is_result, 3476 pool, iterpool)); 3477 break; 3478 3479 case SVN_FS_X__ITEM_TYPE_CHANGES: 3480 SVN_ERR(block_read_changes((apr_array_header_t **)&item, 3481 fs, revision_file, 3482 entry, baton, is_result, 3483 pool, iterpool)); 3484 break; 3485 3486 case SVN_FS_X__ITEM_TYPE_CHANGES_CONT: 3487 SVN_ERR(block_read_changes_container 3488 ((apr_array_header_t **)&item, 3489 fs, revision_file, 3490 entry, wanted_sub_item, 3491 baton, is_result, 3492 pool, iterpool)); 3493 break; 3494 3495 case SVN_FS_X__ITEM_TYPE_NODEREVS_CONT: 3496 SVN_ERR(block_read_noderevs_container 3497 ((svn_fs_x__noderev_t **)&item, 3498 fs, revision_file, 3499 entry, wanted_sub_item, 3500 is_result, pool, iterpool)); 3501 break; 3502 3503 case SVN_FS_X__ITEM_TYPE_REPS_CONT: 3504 SVN_ERR(block_read_reps_container 3505 ((svn_fs_x__rep_extractor_t **)&item, 3506 fs, revision_file, 3507 entry, wanted_sub_item, 3508 is_result, pool, iterpool)); 3509 break; 3510 3511 default: 3512 break; 3513 } 3514 3515 if (is_result) 3516 *result = item; 3517 3518 /* if we crossed a block boundary, read the remainder of 3519 * the last block as well */ 3520 offset = entry->offset + entry->size; 3521 if (offset - block_start > ffd->block_size) 3522 ++run_count; 3523 3524 svn_pool_clear(iterpool); 3525 } 3526 } 3527 } 3528 while(run_count++ == 1); /* can only be true once and only if a block 3529 * boundary got crossed */ 3530 3531 /* if the caller requested a result, we must have provided one by now */ 3532 assert(!result || *result); 3533 svn_pool_destroy(iterpool); 3534 3535 return SVN_NO_ERROR; 3536} 3537