index.c revision 299742
1/* index.c indexing support for FSFS support 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23#include <assert.h> 24 25#include "svn_io.h" 26#include "svn_pools.h" 27#include "svn_sorts.h" 28 29#include "svn_private_config.h" 30 31#include "private/svn_sorts_private.h" 32#include "private/svn_subr_private.h" 33#include "private/svn_temp_serializer.h" 34 35#include "index.h" 36#include "pack.h" 37#include "temp_serializer.h" 38#include "util.h" 39#include "fs_fs.h" 40 41#include "../libsvn_fs/fs-loader.h" 42 43/* maximum length of a uint64 in an 7/8b encoding */ 44#define ENCODED_INT_LENGTH 10 45 46/* APR is missing an APR_OFF_T_MAX. So, define one. We will use it to 47 * limit file offsets stored in the indexes. 48 * 49 * We assume that everything shorter than 64 bits, it is at least 32 bits. 50 * We also assume that the type is always signed meaning we only have an 51 * effective positive range of 63 or 31 bits, respectively. 52 */ 53static 54const apr_uint64_t off_t_max = (sizeof(apr_off_t) == sizeof(apr_int64_t)) 55 ? APR_INT64_MAX 56 : APR_INT32_MAX; 57 58/* We store P2L proto-index entries as 6 values, 64 bits each on disk. 59 * See also svn_fs_fs__p2l_proto_index_add_entry(). 60 */ 61#define P2L_PROTO_INDEX_ENTRY_SIZE (6 * sizeof(apr_uint64_t)) 62 63/* We put this string in front of the L2P index header. */ 64#define L2P_STREAM_PREFIX "L2P-INDEX\n" 65 66/* We put this string in front of the P2L index header. */ 67#define P2L_STREAM_PREFIX "P2L-INDEX\n" 68 69/* Size of the buffer that will fit the index header prefixes. */ 70#define STREAM_PREFIX_LEN MAX(sizeof(L2P_STREAM_PREFIX), \ 71 sizeof(P2L_STREAM_PREFIX)) 72 73/* Page tables in the log-to-phys index file exclusively contain entries 74 * of this type to describe position and size of a given page. 75 */ 76typedef struct l2p_page_table_entry_t 77{ 78 /* global offset on the page within the index file */ 79 apr_uint64_t offset; 80 81 /* number of mapping entries in that page */ 82 apr_uint32_t entry_count; 83 84 /* size of the page on disk (in the index file) */ 85 apr_uint32_t size; 86} l2p_page_table_entry_t; 87 88/* Master run-time data structure of an log-to-phys index. It contains 89 * the page tables of every revision covered by that index - but not the 90 * pages themselves. 91 */ 92typedef struct l2p_header_t 93{ 94 /* first revision covered by this index */ 95 svn_revnum_t first_revision; 96 97 /* number of revisions covered */ 98 apr_size_t revision_count; 99 100 /* (max) number of entries per page */ 101 apr_uint32_t page_size; 102 103 /* indexes into PAGE_TABLE that mark the first page of the respective 104 * revision. PAGE_TABLE_INDEX[REVISION_COUNT] points to the end of 105 * PAGE_TABLE. 106 */ 107 apr_size_t * page_table_index; 108 109 /* Page table covering all pages in the index */ 110 l2p_page_table_entry_t * page_table; 111} l2p_header_t; 112 113/* Run-time data structure containing a single log-to-phys index page. 114 */ 115typedef struct l2p_page_t 116{ 117 /* number of entries in the OFFSETS array */ 118 apr_uint32_t entry_count; 119 120 /* global file offsets (item index is the array index) within the 121 * packed or non-packed rev file. Offset will be -1 for unused / 122 * invalid item index values. */ 123 apr_uint64_t *offsets; 124} l2p_page_t; 125 126/* All of the log-to-phys proto index file consist of entries of this type. 127 */ 128typedef struct l2p_proto_entry_t 129{ 130 /* phys offset + 1 of the data container. 0 for "new revision" entries. */ 131 apr_uint64_t offset; 132 133 /* corresponding item index. 0 for "new revision" entries. */ 134 apr_uint64_t item_index; 135} l2p_proto_entry_t; 136 137/* Master run-time data structure of an phys-to-log index. It contains 138 * an array with one offset value for each rev file cluster. 139 */ 140typedef struct p2l_header_t 141{ 142 /* first revision covered by the index (and rev file) */ 143 svn_revnum_t first_revision; 144 145 /* number of bytes in the rev files covered by each p2l page */ 146 apr_uint64_t page_size; 147 148 /* number of pages / clusters in that rev file */ 149 apr_size_t page_count; 150 151 /* number of bytes in the rev file */ 152 apr_uint64_t file_size; 153 154 /* offsets of the pages / cluster descriptions within the index file */ 155 apr_off_t *offsets; 156} p2l_header_t; 157 158/* 159 * packed stream 160 * 161 * This is a utility object that will read files containing 7b/8b encoded 162 * unsigned integers. It decodes them in batches to minimize overhead 163 * and supports random access to random file locations. 164 */ 165 166/* How many numbers we will pre-fetch and buffer in a packed number stream. 167 */ 168enum { MAX_NUMBER_PREFETCH = 64 }; 169 170/* Prefetched number entry in a packed number stream. 171 */ 172typedef struct value_position_pair_t 173{ 174 /* prefetched number */ 175 apr_uint64_t value; 176 177 /* number of bytes read, *including* this number, since the buffer start */ 178 apr_size_t total_len; 179} value_position_pair_t; 180 181/* State of a prefetching packed number stream. It will read compressed 182 * index data efficiently and present it as a series of non-packed uint64. 183 */ 184struct svn_fs_fs__packed_number_stream_t 185{ 186 /* underlying data file containing the packed values */ 187 apr_file_t *file; 188 189 /* Offset within FILE at which the stream data starts 190 * (i.e. which offset will reported as offset 0 by packed_stream_offset). */ 191 apr_off_t stream_start; 192 193 /* First offset within FILE after the stream data. 194 * Attempts to read beyond this will cause an "Unexpected End Of Stream" 195 * error. */ 196 apr_off_t stream_end; 197 198 /* number of used entries in BUFFER (starting at index 0) */ 199 apr_size_t used; 200 201 /* index of the next number to read from the BUFFER (0 .. USED). 202 * If CURRENT == USED, we need to read more data upon get() */ 203 apr_size_t current; 204 205 /* offset in FILE from which the first entry in BUFFER has been read */ 206 apr_off_t start_offset; 207 208 /* offset in FILE from which the next number has to be read */ 209 apr_off_t next_offset; 210 211 /* read the file in chunks of this size */ 212 apr_size_t block_size; 213 214 /* pool to be used for file ops etc. */ 215 apr_pool_t *pool; 216 217 /* buffer for prefetched values */ 218 value_position_pair_t buffer[MAX_NUMBER_PREFETCH]; 219}; 220 221/* Return an svn_error_t * object for error ERR on STREAM with the given 222 * MESSAGE string. The latter must have a placeholder for the index file 223 * name ("%s") and the current read offset (e.g. "0x%lx"). 224 */ 225static svn_error_t * 226stream_error_create(svn_fs_fs__packed_number_stream_t *stream, 227 apr_status_t err, 228 const char *message) 229{ 230 const char *file_name; 231 apr_off_t offset; 232 SVN_ERR(svn_io_file_name_get(&file_name, stream->file, 233 stream->pool)); 234 SVN_ERR(svn_fs_fs__get_file_offset(&offset, stream->file, stream->pool)); 235 236 return svn_error_createf(err, NULL, message, file_name, 237 apr_psprintf(stream->pool, 238 "%" APR_UINT64_T_HEX_FMT, 239 (apr_uint64_t)offset)); 240} 241 242/* Read up to MAX_NUMBER_PREFETCH numbers from the STREAM->NEXT_OFFSET in 243 * STREAM->FILE and buffer them. 244 * 245 * We don't want GCC and others to inline this (infrequently called) 246 * function into packed_stream_get() because it prevents the latter from 247 * being inlined itself. 248 */ 249SVN__PREVENT_INLINE 250static svn_error_t * 251packed_stream_read(svn_fs_fs__packed_number_stream_t *stream) 252{ 253 unsigned char buffer[MAX_NUMBER_PREFETCH]; 254 apr_size_t read = 0; 255 apr_size_t i; 256 value_position_pair_t *target; 257 apr_off_t block_start = 0; 258 apr_off_t block_left = 0; 259 apr_status_t err; 260 261 /* all buffered data will have been read starting here */ 262 stream->start_offset = stream->next_offset; 263 264 /* packed numbers are usually not aligned to MAX_NUMBER_PREFETCH blocks, 265 * i.e. the last number has been incomplete (and not buffered in stream) 266 * and need to be re-read. Therefore, always correct the file pointer. 267 */ 268 SVN_ERR(svn_io_file_aligned_seek(stream->file, stream->block_size, 269 &block_start, stream->next_offset, 270 stream->pool)); 271 272 /* prefetch at least one number but, if feasible, don't cross block 273 * boundaries. This shall prevent jumping back and forth between two 274 * blocks because the extra data was not actually request _now_. 275 */ 276 read = sizeof(buffer); 277 block_left = stream->block_size - (stream->next_offset - block_start); 278 if (block_left >= 10 && block_left < read) 279 read = (apr_size_t)block_left; 280 281 /* Don't read beyond the end of the file section that belongs to this 282 * index / stream. */ 283 read = (apr_size_t)MIN(read, stream->stream_end - stream->next_offset); 284 285 err = apr_file_read(stream->file, buffer, &read); 286 if (err && !APR_STATUS_IS_EOF(err)) 287 return stream_error_create(stream, err, 288 _("Can't read index file '%s' at offset 0x%s")); 289 290 /* if the last number is incomplete, trim it from the buffer */ 291 while (read > 0 && buffer[read-1] >= 0x80) 292 --read; 293 294 /* we call read() only if get() requires more data. So, there must be 295 * at least *one* further number. */ 296 if SVN__PREDICT_FALSE(read == 0) 297 return stream_error_create(stream, err, 298 _("Unexpected end of index file %s at offset 0x%s")); 299 300 /* parse file buffer and expand into stream buffer */ 301 target = stream->buffer; 302 for (i = 0; i < read;) 303 { 304 if (buffer[i] < 0x80) 305 { 306 /* numbers < 128 are relatively frequent and particularly easy 307 * to decode. Give them special treatment. */ 308 target->value = buffer[i]; 309 ++i; 310 target->total_len = i; 311 ++target; 312 } 313 else 314 { 315 apr_uint64_t value = 0; 316 apr_uint64_t shift = 0; 317 while (buffer[i] >= 0x80) 318 { 319 value += ((apr_uint64_t)buffer[i] & 0x7f) << shift; 320 shift += 7; 321 ++i; 322 } 323 324 target->value = value + ((apr_uint64_t)buffer[i] << shift); 325 ++i; 326 target->total_len = i; 327 ++target; 328 329 /* let's catch corrupted data early. It would surely cause 330 * havoc further down the line. */ 331 if SVN__PREDICT_FALSE(shift > 8 * sizeof(value)) 332 return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 333 _("Corrupt index: number too large")); 334 } 335 } 336 337 /* update stream state */ 338 stream->used = target - stream->buffer; 339 stream->next_offset = stream->start_offset + i; 340 stream->current = 0; 341 342 return SVN_NO_ERROR; 343} 344 345/* Create and open a packed number stream reading from offsets START to 346 * END in FILE and return it in *STREAM. Access the file in chunks of 347 * BLOCK_SIZE bytes. Expect the stream to be prefixed by STREAM_PREFIX. 348 * Allocate *STREAM in RESULT_POOL and use SCRATCH_POOL for temporaries. 349 */ 350static svn_error_t * 351packed_stream_open(svn_fs_fs__packed_number_stream_t **stream, 352 apr_file_t *file, 353 apr_off_t start, 354 apr_off_t end, 355 const char *stream_prefix, 356 apr_size_t block_size, 357 apr_pool_t *result_pool, 358 apr_pool_t *scratch_pool) 359{ 360 char buffer[STREAM_PREFIX_LEN + 1] = { 0 }; 361 apr_size_t len = strlen(stream_prefix); 362 svn_fs_fs__packed_number_stream_t *result; 363 364 /* If this is violated, we forgot to adjust STREAM_PREFIX_LEN after 365 * changing the index header prefixes. */ 366 SVN_ERR_ASSERT(len < sizeof(buffer)); 367 368 /* Read the header prefix and compare it with the expected prefix */ 369 SVN_ERR(svn_io_file_aligned_seek(file, block_size, NULL, start, 370 scratch_pool)); 371 SVN_ERR(svn_io_file_read_full2(file, buffer, len, NULL, NULL, 372 scratch_pool)); 373 374 if (strncmp(buffer, stream_prefix, len)) 375 return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 376 _("Index stream header prefix mismatch.\n" 377 " expected: %s" 378 " found: %s"), stream_prefix, buffer); 379 380 /* Construct the actual stream object. */ 381 result = apr_palloc(result_pool, sizeof(*result)); 382 383 result->pool = result_pool; 384 result->file = file; 385 result->stream_start = start + len; 386 result->stream_end = end; 387 388 result->used = 0; 389 result->current = 0; 390 result->start_offset = result->stream_start; 391 result->next_offset = result->stream_start; 392 result->block_size = block_size; 393 394 *stream = result; 395 396 return SVN_NO_ERROR; 397} 398 399/* 400 * The forced inline is required for performance reasons: This is a very 401 * hot code path (called for every item we read) but e.g. GCC would rather 402 * chose to inline packed_stream_read() here, preventing packed_stream_get 403 * from being inlined itself. 404 */ 405SVN__FORCE_INLINE 406static svn_error_t* 407packed_stream_get(apr_uint64_t *value, 408 svn_fs_fs__packed_number_stream_t *stream) 409{ 410 if (stream->current == stream->used) 411 SVN_ERR(packed_stream_read(stream)); 412 413 *value = stream->buffer[stream->current].value; 414 ++stream->current; 415 416 return SVN_NO_ERROR; 417} 418 419/* Navigate STREAM to packed stream offset OFFSET. There will be no checks 420 * whether the given OFFSET is valid. 421 */ 422static void 423packed_stream_seek(svn_fs_fs__packed_number_stream_t *stream, 424 apr_off_t offset) 425{ 426 apr_off_t file_offset = offset + stream->stream_start; 427 428 if ( stream->used == 0 429 || offset < stream->start_offset 430 || offset >= stream->next_offset) 431 { 432 /* outside buffered data. Next get() will read() from OFFSET. */ 433 stream->start_offset = file_offset; 434 stream->next_offset = file_offset; 435 stream->current = 0; 436 stream->used = 0; 437 } 438 else 439 { 440 /* Find the suitable location in the stream buffer. 441 * Since our buffer is small, it is efficient enough to simply scan 442 * it for the desired position. */ 443 apr_size_t i; 444 for (i = 0; i < stream->used; ++i) 445 if (stream->buffer[i].total_len > file_offset - stream->start_offset) 446 break; 447 448 stream->current = i; 449 } 450} 451 452/* Return the packed stream offset of at which the next number in the stream 453 * can be found. 454 */ 455static apr_off_t 456packed_stream_offset(svn_fs_fs__packed_number_stream_t *stream) 457{ 458 apr_off_t file_offset 459 = stream->current == 0 460 ? stream->start_offset 461 : stream->buffer[stream->current-1].total_len + stream->start_offset; 462 463 return file_offset - stream->stream_start; 464} 465 466/* Encode VALUE as 7/8b into P and return the number of bytes written. 467 * This will be used when _writing_ packed data. packed_stream_* is for 468 * read operations only. 469 */ 470static apr_size_t 471encode_uint(unsigned char *p, apr_uint64_t value) 472{ 473 unsigned char *start = p; 474 while (value >= 0x80) 475 { 476 *p = (unsigned char)((value % 0x80) + 0x80); 477 value /= 0x80; 478 ++p; 479 } 480 481 *p = (unsigned char)(value % 0x80); 482 return (p - start) + 1; 483} 484 485/* Encode VALUE as 7/8b into P and return the number of bytes written. 486 * This maps signed ints onto unsigned ones. 487 */ 488static apr_size_t 489encode_int(unsigned char *p, apr_int64_t value) 490{ 491 return encode_uint(p, (apr_uint64_t)(value < 0 ? -1 - 2*value : 2*value)); 492} 493 494/* Append VALUE to STREAM in 7/8b encoding. 495 */ 496static svn_error_t * 497stream_write_encoded(svn_stream_t *stream, 498 apr_uint64_t value) 499{ 500 unsigned char encoded[ENCODED_INT_LENGTH]; 501 502 apr_size_t len = encode_uint(encoded, value); 503 return svn_error_trace(svn_stream_write(stream, (char *)encoded, &len)); 504} 505 506/* Map unsigned VALUE back to signed integer. 507 */ 508static apr_int64_t 509decode_int(apr_uint64_t value) 510{ 511 return (apr_int64_t)(value % 2 ? -1 - value / 2 : value / 2); 512} 513 514/* Write VALUE to the PROTO_INDEX file, using SCRATCH_POOL for temporary 515 * allocations. 516 * 517 * The point of this function is to ensure an architecture-independent 518 * proto-index file format. All data is written as unsigned 64 bits ints 519 * in little endian byte order. 64 bits is the largest portable integer 520 * we have and unsigned values have well-defined conversions in C. 521 */ 522static svn_error_t * 523write_uint64_to_proto_index(apr_file_t *proto_index, 524 apr_uint64_t value, 525 apr_pool_t *scratch_pool) 526{ 527 apr_byte_t buffer[sizeof(value)]; 528 int i; 529 apr_size_t written; 530 531 /* Split VALUE into 8 bytes using LE ordering. */ 532 for (i = 0; i < sizeof(buffer); ++i) 533 { 534 /* Unsigned conversions are well-defined ... */ 535 buffer[i] = (apr_byte_t)value; 536 value >>= CHAR_BIT; 537 } 538 539 /* Write it all to disk. */ 540 SVN_ERR(svn_io_file_write_full(proto_index, buffer, sizeof(buffer), 541 &written, scratch_pool)); 542 SVN_ERR_ASSERT(written == sizeof(buffer)); 543 544 return SVN_NO_ERROR; 545} 546 547/* Read one unsigned 64 bit value from PROTO_INDEX file and return it in 548 * *VALUE_P. If EOF is NULL, error out when trying to read beyond EOF. 549 * Use SCRATCH_POOL for temporary allocations. 550 * 551 * This function is the inverse to write_uint64_to_proto_index (see there), 552 * reading the external LE byte order and convert it into host byte order. 553 */ 554static svn_error_t * 555read_uint64_from_proto_index(apr_file_t *proto_index, 556 apr_uint64_t *value_p, 557 svn_boolean_t *eof, 558 apr_pool_t *scratch_pool) 559{ 560 apr_byte_t buffer[sizeof(*value_p)]; 561 apr_size_t read; 562 563 /* Read the full 8 bytes or our 64 bit value, unless we hit EOF. 564 * Assert that we never read partial values. */ 565 SVN_ERR(svn_io_file_read_full2(proto_index, buffer, sizeof(buffer), 566 &read, eof, scratch_pool)); 567 SVN_ERR_ASSERT((eof && *eof) || read == sizeof(buffer)); 568 569 /* If we did not hit EOF, reconstruct the uint64 value and return it. */ 570 if (!eof || !*eof) 571 { 572 int i; 573 apr_uint64_t value; 574 575 /* This could only overflow if CHAR_BIT had a value that is not 576 * a divisor of 64. */ 577 value = 0; 578 for (i = sizeof(buffer) - 1; i >= 0; --i) 579 value = (value << CHAR_BIT) + buffer[i]; 580 581 *value_p = value; 582 } 583 584 return SVN_NO_ERROR; 585} 586 587/* Convenience function similar to read_uint64_from_proto_index, but returns 588 * an uint32 value in VALUE_P. Return an error if the value does not fit. 589 */ 590static svn_error_t * 591read_uint32_from_proto_index(apr_file_t *proto_index, 592 apr_uint32_t *value_p, 593 svn_boolean_t *eof, 594 apr_pool_t *scratch_pool) 595{ 596 apr_uint64_t value; 597 SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof, 598 scratch_pool)); 599 if (!eof || !*eof) 600 { 601 if (value > APR_UINT32_MAX) 602 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL, 603 _("UINT32 0x%s too large, max = 0x%s"), 604 apr_psprintf(scratch_pool, 605 "%" APR_UINT64_T_HEX_FMT, 606 value), 607 apr_psprintf(scratch_pool, 608 "%" APR_UINT64_T_HEX_FMT, 609 (apr_uint64_t)APR_UINT32_MAX)); 610 611 /* This conversion is not lossy because the value can be represented 612 * in the target type. */ 613 *value_p = (apr_uint32_t)value; 614 } 615 616 return SVN_NO_ERROR; 617} 618 619/* Convenience function similar to read_uint64_from_proto_index, but returns 620 * an off_t value in VALUE_P. Return an error if the value does not fit. 621 */ 622static svn_error_t * 623read_off_t_from_proto_index(apr_file_t *proto_index, 624 apr_off_t *value_p, 625 svn_boolean_t *eof, 626 apr_pool_t *scratch_pool) 627{ 628 apr_uint64_t value; 629 SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof, 630 scratch_pool)); 631 if (!eof || !*eof) 632 { 633 if (value > off_t_max) 634 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL, 635 _("File offset 0x%s too large, max = 0x%s"), 636 apr_psprintf(scratch_pool, 637 "%" APR_UINT64_T_HEX_FMT, 638 value), 639 apr_psprintf(scratch_pool, 640 "%" APR_UINT64_T_HEX_FMT, 641 off_t_max)); 642 643 /* Shortening conversion from unsigned to signed int is well-defined 644 * and not lossy in C because the value can be represented in the 645 * target type. */ 646 *value_p = (apr_off_t)value; 647 } 648 649 return SVN_NO_ERROR; 650} 651 652/* 653 * log-to-phys index 654 */ 655 656/* Append ENTRY to log-to-phys PROTO_INDEX file. 657 * Use SCRATCH_POOL for temporary allocations. 658 */ 659static svn_error_t * 660write_l2p_entry_to_proto_index(apr_file_t *proto_index, 661 l2p_proto_entry_t entry, 662 apr_pool_t *scratch_pool) 663{ 664 SVN_ERR(write_uint64_to_proto_index(proto_index, entry.offset, 665 scratch_pool)); 666 SVN_ERR(write_uint64_to_proto_index(proto_index, entry.item_index, 667 scratch_pool)); 668 669 return SVN_NO_ERROR; 670} 671 672/* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file 673 * in *EOF, or error out in that case if EOF is NULL. *ENTRY is in an 674 * undefined state if an end-of-file occurred. 675 * Use SCRATCH_POOL for temporary allocations. 676 */ 677static svn_error_t * 678read_l2p_entry_from_proto_index(apr_file_t *proto_index, 679 l2p_proto_entry_t *entry, 680 svn_boolean_t *eof, 681 apr_pool_t *scratch_pool) 682{ 683 SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->offset, eof, 684 scratch_pool)); 685 SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->item_index, eof, 686 scratch_pool)); 687 688 return SVN_NO_ERROR; 689} 690 691/* Write the log-2-phys index page description for the l2p_page_entry_t 692 * array ENTRIES, starting with element START up to but not including END. 693 * Write the resulting representation into BUFFER. Use SCRATCH_POOL for 694 * temporary allocations. 695 */ 696static svn_error_t * 697encode_l2p_page(apr_array_header_t *entries, 698 int start, 699 int end, 700 svn_spillbuf_t *buffer, 701 apr_pool_t *scratch_pool) 702{ 703 unsigned char encoded[ENCODED_INT_LENGTH]; 704 int i; 705 const apr_uint64_t *values = (const apr_uint64_t *)entries->elts; 706 apr_uint64_t last_value = 0; 707 708 /* encode items */ 709 for (i = start; i < end; ++i) 710 { 711 apr_int64_t diff = values[i] - last_value; 712 last_value = values[i]; 713 SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, 714 encode_int(encoded, diff), scratch_pool)); 715 } 716 717 return SVN_NO_ERROR; 718} 719 720svn_error_t * 721svn_fs_fs__l2p_proto_index_open(apr_file_t **proto_index, 722 const char *file_name, 723 apr_pool_t *result_pool) 724{ 725 SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE 726 | APR_CREATE | APR_APPEND | APR_BUFFERED, 727 APR_OS_DEFAULT, result_pool)); 728 729 return SVN_NO_ERROR; 730} 731 732svn_error_t * 733svn_fs_fs__l2p_proto_index_add_revision(apr_file_t *proto_index, 734 apr_pool_t *scratch_pool) 735{ 736 l2p_proto_entry_t entry; 737 entry.offset = 0; 738 entry.item_index = 0; 739 740 return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry, 741 scratch_pool)); 742} 743 744svn_error_t * 745svn_fs_fs__l2p_proto_index_add_entry(apr_file_t *proto_index, 746 apr_off_t offset, 747 apr_uint64_t item_index, 748 apr_pool_t *scratch_pool) 749{ 750 l2p_proto_entry_t entry; 751 752 /* make sure the conversion to uint64 works */ 753 SVN_ERR_ASSERT(offset >= -1); 754 755 /* we support offset '-1' as a "not used" indication */ 756 entry.offset = (apr_uint64_t)offset + 1; 757 758 /* make sure we can use item_index as an array index when building the 759 * final index file */ 760 SVN_ERR_ASSERT(item_index < UINT_MAX / 2); 761 entry.item_index = item_index; 762 763 return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry, 764 scratch_pool)); 765} 766 767svn_error_t * 768svn_fs_fs__l2p_index_append(svn_checksum_t **checksum, 769 svn_fs_t *fs, 770 apr_file_t *index_file, 771 const char *proto_file_name, 772 svn_revnum_t revision, 773 apr_pool_t * result_pool, 774 apr_pool_t *scratch_pool) 775{ 776 fs_fs_data_t *ffd = fs->fsap_data; 777 apr_file_t *proto_index = NULL; 778 svn_stream_t *stream; 779 int i; 780 apr_uint64_t entry; 781 svn_boolean_t eof = FALSE; 782 783 int last_page_count = 0; /* total page count at the start of 784 the current revision */ 785 786 /* temporary data structures that collect the data which will be moved 787 to the target file in a second step */ 788 apr_pool_t *local_pool = svn_pool_create(scratch_pool); 789 apr_pool_t *iterpool = svn_pool_create(local_pool); 790 apr_array_header_t *page_counts 791 = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); 792 apr_array_header_t *page_sizes 793 = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); 794 apr_array_header_t *entry_counts 795 = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); 796 797 /* collect the item offsets and sub-item value for the current revision */ 798 apr_array_header_t *entries 799 = apr_array_make(local_pool, 256, sizeof(apr_uint64_t)); 800 801 /* 64k blocks, spill after 16MB */ 802 svn_spillbuf_t *buffer 803 = svn_spillbuf__create(0x10000, 0x1000000, local_pool); 804 805 /* Paranoia check that makes later casting to int32 safe. 806 * The current implementation is limited to 2G entries per page. */ 807 if (ffd->l2p_page_size > APR_INT32_MAX) 808 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL, 809 _("L2P index page size %s" 810 " exceeds current limit of 2G entries"), 811 apr_psprintf(local_pool, "%" APR_UINT64_T_FMT, 812 ffd->l2p_page_size)); 813 814 /* start at the beginning of the source file */ 815 SVN_ERR(svn_io_file_open(&proto_index, proto_file_name, 816 APR_READ | APR_CREATE | APR_BUFFERED, 817 APR_OS_DEFAULT, scratch_pool)); 818 819 /* process all entries until we fail due to EOF */ 820 for (entry = 0; !eof; ++entry) 821 { 822 l2p_proto_entry_t proto_entry; 823 824 /* (attempt to) read the next entry from the source */ 825 SVN_ERR(read_l2p_entry_from_proto_index(proto_index, &proto_entry, 826 &eof, local_pool)); 827 828 /* handle new revision */ 829 if ((entry > 0 && proto_entry.offset == 0) || eof) 830 { 831 /* dump entries, grouped into pages */ 832 833 int entry_count = 0; 834 for (i = 0; i < entries->nelts; i += entry_count) 835 { 836 /* 1 page with up to L2P_PAGE_SIZE entries. 837 * fsfs.conf settings validation guarantees this to fit into 838 * our address space. */ 839 apr_uint64_t last_buffer_size 840 = (apr_uint64_t)svn_spillbuf__get_size(buffer); 841 842 svn_pool_clear(iterpool); 843 844 entry_count = ffd->l2p_page_size < entries->nelts - i 845 ? (int)ffd->l2p_page_size 846 : entries->nelts - i; 847 SVN_ERR(encode_l2p_page(entries, i, i + entry_count, 848 buffer, iterpool)); 849 850 APR_ARRAY_PUSH(entry_counts, apr_uint64_t) = entry_count; 851 APR_ARRAY_PUSH(page_sizes, apr_uint64_t) 852 = svn_spillbuf__get_size(buffer) - last_buffer_size; 853 } 854 855 apr_array_clear(entries); 856 857 /* store the number of pages in this revision */ 858 APR_ARRAY_PUSH(page_counts, apr_uint64_t) 859 = page_sizes->nelts - last_page_count; 860 861 last_page_count = page_sizes->nelts; 862 } 863 else 864 { 865 int idx; 866 867 /* store the mapping in our array */ 868 if (proto_entry.item_index > APR_INT32_MAX) 869 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL, 870 _("Item index %s too large " 871 "in l2p proto index for revision %ld"), 872 apr_psprintf(local_pool, "%" APR_UINT64_T_FMT, 873 proto_entry.item_index), 874 revision + page_counts->nelts); 875 876 idx = (int)proto_entry.item_index; 877 while (idx >= entries->nelts) 878 APR_ARRAY_PUSH(entries, apr_uint64_t) = 0; 879 880 APR_ARRAY_IDX(entries, idx, apr_uint64_t) = proto_entry.offset; 881 } 882 } 883 884 /* close the source file */ 885 SVN_ERR(svn_io_file_close(proto_index, local_pool)); 886 887 /* Paranoia check that makes later casting to int32 safe. 888 * The current implementation is limited to 2G pages per index. */ 889 if (page_counts->nelts > APR_INT32_MAX) 890 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL, 891 _("L2P index page count %d" 892 " exceeds current limit of 2G pages"), 893 page_counts->nelts); 894 895 /* open target stream. */ 896 stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE, 897 local_pool), 898 NULL, checksum, svn_checksum_md5, FALSE, 899 result_pool); 900 901 902 /* write header info */ 903 SVN_ERR(svn_stream_puts(stream, L2P_STREAM_PREFIX)); 904 SVN_ERR(stream_write_encoded(stream, revision)); 905 SVN_ERR(stream_write_encoded(stream, ffd->l2p_page_size)); 906 SVN_ERR(stream_write_encoded(stream, page_counts->nelts)); 907 SVN_ERR(stream_write_encoded(stream, page_sizes->nelts)); 908 909 /* write the revision table */ 910 for (i = 0; i < page_counts->nelts; ++i) 911 { 912 apr_uint64_t value = APR_ARRAY_IDX(page_counts, i, apr_uint64_t); 913 SVN_ERR(stream_write_encoded(stream, value)); 914 } 915 916 /* write the page table */ 917 for (i = 0; i < page_sizes->nelts; ++i) 918 { 919 apr_uint64_t value = APR_ARRAY_IDX(page_sizes, i, apr_uint64_t); 920 SVN_ERR(stream_write_encoded(stream, value)); 921 value = APR_ARRAY_IDX(entry_counts, i, apr_uint64_t); 922 SVN_ERR(stream_write_encoded(stream, value)); 923 } 924 925 /* append page contents and implicitly close STREAM */ 926 SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool), 927 stream, NULL, NULL, local_pool)); 928 929 svn_pool_destroy(local_pool); 930 931 return SVN_NO_ERROR; 932} 933 934/* If REV_FILE->L2P_STREAM is NULL, create a new stream for the log-to-phys 935 * index for REVISION in FS and return it in REV_FILE. 936 */ 937static svn_error_t * 938auto_open_l2p_index(svn_fs_fs__revision_file_t *rev_file, 939 svn_fs_t *fs, 940 svn_revnum_t revision) 941{ 942 if (rev_file->l2p_stream == NULL) 943 { 944 fs_fs_data_t *ffd = fs->fsap_data; 945 946 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); 947 SVN_ERR(packed_stream_open(&rev_file->l2p_stream, 948 rev_file->file, 949 rev_file->l2p_offset, 950 rev_file->p2l_offset, 951 L2P_STREAM_PREFIX, 952 (apr_size_t)ffd->block_size, 953 rev_file->pool, 954 rev_file->pool)); 955 } 956 957 return SVN_NO_ERROR; 958} 959 960/* Read the header data structure of the log-to-phys index for REVISION 961 * in FS and return it in *HEADER, allocated in RESULT_POOL. Use REV_FILE 962 * to access on-disk data. Use SCRATCH_POOL for temporary allocations. 963 */ 964static svn_error_t * 965get_l2p_header_body(l2p_header_t **header, 966 svn_fs_fs__revision_file_t *rev_file, 967 svn_fs_t *fs, 968 svn_revnum_t revision, 969 apr_pool_t *result_pool, 970 apr_pool_t *scratch_pool) 971{ 972 fs_fs_data_t *ffd = fs->fsap_data; 973 apr_uint64_t value; 974 apr_size_t i; 975 apr_size_t page, page_count; 976 apr_off_t offset; 977 l2p_header_t *result = apr_pcalloc(result_pool, sizeof(*result)); 978 apr_size_t page_table_index; 979 svn_revnum_t next_rev; 980 981 pair_cache_key_t key; 982 key.revision = rev_file->start_revision; 983 key.second = rev_file->is_packed; 984 985 SVN_ERR(auto_open_l2p_index(rev_file, fs, revision)); 986 packed_stream_seek(rev_file->l2p_stream, 0); 987 988 /* Read the table sizes. Check the data for plausibility and 989 * consistency with other bits. */ 990 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 991 result->first_revision = (svn_revnum_t)value; 992 if (result->first_revision != rev_file->start_revision) 993 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 994 _("Index rev / pack file revision numbers do not match")); 995 996 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 997 result->page_size = (apr_uint32_t)value; 998 if (!result->page_size || (result->page_size & (result->page_size - 1))) 999 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1000 _("L2P index page size is not a power of two")); 1001 1002 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 1003 result->revision_count = (int)value; 1004 if ( result->revision_count != 1 1005 && result->revision_count != (apr_uint64_t)ffd->max_files_per_dir) 1006 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1007 _("Invalid number of revisions in L2P index")); 1008 1009 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 1010 page_count = (apr_size_t)value; 1011 if (page_count < result->revision_count) 1012 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1013 _("Fewer L2P index pages than revisions")); 1014 if (page_count > (rev_file->p2l_offset - rev_file->l2p_offset) / 2) 1015 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1016 _("L2P index page count implausibly large")); 1017 1018 next_rev = result->first_revision + (svn_revnum_t)result->revision_count; 1019 if (result->first_revision > revision || next_rev <= revision) 1020 return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1021 _("Corrupt L2P index for r%ld only covers r%ld:%ld"), 1022 revision, result->first_revision, next_rev); 1023 1024 /* allocate the page tables */ 1025 result->page_table 1026 = apr_pcalloc(result_pool, page_count * sizeof(*result->page_table)); 1027 result->page_table_index 1028 = apr_pcalloc(result_pool, (result->revision_count + 1) 1029 * sizeof(*result->page_table_index)); 1030 1031 /* read per-revision page table sizes (i.e. number of pages per rev) */ 1032 page_table_index = 0; 1033 result->page_table_index[0] = page_table_index; 1034 1035 for (i = 0; i < result->revision_count; ++i) 1036 { 1037 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 1038 if (value == 0) 1039 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1040 _("Revision with no L2P index pages")); 1041 1042 page_table_index += (apr_size_t)value; 1043 if (page_table_index > page_count) 1044 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1045 _("L2P page table exceeded")); 1046 1047 result->page_table_index[i+1] = page_table_index; 1048 } 1049 1050 if (page_table_index != page_count) 1051 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1052 _("Revisions do not cover the full L2P index page table")); 1053 1054 /* read actual page tables */ 1055 for (page = 0; page < page_count; ++page) 1056 { 1057 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 1058 if (value == 0) 1059 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1060 _("Empty L2P index page")); 1061 1062 result->page_table[page].size = (apr_uint32_t)value; 1063 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 1064 if (value > result->page_size) 1065 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1066 _("Page exceeds L2P index page size")); 1067 1068 result->page_table[page].entry_count = (apr_uint32_t)value; 1069 } 1070 1071 /* correct the page description offsets */ 1072 offset = packed_stream_offset(rev_file->l2p_stream); 1073 for (page = 0; page < page_count; ++page) 1074 { 1075 result->page_table[page].offset = offset; 1076 offset += result->page_table[page].size; 1077 } 1078 1079 /* return and cache the header */ 1080 *header = result; 1081 SVN_ERR(svn_cache__set(ffd->l2p_header_cache, &key, result, scratch_pool)); 1082 1083 return SVN_NO_ERROR; 1084} 1085 1086/* Data structure that describes which l2p page info shall be extracted 1087 * from the cache and contains the fields that receive the result. 1088 */ 1089typedef struct l2p_page_info_baton_t 1090{ 1091 /* input data: we want the page covering (REVISION,ITEM_INDEX) */ 1092 svn_revnum_t revision; 1093 apr_uint64_t item_index; 1094 1095 /* out data */ 1096 /* page location and size of the page within the l2p index file */ 1097 l2p_page_table_entry_t entry; 1098 1099 /* page number within the pages for REVISION (not l2p index global!) */ 1100 apr_uint32_t page_no; 1101 1102 /* offset of ITEM_INDEX within that page */ 1103 apr_uint32_t page_offset; 1104 1105 /* revision identifying the l2p index file, also the first rev in that */ 1106 svn_revnum_t first_revision; 1107} l2p_page_info_baton_t; 1108 1109 1110/* Utility function that copies the info requested by BATON->REVISION and 1111 * BATON->ITEM_INDEX and from HEADER and PAGE_TABLE into the output fields 1112 * of *BATON. Use SCRATCH_POOL for temporary allocations. 1113 */ 1114static svn_error_t * 1115l2p_page_info_copy(l2p_page_info_baton_t *baton, 1116 const l2p_header_t *header, 1117 const l2p_page_table_entry_t *page_table, 1118 const apr_size_t *page_table_index, 1119 apr_pool_t *scratch_pool) 1120{ 1121 /* revision offset within the index file */ 1122 apr_size_t rel_revision = baton->revision - header->first_revision; 1123 if (rel_revision >= header->revision_count) 1124 return svn_error_createf(SVN_ERR_FS_INDEX_REVISION , NULL, 1125 _("Revision %ld not covered by item index"), 1126 baton->revision); 1127 1128 /* select the relevant page */ 1129 if (baton->item_index < header->page_size) 1130 { 1131 /* most revs fit well into a single page */ 1132 baton->page_offset = (apr_uint32_t)baton->item_index; 1133 baton->page_no = 0; 1134 baton->entry = page_table[page_table_index[rel_revision]]; 1135 } 1136 else 1137 { 1138 const l2p_page_table_entry_t *first_entry; 1139 const l2p_page_table_entry_t *last_entry; 1140 apr_uint64_t max_item_index; 1141 1142 /* range of pages for this rev */ 1143 first_entry = page_table + page_table_index[rel_revision]; 1144 last_entry = page_table + page_table_index[rel_revision + 1]; 1145 1146 /* do we hit a valid index page? */ 1147 max_item_index = (apr_uint64_t)header->page_size 1148 * (last_entry - first_entry); 1149 if (baton->item_index >= max_item_index) 1150 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL, 1151 _("Item index %s exceeds l2p limit " 1152 "of %s for revision %ld"), 1153 apr_psprintf(scratch_pool, 1154 "%" APR_UINT64_T_FMT, 1155 baton->item_index), 1156 apr_psprintf(scratch_pool, 1157 "%" APR_UINT64_T_FMT, 1158 max_item_index), 1159 baton->revision); 1160 1161 /* all pages are of the same size and full, except for the last one */ 1162 baton->page_offset = (apr_uint32_t)(baton->item_index % header->page_size); 1163 baton->page_no = (apr_uint32_t)(baton->item_index / header->page_size); 1164 baton->entry = first_entry[baton->page_no]; 1165 } 1166 1167 baton->first_revision = header->first_revision; 1168 1169 return SVN_NO_ERROR; 1170} 1171 1172/* Implement svn_cache__partial_getter_func_t: copy the data requested in 1173 * l2p_page_info_baton_t *BATON from l2p_header_t *DATA into the output 1174 * fields in *BATON. 1175 */ 1176static svn_error_t * 1177l2p_page_info_access_func(void **out, 1178 const void *data, 1179 apr_size_t data_len, 1180 void *baton, 1181 apr_pool_t *result_pool) 1182{ 1183 /* resolve all pointer values of in-cache data */ 1184 const l2p_header_t *header = data; 1185 const l2p_page_table_entry_t *page_table 1186 = svn_temp_deserializer__ptr(header, 1187 (const void *const *)&header->page_table); 1188 const apr_size_t *page_table_index 1189 = svn_temp_deserializer__ptr(header, 1190 (const void *const *)&header->page_table_index); 1191 1192 /* copy the info */ 1193 return l2p_page_info_copy(baton, header, page_table, page_table_index, 1194 result_pool); 1195} 1196 1197/* Get the page info requested in *BATON from FS and set the output fields 1198 * in *BATON. Use REV_FILE for on-disk file access. 1199 * Use SCRATCH_POOL for temporary allocations. 1200 */ 1201static svn_error_t * 1202get_l2p_page_info(l2p_page_info_baton_t *baton, 1203 svn_fs_fs__revision_file_t *rev_file, 1204 svn_fs_t *fs, 1205 apr_pool_t *scratch_pool) 1206{ 1207 fs_fs_data_t *ffd = fs->fsap_data; 1208 l2p_header_t *result; 1209 svn_boolean_t is_cached = FALSE; 1210 void *dummy = NULL; 1211 1212 /* try to find the info in the cache */ 1213 pair_cache_key_t key; 1214 key.revision = rev_file->start_revision; 1215 key.second = rev_file->is_packed; 1216 SVN_ERR(svn_cache__get_partial((void**)&dummy, &is_cached, 1217 ffd->l2p_header_cache, &key, 1218 l2p_page_info_access_func, baton, 1219 scratch_pool)); 1220 if (is_cached) 1221 return SVN_NO_ERROR; 1222 1223 /* read from disk, cache and copy the result */ 1224 SVN_ERR(get_l2p_header_body(&result, rev_file, fs, baton->revision, 1225 scratch_pool, scratch_pool)); 1226 SVN_ERR(l2p_page_info_copy(baton, result, result->page_table, 1227 result->page_table_index, scratch_pool)); 1228 1229 return SVN_NO_ERROR; 1230} 1231 1232/* Data request structure used by l2p_page_table_access_func. 1233 */ 1234typedef struct l2p_page_table_baton_t 1235{ 1236 /* revision for which to read the page table */ 1237 svn_revnum_t revision; 1238 1239 /* page table entries (of type l2p_page_table_entry_t). 1240 * Must be created by caller and will be filled by callee. */ 1241 apr_array_header_t *pages; 1242} l2p_page_table_baton_t; 1243 1244/* Implement svn_cache__partial_getter_func_t: copy the data requested in 1245 * l2p_page_baton_t *BATON from l2p_page_t *DATA into BATON->PAGES and *OUT. 1246 */ 1247static svn_error_t * 1248l2p_page_table_access_func(void **out, 1249 const void *data, 1250 apr_size_t data_len, 1251 void *baton, 1252 apr_pool_t *result_pool) 1253{ 1254 /* resolve in-cache pointers */ 1255 l2p_page_table_baton_t *table_baton = baton; 1256 const l2p_header_t *header = (const l2p_header_t *)data; 1257 const l2p_page_table_entry_t *page_table 1258 = svn_temp_deserializer__ptr(header, 1259 (const void *const *)&header->page_table); 1260 const apr_size_t *page_table_index 1261 = svn_temp_deserializer__ptr(header, 1262 (const void *const *)&header->page_table_index); 1263 1264 /* copy the revision's page table into BATON */ 1265 apr_size_t rel_revision = table_baton->revision - header->first_revision; 1266 if (rel_revision < header->revision_count) 1267 { 1268 const l2p_page_table_entry_t *entry 1269 = page_table + page_table_index[rel_revision]; 1270 const l2p_page_table_entry_t *last_entry 1271 = page_table + page_table_index[rel_revision + 1]; 1272 1273 for (; entry < last_entry; ++entry) 1274 APR_ARRAY_PUSH(table_baton->pages, l2p_page_table_entry_t) 1275 = *entry; 1276 } 1277 1278 /* set output as a courtesy to the caller */ 1279 *out = table_baton->pages; 1280 1281 return SVN_NO_ERROR; 1282} 1283 1284/* Read the l2p index page table for REVISION in FS from cache and return 1285 * it in PAGES. The later must be provided by the caller (and can be 1286 * re-used); existing entries will be removed before writing the result. 1287 * If the data cannot be found in the cache, the result will be empty 1288 * (it never can be empty for a valid REVISION if the data is cached). 1289 * Use the info from REV_FILE to determine pack / rev file properties. 1290 * Use SCRATCH_POOL for temporary allocations. 1291 */ 1292static svn_error_t * 1293get_l2p_page_table(apr_array_header_t *pages, 1294 svn_fs_t *fs, 1295 svn_fs_fs__revision_file_t *rev_file, 1296 svn_revnum_t revision, 1297 apr_pool_t *scratch_pool) 1298{ 1299 fs_fs_data_t *ffd = fs->fsap_data; 1300 svn_boolean_t is_cached = FALSE; 1301 l2p_page_table_baton_t baton; 1302 1303 pair_cache_key_t key; 1304 key.revision = rev_file->start_revision; 1305 key.second = rev_file->is_packed; 1306 1307 apr_array_clear(pages); 1308 baton.revision = revision; 1309 baton.pages = pages; 1310 SVN_ERR(svn_cache__get_partial((void**)&pages, &is_cached, 1311 ffd->l2p_header_cache, &key, 1312 l2p_page_table_access_func, &baton, 1313 scratch_pool)); 1314 1315 return SVN_NO_ERROR; 1316} 1317 1318/* From the log-to-phys index file starting at START_REVISION in FS, read 1319 * the mapping page identified by TABLE_ENTRY and return it in *PAGE. 1320 * Use REV_FILE to access on-disk files. 1321 * Use RESULT_POOL for allocations. 1322 */ 1323static svn_error_t * 1324get_l2p_page(l2p_page_t **page, 1325 svn_fs_fs__revision_file_t *rev_file, 1326 svn_fs_t *fs, 1327 svn_revnum_t start_revision, 1328 l2p_page_table_entry_t *table_entry, 1329 apr_pool_t *result_pool) 1330{ 1331 apr_uint32_t i; 1332 l2p_page_t *result = apr_pcalloc(result_pool, sizeof(*result)); 1333 apr_uint64_t last_value = 0; 1334 1335 /* open index file and select page */ 1336 SVN_ERR(auto_open_l2p_index(rev_file, fs, start_revision)); 1337 packed_stream_seek(rev_file->l2p_stream, table_entry->offset); 1338 1339 /* initialize the page content */ 1340 result->entry_count = table_entry->entry_count; 1341 result->offsets = apr_pcalloc(result_pool, result->entry_count 1342 * sizeof(*result->offsets)); 1343 1344 /* read all page entries (offsets in rev file and container sub-items) */ 1345 for (i = 0; i < result->entry_count; ++i) 1346 { 1347 apr_uint64_t value = 0; 1348 SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream)); 1349 last_value += decode_int(value); 1350 result->offsets[i] = last_value - 1; 1351 } 1352 1353 /* After reading all page entries, the read cursor must have moved by 1354 * TABLE_ENTRY->SIZE bytes. */ 1355 if ( packed_stream_offset(rev_file->l2p_stream) 1356 != table_entry->offset + table_entry->size) 1357 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 1358 _("L2P actual page size does not match page table value.")); 1359 1360 *page = result; 1361 1362 return SVN_NO_ERROR; 1363} 1364 1365/* Utility function. Read the l2p index pages for REVISION in FS from 1366 * REV_FILE and put them into the cache. Skip page number EXLCUDED_PAGE_NO 1367 * (use -1 for 'skip none') and pages outside the MIN_OFFSET, MAX_OFFSET 1368 * range in the l2p index file. The index is being identified by 1369 * FIRST_REVISION. PAGES is a scratch container provided by the caller. 1370 * SCRATCH_POOL is used for temporary allocations. 1371 * 1372 * This function may be a no-op if the header cache lookup fails / misses. 1373 */ 1374static svn_error_t * 1375prefetch_l2p_pages(svn_boolean_t *end, 1376 svn_fs_t *fs, 1377 svn_fs_fs__revision_file_t *rev_file, 1378 svn_revnum_t first_revision, 1379 svn_revnum_t revision, 1380 apr_array_header_t *pages, 1381 int exlcuded_page_no, 1382 apr_off_t min_offset, 1383 apr_off_t max_offset, 1384 apr_pool_t *scratch_pool) 1385{ 1386 fs_fs_data_t *ffd = fs->fsap_data; 1387 int i; 1388 apr_pool_t *iterpool; 1389 svn_fs_fs__page_cache_key_t key = { 0 }; 1390 1391 /* Parameter check. */ 1392 if (min_offset < 0) 1393 min_offset = 0; 1394 1395 if (max_offset <= 0) 1396 { 1397 /* Nothing to do */ 1398 *end = TRUE; 1399 return SVN_NO_ERROR; 1400 } 1401 1402 /* get the page table for REVISION from cache */ 1403 *end = FALSE; 1404 SVN_ERR(get_l2p_page_table(pages, fs, rev_file, revision, scratch_pool)); 1405 if (pages->nelts == 0 || rev_file->l2p_stream == NULL) 1406 { 1407 /* not found -> we can't continue without hitting the disk again */ 1408 *end = TRUE; 1409 return SVN_NO_ERROR; 1410 } 1411 1412 /* prefetch pages individually until all are done or we found one in 1413 * the cache */ 1414 iterpool = svn_pool_create(scratch_pool); 1415 assert(revision <= APR_UINT32_MAX); 1416 key.revision = (apr_uint32_t)revision; 1417 key.is_packed = rev_file->is_packed; 1418 1419 for (i = 0; i < pages->nelts && !*end; ++i) 1420 { 1421 svn_boolean_t is_cached; 1422 1423 l2p_page_table_entry_t *entry 1424 = &APR_ARRAY_IDX(pages, i, l2p_page_table_entry_t); 1425 svn_pool_clear(iterpool); 1426 1427 if (i == exlcuded_page_no) 1428 continue; 1429 1430 /* skip pages outside the specified index file range */ 1431 if ( entry->offset < (apr_uint64_t)min_offset 1432 || entry->offset + entry->size > (apr_uint64_t)max_offset) 1433 { 1434 *end = TRUE; 1435 continue; 1436 } 1437 1438 /* page already in cache? */ 1439 key.page = i; 1440 SVN_ERR(svn_cache__has_key(&is_cached, ffd->l2p_page_cache, 1441 &key, iterpool)); 1442 if (!is_cached) 1443 { 1444 /* no in cache -> read from stream (data already buffered in APR) 1445 * and cache the result */ 1446 l2p_page_t *page = NULL; 1447 SVN_ERR(get_l2p_page(&page, rev_file, fs, first_revision, entry, 1448 iterpool)); 1449 1450 SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page, 1451 iterpool)); 1452 } 1453 } 1454 1455 svn_pool_destroy(iterpool); 1456 1457 return SVN_NO_ERROR; 1458} 1459 1460/* Request data structure for l2p_entry_access_func. 1461 */ 1462typedef struct l2p_entry_baton_t 1463{ 1464 /* in data */ 1465 /* revision. Used for error messages only */ 1466 svn_revnum_t revision; 1467 1468 /* item index to look up. Used for error messages only */ 1469 apr_uint64_t item_index; 1470 1471 /* offset within the cached page */ 1472 apr_uint32_t page_offset; 1473 1474 /* out data */ 1475 /* absolute item or container offset in rev / pack file */ 1476 apr_uint64_t offset; 1477} l2p_entry_baton_t; 1478 1479/* Return the rev / pack file offset of the item at BATON->PAGE_OFFSET in 1480 * OFFSETS of PAGE and write it to *OFFSET. 1481 */ 1482static svn_error_t * 1483l2p_page_get_entry(l2p_entry_baton_t *baton, 1484 const l2p_page_t *page, 1485 const apr_uint64_t *offsets, 1486 apr_pool_t *scratch_pool) 1487{ 1488 /* overflow check */ 1489 if (page->entry_count <= baton->page_offset) 1490 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL, 1491 _("Item index %s" 1492 " too large in revision %ld"), 1493 apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT, 1494 baton->item_index), 1495 baton->revision); 1496 1497 /* return the result */ 1498 baton->offset = offsets[baton->page_offset]; 1499 1500 return SVN_NO_ERROR; 1501} 1502 1503/* Implement svn_cache__partial_getter_func_t: copy the data requested in 1504 * l2p_entry_baton_t *BATON from l2p_page_t *DATA into BATON->OFFSET. 1505 * *OUT remains unchanged. 1506 */ 1507static svn_error_t * 1508l2p_entry_access_func(void **out, 1509 const void *data, 1510 apr_size_t data_len, 1511 void *baton, 1512 apr_pool_t *result_pool) 1513{ 1514 /* resolve all in-cache pointers */ 1515 const l2p_page_t *page = data; 1516 const apr_uint64_t *offsets 1517 = svn_temp_deserializer__ptr(page, (const void *const *)&page->offsets); 1518 1519 /* return the requested data */ 1520 return l2p_page_get_entry(baton, page, offsets, result_pool); 1521} 1522 1523/* Using the log-to-phys indexes in FS, find the absolute offset in the 1524 * rev file for (REVISION, ITEM_INDEX) and return it in *OFFSET. 1525 * Use SCRATCH_POOL for temporary allocations. 1526 */ 1527static svn_error_t * 1528l2p_index_lookup(apr_off_t *offset, 1529 svn_fs_t *fs, 1530 svn_fs_fs__revision_file_t *rev_file, 1531 svn_revnum_t revision, 1532 apr_uint64_t item_index, 1533 apr_pool_t *scratch_pool) 1534{ 1535 fs_fs_data_t *ffd = fs->fsap_data; 1536 l2p_page_info_baton_t info_baton; 1537 l2p_entry_baton_t page_baton; 1538 l2p_page_t *page = NULL; 1539 svn_fs_fs__page_cache_key_t key = { 0 }; 1540 svn_boolean_t is_cached = FALSE; 1541 void *dummy = NULL; 1542 1543 /* read index master data structure and extract the info required to 1544 * access the l2p index page for (REVISION,ITEM_INDEX)*/ 1545 info_baton.revision = revision; 1546 info_baton.item_index = item_index; 1547 SVN_ERR(get_l2p_page_info(&info_baton, rev_file, fs, scratch_pool)); 1548 1549 /* try to find the page in the cache and get the OFFSET from it */ 1550 page_baton.revision = revision; 1551 page_baton.item_index = item_index; 1552 page_baton.page_offset = info_baton.page_offset; 1553 1554 assert(revision <= APR_UINT32_MAX); 1555 key.revision = (apr_uint32_t)revision; 1556 key.is_packed = svn_fs_fs__is_packed_rev(fs, revision); 1557 key.page = info_baton.page_no; 1558 1559 SVN_ERR(svn_cache__get_partial(&dummy, &is_cached, 1560 ffd->l2p_page_cache, &key, 1561 l2p_entry_access_func, &page_baton, 1562 scratch_pool)); 1563 1564 if (!is_cached) 1565 { 1566 /* we need to read the info from disk (might already be in the 1567 * APR file buffer, though) */ 1568 apr_array_header_t *pages; 1569 svn_revnum_t prefetch_revision; 1570 svn_revnum_t last_revision 1571 = info_baton.first_revision 1572 + (key.is_packed ? ffd->max_files_per_dir : 1); 1573 svn_boolean_t end; 1574 apr_off_t max_offset 1575 = APR_ALIGN(info_baton.entry.offset + info_baton.entry.size, 1576 ffd->block_size); 1577 apr_off_t min_offset = max_offset - ffd->block_size; 1578 1579 /* read the relevant page */ 1580 SVN_ERR(get_l2p_page(&page, rev_file, fs, info_baton.first_revision, 1581 &info_baton.entry, scratch_pool)); 1582 1583 /* cache the page and extract the result we need */ 1584 SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page, scratch_pool)); 1585 SVN_ERR(l2p_page_get_entry(&page_baton, page, page->offsets, 1586 scratch_pool)); 1587 1588 if (ffd->use_block_read) 1589 { 1590 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1591 1592 /* prefetch pages from following and preceding revisions */ 1593 pages = apr_array_make(scratch_pool, 16, 1594 sizeof(l2p_page_table_entry_t)); 1595 end = FALSE; 1596 for (prefetch_revision = revision; 1597 prefetch_revision < last_revision && !end; 1598 ++prefetch_revision) 1599 { 1600 int excluded_page_no = prefetch_revision == revision 1601 ? info_baton.page_no 1602 : -1; 1603 svn_pool_clear(iterpool); 1604 1605 SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file, 1606 info_baton.first_revision, 1607 prefetch_revision, pages, 1608 excluded_page_no, min_offset, 1609 max_offset, iterpool)); 1610 } 1611 1612 end = FALSE; 1613 for (prefetch_revision = revision-1; 1614 prefetch_revision >= info_baton.first_revision && !end; 1615 --prefetch_revision) 1616 { 1617 svn_pool_clear(iterpool); 1618 1619 SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file, 1620 info_baton.first_revision, 1621 prefetch_revision, pages, -1, 1622 min_offset, max_offset, iterpool)); 1623 } 1624 1625 svn_pool_destroy(iterpool); 1626 } 1627 } 1628 1629 *offset = page_baton.offset; 1630 1631 return SVN_NO_ERROR; 1632} 1633 1634/* Using the log-to-phys proto index in transaction TXN_ID in FS, find the 1635 * absolute offset in the proto rev file for the given ITEM_INDEX and return 1636 * it in *OFFSET. Use SCRATCH_POOL for temporary allocations. 1637 */ 1638static svn_error_t * 1639l2p_proto_index_lookup(apr_off_t *offset, 1640 svn_fs_t *fs, 1641 const svn_fs_fs__id_part_t *txn_id, 1642 apr_uint64_t item_index, 1643 apr_pool_t *scratch_pool) 1644{ 1645 svn_boolean_t eof = FALSE; 1646 apr_file_t *file = NULL; 1647 SVN_ERR(svn_io_file_open(&file, 1648 svn_fs_fs__path_l2p_proto_index(fs, txn_id, 1649 scratch_pool), 1650 APR_READ | APR_BUFFERED, APR_OS_DEFAULT, 1651 scratch_pool)); 1652 1653 /* process all entries until we fail due to EOF */ 1654 *offset = -1; 1655 while (!eof) 1656 { 1657 l2p_proto_entry_t entry; 1658 1659 /* (attempt to) read the next entry from the source */ 1660 SVN_ERR(read_l2p_entry_from_proto_index(file, &entry, &eof, 1661 scratch_pool)); 1662 1663 /* handle new revision */ 1664 if (!eof && entry.item_index == item_index) 1665 { 1666 *offset = (apr_off_t)entry.offset - 1; 1667 break; 1668 } 1669 } 1670 1671 SVN_ERR(svn_io_file_close(file, scratch_pool)); 1672 1673 return SVN_NO_ERROR; 1674} 1675 1676/* Read the log-to-phys header info of the index covering REVISION from FS 1677 * and return it in *HEADER. REV_FILE provides the pack / rev status. 1678 * Allocate *HEADER in RESULT_POOL, use SCRATCH_POOL for temporary 1679 * allocations. 1680 */ 1681static svn_error_t * 1682get_l2p_header(l2p_header_t **header, 1683 svn_fs_fs__revision_file_t *rev_file, 1684 svn_fs_t *fs, 1685 svn_revnum_t revision, 1686 apr_pool_t *result_pool, 1687 apr_pool_t *scratch_pool) 1688{ 1689 fs_fs_data_t *ffd = fs->fsap_data; 1690 svn_boolean_t is_cached = FALSE; 1691 1692 /* first, try cache lookop */ 1693 pair_cache_key_t key; 1694 key.revision = rev_file->start_revision; 1695 key.second = rev_file->is_packed; 1696 SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->l2p_header_cache, 1697 &key, result_pool)); 1698 if (is_cached) 1699 return SVN_NO_ERROR; 1700 1701 /* read from disk and cache the result */ 1702 SVN_ERR(get_l2p_header_body(header, rev_file, fs, revision, result_pool, 1703 scratch_pool)); 1704 1705 return SVN_NO_ERROR; 1706} 1707 1708svn_error_t * 1709svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids, 1710 svn_fs_t *fs, 1711 svn_revnum_t start_rev, 1712 apr_size_t count, 1713 apr_pool_t *result_pool, 1714 apr_pool_t *scratch_pool) 1715{ 1716 l2p_header_t *header = NULL; 1717 svn_revnum_t revision; 1718 svn_revnum_t last_rev = (svn_revnum_t)(start_rev + count); 1719 svn_fs_fs__revision_file_t *rev_file; 1720 apr_pool_t *header_pool = svn_pool_create(scratch_pool); 1721 1722 /* read index master data structure for the index covering START_REV */ 1723 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start_rev, 1724 header_pool, header_pool)); 1725 SVN_ERR(get_l2p_header(&header, rev_file, fs, start_rev, header_pool, 1726 header_pool)); 1727 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 1728 1729 /* Determine the length of the item index list for each rev. 1730 * Read new index headers as required. */ 1731 *max_ids = apr_array_make(result_pool, (int)count, sizeof(apr_uint64_t)); 1732 for (revision = start_rev; revision < last_rev; ++revision) 1733 { 1734 apr_uint64_t full_page_count; 1735 apr_uint64_t item_count; 1736 apr_size_t first_page_index, last_page_index; 1737 1738 if (revision >= header->first_revision + header->revision_count) 1739 { 1740 /* need to read the next index. Clear up memory used for the 1741 * previous one. Note that intermittent pack runs do not change 1742 * the number of items in a revision, i.e. there is no consistency 1743 * issue here. */ 1744 svn_pool_clear(header_pool); 1745 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision, 1746 header_pool, header_pool)); 1747 SVN_ERR(get_l2p_header(&header, rev_file, fs, revision, 1748 header_pool, header_pool)); 1749 SVN_ERR(svn_fs_fs__close_revision_file(rev_file)); 1750 } 1751 1752 /* in a revision with N index pages, the first N-1 index pages are 1753 * "full", i.e. contain HEADER->PAGE_SIZE entries */ 1754 first_page_index 1755 = header->page_table_index[revision - header->first_revision]; 1756 last_page_index 1757 = header->page_table_index[revision - header->first_revision + 1]; 1758 full_page_count = last_page_index - first_page_index - 1; 1759 item_count = full_page_count * header->page_size 1760 + header->page_table[last_page_index - 1].entry_count; 1761 1762 APR_ARRAY_PUSH(*max_ids, apr_uint64_t) = item_count; 1763 } 1764 1765 svn_pool_destroy(header_pool); 1766 return SVN_NO_ERROR; 1767} 1768 1769svn_error_t * 1770svn_fs_fs__item_offset(apr_off_t *absolute_position, 1771 svn_fs_t *fs, 1772 svn_fs_fs__revision_file_t *rev_file, 1773 svn_revnum_t revision, 1774 const svn_fs_fs__id_part_t *txn_id, 1775 apr_uint64_t item_index, 1776 apr_pool_t *scratch_pool) 1777{ 1778 svn_error_t *err = SVN_NO_ERROR; 1779 if (txn_id) 1780 { 1781 if (svn_fs_fs__use_log_addressing(fs)) 1782 { 1783 /* the txn is going to produce a rev with logical addressing. 1784 So, we need to get our info from the (proto) index file. */ 1785 SVN_ERR(l2p_proto_index_lookup(absolute_position, fs, txn_id, 1786 item_index, scratch_pool)); 1787 } 1788 else 1789 { 1790 /* for data in txns, item_index *is* the offset */ 1791 *absolute_position = item_index; 1792 } 1793 } 1794 else if (svn_fs_fs__use_log_addressing(fs)) 1795 { 1796 /* ordinary index lookup */ 1797 SVN_ERR(l2p_index_lookup(absolute_position, fs, rev_file, revision, 1798 item_index, scratch_pool)); 1799 } 1800 else if (rev_file->is_packed) 1801 { 1802 /* pack file with physical addressing */ 1803 apr_off_t rev_offset; 1804 SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, revision, 1805 scratch_pool)); 1806 *absolute_position = rev_offset + item_index; 1807 } 1808 else 1809 { 1810 /* for non-packed revs with physical addressing, 1811 item_index *is* the offset */ 1812 *absolute_position = item_index; 1813 } 1814 1815 return svn_error_trace(err); 1816} 1817 1818/* 1819 * phys-to-log index 1820 */ 1821svn_error_t * 1822svn_fs_fs__p2l_proto_index_open(apr_file_t **proto_index, 1823 const char *file_name, 1824 apr_pool_t *result_pool) 1825{ 1826 SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE 1827 | APR_CREATE | APR_APPEND | APR_BUFFERED, 1828 APR_OS_DEFAULT, result_pool)); 1829 1830 return SVN_NO_ERROR; 1831} 1832 1833 1834svn_error_t * 1835svn_fs_fs__p2l_proto_index_add_entry(apr_file_t *proto_index, 1836 const svn_fs_fs__p2l_entry_t *entry, 1837 apr_pool_t *scratch_pool) 1838{ 1839 apr_uint64_t revision; 1840 1841 /* Make sure all signed elements of ENTRY have non-negative values. 1842 * 1843 * For file offsets and sizes, this is a given as we use them to describe 1844 * absolute positions and sizes. For revisions, SVN_INVALID_REVNUM is 1845 * valid, hence we have to shift it by 1. 1846 */ 1847 SVN_ERR_ASSERT(entry->offset >= 0); 1848 SVN_ERR_ASSERT(entry->size >= 0); 1849 SVN_ERR_ASSERT( entry->item.revision >= 0 1850 || entry->item.revision == SVN_INVALID_REVNUM); 1851 1852 revision = entry->item.revision == SVN_INVALID_REVNUM 1853 ? 0 1854 : ((apr_uint64_t)entry->item.revision + 1); 1855 1856 /* Now, all values will nicely convert to uint64. */ 1857 /* Make sure to keep P2L_PROTO_INDEX_ENTRY_SIZE consistent with this: */ 1858 1859 SVN_ERR(write_uint64_to_proto_index(proto_index, entry->offset, 1860 scratch_pool)); 1861 SVN_ERR(write_uint64_to_proto_index(proto_index, entry->size, 1862 scratch_pool)); 1863 SVN_ERR(write_uint64_to_proto_index(proto_index, entry->type, 1864 scratch_pool)); 1865 SVN_ERR(write_uint64_to_proto_index(proto_index, entry->fnv1_checksum, 1866 scratch_pool)); 1867 SVN_ERR(write_uint64_to_proto_index(proto_index, revision, 1868 scratch_pool)); 1869 SVN_ERR(write_uint64_to_proto_index(proto_index, entry->item.number, 1870 scratch_pool)); 1871 1872 return SVN_NO_ERROR; 1873} 1874 1875/* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file 1876 * in *EOF, or error out in that case if EOF is NULL. *ENTRY is in an 1877 * undefined state if an end-of-file occurred. 1878 * Use SCRATCH_POOL for temporary allocations. 1879 */ 1880static svn_error_t * 1881read_p2l_entry_from_proto_index(apr_file_t *proto_index, 1882 svn_fs_fs__p2l_entry_t *entry, 1883 svn_boolean_t *eof, 1884 apr_pool_t *scratch_pool) 1885{ 1886 apr_uint64_t revision; 1887 1888 SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->offset, 1889 eof, scratch_pool)); 1890 SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->size, 1891 eof, scratch_pool)); 1892 SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->type, 1893 eof, scratch_pool)); 1894 SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->fnv1_checksum, 1895 eof, scratch_pool)); 1896 SVN_ERR(read_uint64_from_proto_index(proto_index, &revision, 1897 eof, scratch_pool)); 1898 SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->item.number, 1899 eof, scratch_pool)); 1900 1901 /* Do the inverse REVSION number conversion (see 1902 * svn_fs_fs__p2l_proto_index_add_entry), if we actually read a complete 1903 * record. 1904 */ 1905 if (!eof || !*eof) 1906 { 1907 /* Be careful with the arithmetics here (overflows and wrap-around): */ 1908 if (revision > 0 && revision - 1 > LONG_MAX) 1909 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL, 1910 _("Revision 0x%s too large, max = 0x%s"), 1911 apr_psprintf(scratch_pool, 1912 "%" APR_UINT64_T_HEX_FMT, 1913 revision), 1914 apr_psprintf(scratch_pool, 1915 "%" APR_UINT64_T_HEX_FMT, 1916 (apr_uint64_t)LONG_MAX)); 1917 1918 /* Shortening conversion from unsigned to signed int is well-defined 1919 * and not lossy in C because the value can be represented in the 1920 * target type. Also, cast to 'long' instead of 'svn_revnum_t' here 1921 * to provoke a compiler warning if those types should differ and we 1922 * would need to change the overflow checking logic. 1923 */ 1924 entry->item.revision = revision == 0 1925 ? SVN_INVALID_REVNUM 1926 : (long)(revision - 1); 1927 } 1928 1929 return SVN_NO_ERROR; 1930} 1931 1932svn_error_t * 1933svn_fs_fs__p2l_proto_index_next_offset(apr_off_t *next_offset, 1934 apr_file_t *proto_index, 1935 apr_pool_t *scratch_pool) 1936{ 1937 apr_off_t offset = 0; 1938 1939 /* Empty index file? */ 1940 SVN_ERR(svn_io_file_seek(proto_index, APR_END, &offset, scratch_pool)); 1941 if (offset == 0) 1942 { 1943 *next_offset = 0; 1944 } 1945 else 1946 { 1947 /* At least one entry. Read last entry. */ 1948 svn_fs_fs__p2l_entry_t entry; 1949 offset -= P2L_PROTO_INDEX_ENTRY_SIZE; 1950 1951 SVN_ERR(svn_io_file_seek(proto_index, APR_SET, &offset, scratch_pool)); 1952 SVN_ERR(read_p2l_entry_from_proto_index(proto_index, &entry, 1953 NULL, scratch_pool)); 1954 1955 /* Return next offset. */ 1956 *next_offset = entry.offset + entry.size; 1957 } 1958 1959 return SVN_NO_ERROR; 1960} 1961 1962svn_error_t * 1963svn_fs_fs__p2l_index_append(svn_checksum_t **checksum, 1964 svn_fs_t *fs, 1965 apr_file_t *index_file, 1966 const char *proto_file_name, 1967 svn_revnum_t revision, 1968 apr_pool_t *result_pool, 1969 apr_pool_t *scratch_pool) 1970{ 1971 fs_fs_data_t *ffd = fs->fsap_data; 1972 apr_uint64_t page_size = ffd->p2l_page_size; 1973 apr_file_t *proto_index = NULL; 1974 svn_stream_t *stream; 1975 int i; 1976 svn_boolean_t eof = FALSE; 1977 unsigned char encoded[ENCODED_INT_LENGTH]; 1978 svn_revnum_t last_revision = revision; 1979 apr_uint64_t last_compound = 0; 1980 1981 apr_uint64_t last_entry_end = 0; 1982 apr_uint64_t last_page_end = 0; 1983 apr_uint64_t last_buffer_size = 0; /* byte offset in the spill buffer at 1984 the begin of the current revision */ 1985 apr_uint64_t file_size = 0; 1986 1987 /* temporary data structures that collect the data which will be moved 1988 to the target file in a second step */ 1989 apr_pool_t *local_pool = svn_pool_create(scratch_pool); 1990 apr_array_header_t *table_sizes 1991 = apr_array_make(local_pool, 16, sizeof(apr_uint64_t)); 1992 1993 /* 64k blocks, spill after 16MB */ 1994 svn_spillbuf_t *buffer 1995 = svn_spillbuf__create(0x10000, 0x1000000, local_pool); 1996 1997 /* for loop temps ... */ 1998 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 1999 2000 /* start at the beginning of the source file */ 2001 SVN_ERR(svn_io_file_open(&proto_index, proto_file_name, 2002 APR_READ | APR_CREATE | APR_BUFFERED, 2003 APR_OS_DEFAULT, scratch_pool)); 2004 2005 /* process all entries until we fail due to EOF */ 2006 while (!eof) 2007 { 2008 svn_fs_fs__p2l_entry_t entry; 2009 apr_uint64_t entry_end; 2010 svn_boolean_t new_page = svn_spillbuf__get_size(buffer) == 0; 2011 apr_uint64_t compound; 2012 apr_int64_t rev_diff, compound_diff; 2013 2014 svn_pool_clear(iterpool); 2015 2016 /* (attempt to) read the next entry from the source */ 2017 SVN_ERR(read_p2l_entry_from_proto_index(proto_index, &entry, 2018 &eof, iterpool)); 2019 2020 /* "unused" (and usually non-existent) section to cover the offsets 2021 at the end the of the last page. */ 2022 if (eof) 2023 { 2024 file_size = last_entry_end; 2025 2026 entry.offset = last_entry_end; 2027 entry.size = APR_ALIGN(entry.offset, page_size) - entry.offset; 2028 entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED; 2029 entry.fnv1_checksum = 0; 2030 entry.item.revision = last_revision; 2031 entry.item.number = 0; 2032 } 2033 else 2034 { 2035 /* fix-up items created when the txn's target rev was unknown */ 2036 if (entry.item.revision == SVN_INVALID_REVNUM) 2037 entry.item.revision = revision; 2038 } 2039 2040 /* end pages if entry is extending beyond their boundaries */ 2041 entry_end = entry.offset + entry.size; 2042 while (entry_end - last_page_end > page_size) 2043 { 2044 apr_uint64_t buffer_size = svn_spillbuf__get_size(buffer); 2045 APR_ARRAY_PUSH(table_sizes, apr_uint64_t) 2046 = buffer_size - last_buffer_size; 2047 2048 last_buffer_size = buffer_size; 2049 last_page_end += page_size; 2050 new_page = TRUE; 2051 } 2052 2053 /* this entry starts a new table -> store its offset 2054 (all following entries in the same table will store sizes only) */ 2055 if (new_page) 2056 { 2057 SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, 2058 encode_uint(encoded, entry.offset), 2059 iterpool)); 2060 last_revision = revision; 2061 last_compound = 0; 2062 } 2063 2064 /* write simple item entry */ 2065 SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, 2066 encode_uint(encoded, entry.size), 2067 iterpool)); 2068 2069 rev_diff = entry.item.revision - last_revision; 2070 last_revision = entry.item.revision; 2071 2072 compound = entry.item.number * 8 + entry.type; 2073 compound_diff = compound - last_compound; 2074 last_compound = compound; 2075 2076 SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, 2077 encode_int(encoded, compound_diff), 2078 iterpool)); 2079 SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, 2080 encode_int(encoded, rev_diff), 2081 iterpool)); 2082 SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded, 2083 encode_uint(encoded, entry.fnv1_checksum), 2084 iterpool)); 2085 2086 last_entry_end = entry_end; 2087 } 2088 2089 /* close the source file */ 2090 SVN_ERR(svn_io_file_close(proto_index, local_pool)); 2091 2092 /* store length of last table */ 2093 APR_ARRAY_PUSH(table_sizes, apr_uint64_t) 2094 = svn_spillbuf__get_size(buffer) - last_buffer_size; 2095 2096 /* Open target stream. */ 2097 stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE, 2098 local_pool), 2099 NULL, checksum, svn_checksum_md5, FALSE, 2100 result_pool); 2101 2102 /* write the start revision, file size and page size */ 2103 SVN_ERR(svn_stream_puts(stream, P2L_STREAM_PREFIX)); 2104 SVN_ERR(stream_write_encoded(stream, revision)); 2105 SVN_ERR(stream_write_encoded(stream, file_size)); 2106 SVN_ERR(stream_write_encoded(stream, page_size)); 2107 2108 /* write the page table (actually, the sizes of each page description) */ 2109 SVN_ERR(stream_write_encoded(stream, table_sizes->nelts)); 2110 for (i = 0; i < table_sizes->nelts; ++i) 2111 { 2112 apr_uint64_t value = APR_ARRAY_IDX(table_sizes, i, apr_uint64_t); 2113 SVN_ERR(stream_write_encoded(stream, value)); 2114 } 2115 2116 /* append page contents and implicitly close STREAM */ 2117 SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool), 2118 stream, NULL, NULL, local_pool)); 2119 2120 svn_pool_destroy(iterpool); 2121 svn_pool_destroy(local_pool); 2122 2123 return SVN_NO_ERROR; 2124} 2125 2126/* If REV_FILE->P2L_STREAM is NULL, create a new stream for the phys-to-log 2127 * index for REVISION in FS using the rev / pack file provided by REV_FILE. 2128 */ 2129static svn_error_t * 2130auto_open_p2l_index(svn_fs_fs__revision_file_t *rev_file, 2131 svn_fs_t *fs, 2132 svn_revnum_t revision) 2133{ 2134 if (rev_file->p2l_stream == NULL) 2135 { 2136 fs_fs_data_t *ffd = fs->fsap_data; 2137 2138 SVN_ERR(svn_fs_fs__auto_read_footer(rev_file)); 2139 SVN_ERR(packed_stream_open(&rev_file->p2l_stream, 2140 rev_file->file, 2141 rev_file->p2l_offset, 2142 rev_file->footer_offset, 2143 P2L_STREAM_PREFIX, 2144 (apr_size_t)ffd->block_size, 2145 rev_file->pool, 2146 rev_file->pool)); 2147 } 2148 2149 return SVN_NO_ERROR; 2150} 2151 2152 2153/* Read the header data structure of the phys-to-log index for REVISION in 2154 * FS and return it in *HEADER, allocated in RESULT_POOL. Use REV_FILE to 2155 * access on-disk data. Use SCRATCH_POOL for temporary allocations. 2156 */ 2157static svn_error_t * 2158get_p2l_header(p2l_header_t **header, 2159 svn_fs_fs__revision_file_t *rev_file, 2160 svn_fs_t *fs, 2161 svn_revnum_t revision, 2162 apr_pool_t *result_pool, 2163 apr_pool_t *scratch_pool) 2164{ 2165 fs_fs_data_t *ffd = fs->fsap_data; 2166 apr_uint64_t value; 2167 apr_size_t i; 2168 apr_off_t offset; 2169 p2l_header_t *result; 2170 svn_boolean_t is_cached = FALSE; 2171 2172 /* look for the header data in our cache */ 2173 pair_cache_key_t key; 2174 key.revision = rev_file->start_revision; 2175 key.second = rev_file->is_packed; 2176 2177 SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->p2l_header_cache, 2178 &key, result_pool)); 2179 if (is_cached) 2180 return SVN_NO_ERROR; 2181 2182 /* not found -> must read it from disk. 2183 * Open index file or position read pointer to the begin of the file */ 2184 if (rev_file->p2l_stream == NULL) 2185 SVN_ERR(auto_open_p2l_index(rev_file, fs, rev_file->start_revision)); 2186 else 2187 packed_stream_seek(rev_file->p2l_stream, 0); 2188 2189 /* allocate result data structure */ 2190 result = apr_pcalloc(result_pool, sizeof(*result)); 2191 2192 /* Read table sizes, check them for plausibility and allocate page array. */ 2193 SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); 2194 result->first_revision = (svn_revnum_t)value; 2195 if (result->first_revision != rev_file->start_revision) 2196 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2197 _("Index rev / pack file revision numbers do not match")); 2198 2199 SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); 2200 result->file_size = value; 2201 if (result->file_size != (apr_uint64_t)rev_file->l2p_offset) 2202 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2203 _("Index offset and rev / pack file size do not match")); 2204 2205 SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); 2206 result->page_size = value; 2207 if (!result->page_size || (result->page_size & (result->page_size - 1))) 2208 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2209 _("P2L index page size is not a power of two")); 2210 2211 SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); 2212 result->page_count = (apr_size_t)value; 2213 if (result->page_count != (result->file_size - 1) / result->page_size + 1) 2214 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2215 _("P2L page count does not match rev / pack file size")); 2216 2217 result->offsets 2218 = apr_pcalloc(result_pool, (result->page_count + 1) * sizeof(*result->offsets)); 2219 2220 /* read page sizes and derive page description offsets from them */ 2221 result->offsets[0] = 0; 2222 for (i = 0; i < result->page_count; ++i) 2223 { 2224 SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); 2225 result->offsets[i+1] = result->offsets[i] + (apr_off_t)value; 2226 } 2227 2228 /* correct the offset values */ 2229 offset = packed_stream_offset(rev_file->p2l_stream); 2230 for (i = 0; i <= result->page_count; ++i) 2231 result->offsets[i] += offset; 2232 2233 /* cache the header data */ 2234 SVN_ERR(svn_cache__set(ffd->p2l_header_cache, &key, result, scratch_pool)); 2235 2236 /* return the result */ 2237 *header = result; 2238 2239 return SVN_NO_ERROR; 2240} 2241 2242/* Data structure that describes which p2l page info shall be extracted 2243 * from the cache and contains the fields that receive the result. 2244 */ 2245typedef struct p2l_page_info_baton_t 2246{ 2247 /* input variables */ 2248 /* revision identifying the index file */ 2249 svn_revnum_t revision; 2250 2251 /* offset within the page in rev / pack file */ 2252 apr_off_t offset; 2253 2254 /* output variables */ 2255 /* page containing OFFSET */ 2256 apr_size_t page_no; 2257 2258 /* first revision in this p2l index */ 2259 svn_revnum_t first_revision; 2260 2261 /* offset within the p2l index file describing this page */ 2262 apr_off_t start_offset; 2263 2264 /* offset within the p2l index file describing the following page */ 2265 apr_off_t next_offset; 2266 2267 /* PAGE_NO * PAGE_SIZE (if <= OFFSET) */ 2268 apr_off_t page_start; 2269 2270 /* total number of pages indexed */ 2271 apr_size_t page_count; 2272 2273 /* size of each page in pack / rev file */ 2274 apr_uint64_t page_size; 2275} p2l_page_info_baton_t; 2276 2277/* From HEADER and the list of all OFFSETS, fill BATON with the page info 2278 * requested by BATON->OFFSET. 2279 */ 2280static void 2281p2l_page_info_copy(p2l_page_info_baton_t *baton, 2282 const p2l_header_t *header, 2283 const apr_off_t *offsets) 2284{ 2285 /* if the requested offset is out of bounds, return info for 2286 * a zero-sized empty page right behind the last page. 2287 */ 2288 if (baton->offset / header->page_size < header->page_count) 2289 { 2290 /* This cast is safe because the value is < header->page_count. */ 2291 baton->page_no = (apr_size_t)(baton->offset / header->page_size); 2292 baton->start_offset = offsets[baton->page_no]; 2293 baton->next_offset = offsets[baton->page_no + 1]; 2294 baton->page_size = header->page_size; 2295 } 2296 else 2297 { 2298 /* Beyond the last page. */ 2299 baton->page_no = header->page_count; 2300 baton->start_offset = offsets[baton->page_no]; 2301 baton->next_offset = offsets[baton->page_no]; 2302 baton->page_size = 0; 2303 } 2304 2305 baton->first_revision = header->first_revision; 2306 baton->page_start = (apr_off_t)(header->page_size * baton->page_no); 2307 baton->page_count = header->page_count; 2308} 2309 2310/* Implement svn_cache__partial_getter_func_t: extract the p2l page info 2311 * requested by BATON and return it in BATON. 2312 */ 2313static svn_error_t * 2314p2l_page_info_func(void **out, 2315 const void *data, 2316 apr_size_t data_len, 2317 void *baton, 2318 apr_pool_t *result_pool) 2319{ 2320 /* all the pointers to cached data we need */ 2321 const p2l_header_t *header = data; 2322 const apr_off_t *offsets 2323 = svn_temp_deserializer__ptr(header, 2324 (const void *const *)&header->offsets); 2325 2326 /* copy data from cache to BATON */ 2327 p2l_page_info_copy(baton, header, offsets); 2328 return SVN_NO_ERROR; 2329} 2330 2331/* Read the header data structure of the phys-to-log index for revision 2332 * BATON->REVISION in FS. Return in *BATON all info relevant to read the 2333 * index page for the rev / pack file offset BATON->OFFSET. Use REV_FILE 2334 * to access on-disk data. Use SCRATCH_POOL for temporary allocations. 2335 */ 2336static svn_error_t * 2337get_p2l_page_info(p2l_page_info_baton_t *baton, 2338 svn_fs_fs__revision_file_t *rev_file, 2339 svn_fs_t *fs, 2340 apr_pool_t *scratch_pool) 2341{ 2342 fs_fs_data_t *ffd = fs->fsap_data; 2343 p2l_header_t *header; 2344 svn_boolean_t is_cached = FALSE; 2345 void *dummy = NULL; 2346 2347 /* look for the header data in our cache */ 2348 pair_cache_key_t key; 2349 key.revision = rev_file->start_revision; 2350 key.second = rev_file->is_packed; 2351 2352 SVN_ERR(svn_cache__get_partial(&dummy, &is_cached, ffd->p2l_header_cache, 2353 &key, p2l_page_info_func, baton, 2354 scratch_pool)); 2355 if (is_cached) 2356 return SVN_NO_ERROR; 2357 2358 SVN_ERR(get_p2l_header(&header, rev_file, fs, baton->revision, 2359 scratch_pool, scratch_pool)); 2360 2361 /* copy the requested info into *BATON */ 2362 p2l_page_info_copy(baton, header, header->offsets); 2363 2364 return SVN_NO_ERROR; 2365} 2366 2367/* Read a mapping entry from the phys-to-log index STREAM and append it to 2368 * RESULT. *ITEM_INDEX contains the phys offset for the entry and will 2369 * be moved forward by the size of entry. 2370 */ 2371static svn_error_t * 2372read_entry(svn_fs_fs__packed_number_stream_t *stream, 2373 apr_off_t *item_offset, 2374 svn_revnum_t *last_revision, 2375 apr_uint64_t *last_compound, 2376 apr_array_header_t *result) 2377{ 2378 apr_uint64_t value; 2379 2380 svn_fs_fs__p2l_entry_t entry; 2381 2382 entry.offset = *item_offset; 2383 SVN_ERR(packed_stream_get(&value, stream)); 2384 entry.size = (apr_off_t)value; 2385 2386 SVN_ERR(packed_stream_get(&value, stream)); 2387 *last_compound += decode_int(value); 2388 2389 entry.type = *last_compound & 7; 2390 entry.item.number = *last_compound / 8; 2391 2392 /* Verify item type. */ 2393 if (entry.type > SVN_FS_FS__ITEM_TYPE_CHANGES) 2394 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2395 _("Invalid item type in P2L index")); 2396 if ( entry.type == SVN_FS_FS__ITEM_TYPE_CHANGES 2397 && entry.item.number != SVN_FS_FS__ITEM_INDEX_CHANGES) 2398 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2399 _("Changed path list must have item number 1")); 2400 2401 SVN_ERR(packed_stream_get(&value, stream)); 2402 *last_revision += (svn_revnum_t)decode_int(value); 2403 entry.item.revision = *last_revision; 2404 2405 SVN_ERR(packed_stream_get(&value, stream)); 2406 entry.fnv1_checksum = (apr_uint32_t)value; 2407 2408 /* Truncating the checksum to 32 bits may have hidden random data in the 2409 * unused extra bits of the on-disk representation (7/8 bit representation 2410 * uses 5 bytes on disk for the 32 bit value, leaving 3 bits unused). */ 2411 if (value > APR_UINT32_MAX) 2412 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2413 _("Invalid FNV1 checksum in P2L index")); 2414 2415 /* Some of the index data for empty rev / pack file sections will not be 2416 * used during normal operation. Thus, we have strict rules for the 2417 * contents of those unused fields. */ 2418 if (entry.type == SVN_FS_FS__ITEM_TYPE_UNUSED) 2419 if ( entry.item.number != SVN_FS_FS__ITEM_INDEX_UNUSED 2420 || entry.fnv1_checksum != 0) 2421 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2422 _("Empty regions must have item number 0 and checksum 0")); 2423 2424 APR_ARRAY_PUSH(result, svn_fs_fs__p2l_entry_t) = entry; 2425 *item_offset += entry.size; 2426 2427 return SVN_NO_ERROR; 2428} 2429 2430/* Read the phys-to-log mappings for the cluster beginning at rev file 2431 * offset PAGE_START from the index for START_REVISION in FS. The data 2432 * can be found in the index page beginning at START_OFFSET with the next 2433 * page beginning at NEXT_OFFSET. PAGE_SIZE is the L2P index page size. 2434 * Return the relevant index entries in *ENTRIES. Use REV_FILE to access 2435 * on-disk data. Allocate *ENTRIES in RESULT_POOL. 2436 */ 2437static svn_error_t * 2438get_p2l_page(apr_array_header_t **entries, 2439 svn_fs_fs__revision_file_t *rev_file, 2440 svn_fs_t *fs, 2441 svn_revnum_t start_revision, 2442 apr_off_t start_offset, 2443 apr_off_t next_offset, 2444 apr_off_t page_start, 2445 apr_uint64_t page_size, 2446 apr_pool_t *result_pool) 2447{ 2448 apr_uint64_t value; 2449 apr_array_header_t *result 2450 = apr_array_make(result_pool, 16, sizeof(svn_fs_fs__p2l_entry_t)); 2451 apr_off_t item_offset; 2452 apr_off_t offset; 2453 svn_revnum_t last_revision; 2454 apr_uint64_t last_compound; 2455 2456 /* open index and navigate to page start */ 2457 SVN_ERR(auto_open_p2l_index(rev_file, fs, start_revision)); 2458 packed_stream_seek(rev_file->p2l_stream, start_offset); 2459 2460 /* read rev file offset of the first page entry (all page entries will 2461 * only store their sizes). */ 2462 SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); 2463 item_offset = (apr_off_t)value; 2464 2465 /* read all entries of this page */ 2466 last_revision = start_revision; 2467 last_compound = 0; 2468 2469 /* Special case: empty pages. */ 2470 if (start_offset == next_offset) 2471 { 2472 /* Empty page. This only happens if the first entry of the next page 2473 * also covers this page (and possibly more) completely. */ 2474 SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset, 2475 &last_revision, &last_compound, result)); 2476 } 2477 else 2478 { 2479 /* Read non-empty page. */ 2480 do 2481 { 2482 SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset, 2483 &last_revision, &last_compound, result)); 2484 offset = packed_stream_offset(rev_file->p2l_stream); 2485 } 2486 while (offset < next_offset); 2487 2488 /* We should now be exactly at the next offset, i.e. the numbers in 2489 * the stream cannot overlap into the next page description. */ 2490 if (offset != next_offset) 2491 return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL, 2492 _("P2L page description overlaps with next page description")); 2493 2494 /* if we haven't covered the cluster end yet, we must read the first 2495 * entry of the next page */ 2496 if (item_offset < page_start + page_size) 2497 { 2498 SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream)); 2499 item_offset = (apr_off_t)value; 2500 last_revision = start_revision; 2501 last_compound = 0; 2502 SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset, 2503 &last_revision, &last_compound, result)); 2504 } 2505 } 2506 2507 *entries = result; 2508 2509 return SVN_NO_ERROR; 2510} 2511 2512/* If it cannot be found in FS's caches, read the p2l index page selected 2513 * by BATON->OFFSET from REV_FILE. Don't read the page if it precedes 2514 * MIN_OFFSET. Set *END to TRUE if the caller should stop refeching. 2515 * 2516 * *BATON will be updated with the selected page's info and SCRATCH_POOL 2517 * will be used for temporary allocations. If the data is alread in the 2518 * cache, descrease *LEAKING_BUCKET and increase it otherwise. With that 2519 * pattern we will still read all pages from the block even if some of 2520 * them survived in the cached. 2521 */ 2522static svn_error_t * 2523prefetch_p2l_page(svn_boolean_t *end, 2524 int *leaking_bucket, 2525 svn_fs_t *fs, 2526 svn_fs_fs__revision_file_t *rev_file, 2527 p2l_page_info_baton_t *baton, 2528 apr_off_t min_offset, 2529 apr_pool_t *scratch_pool) 2530{ 2531 fs_fs_data_t *ffd = fs->fsap_data; 2532 svn_boolean_t already_cached; 2533 apr_array_header_t *page; 2534 svn_fs_fs__page_cache_key_t key = { 0 }; 2535 2536 /* fetch the page info */ 2537 *end = FALSE; 2538 baton->revision = baton->first_revision; 2539 SVN_ERR(get_p2l_page_info(baton, rev_file, fs, scratch_pool)); 2540 if (baton->start_offset < min_offset || !rev_file->p2l_stream) 2541 { 2542 /* page outside limits -> stop prefetching */ 2543 *end = TRUE; 2544 return SVN_NO_ERROR; 2545 } 2546 2547 /* do we have that page in our caches already? */ 2548 assert(baton->first_revision <= APR_UINT32_MAX); 2549 key.revision = (apr_uint32_t)baton->first_revision; 2550 key.is_packed = svn_fs_fs__is_packed_rev(fs, baton->first_revision); 2551 key.page = baton->page_no; 2552 SVN_ERR(svn_cache__has_key(&already_cached, ffd->p2l_page_cache, 2553 &key, scratch_pool)); 2554 2555 /* yes, already cached */ 2556 if (already_cached) 2557 { 2558 /* stop prefetching if most pages are already cached. */ 2559 if (!--*leaking_bucket) 2560 *end = TRUE; 2561 2562 return SVN_NO_ERROR; 2563 } 2564 2565 ++*leaking_bucket; 2566 2567 /* read from disk */ 2568 SVN_ERR(get_p2l_page(&page, rev_file, fs, 2569 baton->first_revision, 2570 baton->start_offset, 2571 baton->next_offset, 2572 baton->page_start, 2573 baton->page_size, 2574 scratch_pool)); 2575 2576 /* and put it into our cache */ 2577 SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page, scratch_pool)); 2578 2579 return SVN_NO_ERROR; 2580} 2581 2582/* Lookup & construct the baton and key information that we will need for 2583 * a P2L page cache lookup. We want the page covering OFFSET in the rev / 2584 * pack file containing REVSION in FS. Return the results in *PAGE_INFO_P 2585 * and *KEY_P. Read data through REV_FILE. Use SCRATCH_POOL for temporary 2586 * allocations. 2587 */ 2588static svn_error_t * 2589get_p2l_keys(p2l_page_info_baton_t *page_info_p, 2590 svn_fs_fs__page_cache_key_t *key_p, 2591 svn_fs_fs__revision_file_t *rev_file, 2592 svn_fs_t *fs, 2593 svn_revnum_t revision, 2594 apr_off_t offset, 2595 apr_pool_t *scratch_pool) 2596{ 2597 p2l_page_info_baton_t page_info; 2598 2599 /* request info for the index pages that describes the pack / rev file 2600 * contents at pack / rev file position OFFSET. */ 2601 page_info.offset = offset; 2602 page_info.revision = revision; 2603 SVN_ERR(get_p2l_page_info(&page_info, rev_file, fs, scratch_pool)); 2604 2605 /* if the offset refers to a non-existent page, bail out */ 2606 if (page_info.page_count <= page_info.page_no) 2607 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL, 2608 _("Offset %s too large in revision %ld"), 2609 apr_off_t_toa(scratch_pool, offset), revision); 2610 2611 /* return results */ 2612 if (page_info_p) 2613 *page_info_p = page_info; 2614 2615 /* construct cache key */ 2616 if (key_p) 2617 { 2618 svn_fs_fs__page_cache_key_t key = { 0 }; 2619 assert(page_info.first_revision <= APR_UINT32_MAX); 2620 key.revision = (apr_uint32_t)page_info.first_revision; 2621 key.is_packed = rev_file->is_packed; 2622 key.page = page_info.page_no; 2623 2624 *key_p = key; 2625 } 2626 2627 return SVN_NO_ERROR; 2628} 2629 2630/* qsort-compatible compare function that compares the OFFSET of the 2631 * svn_fs_fs__p2l_entry_t in *LHS with the apr_off_t in *RHS. */ 2632static int 2633compare_start_p2l_entry(const void *lhs, 2634 const void *rhs) 2635{ 2636 const svn_fs_fs__p2l_entry_t *entry = lhs; 2637 apr_off_t start = *(const apr_off_t*)rhs; 2638 apr_off_t diff = entry->offset - start; 2639 2640 /* restrict result to int */ 2641 return diff < 0 ? -1 : (diff == 0 ? 0 : 1); 2642} 2643 2644/* From the PAGE_ENTRIES array of svn_fs_fs__p2l_entry_t, ordered 2645 * by their OFFSET member, copy all elements overlapping the range 2646 * [BLOCK_START, BLOCK_END) to ENTRIES. */ 2647static void 2648append_p2l_entries(apr_array_header_t *entries, 2649 apr_array_header_t *page_entries, 2650 apr_off_t block_start, 2651 apr_off_t block_end) 2652{ 2653 const svn_fs_fs__p2l_entry_t *entry; 2654 int idx = svn_sort__bsearch_lower_bound(page_entries, &block_start, 2655 compare_start_p2l_entry); 2656 2657 /* start at the first entry that overlaps with BLOCK_START */ 2658 if (idx > 0) 2659 { 2660 entry = &APR_ARRAY_IDX(page_entries, idx - 1, svn_fs_fs__p2l_entry_t); 2661 if (entry->offset + entry->size > block_start) 2662 --idx; 2663 } 2664 2665 /* copy all entries covering the requested range */ 2666 for ( ; idx < page_entries->nelts; ++idx) 2667 { 2668 entry = &APR_ARRAY_IDX(page_entries, idx, svn_fs_fs__p2l_entry_t); 2669 if (entry->offset >= block_end) 2670 break; 2671 2672 APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t) = *entry; 2673 } 2674} 2675 2676/* Auxilliary struct passed to p2l_entries_func selecting the relevant 2677 * data range. */ 2678typedef struct p2l_entries_baton_t 2679{ 2680 apr_off_t start; 2681 apr_off_t end; 2682} p2l_entries_baton_t; 2683 2684/* Implement svn_cache__partial_getter_func_t: extract p2l entries from 2685 * the page in DATA which overlap the p2l_entries_baton_t in BATON. 2686 * The target array is already provided in *OUT. 2687 */ 2688static svn_error_t * 2689p2l_entries_func(void **out, 2690 const void *data, 2691 apr_size_t data_len, 2692 void *baton, 2693 apr_pool_t *result_pool) 2694{ 2695 apr_array_header_t *entries = *(apr_array_header_t **)out; 2696 const apr_array_header_t *raw_page = data; 2697 p2l_entries_baton_t *block = baton; 2698 2699 /* Make PAGE a readable APR array. */ 2700 apr_array_header_t page = *raw_page; 2701 page.elts = (void *)svn_temp_deserializer__ptr(raw_page, 2702 (const void * const *)&raw_page->elts); 2703 2704 /* append relevant information to result */ 2705 append_p2l_entries(entries, &page, block->start, block->end); 2706 2707 return SVN_NO_ERROR; 2708} 2709 2710 2711/* Body of svn_fs_fs__p2l_index_lookup. However, do a single index page 2712 * lookup and append the result to the ENTRIES array provided by the caller. 2713 * Use successive calls to cover larger ranges. 2714 */ 2715static svn_error_t * 2716p2l_index_lookup(apr_array_header_t *entries, 2717 svn_fs_fs__revision_file_t *rev_file, 2718 svn_fs_t *fs, 2719 svn_revnum_t revision, 2720 apr_off_t block_start, 2721 apr_off_t block_end, 2722 apr_pool_t *scratch_pool) 2723{ 2724 fs_fs_data_t *ffd = fs->fsap_data; 2725 svn_fs_fs__page_cache_key_t key; 2726 svn_boolean_t is_cached = FALSE; 2727 p2l_page_info_baton_t page_info; 2728 apr_array_header_t *local_result = entries; 2729 2730 /* baton selecting the relevant entries from the one page we access */ 2731 p2l_entries_baton_t block; 2732 block.start = block_start; 2733 block.end = block_end; 2734 2735 /* if we requested an empty range, the result would be empty */ 2736 SVN_ERR_ASSERT(block_start < block_end); 2737 2738 /* look for the fist page of the range in our cache */ 2739 SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, block_start, 2740 scratch_pool)); 2741 SVN_ERR(svn_cache__get_partial((void**)&local_result, &is_cached, 2742 ffd->p2l_page_cache, &key, p2l_entries_func, 2743 &block, scratch_pool)); 2744 2745 if (!is_cached) 2746 { 2747 svn_boolean_t end; 2748 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 2749 apr_off_t original_page_start = page_info.page_start; 2750 int leaking_bucket = 4; 2751 p2l_page_info_baton_t prefetch_info = page_info; 2752 apr_array_header_t *page_entries; 2753 2754 apr_off_t max_offset 2755 = APR_ALIGN(page_info.next_offset, ffd->block_size); 2756 apr_off_t min_offset 2757 = APR_ALIGN(page_info.start_offset, ffd->block_size) - ffd->block_size; 2758 2759 /* Since we read index data in larger chunks, we probably got more 2760 * page data than we requested. Parse & cache that until either we 2761 * encounter pages already cached or reach the end of the buffer. 2762 */ 2763 2764 /* pre-fetch preceding pages */ 2765 if (ffd->use_block_read) 2766 { 2767 end = FALSE; 2768 prefetch_info.offset = original_page_start; 2769 while (prefetch_info.offset >= prefetch_info.page_size && !end) 2770 { 2771 svn_pool_clear(iterpool); 2772 2773 prefetch_info.offset -= prefetch_info.page_size; 2774 SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file, 2775 &prefetch_info, min_offset, 2776 iterpool)); 2777 } 2778 } 2779 2780 /* fetch page from disk and put it into the cache */ 2781 SVN_ERR(get_p2l_page(&page_entries, rev_file, fs, 2782 page_info.first_revision, 2783 page_info.start_offset, 2784 page_info.next_offset, 2785 page_info.page_start, 2786 page_info.page_size, iterpool)); 2787 2788 /* The last cache entry must not end beyond the range covered by 2789 * this index. The same applies for any subset of entries. */ 2790 if (page_entries->nelts) 2791 { 2792 const svn_fs_fs__p2l_entry_t *entry 2793 = &APR_ARRAY_IDX(page_entries, page_entries->nelts - 1, 2794 svn_fs_fs__p2l_entry_t); 2795 if ( entry->offset + entry->size 2796 > page_info.page_size * page_info.page_count) 2797 return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL, 2798 _("Last P2L index entry extends beyond " 2799 "the last page in revision %ld."), 2800 revision); 2801 } 2802 2803 SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page_entries, 2804 iterpool)); 2805 2806 /* append relevant information to result */ 2807 append_p2l_entries(entries, page_entries, block_start, block_end); 2808 2809 /* pre-fetch following pages */ 2810 if (ffd->use_block_read) 2811 { 2812 end = FALSE; 2813 leaking_bucket = 4; 2814 prefetch_info = page_info; 2815 prefetch_info.offset = original_page_start; 2816 while ( prefetch_info.next_offset < max_offset 2817 && prefetch_info.page_no + 1 < prefetch_info.page_count 2818 && !end) 2819 { 2820 svn_pool_clear(iterpool); 2821 2822 prefetch_info.offset += prefetch_info.page_size; 2823 SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file, 2824 &prefetch_info, min_offset, 2825 iterpool)); 2826 } 2827 } 2828 2829 svn_pool_destroy(iterpool); 2830 } 2831 2832 /* We access a valid page (otherwise, we had seen an error in the 2833 * get_p2l_keys request). Hence, at least one entry must be found. */ 2834 SVN_ERR_ASSERT(entries->nelts > 0); 2835 2836 /* Add an "unused" entry if it extends beyond the end of the data file. 2837 * Since the index page size might be smaller than the current data 2838 * read block size, the trailing "unused" entry in this index may not 2839 * fully cover the end of the last block. */ 2840 if (page_info.page_no + 1 >= page_info.page_count) 2841 { 2842 svn_fs_fs__p2l_entry_t *entry 2843 = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t); 2844 2845 apr_off_t entry_end = entry->offset + entry->size; 2846 if (entry_end < block_end) 2847 { 2848 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) 2849 { 2850 /* extend the terminal filler */ 2851 entry->size = block_end - entry->offset; 2852 } 2853 else 2854 { 2855 /* No terminal filler. Add one. */ 2856 entry = apr_array_push(entries); 2857 entry->offset = entry_end; 2858 entry->size = block_end - entry_end; 2859 entry->type = SVN_FS_FS__ITEM_TYPE_UNUSED; 2860 entry->fnv1_checksum = 0; 2861 entry->item.revision = SVN_INVALID_REVNUM; 2862 entry->item.number = SVN_FS_FS__ITEM_INDEX_UNUSED; 2863 } 2864 } 2865 } 2866 2867 return SVN_NO_ERROR; 2868} 2869 2870svn_error_t * 2871svn_fs_fs__p2l_index_lookup(apr_array_header_t **entries, 2872 svn_fs_t *fs, 2873 svn_fs_fs__revision_file_t *rev_file, 2874 svn_revnum_t revision, 2875 apr_off_t block_start, 2876 apr_off_t block_size, 2877 apr_pool_t *result_pool, 2878 apr_pool_t *scratch_pool) 2879{ 2880 apr_off_t block_end = block_start + block_size; 2881 2882 /* the receiving container */ 2883 int last_count = 0; 2884 apr_array_header_t *result = apr_array_make(result_pool, 16, 2885 sizeof(svn_fs_fs__p2l_entry_t)); 2886 2887 /* Fetch entries page-by-page. Since the p2l index is supposed to cover 2888 * every single byte in the rev / pack file - even unused sections - 2889 * every iteration must result in some progress. */ 2890 while (block_start < block_end) 2891 { 2892 svn_fs_fs__p2l_entry_t *entry; 2893 SVN_ERR(p2l_index_lookup(result, rev_file, fs, revision, block_start, 2894 block_end, scratch_pool)); 2895 SVN_ERR_ASSERT(result->nelts > 0); 2896 2897 /* continue directly behind last item */ 2898 entry = &APR_ARRAY_IDX(result, result->nelts-1, svn_fs_fs__p2l_entry_t); 2899 block_start = entry->offset + entry->size; 2900 2901 /* Some paranoia check. Successive iterations should never return 2902 * duplicates but if it did, we might get into trouble later on. */ 2903 if (last_count > 0 && last_count < result->nelts) 2904 { 2905 entry = &APR_ARRAY_IDX(result, last_count - 1, 2906 svn_fs_fs__p2l_entry_t); 2907 SVN_ERR_ASSERT(APR_ARRAY_IDX(result, last_count, 2908 svn_fs_fs__p2l_entry_t).offset 2909 >= entry->offset + entry->size); 2910 } 2911 2912 last_count = result->nelts; 2913 } 2914 2915 *entries = result; 2916 return SVN_NO_ERROR; 2917} 2918 2919/* compare_fn_t comparing a svn_fs_fs__p2l_entry_t at LHS with an offset 2920 * RHS. 2921 */ 2922static int 2923compare_p2l_entry_offsets(const void *lhs, const void *rhs) 2924{ 2925 const svn_fs_fs__p2l_entry_t *entry = (const svn_fs_fs__p2l_entry_t *)lhs; 2926 apr_off_t offset = *(const apr_off_t *)rhs; 2927 2928 return entry->offset < offset ? -1 : (entry->offset == offset ? 0 : 1); 2929} 2930 2931/* Cached data extraction utility. DATA is a P2L index page, e.g. an APR 2932 * array of svn_fs_fs__p2l_entry_t elements. Return the entry for the item, 2933 * allocated in RESULT_POOL, starting at OFFSET or NULL if that's not an 2934 * the start offset of any item. Use SCRATCH_POOL for temporary allocations. 2935 */ 2936static svn_fs_fs__p2l_entry_t * 2937get_p2l_entry_from_cached_page(const void *data, 2938 apr_uint64_t offset, 2939 apr_pool_t *result_pool, 2940 apr_pool_t *scratch_pool) 2941{ 2942 /* resolve all pointer values of in-cache data */ 2943 const apr_array_header_t *page = data; 2944 apr_array_header_t *entries = apr_pmemdup(scratch_pool, page, 2945 sizeof(*page)); 2946 svn_fs_fs__p2l_entry_t *entry; 2947 2948 entries->elts = (char *)svn_temp_deserializer__ptr(page, 2949 (const void *const *)&page->elts); 2950 2951 /* search of the offset we want */ 2952 entry = svn_sort__array_lookup(entries, &offset, NULL, 2953 (int (*)(const void *, const void *))compare_p2l_entry_offsets); 2954 2955 /* return it, if it is a perfect match */ 2956 return entry ? apr_pmemdup(result_pool, entry, sizeof(*entry)) : NULL; 2957} 2958 2959/* Implements svn_cache__partial_getter_func_t for P2L index pages, copying 2960 * the entry for the apr_off_t at BATON into *OUT. *OUT will be NULL if 2961 * there is no matching entry in the index page at DATA. 2962 */ 2963static svn_error_t * 2964p2l_entry_lookup_func(void **out, 2965 const void *data, 2966 apr_size_t data_len, 2967 void *baton, 2968 apr_pool_t *result_pool) 2969{ 2970 svn_fs_fs__p2l_entry_t *entry 2971 = get_p2l_entry_from_cached_page(data, *(apr_off_t *)baton, result_pool, 2972 result_pool); 2973 2974 *out = entry && entry->offset == *(apr_off_t *)baton 2975 ? apr_pmemdup(result_pool, entry, sizeof(*entry)) 2976 : NULL; 2977 2978 return SVN_NO_ERROR; 2979} 2980 2981svn_error_t * 2982svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p2l_entry_t **entry_p, 2983 svn_fs_t *fs, 2984 svn_fs_fs__revision_file_t *rev_file, 2985 svn_revnum_t revision, 2986 apr_off_t offset, 2987 apr_pool_t *result_pool, 2988 apr_pool_t *scratch_pool) 2989{ 2990 fs_fs_data_t *ffd = fs->fsap_data; 2991 svn_fs_fs__page_cache_key_t key = { 0 }; 2992 svn_boolean_t is_cached = FALSE; 2993 p2l_page_info_baton_t page_info; 2994 2995 *entry_p = NULL; 2996 2997 /* look for this info in our cache */ 2998 SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, offset, 2999 scratch_pool)); 3000 SVN_ERR(svn_cache__get_partial((void**)entry_p, &is_cached, 3001 ffd->p2l_page_cache, &key, 3002 p2l_entry_lookup_func, &offset, 3003 result_pool)); 3004 if (!is_cached) 3005 { 3006 /* do a standard index lookup. This is will automatically prefetch 3007 * data to speed up future lookups. */ 3008 apr_array_header_t *entries = apr_array_make(result_pool, 1, 3009 sizeof(**entry_p)); 3010 SVN_ERR(p2l_index_lookup(entries, rev_file, fs, revision, offset, 3011 offset + 1, scratch_pool)); 3012 3013 /* Find the entry that we want. */ 3014 *entry_p = svn_sort__array_lookup(entries, &offset, NULL, 3015 (int (*)(const void *, const void *))compare_p2l_entry_offsets); 3016 } 3017 3018 return SVN_NO_ERROR; 3019} 3020 3021/* Implements svn_cache__partial_getter_func_t for P2L headers, setting *OUT 3022 * to the largest the first offset not covered by this P2L index. 3023 */ 3024static svn_error_t * 3025p2l_get_max_offset_func(void **out, 3026 const void *data, 3027 apr_size_t data_len, 3028 void *baton, 3029 apr_pool_t *result_pool) 3030{ 3031 const p2l_header_t *header = data; 3032 apr_off_t max_offset = header->file_size; 3033 *out = apr_pmemdup(result_pool, &max_offset, sizeof(max_offset)); 3034 3035 return SVN_NO_ERROR; 3036} 3037 3038/* Core functionality of to svn_fs_fs__p2l_get_max_offset with identical 3039 * signature. */ 3040static svn_error_t * 3041p2l_get_max_offset(apr_off_t *offset, 3042 svn_fs_t *fs, 3043 svn_fs_fs__revision_file_t *rev_file, 3044 svn_revnum_t revision, 3045 apr_pool_t *scratch_pool) 3046{ 3047 fs_fs_data_t *ffd = fs->fsap_data; 3048 p2l_header_t *header; 3049 svn_boolean_t is_cached = FALSE; 3050 apr_off_t *offset_p; 3051 3052 /* look for the header data in our cache */ 3053 pair_cache_key_t key; 3054 key.revision = rev_file->start_revision; 3055 key.second = rev_file->is_packed; 3056 3057 SVN_ERR(svn_cache__get_partial((void **)&offset_p, &is_cached, 3058 ffd->p2l_header_cache, &key, 3059 p2l_get_max_offset_func, NULL, 3060 scratch_pool)); 3061 if (is_cached) 3062 { 3063 *offset = *offset_p; 3064 return SVN_NO_ERROR; 3065 } 3066 3067 SVN_ERR(get_p2l_header(&header, rev_file, fs, revision, scratch_pool, 3068 scratch_pool)); 3069 *offset = header->file_size; 3070 3071 return SVN_NO_ERROR; 3072} 3073 3074svn_error_t * 3075svn_fs_fs__p2l_get_max_offset(apr_off_t *offset, 3076 svn_fs_t *fs, 3077 svn_fs_fs__revision_file_t *rev_file, 3078 svn_revnum_t revision, 3079 apr_pool_t *scratch_pool) 3080{ 3081 return svn_error_trace(p2l_get_max_offset(offset, fs, rev_file, revision, 3082 scratch_pool)); 3083} 3084 3085/* Calculate the FNV1 checksum over the offset range in REV_FILE, covered by 3086 * ENTRY. Store the result in ENTRY->FNV1_CHECKSUM. Use SCRATCH_POOL for 3087 * temporary allocations. */ 3088static svn_error_t * 3089calc_fnv1(svn_fs_fs__p2l_entry_t *entry, 3090 svn_fs_fs__revision_file_t *rev_file, 3091 apr_pool_t *scratch_pool) 3092{ 3093 unsigned char buffer[4096]; 3094 svn_checksum_t *checksum; 3095 svn_checksum_ctx_t *context 3096 = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, scratch_pool); 3097 apr_off_t size = entry->size; 3098 3099 /* Special rules apply to unused sections / items. The data must be a 3100 * sequence of NUL bytes (not checked here) and the checksum is fixed to 0. 3101 */ 3102 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) 3103 { 3104 entry->fnv1_checksum = 0; 3105 return SVN_NO_ERROR; 3106 } 3107 3108 /* Read the block and feed it to the checksum calculator. */ 3109 SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &entry->offset, 3110 scratch_pool)); 3111 while (size > 0) 3112 { 3113 apr_size_t to_read = size > sizeof(buffer) 3114 ? sizeof(buffer) 3115 : (apr_size_t)size; 3116 SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, to_read, NULL, 3117 NULL, scratch_pool)); 3118 SVN_ERR(svn_checksum_update(context, buffer, to_read)); 3119 size -= to_read; 3120 } 3121 3122 /* Store final checksum in ENTRY. */ 3123 SVN_ERR(svn_checksum_final(&checksum, context, scratch_pool)); 3124 entry->fnv1_checksum = ntohl(*(const apr_uint32_t *)checksum->digest); 3125 3126 return SVN_NO_ERROR; 3127} 3128 3129/* 3130 * Index (re-)creation utilities. 3131 */ 3132 3133svn_error_t * 3134svn_fs_fs__p2l_index_from_p2l_entries(const char **protoname, 3135 svn_fs_t *fs, 3136 svn_fs_fs__revision_file_t *rev_file, 3137 apr_array_header_t *entries, 3138 apr_pool_t *result_pool, 3139 apr_pool_t *scratch_pool) 3140{ 3141 apr_file_t *proto_index; 3142 3143 /* Use a subpool for immediate temp file cleanup at the end of this 3144 * function. */ 3145 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 3146 int i; 3147 3148 /* Create a proto-index file. */ 3149 SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL, 3150 svn_io_file_del_on_pool_cleanup, 3151 result_pool, scratch_pool)); 3152 SVN_ERR(svn_fs_fs__p2l_proto_index_open(&proto_index, *protoname, 3153 scratch_pool)); 3154 3155 /* Write ENTRIES to proto-index file and calculate checksums as we go. */ 3156 for (i = 0; i < entries->nelts; ++i) 3157 { 3158 svn_fs_fs__p2l_entry_t *entry 3159 = APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t *); 3160 svn_pool_clear(iterpool); 3161 3162 SVN_ERR(calc_fnv1(entry, rev_file, iterpool)); 3163 SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(proto_index, entry, 3164 iterpool)); 3165 } 3166 3167 /* Convert proto-index into final index and move it into position. 3168 * Note that REV_FILE contains the start revision of the shard file if it 3169 * has been packed while REVISION may be somewhere in the middle. For 3170 * non-packed shards, they will have identical values. */ 3171 SVN_ERR(svn_io_file_close(proto_index, iterpool)); 3172 3173 /* Temp file cleanup. */ 3174 svn_pool_destroy(iterpool); 3175 3176 return SVN_NO_ERROR; 3177} 3178 3179/* A svn_sort__array compatible comparator function, sorting the 3180 * svn_fs_fs__p2l_entry_t** given in LHS, RHS by revision. */ 3181static int 3182compare_p2l_entry_revision(const void *lhs, 3183 const void *rhs) 3184{ 3185 const svn_fs_fs__p2l_entry_t *lhs_entry 3186 =*(const svn_fs_fs__p2l_entry_t **)lhs; 3187 const svn_fs_fs__p2l_entry_t *rhs_entry 3188 =*(const svn_fs_fs__p2l_entry_t **)rhs; 3189 3190 if (lhs_entry->item.revision < rhs_entry->item.revision) 3191 return -1; 3192 3193 return lhs_entry->item.revision == rhs_entry->item.revision ? 0 : 1; 3194} 3195 3196svn_error_t * 3197svn_fs_fs__l2p_index_from_p2l_entries(const char **protoname, 3198 svn_fs_t *fs, 3199 apr_array_header_t *entries, 3200 apr_pool_t *result_pool, 3201 apr_pool_t *scratch_pool) 3202{ 3203 apr_file_t *proto_index; 3204 3205 /* Use a subpool for immediate temp file cleanup at the end of this 3206 * function. */ 3207 apr_pool_t *iterpool = svn_pool_create(scratch_pool); 3208 int i; 3209 svn_revnum_t last_revision = SVN_INVALID_REVNUM; 3210 svn_revnum_t revision = SVN_INVALID_REVNUM; 3211 3212 /* L2P index must be written in revision order. 3213 * Sort ENTRIES accordingly. */ 3214 svn_sort__array(entries, compare_p2l_entry_revision); 3215 3216 /* Find the first revision in the index 3217 * (must exist since no truly empty revs are allowed). */ 3218 for (i = 0; i < entries->nelts && !SVN_IS_VALID_REVNUM(revision); ++i) 3219 revision = APR_ARRAY_IDX(entries, i, const svn_fs_fs__p2l_entry_t *) 3220 ->item.revision; 3221 3222 /* Create the temporary proto-rev file. */ 3223 SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL, 3224 svn_io_file_del_on_pool_cleanup, 3225 result_pool, scratch_pool)); 3226 SVN_ERR(svn_fs_fs__l2p_proto_index_open(&proto_index, *protoname, 3227 scratch_pool)); 3228 3229 /* Write all entries. */ 3230 for (i = 0; i < entries->nelts; ++i) 3231 { 3232 const svn_fs_fs__p2l_entry_t *entry 3233 = APR_ARRAY_IDX(entries, i, const svn_fs_fs__p2l_entry_t *); 3234 svn_pool_clear(iterpool); 3235 3236 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED) 3237 continue; 3238 3239 if (last_revision != entry->item.revision) 3240 { 3241 SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(proto_index, 3242 scratch_pool)); 3243 last_revision = entry->item.revision; 3244 } 3245 3246 SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(proto_index, 3247 entry->offset, 3248 entry->item.number, 3249 iterpool)); 3250 } 3251 3252 /* Convert proto-index into final index and move it into position. */ 3253 SVN_ERR(svn_io_file_close(proto_index, iterpool)); 3254 3255 /* Temp file cleanup. */ 3256 svn_pool_destroy(iterpool); 3257 3258 return SVN_NO_ERROR; 3259} 3260 3261 3262/* 3263 * Standard (de-)serialization functions 3264 */ 3265 3266svn_error_t * 3267svn_fs_fs__serialize_l2p_header(void **data, 3268 apr_size_t *data_len, 3269 void *in, 3270 apr_pool_t *pool) 3271{ 3272 l2p_header_t *header = in; 3273 svn_temp_serializer__context_t *context; 3274 svn_stringbuf_t *serialized; 3275 apr_size_t page_count = header->page_table_index[header->revision_count]; 3276 apr_size_t page_table_size = page_count * sizeof(*header->page_table); 3277 apr_size_t index_size 3278 = (header->revision_count + 1) * sizeof(*header->page_table_index); 3279 apr_size_t data_size = sizeof(*header) + index_size + page_table_size; 3280 3281 /* serialize header and all its elements */ 3282 context = svn_temp_serializer__init(header, 3283 sizeof(*header), 3284 data_size + 32, 3285 pool); 3286 3287 /* page table index array */ 3288 svn_temp_serializer__add_leaf(context, 3289 (const void * const *)&header->page_table_index, 3290 index_size); 3291 3292 /* page table array */ 3293 svn_temp_serializer__add_leaf(context, 3294 (const void * const *)&header->page_table, 3295 page_table_size); 3296 3297 /* return the serialized result */ 3298 serialized = svn_temp_serializer__get(context); 3299 3300 *data = serialized->data; 3301 *data_len = serialized->len; 3302 3303 return SVN_NO_ERROR; 3304} 3305 3306svn_error_t * 3307svn_fs_fs__deserialize_l2p_header(void **out, 3308 void *data, 3309 apr_size_t data_len, 3310 apr_pool_t *pool) 3311{ 3312 l2p_header_t *header = (l2p_header_t *)data; 3313 3314 /* resolve the pointers in the struct */ 3315 svn_temp_deserializer__resolve(header, (void**)&header->page_table_index); 3316 svn_temp_deserializer__resolve(header, (void**)&header->page_table); 3317 3318 /* done */ 3319 *out = header; 3320 3321 return SVN_NO_ERROR; 3322} 3323 3324svn_error_t * 3325svn_fs_fs__serialize_l2p_page(void **data, 3326 apr_size_t *data_len, 3327 void *in, 3328 apr_pool_t *pool) 3329{ 3330 l2p_page_t *page = in; 3331 svn_temp_serializer__context_t *context; 3332 svn_stringbuf_t *serialized; 3333 apr_size_t of_table_size = page->entry_count * sizeof(*page->offsets); 3334 3335 /* serialize struct and all its elements */ 3336 context = svn_temp_serializer__init(page, 3337 sizeof(*page), 3338 of_table_size + sizeof(*page) + 32, 3339 pool); 3340 3341 /* offsets and sub_items arrays */ 3342 svn_temp_serializer__add_leaf(context, 3343 (const void * const *)&page->offsets, 3344 of_table_size); 3345 3346 /* return the serialized result */ 3347 serialized = svn_temp_serializer__get(context); 3348 3349 *data = serialized->data; 3350 *data_len = serialized->len; 3351 3352 return SVN_NO_ERROR; 3353} 3354 3355svn_error_t * 3356svn_fs_fs__deserialize_l2p_page(void **out, 3357 void *data, 3358 apr_size_t data_len, 3359 apr_pool_t *pool) 3360{ 3361 l2p_page_t *page = data; 3362 3363 /* resolve the pointers in the struct */ 3364 svn_temp_deserializer__resolve(page, (void**)&page->offsets); 3365 3366 /* done */ 3367 *out = page; 3368 3369 return SVN_NO_ERROR; 3370} 3371 3372svn_error_t * 3373svn_fs_fs__serialize_p2l_header(void **data, 3374 apr_size_t *data_len, 3375 void *in, 3376 apr_pool_t *pool) 3377{ 3378 p2l_header_t *header = in; 3379 svn_temp_serializer__context_t *context; 3380 svn_stringbuf_t *serialized; 3381 apr_size_t table_size = (header->page_count + 1) * sizeof(*header->offsets); 3382 3383 /* serialize header and all its elements */ 3384 context = svn_temp_serializer__init(header, 3385 sizeof(*header), 3386 table_size + sizeof(*header) + 32, 3387 pool); 3388 3389 /* offsets array */ 3390 svn_temp_serializer__add_leaf(context, 3391 (const void * const *)&header->offsets, 3392 table_size); 3393 3394 /* return the serialized result */ 3395 serialized = svn_temp_serializer__get(context); 3396 3397 *data = serialized->data; 3398 *data_len = serialized->len; 3399 3400 return SVN_NO_ERROR; 3401} 3402 3403svn_error_t * 3404svn_fs_fs__deserialize_p2l_header(void **out, 3405 void *data, 3406 apr_size_t data_len, 3407 apr_pool_t *pool) 3408{ 3409 p2l_header_t *header = data; 3410 3411 /* resolve the only pointer in the struct */ 3412 svn_temp_deserializer__resolve(header, (void**)&header->offsets); 3413 3414 /* done */ 3415 *out = header; 3416 3417 return SVN_NO_ERROR; 3418} 3419 3420svn_error_t * 3421svn_fs_fs__serialize_p2l_page(void **data, 3422 apr_size_t *data_len, 3423 void *in, 3424 apr_pool_t *pool) 3425{ 3426 apr_array_header_t *page = in; 3427 svn_temp_serializer__context_t *context; 3428 svn_stringbuf_t *serialized; 3429 apr_size_t table_size = page->elt_size * page->nelts; 3430 3431 /* serialize array header and all its elements */ 3432 context = svn_temp_serializer__init(page, 3433 sizeof(*page), 3434 table_size + sizeof(*page) + 32, 3435 pool); 3436 3437 /* items in the array */ 3438 svn_temp_serializer__add_leaf(context, 3439 (const void * const *)&page->elts, 3440 table_size); 3441 3442 /* return the serialized result */ 3443 serialized = svn_temp_serializer__get(context); 3444 3445 *data = serialized->data; 3446 *data_len = serialized->len; 3447 3448 return SVN_NO_ERROR; 3449} 3450 3451svn_error_t * 3452svn_fs_fs__deserialize_p2l_page(void **out, 3453 void *data, 3454 apr_size_t data_len, 3455 apr_pool_t *pool) 3456{ 3457 apr_array_header_t *page = (apr_array_header_t *)data; 3458 3459 /* resolve the only pointer in the struct */ 3460 svn_temp_deserializer__resolve(page, (void**)&page->elts); 3461 3462 /* patch up members */ 3463 page->pool = pool; 3464 page->nalloc = page->nelts; 3465 3466 /* done */ 3467 *out = page; 3468 3469 return SVN_NO_ERROR; 3470} 3471