index.c revision 299742
1/* index.c indexing support for FSFS support
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include <assert.h>
24
25#include "svn_io.h"
26#include "svn_pools.h"
27#include "svn_sorts.h"
28
29#include "svn_private_config.h"
30
31#include "private/svn_sorts_private.h"
32#include "private/svn_subr_private.h"
33#include "private/svn_temp_serializer.h"
34
35#include "index.h"
36#include "pack.h"
37#include "temp_serializer.h"
38#include "util.h"
39#include "fs_fs.h"
40
41#include "../libsvn_fs/fs-loader.h"
42
43/* maximum length of a uint64 in an 7/8b encoding */
44#define ENCODED_INT_LENGTH 10
45
46/* APR is missing an APR_OFF_T_MAX.  So, define one.  We will use it to
47 * limit file offsets stored in the indexes.
48 *
49 * We assume that everything shorter than 64 bits, it is at least 32 bits.
50 * We also assume that the type is always signed meaning we only have an
51 * effective positive range of 63 or 31 bits, respectively.
52 */
53static
54const apr_uint64_t off_t_max = (sizeof(apr_off_t) == sizeof(apr_int64_t))
55                             ? APR_INT64_MAX
56                             : APR_INT32_MAX;
57
58/* We store P2L proto-index entries as 6 values, 64 bits each on disk.
59 * See also svn_fs_fs__p2l_proto_index_add_entry().
60 */
61#define P2L_PROTO_INDEX_ENTRY_SIZE (6 * sizeof(apr_uint64_t))
62
63/* We put this string in front of the L2P index header. */
64#define L2P_STREAM_PREFIX "L2P-INDEX\n"
65
66/* We put this string in front of the P2L index header. */
67#define P2L_STREAM_PREFIX "P2L-INDEX\n"
68
69/* Size of the buffer that will fit the index header prefixes. */
70#define STREAM_PREFIX_LEN MAX(sizeof(L2P_STREAM_PREFIX), \
71                              sizeof(P2L_STREAM_PREFIX))
72
73/* Page tables in the log-to-phys index file exclusively contain entries
74 * of this type to describe position and size of a given page.
75 */
76typedef struct l2p_page_table_entry_t
77{
78  /* global offset on the page within the index file */
79  apr_uint64_t offset;
80
81  /* number of mapping entries in that page */
82  apr_uint32_t entry_count;
83
84  /* size of the page on disk (in the index file) */
85  apr_uint32_t size;
86} l2p_page_table_entry_t;
87
88/* Master run-time data structure of an log-to-phys index.  It contains
89 * the page tables of every revision covered by that index - but not the
90 * pages themselves.
91 */
92typedef struct l2p_header_t
93{
94  /* first revision covered by this index */
95  svn_revnum_t first_revision;
96
97  /* number of revisions covered */
98  apr_size_t revision_count;
99
100  /* (max) number of entries per page */
101  apr_uint32_t page_size;
102
103  /* indexes into PAGE_TABLE that mark the first page of the respective
104   * revision.  PAGE_TABLE_INDEX[REVISION_COUNT] points to the end of
105   * PAGE_TABLE.
106   */
107  apr_size_t * page_table_index;
108
109  /* Page table covering all pages in the index */
110  l2p_page_table_entry_t * page_table;
111} l2p_header_t;
112
113/* Run-time data structure containing a single log-to-phys index page.
114 */
115typedef struct l2p_page_t
116{
117  /* number of entries in the OFFSETS array */
118  apr_uint32_t entry_count;
119
120  /* global file offsets (item index is the array index) within the
121   * packed or non-packed rev file.  Offset will be -1 for unused /
122   * invalid item index values. */
123  apr_uint64_t *offsets;
124} l2p_page_t;
125
126/* All of the log-to-phys proto index file consist of entries of this type.
127 */
128typedef struct l2p_proto_entry_t
129{
130  /* phys offset + 1 of the data container. 0 for "new revision" entries. */
131  apr_uint64_t offset;
132
133  /* corresponding item index. 0 for "new revision" entries. */
134  apr_uint64_t item_index;
135} l2p_proto_entry_t;
136
137/* Master run-time data structure of an phys-to-log index.  It contains
138 * an array with one offset value for each rev file cluster.
139 */
140typedef struct p2l_header_t
141{
142  /* first revision covered by the index (and rev file) */
143  svn_revnum_t first_revision;
144
145  /* number of bytes in the rev files covered by each p2l page */
146  apr_uint64_t page_size;
147
148  /* number of pages / clusters in that rev file */
149  apr_size_t page_count;
150
151  /* number of bytes in the rev file */
152  apr_uint64_t file_size;
153
154  /* offsets of the pages / cluster descriptions within the index file */
155  apr_off_t *offsets;
156} p2l_header_t;
157
158/*
159 * packed stream
160 *
161 * This is a utility object that will read files containing 7b/8b encoded
162 * unsigned integers.  It decodes them in batches to minimize overhead
163 * and supports random access to random file locations.
164 */
165
166/* How many numbers we will pre-fetch and buffer in a packed number stream.
167 */
168enum { MAX_NUMBER_PREFETCH = 64 };
169
170/* Prefetched number entry in a packed number stream.
171 */
172typedef struct value_position_pair_t
173{
174  /* prefetched number */
175  apr_uint64_t value;
176
177  /* number of bytes read, *including* this number, since the buffer start */
178  apr_size_t total_len;
179} value_position_pair_t;
180
181/* State of a prefetching packed number stream.  It will read compressed
182 * index data efficiently and present it as a series of non-packed uint64.
183 */
184struct svn_fs_fs__packed_number_stream_t
185{
186  /* underlying data file containing the packed values */
187  apr_file_t *file;
188
189  /* Offset within FILE at which the stream data starts
190   * (i.e. which offset will reported as offset 0 by packed_stream_offset). */
191  apr_off_t stream_start;
192
193  /* First offset within FILE after the stream data.
194   * Attempts to read beyond this will cause an "Unexpected End Of Stream"
195   * error. */
196  apr_off_t stream_end;
197
198  /* number of used entries in BUFFER (starting at index 0) */
199  apr_size_t used;
200
201  /* index of the next number to read from the BUFFER (0 .. USED).
202   * If CURRENT == USED, we need to read more data upon get() */
203  apr_size_t current;
204
205  /* offset in FILE from which the first entry in BUFFER has been read */
206  apr_off_t start_offset;
207
208  /* offset in FILE from which the next number has to be read */
209  apr_off_t next_offset;
210
211  /* read the file in chunks of this size */
212  apr_size_t block_size;
213
214  /* pool to be used for file ops etc. */
215  apr_pool_t *pool;
216
217  /* buffer for prefetched values */
218  value_position_pair_t buffer[MAX_NUMBER_PREFETCH];
219};
220
221/* Return an svn_error_t * object for error ERR on STREAM with the given
222 * MESSAGE string.  The latter must have a placeholder for the index file
223 * name ("%s") and the current read offset (e.g. "0x%lx").
224 */
225static svn_error_t *
226stream_error_create(svn_fs_fs__packed_number_stream_t *stream,
227                    apr_status_t err,
228                    const char *message)
229{
230  const char *file_name;
231  apr_off_t offset;
232  SVN_ERR(svn_io_file_name_get(&file_name, stream->file,
233                               stream->pool));
234  SVN_ERR(svn_fs_fs__get_file_offset(&offset, stream->file, stream->pool));
235
236  return svn_error_createf(err, NULL, message, file_name,
237                           apr_psprintf(stream->pool,
238                                        "%" APR_UINT64_T_HEX_FMT,
239                                        (apr_uint64_t)offset));
240}
241
242/* Read up to MAX_NUMBER_PREFETCH numbers from the STREAM->NEXT_OFFSET in
243 * STREAM->FILE and buffer them.
244 *
245 * We don't want GCC and others to inline this (infrequently called)
246 * function into packed_stream_get() because it prevents the latter from
247 * being inlined itself.
248 */
249SVN__PREVENT_INLINE
250static svn_error_t *
251packed_stream_read(svn_fs_fs__packed_number_stream_t *stream)
252{
253  unsigned char buffer[MAX_NUMBER_PREFETCH];
254  apr_size_t read = 0;
255  apr_size_t i;
256  value_position_pair_t *target;
257  apr_off_t block_start = 0;
258  apr_off_t block_left = 0;
259  apr_status_t err;
260
261  /* all buffered data will have been read starting here */
262  stream->start_offset = stream->next_offset;
263
264  /* packed numbers are usually not aligned to MAX_NUMBER_PREFETCH blocks,
265   * i.e. the last number has been incomplete (and not buffered in stream)
266   * and need to be re-read.  Therefore, always correct the file pointer.
267   */
268  SVN_ERR(svn_io_file_aligned_seek(stream->file, stream->block_size,
269                                   &block_start, stream->next_offset,
270                                   stream->pool));
271
272  /* prefetch at least one number but, if feasible, don't cross block
273   * boundaries.  This shall prevent jumping back and forth between two
274   * blocks because the extra data was not actually request _now_.
275   */
276  read = sizeof(buffer);
277  block_left = stream->block_size - (stream->next_offset - block_start);
278  if (block_left >= 10 && block_left < read)
279    read = (apr_size_t)block_left;
280
281  /* Don't read beyond the end of the file section that belongs to this
282   * index / stream. */
283  read = (apr_size_t)MIN(read, stream->stream_end - stream->next_offset);
284
285  err = apr_file_read(stream->file, buffer, &read);
286  if (err && !APR_STATUS_IS_EOF(err))
287    return stream_error_create(stream, err,
288      _("Can't read index file '%s' at offset 0x%s"));
289
290  /* if the last number is incomplete, trim it from the buffer */
291  while (read > 0 && buffer[read-1] >= 0x80)
292    --read;
293
294  /* we call read() only if get() requires more data.  So, there must be
295   * at least *one* further number. */
296  if SVN__PREDICT_FALSE(read == 0)
297    return stream_error_create(stream, err,
298      _("Unexpected end of index file %s at offset 0x%s"));
299
300  /* parse file buffer and expand into stream buffer */
301  target = stream->buffer;
302  for (i = 0; i < read;)
303    {
304      if (buffer[i] < 0x80)
305        {
306          /* numbers < 128 are relatively frequent and particularly easy
307           * to decode.  Give them special treatment. */
308          target->value = buffer[i];
309          ++i;
310          target->total_len = i;
311          ++target;
312        }
313      else
314        {
315          apr_uint64_t value = 0;
316          apr_uint64_t shift = 0;
317          while (buffer[i] >= 0x80)
318            {
319              value += ((apr_uint64_t)buffer[i] & 0x7f) << shift;
320              shift += 7;
321              ++i;
322            }
323
324          target->value = value + ((apr_uint64_t)buffer[i] << shift);
325          ++i;
326          target->total_len = i;
327          ++target;
328
329          /* let's catch corrupted data early.  It would surely cause
330           * havoc further down the line. */
331          if SVN__PREDICT_FALSE(shift > 8 * sizeof(value))
332            return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
333                                     _("Corrupt index: number too large"));
334       }
335    }
336
337  /* update stream state */
338  stream->used = target - stream->buffer;
339  stream->next_offset = stream->start_offset + i;
340  stream->current = 0;
341
342  return SVN_NO_ERROR;
343}
344
345/* Create and open a packed number stream reading from offsets START to
346 * END in FILE and return it in *STREAM.  Access the file in chunks of
347 * BLOCK_SIZE bytes.  Expect the stream to be prefixed by STREAM_PREFIX.
348 * Allocate *STREAM in RESULT_POOL and use SCRATCH_POOL for temporaries.
349 */
350static svn_error_t *
351packed_stream_open(svn_fs_fs__packed_number_stream_t **stream,
352                   apr_file_t *file,
353                   apr_off_t start,
354                   apr_off_t end,
355                   const char *stream_prefix,
356                   apr_size_t block_size,
357                   apr_pool_t *result_pool,
358                   apr_pool_t *scratch_pool)
359{
360  char buffer[STREAM_PREFIX_LEN + 1] = { 0 };
361  apr_size_t len = strlen(stream_prefix);
362  svn_fs_fs__packed_number_stream_t *result;
363
364  /* If this is violated, we forgot to adjust STREAM_PREFIX_LEN after
365   * changing the index header prefixes. */
366  SVN_ERR_ASSERT(len < sizeof(buffer));
367
368  /* Read the header prefix and compare it with the expected prefix */
369  SVN_ERR(svn_io_file_aligned_seek(file, block_size, NULL, start,
370                                   scratch_pool));
371  SVN_ERR(svn_io_file_read_full2(file, buffer, len, NULL, NULL,
372                                 scratch_pool));
373
374  if (strncmp(buffer, stream_prefix, len))
375    return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
376                             _("Index stream header prefix mismatch.\n"
377                               "  expected: %s"
378                               "  found: %s"), stream_prefix, buffer);
379
380  /* Construct the actual stream object. */
381  result = apr_palloc(result_pool, sizeof(*result));
382
383  result->pool = result_pool;
384  result->file = file;
385  result->stream_start = start + len;
386  result->stream_end = end;
387
388  result->used = 0;
389  result->current = 0;
390  result->start_offset = result->stream_start;
391  result->next_offset = result->stream_start;
392  result->block_size = block_size;
393
394  *stream = result;
395
396  return SVN_NO_ERROR;
397}
398
399/*
400 * The forced inline is required for performance reasons:  This is a very
401 * hot code path (called for every item we read) but e.g. GCC would rather
402 * chose to inline packed_stream_read() here, preventing packed_stream_get
403 * from being inlined itself.
404 */
405SVN__FORCE_INLINE
406static svn_error_t*
407packed_stream_get(apr_uint64_t *value,
408                  svn_fs_fs__packed_number_stream_t *stream)
409{
410  if (stream->current == stream->used)
411    SVN_ERR(packed_stream_read(stream));
412
413  *value = stream->buffer[stream->current].value;
414  ++stream->current;
415
416  return SVN_NO_ERROR;
417}
418
419/* Navigate STREAM to packed stream offset OFFSET.  There will be no checks
420 * whether the given OFFSET is valid.
421 */
422static void
423packed_stream_seek(svn_fs_fs__packed_number_stream_t *stream,
424                   apr_off_t offset)
425{
426  apr_off_t file_offset = offset + stream->stream_start;
427
428  if (   stream->used == 0
429      || offset < stream->start_offset
430      || offset >= stream->next_offset)
431    {
432      /* outside buffered data.  Next get() will read() from OFFSET. */
433      stream->start_offset = file_offset;
434      stream->next_offset = file_offset;
435      stream->current = 0;
436      stream->used = 0;
437    }
438  else
439    {
440      /* Find the suitable location in the stream buffer.
441       * Since our buffer is small, it is efficient enough to simply scan
442       * it for the desired position. */
443      apr_size_t i;
444      for (i = 0; i < stream->used; ++i)
445        if (stream->buffer[i].total_len > file_offset - stream->start_offset)
446          break;
447
448      stream->current = i;
449    }
450}
451
452/* Return the packed stream offset of at which the next number in the stream
453 * can be found.
454 */
455static apr_off_t
456packed_stream_offset(svn_fs_fs__packed_number_stream_t *stream)
457{
458  apr_off_t file_offset
459       = stream->current == 0
460       ? stream->start_offset
461       : stream->buffer[stream->current-1].total_len + stream->start_offset;
462
463  return file_offset - stream->stream_start;
464}
465
466/* Encode VALUE as 7/8b into P and return the number of bytes written.
467 * This will be used when _writing_ packed data.  packed_stream_* is for
468 * read operations only.
469 */
470static apr_size_t
471encode_uint(unsigned char *p, apr_uint64_t value)
472{
473  unsigned char *start = p;
474  while (value >= 0x80)
475    {
476      *p = (unsigned char)((value % 0x80) + 0x80);
477      value /= 0x80;
478      ++p;
479    }
480
481  *p = (unsigned char)(value % 0x80);
482  return (p - start) + 1;
483}
484
485/* Encode VALUE as 7/8b into P and return the number of bytes written.
486 * This maps signed ints onto unsigned ones.
487 */
488static apr_size_t
489encode_int(unsigned char *p, apr_int64_t value)
490{
491  return encode_uint(p, (apr_uint64_t)(value < 0 ? -1 - 2*value : 2*value));
492}
493
494/* Append VALUE to STREAM in 7/8b encoding.
495 */
496static svn_error_t *
497stream_write_encoded(svn_stream_t *stream,
498                     apr_uint64_t value)
499{
500  unsigned char encoded[ENCODED_INT_LENGTH];
501
502  apr_size_t len = encode_uint(encoded, value);
503  return svn_error_trace(svn_stream_write(stream, (char *)encoded, &len));
504}
505
506/* Map unsigned VALUE back to signed integer.
507 */
508static apr_int64_t
509decode_int(apr_uint64_t value)
510{
511  return (apr_int64_t)(value % 2 ? -1 - value / 2 : value / 2);
512}
513
514/* Write VALUE to the PROTO_INDEX file, using SCRATCH_POOL for temporary
515 * allocations.
516 *
517 * The point of this function is to ensure an architecture-independent
518 * proto-index file format.  All data is written as unsigned 64 bits ints
519 * in little endian byte order.  64 bits is the largest portable integer
520 * we have and unsigned values have well-defined conversions in C.
521 */
522static svn_error_t *
523write_uint64_to_proto_index(apr_file_t *proto_index,
524                            apr_uint64_t value,
525                            apr_pool_t *scratch_pool)
526{
527  apr_byte_t buffer[sizeof(value)];
528  int i;
529  apr_size_t written;
530
531  /* Split VALUE into 8 bytes using LE ordering. */
532  for (i = 0; i < sizeof(buffer); ++i)
533    {
534      /* Unsigned conversions are well-defined ... */
535      buffer[i] = (apr_byte_t)value;
536      value >>= CHAR_BIT;
537    }
538
539  /* Write it all to disk. */
540  SVN_ERR(svn_io_file_write_full(proto_index, buffer, sizeof(buffer),
541                                 &written, scratch_pool));
542  SVN_ERR_ASSERT(written == sizeof(buffer));
543
544  return SVN_NO_ERROR;
545}
546
547/* Read one unsigned 64 bit value from PROTO_INDEX file and return it in
548 * *VALUE_P.  If EOF is NULL, error out when trying to read beyond EOF.
549 * Use SCRATCH_POOL for temporary allocations.
550 *
551 * This function is the inverse to write_uint64_to_proto_index (see there),
552 * reading the external LE byte order and convert it into host byte order.
553 */
554static svn_error_t *
555read_uint64_from_proto_index(apr_file_t *proto_index,
556                             apr_uint64_t *value_p,
557                             svn_boolean_t *eof,
558                             apr_pool_t *scratch_pool)
559{
560  apr_byte_t buffer[sizeof(*value_p)];
561  apr_size_t read;
562
563  /* Read the full 8 bytes or our 64 bit value, unless we hit EOF.
564   * Assert that we never read partial values. */
565  SVN_ERR(svn_io_file_read_full2(proto_index, buffer, sizeof(buffer),
566                                 &read, eof, scratch_pool));
567  SVN_ERR_ASSERT((eof && *eof) || read == sizeof(buffer));
568
569  /* If we did not hit EOF, reconstruct the uint64 value and return it. */
570  if (!eof || !*eof)
571    {
572      int i;
573      apr_uint64_t value;
574
575      /* This could only overflow if CHAR_BIT had a value that is not
576       * a divisor of 64. */
577      value = 0;
578      for (i = sizeof(buffer) - 1; i >= 0; --i)
579        value = (value << CHAR_BIT) + buffer[i];
580
581      *value_p = value;
582    }
583
584  return SVN_NO_ERROR;
585}
586
587/* Convenience function similar to read_uint64_from_proto_index, but returns
588 * an uint32 value in VALUE_P.  Return an error if the value does not fit.
589 */
590static svn_error_t *
591read_uint32_from_proto_index(apr_file_t *proto_index,
592                             apr_uint32_t *value_p,
593                             svn_boolean_t *eof,
594                             apr_pool_t *scratch_pool)
595{
596  apr_uint64_t value;
597  SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof,
598                                       scratch_pool));
599  if (!eof || !*eof)
600    {
601      if (value > APR_UINT32_MAX)
602        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
603                                _("UINT32 0x%s too large, max = 0x%s"),
604                                apr_psprintf(scratch_pool,
605                                             "%" APR_UINT64_T_HEX_FMT,
606                                             value),
607                                apr_psprintf(scratch_pool,
608                                             "%" APR_UINT64_T_HEX_FMT,
609                                             (apr_uint64_t)APR_UINT32_MAX));
610
611      /* This conversion is not lossy because the value can be represented
612       * in the target type. */
613      *value_p = (apr_uint32_t)value;
614    }
615
616  return SVN_NO_ERROR;
617}
618
619/* Convenience function similar to read_uint64_from_proto_index, but returns
620 * an off_t value in VALUE_P.  Return an error if the value does not fit.
621 */
622static svn_error_t *
623read_off_t_from_proto_index(apr_file_t *proto_index,
624                            apr_off_t *value_p,
625                            svn_boolean_t *eof,
626                            apr_pool_t *scratch_pool)
627{
628  apr_uint64_t value;
629  SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof,
630                                       scratch_pool));
631  if (!eof || !*eof)
632    {
633      if (value > off_t_max)
634        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
635                                _("File offset 0x%s too large, max = 0x%s"),
636                                apr_psprintf(scratch_pool,
637                                             "%" APR_UINT64_T_HEX_FMT,
638                                             value),
639                                apr_psprintf(scratch_pool,
640                                             "%" APR_UINT64_T_HEX_FMT,
641                                             off_t_max));
642
643      /* Shortening conversion from unsigned to signed int is well-defined
644       * and not lossy in C because the value can be represented in the
645       * target type. */
646      *value_p = (apr_off_t)value;
647    }
648
649  return SVN_NO_ERROR;
650}
651
652/*
653 * log-to-phys index
654 */
655
656/* Append ENTRY to log-to-phys PROTO_INDEX file.
657 * Use SCRATCH_POOL for temporary allocations.
658 */
659static svn_error_t *
660write_l2p_entry_to_proto_index(apr_file_t *proto_index,
661                               l2p_proto_entry_t entry,
662                               apr_pool_t *scratch_pool)
663{
664  SVN_ERR(write_uint64_to_proto_index(proto_index, entry.offset,
665                                      scratch_pool));
666  SVN_ERR(write_uint64_to_proto_index(proto_index, entry.item_index,
667                                      scratch_pool));
668
669  return SVN_NO_ERROR;
670}
671
672/* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file
673 * in *EOF, or error out in that case if EOF is NULL.  *ENTRY is in an
674 * undefined state if an end-of-file occurred.
675 * Use SCRATCH_POOL for temporary allocations.
676 */
677static svn_error_t *
678read_l2p_entry_from_proto_index(apr_file_t *proto_index,
679                                l2p_proto_entry_t *entry,
680                                svn_boolean_t *eof,
681                                apr_pool_t *scratch_pool)
682{
683  SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->offset, eof,
684                                       scratch_pool));
685  SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->item_index, eof,
686                                       scratch_pool));
687
688  return SVN_NO_ERROR;
689}
690
691/* Write the log-2-phys index page description for the l2p_page_entry_t
692 * array ENTRIES, starting with element START up to but not including END.
693 * Write the resulting representation into BUFFER.  Use SCRATCH_POOL for
694 * temporary allocations.
695 */
696static svn_error_t *
697encode_l2p_page(apr_array_header_t *entries,
698                int start,
699                int end,
700                svn_spillbuf_t *buffer,
701                apr_pool_t *scratch_pool)
702{
703  unsigned char encoded[ENCODED_INT_LENGTH];
704  int i;
705  const apr_uint64_t *values = (const apr_uint64_t *)entries->elts;
706  apr_uint64_t last_value = 0;
707
708  /* encode items */
709  for (i = start; i < end; ++i)
710    {
711      apr_int64_t diff = values[i] - last_value;
712      last_value = values[i];
713      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
714                                  encode_int(encoded, diff), scratch_pool));
715    }
716
717  return SVN_NO_ERROR;
718}
719
720svn_error_t *
721svn_fs_fs__l2p_proto_index_open(apr_file_t **proto_index,
722                                const char *file_name,
723                                apr_pool_t *result_pool)
724{
725  SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE
726                           | APR_CREATE | APR_APPEND | APR_BUFFERED,
727                           APR_OS_DEFAULT, result_pool));
728
729  return SVN_NO_ERROR;
730}
731
732svn_error_t *
733svn_fs_fs__l2p_proto_index_add_revision(apr_file_t *proto_index,
734                                        apr_pool_t *scratch_pool)
735{
736  l2p_proto_entry_t entry;
737  entry.offset = 0;
738  entry.item_index = 0;
739
740  return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry,
741                                                        scratch_pool));
742}
743
744svn_error_t *
745svn_fs_fs__l2p_proto_index_add_entry(apr_file_t *proto_index,
746                                     apr_off_t offset,
747                                     apr_uint64_t item_index,
748                                     apr_pool_t *scratch_pool)
749{
750  l2p_proto_entry_t entry;
751
752  /* make sure the conversion to uint64 works */
753  SVN_ERR_ASSERT(offset >= -1);
754
755  /* we support offset '-1' as a "not used" indication */
756  entry.offset = (apr_uint64_t)offset + 1;
757
758  /* make sure we can use item_index as an array index when building the
759   * final index file */
760  SVN_ERR_ASSERT(item_index < UINT_MAX / 2);
761  entry.item_index = item_index;
762
763  return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry,
764                                                        scratch_pool));
765}
766
767svn_error_t *
768svn_fs_fs__l2p_index_append(svn_checksum_t **checksum,
769                            svn_fs_t *fs,
770                            apr_file_t *index_file,
771                            const char *proto_file_name,
772                            svn_revnum_t revision,
773                            apr_pool_t * result_pool,
774                            apr_pool_t *scratch_pool)
775{
776  fs_fs_data_t *ffd = fs->fsap_data;
777  apr_file_t *proto_index = NULL;
778  svn_stream_t *stream;
779  int i;
780  apr_uint64_t entry;
781  svn_boolean_t eof = FALSE;
782
783  int last_page_count = 0;          /* total page count at the start of
784                                       the current revision */
785
786  /* temporary data structures that collect the data which will be moved
787     to the target file in a second step */
788  apr_pool_t *local_pool = svn_pool_create(scratch_pool);
789  apr_pool_t *iterpool = svn_pool_create(local_pool);
790  apr_array_header_t *page_counts
791    = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
792  apr_array_header_t *page_sizes
793    = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
794  apr_array_header_t *entry_counts
795    = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
796
797  /* collect the item offsets and sub-item value for the current revision */
798  apr_array_header_t *entries
799    = apr_array_make(local_pool, 256, sizeof(apr_uint64_t));
800
801  /* 64k blocks, spill after 16MB */
802  svn_spillbuf_t *buffer
803    = svn_spillbuf__create(0x10000, 0x1000000, local_pool);
804
805  /* Paranoia check that makes later casting to int32 safe.
806   * The current implementation is limited to 2G entries per page. */
807  if (ffd->l2p_page_size > APR_INT32_MAX)
808    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
809                            _("L2P index page size  %s"
810                              " exceeds current limit of 2G entries"),
811                            apr_psprintf(local_pool, "%" APR_UINT64_T_FMT,
812                                         ffd->l2p_page_size));
813
814  /* start at the beginning of the source file */
815  SVN_ERR(svn_io_file_open(&proto_index, proto_file_name,
816                           APR_READ | APR_CREATE | APR_BUFFERED,
817                           APR_OS_DEFAULT, scratch_pool));
818
819  /* process all entries until we fail due to EOF */
820  for (entry = 0; !eof; ++entry)
821    {
822      l2p_proto_entry_t proto_entry;
823
824      /* (attempt to) read the next entry from the source */
825      SVN_ERR(read_l2p_entry_from_proto_index(proto_index, &proto_entry,
826                                              &eof, local_pool));
827
828      /* handle new revision */
829      if ((entry > 0 && proto_entry.offset == 0) || eof)
830        {
831          /* dump entries, grouped into pages */
832
833          int entry_count = 0;
834          for (i = 0; i < entries->nelts; i += entry_count)
835            {
836              /* 1 page with up to L2P_PAGE_SIZE entries.
837               * fsfs.conf settings validation guarantees this to fit into
838               * our address space. */
839              apr_uint64_t last_buffer_size
840                = (apr_uint64_t)svn_spillbuf__get_size(buffer);
841
842              svn_pool_clear(iterpool);
843
844              entry_count = ffd->l2p_page_size < entries->nelts - i
845                          ? (int)ffd->l2p_page_size
846                          : entries->nelts - i;
847              SVN_ERR(encode_l2p_page(entries, i, i + entry_count,
848                                      buffer, iterpool));
849
850              APR_ARRAY_PUSH(entry_counts, apr_uint64_t) = entry_count;
851              APR_ARRAY_PUSH(page_sizes, apr_uint64_t)
852                = svn_spillbuf__get_size(buffer) - last_buffer_size;
853            }
854
855          apr_array_clear(entries);
856
857          /* store the number of pages in this revision */
858          APR_ARRAY_PUSH(page_counts, apr_uint64_t)
859            = page_sizes->nelts - last_page_count;
860
861          last_page_count = page_sizes->nelts;
862        }
863      else
864        {
865          int idx;
866
867          /* store the mapping in our array */
868          if (proto_entry.item_index > APR_INT32_MAX)
869            return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
870                                    _("Item index %s too large "
871                                      "in l2p proto index for revision %ld"),
872                                    apr_psprintf(local_pool, "%" APR_UINT64_T_FMT,
873                                                 proto_entry.item_index),
874                                    revision + page_counts->nelts);
875
876          idx = (int)proto_entry.item_index;
877          while (idx >= entries->nelts)
878            APR_ARRAY_PUSH(entries, apr_uint64_t) = 0;
879
880          APR_ARRAY_IDX(entries, idx, apr_uint64_t) = proto_entry.offset;
881        }
882    }
883
884  /* close the source file */
885  SVN_ERR(svn_io_file_close(proto_index, local_pool));
886
887  /* Paranoia check that makes later casting to int32 safe.
888   * The current implementation is limited to 2G pages per index. */
889  if (page_counts->nelts > APR_INT32_MAX)
890    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
891                            _("L2P index page count  %d"
892                              " exceeds current limit of 2G pages"),
893                            page_counts->nelts);
894
895  /* open target stream. */
896  stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE,
897                                                            local_pool),
898                                   NULL, checksum, svn_checksum_md5, FALSE,
899                                   result_pool);
900
901
902  /* write header info */
903  SVN_ERR(svn_stream_puts(stream, L2P_STREAM_PREFIX));
904  SVN_ERR(stream_write_encoded(stream, revision));
905  SVN_ERR(stream_write_encoded(stream, ffd->l2p_page_size));
906  SVN_ERR(stream_write_encoded(stream, page_counts->nelts));
907  SVN_ERR(stream_write_encoded(stream, page_sizes->nelts));
908
909  /* write the revision table */
910  for (i = 0; i < page_counts->nelts; ++i)
911    {
912      apr_uint64_t value = APR_ARRAY_IDX(page_counts, i, apr_uint64_t);
913      SVN_ERR(stream_write_encoded(stream, value));
914    }
915
916  /* write the page table */
917  for (i = 0; i < page_sizes->nelts; ++i)
918    {
919      apr_uint64_t value = APR_ARRAY_IDX(page_sizes, i, apr_uint64_t);
920      SVN_ERR(stream_write_encoded(stream, value));
921      value = APR_ARRAY_IDX(entry_counts, i, apr_uint64_t);
922      SVN_ERR(stream_write_encoded(stream, value));
923    }
924
925  /* append page contents and implicitly close STREAM */
926  SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool),
927                           stream, NULL, NULL, local_pool));
928
929  svn_pool_destroy(local_pool);
930
931  return SVN_NO_ERROR;
932}
933
934/* If REV_FILE->L2P_STREAM is NULL, create a new stream for the log-to-phys
935 * index for REVISION in FS and return it in REV_FILE.
936 */
937static svn_error_t *
938auto_open_l2p_index(svn_fs_fs__revision_file_t *rev_file,
939                    svn_fs_t *fs,
940                    svn_revnum_t revision)
941{
942  if (rev_file->l2p_stream == NULL)
943    {
944      fs_fs_data_t *ffd = fs->fsap_data;
945
946      SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
947      SVN_ERR(packed_stream_open(&rev_file->l2p_stream,
948                                 rev_file->file,
949                                 rev_file->l2p_offset,
950                                 rev_file->p2l_offset,
951                                 L2P_STREAM_PREFIX,
952                                 (apr_size_t)ffd->block_size,
953                                 rev_file->pool,
954                                 rev_file->pool));
955    }
956
957  return SVN_NO_ERROR;
958}
959
960/* Read the header data structure of the log-to-phys index for REVISION
961 * in FS and return it in *HEADER, allocated in RESULT_POOL.  Use REV_FILE
962 * to access on-disk data.  Use SCRATCH_POOL for temporary allocations.
963 */
964static svn_error_t *
965get_l2p_header_body(l2p_header_t **header,
966                    svn_fs_fs__revision_file_t *rev_file,
967                    svn_fs_t *fs,
968                    svn_revnum_t revision,
969                    apr_pool_t *result_pool,
970                    apr_pool_t *scratch_pool)
971{
972  fs_fs_data_t *ffd = fs->fsap_data;
973  apr_uint64_t value;
974  apr_size_t i;
975  apr_size_t page, page_count;
976  apr_off_t offset;
977  l2p_header_t *result = apr_pcalloc(result_pool, sizeof(*result));
978  apr_size_t page_table_index;
979  svn_revnum_t next_rev;
980
981  pair_cache_key_t key;
982  key.revision = rev_file->start_revision;
983  key.second = rev_file->is_packed;
984
985  SVN_ERR(auto_open_l2p_index(rev_file, fs, revision));
986  packed_stream_seek(rev_file->l2p_stream, 0);
987
988  /* Read the table sizes.  Check the data for plausibility and
989   * consistency with other bits. */
990  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
991  result->first_revision = (svn_revnum_t)value;
992  if (result->first_revision != rev_file->start_revision)
993    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
994                  _("Index rev / pack file revision numbers do not match"));
995
996  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
997  result->page_size = (apr_uint32_t)value;
998  if (!result->page_size || (result->page_size & (result->page_size - 1)))
999    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1000                            _("L2P index page size is not a power of two"));
1001
1002  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1003  result->revision_count = (int)value;
1004  if (   result->revision_count != 1
1005      && result->revision_count != (apr_uint64_t)ffd->max_files_per_dir)
1006    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1007                            _("Invalid number of revisions in L2P index"));
1008
1009  SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1010  page_count = (apr_size_t)value;
1011  if (page_count < result->revision_count)
1012    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1013                            _("Fewer L2P index pages than revisions"));
1014  if (page_count > (rev_file->p2l_offset - rev_file->l2p_offset) / 2)
1015    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1016                            _("L2P index page count implausibly large"));
1017
1018  next_rev = result->first_revision + (svn_revnum_t)result->revision_count;
1019  if (result->first_revision > revision || next_rev <= revision)
1020    return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1021                      _("Corrupt L2P index for r%ld only covers r%ld:%ld"),
1022                      revision, result->first_revision, next_rev);
1023
1024  /* allocate the page tables */
1025  result->page_table
1026    = apr_pcalloc(result_pool, page_count * sizeof(*result->page_table));
1027  result->page_table_index
1028    = apr_pcalloc(result_pool, (result->revision_count + 1)
1029                             * sizeof(*result->page_table_index));
1030
1031  /* read per-revision page table sizes (i.e. number of pages per rev) */
1032  page_table_index = 0;
1033  result->page_table_index[0] = page_table_index;
1034
1035  for (i = 0; i < result->revision_count; ++i)
1036    {
1037      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1038      if (value == 0)
1039        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1040                                _("Revision with no L2P index pages"));
1041
1042      page_table_index += (apr_size_t)value;
1043      if (page_table_index > page_count)
1044        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1045                                _("L2P page table exceeded"));
1046
1047      result->page_table_index[i+1] = page_table_index;
1048    }
1049
1050  if (page_table_index != page_count)
1051    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1052                 _("Revisions do not cover the full L2P index page table"));
1053
1054  /* read actual page tables */
1055  for (page = 0; page < page_count; ++page)
1056    {
1057      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1058      if (value == 0)
1059        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1060                                _("Empty L2P index page"));
1061
1062      result->page_table[page].size = (apr_uint32_t)value;
1063      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1064      if (value > result->page_size)
1065        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1066                                _("Page exceeds L2P index page size"));
1067
1068      result->page_table[page].entry_count = (apr_uint32_t)value;
1069    }
1070
1071  /* correct the page description offsets */
1072  offset = packed_stream_offset(rev_file->l2p_stream);
1073  for (page = 0; page < page_count; ++page)
1074    {
1075      result->page_table[page].offset = offset;
1076      offset += result->page_table[page].size;
1077    }
1078
1079  /* return and cache the header */
1080  *header = result;
1081  SVN_ERR(svn_cache__set(ffd->l2p_header_cache, &key, result, scratch_pool));
1082
1083  return SVN_NO_ERROR;
1084}
1085
1086/* Data structure that describes which l2p page info shall be extracted
1087 * from the cache and contains the fields that receive the result.
1088 */
1089typedef struct l2p_page_info_baton_t
1090{
1091  /* input data: we want the page covering (REVISION,ITEM_INDEX) */
1092  svn_revnum_t revision;
1093  apr_uint64_t item_index;
1094
1095  /* out data */
1096  /* page location and size of the page within the l2p index file */
1097  l2p_page_table_entry_t entry;
1098
1099  /* page number within the pages for REVISION (not l2p index global!) */
1100  apr_uint32_t page_no;
1101
1102  /* offset of ITEM_INDEX within that page */
1103  apr_uint32_t page_offset;
1104
1105  /* revision identifying the l2p index file, also the first rev in that */
1106  svn_revnum_t first_revision;
1107} l2p_page_info_baton_t;
1108
1109
1110/* Utility function that copies the info requested by BATON->REVISION and
1111 * BATON->ITEM_INDEX and from HEADER and PAGE_TABLE into the output fields
1112 * of *BATON.  Use SCRATCH_POOL for temporary allocations.
1113 */
1114static svn_error_t *
1115l2p_page_info_copy(l2p_page_info_baton_t *baton,
1116                   const l2p_header_t *header,
1117                   const l2p_page_table_entry_t *page_table,
1118                   const apr_size_t *page_table_index,
1119                   apr_pool_t *scratch_pool)
1120{
1121  /* revision offset within the index file */
1122  apr_size_t rel_revision = baton->revision - header->first_revision;
1123  if (rel_revision >= header->revision_count)
1124    return svn_error_createf(SVN_ERR_FS_INDEX_REVISION , NULL,
1125                             _("Revision %ld not covered by item index"),
1126                             baton->revision);
1127
1128  /* select the relevant page */
1129  if (baton->item_index < header->page_size)
1130    {
1131      /* most revs fit well into a single page */
1132      baton->page_offset = (apr_uint32_t)baton->item_index;
1133      baton->page_no = 0;
1134      baton->entry = page_table[page_table_index[rel_revision]];
1135    }
1136  else
1137    {
1138      const l2p_page_table_entry_t *first_entry;
1139      const l2p_page_table_entry_t *last_entry;
1140      apr_uint64_t max_item_index;
1141
1142      /* range of pages for this rev */
1143      first_entry = page_table + page_table_index[rel_revision];
1144      last_entry = page_table + page_table_index[rel_revision + 1];
1145
1146      /* do we hit a valid index page? */
1147      max_item_index =   (apr_uint64_t)header->page_size
1148                       * (last_entry - first_entry);
1149      if (baton->item_index >= max_item_index)
1150        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
1151                                _("Item index %s exceeds l2p limit "
1152                                  "of %s for revision %ld"),
1153                                apr_psprintf(scratch_pool,
1154                                             "%" APR_UINT64_T_FMT,
1155                                             baton->item_index),
1156                                apr_psprintf(scratch_pool,
1157                                             "%" APR_UINT64_T_FMT,
1158                                             max_item_index),
1159                                baton->revision);
1160
1161      /* all pages are of the same size and full, except for the last one */
1162      baton->page_offset = (apr_uint32_t)(baton->item_index % header->page_size);
1163      baton->page_no = (apr_uint32_t)(baton->item_index / header->page_size);
1164      baton->entry = first_entry[baton->page_no];
1165    }
1166
1167  baton->first_revision = header->first_revision;
1168
1169  return SVN_NO_ERROR;
1170}
1171
1172/* Implement svn_cache__partial_getter_func_t: copy the data requested in
1173 * l2p_page_info_baton_t *BATON from l2p_header_t *DATA into the output
1174 * fields in *BATON.
1175 */
1176static svn_error_t *
1177l2p_page_info_access_func(void **out,
1178                          const void *data,
1179                          apr_size_t data_len,
1180                          void *baton,
1181                          apr_pool_t *result_pool)
1182{
1183  /* resolve all pointer values of in-cache data */
1184  const l2p_header_t *header = data;
1185  const l2p_page_table_entry_t *page_table
1186    = svn_temp_deserializer__ptr(header,
1187                                 (const void *const *)&header->page_table);
1188  const apr_size_t *page_table_index
1189    = svn_temp_deserializer__ptr(header,
1190                           (const void *const *)&header->page_table_index);
1191
1192  /* copy the info */
1193  return l2p_page_info_copy(baton, header, page_table, page_table_index,
1194                            result_pool);
1195}
1196
1197/* Get the page info requested in *BATON from FS and set the output fields
1198 * in *BATON.  Use REV_FILE for on-disk file access.
1199 * Use SCRATCH_POOL for temporary allocations.
1200 */
1201static svn_error_t *
1202get_l2p_page_info(l2p_page_info_baton_t *baton,
1203                  svn_fs_fs__revision_file_t *rev_file,
1204                  svn_fs_t *fs,
1205                  apr_pool_t *scratch_pool)
1206{
1207  fs_fs_data_t *ffd = fs->fsap_data;
1208  l2p_header_t *result;
1209  svn_boolean_t is_cached = FALSE;
1210  void *dummy = NULL;
1211
1212  /* try to find the info in the cache */
1213  pair_cache_key_t key;
1214  key.revision = rev_file->start_revision;
1215  key.second = rev_file->is_packed;
1216  SVN_ERR(svn_cache__get_partial((void**)&dummy, &is_cached,
1217                                 ffd->l2p_header_cache, &key,
1218                                 l2p_page_info_access_func, baton,
1219                                 scratch_pool));
1220  if (is_cached)
1221    return SVN_NO_ERROR;
1222
1223  /* read from disk, cache and copy the result */
1224  SVN_ERR(get_l2p_header_body(&result, rev_file, fs, baton->revision,
1225                              scratch_pool, scratch_pool));
1226  SVN_ERR(l2p_page_info_copy(baton, result, result->page_table,
1227                             result->page_table_index, scratch_pool));
1228
1229  return SVN_NO_ERROR;
1230}
1231
1232/* Data request structure used by l2p_page_table_access_func.
1233 */
1234typedef struct l2p_page_table_baton_t
1235{
1236  /* revision for which to read the page table */
1237  svn_revnum_t revision;
1238
1239  /* page table entries (of type l2p_page_table_entry_t).
1240   * Must be created by caller and will be filled by callee. */
1241  apr_array_header_t *pages;
1242} l2p_page_table_baton_t;
1243
1244/* Implement svn_cache__partial_getter_func_t: copy the data requested in
1245 * l2p_page_baton_t *BATON from l2p_page_t *DATA into BATON->PAGES and *OUT.
1246 */
1247static svn_error_t *
1248l2p_page_table_access_func(void **out,
1249                           const void *data,
1250                           apr_size_t data_len,
1251                           void *baton,
1252                           apr_pool_t *result_pool)
1253{
1254  /* resolve in-cache pointers */
1255  l2p_page_table_baton_t *table_baton = baton;
1256  const l2p_header_t *header = (const l2p_header_t *)data;
1257  const l2p_page_table_entry_t *page_table
1258    = svn_temp_deserializer__ptr(header,
1259                                 (const void *const *)&header->page_table);
1260  const apr_size_t *page_table_index
1261    = svn_temp_deserializer__ptr(header,
1262                           (const void *const *)&header->page_table_index);
1263
1264  /* copy the revision's page table into BATON */
1265  apr_size_t rel_revision = table_baton->revision - header->first_revision;
1266  if (rel_revision < header->revision_count)
1267    {
1268      const l2p_page_table_entry_t *entry
1269        = page_table + page_table_index[rel_revision];
1270      const l2p_page_table_entry_t *last_entry
1271        = page_table + page_table_index[rel_revision + 1];
1272
1273      for (; entry < last_entry; ++entry)
1274        APR_ARRAY_PUSH(table_baton->pages, l2p_page_table_entry_t)
1275          = *entry;
1276    }
1277
1278  /* set output as a courtesy to the caller */
1279  *out = table_baton->pages;
1280
1281  return SVN_NO_ERROR;
1282}
1283
1284/* Read the l2p index page table for REVISION in FS from cache and return
1285 * it in PAGES.  The later must be provided by the caller (and can be
1286 * re-used); existing entries will be removed before writing the result.
1287 * If the data cannot be found in the cache, the result will be empty
1288 * (it never can be empty for a valid REVISION if the data is cached).
1289 * Use the info from REV_FILE to determine pack / rev file properties.
1290 * Use SCRATCH_POOL for temporary allocations.
1291 */
1292static svn_error_t *
1293get_l2p_page_table(apr_array_header_t *pages,
1294                   svn_fs_t *fs,
1295                   svn_fs_fs__revision_file_t *rev_file,
1296                   svn_revnum_t revision,
1297                   apr_pool_t *scratch_pool)
1298{
1299  fs_fs_data_t *ffd = fs->fsap_data;
1300  svn_boolean_t is_cached = FALSE;
1301  l2p_page_table_baton_t baton;
1302
1303  pair_cache_key_t key;
1304  key.revision = rev_file->start_revision;
1305  key.second = rev_file->is_packed;
1306
1307  apr_array_clear(pages);
1308  baton.revision = revision;
1309  baton.pages = pages;
1310  SVN_ERR(svn_cache__get_partial((void**)&pages, &is_cached,
1311                                 ffd->l2p_header_cache, &key,
1312                                 l2p_page_table_access_func, &baton,
1313                                 scratch_pool));
1314
1315  return SVN_NO_ERROR;
1316}
1317
1318/* From the log-to-phys index file starting at START_REVISION in FS, read
1319 * the mapping page identified by TABLE_ENTRY and return it in *PAGE.
1320 * Use REV_FILE to access on-disk files.
1321 * Use RESULT_POOL for allocations.
1322 */
1323static svn_error_t *
1324get_l2p_page(l2p_page_t **page,
1325             svn_fs_fs__revision_file_t *rev_file,
1326             svn_fs_t *fs,
1327             svn_revnum_t start_revision,
1328             l2p_page_table_entry_t *table_entry,
1329             apr_pool_t *result_pool)
1330{
1331  apr_uint32_t i;
1332  l2p_page_t *result = apr_pcalloc(result_pool, sizeof(*result));
1333  apr_uint64_t last_value = 0;
1334
1335  /* open index file and select page */
1336  SVN_ERR(auto_open_l2p_index(rev_file, fs, start_revision));
1337  packed_stream_seek(rev_file->l2p_stream, table_entry->offset);
1338
1339  /* initialize the page content */
1340  result->entry_count = table_entry->entry_count;
1341  result->offsets = apr_pcalloc(result_pool, result->entry_count
1342                                           * sizeof(*result->offsets));
1343
1344  /* read all page entries (offsets in rev file and container sub-items) */
1345  for (i = 0; i < result->entry_count; ++i)
1346    {
1347      apr_uint64_t value = 0;
1348      SVN_ERR(packed_stream_get(&value, rev_file->l2p_stream));
1349      last_value += decode_int(value);
1350      result->offsets[i] = last_value - 1;
1351    }
1352
1353  /* After reading all page entries, the read cursor must have moved by
1354   * TABLE_ENTRY->SIZE bytes. */
1355  if (   packed_stream_offset(rev_file->l2p_stream)
1356      != table_entry->offset + table_entry->size)
1357    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1358                _("L2P actual page size does not match page table value."));
1359
1360  *page = result;
1361
1362  return SVN_NO_ERROR;
1363}
1364
1365/* Utility function.  Read the l2p index pages for REVISION in FS from
1366 * REV_FILE and put them into the cache.  Skip page number EXLCUDED_PAGE_NO
1367 * (use -1 for 'skip none') and pages outside the MIN_OFFSET, MAX_OFFSET
1368 * range in the l2p index file.  The index is being identified by
1369 * FIRST_REVISION.  PAGES is a scratch container provided by the caller.
1370 * SCRATCH_POOL is used for temporary allocations.
1371 *
1372 * This function may be a no-op if the header cache lookup fails / misses.
1373 */
1374static svn_error_t *
1375prefetch_l2p_pages(svn_boolean_t *end,
1376                   svn_fs_t *fs,
1377                   svn_fs_fs__revision_file_t *rev_file,
1378                   svn_revnum_t first_revision,
1379                   svn_revnum_t revision,
1380                   apr_array_header_t *pages,
1381                   int exlcuded_page_no,
1382                   apr_off_t min_offset,
1383                   apr_off_t max_offset,
1384                   apr_pool_t *scratch_pool)
1385{
1386  fs_fs_data_t *ffd = fs->fsap_data;
1387  int i;
1388  apr_pool_t *iterpool;
1389  svn_fs_fs__page_cache_key_t key = { 0 };
1390
1391  /* Parameter check. */
1392  if (min_offset < 0)
1393    min_offset = 0;
1394
1395  if (max_offset <= 0)
1396    {
1397      /* Nothing to do */
1398      *end = TRUE;
1399      return SVN_NO_ERROR;
1400    }
1401
1402  /* get the page table for REVISION from cache */
1403  *end = FALSE;
1404  SVN_ERR(get_l2p_page_table(pages, fs, rev_file, revision, scratch_pool));
1405  if (pages->nelts == 0 || rev_file->l2p_stream == NULL)
1406    {
1407      /* not found -> we can't continue without hitting the disk again */
1408      *end = TRUE;
1409      return SVN_NO_ERROR;
1410    }
1411
1412  /* prefetch pages individually until all are done or we found one in
1413   * the cache */
1414  iterpool = svn_pool_create(scratch_pool);
1415  assert(revision <= APR_UINT32_MAX);
1416  key.revision = (apr_uint32_t)revision;
1417  key.is_packed = rev_file->is_packed;
1418
1419  for (i = 0; i < pages->nelts && !*end; ++i)
1420    {
1421      svn_boolean_t is_cached;
1422
1423      l2p_page_table_entry_t *entry
1424        = &APR_ARRAY_IDX(pages, i, l2p_page_table_entry_t);
1425      svn_pool_clear(iterpool);
1426
1427      if (i == exlcuded_page_no)
1428        continue;
1429
1430      /* skip pages outside the specified index file range */
1431      if (   entry->offset < (apr_uint64_t)min_offset
1432          || entry->offset + entry->size > (apr_uint64_t)max_offset)
1433        {
1434          *end = TRUE;
1435          continue;
1436        }
1437
1438      /* page already in cache? */
1439      key.page = i;
1440      SVN_ERR(svn_cache__has_key(&is_cached, ffd->l2p_page_cache,
1441                                 &key, iterpool));
1442      if (!is_cached)
1443        {
1444          /* no in cache -> read from stream (data already buffered in APR)
1445           * and cache the result */
1446          l2p_page_t *page = NULL;
1447          SVN_ERR(get_l2p_page(&page, rev_file, fs, first_revision, entry,
1448                               iterpool));
1449
1450          SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page,
1451                                 iterpool));
1452        }
1453    }
1454
1455  svn_pool_destroy(iterpool);
1456
1457  return SVN_NO_ERROR;
1458}
1459
1460/* Request data structure for l2p_entry_access_func.
1461 */
1462typedef struct l2p_entry_baton_t
1463{
1464  /* in data */
1465  /* revision. Used for error messages only */
1466  svn_revnum_t revision;
1467
1468  /* item index to look up. Used for error messages only */
1469  apr_uint64_t item_index;
1470
1471  /* offset within the cached page */
1472  apr_uint32_t page_offset;
1473
1474  /* out data */
1475  /* absolute item or container offset in rev / pack file */
1476  apr_uint64_t offset;
1477} l2p_entry_baton_t;
1478
1479/* Return the rev / pack file offset of the item at BATON->PAGE_OFFSET in
1480 * OFFSETS of PAGE and write it to *OFFSET.
1481 */
1482static svn_error_t *
1483l2p_page_get_entry(l2p_entry_baton_t *baton,
1484                   const l2p_page_t *page,
1485                   const apr_uint64_t *offsets,
1486                   apr_pool_t *scratch_pool)
1487{
1488  /* overflow check */
1489  if (page->entry_count <= baton->page_offset)
1490    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
1491                             _("Item index %s"
1492                               " too large in revision %ld"),
1493                             apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
1494                                          baton->item_index),
1495                             baton->revision);
1496
1497  /* return the result */
1498  baton->offset = offsets[baton->page_offset];
1499
1500  return SVN_NO_ERROR;
1501}
1502
1503/* Implement svn_cache__partial_getter_func_t: copy the data requested in
1504 * l2p_entry_baton_t *BATON from l2p_page_t *DATA into BATON->OFFSET.
1505 * *OUT remains unchanged.
1506 */
1507static svn_error_t *
1508l2p_entry_access_func(void **out,
1509                      const void *data,
1510                      apr_size_t data_len,
1511                      void *baton,
1512                      apr_pool_t *result_pool)
1513{
1514  /* resolve all in-cache pointers */
1515  const l2p_page_t *page = data;
1516  const apr_uint64_t *offsets
1517    = svn_temp_deserializer__ptr(page, (const void *const *)&page->offsets);
1518
1519  /* return the requested data */
1520  return l2p_page_get_entry(baton, page, offsets, result_pool);
1521}
1522
1523/* Using the log-to-phys indexes in FS, find the absolute offset in the
1524 * rev file for (REVISION, ITEM_INDEX) and return it in *OFFSET.
1525 * Use SCRATCH_POOL for temporary allocations.
1526 */
1527static svn_error_t *
1528l2p_index_lookup(apr_off_t *offset,
1529                 svn_fs_t *fs,
1530                 svn_fs_fs__revision_file_t *rev_file,
1531                 svn_revnum_t revision,
1532                 apr_uint64_t item_index,
1533                 apr_pool_t *scratch_pool)
1534{
1535  fs_fs_data_t *ffd = fs->fsap_data;
1536  l2p_page_info_baton_t info_baton;
1537  l2p_entry_baton_t page_baton;
1538  l2p_page_t *page = NULL;
1539  svn_fs_fs__page_cache_key_t key = { 0 };
1540  svn_boolean_t is_cached = FALSE;
1541  void *dummy = NULL;
1542
1543  /* read index master data structure and extract the info required to
1544   * access the l2p index page for (REVISION,ITEM_INDEX)*/
1545  info_baton.revision = revision;
1546  info_baton.item_index = item_index;
1547  SVN_ERR(get_l2p_page_info(&info_baton, rev_file, fs, scratch_pool));
1548
1549  /* try to find the page in the cache and get the OFFSET from it */
1550  page_baton.revision = revision;
1551  page_baton.item_index = item_index;
1552  page_baton.page_offset = info_baton.page_offset;
1553
1554  assert(revision <= APR_UINT32_MAX);
1555  key.revision = (apr_uint32_t)revision;
1556  key.is_packed = svn_fs_fs__is_packed_rev(fs, revision);
1557  key.page = info_baton.page_no;
1558
1559  SVN_ERR(svn_cache__get_partial(&dummy, &is_cached,
1560                                 ffd->l2p_page_cache, &key,
1561                                 l2p_entry_access_func, &page_baton,
1562                                 scratch_pool));
1563
1564  if (!is_cached)
1565    {
1566      /* we need to read the info from disk (might already be in the
1567       * APR file buffer, though) */
1568      apr_array_header_t *pages;
1569      svn_revnum_t prefetch_revision;
1570      svn_revnum_t last_revision
1571        = info_baton.first_revision
1572          + (key.is_packed ? ffd->max_files_per_dir : 1);
1573      svn_boolean_t end;
1574      apr_off_t max_offset
1575        = APR_ALIGN(info_baton.entry.offset + info_baton.entry.size,
1576                    ffd->block_size);
1577      apr_off_t min_offset = max_offset - ffd->block_size;
1578
1579      /* read the relevant page */
1580      SVN_ERR(get_l2p_page(&page, rev_file, fs, info_baton.first_revision,
1581                           &info_baton.entry, scratch_pool));
1582
1583      /* cache the page and extract the result we need */
1584      SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page, scratch_pool));
1585      SVN_ERR(l2p_page_get_entry(&page_baton, page, page->offsets,
1586                                 scratch_pool));
1587
1588      if (ffd->use_block_read)
1589        {
1590          apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1591
1592          /* prefetch pages from following and preceding revisions */
1593          pages = apr_array_make(scratch_pool, 16,
1594                                 sizeof(l2p_page_table_entry_t));
1595          end = FALSE;
1596          for (prefetch_revision = revision;
1597              prefetch_revision < last_revision && !end;
1598              ++prefetch_revision)
1599            {
1600              int excluded_page_no = prefetch_revision == revision
1601                                  ? info_baton.page_no
1602                                  : -1;
1603              svn_pool_clear(iterpool);
1604
1605              SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file,
1606                                        info_baton.first_revision,
1607                                        prefetch_revision, pages,
1608                                        excluded_page_no, min_offset,
1609                                        max_offset, iterpool));
1610            }
1611
1612          end = FALSE;
1613          for (prefetch_revision = revision-1;
1614              prefetch_revision >= info_baton.first_revision && !end;
1615              --prefetch_revision)
1616            {
1617              svn_pool_clear(iterpool);
1618
1619              SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file,
1620                                        info_baton.first_revision,
1621                                        prefetch_revision, pages, -1,
1622                                        min_offset, max_offset, iterpool));
1623            }
1624
1625          svn_pool_destroy(iterpool);
1626        }
1627    }
1628
1629  *offset = page_baton.offset;
1630
1631  return SVN_NO_ERROR;
1632}
1633
1634/* Using the log-to-phys proto index in transaction TXN_ID in FS, find the
1635 * absolute offset in the proto rev file for the given ITEM_INDEX and return
1636 * it in *OFFSET.  Use SCRATCH_POOL for temporary allocations.
1637 */
1638static svn_error_t *
1639l2p_proto_index_lookup(apr_off_t *offset,
1640                       svn_fs_t *fs,
1641                       const svn_fs_fs__id_part_t *txn_id,
1642                       apr_uint64_t item_index,
1643                       apr_pool_t *scratch_pool)
1644{
1645  svn_boolean_t eof = FALSE;
1646  apr_file_t *file = NULL;
1647  SVN_ERR(svn_io_file_open(&file,
1648                           svn_fs_fs__path_l2p_proto_index(fs, txn_id,
1649                                                           scratch_pool),
1650                           APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1651                           scratch_pool));
1652
1653  /* process all entries until we fail due to EOF */
1654  *offset = -1;
1655  while (!eof)
1656    {
1657      l2p_proto_entry_t entry;
1658
1659      /* (attempt to) read the next entry from the source */
1660      SVN_ERR(read_l2p_entry_from_proto_index(file, &entry, &eof,
1661                                              scratch_pool));
1662
1663      /* handle new revision */
1664      if (!eof && entry.item_index == item_index)
1665        {
1666          *offset = (apr_off_t)entry.offset - 1;
1667          break;
1668        }
1669    }
1670
1671  SVN_ERR(svn_io_file_close(file, scratch_pool));
1672
1673  return SVN_NO_ERROR;
1674}
1675
1676/* Read the log-to-phys header info of the index covering REVISION from FS
1677 * and return it in *HEADER.  REV_FILE provides the pack / rev status.
1678 * Allocate *HEADER in RESULT_POOL, use SCRATCH_POOL for temporary
1679 * allocations.
1680 */
1681static svn_error_t *
1682get_l2p_header(l2p_header_t **header,
1683               svn_fs_fs__revision_file_t *rev_file,
1684               svn_fs_t *fs,
1685               svn_revnum_t revision,
1686               apr_pool_t *result_pool,
1687               apr_pool_t *scratch_pool)
1688{
1689  fs_fs_data_t *ffd = fs->fsap_data;
1690  svn_boolean_t is_cached = FALSE;
1691
1692  /* first, try cache lookop */
1693  pair_cache_key_t key;
1694  key.revision = rev_file->start_revision;
1695  key.second = rev_file->is_packed;
1696  SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->l2p_header_cache,
1697                         &key, result_pool));
1698  if (is_cached)
1699    return SVN_NO_ERROR;
1700
1701  /* read from disk and cache the result */
1702  SVN_ERR(get_l2p_header_body(header, rev_file, fs, revision, result_pool,
1703                              scratch_pool));
1704
1705  return SVN_NO_ERROR;
1706}
1707
1708svn_error_t *
1709svn_fs_fs__l2p_get_max_ids(apr_array_header_t **max_ids,
1710                           svn_fs_t *fs,
1711                           svn_revnum_t start_rev,
1712                           apr_size_t count,
1713                           apr_pool_t *result_pool,
1714                           apr_pool_t *scratch_pool)
1715{
1716  l2p_header_t *header = NULL;
1717  svn_revnum_t revision;
1718  svn_revnum_t last_rev = (svn_revnum_t)(start_rev + count);
1719  svn_fs_fs__revision_file_t *rev_file;
1720  apr_pool_t *header_pool = svn_pool_create(scratch_pool);
1721
1722  /* read index master data structure for the index covering START_REV */
1723  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, start_rev,
1724                                           header_pool, header_pool));
1725  SVN_ERR(get_l2p_header(&header, rev_file, fs, start_rev, header_pool,
1726                         header_pool));
1727  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1728
1729  /* Determine the length of the item index list for each rev.
1730   * Read new index headers as required. */
1731  *max_ids = apr_array_make(result_pool, (int)count, sizeof(apr_uint64_t));
1732  for (revision = start_rev; revision < last_rev; ++revision)
1733    {
1734      apr_uint64_t full_page_count;
1735      apr_uint64_t item_count;
1736      apr_size_t first_page_index, last_page_index;
1737
1738      if (revision >= header->first_revision + header->revision_count)
1739        {
1740          /* need to read the next index. Clear up memory used for the
1741           * previous one.  Note that intermittent pack runs do not change
1742           * the number of items in a revision, i.e. there is no consistency
1743           * issue here. */
1744          svn_pool_clear(header_pool);
1745          SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
1746                                                  header_pool, header_pool));
1747          SVN_ERR(get_l2p_header(&header, rev_file, fs, revision,
1748                                 header_pool, header_pool));
1749          SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1750        }
1751
1752      /* in a revision with N index pages, the first N-1 index pages are
1753       * "full", i.e. contain HEADER->PAGE_SIZE entries */
1754      first_page_index
1755         = header->page_table_index[revision - header->first_revision];
1756      last_page_index
1757         = header->page_table_index[revision - header->first_revision + 1];
1758      full_page_count = last_page_index - first_page_index - 1;
1759      item_count = full_page_count * header->page_size
1760                 + header->page_table[last_page_index - 1].entry_count;
1761
1762      APR_ARRAY_PUSH(*max_ids, apr_uint64_t) = item_count;
1763    }
1764
1765  svn_pool_destroy(header_pool);
1766  return SVN_NO_ERROR;
1767}
1768
1769svn_error_t *
1770svn_fs_fs__item_offset(apr_off_t *absolute_position,
1771                       svn_fs_t *fs,
1772                       svn_fs_fs__revision_file_t *rev_file,
1773                       svn_revnum_t revision,
1774                       const svn_fs_fs__id_part_t *txn_id,
1775                       apr_uint64_t item_index,
1776                       apr_pool_t *scratch_pool)
1777{
1778  svn_error_t *err = SVN_NO_ERROR;
1779  if (txn_id)
1780    {
1781      if (svn_fs_fs__use_log_addressing(fs))
1782        {
1783          /* the txn is going to produce a rev with logical addressing.
1784             So, we need to get our info from the (proto) index file. */
1785          SVN_ERR(l2p_proto_index_lookup(absolute_position, fs, txn_id,
1786                                         item_index, scratch_pool));
1787        }
1788      else
1789        {
1790          /* for data in txns, item_index *is* the offset */
1791          *absolute_position = item_index;
1792        }
1793    }
1794  else if (svn_fs_fs__use_log_addressing(fs))
1795    {
1796      /* ordinary index lookup */
1797      SVN_ERR(l2p_index_lookup(absolute_position, fs, rev_file, revision,
1798                               item_index, scratch_pool));
1799    }
1800  else if (rev_file->is_packed)
1801    {
1802      /* pack file with physical addressing */
1803      apr_off_t rev_offset;
1804      SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, revision,
1805                                           scratch_pool));
1806      *absolute_position = rev_offset + item_index;
1807    }
1808  else
1809    {
1810      /* for non-packed revs with physical addressing,
1811         item_index *is* the offset */
1812      *absolute_position = item_index;
1813    }
1814
1815  return svn_error_trace(err);
1816}
1817
1818/*
1819 * phys-to-log index
1820 */
1821svn_error_t *
1822svn_fs_fs__p2l_proto_index_open(apr_file_t **proto_index,
1823                                const char *file_name,
1824                                apr_pool_t *result_pool)
1825{
1826  SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE
1827                           | APR_CREATE | APR_APPEND | APR_BUFFERED,
1828                           APR_OS_DEFAULT, result_pool));
1829
1830  return SVN_NO_ERROR;
1831}
1832
1833
1834svn_error_t *
1835svn_fs_fs__p2l_proto_index_add_entry(apr_file_t *proto_index,
1836                                     const svn_fs_fs__p2l_entry_t *entry,
1837                                     apr_pool_t *scratch_pool)
1838{
1839  apr_uint64_t revision;
1840
1841  /* Make sure all signed elements of ENTRY have non-negative values.
1842   *
1843   * For file offsets and sizes, this is a given as we use them to describe
1844   * absolute positions and sizes.  For revisions, SVN_INVALID_REVNUM is
1845   * valid, hence we have to shift it by 1.
1846   */
1847  SVN_ERR_ASSERT(entry->offset >= 0);
1848  SVN_ERR_ASSERT(entry->size >= 0);
1849  SVN_ERR_ASSERT(   entry->item.revision >= 0
1850                 || entry->item.revision == SVN_INVALID_REVNUM);
1851
1852  revision = entry->item.revision == SVN_INVALID_REVNUM
1853           ? 0
1854           : ((apr_uint64_t)entry->item.revision + 1);
1855
1856  /* Now, all values will nicely convert to uint64. */
1857  /* Make sure to keep P2L_PROTO_INDEX_ENTRY_SIZE consistent with this: */
1858
1859  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->offset,
1860                                      scratch_pool));
1861  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->size,
1862                                      scratch_pool));
1863  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->type,
1864                                      scratch_pool));
1865  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->fnv1_checksum,
1866                                      scratch_pool));
1867  SVN_ERR(write_uint64_to_proto_index(proto_index, revision,
1868                                      scratch_pool));
1869  SVN_ERR(write_uint64_to_proto_index(proto_index, entry->item.number,
1870                                      scratch_pool));
1871
1872  return SVN_NO_ERROR;
1873}
1874
1875/* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file
1876 * in *EOF, or error out in that case if EOF is NULL.  *ENTRY is in an
1877 * undefined state if an end-of-file occurred.
1878 * Use SCRATCH_POOL for temporary allocations.
1879 */
1880static svn_error_t *
1881read_p2l_entry_from_proto_index(apr_file_t *proto_index,
1882                                svn_fs_fs__p2l_entry_t *entry,
1883                                svn_boolean_t *eof,
1884                                apr_pool_t *scratch_pool)
1885{
1886  apr_uint64_t revision;
1887
1888  SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->offset,
1889                                      eof, scratch_pool));
1890  SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->size,
1891                                      eof, scratch_pool));
1892  SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->type,
1893                                       eof, scratch_pool));
1894  SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->fnv1_checksum,
1895                                       eof, scratch_pool));
1896  SVN_ERR(read_uint64_from_proto_index(proto_index, &revision,
1897                                       eof, scratch_pool));
1898  SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->item.number,
1899                                       eof, scratch_pool));
1900
1901  /* Do the inverse REVSION number conversion (see
1902   * svn_fs_fs__p2l_proto_index_add_entry), if we actually read a complete
1903   * record.
1904   */
1905  if (!eof || !*eof)
1906    {
1907      /* Be careful with the arithmetics here (overflows and wrap-around): */
1908      if (revision > 0 && revision - 1 > LONG_MAX)
1909        return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
1910                                _("Revision 0x%s too large, max = 0x%s"),
1911                                apr_psprintf(scratch_pool,
1912                                             "%" APR_UINT64_T_HEX_FMT,
1913                                             revision),
1914                                apr_psprintf(scratch_pool,
1915                                             "%" APR_UINT64_T_HEX_FMT,
1916                                             (apr_uint64_t)LONG_MAX));
1917
1918      /* Shortening conversion from unsigned to signed int is well-defined
1919       * and not lossy in C because the value can be represented in the
1920       * target type.  Also, cast to 'long' instead of 'svn_revnum_t' here
1921       * to provoke a compiler warning if those types should differ and we
1922       * would need to change the overflow checking logic.
1923       */
1924      entry->item.revision = revision == 0
1925                           ? SVN_INVALID_REVNUM
1926                           : (long)(revision - 1);
1927    }
1928
1929  return SVN_NO_ERROR;
1930}
1931
1932svn_error_t *
1933svn_fs_fs__p2l_proto_index_next_offset(apr_off_t *next_offset,
1934                                       apr_file_t *proto_index,
1935                                       apr_pool_t *scratch_pool)
1936{
1937  apr_off_t offset = 0;
1938
1939  /* Empty index file? */
1940  SVN_ERR(svn_io_file_seek(proto_index, APR_END, &offset, scratch_pool));
1941  if (offset == 0)
1942    {
1943      *next_offset = 0;
1944    }
1945  else
1946    {
1947      /* At least one entry.  Read last entry. */
1948      svn_fs_fs__p2l_entry_t entry;
1949      offset -= P2L_PROTO_INDEX_ENTRY_SIZE;
1950
1951      SVN_ERR(svn_io_file_seek(proto_index, APR_SET, &offset, scratch_pool));
1952      SVN_ERR(read_p2l_entry_from_proto_index(proto_index, &entry,
1953                                              NULL, scratch_pool));
1954
1955      /* Return next offset. */
1956      *next_offset = entry.offset + entry.size;
1957    }
1958
1959  return SVN_NO_ERROR;
1960}
1961
1962svn_error_t *
1963svn_fs_fs__p2l_index_append(svn_checksum_t **checksum,
1964                            svn_fs_t *fs,
1965                            apr_file_t *index_file,
1966                            const char *proto_file_name,
1967                            svn_revnum_t revision,
1968                            apr_pool_t *result_pool,
1969                            apr_pool_t *scratch_pool)
1970{
1971  fs_fs_data_t *ffd = fs->fsap_data;
1972  apr_uint64_t page_size = ffd->p2l_page_size;
1973  apr_file_t *proto_index = NULL;
1974  svn_stream_t *stream;
1975  int i;
1976  svn_boolean_t eof = FALSE;
1977  unsigned char encoded[ENCODED_INT_LENGTH];
1978  svn_revnum_t last_revision = revision;
1979  apr_uint64_t last_compound = 0;
1980
1981  apr_uint64_t last_entry_end = 0;
1982  apr_uint64_t last_page_end = 0;
1983  apr_uint64_t last_buffer_size = 0;  /* byte offset in the spill buffer at
1984                                         the begin of the current revision */
1985  apr_uint64_t file_size = 0;
1986
1987  /* temporary data structures that collect the data which will be moved
1988     to the target file in a second step */
1989  apr_pool_t *local_pool = svn_pool_create(scratch_pool);
1990  apr_array_header_t *table_sizes
1991     = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
1992
1993  /* 64k blocks, spill after 16MB */
1994  svn_spillbuf_t *buffer
1995     = svn_spillbuf__create(0x10000, 0x1000000, local_pool);
1996
1997  /* for loop temps ... */
1998  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1999
2000  /* start at the beginning of the source file */
2001  SVN_ERR(svn_io_file_open(&proto_index, proto_file_name,
2002                           APR_READ | APR_CREATE | APR_BUFFERED,
2003                           APR_OS_DEFAULT, scratch_pool));
2004
2005  /* process all entries until we fail due to EOF */
2006  while (!eof)
2007    {
2008      svn_fs_fs__p2l_entry_t entry;
2009      apr_uint64_t entry_end;
2010      svn_boolean_t new_page = svn_spillbuf__get_size(buffer) == 0;
2011      apr_uint64_t compound;
2012      apr_int64_t rev_diff, compound_diff;
2013
2014      svn_pool_clear(iterpool);
2015
2016      /* (attempt to) read the next entry from the source */
2017      SVN_ERR(read_p2l_entry_from_proto_index(proto_index, &entry,
2018                                              &eof, iterpool));
2019
2020      /* "unused" (and usually non-existent) section to cover the offsets
2021         at the end the of the last page. */
2022      if (eof)
2023        {
2024          file_size = last_entry_end;
2025
2026          entry.offset = last_entry_end;
2027          entry.size = APR_ALIGN(entry.offset, page_size) - entry.offset;
2028          entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
2029          entry.fnv1_checksum = 0;
2030          entry.item.revision = last_revision;
2031          entry.item.number = 0;
2032        }
2033      else
2034        {
2035          /* fix-up items created when the txn's target rev was unknown */
2036          if (entry.item.revision == SVN_INVALID_REVNUM)
2037            entry.item.revision = revision;
2038        }
2039
2040      /* end pages if entry is extending beyond their boundaries */
2041      entry_end = entry.offset + entry.size;
2042      while (entry_end - last_page_end > page_size)
2043        {
2044          apr_uint64_t buffer_size = svn_spillbuf__get_size(buffer);
2045          APR_ARRAY_PUSH(table_sizes, apr_uint64_t)
2046             = buffer_size - last_buffer_size;
2047
2048          last_buffer_size = buffer_size;
2049          last_page_end += page_size;
2050          new_page = TRUE;
2051        }
2052
2053      /* this entry starts a new table -> store its offset
2054         (all following entries in the same table will store sizes only) */
2055      if (new_page)
2056        {
2057          SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2058                                      encode_uint(encoded, entry.offset),
2059                                      iterpool));
2060          last_revision = revision;
2061          last_compound = 0;
2062        }
2063
2064      /* write simple item entry */
2065      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2066                                  encode_uint(encoded, entry.size),
2067                                  iterpool));
2068
2069      rev_diff = entry.item.revision - last_revision;
2070      last_revision = entry.item.revision;
2071
2072      compound = entry.item.number * 8 + entry.type;
2073      compound_diff = compound - last_compound;
2074      last_compound = compound;
2075
2076      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2077                                  encode_int(encoded, compound_diff),
2078                                  iterpool));
2079      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2080                                  encode_int(encoded, rev_diff),
2081                                  iterpool));
2082      SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2083                                  encode_uint(encoded, entry.fnv1_checksum),
2084                                  iterpool));
2085
2086      last_entry_end = entry_end;
2087    }
2088
2089  /* close the source file */
2090  SVN_ERR(svn_io_file_close(proto_index, local_pool));
2091
2092  /* store length of last table */
2093  APR_ARRAY_PUSH(table_sizes, apr_uint64_t)
2094      = svn_spillbuf__get_size(buffer) - last_buffer_size;
2095
2096  /* Open target stream. */
2097  stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE,
2098                                                            local_pool),
2099                                   NULL, checksum, svn_checksum_md5, FALSE,
2100                                   result_pool);
2101
2102  /* write the start revision, file size and page size */
2103  SVN_ERR(svn_stream_puts(stream, P2L_STREAM_PREFIX));
2104  SVN_ERR(stream_write_encoded(stream, revision));
2105  SVN_ERR(stream_write_encoded(stream, file_size));
2106  SVN_ERR(stream_write_encoded(stream, page_size));
2107
2108  /* write the page table (actually, the sizes of each page description) */
2109  SVN_ERR(stream_write_encoded(stream, table_sizes->nelts));
2110  for (i = 0; i < table_sizes->nelts; ++i)
2111    {
2112      apr_uint64_t value = APR_ARRAY_IDX(table_sizes, i, apr_uint64_t);
2113      SVN_ERR(stream_write_encoded(stream, value));
2114    }
2115
2116  /* append page contents and implicitly close STREAM */
2117  SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool),
2118                           stream, NULL, NULL, local_pool));
2119
2120  svn_pool_destroy(iterpool);
2121  svn_pool_destroy(local_pool);
2122
2123  return SVN_NO_ERROR;
2124}
2125
2126/* If REV_FILE->P2L_STREAM is NULL, create a new stream for the phys-to-log
2127 * index for REVISION in FS using the rev / pack file provided by REV_FILE.
2128 */
2129static svn_error_t *
2130auto_open_p2l_index(svn_fs_fs__revision_file_t *rev_file,
2131                    svn_fs_t *fs,
2132                    svn_revnum_t revision)
2133{
2134  if (rev_file->p2l_stream == NULL)
2135    {
2136      fs_fs_data_t *ffd = fs->fsap_data;
2137
2138      SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
2139      SVN_ERR(packed_stream_open(&rev_file->p2l_stream,
2140                                 rev_file->file,
2141                                 rev_file->p2l_offset,
2142                                 rev_file->footer_offset,
2143                                 P2L_STREAM_PREFIX,
2144                                 (apr_size_t)ffd->block_size,
2145                                 rev_file->pool,
2146                                 rev_file->pool));
2147    }
2148
2149  return SVN_NO_ERROR;
2150}
2151
2152
2153/* Read the header data structure of the phys-to-log index for REVISION in
2154 * FS and return it in *HEADER, allocated in RESULT_POOL. Use REV_FILE to
2155 * access on-disk data.  Use SCRATCH_POOL for temporary allocations.
2156 */
2157static svn_error_t *
2158get_p2l_header(p2l_header_t **header,
2159               svn_fs_fs__revision_file_t *rev_file,
2160               svn_fs_t *fs,
2161               svn_revnum_t revision,
2162               apr_pool_t *result_pool,
2163               apr_pool_t *scratch_pool)
2164{
2165  fs_fs_data_t *ffd = fs->fsap_data;
2166  apr_uint64_t value;
2167  apr_size_t i;
2168  apr_off_t offset;
2169  p2l_header_t *result;
2170  svn_boolean_t is_cached = FALSE;
2171
2172  /* look for the header data in our cache */
2173  pair_cache_key_t key;
2174  key.revision = rev_file->start_revision;
2175  key.second = rev_file->is_packed;
2176
2177  SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->p2l_header_cache,
2178                         &key, result_pool));
2179  if (is_cached)
2180    return SVN_NO_ERROR;
2181
2182  /* not found -> must read it from disk.
2183   * Open index file or position read pointer to the begin of the file */
2184  if (rev_file->p2l_stream == NULL)
2185    SVN_ERR(auto_open_p2l_index(rev_file, fs, rev_file->start_revision));
2186  else
2187    packed_stream_seek(rev_file->p2l_stream, 0);
2188
2189  /* allocate result data structure */
2190  result = apr_pcalloc(result_pool, sizeof(*result));
2191
2192  /* Read table sizes, check them for plausibility and allocate page array. */
2193  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2194  result->first_revision = (svn_revnum_t)value;
2195  if (result->first_revision != rev_file->start_revision)
2196    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2197                  _("Index rev / pack file revision numbers do not match"));
2198
2199  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2200  result->file_size = value;
2201  if (result->file_size != (apr_uint64_t)rev_file->l2p_offset)
2202    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2203                   _("Index offset and rev / pack file size do not match"));
2204
2205  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2206  result->page_size = value;
2207  if (!result->page_size || (result->page_size & (result->page_size - 1)))
2208    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2209                            _("P2L index page size is not a power of two"));
2210
2211  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2212  result->page_count = (apr_size_t)value;
2213  if (result->page_count != (result->file_size - 1) / result->page_size + 1)
2214    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2215                   _("P2L page count does not match rev / pack file size"));
2216
2217  result->offsets
2218    = apr_pcalloc(result_pool, (result->page_count + 1) * sizeof(*result->offsets));
2219
2220  /* read page sizes and derive page description offsets from them */
2221  result->offsets[0] = 0;
2222  for (i = 0; i < result->page_count; ++i)
2223    {
2224      SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2225      result->offsets[i+1] = result->offsets[i] + (apr_off_t)value;
2226    }
2227
2228  /* correct the offset values */
2229  offset = packed_stream_offset(rev_file->p2l_stream);
2230  for (i = 0; i <= result->page_count; ++i)
2231    result->offsets[i] += offset;
2232
2233  /* cache the header data */
2234  SVN_ERR(svn_cache__set(ffd->p2l_header_cache, &key, result, scratch_pool));
2235
2236  /* return the result */
2237  *header = result;
2238
2239  return SVN_NO_ERROR;
2240}
2241
2242/* Data structure that describes which p2l page info shall be extracted
2243 * from the cache and contains the fields that receive the result.
2244 */
2245typedef struct p2l_page_info_baton_t
2246{
2247  /* input variables */
2248  /* revision identifying the index file */
2249  svn_revnum_t revision;
2250
2251  /* offset within the page in rev / pack file */
2252  apr_off_t offset;
2253
2254  /* output variables */
2255  /* page containing OFFSET */
2256  apr_size_t page_no;
2257
2258  /* first revision in this p2l index */
2259  svn_revnum_t first_revision;
2260
2261  /* offset within the p2l index file describing this page */
2262  apr_off_t start_offset;
2263
2264  /* offset within the p2l index file describing the following page */
2265  apr_off_t next_offset;
2266
2267  /* PAGE_NO * PAGE_SIZE (if <= OFFSET) */
2268  apr_off_t page_start;
2269
2270  /* total number of pages indexed */
2271  apr_size_t page_count;
2272
2273  /* size of each page in pack / rev file */
2274  apr_uint64_t page_size;
2275} p2l_page_info_baton_t;
2276
2277/* From HEADER and the list of all OFFSETS, fill BATON with the page info
2278 * requested by BATON->OFFSET.
2279 */
2280static void
2281p2l_page_info_copy(p2l_page_info_baton_t *baton,
2282                   const p2l_header_t *header,
2283                   const apr_off_t *offsets)
2284{
2285  /* if the requested offset is out of bounds, return info for
2286   * a zero-sized empty page right behind the last page.
2287   */
2288  if (baton->offset / header->page_size < header->page_count)
2289    {
2290      /* This cast is safe because the value is < header->page_count. */
2291      baton->page_no = (apr_size_t)(baton->offset / header->page_size);
2292      baton->start_offset = offsets[baton->page_no];
2293      baton->next_offset = offsets[baton->page_no + 1];
2294      baton->page_size = header->page_size;
2295    }
2296  else
2297    {
2298      /* Beyond the last page. */
2299      baton->page_no = header->page_count;
2300      baton->start_offset = offsets[baton->page_no];
2301      baton->next_offset = offsets[baton->page_no];
2302      baton->page_size = 0;
2303    }
2304
2305  baton->first_revision = header->first_revision;
2306  baton->page_start = (apr_off_t)(header->page_size * baton->page_no);
2307  baton->page_count = header->page_count;
2308}
2309
2310/* Implement svn_cache__partial_getter_func_t: extract the p2l page info
2311 * requested by BATON and return it in BATON.
2312 */
2313static svn_error_t *
2314p2l_page_info_func(void **out,
2315                   const void *data,
2316                   apr_size_t data_len,
2317                   void *baton,
2318                   apr_pool_t *result_pool)
2319{
2320  /* all the pointers to cached data we need */
2321  const p2l_header_t *header = data;
2322  const apr_off_t *offsets
2323    = svn_temp_deserializer__ptr(header,
2324                                 (const void *const *)&header->offsets);
2325
2326  /* copy data from cache to BATON */
2327  p2l_page_info_copy(baton, header, offsets);
2328  return SVN_NO_ERROR;
2329}
2330
2331/* Read the header data structure of the phys-to-log index for revision
2332 * BATON->REVISION in FS.  Return in *BATON all info relevant to read the
2333 * index page for the rev / pack file offset BATON->OFFSET.  Use REV_FILE
2334 * to access on-disk data.  Use SCRATCH_POOL for temporary allocations.
2335 */
2336static svn_error_t *
2337get_p2l_page_info(p2l_page_info_baton_t *baton,
2338                  svn_fs_fs__revision_file_t *rev_file,
2339                  svn_fs_t *fs,
2340                  apr_pool_t *scratch_pool)
2341{
2342  fs_fs_data_t *ffd = fs->fsap_data;
2343  p2l_header_t *header;
2344  svn_boolean_t is_cached = FALSE;
2345  void *dummy = NULL;
2346
2347  /* look for the header data in our cache */
2348  pair_cache_key_t key;
2349  key.revision = rev_file->start_revision;
2350  key.second = rev_file->is_packed;
2351
2352  SVN_ERR(svn_cache__get_partial(&dummy, &is_cached, ffd->p2l_header_cache,
2353                                 &key, p2l_page_info_func, baton,
2354                                 scratch_pool));
2355  if (is_cached)
2356    return SVN_NO_ERROR;
2357
2358  SVN_ERR(get_p2l_header(&header, rev_file, fs, baton->revision,
2359                         scratch_pool, scratch_pool));
2360
2361  /* copy the requested info into *BATON */
2362  p2l_page_info_copy(baton, header, header->offsets);
2363
2364  return SVN_NO_ERROR;
2365}
2366
2367/* Read a mapping entry from the phys-to-log index STREAM and append it to
2368 * RESULT.  *ITEM_INDEX contains the phys offset for the entry and will
2369 * be moved forward by the size of entry.
2370 */
2371static svn_error_t *
2372read_entry(svn_fs_fs__packed_number_stream_t *stream,
2373           apr_off_t *item_offset,
2374           svn_revnum_t *last_revision,
2375           apr_uint64_t *last_compound,
2376           apr_array_header_t *result)
2377{
2378  apr_uint64_t value;
2379
2380  svn_fs_fs__p2l_entry_t entry;
2381
2382  entry.offset = *item_offset;
2383  SVN_ERR(packed_stream_get(&value, stream));
2384  entry.size = (apr_off_t)value;
2385
2386  SVN_ERR(packed_stream_get(&value, stream));
2387  *last_compound += decode_int(value);
2388
2389  entry.type = *last_compound & 7;
2390  entry.item.number = *last_compound / 8;
2391
2392  /* Verify item type. */
2393  if (entry.type > SVN_FS_FS__ITEM_TYPE_CHANGES)
2394    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2395                            _("Invalid item type in P2L index"));
2396  if (   entry.type == SVN_FS_FS__ITEM_TYPE_CHANGES
2397      && entry.item.number != SVN_FS_FS__ITEM_INDEX_CHANGES)
2398    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2399                            _("Changed path list must have item number 1"));
2400
2401  SVN_ERR(packed_stream_get(&value, stream));
2402  *last_revision += (svn_revnum_t)decode_int(value);
2403  entry.item.revision = *last_revision;
2404
2405  SVN_ERR(packed_stream_get(&value, stream));
2406  entry.fnv1_checksum = (apr_uint32_t)value;
2407
2408  /* Truncating the checksum to 32 bits may have hidden random data in the
2409   * unused extra bits of the on-disk representation (7/8 bit representation
2410   * uses 5 bytes on disk for the 32 bit value, leaving 3 bits unused). */
2411  if (value > APR_UINT32_MAX)
2412    return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2413                            _("Invalid FNV1 checksum in P2L index"));
2414
2415  /* Some of the index data for empty rev / pack file sections will not be
2416   * used during normal operation.  Thus, we have strict rules for the
2417   * contents of those unused fields. */
2418  if (entry.type == SVN_FS_FS__ITEM_TYPE_UNUSED)
2419    if (   entry.item.number != SVN_FS_FS__ITEM_INDEX_UNUSED
2420        || entry.fnv1_checksum != 0)
2421      return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2422                 _("Empty regions must have item number 0 and checksum 0"));
2423
2424  APR_ARRAY_PUSH(result, svn_fs_fs__p2l_entry_t) = entry;
2425  *item_offset += entry.size;
2426
2427  return SVN_NO_ERROR;
2428}
2429
2430/* Read the phys-to-log mappings for the cluster beginning at rev file
2431 * offset PAGE_START from the index for START_REVISION in FS.  The data
2432 * can be found in the index page beginning at START_OFFSET with the next
2433 * page beginning at NEXT_OFFSET.  PAGE_SIZE is the L2P index page size.
2434 * Return the relevant index entries in *ENTRIES.  Use REV_FILE to access
2435 * on-disk data.  Allocate *ENTRIES in RESULT_POOL.
2436 */
2437static svn_error_t *
2438get_p2l_page(apr_array_header_t **entries,
2439             svn_fs_fs__revision_file_t *rev_file,
2440             svn_fs_t *fs,
2441             svn_revnum_t start_revision,
2442             apr_off_t start_offset,
2443             apr_off_t next_offset,
2444             apr_off_t page_start,
2445             apr_uint64_t page_size,
2446             apr_pool_t *result_pool)
2447{
2448  apr_uint64_t value;
2449  apr_array_header_t *result
2450    = apr_array_make(result_pool, 16, sizeof(svn_fs_fs__p2l_entry_t));
2451  apr_off_t item_offset;
2452  apr_off_t offset;
2453  svn_revnum_t last_revision;
2454  apr_uint64_t last_compound;
2455
2456  /* open index and navigate to page start */
2457  SVN_ERR(auto_open_p2l_index(rev_file, fs, start_revision));
2458  packed_stream_seek(rev_file->p2l_stream, start_offset);
2459
2460  /* read rev file offset of the first page entry (all page entries will
2461   * only store their sizes). */
2462  SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2463  item_offset = (apr_off_t)value;
2464
2465  /* read all entries of this page */
2466  last_revision = start_revision;
2467  last_compound = 0;
2468
2469  /* Special case: empty pages. */
2470  if (start_offset == next_offset)
2471    {
2472      /* Empty page. This only happens if the first entry of the next page
2473       * also covers this page (and possibly more) completely. */
2474      SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset,
2475                         &last_revision, &last_compound, result));
2476    }
2477  else
2478    {
2479      /* Read non-empty page. */
2480      do
2481        {
2482          SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset,
2483                             &last_revision, &last_compound, result));
2484          offset = packed_stream_offset(rev_file->p2l_stream);
2485        }
2486      while (offset < next_offset);
2487
2488      /* We should now be exactly at the next offset, i.e. the numbers in
2489       * the stream cannot overlap into the next page description. */
2490      if (offset != next_offset)
2491        return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2492             _("P2L page description overlaps with next page description"));
2493
2494      /* if we haven't covered the cluster end yet, we must read the first
2495       * entry of the next page */
2496      if (item_offset < page_start + page_size)
2497        {
2498          SVN_ERR(packed_stream_get(&value, rev_file->p2l_stream));
2499          item_offset = (apr_off_t)value;
2500          last_revision = start_revision;
2501          last_compound = 0;
2502          SVN_ERR(read_entry(rev_file->p2l_stream, &item_offset,
2503                             &last_revision, &last_compound, result));
2504        }
2505    }
2506
2507  *entries = result;
2508
2509  return SVN_NO_ERROR;
2510}
2511
2512/* If it cannot be found in FS's caches, read the p2l index page selected
2513 * by BATON->OFFSET from REV_FILE.  Don't read the page if it precedes
2514 * MIN_OFFSET.  Set *END to TRUE if the caller should stop refeching.
2515 *
2516 * *BATON will be updated with the selected page's info and SCRATCH_POOL
2517 * will be used for temporary allocations.  If the data is alread in the
2518 * cache, descrease *LEAKING_BUCKET and increase it otherwise.  With that
2519 * pattern we will still read all pages from the block even if some of
2520 * them survived in the cached.
2521 */
2522static svn_error_t *
2523prefetch_p2l_page(svn_boolean_t *end,
2524                  int *leaking_bucket,
2525                  svn_fs_t *fs,
2526                  svn_fs_fs__revision_file_t *rev_file,
2527                  p2l_page_info_baton_t *baton,
2528                  apr_off_t min_offset,
2529                  apr_pool_t *scratch_pool)
2530{
2531  fs_fs_data_t *ffd = fs->fsap_data;
2532  svn_boolean_t already_cached;
2533  apr_array_header_t *page;
2534  svn_fs_fs__page_cache_key_t key = { 0 };
2535
2536  /* fetch the page info */
2537  *end = FALSE;
2538  baton->revision = baton->first_revision;
2539  SVN_ERR(get_p2l_page_info(baton, rev_file, fs, scratch_pool));
2540  if (baton->start_offset < min_offset || !rev_file->p2l_stream)
2541    {
2542      /* page outside limits -> stop prefetching */
2543      *end = TRUE;
2544      return SVN_NO_ERROR;
2545    }
2546
2547  /* do we have that page in our caches already? */
2548  assert(baton->first_revision <= APR_UINT32_MAX);
2549  key.revision = (apr_uint32_t)baton->first_revision;
2550  key.is_packed = svn_fs_fs__is_packed_rev(fs, baton->first_revision);
2551  key.page = baton->page_no;
2552  SVN_ERR(svn_cache__has_key(&already_cached, ffd->p2l_page_cache,
2553                             &key, scratch_pool));
2554
2555  /* yes, already cached */
2556  if (already_cached)
2557    {
2558      /* stop prefetching if most pages are already cached. */
2559      if (!--*leaking_bucket)
2560        *end = TRUE;
2561
2562      return SVN_NO_ERROR;
2563    }
2564
2565  ++*leaking_bucket;
2566
2567  /* read from disk */
2568  SVN_ERR(get_p2l_page(&page, rev_file, fs,
2569                       baton->first_revision,
2570                       baton->start_offset,
2571                       baton->next_offset,
2572                       baton->page_start,
2573                       baton->page_size,
2574                       scratch_pool));
2575
2576  /* and put it into our cache */
2577  SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page, scratch_pool));
2578
2579  return SVN_NO_ERROR;
2580}
2581
2582/* Lookup & construct the baton and key information that we will need for
2583 * a P2L page cache lookup.  We want the page covering OFFSET in the rev /
2584 * pack file containing REVSION in FS.  Return the results in *PAGE_INFO_P
2585 * and *KEY_P.  Read data through REV_FILE.  Use SCRATCH_POOL for temporary
2586 * allocations.
2587 */
2588static svn_error_t *
2589get_p2l_keys(p2l_page_info_baton_t *page_info_p,
2590             svn_fs_fs__page_cache_key_t *key_p,
2591             svn_fs_fs__revision_file_t *rev_file,
2592             svn_fs_t *fs,
2593             svn_revnum_t revision,
2594             apr_off_t offset,
2595             apr_pool_t *scratch_pool)
2596{
2597  p2l_page_info_baton_t page_info;
2598
2599  /* request info for the index pages that describes the pack / rev file
2600   * contents at pack / rev file position OFFSET. */
2601  page_info.offset = offset;
2602  page_info.revision = revision;
2603  SVN_ERR(get_p2l_page_info(&page_info, rev_file, fs, scratch_pool));
2604
2605  /* if the offset refers to a non-existent page, bail out */
2606  if (page_info.page_count <= page_info.page_no)
2607    return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
2608                              _("Offset %s too large in revision %ld"),
2609                              apr_off_t_toa(scratch_pool, offset), revision);
2610
2611  /* return results */
2612  if (page_info_p)
2613    *page_info_p = page_info;
2614
2615  /* construct cache key */
2616  if (key_p)
2617    {
2618      svn_fs_fs__page_cache_key_t key = { 0 };
2619      assert(page_info.first_revision <= APR_UINT32_MAX);
2620      key.revision = (apr_uint32_t)page_info.first_revision;
2621      key.is_packed = rev_file->is_packed;
2622      key.page = page_info.page_no;
2623
2624      *key_p = key;
2625    }
2626
2627  return SVN_NO_ERROR;
2628}
2629
2630/* qsort-compatible compare function that compares the OFFSET of the
2631 * svn_fs_fs__p2l_entry_t in *LHS with the apr_off_t in *RHS. */
2632static int
2633compare_start_p2l_entry(const void *lhs,
2634                        const void *rhs)
2635{
2636  const svn_fs_fs__p2l_entry_t *entry = lhs;
2637  apr_off_t start = *(const apr_off_t*)rhs;
2638  apr_off_t diff = entry->offset - start;
2639
2640  /* restrict result to int */
2641  return diff < 0 ? -1 : (diff == 0 ? 0 : 1);
2642}
2643
2644/* From the PAGE_ENTRIES array of svn_fs_fs__p2l_entry_t, ordered
2645 * by their OFFSET member, copy all elements overlapping the range
2646 * [BLOCK_START, BLOCK_END) to ENTRIES. */
2647static void
2648append_p2l_entries(apr_array_header_t *entries,
2649                   apr_array_header_t *page_entries,
2650                   apr_off_t block_start,
2651                   apr_off_t block_end)
2652{
2653  const svn_fs_fs__p2l_entry_t *entry;
2654  int idx = svn_sort__bsearch_lower_bound(page_entries, &block_start,
2655                                          compare_start_p2l_entry);
2656
2657  /* start at the first entry that overlaps with BLOCK_START */
2658  if (idx > 0)
2659    {
2660      entry = &APR_ARRAY_IDX(page_entries, idx - 1, svn_fs_fs__p2l_entry_t);
2661      if (entry->offset + entry->size > block_start)
2662        --idx;
2663    }
2664
2665  /* copy all entries covering the requested range */
2666  for ( ; idx < page_entries->nelts; ++idx)
2667    {
2668      entry = &APR_ARRAY_IDX(page_entries, idx, svn_fs_fs__p2l_entry_t);
2669      if (entry->offset >= block_end)
2670        break;
2671
2672      APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t) = *entry;
2673    }
2674}
2675
2676/* Auxilliary struct passed to p2l_entries_func selecting the relevant
2677 * data range. */
2678typedef struct p2l_entries_baton_t
2679{
2680  apr_off_t start;
2681  apr_off_t end;
2682} p2l_entries_baton_t;
2683
2684/* Implement svn_cache__partial_getter_func_t: extract p2l entries from
2685 * the page in DATA which overlap the p2l_entries_baton_t in BATON.
2686 * The target array is already provided in *OUT.
2687 */
2688static svn_error_t *
2689p2l_entries_func(void **out,
2690                 const void *data,
2691                 apr_size_t data_len,
2692                 void *baton,
2693                 apr_pool_t *result_pool)
2694{
2695  apr_array_header_t *entries = *(apr_array_header_t **)out;
2696  const apr_array_header_t *raw_page = data;
2697  p2l_entries_baton_t *block = baton;
2698
2699  /* Make PAGE a readable APR array. */
2700  apr_array_header_t page = *raw_page;
2701  page.elts = (void *)svn_temp_deserializer__ptr(raw_page,
2702                                    (const void * const *)&raw_page->elts);
2703
2704  /* append relevant information to result */
2705  append_p2l_entries(entries, &page, block->start, block->end);
2706
2707  return SVN_NO_ERROR;
2708}
2709
2710
2711/* Body of svn_fs_fs__p2l_index_lookup.  However, do a single index page
2712 * lookup and append the result to the ENTRIES array provided by the caller.
2713 * Use successive calls to cover larger ranges.
2714 */
2715static svn_error_t *
2716p2l_index_lookup(apr_array_header_t *entries,
2717                 svn_fs_fs__revision_file_t *rev_file,
2718                 svn_fs_t *fs,
2719                 svn_revnum_t revision,
2720                 apr_off_t block_start,
2721                 apr_off_t block_end,
2722                 apr_pool_t *scratch_pool)
2723{
2724  fs_fs_data_t *ffd = fs->fsap_data;
2725  svn_fs_fs__page_cache_key_t key;
2726  svn_boolean_t is_cached = FALSE;
2727  p2l_page_info_baton_t page_info;
2728  apr_array_header_t *local_result = entries;
2729
2730  /* baton selecting the relevant entries from the one page we access */
2731  p2l_entries_baton_t block;
2732  block.start = block_start;
2733  block.end = block_end;
2734
2735  /* if we requested an empty range, the result would be empty */
2736  SVN_ERR_ASSERT(block_start < block_end);
2737
2738  /* look for the fist page of the range in our cache */
2739  SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, block_start,
2740                       scratch_pool));
2741  SVN_ERR(svn_cache__get_partial((void**)&local_result, &is_cached,
2742                                 ffd->p2l_page_cache, &key, p2l_entries_func,
2743                                 &block, scratch_pool));
2744
2745  if (!is_cached)
2746    {
2747      svn_boolean_t end;
2748      apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2749      apr_off_t original_page_start = page_info.page_start;
2750      int leaking_bucket = 4;
2751      p2l_page_info_baton_t prefetch_info = page_info;
2752      apr_array_header_t *page_entries;
2753
2754      apr_off_t max_offset
2755        = APR_ALIGN(page_info.next_offset, ffd->block_size);
2756      apr_off_t min_offset
2757        = APR_ALIGN(page_info.start_offset, ffd->block_size) - ffd->block_size;
2758
2759      /* Since we read index data in larger chunks, we probably got more
2760       * page data than we requested.  Parse & cache that until either we
2761       * encounter pages already cached or reach the end of the buffer.
2762       */
2763
2764      /* pre-fetch preceding pages */
2765      if (ffd->use_block_read)
2766        {
2767          end = FALSE;
2768          prefetch_info.offset = original_page_start;
2769          while (prefetch_info.offset >= prefetch_info.page_size && !end)
2770            {
2771              svn_pool_clear(iterpool);
2772
2773              prefetch_info.offset -= prefetch_info.page_size;
2774              SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file,
2775                                        &prefetch_info, min_offset,
2776                                        iterpool));
2777            }
2778        }
2779
2780      /* fetch page from disk and put it into the cache */
2781      SVN_ERR(get_p2l_page(&page_entries, rev_file, fs,
2782                           page_info.first_revision,
2783                           page_info.start_offset,
2784                           page_info.next_offset,
2785                           page_info.page_start,
2786                           page_info.page_size, iterpool));
2787
2788      /* The last cache entry must not end beyond the range covered by
2789       * this index.  The same applies for any subset of entries. */
2790      if (page_entries->nelts)
2791        {
2792          const svn_fs_fs__p2l_entry_t *entry
2793            = &APR_ARRAY_IDX(page_entries, page_entries->nelts - 1,
2794                             svn_fs_fs__p2l_entry_t);
2795          if (  entry->offset + entry->size
2796              > page_info.page_size * page_info.page_count)
2797            return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
2798                                     _("Last P2L index entry extends beyond "
2799                                       "the last page in revision %ld."),
2800                                     revision);
2801        }
2802
2803      SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page_entries,
2804                             iterpool));
2805
2806      /* append relevant information to result */
2807      append_p2l_entries(entries, page_entries, block_start, block_end);
2808
2809      /* pre-fetch following pages */
2810      if (ffd->use_block_read)
2811        {
2812          end = FALSE;
2813          leaking_bucket = 4;
2814          prefetch_info = page_info;
2815          prefetch_info.offset = original_page_start;
2816          while (   prefetch_info.next_offset < max_offset
2817                && prefetch_info.page_no + 1 < prefetch_info.page_count
2818                && !end)
2819            {
2820              svn_pool_clear(iterpool);
2821
2822              prefetch_info.offset += prefetch_info.page_size;
2823              SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file,
2824                                        &prefetch_info, min_offset,
2825                                        iterpool));
2826            }
2827        }
2828
2829      svn_pool_destroy(iterpool);
2830    }
2831
2832  /* We access a valid page (otherwise, we had seen an error in the
2833   * get_p2l_keys request).  Hence, at least one entry must be found. */
2834  SVN_ERR_ASSERT(entries->nelts > 0);
2835
2836  /* Add an "unused" entry if it extends beyond the end of the data file.
2837   * Since the index page size might be smaller than the current data
2838   * read block size, the trailing "unused" entry in this index may not
2839   * fully cover the end of the last block. */
2840  if (page_info.page_no + 1 >= page_info.page_count)
2841    {
2842      svn_fs_fs__p2l_entry_t *entry
2843        = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_fs__p2l_entry_t);
2844
2845      apr_off_t entry_end = entry->offset + entry->size;
2846      if (entry_end < block_end)
2847        {
2848          if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
2849            {
2850              /* extend the terminal filler */
2851              entry->size = block_end - entry->offset;
2852            }
2853          else
2854            {
2855              /* No terminal filler. Add one. */
2856              entry = apr_array_push(entries);
2857              entry->offset = entry_end;
2858              entry->size = block_end - entry_end;
2859              entry->type = SVN_FS_FS__ITEM_TYPE_UNUSED;
2860              entry->fnv1_checksum = 0;
2861              entry->item.revision = SVN_INVALID_REVNUM;
2862              entry->item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
2863            }
2864        }
2865    }
2866
2867  return SVN_NO_ERROR;
2868}
2869
2870svn_error_t *
2871svn_fs_fs__p2l_index_lookup(apr_array_header_t **entries,
2872                            svn_fs_t *fs,
2873                            svn_fs_fs__revision_file_t *rev_file,
2874                            svn_revnum_t revision,
2875                            apr_off_t block_start,
2876                            apr_off_t block_size,
2877                            apr_pool_t *result_pool,
2878                            apr_pool_t *scratch_pool)
2879{
2880  apr_off_t block_end = block_start + block_size;
2881
2882  /* the receiving container */
2883  int last_count = 0;
2884  apr_array_header_t *result = apr_array_make(result_pool, 16,
2885                                              sizeof(svn_fs_fs__p2l_entry_t));
2886
2887  /* Fetch entries page-by-page.  Since the p2l index is supposed to cover
2888   * every single byte in the rev / pack file - even unused sections -
2889   * every iteration must result in some progress. */
2890  while (block_start < block_end)
2891    {
2892      svn_fs_fs__p2l_entry_t *entry;
2893      SVN_ERR(p2l_index_lookup(result, rev_file, fs, revision, block_start,
2894                               block_end, scratch_pool));
2895      SVN_ERR_ASSERT(result->nelts > 0);
2896
2897      /* continue directly behind last item */
2898      entry = &APR_ARRAY_IDX(result, result->nelts-1, svn_fs_fs__p2l_entry_t);
2899      block_start = entry->offset + entry->size;
2900
2901      /* Some paranoia check.  Successive iterations should never return
2902       * duplicates but if it did, we might get into trouble later on. */
2903      if (last_count > 0 && last_count < result->nelts)
2904        {
2905           entry =  &APR_ARRAY_IDX(result, last_count - 1,
2906                                   svn_fs_fs__p2l_entry_t);
2907           SVN_ERR_ASSERT(APR_ARRAY_IDX(result, last_count,
2908                                        svn_fs_fs__p2l_entry_t).offset
2909                          >= entry->offset + entry->size);
2910        }
2911
2912      last_count = result->nelts;
2913    }
2914
2915  *entries = result;
2916  return SVN_NO_ERROR;
2917}
2918
2919/* compare_fn_t comparing a svn_fs_fs__p2l_entry_t at LHS with an offset
2920 * RHS.
2921 */
2922static int
2923compare_p2l_entry_offsets(const void *lhs, const void *rhs)
2924{
2925  const svn_fs_fs__p2l_entry_t *entry = (const svn_fs_fs__p2l_entry_t *)lhs;
2926  apr_off_t offset = *(const apr_off_t *)rhs;
2927
2928  return entry->offset < offset ? -1 : (entry->offset == offset ? 0 : 1);
2929}
2930
2931/* Cached data extraction utility.  DATA is a P2L index page, e.g. an APR
2932 * array of svn_fs_fs__p2l_entry_t elements.  Return the entry for the item,
2933 * allocated in RESULT_POOL, starting at OFFSET or NULL if that's not an
2934 * the start offset of any item. Use SCRATCH_POOL for temporary allocations.
2935 */
2936static svn_fs_fs__p2l_entry_t *
2937get_p2l_entry_from_cached_page(const void *data,
2938                               apr_uint64_t offset,
2939                               apr_pool_t *result_pool,
2940                               apr_pool_t *scratch_pool)
2941{
2942  /* resolve all pointer values of in-cache data */
2943  const apr_array_header_t *page = data;
2944  apr_array_header_t *entries = apr_pmemdup(scratch_pool, page,
2945                                            sizeof(*page));
2946  svn_fs_fs__p2l_entry_t *entry;
2947
2948  entries->elts = (char *)svn_temp_deserializer__ptr(page,
2949                                     (const void *const *)&page->elts);
2950
2951  /* search of the offset we want */
2952  entry = svn_sort__array_lookup(entries, &offset, NULL,
2953      (int (*)(const void *, const void *))compare_p2l_entry_offsets);
2954
2955  /* return it, if it is a perfect match */
2956  return entry ? apr_pmemdup(result_pool, entry, sizeof(*entry)) : NULL;
2957}
2958
2959/* Implements svn_cache__partial_getter_func_t for P2L index pages, copying
2960 * the entry for the apr_off_t at BATON into *OUT.  *OUT will be NULL if
2961 * there is no matching entry in the index page at DATA.
2962 */
2963static svn_error_t *
2964p2l_entry_lookup_func(void **out,
2965                      const void *data,
2966                      apr_size_t data_len,
2967                      void *baton,
2968                      apr_pool_t *result_pool)
2969{
2970  svn_fs_fs__p2l_entry_t *entry
2971    = get_p2l_entry_from_cached_page(data, *(apr_off_t *)baton, result_pool,
2972                                     result_pool);
2973
2974  *out = entry && entry->offset == *(apr_off_t *)baton
2975       ? apr_pmemdup(result_pool, entry, sizeof(*entry))
2976       : NULL;
2977
2978  return SVN_NO_ERROR;
2979}
2980
2981svn_error_t *
2982svn_fs_fs__p2l_entry_lookup(svn_fs_fs__p2l_entry_t **entry_p,
2983                            svn_fs_t *fs,
2984                            svn_fs_fs__revision_file_t *rev_file,
2985                            svn_revnum_t revision,
2986                            apr_off_t offset,
2987                            apr_pool_t *result_pool,
2988                            apr_pool_t *scratch_pool)
2989{
2990  fs_fs_data_t *ffd = fs->fsap_data;
2991  svn_fs_fs__page_cache_key_t key = { 0 };
2992  svn_boolean_t is_cached = FALSE;
2993  p2l_page_info_baton_t page_info;
2994
2995  *entry_p = NULL;
2996
2997  /* look for this info in our cache */
2998  SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, offset,
2999                       scratch_pool));
3000  SVN_ERR(svn_cache__get_partial((void**)entry_p, &is_cached,
3001                                 ffd->p2l_page_cache, &key,
3002                                 p2l_entry_lookup_func, &offset,
3003                                 result_pool));
3004  if (!is_cached)
3005    {
3006      /* do a standard index lookup.  This is will automatically prefetch
3007       * data to speed up future lookups. */
3008      apr_array_header_t *entries = apr_array_make(result_pool, 1,
3009                                                   sizeof(**entry_p));
3010      SVN_ERR(p2l_index_lookup(entries, rev_file, fs, revision, offset,
3011                               offset + 1, scratch_pool));
3012
3013      /* Find the entry that we want. */
3014      *entry_p = svn_sort__array_lookup(entries, &offset, NULL,
3015          (int (*)(const void *, const void *))compare_p2l_entry_offsets);
3016    }
3017
3018  return SVN_NO_ERROR;
3019}
3020
3021/* Implements svn_cache__partial_getter_func_t for P2L headers, setting *OUT
3022 * to the largest the first offset not covered by this P2L index.
3023 */
3024static svn_error_t *
3025p2l_get_max_offset_func(void **out,
3026                        const void *data,
3027                        apr_size_t data_len,
3028                        void *baton,
3029                        apr_pool_t *result_pool)
3030{
3031  const p2l_header_t *header = data;
3032  apr_off_t max_offset = header->file_size;
3033  *out = apr_pmemdup(result_pool, &max_offset, sizeof(max_offset));
3034
3035  return SVN_NO_ERROR;
3036}
3037
3038/* Core functionality of to svn_fs_fs__p2l_get_max_offset with identical
3039 * signature. */
3040static svn_error_t *
3041p2l_get_max_offset(apr_off_t *offset,
3042                   svn_fs_t *fs,
3043                   svn_fs_fs__revision_file_t *rev_file,
3044                   svn_revnum_t revision,
3045                   apr_pool_t *scratch_pool)
3046{
3047  fs_fs_data_t *ffd = fs->fsap_data;
3048  p2l_header_t *header;
3049  svn_boolean_t is_cached = FALSE;
3050  apr_off_t *offset_p;
3051
3052  /* look for the header data in our cache */
3053  pair_cache_key_t key;
3054  key.revision = rev_file->start_revision;
3055  key.second = rev_file->is_packed;
3056
3057  SVN_ERR(svn_cache__get_partial((void **)&offset_p, &is_cached,
3058                                 ffd->p2l_header_cache, &key,
3059                                 p2l_get_max_offset_func, NULL,
3060                                 scratch_pool));
3061  if (is_cached)
3062    {
3063      *offset = *offset_p;
3064      return SVN_NO_ERROR;
3065    }
3066
3067  SVN_ERR(get_p2l_header(&header, rev_file, fs, revision, scratch_pool,
3068                         scratch_pool));
3069  *offset = header->file_size;
3070
3071  return SVN_NO_ERROR;
3072}
3073
3074svn_error_t *
3075svn_fs_fs__p2l_get_max_offset(apr_off_t *offset,
3076                              svn_fs_t *fs,
3077                              svn_fs_fs__revision_file_t *rev_file,
3078                              svn_revnum_t revision,
3079                              apr_pool_t *scratch_pool)
3080{
3081  return svn_error_trace(p2l_get_max_offset(offset, fs, rev_file, revision,
3082                                            scratch_pool));
3083}
3084
3085/* Calculate the FNV1 checksum over the offset range in REV_FILE, covered by
3086 * ENTRY.  Store the result in ENTRY->FNV1_CHECKSUM.  Use SCRATCH_POOL for
3087 * temporary allocations. */
3088static svn_error_t *
3089calc_fnv1(svn_fs_fs__p2l_entry_t *entry,
3090          svn_fs_fs__revision_file_t *rev_file,
3091          apr_pool_t *scratch_pool)
3092{
3093  unsigned char buffer[4096];
3094  svn_checksum_t *checksum;
3095  svn_checksum_ctx_t *context
3096    = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, scratch_pool);
3097  apr_off_t size = entry->size;
3098
3099  /* Special rules apply to unused sections / items.  The data must be a
3100   * sequence of NUL bytes (not checked here) and the checksum is fixed to 0.
3101   */
3102  if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3103    {
3104      entry->fnv1_checksum = 0;
3105      return SVN_NO_ERROR;
3106    }
3107
3108  /* Read the block and feed it to the checksum calculator. */
3109  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &entry->offset,
3110                           scratch_pool));
3111  while (size > 0)
3112    {
3113      apr_size_t to_read = size > sizeof(buffer)
3114                         ? sizeof(buffer)
3115                         : (apr_size_t)size;
3116      SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, to_read, NULL,
3117                                     NULL, scratch_pool));
3118      SVN_ERR(svn_checksum_update(context, buffer, to_read));
3119      size -= to_read;
3120    }
3121
3122  /* Store final checksum in ENTRY. */
3123  SVN_ERR(svn_checksum_final(&checksum, context, scratch_pool));
3124  entry->fnv1_checksum = ntohl(*(const apr_uint32_t *)checksum->digest);
3125
3126  return SVN_NO_ERROR;
3127}
3128
3129/*
3130 * Index (re-)creation utilities.
3131 */
3132
3133svn_error_t *
3134svn_fs_fs__p2l_index_from_p2l_entries(const char **protoname,
3135                                      svn_fs_t *fs,
3136                                      svn_fs_fs__revision_file_t *rev_file,
3137                                      apr_array_header_t *entries,
3138                                      apr_pool_t *result_pool,
3139                                      apr_pool_t *scratch_pool)
3140{
3141  apr_file_t *proto_index;
3142
3143  /* Use a subpool for immediate temp file cleanup at the end of this
3144   * function. */
3145  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3146  int i;
3147
3148  /* Create a proto-index file. */
3149  SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL,
3150                                   svn_io_file_del_on_pool_cleanup,
3151                                   result_pool, scratch_pool));
3152  SVN_ERR(svn_fs_fs__p2l_proto_index_open(&proto_index, *protoname,
3153                                          scratch_pool));
3154
3155  /* Write ENTRIES to proto-index file and calculate checksums as we go. */
3156  for (i = 0; i < entries->nelts; ++i)
3157    {
3158      svn_fs_fs__p2l_entry_t *entry
3159        = APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t *);
3160      svn_pool_clear(iterpool);
3161
3162      SVN_ERR(calc_fnv1(entry, rev_file, iterpool));
3163      SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(proto_index, entry,
3164                                                   iterpool));
3165    }
3166
3167  /* Convert proto-index into final index and move it into position.
3168   * Note that REV_FILE contains the start revision of the shard file if it
3169   * has been packed while REVISION may be somewhere in the middle.  For
3170   * non-packed shards, they will have identical values. */
3171  SVN_ERR(svn_io_file_close(proto_index, iterpool));
3172
3173  /* Temp file cleanup. */
3174  svn_pool_destroy(iterpool);
3175
3176  return SVN_NO_ERROR;
3177}
3178
3179/* A svn_sort__array compatible comparator function, sorting the
3180 * svn_fs_fs__p2l_entry_t** given in LHS, RHS by revision. */
3181static int
3182compare_p2l_entry_revision(const void *lhs,
3183                           const void *rhs)
3184{
3185  const svn_fs_fs__p2l_entry_t *lhs_entry
3186    =*(const svn_fs_fs__p2l_entry_t **)lhs;
3187  const svn_fs_fs__p2l_entry_t *rhs_entry
3188    =*(const svn_fs_fs__p2l_entry_t **)rhs;
3189
3190  if (lhs_entry->item.revision < rhs_entry->item.revision)
3191    return -1;
3192
3193  return lhs_entry->item.revision == rhs_entry->item.revision ? 0 : 1;
3194}
3195
3196svn_error_t *
3197svn_fs_fs__l2p_index_from_p2l_entries(const char **protoname,
3198                                      svn_fs_t *fs,
3199                                      apr_array_header_t *entries,
3200                                      apr_pool_t *result_pool,
3201                                      apr_pool_t *scratch_pool)
3202{
3203  apr_file_t *proto_index;
3204
3205  /* Use a subpool for immediate temp file cleanup at the end of this
3206   * function. */
3207  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3208  int i;
3209  svn_revnum_t last_revision = SVN_INVALID_REVNUM;
3210  svn_revnum_t revision = SVN_INVALID_REVNUM;
3211
3212  /* L2P index must be written in revision order.
3213   * Sort ENTRIES accordingly. */
3214  svn_sort__array(entries, compare_p2l_entry_revision);
3215
3216  /* Find the first revision in the index
3217   * (must exist since no truly empty revs are allowed). */
3218  for (i = 0; i < entries->nelts && !SVN_IS_VALID_REVNUM(revision); ++i)
3219    revision = APR_ARRAY_IDX(entries, i, const svn_fs_fs__p2l_entry_t *)
3220               ->item.revision;
3221
3222  /* Create the temporary proto-rev file. */
3223  SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL,
3224                                   svn_io_file_del_on_pool_cleanup,
3225                                   result_pool, scratch_pool));
3226  SVN_ERR(svn_fs_fs__l2p_proto_index_open(&proto_index, *protoname,
3227                                          scratch_pool));
3228
3229  /*  Write all entries. */
3230  for (i = 0; i < entries->nelts; ++i)
3231    {
3232      const svn_fs_fs__p2l_entry_t *entry
3233        = APR_ARRAY_IDX(entries, i, const svn_fs_fs__p2l_entry_t *);
3234      svn_pool_clear(iterpool);
3235
3236      if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3237        continue;
3238
3239      if (last_revision != entry->item.revision)
3240        {
3241          SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(proto_index,
3242                                                          scratch_pool));
3243          last_revision = entry->item.revision;
3244        }
3245
3246      SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(proto_index,
3247                                                   entry->offset,
3248                                                   entry->item.number,
3249                                                   iterpool));
3250    }
3251
3252  /* Convert proto-index into final index and move it into position. */
3253  SVN_ERR(svn_io_file_close(proto_index, iterpool));
3254
3255  /* Temp file cleanup. */
3256  svn_pool_destroy(iterpool);
3257
3258  return SVN_NO_ERROR;
3259}
3260
3261
3262/*
3263 * Standard (de-)serialization functions
3264 */
3265
3266svn_error_t *
3267svn_fs_fs__serialize_l2p_header(void **data,
3268                                apr_size_t *data_len,
3269                                void *in,
3270                                apr_pool_t *pool)
3271{
3272  l2p_header_t *header = in;
3273  svn_temp_serializer__context_t *context;
3274  svn_stringbuf_t *serialized;
3275  apr_size_t page_count = header->page_table_index[header->revision_count];
3276  apr_size_t page_table_size = page_count * sizeof(*header->page_table);
3277  apr_size_t index_size
3278    = (header->revision_count + 1) * sizeof(*header->page_table_index);
3279  apr_size_t data_size = sizeof(*header) + index_size + page_table_size;
3280
3281  /* serialize header and all its elements */
3282  context = svn_temp_serializer__init(header,
3283                                      sizeof(*header),
3284                                      data_size + 32,
3285                                      pool);
3286
3287  /* page table index array */
3288  svn_temp_serializer__add_leaf(context,
3289                                (const void * const *)&header->page_table_index,
3290                                index_size);
3291
3292  /* page table array */
3293  svn_temp_serializer__add_leaf(context,
3294                                (const void * const *)&header->page_table,
3295                                page_table_size);
3296
3297  /* return the serialized result */
3298  serialized = svn_temp_serializer__get(context);
3299
3300  *data = serialized->data;
3301  *data_len = serialized->len;
3302
3303  return SVN_NO_ERROR;
3304}
3305
3306svn_error_t *
3307svn_fs_fs__deserialize_l2p_header(void **out,
3308                                  void *data,
3309                                  apr_size_t data_len,
3310                                  apr_pool_t *pool)
3311{
3312  l2p_header_t *header = (l2p_header_t *)data;
3313
3314  /* resolve the pointers in the struct */
3315  svn_temp_deserializer__resolve(header, (void**)&header->page_table_index);
3316  svn_temp_deserializer__resolve(header, (void**)&header->page_table);
3317
3318  /* done */
3319  *out = header;
3320
3321  return SVN_NO_ERROR;
3322}
3323
3324svn_error_t *
3325svn_fs_fs__serialize_l2p_page(void **data,
3326                              apr_size_t *data_len,
3327                              void *in,
3328                              apr_pool_t *pool)
3329{
3330  l2p_page_t *page = in;
3331  svn_temp_serializer__context_t *context;
3332  svn_stringbuf_t *serialized;
3333  apr_size_t of_table_size = page->entry_count * sizeof(*page->offsets);
3334
3335  /* serialize struct and all its elements */
3336  context = svn_temp_serializer__init(page,
3337                                      sizeof(*page),
3338                                      of_table_size + sizeof(*page) + 32,
3339                                      pool);
3340
3341  /* offsets and sub_items arrays */
3342  svn_temp_serializer__add_leaf(context,
3343                                (const void * const *)&page->offsets,
3344                                of_table_size);
3345
3346  /* return the serialized result */
3347  serialized = svn_temp_serializer__get(context);
3348
3349  *data = serialized->data;
3350  *data_len = serialized->len;
3351
3352  return SVN_NO_ERROR;
3353}
3354
3355svn_error_t *
3356svn_fs_fs__deserialize_l2p_page(void **out,
3357                                void *data,
3358                                apr_size_t data_len,
3359                                apr_pool_t *pool)
3360{
3361  l2p_page_t *page = data;
3362
3363  /* resolve the pointers in the struct */
3364  svn_temp_deserializer__resolve(page, (void**)&page->offsets);
3365
3366  /* done */
3367  *out = page;
3368
3369  return SVN_NO_ERROR;
3370}
3371
3372svn_error_t *
3373svn_fs_fs__serialize_p2l_header(void **data,
3374                                apr_size_t *data_len,
3375                                void *in,
3376                                apr_pool_t *pool)
3377{
3378  p2l_header_t *header = in;
3379  svn_temp_serializer__context_t *context;
3380  svn_stringbuf_t *serialized;
3381  apr_size_t table_size = (header->page_count + 1) * sizeof(*header->offsets);
3382
3383  /* serialize header and all its elements */
3384  context = svn_temp_serializer__init(header,
3385                                      sizeof(*header),
3386                                      table_size + sizeof(*header) + 32,
3387                                      pool);
3388
3389  /* offsets array */
3390  svn_temp_serializer__add_leaf(context,
3391                                (const void * const *)&header->offsets,
3392                                table_size);
3393
3394  /* return the serialized result */
3395  serialized = svn_temp_serializer__get(context);
3396
3397  *data = serialized->data;
3398  *data_len = serialized->len;
3399
3400  return SVN_NO_ERROR;
3401}
3402
3403svn_error_t *
3404svn_fs_fs__deserialize_p2l_header(void **out,
3405                                  void *data,
3406                                  apr_size_t data_len,
3407                                  apr_pool_t *pool)
3408{
3409  p2l_header_t *header = data;
3410
3411  /* resolve the only pointer in the struct */
3412  svn_temp_deserializer__resolve(header, (void**)&header->offsets);
3413
3414  /* done */
3415  *out = header;
3416
3417  return SVN_NO_ERROR;
3418}
3419
3420svn_error_t *
3421svn_fs_fs__serialize_p2l_page(void **data,
3422                              apr_size_t *data_len,
3423                              void *in,
3424                              apr_pool_t *pool)
3425{
3426  apr_array_header_t *page = in;
3427  svn_temp_serializer__context_t *context;
3428  svn_stringbuf_t *serialized;
3429  apr_size_t table_size = page->elt_size * page->nelts;
3430
3431  /* serialize array header and all its elements */
3432  context = svn_temp_serializer__init(page,
3433                                      sizeof(*page),
3434                                      table_size + sizeof(*page) + 32,
3435                                      pool);
3436
3437  /* items in the array */
3438  svn_temp_serializer__add_leaf(context,
3439                                (const void * const *)&page->elts,
3440                                table_size);
3441
3442  /* return the serialized result */
3443  serialized = svn_temp_serializer__get(context);
3444
3445  *data = serialized->data;
3446  *data_len = serialized->len;
3447
3448  return SVN_NO_ERROR;
3449}
3450
3451svn_error_t *
3452svn_fs_fs__deserialize_p2l_page(void **out,
3453                                void *data,
3454                                apr_size_t data_len,
3455                                apr_pool_t *pool)
3456{
3457  apr_array_header_t *page = (apr_array_header_t *)data;
3458
3459  /* resolve the only pointer in the struct */
3460  svn_temp_deserializer__resolve(page, (void**)&page->elts);
3461
3462  /* patch up members */
3463  page->pool = pool;
3464  page->nalloc = page->nelts;
3465
3466  /* done */
3467  *out = page;
3468
3469  return SVN_NO_ERROR;
3470}
3471