pack.c revision 299742
1/* pack.c --- FSFS shard packing functionality
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22#include <assert.h>
23#include <string.h>
24
25#include "svn_pools.h"
26#include "svn_dirent_uri.h"
27#include "svn_sorts.h"
28#include "private/svn_temp_serializer.h"
29#include "private/svn_sorts_private.h"
30#include "private/svn_subr_private.h"
31#include "private/svn_string_private.h"
32#include "private/svn_io_private.h"
33
34#include "fs_fs.h"
35#include "pack.h"
36#include "util.h"
37#include "id.h"
38#include "index.h"
39#include "low_level.h"
40#include "revprops.h"
41#include "transaction.h"
42
43#include "../libsvn_fs/fs-loader.h"
44
45#include "svn_private_config.h"
46#include "temp_serializer.h"
47
48/* Logical addressing packing logic:
49 *
50 * We pack files on a pack file basis (e.g. 1000 revs) without changing
51 * existing pack files nor the revision files outside the range to pack.
52 *
53 * First, we will scan the revision file indexes to determine the number
54 * of items to "place" (i.e. determine their optimal position within the
55 * future pack file).  For each item, we will need a constant amount of
56 * memory to track it.  A MAX_MEM parameter sets a limit to the number of
57 * items we may place in one go.  That means, we may not be able to add
58 * all revisions at once.  Instead, we will run the placement for a subset
59 * of revisions at a time.  The very unlikely worst case will simply append
60 * all revision data with just a little reshuffling inside each revision.
61 *
62 * In a second step, we read all revisions in the selected range, build
63 * the item tracking information and copy the items themselves from the
64 * revision files to temporary files.  The latter serve as buckets for a
65 * very coarse bucket presort:  Separate change lists, file properties,
66 * directory properties and noderevs + representations from one another.
67 *
68 * The third step will determine an optimized placement for the items in
69 * each of the 4 buckets separately.  The first three will simply order
70 * their items by revision, starting with the newest once.  Placing rep
71 * and noderev items is a more elaborate process documented in the code.
72 *
73 * In short, we store items in the following order:
74 * - changed paths lists
75 * - node property
76 * - directory properties
77 * - directory representations corresponding noderevs, lexical path order
78 *   with special treatment of "trunk" and "branches"
79 * - same for file representations
80 *
81 * Step 4 copies the items from the temporary buckets into the final
82 * pack file and writes the temporary index files.
83 *
84 * Finally, after the last range of revisions, create the final indexes.
85 */
86
87/* Maximum amount of memory we allocate for placement information during
88 * the pack process.
89 */
90#define DEFAULT_MAX_MEM (64 * 1024 * 1024)
91
92/* Data structure describing a node change at PATH, REVISION.
93 * We will sort these instances by PATH and NODE_ID such that we can combine
94 * similar nodes in the same reps container and store containers in path
95 * major order.
96 */
97typedef struct path_order_t
98{
99  /* changed path */
100  svn_prefix_string__t *path;
101
102  /* node ID for this PATH in REVISION */
103  svn_fs_fs__id_part_t node_id;
104
105  /* when this change happened */
106  svn_revnum_t revision;
107
108  /* noderev predecessor count */
109  int predecessor_count;
110
111  /* this is a directory node */
112  svn_boolean_t is_dir;
113
114  /* length of the expanded representation content */
115  apr_int64_t expanded_size;
116
117  /* item ID of the noderev linked to the change. May be (0, 0). */
118  svn_fs_fs__id_part_t noderev_id;
119
120  /* item ID of the representation containing the new data. May be (0, 0). */
121  svn_fs_fs__id_part_t rep_id;
122} path_order_t;
123
124/* Represents a reference from item FROM to item TO.  FROM may be a noderev
125 * or rep_id while TO is (currently) always a representation.  We will sort
126 * them by TO which allows us to collect all dependent items.
127 */
128typedef struct reference_t
129{
130  svn_fs_fs__id_part_t to;
131  svn_fs_fs__id_part_t from;
132} reference_t;
133
134/* This structure keeps track of all the temporary data and status that
135 * needs to be kept around during the creation of one pack file.  After
136 * each revision range (in case we can't process all revs at once due to
137 * memory restrictions), parts of the data will get re-initialized.
138 */
139typedef struct pack_context_t
140{
141  /* file system that we operate on */
142  svn_fs_t *fs;
143
144  /* cancel function to invoke at regular intervals. May be NULL */
145  svn_cancel_func_t cancel_func;
146
147  /* baton to pass to CANCEL_FUNC */
148  void *cancel_baton;
149
150  /* first revision in the shard (and future pack file) */
151  svn_revnum_t shard_rev;
152
153  /* first revision in the range to process (>= SHARD_REV) */
154  svn_revnum_t start_rev;
155
156  /* first revision after the range to process (<= SHARD_END_REV) */
157  svn_revnum_t end_rev;
158
159  /* first revision after the current shard */
160  svn_revnum_t shard_end_rev;
161
162  /* log-to-phys proto index for the whole pack file */
163  apr_file_t *proto_l2p_index;
164
165  /* phys-to-log proto index for the whole pack file */
166  apr_file_t *proto_p2l_index;
167
168  /* full shard directory path (containing the unpacked revisions) */
169  const char *shard_dir;
170
171  /* full packed shard directory path (containing the pack file + indexes) */
172  const char *pack_file_dir;
173
174  /* full pack file path (including PACK_FILE_DIR) */
175  const char *pack_file_path;
176
177  /* current write position (i.e. file length) in the pack file */
178  apr_off_t pack_offset;
179
180  /* the pack file to ultimately write all data to */
181  apr_file_t *pack_file;
182
183  /* array of svn_fs_fs__p2l_entry_t *, all referring to change lists.
184   * Will be filled in phase 2 and be cleared after each revision range. */
185  apr_array_header_t *changes;
186
187  /* temp file receiving all change list items (referenced by CHANGES).
188   * Will be filled in phase 2 and be cleared after each revision range. */
189  apr_file_t *changes_file;
190
191  /* array of svn_fs_fs__p2l_entry_t *, all referring to file properties.
192   * Will be filled in phase 2 and be cleared after each revision range. */
193  apr_array_header_t *file_props;
194
195  /* temp file receiving all file prop items (referenced by FILE_PROPS).
196   * Will be filled in phase 2 and be cleared after each revision range.*/
197  apr_file_t *file_props_file;
198
199  /* array of svn_fs_fs__p2l_entry_t *, all referring to directory properties.
200   * Will be filled in phase 2 and be cleared after each revision range. */
201  apr_array_header_t *dir_props;
202
203  /* temp file receiving all directory prop items (referenced by DIR_PROPS).
204   * Will be filled in phase 2 and be cleared after each revision range.*/
205  apr_file_t *dir_props_file;
206
207  /* container for all PATH members in PATH_ORDER. */
208  svn_prefix_tree__t *paths;
209
210  /* array of path_order_t *.  Will be filled in phase 2 and be cleared
211   * after each revision range.  Sorted by PATH, NODE_ID. */
212  apr_array_header_t *path_order;
213
214  /* array of reference_t* linking representations to their delta bases.
215   * Will be filled in phase 2 and be cleared after each revision range.
216   * It will be sorted by the FROM members (for rep->base rep lookup). */
217  apr_array_header_t *references;
218
219  /* array of svn_fs_fs__p2l_entry_t*.  Will be filled in phase 2 and be
220   * cleared after each revision range.  During phase 3, we will set items
221   * to NULL that we already processed. */
222  apr_array_header_t *reps;
223
224  /* array of int, marking for each revision, the which offset their items
225   * begin in REPS.  Will be filled in phase 2 and be cleared after
226   * each revision range. */
227  apr_array_header_t *rev_offsets;
228
229  /* temp file receiving all items referenced by REPS.
230   * Will be filled in phase 2 and be cleared after each revision range.*/
231  apr_file_t *reps_file;
232
233  /* pool used for temporary data structures that will be cleaned up when
234   * the next range of revisions is being processed */
235  apr_pool_t *info_pool;
236} pack_context_t;
237
238/* Create and initialize a new pack context for packing shard SHARD_REV in
239 * SHARD_DIR into PACK_FILE_DIR within filesystem FS.  Allocate it in POOL
240 * and return the structure in *CONTEXT.
241 *
242 * Limit the number of items being copied per iteration to MAX_ITEMS.
243 * Set CANCEL_FUNC and CANCEL_BATON as well.
244 */
245static svn_error_t *
246initialize_pack_context(pack_context_t *context,
247                        svn_fs_t *fs,
248                        const char *pack_file_dir,
249                        const char *shard_dir,
250                        svn_revnum_t shard_rev,
251                        int max_items,
252                        svn_cancel_func_t cancel_func,
253                        void *cancel_baton,
254                        apr_pool_t *pool)
255{
256  fs_fs_data_t *ffd = fs->fsap_data;
257  const char *temp_dir;
258  int max_revs = MIN(ffd->max_files_per_dir, max_items);
259
260  SVN_ERR_ASSERT(ffd->format >= SVN_FS_FS__MIN_LOG_ADDRESSING_FORMAT);
261  SVN_ERR_ASSERT(shard_rev % ffd->max_files_per_dir == 0);
262
263  /* where we will place our various temp files */
264  SVN_ERR(svn_io_temp_dir(&temp_dir, pool));
265
266  /* store parameters */
267  context->fs = fs;
268  context->cancel_func = cancel_func;
269  context->cancel_baton = cancel_baton;
270
271  context->shard_rev = shard_rev;
272  context->start_rev = shard_rev;
273  context->end_rev = shard_rev;
274  context->shard_end_rev = shard_rev + ffd->max_files_per_dir;
275
276  /* Create the new directory and pack file. */
277  context->shard_dir = shard_dir;
278  context->pack_file_dir = pack_file_dir;
279  context->pack_file_path
280    = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
281  SVN_ERR(svn_io_file_open(&context->pack_file, context->pack_file_path,
282                           APR_WRITE | APR_BUFFERED | APR_BINARY | APR_EXCL
283                             | APR_CREATE, APR_OS_DEFAULT, pool));
284
285  /* Proto index files */
286  SVN_ERR(svn_fs_fs__l2p_proto_index_open(
287             &context->proto_l2p_index,
288             svn_dirent_join(pack_file_dir,
289                             PATH_INDEX PATH_EXT_L2P_INDEX,
290                             pool),
291             pool));
292  SVN_ERR(svn_fs_fs__p2l_proto_index_open(
293             &context->proto_p2l_index,
294             svn_dirent_join(pack_file_dir,
295                             PATH_INDEX PATH_EXT_P2L_INDEX,
296                             pool),
297             pool));
298
299  /* item buckets: one item info array and one temp file per bucket */
300  context->changes = apr_array_make(pool, max_items,
301                                    sizeof(svn_fs_fs__p2l_entry_t *));
302  SVN_ERR(svn_io_open_unique_file3(&context->changes_file, NULL, temp_dir,
303                                   svn_io_file_del_on_close, pool, pool));
304  context->file_props = apr_array_make(pool, max_items,
305                                       sizeof(svn_fs_fs__p2l_entry_t *));
306  SVN_ERR(svn_io_open_unique_file3(&context->file_props_file, NULL, temp_dir,
307                                   svn_io_file_del_on_close, pool, pool));
308  context->dir_props = apr_array_make(pool, max_items,
309                                      sizeof(svn_fs_fs__p2l_entry_t *));
310  SVN_ERR(svn_io_open_unique_file3(&context->dir_props_file, NULL, temp_dir,
311                                   svn_io_file_del_on_close, pool, pool));
312
313  /* noderev and representation item bucket */
314  context->rev_offsets = apr_array_make(pool, max_revs, sizeof(int));
315  context->path_order = apr_array_make(pool, max_items,
316                                       sizeof(path_order_t *));
317  context->references = apr_array_make(pool, max_items,
318                                       sizeof(reference_t *));
319  context->reps = apr_array_make(pool, max_items,
320                                 sizeof(svn_fs_fs__p2l_entry_t *));
321  SVN_ERR(svn_io_open_unique_file3(&context->reps_file, NULL, temp_dir,
322                                   svn_io_file_del_on_close, pool, pool));
323
324  /* the pool used for temp structures */
325  context->info_pool = svn_pool_create(pool);
326  context->paths = svn_prefix_tree__create(context->info_pool);
327
328  return SVN_NO_ERROR;
329}
330
331/* Clean up / free all revision range specific data and files in CONTEXT.
332 * Use POOL for temporary allocations.
333 */
334static svn_error_t *
335reset_pack_context(pack_context_t *context,
336                   apr_pool_t *pool)
337{
338  apr_array_clear(context->changes);
339  SVN_ERR(svn_io_file_trunc(context->changes_file, 0, pool));
340  apr_array_clear(context->file_props);
341  SVN_ERR(svn_io_file_trunc(context->file_props_file, 0, pool));
342  apr_array_clear(context->dir_props);
343  SVN_ERR(svn_io_file_trunc(context->dir_props_file, 0, pool));
344
345  apr_array_clear(context->rev_offsets);
346  apr_array_clear(context->path_order);
347  apr_array_clear(context->references);
348  apr_array_clear(context->reps);
349  SVN_ERR(svn_io_file_trunc(context->reps_file, 0, pool));
350
351  svn_pool_clear(context->info_pool);
352
353  return SVN_NO_ERROR;
354}
355
356/* Call this after the last revision range.  It will finalize all index files
357 * for CONTEXT and close any open files.  Use POOL for temporary allocations.
358 */
359static svn_error_t *
360close_pack_context(pack_context_t *context,
361                   apr_pool_t *pool)
362{
363  const char *proto_l2p_index_path;
364  const char *proto_p2l_index_path;
365
366  /* need the file names for the actual index creation call further down */
367  SVN_ERR(svn_io_file_name_get(&proto_l2p_index_path,
368                               context->proto_l2p_index, pool));
369  SVN_ERR(svn_io_file_name_get(&proto_p2l_index_path,
370                               context->proto_p2l_index, pool));
371
372  /* finalize proto index files */
373  SVN_ERR(svn_io_file_close(context->proto_l2p_index, pool));
374  SVN_ERR(svn_io_file_close(context->proto_p2l_index, pool));
375
376  /* Append the actual index data to the pack file. */
377  SVN_ERR(svn_fs_fs__add_index_data(context->fs, context->pack_file,
378                                    proto_l2p_index_path,
379                                    proto_p2l_index_path,
380                                    context->shard_rev,
381                                    pool));
382
383  /* remove proto index files */
384  SVN_ERR(svn_io_remove_file2(proto_l2p_index_path, FALSE, pool));
385  SVN_ERR(svn_io_remove_file2(proto_p2l_index_path, FALSE, pool));
386
387  /* Ensure that packed file is written to disk.*/
388  SVN_ERR(svn_io_file_flush_to_disk(context->pack_file, pool));
389  SVN_ERR(svn_io_file_close(context->pack_file, pool));
390
391  return SVN_NO_ERROR;
392}
393
394/* Efficiently copy SIZE bytes from SOURCE to DEST.  Invoke the CANCEL_FUNC
395 * from CONTEXT at regular intervals.  Use POOL for allocations.
396 */
397static svn_error_t *
398copy_file_data(pack_context_t *context,
399               apr_file_t *dest,
400               apr_file_t *source,
401               apr_off_t size,
402               apr_pool_t *pool)
403{
404  /* most non-representation items will be small.  Minimize the buffer
405   * and infrastructure overhead in that case. */
406  enum { STACK_BUFFER_SIZE = 1024 };
407
408  if (size < STACK_BUFFER_SIZE)
409    {
410      /* copy small data using a fixed-size buffer on stack */
411      char buffer[STACK_BUFFER_SIZE];
412      SVN_ERR(svn_io_file_read_full2(source, buffer, (apr_size_t)size,
413                                     NULL, NULL, pool));
414      SVN_ERR(svn_io_file_write_full(dest, buffer, (apr_size_t)size,
415                                     NULL, pool));
416    }
417  else
418    {
419      /* use streaming copies for larger data blocks.  That may require
420       * the allocation of larger buffers and we should make sure that
421       * this extra memory is released asap. */
422      fs_fs_data_t *ffd = context->fs->fsap_data;
423      apr_pool_t *copypool = svn_pool_create(pool);
424      char *buffer = apr_palloc(copypool, ffd->block_size);
425
426      while (size)
427        {
428          apr_size_t to_copy = (apr_size_t)(MIN(size, ffd->block_size));
429          if (context->cancel_func)
430            SVN_ERR(context->cancel_func(context->cancel_baton));
431
432          SVN_ERR(svn_io_file_read_full2(source, buffer, to_copy,
433                                         NULL, NULL, pool));
434          SVN_ERR(svn_io_file_write_full(dest, buffer, to_copy,
435                                         NULL, pool));
436
437          size -= to_copy;
438        }
439
440      svn_pool_destroy(copypool);
441    }
442
443  return SVN_NO_ERROR;
444}
445
446/* Writes SIZE bytes, all 0, to DEST.  Uses POOL for allocations.
447 */
448static svn_error_t *
449write_null_bytes(apr_file_t *dest,
450                 apr_off_t size,
451                 apr_pool_t *pool)
452{
453  /* Have a collection of high-quality, easy to access NUL bytes handy. */
454  enum { BUFFER_SIZE = 1024 };
455  static const char buffer[BUFFER_SIZE] = { 0 };
456
457  /* copy SIZE of them into the file's buffer */
458  while (size)
459    {
460      apr_size_t to_write = MIN(size, BUFFER_SIZE);
461      SVN_ERR(svn_io_file_write_full(dest, buffer, to_write, NULL, pool));
462      size -= to_write;
463    }
464
465  return SVN_NO_ERROR;
466}
467
468/* Copy the "simple" item (changed paths list or property representation)
469 * from the current position in REV_FILE to TEMP_FILE using CONTEXT.  Add
470 * a copy of ENTRY to ENTRIES but with an updated offset value that points
471 * to the copy destination in TEMP_FILE.  Use POOL for allocations.
472 */
473static svn_error_t *
474copy_item_to_temp(pack_context_t *context,
475                  apr_array_header_t *entries,
476                  apr_file_t *temp_file,
477                  apr_file_t *rev_file,
478                  svn_fs_fs__p2l_entry_t *entry,
479                  apr_pool_t *pool)
480{
481  svn_fs_fs__p2l_entry_t *new_entry
482    = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
483
484  SVN_ERR(svn_fs_fs__get_file_offset(&new_entry->offset, temp_file, pool));
485  APR_ARRAY_PUSH(entries, svn_fs_fs__p2l_entry_t *) = new_entry;
486
487  SVN_ERR(copy_file_data(context, temp_file, rev_file, entry->size, pool));
488
489  return SVN_NO_ERROR;
490}
491
492/* Return the offset within CONTEXT->REPS that corresponds to item
493 * ITEM_INDEX in  REVISION.
494 */
495static int
496get_item_array_index(pack_context_t *context,
497                     svn_revnum_t revision,
498                     apr_int64_t item_index)
499{
500  assert(revision >= context->start_rev);
501  return (int)item_index + APR_ARRAY_IDX(context->rev_offsets,
502                                         revision - context->start_rev,
503                                         int);
504}
505
506/* Write INFO to the correct position in CONTEXT->REP_INFOS.  The latter
507 * may need auto-expanding.  Overwriting an array element is not allowed.
508 */
509static void
510add_item_rep_mapping(pack_context_t *context,
511                     svn_fs_fs__p2l_entry_t *entry)
512{
513  int idx;
514
515  /* index of INFO */
516  idx = get_item_array_index(context,
517                             entry->item.revision,
518                             entry->item.number);
519
520  /* make sure the index exists in the array */
521  while (context->reps->nelts <= idx)
522    APR_ARRAY_PUSH(context->reps, void *) = NULL;
523
524  /* set the element.  If there is already an entry, there are probably
525   * two items claiming to be the same -> bail out */
526  assert(!APR_ARRAY_IDX(context->reps, idx, void *));
527  APR_ARRAY_IDX(context->reps, idx, void *) = entry;
528}
529
530/* Return the P2L entry from CONTEXT->REPS for the given ID.  If there is
531 * none (or not anymore), return NULL.  If RESET has been specified, set
532 * the array entry to NULL after returning the entry.
533 */
534static svn_fs_fs__p2l_entry_t *
535get_item(pack_context_t *context,
536         const svn_fs_fs__id_part_t *id,
537         svn_boolean_t reset)
538{
539  svn_fs_fs__p2l_entry_t *result = NULL;
540  if (id->number && id->revision >= context->start_rev)
541    {
542      int idx = get_item_array_index(context, id->revision, id->number);
543      if (context->reps->nelts > idx)
544        {
545          result = APR_ARRAY_IDX(context->reps, idx, void *);
546          if (result && reset)
547            APR_ARRAY_IDX(context->reps, idx, void *) = NULL;
548        }
549    }
550
551  return result;
552}
553
554/* Copy representation item identified by ENTRY from the current position
555 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
556 * our placement algorithm to CONTEXT.  Use POOL for temporary allocations.
557 */
558static svn_error_t *
559copy_rep_to_temp(pack_context_t *context,
560                 apr_file_t *rev_file,
561                 svn_fs_fs__p2l_entry_t *entry,
562                 apr_pool_t *pool)
563{
564  svn_fs_fs__rep_header_t *rep_header;
565  svn_stream_t *stream;
566  apr_off_t source_offset = entry->offset;
567
568  /* create a copy of ENTRY, make it point to the copy destination and
569   * store it in CONTEXT */
570  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
571  SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file, pool));
572  add_item_rep_mapping(context, entry);
573
574  /* read & parse the representation header */
575  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
576  SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, stream, pool, pool));
577  svn_stream_close(stream);
578
579  /* if the representation is a delta against some other rep, link the two */
580  if (   rep_header->type == svn_fs_fs__rep_delta
581      && rep_header->base_revision >= context->start_rev)
582    {
583      reference_t *reference = apr_pcalloc(context->info_pool,
584                                           sizeof(*reference));
585      reference->from = entry->item;
586      reference->to.revision = rep_header->base_revision;
587      reference->to.number = rep_header->base_item_index;
588      APR_ARRAY_PUSH(context->references, reference_t *) = reference;
589    }
590
591  /* copy the whole rep (including header!) to our temp file */
592  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
593  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
594                         pool));
595
596  return SVN_NO_ERROR;
597}
598
599/* Directories first, dirs / files sorted by name in reverse lexical order.
600 * This maximizes the chance of two items being located close to one another
601 * in *all* pack files independent of their change order.  It also groups
602 * multi-project repos nicely according to their sub-projects.  The reverse
603 * order aspect gives "trunk" preference over "tags" and "branches", so
604 * trunk-related items are more likely to be contiguous.
605 */
606static int
607compare_dir_entries_format7(const svn_sort__item_t *a,
608                            const svn_sort__item_t *b)
609{
610  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
611  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
612
613  if (lhs->kind != rhs->kind)
614    return lhs->kind == svn_node_dir ? -1 : 1;
615
616  return strcmp(lhs->name, rhs->name);
617}
618
619/* Directories entries sorted by revision (decreasing - to max cache hits)
620 * and offset (increasing - to max benefit from APR file buffering).
621 */
622static int
623compare_dir_entries_format6(const svn_sort__item_t *a,
624                            const svn_sort__item_t *b)
625{
626  const svn_fs_dirent_t *lhs = (const svn_fs_dirent_t *) a->value;
627  const svn_fs_dirent_t *rhs = (const svn_fs_dirent_t *) b->value;
628
629  const svn_fs_fs__id_part_t *lhs_rev_item
630    = svn_fs_fs__id_rev_item(lhs->id);
631  const svn_fs_fs__id_part_t *rhs_rev_item
632    = svn_fs_fs__id_rev_item(rhs->id);
633
634  /* decreasing ("reverse") order on revs */
635  if (lhs_rev_item->revision != rhs_rev_item->revision)
636    return lhs_rev_item->revision > rhs_rev_item->revision ? -1 : 1;
637
638  /* increasing order on offsets */
639  if (lhs_rev_item->number != rhs_rev_item->number)
640    return lhs_rev_item->number > rhs_rev_item->number ? 1 : -1;
641
642  return 0;
643}
644
645apr_array_header_t *
646svn_fs_fs__order_dir_entries(svn_fs_t *fs,
647                             apr_hash_t *directory,
648                             apr_pool_t *result_pool,
649                             apr_pool_t *scratch_pool)
650{
651  apr_array_header_t *ordered
652    = svn_sort__hash(directory,
653                     svn_fs_fs__use_log_addressing(fs)
654                       ? compare_dir_entries_format7
655                       : compare_dir_entries_format6,
656                     scratch_pool);
657
658  apr_array_header_t *result
659    = apr_array_make(result_pool, ordered->nelts, sizeof(svn_fs_dirent_t *));
660
661  int i;
662  for (i = 0; i < ordered->nelts; ++i)
663    APR_ARRAY_PUSH(result, svn_fs_dirent_t *)
664      = APR_ARRAY_IDX(ordered, i, svn_sort__item_t).value;
665
666  return result;
667}
668
669/* Return a duplicate of the the ORIGINAL path and with special sub-strins
670 * (e.g. "trunk") modified in such a way that have a lower lexicographic
671 * value than any other "normal" file name.
672 */
673static const char *
674tweak_path_for_ordering(const char *original,
675                        apr_pool_t *pool)
676{
677  /* We may add further special cases as needed. */
678  enum {SPECIAL_COUNT = 2};
679  static const char *special[SPECIAL_COUNT] = {"trunk", "branch"};
680  char *pos;
681  char *path = apr_pstrdup(pool, original);
682  int i;
683
684  /* Replace the first char of any "special" sub-string we find by
685   * a control char, i.e. '\1' .. '\31'.  In the rare event that this
686   * would clash with existing paths, no data will be lost but merely
687   * the node ordering will be sub-optimal.
688   */
689  for (i = 0; i < SPECIAL_COUNT; ++i)
690    for (pos = strstr(path, special[i]);
691         pos;
692         pos = strstr(pos + 1, special[i]))
693      {
694        *pos = (char)(i + '\1');
695      }
696
697   return path;
698}
699
700/* Copy node revision item identified by ENTRY from the current position
701 * in REV_FILE into CONTEXT->REPS_FILE.  Add all tracking into needed by
702 * our placement algorithm to CONTEXT.  Use POOL for temporary allocations.
703 */
704static svn_error_t *
705copy_node_to_temp(pack_context_t *context,
706                  apr_file_t *rev_file,
707                  svn_fs_fs__p2l_entry_t *entry,
708                  apr_pool_t *pool)
709{
710  path_order_t *path_order = apr_pcalloc(context->info_pool,
711                                         sizeof(*path_order));
712  node_revision_t *noderev;
713  const char *sort_path;
714  svn_stream_t *stream;
715  apr_off_t source_offset = entry->offset;
716
717  /* read & parse noderev */
718  stream = svn_stream_from_aprfile2(rev_file, TRUE, pool);
719  SVN_ERR(svn_fs_fs__read_noderev(&noderev, stream, pool, pool));
720  svn_stream_close(stream);
721
722  /* create a copy of ENTRY, make it point to the copy destination and
723   * store it in CONTEXT */
724  entry = apr_pmemdup(context->info_pool, entry, sizeof(*entry));
725  SVN_ERR(svn_fs_fs__get_file_offset(&entry->offset, context->reps_file,
726                                     pool));
727  add_item_rep_mapping(context, entry);
728
729  /* copy the noderev to our temp file */
730  SVN_ERR(svn_io_file_seek(rev_file, APR_SET, &source_offset, pool));
731  SVN_ERR(copy_file_data(context, context->reps_file, rev_file, entry->size,
732                         pool));
733
734  /* if the node has a data representation, make that the node's "base".
735   * This will (often) cause the noderev to be placed right in front of
736   * its data representation. */
737
738  if (noderev->data_rep && noderev->data_rep->revision >= context->start_rev)
739    {
740      path_order->rep_id.revision = noderev->data_rep->revision;
741      path_order->rep_id.number = noderev->data_rep->item_index;
742      path_order->expanded_size = noderev->data_rep->expanded_size
743                                ? noderev->data_rep->expanded_size
744                                : noderev->data_rep->size;
745    }
746
747  /* Sort path is the key used for ordering noderevs and associated reps.
748   * It will not be stored in the final pack file. */
749  sort_path = tweak_path_for_ordering(noderev->created_path, pool);
750  path_order->path = svn_prefix_string__create(context->paths, sort_path);
751  path_order->node_id = *svn_fs_fs__id_node_id(noderev->id);
752  path_order->revision = svn_fs_fs__id_rev(noderev->id);
753  path_order->predecessor_count = noderev->predecessor_count;
754  path_order->is_dir = noderev->kind == svn_node_dir;
755  path_order->noderev_id = *svn_fs_fs__id_rev_item(noderev->id);
756  APR_ARRAY_PUSH(context->path_order, path_order_t *) = path_order;
757
758  return SVN_NO_ERROR;
759}
760
761/* implements compare_fn_t.  Bring all directories in front of the files
762   and sort descendingly by PATH, NODE_ID and REVISION.
763 */
764static int
765compare_path_order(const path_order_t * const * lhs_p,
766                   const path_order_t * const * rhs_p)
767{
768  const path_order_t * lhs = *lhs_p;
769  const path_order_t * rhs = *rhs_p;
770
771  /* cluster all directories */
772  int diff = rhs->is_dir - lhs->is_dir;
773  if (diff)
774    return diff;
775
776  /* lexicographic order on path and node (i.e. latest first) */
777  diff = svn_prefix_string__compare(lhs->path, rhs->path);
778  if (diff)
779    return diff;
780
781  /* reverse order on node (i.e. latest first) */
782  diff = svn_fs_fs__id_part_compare(&rhs->node_id, &lhs->node_id);
783  if (diff)
784    return diff;
785
786  /* reverse order on revision (i.e. latest first) */
787  if (lhs->revision != rhs->revision)
788    return lhs->revision < rhs->revision ? 1 : -1;
789
790  return 0;
791}
792
793/* implements compare_fn_t.  Sort ascendingly by FROM, TO.
794 */
795static int
796compare_references(const reference_t * const * lhs_p,
797                   const reference_t * const * rhs_p)
798{
799  const reference_t * lhs = *lhs_p;
800  const reference_t * rhs = *rhs_p;
801
802  int diff = svn_fs_fs__id_part_compare(&lhs->from, &rhs->from);
803  return diff ? diff : svn_fs_fs__id_part_compare(&lhs->to, &rhs->to);
804}
805
806/* implements compare_fn_t.  Assume ascending order by FROM.
807 */
808static int
809compare_ref_to_item(const reference_t * const * lhs_p,
810                    const svn_fs_fs__id_part_t * rhs_p)
811{
812  return svn_fs_fs__id_part_compare(&(*lhs_p)->from, rhs_p);
813}
814
815/* implements compare_fn_t.  Finds the DIR / FILE boundary.
816 */
817static int
818compare_is_dir(const path_order_t * const * lhs_p,
819               const void *unused)
820{
821  return (*lhs_p)->is_dir ? -1 : 0;
822}
823
824/* Look for the least significant bit set in VALUE and return the smallest
825 * number with the same property, i.e. the largest power of 2 that is a
826 * factor in VALUE. */
827static int
828roundness(int value)
829{
830  return value ? value - (value & (value - 1)) : INT_MAX;
831}
832
833/* Order a range of data collected in CONTEXT such that we can place them
834 * in the desired order.  The input is taken from *PATH_ORDER, offsets FIRST
835 * to LAST and then written in the final order to the same range in *TEMP.
836 */
837static void
838sort_reps_range(pack_context_t *context,
839                const path_order_t **path_order,
840                const path_order_t **temp,
841                int first,
842                int last)
843{
844  const svn_prefix_string__t *path;
845  int i, dest, best;
846  svn_fs_fs__id_part_t rep_id;
847  fs_fs_data_t *ffd = context->fs->fsap_data;
848
849  /* The logic below would fail for empty ranges. */
850  if (first == last)
851    return;
852
853  /* Re-order noderevs like this:
854   *
855   * (1) Most likely to be referenced by future pack files, in path order.
856   * (2) highest revision rep per path + dependency chain
857   * (3) Remaining reps in path, rev order
858   *
859   * We simply pick & chose from the existing path, rev order.
860   */
861  dest = first;
862  path = path_order[first]->path;
863  best = first;
864
865  /* (1) For each path, pick the "roundest" representation and put it in
866   * front of all other nodes in the pack file.  The "roundest" rep is
867   * the one most likely to be referenced from future pack files, i.e. we
868   * concentrate those potential "foreign link targets" in one section of
869   * the pack file.
870   *
871   * And we only apply this to reps outside the linear deltification
872   * sections because references *into* linear deltification ranges are
873   * much less likely.
874   */
875  for (i = first; i < last; ++i)
876    {
877      /* Investigated all nodes for the current path? */
878      if (svn_prefix_string__compare(path, path_order[i]->path))
879        {
880          /* next path */
881          path = path_order[i]->path;
882
883          /* Pick roundest non-linear deltified node. */
884          if (roundness(path_order[best]->predecessor_count)
885              >= ffd->max_linear_deltification)
886            {
887              temp[dest++] = path_order[best];
888              path_order[best] = NULL;
889              best = i;
890            }
891        }
892
893      /* next entry */
894      if (  roundness(path_order[best]->predecessor_count)
895          < roundness(path_order[i]->predecessor_count))
896        best = i;
897    }
898
899  /* Treat the last path the same as all others. */
900  if (roundness(path_order[best]->predecessor_count)
901      >= ffd->max_linear_deltification)
902    {
903      temp[dest++] = path_order[best];
904      path_order[best] = NULL;
905    }
906
907  /* (2) For each (remaining) path, pick the nodes along the delta chain
908   * for the highest revision.  Due to our ordering, this is the first
909   * node we encounter for any path.
910   *
911   * Most references that don't hit a delta base picked in (1), will
912   * access HEAD of the respective path.  Keeping all its dependency chain
913   * in one place turns reconstruction into a linear scan of minimal length.
914   */
915  for (i = first; i < last; ++i)
916    if (path_order[i])
917      {
918        /* This is the first path we still have to handle. */
919        path = path_order[i]->path;
920        rep_id = path_order[i]->rep_id;
921        break;
922      }
923
924  for (i = first; i < last; ++i)
925    if (path_order[i])
926      {
927        /* New path? */
928        if (svn_prefix_string__compare(path, path_order[i]->path))
929          {
930            path = path_order[i]->path;
931            rep_id = path_order[i]->rep_id;
932          }
933
934        /* Pick nodes along the deltification chain.  Skip side-branches. */
935        if (svn_fs_fs__id_part_eq(&path_order[i]->rep_id, &rep_id))
936          {
937            reference_t **reference;
938
939            temp[dest++] = path_order[i];
940            path_order[i] = NULL;
941
942            reference = svn_sort__array_lookup(context->references,
943                                               &rep_id, NULL,
944              (int (*)(const void *, const void *))compare_ref_to_item);
945            if (reference)
946              rep_id = (*reference)->to;
947          }
948      }
949
950  /* (3) All remaining nodes in path, rev order.  Linear deltification
951   * makes HEAD delta chains from (2) cover all or most of their deltas
952   * in a given pack file.  So, this is just a few remnants that we put
953   * at the end of the pack file.
954   */
955  for (i = first; i < last; ++i)
956    if (path_order[i])
957      temp[dest++] = path_order[i];
958
959  /* We now know the final ordering. */
960  assert(dest == last);
961}
962
963/* Order the data collected in CONTEXT such that we can place them in the
964 * desired order.
965 */
966static void
967sort_reps(pack_context_t *context)
968{
969  apr_pool_t *temp_pool;
970  const path_order_t **temp, **path_order;
971  int i, count, dir_count;
972
973  /* We will later assume that there is at least one node / path.
974   */
975  if (context->path_order->nelts == 0)
976    {
977      assert(context->references->nelts == 0);
978      return;
979    }
980
981  /* Sort containers by path and IDs, respectively.
982   */
983  svn_sort__array(context->path_order,
984                  (int (*)(const void *, const void *))compare_path_order);
985  svn_sort__array(context->references,
986                  (int (*)(const void *, const void *))compare_references);
987
988  /* Directories are already in front; sort directories section and files
989   * section separately but use the same heuristics (see sub-function).
990   */
991  temp_pool = svn_pool_create(context->info_pool);
992  count = context->path_order->nelts;
993  temp = apr_pcalloc(temp_pool, count * sizeof(*temp));
994  path_order = (void *)context->path_order->elts;
995
996  /* Find the boundary between DIR and FILE section. */
997  dir_count = svn_sort__bsearch_lower_bound(context->path_order, NULL,
998                     (int (*)(const void *, const void *))compare_is_dir);
999
1000  /* Sort those sub-sections separately. */
1001  sort_reps_range(context, path_order, temp, 0, dir_count);
1002  sort_reps_range(context, path_order, temp, dir_count, count);
1003
1004  /* We now know the final ordering. */
1005  for (i = 0; i < count; ++i)
1006    path_order[i] = temp[i];
1007
1008  svn_pool_destroy(temp_pool);
1009}
1010
1011/* implements compare_fn_t. Place LHS before RHS, if the latter is older.
1012 */
1013static int
1014compare_p2l_info(const svn_fs_fs__p2l_entry_t * const * lhs,
1015                 const svn_fs_fs__p2l_entry_t * const * rhs)
1016{
1017  assert(*lhs != *rhs);
1018
1019  if ((*lhs)->item.revision == (*rhs)->item.revision)
1020    return (*lhs)->item.number > (*rhs)->item.number ? -1 : 1;
1021
1022  return (*lhs)->item.revision > (*rhs)->item.revision ? -1 : 1;
1023}
1024
1025/* Sort svn_fs_fs__p2l_entry_t * array ENTRIES by age.  Place the latest
1026 * items first.
1027 */
1028static void
1029sort_items(apr_array_header_t *entries)
1030{
1031  svn_sort__array(entries,
1032                  (int (*)(const void *, const void *))compare_p2l_info);
1033}
1034
1035/* Return the remaining unused bytes in the current block in CONTEXT's
1036 * pack file.
1037 */
1038static apr_ssize_t
1039get_block_left(pack_context_t *context)
1040{
1041  fs_fs_data_t *ffd = context->fs->fsap_data;
1042  return ffd->block_size - (context->pack_offset % ffd->block_size);
1043}
1044
1045/* To prevent items from overlapping a block boundary, we will usually
1046 * put them into the next block and top up the old one with NUL bytes.
1047 * Pad CONTEXT's pack file to the end of the current block, if TO_ADD does
1048 * not fit into the current block and the padding is short enough.
1049 * Use POOL for allocations.
1050 */
1051static svn_error_t *
1052auto_pad_block(pack_context_t *context,
1053               apr_off_t to_add,
1054               apr_pool_t *pool)
1055{
1056  fs_fs_data_t *ffd = context->fs->fsap_data;
1057
1058  /* This is the maximum number of bytes "wasted" that way per block.
1059   * Larger items will cross the block boundaries. */
1060  const apr_off_t max_padding = MAX(ffd->block_size / 50, 512);
1061
1062  /* Is wasted space small enough to align the current item to the next
1063   * block? */
1064  apr_off_t padding = get_block_left(context);
1065
1066  if (padding < to_add && padding < max_padding)
1067    {
1068      /* Yes. To up with NUL bytes and don't forget to create
1069       * an P2L index entry marking this section as unused. */
1070      svn_fs_fs__p2l_entry_t null_entry;
1071
1072      null_entry.offset = context->pack_offset;
1073      null_entry.size = padding;
1074      null_entry.type = SVN_FS_FS__ITEM_TYPE_UNUSED;
1075      null_entry.item.revision = SVN_INVALID_REVNUM;
1076      null_entry.item.number = SVN_FS_FS__ITEM_INDEX_UNUSED;
1077      null_entry.fnv1_checksum = 0;
1078
1079      SVN_ERR(write_null_bytes(context->pack_file, padding, pool));
1080      SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1081                   context->proto_p2l_index, &null_entry, pool));
1082      context->pack_offset += padding;
1083    }
1084
1085  return SVN_NO_ERROR;
1086}
1087
1088/* Read the contents of ITEM, if not empty, from TEMP_FILE and write it
1089 * to CONTEXT->PACK_FILE.  Use POOL for allocations.
1090 */
1091static svn_error_t *
1092store_item(pack_context_t *context,
1093           apr_file_t *temp_file,
1094           svn_fs_fs__p2l_entry_t *item,
1095           apr_pool_t *pool)
1096{
1097  apr_off_t safety_margin;
1098
1099  /* skip empty entries */
1100  if (item->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
1101    return SVN_NO_ERROR;
1102
1103  /* If the next item does not fit into the current block, auto-pad it.
1104      Take special care of textual noderevs since their parsers may
1105      prefetch up to 80 bytes and we don't want them to cross block
1106      boundaries. */
1107  safety_margin = item->type == SVN_FS_FS__ITEM_TYPE_NODEREV
1108                ? SVN__LINE_CHUNK_SIZE
1109                : 0;
1110  SVN_ERR(auto_pad_block(context, item->size + safety_margin, pool));
1111
1112  /* select the item in the source file and copy it into the target
1113    * pack file */
1114  SVN_ERR(svn_io_file_seek(temp_file, APR_SET, &item->offset, pool));
1115  SVN_ERR(copy_file_data(context, context->pack_file, temp_file,
1116                         item->size, pool));
1117
1118  /* write index entry and update current position */
1119  item->offset = context->pack_offset;
1120  context->pack_offset += item->size;
1121
1122  SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(context->proto_p2l_index,
1123                                               item, pool));
1124
1125  APR_ARRAY_PUSH(context->reps, svn_fs_fs__p2l_entry_t *) = item;
1126
1127  return SVN_NO_ERROR;
1128}
1129
1130/* Read the contents of the non-empty items in ITEMS from TEMP_FILE and
1131 * write them to CONTEXT->PACK_FILE.  Use POOL for allocations.
1132 */
1133static svn_error_t *
1134store_items(pack_context_t *context,
1135            apr_file_t *temp_file,
1136            apr_array_header_t *items,
1137            apr_pool_t *pool)
1138{
1139  int i;
1140  apr_pool_t *iterpool = svn_pool_create(pool);
1141
1142  /* copy all items in strict order */
1143  for (i = 0; i < items->nelts; ++i)
1144    {
1145      svn_pool_clear(iterpool);
1146      SVN_ERR(store_item(context, temp_file,
1147                         APR_ARRAY_IDX(items, i, svn_fs_fs__p2l_entry_t *),
1148                         iterpool));
1149    }
1150
1151  svn_pool_destroy(iterpool);
1152
1153  return SVN_NO_ERROR;
1154}
1155
1156/* Copy (append) the items identified by svn_fs_fs__p2l_entry_t * elements
1157 * in ENTRIES strictly in order from TEMP_FILE into CONTEXT->PACK_FILE.
1158 * Use POOL for temporary allocations.
1159 */
1160static svn_error_t *
1161copy_reps_from_temp(pack_context_t *context,
1162                    apr_file_t *temp_file,
1163                    apr_pool_t *pool)
1164{
1165  apr_pool_t *iterpool = svn_pool_create(pool);
1166  apr_array_header_t *path_order = context->path_order;
1167  int i;
1168
1169  /* copy items in path order. */
1170  for (i = 0; i < path_order->nelts; ++i)
1171    {
1172      path_order_t *current_path;
1173      svn_fs_fs__p2l_entry_t *node_part;
1174      svn_fs_fs__p2l_entry_t *rep_part;
1175
1176      svn_pool_clear(iterpool);
1177
1178      current_path = APR_ARRAY_IDX(path_order, i, path_order_t *);
1179      node_part = get_item(context, &current_path->noderev_id, TRUE);
1180      rep_part = get_item(context, &current_path->rep_id, TRUE);
1181
1182      if (node_part)
1183        SVN_ERR(store_item(context, temp_file, node_part, iterpool));
1184      if (rep_part)
1185        SVN_ERR(store_item(context, temp_file, rep_part, iterpool));
1186    }
1187
1188  svn_pool_destroy(iterpool);
1189
1190  return SVN_NO_ERROR;
1191}
1192
1193/* implements compare_fn_t. Place LHS before RHS, if the latter belongs to
1194 * a newer revision.
1195 */
1196static int
1197compare_p2l_info_rev(const svn_fs_fs__p2l_entry_t * const * lhs_p,
1198                     const svn_fs_fs__p2l_entry_t * const * rhs_p)
1199{
1200  const svn_fs_fs__p2l_entry_t * lhs = *lhs_p;
1201  const svn_fs_fs__p2l_entry_t * rhs = *rhs_p;
1202
1203  if (lhs->item.revision == rhs->item.revision)
1204    return 0;
1205
1206  return lhs->item.revision < rhs->item.revision ? -1 : 1;
1207}
1208
1209/* Write the log-to-phys proto index file for CONTEXT and use POOL for
1210 * temporary allocations.  All items in all buckets must have been placed
1211 * by now.
1212 */
1213static svn_error_t *
1214write_l2p_index(pack_context_t *context,
1215                apr_pool_t *pool)
1216{
1217  apr_pool_t *iterpool = svn_pool_create(pool);
1218  svn_revnum_t prev_rev = SVN_INVALID_REVNUM;
1219  int i, dest;
1220
1221  /* eliminate empty entries from CONTEXT->REPS */
1222  for (i = 0, dest = 0; i < context->reps->nelts; ++i)
1223    {
1224      svn_fs_fs__p2l_entry_t *entry
1225        = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1226      if (entry)
1227        APR_ARRAY_IDX(context->reps, dest++, svn_fs_fs__p2l_entry_t *)
1228          = entry;
1229    }
1230  context->reps->nelts = dest;
1231
1232  /* we need to write the l2p index revision by revision */
1233  svn_sort__array(context->reps,
1234                  (int (*)(const void *, const void *))compare_p2l_info_rev);
1235
1236  /* write index entries */
1237  for (i = 0; i < context->reps->nelts; ++i)
1238    {
1239      svn_fs_fs__p2l_entry_t *p2l_entry
1240        = APR_ARRAY_IDX(context->reps, i, svn_fs_fs__p2l_entry_t *);
1241      if (p2l_entry == NULL)
1242        continue;
1243
1244      /* next revision? */
1245      if (prev_rev != p2l_entry->item.revision)
1246        {
1247          prev_rev = p2l_entry->item.revision;
1248          SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(
1249                       context->proto_l2p_index, iterpool));
1250        }
1251
1252      /* add entry */
1253      SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(context->proto_l2p_index,
1254                                                   p2l_entry->offset,
1255                                                   p2l_entry->item.number,
1256                                                   iterpool));
1257
1258      /* keep memory usage in check */
1259      if (i % 256 == 0)
1260        svn_pool_clear(iterpool);
1261    }
1262
1263  svn_pool_destroy(iterpool);
1264
1265  return SVN_NO_ERROR;
1266}
1267
1268/* Pack the current revision range of CONTEXT, i.e. this covers phases 2
1269 * to 4.  Use POOL for allocations.
1270 */
1271static svn_error_t *
1272pack_range(pack_context_t *context,
1273           apr_pool_t *pool)
1274{
1275  fs_fs_data_t *ffd = context->fs->fsap_data;
1276  apr_pool_t *revpool = svn_pool_create(pool);
1277  apr_pool_t *iterpool = svn_pool_create(pool);
1278  apr_pool_t *iterpool2 = svn_pool_create(pool);
1279
1280  /* Phase 2: Copy items into various buckets and build tracking info */
1281  svn_revnum_t revision;
1282  for (revision = context->start_rev; revision < context->end_rev; ++revision)
1283    {
1284      apr_off_t offset = 0;
1285      svn_fs_fs__revision_file_t *rev_file;
1286
1287      svn_pool_clear(revpool);
1288
1289      /* Get the rev file dimensions (mainly index locations). */
1290      SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1291                                               revision, revpool, iterpool));
1292      SVN_ERR(svn_fs_fs__auto_read_footer(rev_file));
1293
1294      /* store the indirect array index */
1295      APR_ARRAY_PUSH(context->rev_offsets, int) = context->reps->nelts;
1296
1297      /* read the phys-to-log index file until we covered the whole rev file.
1298       * That index contains enough info to build both target indexes from it. */
1299      while (offset < rev_file->l2p_offset)
1300        {
1301          /* read one cluster */
1302          int i;
1303          apr_array_header_t *entries;
1304
1305          svn_pool_clear(iterpool);
1306
1307          SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs,
1308                                              rev_file, revision, offset,
1309                                              ffd->p2l_page_size, iterpool,
1310                                              iterpool));
1311
1312          for (i = 0; i < entries->nelts; ++i)
1313            {
1314              svn_fs_fs__p2l_entry_t *entry
1315                = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1316
1317              /* skip first entry if that was duplicated due crossing a
1318                 cluster boundary */
1319              if (offset > entry->offset)
1320                continue;
1321
1322              svn_pool_clear(iterpool2);
1323
1324              /* process entry while inside the rev file */
1325              offset = entry->offset;
1326              if (offset < rev_file->l2p_offset)
1327                {
1328                  SVN_ERR(svn_io_file_seek(rev_file->file, APR_SET, &offset,
1329                                           iterpool2));
1330
1331                  if (entry->type == SVN_FS_FS__ITEM_TYPE_CHANGES)
1332                    SVN_ERR(copy_item_to_temp(context,
1333                                              context->changes,
1334                                              context->changes_file,
1335                                              rev_file->file, entry,
1336                                              iterpool2));
1337                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_FILE_PROPS)
1338                    SVN_ERR(copy_item_to_temp(context,
1339                                              context->file_props,
1340                                              context->file_props_file,
1341                                              rev_file->file, entry,
1342                                              iterpool2));
1343                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1344                    SVN_ERR(copy_item_to_temp(context,
1345                                              context->dir_props,
1346                                              context->dir_props_file,
1347                                              rev_file->file, entry,
1348                                              iterpool2));
1349                  else if (   entry->type == SVN_FS_FS__ITEM_TYPE_FILE_REP
1350                           || entry->type == SVN_FS_FS__ITEM_TYPE_DIR_REP)
1351                    SVN_ERR(copy_rep_to_temp(context, rev_file->file, entry,
1352                                             iterpool2));
1353                  else if (entry->type == SVN_FS_FS__ITEM_TYPE_NODEREV)
1354                    SVN_ERR(copy_node_to_temp(context, rev_file->file, entry,
1355                                              iterpool2));
1356                  else
1357                    SVN_ERR_ASSERT(entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED);
1358
1359                  offset += entry->size;
1360                }
1361            }
1362
1363          if (context->cancel_func)
1364            SVN_ERR(context->cancel_func(context->cancel_baton));
1365        }
1366    }
1367
1368  svn_pool_destroy(iterpool2);
1369  svn_pool_destroy(iterpool);
1370
1371  /* phase 3: placement.
1372   * Use "newest first" placement for simple items. */
1373  sort_items(context->changes);
1374  sort_items(context->file_props);
1375  sort_items(context->dir_props);
1376
1377  /* follow dependencies recursively for noderevs and data representations */
1378  sort_reps(context);
1379
1380  /* phase 4: copy bucket data to pack file.  Write P2L index. */
1381  SVN_ERR(store_items(context, context->changes_file, context->changes,
1382                      revpool));
1383  svn_pool_clear(revpool);
1384  SVN_ERR(store_items(context, context->file_props_file, context->file_props,
1385                      revpool));
1386  svn_pool_clear(revpool);
1387  SVN_ERR(store_items(context, context->dir_props_file, context->dir_props,
1388                      revpool));
1389  svn_pool_clear(revpool);
1390  SVN_ERR(copy_reps_from_temp(context, context->reps_file, revpool));
1391  svn_pool_clear(revpool);
1392
1393  /* write L2P index as well (now that we know all target offsets) */
1394  SVN_ERR(write_l2p_index(context, revpool));
1395
1396  svn_pool_destroy(revpool);
1397
1398  return SVN_NO_ERROR;
1399}
1400
1401/* Append CONTEXT->START_REV to the context's pack file with no re-ordering.
1402 * This function will only be used for very large revisions (>>100k changes).
1403 * Use POOL for temporary allocations.
1404 */
1405static svn_error_t *
1406append_revision(pack_context_t *context,
1407                apr_pool_t *pool)
1408{
1409  fs_fs_data_t *ffd = context->fs->fsap_data;
1410  apr_off_t offset = 0;
1411  apr_pool_t *iterpool = svn_pool_create(pool);
1412  svn_fs_fs__revision_file_t *rev_file;
1413  apr_finfo_t finfo;
1414
1415  /* Get the size of the file. */
1416  const char *path = svn_dirent_join(context->shard_dir,
1417                                     apr_psprintf(iterpool, "%ld",
1418                                                  context->start_rev),
1419                                     pool);
1420  SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, pool));
1421
1422  /* Copy all the bits from the rev file to the end of the pack file. */
1423  SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, context->fs,
1424                                           context->start_rev, pool,
1425                                           iterpool));
1426  SVN_ERR(copy_file_data(context, context->pack_file, rev_file->file,
1427                         finfo.size, iterpool));
1428
1429  /* mark the start of a new revision */
1430  SVN_ERR(svn_fs_fs__l2p_proto_index_add_revision(context->proto_l2p_index,
1431                                                  pool));
1432
1433  /* read the phys-to-log index file until we covered the whole rev file.
1434   * That index contains enough info to build both target indexes from it. */
1435  while (offset < finfo.size)
1436    {
1437      /* read one cluster */
1438      int i;
1439      apr_array_header_t *entries;
1440
1441      svn_pool_clear(iterpool);
1442      SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, context->fs, rev_file,
1443                                          context->start_rev, offset,
1444                                          ffd->p2l_page_size, iterpool,
1445                                          iterpool));
1446
1447      for (i = 0; i < entries->nelts; ++i)
1448        {
1449          svn_fs_fs__p2l_entry_t *entry
1450            = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
1451
1452          /* skip first entry if that was duplicated due crossing a
1453             cluster boundary */
1454          if (offset > entry->offset)
1455            continue;
1456
1457          /* process entry while inside the rev file */
1458          offset = entry->offset;
1459          if (offset < finfo.size)
1460            {
1461              entry->offset += context->pack_offset;
1462              offset += entry->size;
1463              SVN_ERR(svn_fs_fs__l2p_proto_index_add_entry(
1464                         context->proto_l2p_index, entry->offset,
1465                         entry->item.number, iterpool));
1466              SVN_ERR(svn_fs_fs__p2l_proto_index_add_entry(
1467                         context->proto_p2l_index, entry, iterpool));
1468            }
1469        }
1470    }
1471
1472  svn_pool_destroy(iterpool);
1473  context->pack_offset += finfo.size;
1474
1475  SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
1476
1477  return SVN_NO_ERROR;
1478}
1479
1480/* Logical addressing mode packing logic.
1481 *
1482 * Pack the revision shard starting at SHARD_REV in filesystem FS from
1483 * SHARD_DIR into the PACK_FILE_DIR, using POOL for allocations.  Limit
1484 * the extra memory consumption to MAX_MEM bytes.  CANCEL_FUNC and
1485 * CANCEL_BATON are what you think they are.
1486 */
1487static svn_error_t *
1488pack_log_addressed(svn_fs_t *fs,
1489                   const char *pack_file_dir,
1490                   const char *shard_dir,
1491                   svn_revnum_t shard_rev,
1492                   apr_size_t max_mem,
1493                   svn_cancel_func_t cancel_func,
1494                   void *cancel_baton,
1495                   apr_pool_t *pool)
1496{
1497  enum
1498    {
1499      /* estimated amount of memory used to represent one item in memory
1500       * during rev file packing */
1501      PER_ITEM_MEM = APR_ALIGN_DEFAULT(sizeof(path_order_t))
1502                   + APR_ALIGN_DEFAULT(2 *sizeof(void*))
1503                   + APR_ALIGN_DEFAULT(sizeof(reference_t))
1504                   + APR_ALIGN_DEFAULT(sizeof(svn_fs_fs__p2l_entry_t))
1505                   + 6 * sizeof(void*)
1506    };
1507
1508  int max_items;
1509  apr_array_header_t *max_ids;
1510  pack_context_t context = { 0 };
1511  int i;
1512  apr_size_t item_count = 0;
1513  apr_pool_t *iterpool = svn_pool_create(pool);
1514
1515  /* Prevent integer overflow.  We use apr arrays to process the items so
1516   * the maximum number of items is INT_MAX. */
1517    {
1518      apr_size_t temp = max_mem / PER_ITEM_MEM;
1519      SVN_ERR_ASSERT(temp <= INT_MAX);
1520      max_items = (int)temp;
1521    }
1522
1523  /* set up a pack context */
1524  SVN_ERR(initialize_pack_context(&context, fs, pack_file_dir, shard_dir,
1525                                  shard_rev, max_items, cancel_func,
1526                                  cancel_baton, pool));
1527
1528  /* phase 1: determine the size of the revisions to pack */
1529  SVN_ERR(svn_fs_fs__l2p_get_max_ids(&max_ids, fs, shard_rev,
1530                                     context.shard_end_rev - shard_rev,
1531                                     pool, pool));
1532
1533  /* pack revisions in ranges that don't exceed MAX_MEM */
1534  for (i = 0; i < max_ids->nelts; ++i)
1535    if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) + item_count <= max_items)
1536      {
1537        context.end_rev++;
1538      }
1539    else
1540      {
1541        svn_pool_clear(iterpool);
1542
1543        /* some unpacked revisions before this one? */
1544        if (context.start_rev < context.end_rev)
1545          {
1546            /* pack them intelligently (might be just 1 rev but still ...) */
1547            SVN_ERR(pack_range(&context, iterpool));
1548            SVN_ERR(reset_pack_context(&context, iterpool));
1549            item_count = 0;
1550          }
1551
1552        /* next revision range is to start with the current revision */
1553        context.start_rev = i + context.shard_rev;
1554        context.end_rev = context.start_rev + 1;
1555
1556        /* if this is a very large revision, we must place it as is */
1557        if (APR_ARRAY_IDX(max_ids, i, apr_uint64_t) > max_items)
1558          {
1559            SVN_ERR(append_revision(&context, iterpool));
1560            context.start_rev++;
1561          }
1562        else
1563          item_count += (apr_size_t)APR_ARRAY_IDX(max_ids, i, apr_uint64_t);
1564      }
1565
1566  /* non-empty revision range at the end? */
1567  if (context.start_rev < context.end_rev)
1568    SVN_ERR(pack_range(&context, iterpool));
1569
1570  /* last phase: finalize indexes and clean up */
1571  SVN_ERR(reset_pack_context(&context, iterpool));
1572  SVN_ERR(close_pack_context(&context, iterpool));
1573  svn_pool_destroy(iterpool);
1574
1575  return SVN_NO_ERROR;
1576}
1577
1578/* Given REV in FS, set *REV_OFFSET to REV's offset in the packed file.
1579   Use POOL for temporary allocations. */
1580svn_error_t *
1581svn_fs_fs__get_packed_offset(apr_off_t *rev_offset,
1582                             svn_fs_t *fs,
1583                             svn_revnum_t rev,
1584                             apr_pool_t *pool)
1585{
1586  fs_fs_data_t *ffd = fs->fsap_data;
1587  svn_stream_t *manifest_stream;
1588  svn_boolean_t is_cached;
1589  svn_revnum_t shard;
1590  apr_int64_t shard_pos;
1591  apr_array_header_t *manifest;
1592  apr_pool_t *iterpool;
1593
1594  shard = rev / ffd->max_files_per_dir;
1595
1596  /* position of the shard within the manifest */
1597  shard_pos = rev % ffd->max_files_per_dir;
1598
1599  /* fetch exactly that element into *rev_offset, if the manifest is found
1600     in the cache */
1601  SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
1602                                 ffd->packed_offset_cache, &shard,
1603                                 svn_fs_fs__get_sharded_offset, &shard_pos,
1604                                 pool));
1605
1606  if (is_cached)
1607      return SVN_NO_ERROR;
1608
1609  /* Open the manifest file. */
1610  SVN_ERR(svn_stream_open_readonly(&manifest_stream,
1611                                   svn_fs_fs__path_rev_packed(fs, rev,
1612                                                              PATH_MANIFEST,
1613                                                              pool),
1614                                   pool, pool));
1615
1616  /* While we're here, let's just read the entire manifest file into an array,
1617     so we can cache the entire thing. */
1618  iterpool = svn_pool_create(pool);
1619  manifest = apr_array_make(pool, ffd->max_files_per_dir, sizeof(apr_off_t));
1620  while (1)
1621    {
1622      svn_boolean_t eof;
1623      apr_int64_t val;
1624
1625      svn_pool_clear(iterpool);
1626      SVN_ERR(svn_fs_fs__read_number_from_stream(&val, &eof, manifest_stream,
1627                                                 iterpool));
1628      if (eof)
1629        break;
1630
1631      APR_ARRAY_PUSH(manifest, apr_off_t) = (apr_off_t)val;
1632    }
1633  svn_pool_destroy(iterpool);
1634
1635  *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
1636                              apr_off_t);
1637
1638  /* Close up shop and cache the array. */
1639  SVN_ERR(svn_stream_close(manifest_stream));
1640  return svn_cache__set(ffd->packed_offset_cache, &shard, manifest, pool);
1641}
1642
1643/* Packing logic for physical addresssing mode:
1644 * Simply concatenate all revision contents.
1645 *
1646 * Pack the revision shard starting at SHARD_REV containing exactly
1647 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1648 * using POOL for allocations.  CANCEL_FUNC and CANCEL_BATON are what you
1649 * think they are.
1650 */
1651static svn_error_t *
1652pack_phys_addressed(const char *pack_file_dir,
1653                    const char *shard_path,
1654                    svn_revnum_t start_rev,
1655                    int max_files_per_dir,
1656                    svn_cancel_func_t cancel_func,
1657                    void *cancel_baton,
1658                    apr_pool_t *pool)
1659{
1660  const char *pack_file_path, *manifest_file_path;
1661  apr_file_t *pack_file;
1662  apr_file_t *manifest_file;
1663  svn_stream_t *manifest_stream;
1664  svn_revnum_t end_rev, rev;
1665  apr_off_t next_offset;
1666  apr_pool_t *iterpool;
1667
1668  /* Some useful paths. */
1669  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1670  manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, pool);
1671
1672  /* Create the new directory and pack file.
1673   * Use unbuffered apr_file_t since we're going to write using 16kb
1674   * chunks. */
1675  SVN_ERR(svn_io_file_open(&pack_file, pack_file_path,
1676                           APR_WRITE | APR_CREATE | APR_EXCL,
1677                           APR_OS_DEFAULT, pool));
1678
1679  /* Create the manifest file. */
1680  SVN_ERR(svn_io_file_open(&manifest_file, manifest_file_path,
1681                           APR_WRITE | APR_BUFFERED | APR_CREATE | APR_EXCL,
1682                           APR_OS_DEFAULT, pool));
1683  manifest_stream = svn_stream_from_aprfile2(manifest_file, TRUE, pool);
1684
1685  end_rev = start_rev + max_files_per_dir - 1;
1686  next_offset = 0;
1687  iterpool = svn_pool_create(pool);
1688
1689  /* Iterate over the revisions in this shard, squashing them together. */
1690  for (rev = start_rev; rev <= end_rev; rev++)
1691    {
1692      svn_stream_t *rev_stream;
1693      apr_finfo_t finfo;
1694      const char *path;
1695
1696      svn_pool_clear(iterpool);
1697
1698      /* Get the size of the file. */
1699      path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev),
1700                             iterpool);
1701      SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool));
1702
1703      /* build manifest */
1704      SVN_ERR(svn_stream_printf(manifest_stream, iterpool,
1705                                "%" APR_OFF_T_FMT "\n", next_offset));
1706      next_offset += finfo.size;
1707
1708      /* Copy all the bits from the rev file to the end of the pack file. */
1709      SVN_ERR(svn_stream_open_readonly(&rev_stream, path, iterpool, iterpool));
1710      SVN_ERR(svn_stream_copy3(rev_stream,
1711                               svn_stream_from_aprfile2(pack_file, TRUE, pool),
1712                               cancel_func, cancel_baton, iterpool));
1713    }
1714
1715  /* Close stream over APR file. */
1716  SVN_ERR(svn_stream_close(manifest_stream));
1717
1718  /* Ensure that pack file is written to disk. */
1719  SVN_ERR(svn_io_file_flush_to_disk(manifest_file, pool));
1720  SVN_ERR(svn_io_file_close(manifest_file, pool));
1721
1722  /* disallow write access to the manifest file */
1723  SVN_ERR(svn_io_set_file_read_only(manifest_file_path, FALSE, iterpool));
1724
1725  /* Ensure that pack file is written to disk. */
1726  SVN_ERR(svn_io_file_flush_to_disk(pack_file, pool));
1727  SVN_ERR(svn_io_file_close(pack_file, pool));
1728
1729  svn_pool_destroy(iterpool);
1730
1731  return SVN_NO_ERROR;
1732}
1733
1734/* In filesystem FS, pack the revision SHARD containing exactly
1735 * MAX_FILES_PER_DIR revisions from SHARD_PATH into the PACK_FILE_DIR,
1736 * using POOL for allocations.  Try to limit the amount of temporary
1737 * memory needed to MAX_MEM bytes.  CANCEL_FUNC and CANCEL_BATON are what
1738 * you think they are.
1739 *
1740 * If for some reason we detect a partial packing already performed, we
1741 * remove the pack file and start again.
1742 *
1743 * The actual packing will be done in a format-specific sub-function.
1744 */
1745static svn_error_t *
1746pack_rev_shard(svn_fs_t *fs,
1747               const char *pack_file_dir,
1748               const char *shard_path,
1749               apr_int64_t shard,
1750               int max_files_per_dir,
1751               apr_size_t max_mem,
1752               svn_cancel_func_t cancel_func,
1753               void *cancel_baton,
1754               apr_pool_t *pool)
1755{
1756  const char *pack_file_path;
1757  svn_revnum_t shard_rev = (svn_revnum_t) (shard * max_files_per_dir);
1758
1759  /* Some useful paths. */
1760  pack_file_path = svn_dirent_join(pack_file_dir, PATH_PACKED, pool);
1761
1762  /* Remove any existing pack file for this shard, since it is incomplete. */
1763  SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton,
1764                             pool));
1765
1766  /* Create the new directory and pack file. */
1767  SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, pool));
1768
1769  /* Index information files */
1770  if (svn_fs_fs__use_log_addressing(fs))
1771    SVN_ERR(pack_log_addressed(fs, pack_file_dir, shard_path, shard_rev,
1772                               max_mem, cancel_func, cancel_baton, pool));
1773  else
1774    SVN_ERR(pack_phys_addressed(pack_file_dir, shard_path, shard_rev,
1775                                max_files_per_dir, cancel_func,
1776                                cancel_baton, pool));
1777
1778  SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, pool));
1779  SVN_ERR(svn_io_set_file_read_only(pack_file_path, FALSE, pool));
1780
1781  return SVN_NO_ERROR;
1782}
1783
1784/* Baton struct used by pack_body(), pack_shard() and synced_pack_shard().
1785   These calls are nested and for every level additional fields will be
1786   available. */
1787struct pack_baton
1788{
1789  /* Valid when entering pack_body(). */
1790  svn_fs_t *fs;
1791  svn_fs_pack_notify_t notify_func;
1792  void *notify_baton;
1793  svn_cancel_func_t cancel_func;
1794  void *cancel_baton;
1795
1796  /* Additional entries valid when entering pack_shard(). */
1797  const char *revs_dir;
1798  const char *revsprops_dir;
1799  apr_int64_t shard;
1800
1801  /* Additional entries valid when entering synced_pack_shard(). */
1802  const char *rev_shard_path;
1803};
1804
1805
1806/* Part of the pack process that requires global (write) synchronization.
1807 * We pack the revision properties of the shard described by BATON and
1808 * In the file system at FS_PATH, pack the SHARD in REVS_DIR and replace
1809 * the non-packed revprop & rev shard folder(s) with the packed ones.
1810 * The packed rev folder has been created prior to calling this function.
1811 */
1812static svn_error_t *
1813synced_pack_shard(void *baton,
1814                  apr_pool_t *pool)
1815{
1816  struct pack_baton *pb = baton;
1817  fs_fs_data_t *ffd = pb->fs->fsap_data;
1818  const char *revprops_shard_path, *revprops_pack_file_dir;
1819
1820  /* if enabled, pack the revprops in an equivalent way */
1821  if (pb->revsprops_dir)
1822    {
1823      revprops_pack_file_dir = svn_dirent_join(pb->revsprops_dir,
1824                   apr_psprintf(pool,
1825                                "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1826                                pb->shard),
1827                   pool);
1828      revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1829                    apr_psprintf(pool, "%" APR_INT64_T_FMT, pb->shard),
1830                    pool);
1831
1832      SVN_ERR(svn_fs_fs__pack_revprops_shard(revprops_pack_file_dir,
1833                                             revprops_shard_path,
1834                                             pb->shard,
1835                                             ffd->max_files_per_dir,
1836                                             (int)(0.9*ffd->revprop_pack_size),
1837                                             ffd->compress_packed_revprops
1838                                               ? SVN__COMPRESSION_ZLIB_DEFAULT
1839                                               : SVN__COMPRESSION_NONE,
1840                                             pb->cancel_func,
1841                                             pb->cancel_baton,
1842                                             pool));
1843    }
1844
1845  /* Update the min-unpacked-rev file to reflect our newly packed shard. */
1846  SVN_ERR(svn_fs_fs__write_min_unpacked_rev(pb->fs,
1847                    (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir),
1848                    pool));
1849  ffd->min_unpacked_rev
1850    = (svn_revnum_t)((pb->shard + 1) * ffd->max_files_per_dir);
1851
1852  /* Finally, remove the existing shard directories.
1853   * For revprops, clean up older obsolete shards as well as they might
1854   * have been left over from an interrupted FS upgrade. */
1855  SVN_ERR(svn_io_remove_dir2(pb->rev_shard_path, TRUE,
1856                             pb->cancel_func, pb->cancel_baton, pool));
1857  if (pb->revsprops_dir)
1858    {
1859      svn_node_kind_t kind = svn_node_dir;
1860      apr_int64_t to_cleanup = pb->shard;
1861      do
1862        {
1863          SVN_ERR(svn_fs_fs__delete_revprops_shard(revprops_shard_path,
1864                                                   to_cleanup,
1865                                                   ffd->max_files_per_dir,
1866                                                   pb->cancel_func,
1867                                                   pb->cancel_baton,
1868                                                   pool));
1869
1870          /* If the previous shard exists, clean it up as well.
1871             Don't try to clean up shard 0 as it we can't tell quickly
1872             whether it actually needs cleaning up. */
1873          revprops_shard_path = svn_dirent_join(pb->revsprops_dir,
1874                      apr_psprintf(pool, "%" APR_INT64_T_FMT, --to_cleanup),
1875                      pool);
1876          SVN_ERR(svn_io_check_path(revprops_shard_path, &kind, pool));
1877        }
1878      while (kind == svn_node_dir && to_cleanup > 0);
1879    }
1880
1881  return SVN_NO_ERROR;
1882}
1883
1884/* Pack the shard described by BATON.
1885 *
1886 * If for some reason we detect a partial packing already performed,
1887 * we remove the pack file and start again.
1888 */
1889static svn_error_t *
1890pack_shard(struct pack_baton *baton,
1891           apr_pool_t *pool)
1892{
1893  fs_fs_data_t *ffd = baton->fs->fsap_data;
1894  const char *rev_pack_file_dir;
1895
1896  /* Notify caller we're starting to pack this shard. */
1897  if (baton->notify_func)
1898    SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1899                               svn_fs_pack_notify_start, pool));
1900
1901  /* Some useful paths. */
1902  rev_pack_file_dir = svn_dirent_join(baton->revs_dir,
1903                  apr_psprintf(pool,
1904                               "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD,
1905                               baton->shard),
1906                  pool);
1907  baton->rev_shard_path = svn_dirent_join(baton->revs_dir,
1908                                          apr_psprintf(pool,
1909                                                       "%" APR_INT64_T_FMT,
1910                                                       baton->shard),
1911                                          pool);
1912
1913  /* pack the revision content */
1914  SVN_ERR(pack_rev_shard(baton->fs, rev_pack_file_dir, baton->rev_shard_path,
1915                         baton->shard, ffd->max_files_per_dir,
1916                         DEFAULT_MAX_MEM, baton->cancel_func,
1917                         baton->cancel_baton, pool));
1918
1919  /* For newer repo formats, we only acquired the pack lock so far.
1920     Before modifying the repo state by switching over to the packed
1921     data, we need to acquire the global (write) lock. */
1922  if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
1923    SVN_ERR(svn_fs_fs__with_write_lock(baton->fs, synced_pack_shard, baton,
1924                                       pool));
1925  else
1926    SVN_ERR(synced_pack_shard(baton, pool));
1927
1928  /* Notify caller we're starting to pack this shard. */
1929  if (baton->notify_func)
1930    SVN_ERR(baton->notify_func(baton->notify_baton, baton->shard,
1931                               svn_fs_pack_notify_end, pool));
1932
1933  return SVN_NO_ERROR;
1934}
1935
1936/* The work-horse for svn_fs_fs__pack, called with the FS write lock.
1937   This implements the svn_fs_fs__with_write_lock() 'body' callback
1938   type.  BATON is a 'struct pack_baton *'.
1939
1940   WARNING: if you add a call to this function, please note:
1941     The code currently assumes that any piece of code running with
1942     the write-lock set can rely on the ffd->min_unpacked_rev and
1943     ffd->min_unpacked_revprop caches to be up-to-date (and, by
1944     extension, on not having to use a retry when calling
1945     svn_fs_fs__path_rev_absolute() and friends).  If you add a call
1946     to this function, consider whether you have to call
1947     svn_fs_fs__update_min_unpacked_rev().
1948     See this thread: http://thread.gmane.org/1291206765.3782.3309.camel@edith
1949 */
1950static svn_error_t *
1951pack_body(void *baton,
1952          apr_pool_t *pool)
1953{
1954  struct pack_baton *pb = baton;
1955  fs_fs_data_t *ffd = pb->fs->fsap_data;
1956  apr_int64_t completed_shards;
1957  svn_revnum_t youngest;
1958  apr_pool_t *iterpool;
1959
1960  /* If the repository isn't a new enough format, we don't support packing.
1961     Return a friendly error to that effect. */
1962  if (ffd->format < SVN_FS_FS__MIN_PACKED_FORMAT)
1963    return svn_error_createf(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
1964      _("FSFS format (%d) too old to pack; please upgrade the filesystem."),
1965      ffd->format);
1966
1967  /* If we aren't using sharding, we can't do any packing, so quit. */
1968  if (!ffd->max_files_per_dir)
1969    return SVN_NO_ERROR;
1970
1971  SVN_ERR(svn_fs_fs__read_min_unpacked_rev(&ffd->min_unpacked_rev, pb->fs,
1972                                           pool));
1973
1974  SVN_ERR(svn_fs_fs__youngest_rev(&youngest, pb->fs, pool));
1975  completed_shards = (youngest + 1) / ffd->max_files_per_dir;
1976
1977  /* See if we've already completed all possible shards thus far. */
1978  if (ffd->min_unpacked_rev == (completed_shards * ffd->max_files_per_dir))
1979    return SVN_NO_ERROR;
1980
1981  pb->revs_dir = svn_dirent_join(pb->fs->path, PATH_REVS_DIR, pool);
1982  if (ffd->format >= SVN_FS_FS__MIN_PACKED_REVPROP_FORMAT)
1983    pb->revsprops_dir = svn_dirent_join(pb->fs->path, PATH_REVPROPS_DIR,
1984                                        pool);
1985
1986  iterpool = svn_pool_create(pool);
1987  for (pb->shard = ffd->min_unpacked_rev / ffd->max_files_per_dir;
1988       pb->shard < completed_shards;
1989       pb->shard++)
1990    {
1991      svn_pool_clear(iterpool);
1992
1993      if (pb->cancel_func)
1994        SVN_ERR(pb->cancel_func(pb->cancel_baton));
1995
1996      SVN_ERR(pack_shard(pb, iterpool));
1997    }
1998
1999  svn_pool_destroy(iterpool);
2000  return SVN_NO_ERROR;
2001}
2002
2003svn_error_t *
2004svn_fs_fs__pack(svn_fs_t *fs,
2005                svn_fs_pack_notify_t notify_func,
2006                void *notify_baton,
2007                svn_cancel_func_t cancel_func,
2008                void *cancel_baton,
2009                apr_pool_t *pool)
2010{
2011  struct pack_baton pb = { 0 };
2012  fs_fs_data_t *ffd = fs->fsap_data;
2013  svn_error_t *err;
2014
2015  pb.fs = fs;
2016  pb.notify_func = notify_func;
2017  pb.notify_baton = notify_baton;
2018  pb.cancel_func = cancel_func;
2019  pb.cancel_baton = cancel_baton;
2020
2021  if (ffd->format >= SVN_FS_FS__MIN_PACK_LOCK_FORMAT)
2022    {
2023      /* Newer repositories provide a pack operation specific lock.
2024         Acquire it to prevent concurrent packs.
2025
2026         Since the file lock's lifetime is bound to a pool, we create a
2027         separate subpool here to release the lock immediately after the
2028         operation finished.
2029       */
2030      err = svn_fs_fs__with_pack_lock(fs, pack_body, &pb, pool);
2031    }
2032  else
2033    {
2034      /* Use the global write lock for older repos. */
2035      err = svn_fs_fs__with_write_lock(fs, pack_body, &pb, pool);
2036    }
2037
2038  return svn_error_trace(err);
2039}
2040