1/* cached_data.c --- cached (read) access to FSX data
2 *
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 */
22
23#include "cached_data.h"
24
25#include <assert.h>
26
27#include "svn_hash.h"
28#include "svn_ctype.h"
29#include "svn_sorts.h"
30
31#include "private/svn_io_private.h"
32#include "private/svn_sorts_private.h"
33#include "private/svn_string_private.h"
34#include "private/svn_subr_private.h"
35#include "private/svn_temp_serializer.h"
36
37#include "fs_x.h"
38#include "low_level.h"
39#include "util.h"
40#include "pack.h"
41#include "temp_serializer.h"
42#include "index.h"
43#include "changes.h"
44#include "noderevs.h"
45#include "reps.h"
46
47#include "../libsvn_fs/fs-loader.h"
48#include "../libsvn_delta/delta.h"  /* for SVN_DELTA_WINDOW_SIZE */
49
50#include "svn_private_config.h"
51
52/* forward-declare. See implementation for the docstring */
53static svn_error_t *
54block_read(void **result,
55           svn_fs_t *fs,
56           const svn_fs_x__id_t *id,
57           svn_fs_x__revision_file_t *revision_file,
58           void *baton,
59           apr_pool_t *result_pool,
60           apr_pool_t *scratch_pool);
61
62
/* Define this to enable access logging via dbg__log_access
64#define SVN_FS_X__LOG_ACCESS
65*/
66
67/* When SVN_FS_X__LOG_ACCESS has been defined, write a line to console
68 * showing where ID is located in FS and use ITEM to show details on it's
69 * contents if not NULL.  Use SCRATCH_POOL for temporary allocations.
70 */
71static svn_error_t *
72dbg__log_access(svn_fs_t *fs,
73                const svn_fs_x__id_t *id,
74                void *item,
75                apr_uint32_t item_type,
76                apr_pool_t *scratch_pool)
77{
78  /* no-op if this macro is not defined */
79#ifdef SVN_FS_X__LOG_ACCESS
80  svn_fs_x__data_t *ffd = fs->fsap_data;
81  apr_off_t offset = -1;
82  apr_off_t end_offset = 0;
83  apr_uint32_t sub_item = 0;
84  svn_fs_x__p2l_entry_t *entry = NULL;
85  static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
86                                "node ", "chgs ", "rep  ", "c:", "n:", "r:"};
87  const char *description = "";
88  const char *type = types[item_type];
89  const char *pack = "";
90  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
91
92  /* determine rev / pack file offset */
93  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, id, scratch_pool));
94
95  /* constructing the pack file description */
96  if (revision < ffd->min_unpacked_rev)
97    pack = apr_psprintf(scratch_pool, "%4ld|",
98                        revision / ffd->max_files_per_dir);
99
100  /* construct description if possible */
101  if (item_type == SVN_FS_X__ITEM_TYPE_NODEREV && item != NULL)
102    {
103      svn_fs_x__noderev_t *node = item;
104      const char *data_rep
105        = node->data_rep
106        ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
107                       svn_fs_x__get_revnum(node->data_rep->id.change_set),
108                       node->data_rep->id.number)
109        : "";
110      const char *prop_rep
111        = node->prop_rep
112        ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
113                       svn_fs_x__get_revnum(node->prop_rep->id.change_set),
114                       node->prop_rep->id.number)
115        : "";
116      description = apr_psprintf(scratch_pool, "%s   (pc=%d%s%s)",
117                                 node->created_path,
118                                 node->predecessor_count,
119                                 data_rep,
120                                 prop_rep);
121    }
122  else if (item_type == SVN_FS_X__ITEM_TYPE_ANY_REP)
123    {
124      svn_fs_x__rep_header_t *header = item;
125      if (header == NULL)
126        description = "  (txdelta window)";
127      else if (header->type == svn_fs_x__rep_self_delta)
128        description = "  DELTA";
129      else
130        description = apr_psprintf(scratch_pool,
131                                   "  DELTA against %ld/%" APR_UINT64_T_FMT,
132                                   header->base_revision,
133                                   header->base_item_index);
134    }
135  else if (item_type == SVN_FS_X__ITEM_TYPE_CHANGES && item != NULL)
136    {
137      apr_array_header_t *changes = item;
138      switch (changes->nelts)
139        {
140          case 0:  description = "  no change";
141                   break;
142          case 1:  description = "  1 change";
143                   break;
144          default: description = apr_psprintf(scratch_pool, "  %d changes",
145                                              changes->nelts);
146        }
147    }
148
149  /* reverse index lookup: get item description in ENTRY */
150  SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, revision, offset,
151                                      scratch_pool));
152  if (entry)
153    {
154      /* more details */
155      end_offset = offset + entry->size;
156      type = types[entry->type];
157
158      /* merge the sub-item number with the container type */
159      if (   entry->type == SVN_FS_X__ITEM_TYPE_CHANGES_CONT
160          || entry->type == SVN_FS_X__ITEM_TYPE_NODEREVS_CONT
161          || entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT)
162        type = apr_psprintf(scratch_pool, "%s%-3d", type, sub_item);
163    }
164
165  /* line output */
166  printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT"   %s\n",
167          pack, (long)(offset / ffd->block_size),
168          (long)(offset % ffd->block_size),
169          (long)(end_offset / ffd->block_size),
170          (long)(end_offset % ffd->block_size),
171          type, revision, id->number, description);
172
173#endif
174
175  return SVN_NO_ERROR;
176}
177
178/* Open the revision file for the item given by ID in filesystem FS and
179   store the newly opened file in FILE.  Seek to the item's location before
180   returning.
181
182   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
183static svn_error_t *
184open_and_seek_revision(svn_fs_x__revision_file_t **file,
185                       svn_fs_t *fs,
186                       const svn_fs_x__id_t *id,
187                       apr_pool_t *result_pool,
188                       apr_pool_t *scratch_pool)
189{
190  svn_fs_x__revision_file_t *rev_file;
191  apr_off_t offset = -1;
192  apr_uint32_t sub_item = 0;
193  svn_revnum_t rev = svn_fs_x__get_revnum(id->change_set);
194
195  SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
196
197  SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, rev, result_pool));
198  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, id,
199                                scratch_pool));
200  SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, offset));
201
202  *file = rev_file;
203
204  return SVN_NO_ERROR;
205}
206
207/* Open the representation REP for a node-revision in filesystem FS, seek
208   to its position and store the newly opened file in FILE.
209
210   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
211static svn_error_t *
212open_and_seek_transaction(svn_fs_x__revision_file_t **file,
213                          svn_fs_t *fs,
214                          svn_fs_x__representation_t *rep,
215                          apr_pool_t *result_pool,
216                          apr_pool_t *scratch_pool)
217{
218  apr_off_t offset;
219  apr_uint32_t sub_item = 0;
220  apr_int64_t txn_id = svn_fs_x__get_txn_id(rep->id.change_set);
221
222  SVN_ERR(svn_fs_x__rev_file_open_proto_rev(file, fs, txn_id, result_pool,
223                                            scratch_pool));
224
225  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, *file, &rep->id,
226                                scratch_pool));
227  SVN_ERR(svn_fs_x__rev_file_seek(*file, NULL, offset));
228
229  return SVN_NO_ERROR;
230}
231
232/* Given a node-id ID, and a representation REP in filesystem FS, open
233   the correct file and seek to the correction location.  Store this
234   file in *FILE_P.
235
236   Allocate the result in RESULT_POOL and temporaries in SCRATCH_POOL. */
237static svn_error_t *
238open_and_seek_representation(svn_fs_x__revision_file_t **file_p,
239                             svn_fs_t *fs,
240                             svn_fs_x__representation_t *rep,
241                             apr_pool_t *result_pool,
242                             apr_pool_t *scratch_pool)
243{
244  if (svn_fs_x__is_revision(rep->id.change_set))
245    return open_and_seek_revision(file_p, fs, &rep->id, result_pool,
246                                  scratch_pool);
247  else
248    return open_and_seek_transaction(file_p, fs, rep, result_pool,
249                                     scratch_pool);
250}
251
252
253
254static svn_error_t *
255err_dangling_id(svn_fs_t *fs,
256                const svn_fs_x__id_t *id)
257{
258  svn_string_t *id_str = svn_fs_x__id_unparse(id, fs->pool);
259  return svn_error_createf
260    (SVN_ERR_FS_ID_NOT_FOUND, 0,
261     _("Reference to non-existent node '%s' in filesystem '%s'"),
262     id_str->data, fs->path);
263}
264
265/* Get the node-revision for the node ID in FS.
266   Set *NODEREV_P to the new node-revision structure, allocated in POOL.
267   See svn_fs_x__get_node_revision, which wraps this and adds another
268   error. */
269static svn_error_t *
270get_node_revision_body(svn_fs_x__noderev_t **noderev_p,
271                       svn_fs_t *fs,
272                       const svn_fs_x__id_t *id,
273                       apr_pool_t *result_pool,
274                       apr_pool_t *scratch_pool)
275{
276  svn_error_t *err;
277  svn_boolean_t is_cached = FALSE;
278  svn_fs_x__data_t *ffd = fs->fsap_data;
279
280  if (svn_fs_x__is_txn(id->change_set))
281    {
282      apr_file_t *file;
283      svn_stream_t *stream;
284
285      /* This is a transaction node-rev.  Its storage logic is very
286         different from that of rev / pack files. */
287      err = svn_io_file_open(&file,
288                             svn_fs_x__path_txn_node_rev(fs, id,
289                                                         scratch_pool,
290                                                         scratch_pool),
291                             APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
292                             scratch_pool);
293      if (err && APR_STATUS_IS_ENOENT(err->apr_err))
294        {
295          svn_error_clear(err);
296          return svn_error_trace(err_dangling_id(fs, id));
297        }
298      else if (err)
299        {
300          return svn_error_trace(err);
301        }
302
303      /* Be sure to close the file ASAP. */
304      stream = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
305      SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream,
306                                     result_pool, scratch_pool));
307    }
308  else
309    {
310      svn_fs_x__revision_file_t *revision_file;
311
312      /* noderevs in rev / pack files can be cached */
313      svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
314      svn_fs_x__pair_cache_key_t key;
315
316      SVN_ERR(svn_fs_x__rev_file_init(&revision_file, fs, revision,
317                                      scratch_pool));
318
319      /* First, try a noderevs container cache lookup. */
320      if (   svn_fs_x__is_packed_rev(fs, revision)
321          && ffd->noderevs_container_cache)
322        {
323          apr_off_t offset;
324          apr_uint32_t sub_item;
325          SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, revision_file,
326                                        id, scratch_pool));
327          key.revision = svn_fs_x__packed_base_rev(fs, revision);
328          key.second = offset;
329
330          SVN_ERR(svn_cache__get_partial((void **)noderev_p, &is_cached,
331                                         ffd->noderevs_container_cache, &key,
332                                         svn_fs_x__noderevs_get_func,
333                                         &sub_item, result_pool));
334          if (is_cached)
335            return SVN_NO_ERROR;
336        }
337
338      key.revision = revision;
339      key.second = id->number;
340
341      /* Not found or not applicable. Try a noderev cache lookup.
342       * If that succeeds, we are done here. */
343      SVN_ERR(svn_cache__get((void **) noderev_p,
344                             &is_cached,
345                             ffd->node_revision_cache,
346                             &key,
347                             result_pool));
348      if (is_cached)
349        return SVN_NO_ERROR;
350
351      /* block-read will parse the whole block and will also return
352         the one noderev that we need right now. */
353      SVN_ERR(block_read((void **)noderev_p, fs,
354                         id,
355                         revision_file,
356                         NULL,
357                         result_pool,
358                         scratch_pool));
359      SVN_ERR(svn_fs_x__close_revision_file(revision_file));
360    }
361
362  return SVN_NO_ERROR;
363}
364
365svn_error_t *
366svn_fs_x__get_node_revision(svn_fs_x__noderev_t **noderev_p,
367                            svn_fs_t *fs,
368                            const svn_fs_x__id_t *id,
369                            apr_pool_t *result_pool,
370                            apr_pool_t *scratch_pool)
371{
372  svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
373                                            result_pool, scratch_pool);
374  if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
375    {
376      svn_string_t *id_string = svn_fs_x__id_unparse(id, scratch_pool);
377      return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
378                               "Corrupt node-revision '%s'",
379                               id_string->data);
380    }
381
382  SVN_ERR(dbg__log_access(fs, id, *noderev_p,
383                          SVN_FS_X__ITEM_TYPE_NODEREV, scratch_pool));
384
385  return svn_error_trace(err);
386}
387
388
389svn_error_t *
390svn_fs_x__get_mergeinfo_count(apr_int64_t *count,
391                              svn_fs_t *fs,
392                              const svn_fs_x__id_t *id,
393                              apr_pool_t *scratch_pool)
394{
395  svn_fs_x__noderev_t *noderev;
396
397  /* If we want a full acccess log, we need to provide full data and
398     cannot take shortcuts here. */
399#if !defined(SVN_FS_X__LOG_ACCESS)
400
401  /* First, try a noderevs container cache lookup. */
402  if (! svn_fs_x__is_txn(id->change_set))
403    {
404      /* noderevs in rev / pack files can be cached */
405      svn_fs_x__data_t *ffd = fs->fsap_data;
406      svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
407
408      svn_fs_x__revision_file_t *rev_file;
409      SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision,
410                                      scratch_pool));
411
412      if (   svn_fs_x__is_packed_rev(fs, revision)
413          && ffd->noderevs_container_cache)
414        {
415          svn_fs_x__pair_cache_key_t key;
416          apr_off_t offset;
417          apr_uint32_t sub_item;
418          svn_boolean_t is_cached;
419
420          SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file,
421                                        id, scratch_pool));
422          key.revision = svn_fs_x__packed_base_rev(fs, revision);
423          key.second = offset;
424
425          SVN_ERR(svn_cache__get_partial((void **)count, &is_cached,
426                                         ffd->noderevs_container_cache, &key,
427                                         svn_fs_x__mergeinfo_count_get_func,
428                                         &sub_item, scratch_pool));
429          if (is_cached)
430            return SVN_NO_ERROR;
431        }
432    }
433#endif
434
435  /* fallback to the naive implementation handling all edge cases */
436  SVN_ERR(svn_fs_x__get_node_revision(&noderev, fs, id, scratch_pool,
437                                      scratch_pool));
438  *count = noderev->mergeinfo_count;
439
440  return SVN_NO_ERROR;
441}
442
443/* Describes a lazily opened rev / pack file.  Instances will be shared
444   between multiple instances of rep_state_t. */
445typedef struct shared_file_t
446{
447  /* The opened file. NULL while file is not open, yet. */
448  svn_fs_x__revision_file_t *rfile;
449
450  /* file system to open the file in */
451  svn_fs_t *fs;
452
453  /* a revision contained in the FILE.  Since this file may be shared,
454     that value may be different from REP_STATE_T->REVISION. */
455  svn_revnum_t revision;
456
457  /* pool to use when creating the FILE.  This guarantees that the file
458     remains open / valid beyond the respective local context that required
459     the file to be opened eventually. */
460  apr_pool_t *pool;
461} shared_file_t;
462
463/* Represents where in the current svndiff data block each
464   representation is. */
465typedef struct rep_state_t
466{
467                    /* shared lazy-open rev/pack file structure */
468  shared_file_t *sfile;
469                    /* The txdelta window cache to use or NULL. */
470  svn_cache__t *window_cache;
471                    /* Caches un-deltified windows. May be NULL. */
472  svn_cache__t *combined_cache;
473                    /* ID addressing the representation */
474  svn_fs_x__id_t rep_id;
475                    /* length of the header at the start of the rep.
476                       0 iff this is rep is stored in a container
477                       (i.e. does not have a header) */
478  apr_size_t header_size;
479  apr_off_t start;  /* The starting offset for the raw
480                       svndiff data minus header.
481                       -1 if the offset is yet unknown. */
482                    /* sub-item index in case the rep is containered */
483  apr_uint32_t sub_item;
484  apr_off_t current;/* The current offset relative to START. */
485  apr_off_t size;   /* The on-disk size of the representation. */
486  int ver;          /* If a delta, what svndiff version?
487                       -1 for unknown delta version. */
488  int chunk_index;  /* number of the window to read */
489} rep_state_t;
490
491/* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */
492static svn_error_t*
493auto_open_shared_file(shared_file_t *file)
494{
495  if (file->rfile == NULL)
496    SVN_ERR(svn_fs_x__rev_file_init(&file->rfile, file->fs,
497                                    file->revision, file->pool));
498
499  return SVN_NO_ERROR;
500}
501
502/* Set RS->START to the begin of the representation raw in RS->SFILE->RFILE,
503   if that hasn't been done yet.  Use SCRATCH_POOL for temporary allocations.
504 */
505static svn_error_t*
506auto_set_start_offset(rep_state_t *rs,
507                      apr_pool_t *scratch_pool)
508{
509  if (rs->start == -1)
510    {
511      SVN_ERR(svn_fs_x__item_offset(&rs->start, &rs->sub_item,
512                                    rs->sfile->fs, rs->sfile->rfile,
513                                    &rs->rep_id, scratch_pool));
514      rs->start += rs->header_size;
515    }
516
517  return SVN_NO_ERROR;
518}
519
520/* Set RS->VER depending on what is found in the already open RS->FILE->FILE
521   if the diff version is still unknown.  Use SCRATCH_POOL for temporary
522   allocations.
523 */
524static svn_error_t*
525auto_read_diff_version(rep_state_t *rs,
526                       apr_pool_t *scratch_pool)
527{
528  if (rs->ver == -1)
529    {
530      char buf[4];
531      SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, rs->start));
532      SVN_ERR(svn_fs_x__rev_file_read(rs->sfile->rfile, buf, sizeof(buf)));
533
534      /* ### Layering violation */
535      if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
536        return svn_error_create
537          (SVN_ERR_FS_CORRUPT, NULL,
538           _("Malformed svndiff data in representation"));
539      rs->ver = buf[3];
540
541      rs->chunk_index = 0;
542      rs->current = 4;
543    }
544
545  return SVN_NO_ERROR;
546}
547
548/* See create_rep_state, which wraps this and adds another error. */
549static svn_error_t *
550create_rep_state_body(rep_state_t **rep_state,
551                      svn_fs_x__rep_header_t **rep_header,
552                      shared_file_t **shared_file,
553                      svn_fs_x__representation_t *rep,
554                      svn_fs_t *fs,
555                      apr_pool_t *result_pool,
556                      apr_pool_t *scratch_pool)
557{
558  svn_fs_x__data_t *ffd = fs->fsap_data;
559  rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
560  svn_fs_x__rep_header_t *rh;
561  svn_boolean_t is_cached = FALSE;
562  svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
563  apr_uint64_t estimated_window_storage;
564
565  /* If the hint is
566   * - given,
567   * - refers to a valid revision,
568   * - refers to a packed revision,
569   * - as does the rep we want to read, and
570   * - refers to the same pack file as the rep
571   * we can re-use the same, already open file object
572   */
573  svn_boolean_t reuse_shared_file
574    =    shared_file && *shared_file && (*shared_file)->rfile
575      && SVN_IS_VALID_REVNUM((*shared_file)->revision)
576      && (*shared_file)->revision < ffd->min_unpacked_rev
577      && revision < ffd->min_unpacked_rev
578      && (   ((*shared_file)->revision / ffd->max_files_per_dir)
579          == (revision / ffd->max_files_per_dir));
580
581  svn_fs_x__representation_cache_key_t key = { 0 };
582  key.revision = revision;
583  key.is_packed = revision < ffd->min_unpacked_rev;
584  key.item_index = rep->id.number;
585
586  /* continue constructing RS and RA */
587  rs->size = rep->size;
588  rs->rep_id = rep->id;
589  rs->ver = -1;
590  rs->start = -1;
591
592  /* Very long files stored as self-delta will produce a huge number of
593     delta windows.  Don't cache them lest we don't thrash the cache.
594     Since we don't know the depth of the delta chain, let's assume, the
595     whole contents get rewritten 3 times.
596   */
597  estimated_window_storage
598    = 4 * (  (rep->expanded_size ? rep->expanded_size : rep->size)
599           + SVN_DELTA_WINDOW_SIZE);
600  estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
601
602  rs->window_cache =    ffd->txdelta_window_cache
603                     && svn_cache__is_cachable(ffd->txdelta_window_cache,
604                                       (apr_size_t)estimated_window_storage)
605                   ? ffd->txdelta_window_cache
606                   : NULL;
607  rs->combined_cache =    ffd->combined_window_cache
608                       && svn_cache__is_cachable(ffd->combined_window_cache,
609                                       (apr_size_t)estimated_window_storage)
610                     ? ffd->combined_window_cache
611                     : NULL;
612
613  /* cache lookup, i.e. skip reading the rep header if possible */
614  if (SVN_IS_VALID_REVNUM(revision))
615    SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
616                           ffd->rep_header_cache, &key, result_pool));
617
618  /* initialize the (shared) FILE member in RS */
619  if (reuse_shared_file)
620    {
621      rs->sfile = *shared_file;
622    }
623  else
624    {
625      shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
626      file->revision = revision;
627      file->pool = result_pool;
628      file->fs = fs;
629      rs->sfile = file;
630
631      /* remember the current file, if suggested by the caller */
632      if (shared_file)
633        *shared_file = file;
634    }
635
636  /* read rep header, if necessary */
637  if (!is_cached)
638    {
639      svn_stream_t *stream;
640
641      /* we will need the on-disk location for non-txn reps */
642      apr_off_t offset;
643      svn_boolean_t in_container = TRUE;
644
645      /* ensure file is open and navigate to the start of rep header */
646      if (reuse_shared_file)
647        {
648          /* ... we can re-use the same, already open file object.
649           * This implies that we don't read from a txn.
650           */
651          rs->sfile = *shared_file;
652          SVN_ERR(auto_open_shared_file(rs->sfile));
653        }
654      else
655        {
656          /* otherwise, create a new file object.  May or may not be
657           * an in-txn file.
658           */
659          SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
660                                               result_pool, scratch_pool));
661        }
662
663      if (SVN_IS_VALID_REVNUM(revision))
664        {
665          apr_uint32_t sub_item;
666
667          SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs,
668                                        rs->sfile->rfile, &rep->id,
669                                        scratch_pool));
670
671          /* is rep stored in some star-deltified container? */
672          if (sub_item == 0)
673            {
674              svn_fs_x__p2l_entry_t *entry;
675              SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rs->sfile->rfile,
676                                                 revision, offset,
677                                                 scratch_pool, scratch_pool));
678              in_container = entry->type == SVN_FS_X__ITEM_TYPE_REPS_CONT;
679            }
680
681          if (in_container)
682            {
683              /* construct a container rep header */
684              *rep_header = apr_pcalloc(result_pool, sizeof(**rep_header));
685              (*rep_header)->type = svn_fs_x__rep_container;
686
687              /* exit to caller */
688              *rep_state = rs;
689              return SVN_NO_ERROR;
690            }
691
692          SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, offset));
693        }
694
695      SVN_ERR(svn_fs_x__rev_file_stream(&stream, rs->sfile->rfile));
696      SVN_ERR(svn_fs_x__read_rep_header(&rh, stream,
697                                        result_pool, scratch_pool));
698      SVN_ERR(svn_fs_x__rev_file_offset(&rs->start, rs->sfile->rfile));
699
700      /* populate the cache if appropriate */
701      if (SVN_IS_VALID_REVNUM(revision))
702        {
703          SVN_ERR(block_read(NULL, fs, &rs->rep_id, rs->sfile->rfile, NULL,
704                             result_pool, scratch_pool));
705          SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
706                                 scratch_pool));
707        }
708    }
709
710  /* finalize */
711  SVN_ERR(dbg__log_access(fs, &rs->rep_id, rh, SVN_FS_X__ITEM_TYPE_ANY_REP,
712                          scratch_pool));
713
714  rs->header_size = rh->header_size;
715  *rep_state = rs;
716  *rep_header = rh;
717
718  rs->chunk_index = 0;
719
720  /* skip "SVNx" diff marker */
721  rs->current = 4;
722
723  return SVN_NO_ERROR;
724}
725
726/* Read the rep args for REP in filesystem FS and create a rep_state
727   for reading the representation.  Return the rep_state in *REP_STATE
728   and the rep args in *REP_ARGS, both allocated in POOL.
729
730   When reading multiple reps, i.e. a skip delta chain, you may provide
731   non-NULL SHARED_FILE.  (If SHARED_FILE is not NULL, in the first
732   call it should be a pointer to NULL.)  The function will use this
733   variable to store the previous call results and tries to re-use it.
734   This may result in significant savings in I/O for packed files and
735   number of open file handles.
736 */
737static svn_error_t *
738create_rep_state(rep_state_t **rep_state,
739                 svn_fs_x__rep_header_t **rep_header,
740                 shared_file_t **shared_file,
741                 svn_fs_x__representation_t *rep,
742                 svn_fs_t *fs,
743                 apr_pool_t *result_pool,
744                 apr_pool_t *scratch_pool)
745{
746  svn_error_t *err = create_rep_state_body(rep_state, rep_header,
747                                           shared_file, rep, fs,
748                                           result_pool, scratch_pool);
749  if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
750    {
751      /* ### This always returns "-1" for transaction reps, because
752         ### this particular bit of code doesn't know if the rep is
753         ### stored in the protorev or in the mutable area (for props
754         ### or dir contents).  It is pretty rare for FSX to *read*
755         ### from the protorev file, though, so this is probably OK.
756         ### And anyone going to debug corruption errors is probably
757         ### going to jump straight to this comment anyway! */
758      return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
759                               "Corrupt representation '%s'",
760                               rep
761                               ? svn_fs_x__unparse_representation
762                                   (rep, TRUE, scratch_pool,
763                                    scratch_pool)->data
764                               : "(null)");
765    }
766  /* ### Call representation_string() ? */
767  return svn_error_trace(err);
768}
769
770svn_error_t *
771svn_fs_x__check_rep(svn_fs_x__representation_t *rep,
772                    svn_fs_t *fs,
773                    apr_pool_t *scratch_pool)
774{
775  apr_off_t offset;
776  apr_uint32_t sub_item;
777  svn_fs_x__p2l_entry_t *entry;
778  svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
779
780  svn_fs_x__revision_file_t *rev_file;
781  SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision, scratch_pool));
782
783  /* Does REP->ID refer to an actual item? Which one is it? */
784  SVN_ERR(svn_fs_x__item_offset(&offset, &sub_item, fs, rev_file, &rep->id,
785                                scratch_pool));
786
787  /* What is the type of that item? */
788  SVN_ERR(svn_fs_x__p2l_entry_lookup(&entry, fs, rev_file, revision, offset,
789                                     scratch_pool, scratch_pool));
790
791  /* Verify that we've got an item that is actually a representation. */
792  if (   entry == NULL
793      || (   entry->type != SVN_FS_X__ITEM_TYPE_FILE_REP
794          && entry->type != SVN_FS_X__ITEM_TYPE_DIR_REP
795          && entry->type != SVN_FS_X__ITEM_TYPE_FILE_PROPS
796          && entry->type != SVN_FS_X__ITEM_TYPE_DIR_PROPS
797          && entry->type != SVN_FS_X__ITEM_TYPE_REPS_CONT))
798    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
799                             _("No representation found at offset %s "
800                               "for item %s in revision %ld"),
801                             apr_off_t_toa(scratch_pool, offset),
802                             apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
803                                          rep->id.number),
804                             revision);
805
806  return SVN_NO_ERROR;
807}
808
/* Follow the delta chain starting at REP in FS.
   Do any temporary allocations in SCRATCH_POOL. */
811svn_error_t *
812svn_fs_x__rep_chain_length(int *chain_length,
813                           int *shard_count,
814                           svn_fs_x__representation_t *rep,
815                           svn_fs_t *fs,
816                           apr_pool_t *scratch_pool)
817{
818  svn_fs_x__data_t *ffd = fs->fsap_data;
819  svn_revnum_t shard_size = ffd->max_files_per_dir;
820  svn_boolean_t is_delta = FALSE;
821  int count = 0;
822  int shards = 1;
823  svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
824  svn_revnum_t last_shard = revision / shard_size;
825
826  /* Note that this iteration pool will be used in a non-standard way.
827   * To reuse open file handles between iterations (e.g. while within the
828   * same pack file), we only clear this pool once in a while instead of
829   * at the start of each iteration. */
830  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
831
832  /* Check whether the length of the deltification chain is acceptable.
833   * Otherwise, shared reps may form a non-skipping delta chain in
834   * extreme cases. */
835  svn_fs_x__representation_t base_rep = *rep;
836
837  /* re-use open files between iterations */
838  shared_file_t *file_hint = NULL;
839
840  svn_fs_x__rep_header_t *header;
841
842  /* follow the delta chain towards the end but for at most
843   * MAX_CHAIN_LENGTH steps. */
844  do
845    {
846      rep_state_t *rep_state;
847      revision = svn_fs_x__get_revnum(base_rep.id.change_set);
848      if (revision / shard_size != last_shard)
849        {
850          last_shard = revision / shard_size;
851          ++shards;
852        }
853
854      SVN_ERR(create_rep_state_body(&rep_state,
855                                    &header,
856                                    &file_hint,
857                                    &base_rep,
858                                    fs,
859                                    iterpool,
860                                    iterpool));
861
862      base_rep.id.change_set
863        = svn_fs_x__change_set_by_rev(header->base_revision);
864      base_rep.id.number = header->base_item_index;
865      base_rep.size = header->base_length;
866      is_delta = header->type == svn_fs_x__rep_delta;
867
868      /* Clear it the ITERPOOL once in a while.  Doing it too frequently
869       * renders the FILE_HINT ineffective.  Doing too infrequently, may
870       * leave us with too many open file handles.
871       *
872       * Note that this is mostly about efficiency, with larger values
873       * being more efficient, and any non-zero value is legal here.  When
874       * reading deltified contents, we may keep 10s of rev files open at
875       * the same time and the system has to cope with that.  Thus, the
876       * limit of 16 chosen below is in the same ballpark.
877       */
878      ++count;
879      if (count % 16 == 0)
880        {
881          file_hint = NULL;
882          svn_pool_clear(iterpool);
883        }
884    }
885  while (is_delta && base_rep.id.change_set);
886
887  *chain_length = count;
888  *shard_count = shards;
889  svn_pool_destroy(iterpool);
890
891  return SVN_NO_ERROR;
892}
893
894
/* State of reading the contents of a single representation.
   Instances get set up by rep_read_get_baton() and the two sub-pools
   they own get destroyed by rep_read_contents_close(). */
typedef struct rep_read_baton_t
{
  /* The FS from which we're reading. */
  svn_fs_t *fs;

  /* Representation to read. */
  svn_fs_x__representation_t rep;

  /* If not NULL, this is the base for the first delta window in rs_list */
  svn_stringbuf_t *base_window;

  /* The state of all prior delta representations. */
  apr_array_header_t *rs_list;

  /* The plaintext state, if there is a plaintext. */
  rep_state_t *src_state;

  /* The index of the current delta chunk, if we are reading a delta. */
  int chunk_index;

  /* The buffer where we store undeltified data. */
  char *buf;
  apr_size_t buf_pos;
  apr_size_t buf_len;

  /* A checksum context for summing the data read in order to verify it.
     Note: we don't need to use the sha1 checksum because we're only doing
     data verification, for which md5 is perfectly safe.  */
  svn_checksum_ctx_t *md5_checksum_ctx;

  /* Initialized to FALSE by rep_read_get_baton().
     NOTE(review): presumably set once MD5_CHECKSUM_CTX has been finalized
     and compared against MD5_DIGEST -- confirm against the read code. */
  svn_boolean_t checksum_finalized;

  /* The stored checksum of the representation we are reading, its
     length, and the amount we've read so far.  Some of this
     information is redundant with rs_list and src_state, but it's
     convenient for the checksumming code to have it here. */
  unsigned char md5_digest[APR_MD5_DIGESTSIZE];

  svn_filesize_t len;
  svn_filesize_t off;

  /* The key for the fulltext cache for this rep, if there is a
     fulltext cache. */
  svn_fs_x__pair_cache_key_t fulltext_cache_key;
  /* The text we've been reading, if we're going to cache it. */
  svn_stringbuf_t *current_fulltext;

  /* If not NULL, attempt to read the data from this cache.
     Once that lookup fails, reset it to NULL. */
  svn_cache__t *fulltext_cache;

  /* Bytes delivered from the FULLTEXT_CACHE so far.  If the next
     lookup fails, we need to skip that much data from the reconstructed
     window stream before we continue normal operation. */
  svn_filesize_t fulltext_delivered;

  /* Used for temporary allocations during the read. */
  apr_pool_t *scratch_pool;

  /* Pool used to store file handles and other data that is persistent
     for the entire stream read. */
  apr_pool_t *filehandle_pool;
} rep_read_baton_t;
958
959/* Set window key in *KEY to address the window described by RS.
960   For convenience, return the KEY. */
961static svn_fs_x__window_cache_key_t *
962get_window_key(svn_fs_x__window_cache_key_t *key,
963               rep_state_t *rs)
964{
965  svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set);
966  assert(revision <= APR_UINT32_MAX);
967
968  key->revision = (apr_uint32_t)revision;
969  key->item_index = rs->rep_id.number;
970  key->chunk_index = rs->chunk_index;
971
972  return key;
973}
974
/* Note: this comment describes get_cached_window(), which is defined
 * further below.
 *
 * Read the WINDOW_P number CHUNK_INDEX for the representation given in
 * rep state RS from the current FSX session's cache.  This will be a
 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
 * If a cache is available IS_CACHED will inform the caller about the
 * success of the lookup. Allocations (of the window in particular) will
 * be made from RESULT_POOL.
 *
 * If the information could be found, put RS to CHUNK_INDEX.
 */
984
/* Return data type for get_cached_window_sizes_func(): the size
 * information of a single cached txdelta window.
 */
typedef struct window_sizes_t
{
  /* length of the txdelta window in its on-disk format, i.e. the
   * distance between the cached window's start and end offsets */
  svn_filesize_t packed_len;

  /* expanded (and combined) window length, taken from the txdelta
   * window's TVIEW_LEN */
  svn_filesize_t target_len;
} window_sizes_t;
995
996/* Implements svn_cache__partial_getter_func_t extracting the packed
997 * and expanded window sizes from a cached window and return the size
998 * info as a window_sizes_t* in *OUT.
999 */
1000static svn_error_t *
1001get_cached_window_sizes_func(void **out,
1002                             const void *data,
1003                             apr_size_t data_len,
1004                             void *baton,
1005                             apr_pool_t *pool)
1006{
1007  const svn_fs_x__txdelta_cached_window_t *window = data;
1008  const svn_txdelta_window_t *txdelta_window
1009    = svn_temp_deserializer__ptr(window, (const void **)&window->window);
1010
1011  window_sizes_t *result = apr_palloc(pool, sizeof(*result));
1012  result->packed_len = window->end_offset - window->start_offset;
1013  result->target_len = txdelta_window->tview_len;
1014
1015  *out = result;
1016
1017  return SVN_NO_ERROR;
1018}
1019
1020/* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1021 * rep state RS from the current FSFS session's cache.  This will be a
1022 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1023 * If a cache is available IS_CACHED will inform the caller about the
1024 * success of the lookup. Allocations of the window in will be made
1025 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1026 *
1027 * If the information could be found, put RS to CHUNK_INDEX.
1028 */
1029static svn_error_t *
1030get_cached_window_sizes(window_sizes_t **sizes,
1031                        rep_state_t *rs,
1032                        svn_boolean_t *is_cached,
1033                        apr_pool_t *pool)
1034{
1035  svn_fs_x__window_cache_key_t key = { 0 };
1036  SVN_ERR(svn_cache__get_partial((void **)sizes,
1037                                 is_cached,
1038                                 rs->window_cache,
1039                                 get_window_key(&key, rs),
1040                                 get_cached_window_sizes_func,
1041                                 NULL,
1042                                 pool));
1043
1044  return SVN_NO_ERROR;
1045}
1046
1047static svn_error_t *
1048get_cached_window(svn_txdelta_window_t **window_p,
1049                  rep_state_t *rs,
1050                  int chunk_index,
1051                  svn_boolean_t *is_cached,
1052                  apr_pool_t *result_pool,
1053                  apr_pool_t *scratch_pool)
1054{
1055  /* ask the cache for the desired txdelta window */
1056  svn_fs_x__txdelta_cached_window_t *cached_window;
1057  svn_fs_x__window_cache_key_t key = { 0 };
1058  get_window_key(&key, rs);
1059  key.chunk_index = chunk_index;
1060  SVN_ERR(svn_cache__get((void **) &cached_window,
1061                         is_cached,
1062                         rs->window_cache,
1063                         &key,
1064                         result_pool));
1065
1066  if (*is_cached)
1067    {
1068      /* found it. Pass it back to the caller. */
1069      *window_p = cached_window->window;
1070
1071      /* manipulate the RS as if we just read the data */
1072      rs->current = cached_window->end_offset;
1073      rs->chunk_index = chunk_index;
1074    }
1075
1076  return SVN_NO_ERROR;
1077}
1078
1079/* Store the WINDOW read for the rep state RS with the given START_OFFSET
1080 * within the pack / rev file in the current FSX session's cache.  This
1081 * will be a no-op if no cache has been given.
1082 * Temporary allocations will be made from SCRATCH_POOL. */
1083static svn_error_t *
1084set_cached_window(svn_txdelta_window_t *window,
1085                  rep_state_t *rs,
1086                  apr_off_t start_offset,
1087                  apr_pool_t *scratch_pool)
1088{
1089  /* store the window and the first offset _past_ it */
1090  svn_fs_x__txdelta_cached_window_t cached_window;
1091  svn_fs_x__window_cache_key_t key = {0};
1092
1093  cached_window.window = window;
1094  cached_window.start_offset = start_offset - rs->start;
1095  cached_window.end_offset = rs->current;
1096
1097  /* but key it with the start offset because that is the known state
1098   * when we will look it up */
1099  SVN_ERR(svn_cache__set(rs->window_cache,
1100                         get_window_key(&key, rs),
1101                         &cached_window,
1102                         scratch_pool));
1103
1104  return SVN_NO_ERROR;
1105}
1106
1107/* Read the WINDOW_P for the rep state RS from the current FSX session's
1108 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no
1109 * cache has been given. If a cache is available IS_CACHED will inform
1110 * the caller about the success of the lookup. Allocations (of the window
1111 * in particular) will be made from POOL.
1112 */
1113static svn_error_t *
1114get_cached_combined_window(svn_stringbuf_t **window_p,
1115                           rep_state_t *rs,
1116                           svn_boolean_t *is_cached,
1117                           apr_pool_t *pool)
1118{
1119  /* ask the cache for the desired txdelta window */
1120  svn_fs_x__window_cache_key_t key = { 0 };
1121  return svn_cache__get((void **)window_p,
1122                        is_cached,
1123                        rs->combined_cache,
1124                        get_window_key(&key, rs),
1125                        pool);
1126}
1127
1128/* Store the WINDOW read for the rep state RS in the current FSX session's
1129 * cache. This will be a no-op if no cache has been given.
1130 * Temporary allocations will be made from SCRATCH_POOL. */
1131static svn_error_t *
1132set_cached_combined_window(svn_stringbuf_t *window,
1133                           rep_state_t *rs,
1134                           apr_pool_t *scratch_pool)
1135{
1136  /* but key it with the start offset because that is the known state
1137   * when we will look it up */
1138  svn_fs_x__window_cache_key_t key = { 0 };
1139  return svn_cache__set(rs->combined_cache,
1140                        get_window_key(&key, rs),
1141                        window,
1142                        scratch_pool);
1143}
1144
/* Build an array of rep_state structures in *LIST giving the delta
   reps from FIRST_REP in filesystem FS to a self-compressed rep.  Set
   *SRC_STATE to the container rep we find at the end of the chain, or to
   NULL if the final delta representation is self-compressed.
   Also, set *WINDOW_P to the base window content for *LIST, if it
   could be found in cache.  In that case, *SRC_STATE will be a pseudo
   rep state spanning that window and that rep is not added to *LIST.
   Otherwise, *LIST will contain the base representation for the whole
   delta chain.
   Allocate *LIST and the rep states in RESULT_POOL and use SCRATCH_POOL
   for temporary allocations.
 */
static svn_error_t *
build_rep_list(apr_array_header_t **list,
               svn_stringbuf_t **window_p,
               rep_state_t **src_state,
               svn_fs_t *fs,
               svn_fs_x__representation_t *first_rep,
               apr_pool_t *result_pool,
               apr_pool_t *scratch_pool)
{
  svn_fs_x__representation_t rep;
  rep_state_t *rs = NULL;
  svn_fs_x__rep_header_t *rep_header;
  svn_boolean_t is_cached = FALSE;
  shared_file_t *shared_file = NULL;
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);

  *list = apr_array_make(result_pool, 1, sizeof(rep_state_t *));

  /* REP is our cursor into the delta chain.  It gets overwritten with
     the respective delta base at the end of each iteration. */
  rep = *first_rep;

  /* for the top-level rep, we need the rep_args */
  SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs,
                           result_pool, iterpool));

  while (1)
    {
      svn_pool_clear(iterpool);

      /* fetch state, if that has not been done already
         (RS is only non-NULL here in the very first iteration) */
      if (!rs)
        SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
                                 &rep, fs, result_pool, iterpool));

      /* for txn reps and containered reps, there won't be a cached
       * combined window */
      if (svn_fs_x__is_revision(rep.id.change_set)
          && rep_header->type != svn_fs_x__rep_container
          && rs->combined_cache)
        SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached,
                                           result_pool));

      if (is_cached)
        {
          /* We already have a reconstructed window in our cache.
             Write a pseudo rep_state with the full length. */
          rs->start = 0;
          rs->current = 0;
          rs->size = (*window_p)->len;
          *src_state = rs;
          break;
        }

      if (rep_header->type == svn_fs_x__rep_container)
        {
          /* This is a container item, so just return the current rep_state. */
          *src_state = rs;
          break;
        }

      /* Push this rep onto the list.  If it's self-compressed, we're done. */
      APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
      if (rep_header->type == svn_fs_x__rep_self_delta)
        {
          *src_state = NULL;
          break;
        }

      /* Continue with the delta base named in the rep header. */
      rep.id.change_set
        = svn_fs_x__change_set_by_rev(rep_header->base_revision);
      rep.id.number = rep_header->base_item_index;
      rep.size = rep_header->base_length;

      /* Request a new rep state for the base in the next iteration. */
      rs = NULL;
    }
  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}
1232
1233
1234/* Create a rep_read_baton structure for node revision NODEREV in
1235   filesystem FS and store it in *RB_P.  If FULLTEXT_CACHE_KEY is not
1236   NULL, it is the rep's key in the fulltext cache, and a stringbuf
1237   must be allocated to store the text.  If rep is mutable, it must be
1238   refer to file contents.
1239
1240   Allocate the result in RESULT_POOL.  This includes the pools within *RB_P.
1241 */
1242static svn_error_t *
1243rep_read_get_baton(rep_read_baton_t **rb_p,
1244                   svn_fs_t *fs,
1245                   svn_fs_x__representation_t *rep,
1246                   svn_fs_x__pair_cache_key_t fulltext_cache_key,
1247                   apr_pool_t *result_pool)
1248{
1249  rep_read_baton_t *b;
1250
1251  b = apr_pcalloc(result_pool, sizeof(*b));
1252  b->fs = fs;
1253  b->rep = *rep;
1254  b->base_window = NULL;
1255  b->chunk_index = 0;
1256  b->buf = NULL;
1257  b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5,
1258                                                result_pool);
1259  b->checksum_finalized = FALSE;
1260  memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1261  b->len = rep->expanded_size;
1262  b->off = 0;
1263  b->fulltext_cache_key = fulltext_cache_key;
1264
1265  /* Clearable sub-pools.  Since they have to remain valid for as long as B
1266     lives, we can't take them from some scratch pool.  The caller of this
1267     function will have no control over how those subpools will be used. */
1268  b->scratch_pool = svn_pool_create(result_pool);
1269  b->filehandle_pool = svn_pool_create(result_pool);
1270  b->fulltext_cache = NULL;
1271  b->fulltext_delivered = 0;
1272  b->current_fulltext = NULL;
1273
1274  /* Save our output baton. */
1275  *rb_p = b;
1276
1277  return SVN_NO_ERROR;
1278}
1279
/* Skip forwards to THIS_CHUNK in rep state RS and then read the next delta
   window into *NWIN, allocated in RESULT_POOL.  RS must not be positioned
   behind THIS_CHUNK already.

   If RS is at its first chunk, refers to revision data and has a window
   cache, that cache is tried first, directly as well as after a block
   read.  Any window that we had to parse from the file in that case will
   be added to the cache.

   Return an SVN_ERR_FS_CORRUPT error if reading moves RS beyond the end
   of the representation.  Use SCRATCH_POOL for temporary allocations. */
static svn_error_t *
read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
                  rep_state_t *rs, apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  svn_boolean_t is_cached;
  apr_off_t start_offset;
  apr_off_t end_offset;
  apr_pool_t *iterpool;
  svn_stream_t *stream;
  svn_fs_x__revision_file_t *file;

  /* Evaluated once, upfront: RS->CHUNK_INDEX may change further down
     but the decision made here applies to the whole call. */
  svn_boolean_t cacheable = rs->chunk_index == 0
                         && svn_fs_x__is_revision(rs->rep_id.change_set)
                         && rs->window_cache;

  SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);

  SVN_ERR(dbg__log_access(rs->sfile->fs, &rs->rep_id, NULL,
                          SVN_FS_X__ITEM_TYPE_ANY_REP, scratch_pool));

  /* Read the next window.  But first, try to find it in the cache. */
  if (cacheable)
    {
      SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
                                result_pool, scratch_pool));
      if (is_cached)
        return SVN_NO_ERROR;
    }

  /* someone has to actually read the data from file.  Open it */
  SVN_ERR(auto_open_shared_file(rs->sfile));
  file = rs->sfile->rfile;

  /* invoke the 'block-read' feature for non-txn data.
     However, don't do that if we are in the middle of some representation,
     because the block is unlikely to contain other data. */
  if (cacheable)
    {
      SVN_ERR(block_read(NULL, rs->sfile->fs, &rs->rep_id, file, NULL,
                         result_pool, scratch_pool));

      /* reading the whole block probably also provided us with the
         desired txdelta window */
      SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
                                result_pool, scratch_pool));
      if (is_cached)
        return SVN_NO_ERROR;
    }

  /* data is still not cached -> we need to read it.
     Make sure we have all the necessary info. */
  SVN_ERR(auto_set_start_offset(rs, scratch_pool));
  SVN_ERR(auto_read_diff_version(rs, scratch_pool));

  /* RS->FILE may be shared between RS instances -> make sure we point
   * to the right data. */
  start_offset = rs->start + rs->current;
  SVN_ERR(svn_fs_x__rev_file_seek(file, NULL, start_offset));

  /* Skip windows to reach the current chunk if we aren't there yet.
     START_OFFSET follows along such that it will finally be the start
     of the window that we are going to read. */
  iterpool = svn_pool_create(scratch_pool);
  while (rs->chunk_index < this_chunk)
    {
      apr_file_t *apr_file;
      svn_pool_clear(iterpool);

      SVN_ERR(svn_fs_x__rev_file_get(&apr_file, file));
      SVN_ERR(svn_txdelta_skip_svndiff_window(apr_file, rs->ver, iterpool));
      rs->chunk_index++;
      SVN_ERR(svn_io_file_get_offset(&start_offset, apr_file, iterpool));

      /* There must be data left for the window that we are asked for,
         hence ">=" instead of the ">" used after the actual read. */
      rs->current = start_offset - rs->start;
      if (rs->current >= rs->size)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("Reading one svndiff window read "
                                  "beyond the end of the "
                                  "representation"));
    }
  svn_pool_destroy(iterpool);

  /* Actually read the next window. */
  SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
  SVN_ERR(svn_txdelta_read_svndiff_window(nwin, stream, rs->ver,
                                          result_pool));
  SVN_ERR(svn_fs_x__rev_file_offset(&end_offset, file));
  rs->current = end_offset - rs->start;
  if (rs->current > rs->size)
    return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                            _("Reading one svndiff window read beyond "
                              "the end of the representation"));

  /* the window has not been cached before, thus cache it now
   * (if caching is used for them at all) */
  if (cacheable)
    SVN_ERR(set_cached_window(*nwin, rs, start_offset, scratch_pool));

  return SVN_NO_ERROR;
}
1380
1381/* Read the whole representation RS and return it in *NWIN. */
1382static svn_error_t *
1383read_container_window(svn_stringbuf_t **nwin,
1384                      rep_state_t *rs,
1385                      apr_size_t size,
1386                      apr_pool_t *result_pool,
1387                      apr_pool_t *scratch_pool)
1388{
1389  svn_fs_x__rep_extractor_t *extractor = NULL;
1390  svn_fs_t *fs = rs->sfile->fs;
1391  svn_fs_x__data_t *ffd = fs->fsap_data;
1392  svn_fs_x__pair_cache_key_t key;
1393  svn_revnum_t revision = svn_fs_x__get_revnum(rs->rep_id.change_set);
1394  svn_boolean_t is_cached = FALSE;
1395  svn_fs_x__reps_baton_t baton;
1396
1397  SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1398  key.revision = svn_fs_x__packed_base_rev(fs, revision);
1399  key.second = rs->start;
1400
1401  /* already in cache? */
1402  baton.fs = fs;
1403  baton.idx = rs->sub_item;
1404
1405  SVN_ERR(svn_cache__get_partial((void**)&extractor, &is_cached,
1406                                 ffd->reps_container_cache, &key,
1407                                 svn_fs_x__reps_get_func, &baton,
1408                                 result_pool));
1409
1410  /* read from disk, if necessary */
1411  if (extractor == NULL)
1412    {
1413      SVN_ERR(auto_open_shared_file(rs->sfile));
1414      SVN_ERR(block_read((void **)&extractor, fs, &rs->rep_id,
1415                         rs->sfile->rfile, NULL,
1416                         result_pool, scratch_pool));
1417    }
1418
1419  SVN_ERR(svn_fs_x__extractor_drive(nwin, extractor, rs->current, size,
1420                                    result_pool, scratch_pool));
1421
1422  /* Update RS. */
1423  rs->current += (apr_off_t)size;
1424
1425  return SVN_NO_ERROR;
1426}
1427
/* Get the undeltified window that is a result of combining all deltas
   from the current desired representation identified in *RB with its
   base representation.  Store the window in *RESULT.

   The window read is number RB->CHUNK_INDEX of every rep state in
   RB->RS_LIST.  All rep states that took part in the combination have
   their chunk index incremented.  *RESULT is allocated in a sub-pool of
   RB->SCRATCH_POOL.  Return an SVN_ERR_FS_CORRUPT error if applying a
   window does not produce the number of bytes it promised. */
static svn_error_t *
get_combined_window(svn_stringbuf_t **result,
                    rep_read_baton_t *rb)
{
  apr_pool_t *pool, *new_pool, *window_pool;
  int i;
  apr_array_header_t *windows;

  /* BUF is the latest combined text.  We start with the cached base
     window, which may be NULL. */
  svn_stringbuf_t *source, *buf = rb->base_window;
  rep_state_t *rs;
  apr_pool_t *iterpool;

  /* Read all windows that we need to combine. This is fine because
     the size of each window is relatively small (100kB) and skip-
     delta limits the number of deltas in a chain to well under 100.
     Stop early if one of them does not depend on its predecessors. */
  window_pool = svn_pool_create(rb->scratch_pool);
  windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
  iterpool = svn_pool_create(rb->scratch_pool);
  for (i = 0; i < rb->rs_list->nelts; ++i)
    {
      svn_txdelta_window_t *window;

      svn_pool_clear(iterpool);

      rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
      SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
                                iterpool));

      APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;

      /* A window without source ops does not need its delta base.
         Increment I nonetheless such that it is the number of windows
         read, just as if the loop had terminated normally. */
      if (window->src_ops == 0)
        {
          ++i;
          break;
        }
    }

  /* Combine in the windows from the other delta reps.
     Walk from the deepest window read back to the top-level one. */
  pool = svn_pool_create(rb->scratch_pool);
  for (--i; i >= 0; --i)
    {
      svn_txdelta_window_t *window;

      svn_pool_clear(iterpool);

      rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
      window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);

      /* Maybe, we've got a start representation in a container.  If we do,
         read as much data from it as the needed for the txdelta window's
         source view.
         Note that BUF / SOURCE may only be NULL in the first iteration. */
      source = buf;
      if (source == NULL && rb->src_state != NULL)
        SVN_ERR(read_container_window(&source, rb->src_state,
                                      window->sview_len, pool, iterpool));

      /* Combine this window with the current one. */
      new_pool = svn_pool_create(rb->scratch_pool);
      buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
      buf->len = window->tview_len;

      svn_txdelta_apply_instructions(window, source ? source->data : NULL,
                                     buf->data, &buf->len);
      if (buf->len != window->tview_len)
        return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                                _("svndiff window length is "
                                  "corrupt"));

      /* Cache windows only if the whole rep content could be read as a
         single chunk.  Only then will no other chunk need a deeper RS
         list than the cached chunk. */
      if (   (rb->chunk_index == 0) && (rs->current == rs->size)
          && svn_fs_x__is_revision(rs->rep_id.change_set)
          && rs->combined_cache)
        SVN_ERR(set_cached_combined_window(buf, rs, new_pool));

      rs->chunk_index++;

      /* Cycle pools so that we only need to hold three windows at a time.
         POOL held the SOURCE of this iteration, which is no longer needed.
         NEW_POOL holds BUF, the source of the next iteration. */
      svn_pool_destroy(pool);
      pool = new_pool;
    }
  svn_pool_destroy(iterpool);

  svn_pool_destroy(window_pool);

  *result = buf;
  return SVN_NO_ERROR;
}
1520
1521/* Returns whether or not the expanded fulltext of the file is cachable
1522 * based on its size SIZE.  The decision depends on the cache used by FFD.
1523 */
1524static svn_boolean_t
1525fulltext_size_is_cachable(svn_fs_x__data_t *ffd,
1526                          svn_filesize_t size)
1527{
1528  return (size < APR_SIZE_MAX)
1529      && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
1530}
1531
1532/* Close method used on streams returned by read_representation().
1533 */
1534static svn_error_t *
1535rep_read_contents_close(void *baton)
1536{
1537  rep_read_baton_t *rb = baton;
1538
1539  svn_pool_destroy(rb->scratch_pool);
1540  svn_pool_destroy(rb->filehandle_pool);
1541
1542  return SVN_NO_ERROR;
1543}
1544
1545/* Inialize the representation read state RS for the given REP_HEADER and
1546 * p2l index ENTRY.  If not NULL, assign FILE and STREAM to RS.
1547 * Allocate all sub-structures of RS in RESULT_POOL.
1548 */
1549static svn_error_t *
1550init_rep_state(rep_state_t *rs,
1551               svn_fs_x__rep_header_t *rep_header,
1552               svn_fs_t *fs,
1553               svn_fs_x__revision_file_t *rev_file,
1554               svn_fs_x__p2l_entry_t* entry,
1555               apr_pool_t *result_pool)
1556{
1557  svn_fs_x__data_t *ffd = fs->fsap_data;
1558  shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
1559
1560  /* this function does not apply to representation containers */
1561  SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
1562                 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
1563  SVN_ERR_ASSERT(entry->item_count == 1);
1564
1565  shared_file->rfile = rev_file;
1566  shared_file->fs = fs;
1567  shared_file->revision = svn_fs_x__get_revnum(entry->items[0].change_set);
1568  shared_file->pool = result_pool;
1569
1570  rs->sfile = shared_file;
1571  rs->rep_id = entry->items[0];
1572  rs->header_size = rep_header->header_size;
1573  rs->start = entry->offset + rs->header_size;
1574  rs->current = 4;
1575  rs->size = entry->size - rep_header->header_size - 7;
1576  rs->ver = 1;
1577  rs->chunk_index = 0;
1578  rs->window_cache = ffd->txdelta_window_cache;
1579  rs->combined_cache = ffd->combined_window_cache;
1580
1581  return SVN_NO_ERROR;
1582}
1583
/* Walk through all windows in the representation addressed by RS in FS
 * (excluding the delta bases) and put those not already cached into the
 * window caches.  If MAX_OFFSET is not -1, don't read windows that start
 * at or beyond that offset.  As a side effect, return the total sum of all
 * expanded window sizes in *FULLTEXT_LEN.
 *
 * The walk starts at the current position of RS and moves RS along, i.e.
 * RS->CURRENT and RS->CHUNK_INDEX will be modified.  FS is currently not
 * used by this function.  Return an SVN_ERR_FS_CORRUPT error if a window
 * extends beyond the end of the representation.
 * Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
cache_windows(svn_filesize_t *fulltext_len,
              svn_fs_t *fs,
              rep_state_t *rs,
              apr_off_t max_offset,
              apr_pool_t *scratch_pool)
{
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
  *fulltext_len = 0;

  while (rs->current < rs->size)
    {
      svn_boolean_t is_cached = FALSE;
      window_sizes_t *window_sizes;

      svn_pool_clear(iterpool);

      /* Stop once the next window would start at or behind MAX_OFFSET.
       * *FULLTEXT_LEN then only covers the windows visited so far. */
      if (max_offset != -1 && rs->start + rs->current >= max_offset)
        {
          svn_pool_destroy(iterpool);
          return SVN_NO_ERROR;
        }

      /* efficiently skip windows that are already in the cache instead
       * of fully decoding them */
      SVN_ERR(get_cached_window_sizes(&window_sizes, rs, &is_cached,
                                      iterpool));
      if (is_cached)
        {
          *fulltext_len += window_sizes->target_len;
          rs->current += window_sizes->packed_len;
        }
      else
        {
          svn_txdelta_window_t *window;
          svn_fs_x__revision_file_t *file = rs->sfile->rfile;
          svn_stream_t *stream;
          apr_off_t start_offset = rs->start + rs->current;
          apr_off_t end_offset;

          /* receives output from the seek call but is not used otherwise */
          apr_off_t block_start;

          /* navigate to & read the current window */
          SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
          SVN_ERR(svn_fs_x__rev_file_seek(file, &block_start, start_offset));
          SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, rs->ver,
                                                  iterpool));

          /* aggregate expanded window size */
          *fulltext_len += window->tview_len;

          /* determine on-disk window size */
          SVN_ERR(svn_fs_x__rev_file_offset(&end_offset, rs->sfile->rfile));
          rs->current = end_offset - rs->start;
          if (rs->current > rs->size)
            return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
                          _("Reading one svndiff window read beyond "
                                      "the end of the representation"));

          /* if the window has not been cached before, cache it now
           * (if caching is used for them at all)
           * Note that IS_CACHED is always FALSE within this branch, i.e.
           * the condition below is redundant. */
          if (!is_cached)
            SVN_ERR(set_cached_window(window, rs, start_offset, iterpool));
        }

      /* RS has been moved behind the window one way or another. */
      rs->chunk_index++;
    }

  svn_pool_destroy(iterpool);

  return SVN_NO_ERROR;
}
1661
1662/* Try to get the representation header identified by KEY from FS's cache.
1663 * If it has not been cached, read it from the current position in STREAM
1664 * and put it into the cache (if caching has been enabled for rep headers).
1665 * Return the result in *REP_HEADER.  Use POOL for allocations.
1666 */
1667static svn_error_t *
1668read_rep_header(svn_fs_x__rep_header_t **rep_header,
1669                svn_fs_t *fs,
1670                svn_fs_x__revision_file_t *file,
1671                svn_fs_x__representation_cache_key_t *key,
1672                apr_pool_t *pool)
1673{
1674  svn_fs_x__data_t *ffd = fs->fsap_data;
1675  svn_stream_t *stream;
1676  svn_boolean_t is_cached = FALSE;
1677
1678  SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
1679                         ffd->rep_header_cache, key, pool));
1680  if (is_cached)
1681    return SVN_NO_ERROR;
1682
1683  SVN_ERR(svn_fs_x__rev_file_stream(&stream, file));
1684  SVN_ERR(svn_fs_x__read_rep_header(rep_header, stream, pool, pool));
1685  SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header, pool));
1686
1687  return SVN_NO_ERROR;
1688}
1689
1690svn_error_t *
1691svn_fs_x__get_representation_length(svn_filesize_t *packed_len,
1692                                    svn_filesize_t *expanded_len,
1693                                    svn_fs_t *fs,
1694                                    svn_fs_x__revision_file_t *rev_file,
1695                                    svn_fs_x__p2l_entry_t* entry,
1696                                    apr_pool_t *scratch_pool)
1697{
1698  svn_fs_x__representation_cache_key_t key = { 0 };
1699  rep_state_t rs = { 0 };
1700  svn_fs_x__rep_header_t *rep_header;
1701
1702  /* this function does not apply to representation containers */
1703  SVN_ERR_ASSERT(entry->type >= SVN_FS_X__ITEM_TYPE_FILE_REP
1704                 && entry->type <= SVN_FS_X__ITEM_TYPE_DIR_PROPS);
1705  SVN_ERR_ASSERT(entry->item_count == 1);
1706
1707  /* get / read the representation header */
1708  key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
1709  key.is_packed = svn_fs_x__is_packed_rev(fs, key.revision);
1710  key.item_index = entry->items[0].number;
1711  SVN_ERR(read_rep_header(&rep_header, fs, rev_file, &key, scratch_pool));
1712
1713  /* prepare representation reader state (rs) structure */
1714  SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
1715                         scratch_pool));
1716
1717  /* RS->SFILE may be shared between RS instances -> make sure we point
1718   * to the right data. */
1719  *packed_len = rs.size;
1720  SVN_ERR(cache_windows(expanded_len, fs, &rs, -1, scratch_pool));
1721
1722  return SVN_NO_ERROR;
1723}
1724
/* Deliver up to *LEN bytes of the representation read through RB into
 * BUF, continuing where the previous call left off.  On return, *LEN is
 * the number of bytes actually stored in BUF; it is smaller than
 * requested once the end of the data has been reached.
 *
 * Two sources are handled: a representation without any entries in
 * RB->RS_LIST, whose text is served from RB->BASE_WINDOW, and the regular
 * case in which combined windows are expanded one at a time into RB->BUF
 * and copied out from there.
 *
 * Note that RB->SCRATCH_POOL gets cleared whenever the buffered window
 * has been consumed completely.
 */
static svn_error_t *
get_contents_from_windows(rep_read_baton_t *rb,
                          char *buf,
                          apr_size_t *len)
{
  apr_size_t copy_len, remaining = *len;
  char *cur = buf;
  rep_state_t *rs;

  /* Special case for when there are no delta reps, only a
     containered text. */
  if (rb->rs_list->nelts == 0 && rb->buf == NULL)
    {
      copy_len = remaining;
      rs = rb->src_state;

      /* reps in containers don't have a header */
      if (rs->header_size == 0 && rb->base_window == NULL)
        {
          /* RS->SIZE is unreliable here because it is based upon
           * the delta rep size _before_ putting the data into a
           * a container. */
          SVN_ERR(read_container_window(&rb->base_window, rs, rb->len,
                                        rb->scratch_pool, rb->scratch_pool));
          /* Take the window length off RS->CURRENT again -- presumably
           * read_container_window() advanced it past the data just read;
           * below, RS->CURRENT serves as read offset into the window. */
          rs->current -= rb->base_window->len;
        }

      if (rb->base_window != NULL)
        {
          /* We got the desired rep directly from the cache.
             This is where we need the pseudo rep_state created
             by build_rep_list(). */
          apr_size_t offset = (apr_size_t)rs->current;

          /* Clip the copy range to the data left in the base window. */
          if (offset >= rb->base_window->len)
            copy_len = 0ul;
          else if (copy_len > rb->base_window->len - offset)
            copy_len = rb->base_window->len - offset;

          memcpy (cur, rb->base_window->data + offset, copy_len);
        }

      /* NOTE(review): if RB->BASE_WINDOW is still NULL at this point,
       * nothing has been copied although *LEN will report COPY_LEN
       * bytes -- presumably build_rep_list() guarantees a base window
       * for this case; confirm. */
      rs->current += copy_len;
      *len = copy_len;
      return SVN_NO_ERROR;
    }

  /* Regular case: alternate between draining RB->BUF and refilling it
     with the next combined window until the request has been satisfied
     or the representation is exhausted. */
  while (remaining > 0)
    {
      /* If we have buffered data from a previous chunk, use that. */
      if (rb->buf)
        {
          /* Determine how much to copy from the buffer. */
          copy_len = rb->buf_len - rb->buf_pos;
          if (copy_len > remaining)
            copy_len = remaining;

          /* Actually copy the data. */
          memcpy(cur, rb->buf + rb->buf_pos, copy_len);
          rb->buf_pos += copy_len;
          cur += copy_len;
          remaining -= copy_len;

          /* If the buffer is all used up, clear it and empty the
             local pool. */
          if (rb->buf_pos == rb->buf_len)
            {
              svn_pool_clear(rb->scratch_pool);
              rb->buf = NULL;
            }
        }
      else
        {
          svn_stringbuf_t *sbuf = NULL;

          /* The first element of the list tells us whether there is
             any window left to read. */
          rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
          if (rs->current == rs->size)
            break;

          /* Get more buffered data by evaluating a chunk. */
          SVN_ERR(get_combined_window(&sbuf, rb));

          rb->chunk_index++;
          rb->buf_len = sbuf->len;
          rb->buf = sbuf->data;
          rb->buf_pos = 0;
        }
    }

  /* Report how many bytes we actually delivered. */
  *len = cur - buf;

  return SVN_NO_ERROR;
}
1819
/* Baton type for get_fulltext_partial.  It describes the byte range to
 * extract from a cached fulltext (BUFFER, START and LEN are inputs) and
 * reports how much data was actually extracted (READ is the output). */
typedef struct fulltext_baton_t
{
  /* Target buffer to write to; of at least LEN bytes. */
  char *buffer;

  /* Offset within the respective fulltext at which we shall start to
     copy data into BUFFER. */
  apr_size_t start;

  /* Number of bytes to copy.  The actual amount may be less in case
     the fulltext is short(er). */
  apr_size_t len;

  /* Number of bytes actually copied into BUFFER. */
  apr_size_t read;
} fulltext_baton_t;
1837
1838/* Implement svn_cache__partial_getter_func_t for fulltext caches.
1839 * From the fulltext in DATA, we copy the range specified by the
1840 * fulltext_baton_t* BATON into the buffer provided by that baton.
1841 * OUT and RESULT_POOL are not used.
1842 */
1843static svn_error_t *
1844get_fulltext_partial(void **out,
1845                     const void *data,
1846                     apr_size_t data_len,
1847                     void *baton,
1848                     apr_pool_t *result_pool)
1849{
1850  fulltext_baton_t *fulltext_baton = baton;
1851
1852  /* We cached the fulltext with an NUL appended to it. */
1853  apr_size_t fulltext_len = data_len - 1;
1854
1855  /* Clip the copy range to what the fulltext size allows. */
1856  apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1857  fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1858
1859  /* Copy the data to the output buffer and be done. */
1860  memcpy(fulltext_baton->buffer, (const char *)data + start,
1861         fulltext_baton->read);
1862
1863  return SVN_NO_ERROR;
1864}
1865
1866/* Find the fulltext specified in BATON in the fulltext cache given
1867 * as well by BATON.  If that succeeds, set *CACHED to TRUE and copy
1868 * up to the next *LEN bytes into BUFFER.  Set *LEN to the actual
1869 * number of bytes copied.
1870 */
1871static svn_error_t *
1872get_contents_from_fulltext(svn_boolean_t *cached,
1873                           rep_read_baton_t *baton,
1874                           char *buffer,
1875                           apr_size_t *len)
1876{
1877  void *dummy;
1878  fulltext_baton_t fulltext_baton;
1879
1880  SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1881                 == baton->fulltext_delivered);
1882  fulltext_baton.buffer = buffer;
1883  fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1884  fulltext_baton.len = *len;
1885  fulltext_baton.read = 0;
1886
1887  SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1888                                 &baton->fulltext_cache_key,
1889                                 get_fulltext_partial, &fulltext_baton,
1890                                 baton->scratch_pool));
1891
1892  if (*cached)
1893    {
1894      baton->fulltext_delivered += fulltext_baton.read;
1895      *len = fulltext_baton.read;
1896    }
1897
1898  return SVN_NO_ERROR;
1899}
1900
1901/* Determine the optimal size of a string buf that shall receive a
1902 * (full-) text of NEEDED bytes.
1903 *
1904 * The critical point is that those buffers may be very large and
1905 * can cause memory fragmentation.  We apply simple heuristics to
1906 * make fragmentation less likely.
1907 */
1908static apr_size_t
1909optimimal_allocation_size(apr_size_t needed)
1910{
1911  /* For all allocations, assume some overhead that is shared between
1912   * OS memory managemnt, APR memory management and svn_stringbuf_t. */
1913  const apr_size_t overhead = 0x400;
1914  apr_size_t optimal;
1915
1916  /* If an allocation size if safe for other ephemeral buffers, it should
1917   * be safe for ours. */
1918  if (needed <= SVN__STREAM_CHUNK_SIZE)
1919    return needed;
1920
1921  /* Paranoia edge case:
1922   * Skip our heuristics if they created arithmetical overflow.
1923   * Beware to make this test work for NEEDED = APR_SIZE_MAX as well! */
1924  if (needed >= APR_SIZE_MAX / 2 - overhead)
1925    return needed;
1926
1927  /* As per definition SVN__STREAM_CHUNK_SIZE is a power of two.
1928   * Since we know NEEDED to be larger than that, use it as the
1929   * starting point.
1930   *
1931   * Heuristics: Allocate a power-of-two number of bytes that fit
1932   *             NEEDED plus some OVERHEAD.  The APR allocator
1933   *             will round it up to the next full page size.
1934   */
1935  optimal = SVN__STREAM_CHUNK_SIZE;
1936  while (optimal - overhead < needed)
1937    optimal *= 2;
1938
1939  /* This is above or equal to NEEDED. */
1940  return optimal - overhead;
1941}
1942
1943/* After a fulltext cache lookup failure, we will continue to read from
1944 * combined delta or plain windows.  However, we must first make that data
1945 * stream in BATON catch up tho the position LEN already delivered from the
1946 * fulltext cache.  Also, we need to store the reconstructed fulltext if we
1947 * want to cache it at the end.
1948 */
1949static svn_error_t *
1950skip_contents(rep_read_baton_t *baton,
1951              svn_filesize_t len)
1952{
1953  svn_error_t *err = SVN_NO_ERROR;
1954
1955  /* Do we want to cache the reconstructed fulltext? */
1956  if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
1957    {
1958      char *buffer;
1959      svn_filesize_t to_alloc = MAX(len, baton->len);
1960
1961      /* This should only be happening if BATON->LEN and LEN are
1962       * cacheable, implying they fit into memory. */
1963      SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
1964
1965      /* Allocate the fulltext buffer. */
1966      baton->current_fulltext = svn_stringbuf_create_ensure(
1967                        optimimal_allocation_size((apr_size_t)to_alloc),
1968                        baton->filehandle_pool);
1969
1970      /* Read LEN bytes from the window stream and store the data
1971       * in the fulltext buffer (will be filled by further reads later). */
1972      baton->current_fulltext->len = (apr_size_t)len;
1973      baton->current_fulltext->data[(apr_size_t)len] = 0;
1974
1975      buffer = baton->current_fulltext->data;
1976      while (len > 0 && !err)
1977        {
1978          apr_size_t to_read = (apr_size_t)len;
1979          err = get_contents_from_windows(baton, buffer, &to_read);
1980          len -= to_read;
1981          buffer += to_read;
1982        }
1983
1984      /* Make the MD5 calculation catch up with the data delivered
1985       * (we did not run MD5 on the data that we took from the cache). */
1986      if (!err)
1987        {
1988          SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
1989                                      baton->current_fulltext->data,
1990                                      baton->current_fulltext->len));
1991          baton->off += baton->current_fulltext->len;
1992        }
1993    }
1994  else if (len > 0)
1995    {
1996      /* Simply drain LEN bytes from the window stream. */
1997      apr_pool_t *subpool = svn_pool_create(baton->scratch_pool);
1998      char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
1999
2000      while (len > 0 && !err)
2001        {
2002          apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
2003                            ? SVN__STREAM_CHUNK_SIZE
2004                            : (apr_size_t)len;
2005
2006          err = get_contents_from_windows(baton, buffer, &to_read);
2007          len -= to_read;
2008
2009          /* Make the MD5 calculation catch up with the data delivered
2010           * (we did not run MD5 on the data that we took from the cache). */
2011          if (!err)
2012            {
2013              SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2014                                          buffer, to_read));
2015              baton->off += to_read;
2016            }
2017        }
2018
2019      svn_pool_destroy(subpool);
2020    }
2021
2022  return svn_error_trace(err);
2023}
2024
/* BATON is of type `rep_read_baton_t'; read the next *LEN bytes of the
   representation and store them in *BUF.  Sum as we read and verify
   the MD5 sum at the end.

   Data is taken from the fulltext cache for as long as lookups succeed.
   After the first miss, the window stream is set up, made to catch up
   with the data already delivered and used for all further reads.  If
   a fulltext buffer was set up along the way, it collects all data read
   and is put into the fulltext cache once the last byte has been read.

   Returns an SVN_ERR_FS_CORRUPT error if the MD5 checksum of the data
   read does not match the one stored in the baton. */
static svn_error_t *
rep_read_contents(void *baton,
                  char *buf,
                  apr_size_t *len)
{
  rep_read_baton_t *rb = baton;

  /* Get data from the fulltext cache for as long as we can. */
  if (rb->fulltext_cache)
    {
      svn_boolean_t cached;
      SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
      if (cached)
        return SVN_NO_ERROR;

      /* Cache miss.  From now on, we will never read from the fulltext
       * cache for this representation anymore. */
      rb->fulltext_cache = NULL;
    }

  /* No fulltext cache to help us.  We must read from the window stream. */
  if (!rb->rs_list)
    {
      /* Window stream not initialized, yet.  Do it now. */
      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
                             &rb->src_state, rb->fs, &rb->rep,
                             rb->filehandle_pool, rb->scratch_pool));

      /* In case we did read from the fulltext cache before, make the
       * window stream catch up.  Also, initialize the fulltext buffer
       * if we want to cache the fulltext at the end. */
      SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
    }

  /* Get the next block of data.
   * Keep in mind that the representation might be empty and leave us
   * already positioned at the end of the rep. */
  if (rb->off == rb->len)
    *len = 0;
  else
    SVN_ERR(get_contents_from_windows(rb, buf, len));

  /* Collect the data for the fulltext cache, if we are buffering it. */
  if (rb->current_fulltext)
    svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);

  /* Perform checksumming.  We want to check the checksum as soon as
     the last byte of data is read, in case the caller never performs
     a short read, but we don't want to finalize the MD5 context
     twice. */
  if (!rb->checksum_finalized)
    {
      SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
      rb->off += *len;
      if (rb->off == rb->len)
        {
          svn_checksum_t *md5_checksum;
          svn_checksum_t expected;
          expected.kind = svn_checksum_md5;
          expected.digest = rb->md5_digest;

          /* Set the flag first such that we will not finalize the
             context again, even if the comparison below fails. */
          rb->checksum_finalized = TRUE;
          SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
                                     rb->scratch_pool));
          if (!svn_checksum_match(md5_checksum, &expected))
            return svn_error_create(SVN_ERR_FS_CORRUPT,
                    svn_checksum_mismatch_err(&expected, md5_checksum,
                        rb->scratch_pool,
                        _("Checksum mismatch while reading representation")),
                    NULL);
        }
    }

  /* Once all data has been read (and passed the checksum test above),
     put the buffered fulltext into the cache and stop buffering. */
  if (rb->off == rb->len && rb->current_fulltext)
    {
      svn_fs_x__data_t *ffd = rb->fs->fsap_data;
      SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
                             rb->current_fulltext, rb->scratch_pool));
      rb->current_fulltext = NULL;
    }

  return SVN_NO_ERROR;
}
2110
2111svn_error_t *
2112svn_fs_x__get_contents(svn_stream_t **contents_p,
2113                       svn_fs_t *fs,
2114                       svn_fs_x__representation_t *rep,
2115                       svn_boolean_t cache_fulltext,
2116                       apr_pool_t *result_pool)
2117{
2118  if (! rep)
2119    {
2120      *contents_p = svn_stream_empty(result_pool);
2121    }
2122  else
2123    {
2124      svn_fs_x__data_t *ffd = fs->fsap_data;
2125      svn_filesize_t len = rep->expanded_size;
2126      rep_read_baton_t *rb;
2127      svn_revnum_t revision = svn_fs_x__get_revnum(rep->id.change_set);
2128
2129      svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
2130      fulltext_cache_key.revision = revision;
2131      fulltext_cache_key.second = rep->id.number;
2132
2133      /* Initialize the reader baton.  Some members may added lazily
2134       * while reading from the stream */
2135      SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key,
2136                                 result_pool));
2137
2138      /* Make the stream attempt fulltext cache lookups if the fulltext
2139       * is cacheable.  If it is not, then also don't try to buffer and
2140       * cache it. */
2141      if (   cache_fulltext
2142          && SVN_IS_VALID_REVNUM(revision)
2143          && fulltext_size_is_cachable(ffd, len))
2144        {
2145          rb->fulltext_cache = ffd->fulltext_cache;
2146        }
2147      else
2148        {
2149          /* This will also prevent the reconstructed fulltext from being
2150             put into the cache. */
2151          rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2152        }
2153
2154      *contents_p = svn_stream_create(rb, result_pool);
2155      svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2156                           rep_read_contents);
2157      svn_stream_set_close(*contents_p, rep_read_contents_close);
2158    }
2159
2160  return SVN_NO_ERROR;
2161}
2162
/* Set *CONTENTS_P to a read-only stream that delivers the fulltext of
 * representation REP in FS, where REP's own data (starting with the rep
 * header) is read from FILE at OFFSET.  If REP is a delta against
 * another representation, that base is looked up by revision and item
 * index and reconstructed through build_rep_list().  The fulltext cache
 * is not used by the resulting stream.  All allocations are made in POOL.
 */
svn_error_t *
svn_fs_x__get_contents_from_file(svn_stream_t **contents_p,
                                 svn_fs_t *fs,
                                 svn_fs_x__representation_t *rep,
                                 apr_file_t *file,
                                 apr_off_t offset,
                                 apr_pool_t *pool)
{
  rep_read_baton_t *rb;
  /* An invalid revision in the key keeps the fulltext from being cached. */
  svn_fs_x__pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
  rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
  svn_fs_x__rep_header_t *rh;
  svn_stream_t *stream;

  /* Initialize the reader baton.  Some members may added lazily
   * while reading from the stream. */
  SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));

  /* Continue constructing RS. Leave caches as NULL. */
  rs->size = rep->size;
  rs->rep_id = rep->id;
  rs->ver = -1;
  rs->start = -1;

  /* Provide just enough file access info to allow for a basic read from
   * FILE but leave all index / footer info with empty values b/c FILE
   * probably is not a complete revision file. */
  rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
  rs->sfile->revision = SVN_INVALID_REVNUM;
  rs->sfile->pool = pool;
  rs->sfile->fs = fs;
  SVN_ERR(svn_fs_x__rev_file_wrap_temp(&rs->sfile->rfile, fs, file, pool));

  /* Read the rep header.  The file position right behind the header
   * is where the actual representation data starts. */
  SVN_ERR(svn_fs_x__rev_file_seek(rs->sfile->rfile, NULL, offset));
  SVN_ERR(svn_fs_x__rev_file_stream(&stream, rs->sfile->rfile));
  SVN_ERR(svn_fs_x__read_rep_header(&rh, stream, pool, pool));
  SVN_ERR(svn_fs_x__rev_file_offset(&rs->start, rs->sfile->rfile));
  rs->header_size = rh->header_size;

  /* Log the access. */
  SVN_ERR(dbg__log_access(fs, &rep->id, rh,
                          SVN_FS_X__ITEM_TYPE_ANY_REP, pool));

  /* Build the representation list (delta chain). */
  if (rh->type == svn_fs_x__rep_self_delta)
    {
      /* A self-delta has no base: RS is the only element in the chain.
       * NOTE(review): RS->CURRENT is only advanced past the 4-byte
       * "SVNx" marker in the other branch; here it keeps its
       * zero-initialized value.  Confirm this asymmetry is intended. */
      rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
      APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
      rb->src_state = NULL;
    }
  else
    {
      svn_fs_x__representation_t next_rep = { 0 };

      /* skip "SVNx" diff marker */
      rs->current = 4;

      /* REP's base rep is inside a proper revision.
       * It can be reconstructed in the usual way.  */
      next_rep.id.change_set = svn_fs_x__change_set_by_rev(rh->base_revision);
      next_rep.id.number = rh->base_item_index;
      next_rep.size = rh->base_length;

      SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
                             &rb->src_state, rb->fs, &next_rep,
                             rb->filehandle_pool, rb->scratch_pool));

      /* Insert the access to REP as the first element of the delta chain. */
      SVN_ERR(svn_sort__array_insert2(rb->rs_list, &rs, 0));
    }

  /* Now, the baton is complete and we can assemble the stream around it. */
  *contents_p = svn_stream_create(rb, pool);
  svn_stream_set_read2(*contents_p, NULL /* only full read support */,
                       rep_read_contents);
  svn_stream_set_close(*contents_p, rep_read_contents_close);

  return SVN_NO_ERROR;
}
2243
/* Baton for cache_access_wrapper. Wraps the original parameters of
 * svn_fs_x__try_process_file_contents().
 */
typedef struct cache_access_wrapper_baton_t
{
  /* The caller-provided content processor to invoke on the cached data. */
  svn_fs_process_contents_func_t func;

  /* The caller-provided baton to pass to FUNC. */
  void* baton;
} cache_access_wrapper_baton_t;
2252
2253/* Wrapper to translate between svn_fs_process_contents_func_t and
2254 * svn_cache__partial_getter_func_t.
2255 */
2256static svn_error_t *
2257cache_access_wrapper(void **out,
2258                     const void *data,
2259                     apr_size_t data_len,
2260                     void *baton,
2261                     apr_pool_t *pool)
2262{
2263  cache_access_wrapper_baton_t *wrapper_baton = baton;
2264
2265  SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2266                              data_len - 1, /* cache adds terminating 0 */
2267                              wrapper_baton->baton,
2268                              pool));
2269
2270  /* non-NULL value to signal the calling cache that all went well */
2271  *out = baton;
2272
2273  return SVN_NO_ERROR;
2274}
2275
2276svn_error_t *
2277svn_fs_x__try_process_file_contents(svn_boolean_t *success,
2278                                    svn_fs_t *fs,
2279                                    svn_fs_x__noderev_t *noderev,
2280                                    svn_fs_process_contents_func_t processor,
2281                                    void* baton,
2282                                    apr_pool_t *scratch_pool)
2283{
2284  svn_fs_x__representation_t *rep = noderev->data_rep;
2285  if (rep)
2286    {
2287      svn_fs_x__data_t *ffd = fs->fsap_data;
2288      svn_fs_x__pair_cache_key_t fulltext_cache_key = { 0 };
2289
2290      fulltext_cache_key.revision = svn_fs_x__get_revnum(rep->id.change_set);
2291      fulltext_cache_key.second = rep->id.number;
2292      if (   SVN_IS_VALID_REVNUM(fulltext_cache_key.revision)
2293          && fulltext_size_is_cachable(ffd, rep->expanded_size))
2294        {
2295          cache_access_wrapper_baton_t wrapper_baton;
2296          void *dummy = NULL;
2297
2298          wrapper_baton.func = processor;
2299          wrapper_baton.baton = baton;
2300          return svn_cache__get_partial(&dummy, success,
2301                                        ffd->fulltext_cache,
2302                                        &fulltext_cache_key,
2303                                        cache_access_wrapper,
2304                                        &wrapper_baton,
2305                                        scratch_pool);
2306        }
2307    }
2308
2309  *success = FALSE;
2310  return SVN_NO_ERROR;
2311}
2312
/* Baton used when reading delta windows. */
typedef struct delta_read_baton_t
{
  /* State of the representation whose windows are being read. */
  struct rep_state_t *rs;

  /* MD5 digest reported by delta_read_md5_digest(); filled with the
     digest of the target's data representation when the baton gets
     created in get_storaged_delta_stream(). */
  unsigned char md5_digest[APR_MD5_DIGESTSIZE];
} delta_read_baton_t;
2319
2320/* This implements the svn_txdelta_next_window_fn_t interface. */
2321static svn_error_t *
2322delta_read_next_window(svn_txdelta_window_t **window,
2323                       void *baton,
2324                       apr_pool_t *pool)
2325{
2326  delta_read_baton_t *drb = baton;
2327  apr_pool_t *scratch_pool = svn_pool_create(pool);
2328
2329  *window = NULL;
2330  if (drb->rs->current < drb->rs->size)
2331    {
2332      SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2333                                scratch_pool));
2334      drb->rs->chunk_index++;
2335    }
2336
2337  svn_pool_destroy(scratch_pool);
2338
2339  return SVN_NO_ERROR;
2340}
2341
2342/* This implements the svn_txdelta_md5_digest_fn_t interface. */
2343static const unsigned char *
2344delta_read_md5_digest(void *baton)
2345{
2346  delta_read_baton_t *drb = baton;
2347  return drb->md5_digest;
2348}
2349
2350/* Return a txdelta stream for on-disk representation REP_STATE
2351 * of TARGET.  Allocate the result in RESULT_POOL.
2352 */
2353static svn_txdelta_stream_t *
2354get_storaged_delta_stream(rep_state_t *rep_state,
2355                          svn_fs_x__noderev_t *target,
2356                          apr_pool_t *result_pool)
2357{
2358  /* Create the delta read baton. */
2359  delta_read_baton_t *drb = apr_pcalloc(result_pool, sizeof(*drb));
2360  drb->rs = rep_state;
2361  memcpy(drb->md5_digest, target->data_rep->md5_digest,
2362         sizeof(drb->md5_digest));
2363  return svn_txdelta_stream_create(drb, delta_read_next_window,
2364                                   delta_read_md5_digest, result_pool);
2365}
2366
2367svn_error_t *
2368svn_fs_x__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2369                                svn_fs_t *fs,
2370                                svn_fs_x__noderev_t *source,
2371                                svn_fs_x__noderev_t *target,
2372                                apr_pool_t *result_pool,
2373                                apr_pool_t *scratch_pool)
2374{
2375  svn_stream_t *source_stream, *target_stream;
2376  rep_state_t *rep_state;
2377  svn_fs_x__rep_header_t *rep_header;
2378
2379  /* Try a shortcut: if the target is stored as a delta against the source,
2380     then just use that delta.  However, prefer using the fulltext cache
2381     whenever that is available. */
2382  if (target->data_rep && source)
2383    {
2384      /* Read target's base rep if any. */
2385      SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2386                               target->data_rep, fs, result_pool,
2387                               scratch_pool));
2388
2389      /* Try a shortcut: if the target is stored as a delta against the source,
2390         then just use that delta. */
2391      if (source && source->data_rep && target->data_rep)
2392        {
2393          /* If that matches source, then use this delta as is.
2394             Note that we want an actual delta here.  E.g. a self-delta would
2395             not be good enough. */
2396          if (rep_header->type == svn_fs_x__rep_delta
2397              && rep_header->base_revision
2398                 == svn_fs_x__get_revnum(source->data_rep->id.change_set)
2399              && rep_header->base_item_index == source->data_rep->id.number)
2400            {
2401              *stream_p = get_storaged_delta_stream(rep_state, target,
2402                                                    result_pool);
2403              return SVN_NO_ERROR;
2404            }
2405        }
2406      else if (!source)
2407        {
2408          /* We want a self-delta. There is a fair chance that TARGET got
2409             added in this revision and is already stored in the requested
2410             format. */
2411          if (rep_header->type == svn_fs_x__rep_self_delta)
2412            {
2413              *stream_p = get_storaged_delta_stream(rep_state, target,
2414                                                    result_pool);
2415              return SVN_NO_ERROR;
2416            }
2417        }
2418
2419      /* Don't keep file handles open for longer than necessary. */
2420      if (rep_state->sfile->rfile)
2421        {
2422          SVN_ERR(svn_fs_x__close_revision_file(rep_state->sfile->rfile));
2423          rep_state->sfile->rfile = NULL;
2424        }
2425    }
2426
2427  /* Read both fulltexts and construct a delta. */
2428  if (source)
2429    SVN_ERR(svn_fs_x__get_contents(&source_stream, fs, source->data_rep,
2430                                   TRUE, result_pool));
2431  else
2432    source_stream = svn_stream_empty(result_pool);
2433
2434  SVN_ERR(svn_fs_x__get_contents(&target_stream, fs, target->data_rep,
2435                                 TRUE, result_pool));
2436
2437  /* Because source and target stream will already verify their content,
2438   * there is no need to do this once more.  In particular if the stream
2439   * content is being fetched from cache. */
2440  svn_txdelta2(stream_p, source_stream, target_stream, FALSE, result_pool);
2441
2442  return SVN_NO_ERROR;
2443}
2444
2445/* Return TRUE when all svn_fs_x__dirent_t* in ENTRIES are already sorted
2446   by their respective name. */
2447static svn_boolean_t
2448sorted(apr_array_header_t *entries)
2449{
2450  int i;
2451
2452  const svn_fs_x__dirent_t * const *dirents = (const void *)entries->elts;
2453  for (i = 0; i < entries->nelts-1; ++i)
2454    if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2455      return FALSE;
2456
2457  return TRUE;
2458}
2459
2460/* Compare the names of the two dirents given in **A and **B. */
2461static int
2462compare_dirents(const void *a,
2463                const void *b)
2464{
2465  const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a);
2466  const svn_fs_x__dirent_t *rhs = *((const svn_fs_x__dirent_t * const *) b);
2467
2468  return strcmp(lhs->name, rhs->name);
2469}
2470
2471/* Compare the name of the dirents given in **A with the C string in *B. */
2472static int
2473compare_dirent_name(const void *a,
2474                    const void *b)
2475{
2476  const svn_fs_x__dirent_t *lhs = *((const svn_fs_x__dirent_t * const *) a);
2477  const char *rhs = b;
2478
2479  return strcmp(lhs->name, rhs);
2480}
2481
2482/* Into ENTRIES, parse all directories entries from the serialized form in
2483 * DATA.  If INCREMENTAL is TRUE, read until the end of the STREAM and
2484 * update the data.  ID is provided for nicer error messages.
2485 *
2486 * The contents of DATA will be shared with the items in ENTRIES, i.e. it
2487 * must not be modified afterwards and must remain valid as long as ENTRIES
2488 * is valid.  Use SCRATCH_POOL for temporary allocations.
2489 */
2490static svn_error_t *
2491parse_dir_entries(apr_array_header_t **entries_p,
2492                  const svn_stringbuf_t *data,
2493                  svn_boolean_t incremental,
2494                  const svn_fs_x__id_t *id,
2495                  apr_pool_t *result_pool,
2496                  apr_pool_t *scratch_pool)
2497{
2498  const apr_byte_t *p = (const apr_byte_t *)data->data;
2499  const apr_byte_t *end = p + data->len;
2500  apr_uint64_t count;
2501  apr_hash_t *hash = incremental ? svn_hash__make(scratch_pool) : NULL;
2502  apr_array_header_t *entries;
2503
2504  /* Construct the resulting container. */
2505  p = svn__decode_uint(&count, p, end);
2506  if (count > INT_MAX)
2507    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2508                             _("Directory for '%s' is too large"),
2509                             svn_fs_x__id_unparse(id, scratch_pool)->data);
2510
2511  entries = apr_array_make(result_pool, (int)count,
2512                           sizeof(svn_fs_x__dirent_t *));
2513
2514  while (p != end)
2515    {
2516      apr_size_t len;
2517      svn_fs_x__dirent_t *dirent;
2518      dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2519
2520      /* The part of the serialized entry that is not the name will be
2521       * about 6 bytes or less.  Since APR allocates with an 8 byte
2522       * alignment (4 bytes loss on average per string), simply using
2523       * the name string in DATA already gives us near-optimal memory
2524       * usage. */
2525      dirent->name = (const char *)p;
2526      len = strlen(dirent->name);
2527      p += len + 1;
2528      if (p == end)
2529        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2530                            _("Directory entry missing kind in '%s'"),
2531                            svn_fs_x__id_unparse(id, scratch_pool)->data);
2532
2533      dirent->kind = (svn_node_kind_t)*(p++);
2534      if (p == end)
2535        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2536                            _("Directory entry missing change set in '%s'"),
2537                            svn_fs_x__id_unparse(id, scratch_pool)->data);
2538
2539      p = svn__decode_int(&dirent->id.change_set, p, end);
2540      if (p == end)
2541        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2542                            _("Directory entry missing item number in '%s'"),
2543                            svn_fs_x__id_unparse(id, scratch_pool)->data);
2544
2545      p = svn__decode_uint(&dirent->id.number, p, end);
2546
2547      /* In incremental mode, update the hash; otherwise, write to the
2548       * final array. */
2549      if (incremental)
2550        {
2551          /* Insertion / update or a deletion? */
2552          if (svn_fs_x__id_used(&dirent->id))
2553            apr_hash_set(hash, dirent->name, len, dirent);
2554          else
2555            apr_hash_set(hash, dirent->name, len, NULL);
2556        }
2557      else
2558        {
2559          APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = dirent;
2560        }
2561    }
2562
2563  if (incremental)
2564    {
2565      /* Convert container into a sorted array. */
2566      apr_hash_index_t *hi;
2567      for (hi = apr_hash_first(scratch_pool, hash); hi; hi = apr_hash_next(hi))
2568        APR_ARRAY_PUSH(entries, svn_fs_x__dirent_t *) = apr_hash_this_val(hi);
2569
2570      if (!sorted(entries))
2571        svn_sort__array(entries, compare_dirents);
2572    }
2573  else
2574    {
2575      /* Check that we read the expected amount of entries. */
2576      if ((apr_uint64_t)entries->nelts != count)
2577        return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2578                            _("Directory length mismatch in '%s'"),
2579                            svn_fs_x__id_unparse(id, scratch_pool)->data);
2580    }
2581
2582 *entries_p = entries;
2583
2584  return SVN_NO_ERROR;
2585}
2586
2587/* For directory NODEREV in FS, return the *FILESIZE of its in-txn
2588 * representation.  If the directory representation is comitted data,
2589 * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries.
2590 */
2591static svn_error_t *
2592get_txn_dir_info(svn_filesize_t *filesize,
2593                 svn_fs_t *fs,
2594                 svn_fs_x__noderev_t *noderev,
2595                 apr_pool_t *scratch_pool)
2596{
2597  if (noderev->data_rep
2598      && ! svn_fs_x__is_revision(noderev->data_rep->id.change_set))
2599    {
2600      const svn_io_dirent2_t *dirent;
2601      const char *filename;
2602
2603      filename = svn_fs_x__path_txn_node_children(fs, &noderev->noderev_id,
2604                                                  scratch_pool, scratch_pool);
2605
2606      SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE,
2607                                  scratch_pool, scratch_pool));
2608      *filesize = dirent->filesize;
2609    }
2610  else
2611    {
2612      *filesize = SVN_INVALID_FILESIZE;
2613    }
2614
2615  return SVN_NO_ERROR;
2616}
2617
2618/* Fetch the contents of a directory into DIR.  Values are stored
2619   as filename to string mappings; further conversion is necessary to
2620   convert them into svn_fs_x__dirent_t values. */
2621static svn_error_t *
2622get_dir_contents(svn_fs_x__dir_data_t *dir,
2623                 svn_fs_t *fs,
2624                 svn_fs_x__noderev_t *noderev,
2625                 apr_pool_t *result_pool,
2626                 apr_pool_t *scratch_pool)
2627{
2628  svn_stream_t *contents;
2629  const svn_fs_x__id_t *id = &noderev->noderev_id;
2630  apr_size_t len;
2631  svn_stringbuf_t *text;
2632  svn_boolean_t incremental;
2633
2634  /* Initialize the result. */
2635  dir->txn_filesize = SVN_INVALID_FILESIZE;
2636
2637  /* Read dir contents - unless there is none in which case we are done. */
2638  if (noderev->data_rep
2639      && ! svn_fs_x__is_revision(noderev->data_rep->id.change_set))
2640    {
2641      /* Get location & current size of the directory representation. */
2642      const char *filename;
2643      apr_file_t *file;
2644
2645      filename = svn_fs_x__path_txn_node_children(fs, id, scratch_pool,
2646                                                  scratch_pool);
2647
2648      /* The representation is mutable.  Read the old directory
2649         contents from the mutable children file, followed by the
2650         changes we've made in this transaction. */
2651      SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
2652                               APR_OS_DEFAULT, scratch_pool));
2653
2654      /* Obtain txn children file size. */
2655      SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
2656      len = (apr_size_t)dir->txn_filesize;
2657
2658      /* Finally, provide stream access to FILE. */
2659      contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
2660      incremental = TRUE;
2661    }
2662  else if (noderev->data_rep)
2663    {
2664      /* The representation is immutable.  Read it normally. */
2665      len = noderev->data_rep->expanded_size;
2666      SVN_ERR(svn_fs_x__get_contents(&contents, fs, noderev->data_rep,
2667                                     FALSE, scratch_pool));
2668      incremental = FALSE;
2669    }
2670  else
2671    {
2672      /* Empty representation == empty directory. */
2673      dir->entries = apr_array_make(result_pool, 0,
2674                                    sizeof(svn_fs_x__dirent_t *));
2675      return SVN_NO_ERROR;
2676    }
2677
2678  /* Read the whole stream contents into a single buffer.
2679   * Due to our LEN hint, no allocation overhead occurs.
2680   *
2681   * Also, a large portion of TEXT will be file / dir names which we
2682   * directly reference from DIR->ENTRIES instead of copying them.
2683   * Hence, we need to use the RESULT_POOL here. */
2684  SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, result_pool));
2685  SVN_ERR(svn_stream_close(contents));
2686
2687  /* de-serialize hash */
2688  SVN_ERR(parse_dir_entries(&dir->entries, text, incremental, id,
2689                            result_pool, scratch_pool));
2690
2691  return SVN_NO_ERROR;
2692}
2693
2694
2695/* Return the cache object in FS responsible to storing the directory the
2696 * NODEREV plus the corresponding pre-allocated *KEY.
2697 */
2698static svn_cache__t *
2699locate_dir_cache(svn_fs_t *fs,
2700                 svn_fs_x__id_t *key,
2701                 svn_fs_x__noderev_t *noderev)
2702{
2703  svn_fs_x__data_t *ffd = fs->fsap_data;
2704
2705  if (!noderev->data_rep)
2706    {
2707      /* no data rep -> empty directory.
2708         Use a key that does definitely not clash with non-NULL reps. */
2709      key->change_set = SVN_FS_X__INVALID_CHANGE_SET;
2710      key->number = SVN_FS_X__ITEM_INDEX_UNUSED;
2711    }
2712  else if (svn_fs_x__is_txn(noderev->noderev_id.change_set))
2713    {
2714      /* data in txns must be addressed by noderev ID since the
2715         representation has not been created, yet. */
2716      *key = noderev->noderev_id;
2717    }
2718  else
2719    {
2720      /* committed data can use simple rev,item pairs */
2721      *key = noderev->data_rep->id;
2722    }
2723
2724  return ffd->dir_cache;
2725}
2726
2727svn_error_t *
2728svn_fs_x__rep_contents_dir(apr_array_header_t **entries_p,
2729                           svn_fs_t *fs,
2730                           svn_fs_x__noderev_t *noderev,
2731                           apr_pool_t *result_pool,
2732                           apr_pool_t *scratch_pool)
2733{
2734  svn_fs_x__id_t key;
2735  svn_fs_x__dir_data_t *dir;
2736
2737  /* find the cache we may use */
2738  svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);
2739  svn_boolean_t found;
2740
2741  SVN_ERR(svn_cache__get((void **)&dir, &found, cache, &key, result_pool));
2742  if (found)
2743    {
2744      /* Verify that the cached dir info is not stale
2745       * (no-op for committed data). */
2746      svn_filesize_t filesize;
2747      SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2748
2749      if (filesize == dir->txn_filesize)
2750        {
2751          /* Still valid. Done. */
2752          *entries_p = dir->entries;
2753          return SVN_NO_ERROR;
2754        }
2755    }
2756
2757  /* Read in the directory contents. */
2758  dir = apr_pcalloc(scratch_pool, sizeof(*dir));
2759  SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool));
2760  *entries_p = dir->entries;
2761
2762  /* Update the cache, if we are to use one.
2763   *
2764   * Don't even attempt to serialize very large directories; it would cause
2765   * an unnecessary memory allocation peak.  100 bytes/entry is about right.
2766   */
2767  if (svn_cache__is_cachable(cache, 100 * dir->entries->nelts))
2768    SVN_ERR(svn_cache__set(cache, &key, dir, scratch_pool));
2769
2770  return SVN_NO_ERROR;
2771}
2772
2773svn_fs_x__dirent_t *
2774svn_fs_x__find_dir_entry(apr_array_header_t *entries,
2775                         const char *name,
2776                         int *hint)
2777{
2778  svn_fs_x__dirent_t **result
2779    = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2780  return result ? *result : NULL;
2781}
2782
2783svn_error_t *
2784svn_fs_x__rep_contents_dir_entry(svn_fs_x__dirent_t **dirent,
2785                                 svn_fs_t *fs,
2786                                 svn_fs_x__noderev_t *noderev,
2787                                 const char *name,
2788                                 apr_size_t *hint,
2789                                 apr_pool_t *result_pool,
2790                                 apr_pool_t *scratch_pool)
2791{
2792  svn_boolean_t found = FALSE;
2793
2794  /* find the cache we may use */
2795  svn_fs_x__id_t key;
2796  svn_cache__t *cache = locate_dir_cache(fs, &key, noderev);
2797  svn_fs_x__ede_baton_t baton;
2798
2799  svn_filesize_t filesize;
2800  SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2801
2802   /* Cache lookup. */
2803  baton.hint = *hint;
2804  baton.name = name;
2805  baton.txn_filesize = filesize;
2806
2807  SVN_ERR(svn_cache__get_partial((void **)dirent,
2808                                 &found,
2809                                 cache,
2810                                 &key,
2811                                 svn_fs_x__extract_dir_entry,
2812                                 &baton,
2813                                 result_pool));
2814
2815  /* Remember the new clue only if we found something at that spot. */
2816  if (found)
2817    *hint = baton.hint;
2818
2819  /* fetch data from disk if we did not find it in the cache */
2820  if (! found || baton.out_of_date)
2821    {
2822      svn_fs_x__dirent_t *entry;
2823      svn_fs_x__dirent_t *entry_copy = NULL;
2824      svn_fs_x__dir_data_t dir;
2825
2826      /* Read in the directory contents. */
2827      SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
2828                               scratch_pool));
2829
2830      /* Update the cache, if we are to use one.
2831       *
2832       * Don't even attempt to serialize very large directories; it would
2833       * cause an unnecessary memory allocation peak.  150 bytes / entry is
2834       * about right. */
2835      if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
2836        SVN_ERR(svn_cache__set(cache, &key, &dir, scratch_pool));
2837
2838      /* find desired entry and return a copy in POOL, if found */
2839      entry = svn_fs_x__find_dir_entry(dir.entries, name, NULL);
2840      if (entry)
2841        {
2842          entry_copy = apr_pmemdup(result_pool, entry, sizeof(*entry_copy));
2843          entry_copy->name = apr_pstrdup(result_pool, entry->name);
2844        }
2845
2846      *dirent = entry_copy;
2847    }
2848
2849  return SVN_NO_ERROR;
2850}
2851
2852svn_error_t *
2853svn_fs_x__get_proplist(apr_hash_t **proplist,
2854                       svn_fs_t *fs,
2855                       svn_fs_x__noderev_t *noderev,
2856                       apr_pool_t *result_pool,
2857                       apr_pool_t *scratch_pool)
2858{
2859  svn_stream_t *stream;
2860  const svn_fs_x__id_t *noderev_id = &noderev->noderev_id;
2861
2862  if (noderev->prop_rep
2863      && !svn_fs_x__is_revision(noderev->prop_rep->id.change_set))
2864    {
2865      svn_stringbuf_t *content;
2866      svn_string_t *as_string;
2867      const char *filename = svn_fs_x__path_txn_node_props(fs, noderev_id,
2868                                                           scratch_pool,
2869                                                           scratch_pool);
2870      SVN_ERR(svn_stringbuf_from_file2(&content, filename, result_pool));
2871
2872      as_string = svn_stringbuf__morph_into_string(content);
2873      SVN_ERR_W(svn_fs_x__parse_properties(proplist, as_string, result_pool),
2874                apr_psprintf(scratch_pool,
2875                    "malformed property list for node-revision '%s' in '%s'",
2876                    svn_fs_x__id_unparse(&noderev->noderev_id,
2877                                         scratch_pool)->data,
2878                    filename));
2879    }
2880  else if (noderev->prop_rep)
2881    {
2882      svn_fs_x__data_t *ffd = fs->fsap_data;
2883      svn_fs_x__representation_t *rep = noderev->prop_rep;
2884      svn_fs_x__pair_cache_key_t key = { 0 };
2885      svn_string_t *content;
2886      svn_boolean_t is_cached;
2887
2888      key.revision = svn_fs_x__get_revnum(rep->id.change_set);
2889      key.second = rep->id.number;
2890      SVN_ERR(svn_cache__get((void **) proplist, &is_cached,
2891                             ffd->properties_cache, &key, result_pool));
2892      if (is_cached)
2893        return SVN_NO_ERROR;
2894
2895      SVN_ERR(svn_fs_x__get_contents(&stream, fs, rep, FALSE, scratch_pool));
2896      SVN_ERR(svn_string_from_stream2(&content, stream, rep->expanded_size,
2897                                      result_pool));
2898
2899      SVN_ERR_W(svn_fs_x__parse_properties(proplist, content, result_pool),
2900                apr_psprintf(scratch_pool,
2901                    "malformed property list for node-revision '%s'",
2902                    svn_fs_x__id_unparse(&noderev->noderev_id,
2903                                         scratch_pool)->data));
2904
2905      SVN_ERR(svn_cache__set(ffd->properties_cache, &key, *proplist,
2906                             scratch_pool));
2907    }
2908  else
2909    {
2910      /* return an empty prop list if the node doesn't have any props */
2911      *proplist = apr_hash_make(result_pool);
2912    }
2913
2914  return SVN_NO_ERROR;
2915}
2916
2917svn_error_t *
2918svn_fs_x__create_changes_context(svn_fs_x__changes_context_t **context,
2919                                 svn_fs_t *fs,
2920                                 svn_revnum_t rev,
2921                                 apr_pool_t *result_pool,
2922                                 apr_pool_t *scratch_pool)
2923{
2924  svn_fs_x__changes_context_t *result = apr_pcalloc(result_pool,
2925                                                    sizeof(*result));
2926  result->fs = fs;
2927  result->revision = rev;
2928
2929  SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));
2930  SVN_ERR(svn_fs_x__rev_file_init(&result->revision_file, fs, rev,
2931                                  result_pool));
2932
2933  *context = result;
2934  return SVN_NO_ERROR;
2935}
2936
/* Return in *CHANGES the next block of changed paths for the revision
 * that CONTEXT was created for, and advance CONTEXT->NEXT by the number
 * of elements returned.
 *
 * The data is taken from the changes container cache (packed revisions)
 * or the changes cache (non-packed revisions) if found there; otherwise
 * it is read through block_read().  Allocate *CHANGES in RESULT_POOL and
 * use SCRATCH_POOL for temporaries.
 */
svn_error_t *
svn_fs_x__get_changes(apr_array_header_t **changes,
                      svn_fs_x__changes_context_t *context,
                      apr_pool_t *result_pool,
                      apr_pool_t *scratch_pool)
{
  svn_boolean_t found;
  svn_fs_x__data_t *ffd = context->fs->fsap_data;

  /* The changed paths list is addressed as a well-known item of the
   * revision's change set. */
  svn_fs_x__id_t id;
  id.change_set = svn_fs_x__change_set_by_rev(context->revision);
  id.number = SVN_FS_X__ITEM_INDEX_CHANGES;

  /* try cache lookup first */

  if (svn_fs_x__is_packed_rev(context->fs, context->revision))
    {
      /* Packed revision: containers are cached under the pack's base
       * revision and the item's offset.  The partial getter receives the
       * index to start at plus a pointer to CONTEXT->EOL via BATON. */
      apr_off_t offset;
      svn_fs_x__pair_cache_key_t key;
      svn_fs_x__changes_get_list_baton_t baton;
      baton.start = (int)context->next;
      baton.eol = &context->eol;

      SVN_ERR(svn_fs_x__item_offset(&offset, &baton.sub_item, context->fs,
                                    context->revision_file,
                                    &id, scratch_pool));
      key.revision = svn_fs_x__packed_base_rev(context->fs,
                                               context->revision);
      key.second = offset;

      SVN_ERR(svn_cache__get_partial((void **)changes, &found,
                                     ffd->changes_container_cache, &key,
                                     svn_fs_x__changes_get_list_func,
                                     &baton, result_pool));
    }
  else
    {
      /* Non-packed revision: blocks of the changes list are cached under
       * the revision and the index of the first change in the block. */
      svn_fs_x__changes_list_t *changes_list;
      svn_fs_x__pair_cache_key_t key;
      key.revision = context->revision;
      key.second = context->next;

      SVN_ERR(svn_cache__get((void **)&changes_list, &found,
                             ffd->changes_cache, &key, result_pool));

      if (found)
        {
          /* Where to look next - if there is more data. */
          context->eol = changes_list->eol;
          context->next_offset = changes_list->end_offset;

          /* Return the block as a "proper" APR array.  The array header
           * is made to point directly at the cached list's elements
           * instead of copying them. */
          (*changes) = apr_array_make(result_pool, 0, sizeof(void *));
          (*changes)->elts = (char *)changes_list->changes;
          (*changes)->nelts = changes_list->count;
          (*changes)->nalloc = changes_list->count;
        }
    }

  if (!found)
    {
      /* 'block-read' will also provide us with the desired data */
      SVN_ERR(block_read((void **)changes, context->fs, &id,
                         context->revision_file, context,
                         result_pool, scratch_pool));
    }

  /* The next call shall continue right behind the block just returned. */
  context->next += (*changes)->nelts;

  SVN_ERR(dbg__log_access(context->fs, &id, *changes,
                          SVN_FS_X__ITEM_TYPE_CHANGES, scratch_pool));

  return SVN_NO_ERROR;
}
3011
3012/* Fetch the representation data (header, txdelta / plain windows)
3013 * addressed by ENTRY->ITEM in FS and cache it under KEY.  Read the data
3014 * from REV_FILE.  If MAX_OFFSET is not -1, don't read windows that start
3015 * at or beyond that offset.  Use SCRATCH_POOL for temporary allocations.
3016 */
3017static svn_error_t *
3018block_read_contents(svn_fs_t *fs,
3019                    svn_fs_x__revision_file_t *rev_file,
3020                    svn_fs_x__p2l_entry_t* entry,
3021                    svn_fs_x__pair_cache_key_t *key,
3022                    apr_off_t max_offset,
3023                    apr_pool_t *scratch_pool)
3024{
3025  svn_fs_x__representation_cache_key_t header_key = { 0 };
3026  rep_state_t rs = { 0 };
3027  svn_filesize_t fulltext_len;
3028  svn_fs_x__rep_header_t *rep_header;
3029
3030  header_key.revision = (apr_int32_t)key->revision;
3031  header_key.is_packed = svn_fs_x__is_packed_rev(fs, header_key.revision);
3032  header_key.item_index = key->second;
3033
3034  SVN_ERR(read_rep_header(&rep_header, fs, rev_file, &header_key,
3035                          scratch_pool));
3036  SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry, scratch_pool));
3037  SVN_ERR(cache_windows(&fulltext_len, fs, &rs, max_offset, scratch_pool));
3038
3039  return SVN_NO_ERROR;
3040}
3041
3042/* For the given REV_FILE in FS, in *STREAM return a stream covering the
3043 * item specified by ENTRY.  Also, verify the item's content by low-level
3044 * checksum.  Allocate the result in RESULT_POOL.
3045 */
3046static svn_error_t *
3047read_item(svn_stream_t **stream,
3048          svn_fs_t *fs,
3049          svn_fs_x__revision_file_t *rev_file,
3050          svn_fs_x__p2l_entry_t* entry,
3051          apr_pool_t *result_pool)
3052{
3053  apr_uint32_t digest;
3054  svn_checksum_t *expected, *actual;
3055  apr_uint32_t plain_digest;
3056  svn_stringbuf_t *text;
3057
3058  /* Read item into string buffer. */
3059  text = svn_stringbuf_create_ensure(entry->size, result_pool);
3060  text->len = entry->size;
3061  text->data[text->len] = 0;
3062  SVN_ERR(svn_fs_x__rev_file_read(rev_file, text->data, text->len));
3063
3064  /* Return (construct, calculate) stream and checksum. */
3065  *stream = svn_stream_from_stringbuf(text, result_pool);
3066  digest = svn__fnv1a_32x4(text->data, text->len);
3067
3068  /* Checksums will match most of the time. */
3069  if (entry->fnv1_checksum == digest)
3070    return SVN_NO_ERROR;
3071
3072  /* Construct proper checksum objects from their digests to allow for
3073   * nice error messages. */
3074  plain_digest = htonl(entry->fnv1_checksum);
3075  expected = svn_checksum__from_digest_fnv1a_32x4(
3076                (const unsigned char *)&plain_digest, result_pool);
3077  plain_digest = htonl(digest);
3078  actual = svn_checksum__from_digest_fnv1a_32x4(
3079                (const unsigned char *)&plain_digest, result_pool);
3080
3081  /* Construct the full error message with all the info we have. */
3082  return svn_checksum_mismatch_err(expected, actual, result_pool,
3083                 _("Low-level checksum mismatch while reading\n"
3084                   "%s bytes of meta data at offset %s "),
3085                 apr_off_t_toa(result_pool, entry->size),
3086                 apr_off_t_toa(result_pool, entry->offset));
3087}
3088
/* If not already cached or if MUST_READ is set, read one block of the
 * changed paths list addressed by ENTRY in FS from REV_FILE, return it
 * in *CHANGES and store it in the changes cache.
 *
 * If MUST_READ is set, the block starts at CONTEXT->NEXT_OFFSET within
 * the list and CONTEXT->NEXT_OFFSET as well as CONTEXT->EOL get updated
 * afterwards.  Otherwise, the first block of the list is read and CONTEXT
 * is not accessed; if that block is already cached, nothing is read and
 * *CHANGES is left untouched.
 *
 * Allocate *CHANGES in RESULT_POOL and allocate temporaries in
 * SCRATCH_POOL.
 */
static svn_error_t *
block_read_changes(apr_array_header_t **changes,
                   svn_fs_t *fs,
                   svn_fs_x__revision_file_t *rev_file,
                   svn_fs_x__p2l_entry_t* entry,
                   svn_fs_x__changes_context_t *context,
                   svn_boolean_t must_read,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_stream_t *stream;
  svn_fs_x__pair_cache_key_t key;
  svn_fs_x__changes_list_t changes_list;

  /* If we don't have to return any data, just read and cache the first
     block.  This means we won't cache the remaining blocks from longer
     lists right away but only if they are actually needed. */
  apr_size_t next = must_read ? context->next : 0;
  apr_size_t next_offset = must_read ? context->next_offset : 0;

  /* we don't support containers, yet */
  SVN_ERR_ASSERT(entry->item_count == 1);

  /* The item to read / write. */
  key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
  key.second = next;

  /* already in cache? */
  if (!must_read)
    {
      svn_boolean_t is_cached = FALSE;
      SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
                                 scratch_pool));
      if (is_cached)
        return SVN_NO_ERROR;
    }

  /* Verify the whole list only once, i.e. when reading its first block.
     read_item() checks the low-level checksum; we don't use the STREAM
     it returns any further. */
  if (!must_read || next == 0)
    SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));

  /* Seek to the block to read within the changes list. */
  SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL,
                                  entry->offset + next_offset));
  SVN_ERR(svn_fs_x__rev_file_stream(&stream, rev_file));

  /* read changes from revision file */
  SVN_ERR(svn_fs_x__read_changes(changes, stream, SVN_FS_X__CHANGES_BLOCK_SIZE,
                                 result_pool, scratch_pool));

  /* Describe the block we just read.  Offsets are relative to the start
     of the item.  We reached the end of the list if the block is shorter
     than a full block or if the file position is at the end of the item
     (allowing for one more byte). */
  SVN_ERR(svn_fs_x__rev_file_offset(&changes_list.end_offset, rev_file));
  changes_list.end_offset -= entry->offset;
  changes_list.start_offset = next_offset;
  changes_list.count = (*changes)->nelts;
  changes_list.changes = (svn_fs_x__change_t **)(*changes)->elts;
  changes_list.eol =    (changes_list.count < SVN_FS_X__CHANGES_BLOCK_SIZE)
                     || (changes_list.end_offset + 1 >= entry->size);

  /* cache for future reference */

  SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
                         scratch_pool));

  /* Tell the caller where to continue and whether there is more data.
     Only do this if the caller actually asked for this block. */
  if (must_read)
    {
      context->next_offset = changes_list.end_offset;
      context->eol = changes_list.eol;
    }

  return SVN_NO_ERROR;
}
3169
3170/* If not already cached or if MUST_READ is set, read the changed paths
3171 * list container addressed by ENTRY in FS.  Return the changes list
3172 * identified by SUB_ITEM in *CHANGES, using CONTEXT to select a sub-range
3173 * within that list.  Read the data from REV_FILE and cache the result.
3174 *
3175 * Allocate *CHANGES in RESUSLT_POOL and everything else in SCRATCH_POOL.
3176 */
3177static svn_error_t *
3178block_read_changes_container(apr_array_header_t **changes,
3179                             svn_fs_t *fs,
3180                             svn_fs_x__revision_file_t *rev_file,
3181                             svn_fs_x__p2l_entry_t* entry,
3182                             apr_uint32_t sub_item,
3183                             svn_fs_x__changes_context_t *context,
3184                             svn_boolean_t must_read,
3185                             apr_pool_t *result_pool,
3186                             apr_pool_t *scratch_pool)
3187{
3188  svn_fs_x__data_t *ffd = fs->fsap_data;
3189  svn_fs_x__changes_t *container;
3190  svn_fs_x__pair_cache_key_t key;
3191  svn_stream_t *stream;
3192  svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3193
3194  key.revision = svn_fs_x__packed_base_rev(fs, revision);
3195  key.second = entry->offset;
3196
3197  /* already in cache? */
3198  if (!must_read)
3199    {
3200      svn_boolean_t is_cached = FALSE;
3201      SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_container_cache,
3202                                 &key, scratch_pool));
3203      if (is_cached)
3204        return SVN_NO_ERROR;
3205    }
3206
3207  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3208
3209  /* read changes from revision file */
3210
3211  SVN_ERR(svn_fs_x__read_changes_container(&container, stream, scratch_pool,
3212                                           scratch_pool));
3213
3214  /* extract requested data */
3215
3216  if (must_read)
3217    SVN_ERR(svn_fs_x__changes_get_list(changes, container, sub_item,
3218                                       context, result_pool));
3219  SVN_ERR(svn_cache__set(ffd->changes_container_cache, &key, container,
3220                         scratch_pool));
3221
3222  return SVN_NO_ERROR;
3223}
3224
3225/* If not already cached or if MUST_READ is set, read the node revision
3226 * addressed by ENTRY in FS and return it in *NODEREV_P.  Cache the
3227 * result under KEY if caching is enabled.  Read the data from REV_FILE.
3228 * Allocate *NODEREV_P in RESUSLT_POOL and allocate temporaries in
3229 * SCRATCH_POOL.
3230 */
3231static svn_error_t *
3232block_read_noderev(svn_fs_x__noderev_t **noderev_p,
3233                   svn_fs_t *fs,
3234                   svn_fs_x__revision_file_t *rev_file,
3235                   svn_fs_x__p2l_entry_t* entry,
3236                   svn_fs_x__pair_cache_key_t *key,
3237                   svn_boolean_t must_read,
3238                   apr_pool_t *result_pool,
3239                   apr_pool_t *scratch_pool)
3240{
3241  svn_fs_x__data_t *ffd = fs->fsap_data;
3242  svn_stream_t *stream;
3243
3244  /* we don't support containers, yet */
3245  SVN_ERR_ASSERT(entry->item_count == 1);
3246
3247  /* already in cache? */
3248  if (!must_read)
3249    {
3250      svn_boolean_t is_cached = FALSE;
3251      SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache, key,
3252                                 scratch_pool));
3253      if (is_cached)
3254        return SVN_NO_ERROR;
3255    }
3256
3257  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3258
3259  /* read node rev from revision file */
3260
3261  SVN_ERR(svn_fs_x__read_noderev(noderev_p, stream, result_pool,
3262                                 scratch_pool));
3263  SVN_ERR(svn_cache__set(ffd->node_revision_cache, key, *noderev_p,
3264                         scratch_pool));
3265
3266  return SVN_NO_ERROR;
3267}
3268
3269/* If not already cached or if MUST_READ is set, read the node revision
3270 * container addressed by ENTRY in FS.  Return the item identified by
3271 * SUB_ITEM in *NODEREV_P.  Read the data from REV_FILE and cache it.
3272 * Allocate *NODEREV_P in RESUSLT_POOL and allocate temporaries in
3273 * SCRATCH_POOL.
3274 */
3275static svn_error_t *
3276block_read_noderevs_container(svn_fs_x__noderev_t **noderev_p,
3277                              svn_fs_t *fs,
3278                              svn_fs_x__revision_file_t *rev_file,
3279                              svn_fs_x__p2l_entry_t* entry,
3280                              apr_uint32_t sub_item,
3281                              svn_boolean_t must_read,
3282                              apr_pool_t *result_pool,
3283                              apr_pool_t *scratch_pool)
3284{
3285  svn_fs_x__data_t *ffd = fs->fsap_data;
3286  svn_fs_x__noderevs_t *container;
3287  svn_stream_t *stream;
3288  svn_fs_x__pair_cache_key_t key;
3289  svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3290
3291  key.revision = svn_fs_x__packed_base_rev(fs, revision);
3292  key.second = entry->offset;
3293
3294  /* already in cache? */
3295  if (!must_read)
3296    {
3297      svn_boolean_t is_cached = FALSE;
3298      SVN_ERR(svn_cache__has_key(&is_cached, ffd->noderevs_container_cache,
3299                                 &key, scratch_pool));
3300      if (is_cached)
3301        return SVN_NO_ERROR;
3302    }
3303
3304  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3305
3306  /* read noderevs from revision file */
3307  SVN_ERR(svn_fs_x__read_noderevs_container(&container, stream, scratch_pool,
3308                                            scratch_pool));
3309
3310  /* extract requested data */
3311  if (must_read)
3312    SVN_ERR(svn_fs_x__noderevs_get(noderev_p, container, sub_item,
3313                                   result_pool));
3314
3315  SVN_ERR(svn_cache__set(ffd->noderevs_container_cache, &key, container,
3316                         scratch_pool));
3317
3318  return SVN_NO_ERROR;
3319}
3320
3321/* If not already cached or if MUST_READ is set, read the representation
3322 * container addressed by ENTRY in FS.  Return an extractor object for the
3323 * item identified by SUB_ITEM in *EXTRACTOR.  Read the data from REV_FILE
3324 * and cache it.  Allocate *EXTRACTOR in RESUSLT_POOL and all temporaries
3325 * in SCRATCH_POOL.
3326 */
3327static svn_error_t *
3328block_read_reps_container(svn_fs_x__rep_extractor_t **extractor,
3329                          svn_fs_t *fs,
3330                          svn_fs_x__revision_file_t *rev_file,
3331                          svn_fs_x__p2l_entry_t* entry,
3332                          apr_uint32_t sub_item,
3333                          svn_boolean_t must_read,
3334                          apr_pool_t *result_pool,
3335                          apr_pool_t *scratch_pool)
3336{
3337  svn_fs_x__data_t *ffd = fs->fsap_data;
3338  svn_fs_x__reps_t *container;
3339  svn_stream_t *stream;
3340  svn_fs_x__pair_cache_key_t key;
3341  svn_revnum_t revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3342
3343  key.revision = svn_fs_x__packed_base_rev(fs, revision);
3344  key.second = entry->offset;
3345
3346  /* already in cache? */
3347  if (!must_read)
3348    {
3349      svn_boolean_t is_cached = FALSE;
3350      SVN_ERR(svn_cache__has_key(&is_cached, ffd->reps_container_cache,
3351                                 &key, scratch_pool));
3352      if (is_cached)
3353        return SVN_NO_ERROR;
3354    }
3355
3356  SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3357
3358  /* read noderevs from revision file */
3359  SVN_ERR(svn_fs_x__read_reps_container(&container, stream, result_pool,
3360                                        scratch_pool));
3361
3362  /* extract requested data */
3363
3364  if (must_read)
3365    SVN_ERR(svn_fs_x__reps_get(extractor, fs, container, sub_item,
3366                               result_pool));
3367
3368  SVN_ERR(svn_cache__set(ffd->reps_container_cache, &key, container,
3369                         scratch_pool));
3370
3371  return SVN_NO_ERROR;
3372}
3373
3374/* Read the whole (e.g. 64kB) block containing the item identified by ID in
3375 * FS and put all data into cache.  If necessary and depending on heuristics,
3376 * neighboring blocks may also get read.  The data is being read from
3377 * already open REVISION_FILE, which must be the correct rev / pack file
3378 * w.r.t. ID->CHANGE_SET.
3379 *
3380 * For noderevs and changed path lists, the item fetched can be allocated
3381 * RESULT_POOL and returned in *RESULT.  Otherwise, RESULT must be NULL.
3382 * The BATON is passed along to the extractor sub-functions and will be
3383 * used only when constructing the *RESULT.  SCRATCH_POOL will be used for
3384 * all temporary allocations.
3385 */
3386static svn_error_t *
3387block_read(void **result,
3388           svn_fs_t *fs,
3389           const svn_fs_x__id_t *id,
3390           svn_fs_x__revision_file_t *revision_file,
3391           void *baton,
3392           apr_pool_t *result_pool,
3393           apr_pool_t *scratch_pool)
3394{
3395  svn_fs_x__data_t *ffd = fs->fsap_data;
3396  apr_off_t offset, wanted_offset = 0;
3397  apr_off_t block_start = 0;
3398  apr_uint32_t wanted_sub_item = 0;
3399  svn_revnum_t revision = svn_fs_x__get_revnum(id->change_set);
3400  apr_array_header_t *entries;
3401  int run_count = 0;
3402  int i;
3403  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3404
3405  /* don't try this on transaction protorev files */
3406  SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3407
3408  /* index lookup: find the OFFSET of the item we *must* read plus (in the
3409   * "do-while" block) the list of items in the same block. */
3410  SVN_ERR(svn_fs_x__item_offset(&wanted_offset, &wanted_sub_item, fs,
3411                                revision_file, id, iterpool));
3412
3413  offset = wanted_offset;
3414  do
3415    {
3416      /* fetch list of items in the block surrounding OFFSET */
3417      SVN_ERR(svn_fs_x__rev_file_seek(revision_file, &block_start, offset));
3418      SVN_ERR(svn_fs_x__p2l_index_lookup(&entries, fs, revision_file,
3419                                         revision, block_start,
3420                                         ffd->block_size, scratch_pool,
3421                                         scratch_pool));
3422
3423      /* read all items from the block */
3424      for (i = 0; i < entries->nelts; ++i)
3425        {
3426          svn_boolean_t is_result, is_wanted;
3427          apr_pool_t *pool;
3428
3429          svn_fs_x__p2l_entry_t* entry
3430            = &APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t);
3431
3432          /* skip empty sections */
3433          if (entry->type == SVN_FS_X__ITEM_TYPE_UNUSED)
3434            continue;
3435
3436          /* the item / container we were looking for? */
3437          is_wanted =    entry->offset == wanted_offset
3438                      && entry->item_count >= wanted_sub_item
3439                      && svn_fs_x__id_eq(entry->items + wanted_sub_item, id);
3440          is_result = result && is_wanted;
3441
3442          /* select the pool that we want the item to be allocated in */
3443          pool = is_result ? result_pool : iterpool;
3444
3445          /* handle all items that start within this block and are relatively
3446           * small (i.e. < block size).  Always read the item we need to return.
3447           */
3448          if (is_result || (   entry->offset >= block_start
3449                            && entry->size < ffd->block_size))
3450            {
3451              void *item = NULL;
3452              svn_fs_x__pair_cache_key_t key = { 0 };
3453              key.revision = svn_fs_x__get_revnum(entry->items[0].change_set);
3454              key.second = entry->items[0].number;
3455
3456              SVN_ERR(svn_fs_x__rev_file_seek(revision_file, NULL,
3457                                              entry->offset));
3458              switch (entry->type)
3459                {
3460                  case SVN_FS_X__ITEM_TYPE_FILE_REP:
3461                  case SVN_FS_X__ITEM_TYPE_DIR_REP:
3462                  case SVN_FS_X__ITEM_TYPE_FILE_PROPS:
3463                  case SVN_FS_X__ITEM_TYPE_DIR_PROPS:
3464                    SVN_ERR(block_read_contents(fs, revision_file,
3465                                                entry, &key,
3466                                                is_wanted
3467                                                  ? -1
3468                                                  : block_start + ffd->block_size,
3469                                                iterpool));
3470                    break;
3471
3472                  case SVN_FS_X__ITEM_TYPE_NODEREV:
3473                    SVN_ERR(block_read_noderev((svn_fs_x__noderev_t **)&item,
3474                                               fs, revision_file,
3475                                               entry, &key, is_result,
3476                                               pool, iterpool));
3477                    break;
3478
3479                  case SVN_FS_X__ITEM_TYPE_CHANGES:
3480                    SVN_ERR(block_read_changes((apr_array_header_t **)&item,
3481                                               fs, revision_file,
3482                                               entry, baton, is_result,
3483                                               pool, iterpool));
3484                    break;
3485
3486                  case SVN_FS_X__ITEM_TYPE_CHANGES_CONT:
3487                    SVN_ERR(block_read_changes_container
3488                                            ((apr_array_header_t **)&item,
3489                                             fs, revision_file,
3490                                             entry, wanted_sub_item,
3491                                             baton, is_result,
3492                                             pool, iterpool));
3493                    break;
3494
3495                  case SVN_FS_X__ITEM_TYPE_NODEREVS_CONT:
3496                    SVN_ERR(block_read_noderevs_container
3497                                            ((svn_fs_x__noderev_t **)&item,
3498                                             fs, revision_file,
3499                                             entry, wanted_sub_item,
3500                                             is_result, pool, iterpool));
3501                    break;
3502
3503                  case SVN_FS_X__ITEM_TYPE_REPS_CONT:
3504                    SVN_ERR(block_read_reps_container
3505                                      ((svn_fs_x__rep_extractor_t **)&item,
3506                                       fs, revision_file,
3507                                       entry, wanted_sub_item,
3508                                       is_result, pool, iterpool));
3509                    break;
3510
3511                  default:
3512                    break;
3513                }
3514
3515              if (is_result)
3516                *result = item;
3517
3518              /* if we crossed a block boundary, read the remainder of
3519               * the last block as well */
3520              offset = entry->offset + entry->size;
3521              if (offset - block_start > ffd->block_size)
3522                ++run_count;
3523
3524              svn_pool_clear(iterpool);
3525            }
3526        }
3527    }
3528  while(run_count++ == 1); /* can only be true once and only if a block
3529                            * boundary got crossed */
3530
3531  /* if the caller requested a result, we must have provided one by now */
3532  assert(!result || *result);
3533  svn_pool_destroy(iterpool);
3534
3535  return SVN_NO_ERROR;
3536}
3537