cache-membuffer.c revision 299742
1/*
2 * cache-membuffer.c: in-memory caching for Subversion
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24#include <assert.h>
25#include <apr_md5.h>
26#include <apr_thread_rwlock.h>
27
28#include "svn_pools.h"
29#include "svn_checksum.h"
30#include "svn_private_config.h"
31#include "svn_string.h"
32#include "svn_sorts.h"  /* get the MIN macro */
33
34#include "private/svn_atomic.h"
35#include "private/svn_dep_compat.h"
36#include "private/svn_mutex.h"
37#include "private/svn_string_private.h"
38
39#include "cache.h"
40#include "fnv1a.h"
41
42/*
43 * This svn_cache__t implementation actually consists of two parts:
44 * a shared (per-process) singleton membuffer cache instance and shallow
45 * svn_cache__t front-end instances that each use different key spaces.
46 * For data management, they all forward to the singleton membuffer cache.
47 *
48 * A membuffer cache consists of two parts:
49 *
50 * 1. A linear data buffer containing cached items in a serialized
51 *    representation, prefixed by their full cache keys. There may be
52 *    arbitrary gaps between entries.  This buffer is sub-divided into
53 *    (currently two) cache levels.
54 *
55 * 2. A directory of cache entries. This is organized similar to CPU
56 *    data caches: for every possible key, there is exactly one group
57 *    of entries that may contain the header info for an item with
58 *    that given key.  The result is a GROUP_SIZE-way (or more) associative
59 *    cache whose associativity can be dynamically increased.
60 *
61 * Only the start addresses of these two data parts are given as native
62 * pointers. All other references are expressed as offsets relative to these
63 * pointers. With that design, it is relatively easy to share the same data
64 * structures between different processes and / or to persist them on disk.
65 * These out-of-process features have not been implemented yet.
66 *
67 * Superficially, cache levels are being used as usual: insertion happens
68 * into L1 and evictions will promote items to L2.  But their whole point
69 * is a different one.  L1 uses a circular buffer, i.e. we have perfect
70 * caching for the last N bytes where N is the size of L1.  L2 uses a more
71 * elaborate scheme based on priorities and hit counts as described below.
72 *
73 * The data buffer usage information is implicitly given by the directory
74 * entries. Every USED entry has a reference to the previous and the next
75 * used dictionary entry and this doubly-linked list is ordered by the
76 * offsets of their item data within the data buffer. So removing data,
77 * for instance, is done simply by unlinking it from the chain, implicitly
78 * marking the entry as well as the data buffer section previously
79 * associated with it as unused.  The first and last elements of that chain
80 * are referenced from the respective cache level.
81 *
82 * Insertion can occur at only one, sliding position per cache level.  It is
83 * marked by its offset in the data buffer and the index of the first used
84 * entry at or behind that position.  If this gap is too small to accommodate
85 * the new item (plus its full key), the insertion window is extended as
86 * described below.  The new entry will always be inserted at the bottom end
87 * of the window and since the next used entry is known, properly sorted
88 * insertion is possible.
89 *
90 * To make the cache perform robustly in a wide range of usage scenarios,
91 * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
92 * details). Every item holds a read hit counter and there is a global read
93 * hit counter. The more hits an entry has in relation to the average, the
94 * more likely it is to be kept, using a rand()-based condition. The test is
95 * applied only to the entry following the insertion window. If it doesn't
96 * get evicted, it is moved to the beginning of that window and the window is
97 * moved.
98 *
99 * Moreover, the entry's hits get halved to make that entry more likely to
100 * be removed the next time the sliding insertion / removal window comes by.
101 * As a result, frequently used entries are likely not to be dropped until
102 * they have not been used for a while.  Also, even in a cache thrashing
103 * situation, about 50% of the content survives every time 50% of the cache
104 * gets re-written with new entries. For details on the fine-tuning involved,
105 * see the comments in ensure_data_insertable_l2().
106 *
107 * Due to the randomized mapping of keys to entry groups, some groups may
108 * overflow.  In that case, there are spare groups that can be chained to
109 * an already used group to extend it.
110 *
111 * To limit the entry size and management overhead, not the actual item keys
112 * but only their hashed "fingerprint" will be stored.  These are reasonably
113 * unique to prevent collisions, so we only need to support up to one entry
114 * per entry key.  To guarantee that there are no conflicts, however, we
115 * store the actual full key immediately in front of the serialized item
116 * data.  That is, the entry offset actually points to the full key and the
117 * key length stored in the entry acts as an additional offset to find the
118 * actual item.
119 *
120 * All access to the cached data needs to be serialized. Because we want
121 * to scale well despite that bottleneck, we simply segment the cache into
122 * a number of independent caches (segments). Items will be multiplexed based
123 * on their hash key.
124 */
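/* Illustration only, not code from this file: conceptually, a lookup walks
 * key -> group -> entry -> data.  Using the functions defined further down
 * (and a hypothetical combine_key() standing in for the prefix + dynamic key
 * combination done by the svn_cache__t front-ends), the flow is roughly:
 *
 *   full_key_t k;
 *   combine_key(..., &k);                               // hypothetical helper
 *   group_index = get_group_index(&cache, &k.entry_key); // segment + group
 *   entry = find_entry(cache, group_index, &k, FALSE);   // may be NULL
 *   item = cache->data + entry->offset + entry->key.key_len;
 *
 * i.e. the serialized item sits right behind its full key in the data buffer.
 */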
125
126/* APR's read-write lock implementation on Windows is horribly inefficient.
127 * Even with very low contention a runtime overhead of 35% has been
128 * measured for 'svn-bench null-export' over ra_serf.
129 *
130 * Use a simple mutex on Windows.  Because there is one mutex per segment,
131 * large machines should (and usually can) be configured with large caches
132 * such that read contention is kept low.  This is basically the situation
133 * we had before 1.8.
134 */
135#ifdef WIN32
136#  define USE_SIMPLE_MUTEX 1
137#else
138#  define USE_SIMPLE_MUTEX 0
139#endif
140
141/* For more efficient copy operations, let's align all data items properly.
142 * Since we can't portably align pointers, this is rather the item size
143 * granularity which ensures *relative* alignment within the cache - still
144 * giving us decent copy speeds on most machines.
145 *
146 * Must be a power of 2.
147 */
148#define ITEM_ALIGNMENT 16
149
150/* By default, don't create cache segments smaller than this value unless
151 * the total cache size itself is smaller.
152 */
153#define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)
154
155/* The minimum segment size we will allow for multi-segmented caches
156 */
157#define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)
158
159/* The maximum number of segments allowed. Larger numbers reduce the size
160 * of each segment, in turn reducing the max size of a cachable item.
161 * Also, each segment gets its own lock object. The actual number supported
162 * by the OS may therefore be lower and svn_cache__membuffer_cache_create
163 * may return an error.
164 */
165#define MAX_SEGMENT_COUNT 0x10000
166
167/* As of today, APR won't allocate chunks of 4GB or more. So, limit the
168 * segment size to slightly below that.
169 */
170#define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)
171
172/* We don't mark the initialization status for every group but initialize
173 * a number of groups at once. That will allow for a very small init flags
174 * vector that is likely to fit into the CPU caches even for fairly large
175 * membuffer caches. For instance, the default of 32 means 8x32 groups per
176 * byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group x 40 index
177 * bytes/entry x 8 cache bytes/index byte = 1kB init vector / 640MB cache.
178 */
179#define GROUP_INIT_GRANULARITY 32
180
181/* Invalid index reference value. Equivalent to (apr_uint32_t)(-1)
182 */
183#define NO_INDEX APR_UINT32_MAX
184
185/* To save space in our group structure, we only use 32 bit size values
186 * and, therefore, limit the size of each entry to just below 4GB.
187 * Supporting larger items is not a good idea as the data transfer
188 * to and from the cache would block other threads for a very long time.
189 */
190#define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))
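/* For illustration: with ITEM_ALIGNMENT 16, MAX_ITEM_SIZE evaluates to
 * (apr_uint32_t)(-16) == 0xfffffff0, i.e. just below 4GB as stated above.
 */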
191
192/* We use this structure to identify cache entries. There cannot be two
193 * entries with the same entry key. However unlikely, though, two different
194 * full keys (see full_key_t) may have the same entry key.  That is a
195 * collision and at most one of them can be stored in the cache at any time.
196 */
197typedef struct entry_key_t
198{
199  /* 16-byte fingerprint of the full key. */
200  apr_uint64_t fingerprint[2];
201
202  /* Length of the full key.  This value is aligned to ITEM_ALIGNMENT to
203   * make sure the subsequent item content is properly aligned. */
204  apr_size_t key_len;
205} entry_key_t;
206
207/* A full key, i.e. the combination of the cache's key prefix with some
208 * dynamic part appended to it.  It also contains its ENTRY_KEY.
209 */
210typedef struct full_key_t
211{
212  /* Reduced form identifying the cache entry (if such an entry exists). */
213  entry_key_t entry_key;
214
215  /* This contains the full combination.  Note that the SIZE element may
216   * be larger than ENTRY_KEY.KEY_LEN, but only the latter determines the
217   * valid key size. */
218  svn_membuf_t full_key;
219} full_key_t;
220
221/* Debugging / corruption detection support.
222 * If you define this macro, the getter functions will perform expensive
223 * checks on the item data, requested keys and entry types. If there is
224 * a mismatch found in any of them when being compared with the values
225 * remembered in the setter function, an error will be returned.
226 */
227#ifdef SVN_DEBUG_CACHE_MEMBUFFER
228
229/* The prefix passed to svn_cache__create_membuffer_cache() effectively
230 * defines the type of all items stored by that cache instance. We'll take
231 * the last 15 bytes + \0 as plaintext for easy identification by the dev.
232 */
233#define PREFIX_TAIL_LEN 16
234
235/* This record will be attached to any cache entry. It tracks item data
236 * (content), key and type as hash values and is the baseline against which
237 * the getters will compare their results to detect inconsistencies.
238 */
239typedef struct entry_tag_t
240{
241  /* MD5 checksum over the serialized item data.
242   */
243  unsigned char content_hash[APR_MD5_DIGESTSIZE];
244
245  /* Hash value of the svn_cache_t instance that wrote the item
246   * (i.e. a combination of type and repository)
247   */
248  unsigned char prefix_hash[APR_MD5_DIGESTSIZE];
249
250  /* Note that this only covers the variable part of the key,
251   * i.e. it will be different from the full key hash used for
252   * cache indexing.
253   */
254  unsigned char key_hash[APR_MD5_DIGESTSIZE];
255
256  /* Last letters of the key in human readable format
257   * (ends with the type identifier, e.g. "DAG")
258   */
259  char prefix_tail[PREFIX_TAIL_LEN];
260
261  /* Length of the variable key part.
262   */
263  apr_size_t key_len;
264
265} entry_tag_t;
266
267/* Initialize all members of TAG except for the content hash.
268 */
269static svn_error_t *store_key_part(entry_tag_t *tag,
270                                   const full_key_t *prefix_key,
271                                   const void *key,
272                                   apr_size_t key_len,
273                                   apr_pool_t *pool)
274{
275  svn_checksum_t *checksum;
276  const char *prefix = prefix_key->full_key.data;
277  apr_size_t prefix_len = strlen(prefix);
278
279  if (prefix_len >= sizeof(tag->prefix_tail))
280    {
281      prefix += prefix_len - (sizeof(tag->prefix_tail) - 1);
282      prefix_len = sizeof(tag->prefix_tail) - 1;
283    }
284
285  SVN_ERR(svn_checksum(&checksum,
286                       svn_checksum_md5,
287                       key,
288                       key_len,
289                       pool));
290
291  memcpy(tag->prefix_hash, prefix_key->entry_key.fingerprint,
292         sizeof(tag->prefix_hash));
293  memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));
294
295  memset(tag->prefix_tail, 0, sizeof(tag->prefix_tail));
296  memcpy(tag->prefix_tail, prefix, prefix_len + 1);
297
298  tag->key_len = key_len;
299
300  return SVN_NO_ERROR;
301}
302
303/* Initialize the content hash member of TAG.
304 */
305static svn_error_t* store_content_part(entry_tag_t *tag,
306                                       const void *data,
307                                       apr_size_t size,
308                                       apr_pool_t *pool)
309{
310  svn_checksum_t *checksum;
311  SVN_ERR(svn_checksum(&checksum,
312                       svn_checksum_md5,
313                       data,
314                       size,
315                       pool));
316
317  memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));
318
319  return SVN_NO_ERROR;
320}
321
322/* Compare two tags and fail with an assertion upon differences.
323 */
324static svn_error_t* assert_equal_tags(const entry_tag_t *lhs,
325                                      const entry_tag_t *rhs)
326{
327  SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
328                        sizeof(lhs->content_hash)) == 0);
329  SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
330                        sizeof(lhs->prefix_hash)) == 0);
331  SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
332                        sizeof(lhs->key_hash)) == 0);
333  SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
334                        sizeof(lhs->prefix_tail)) == 0);
335
336  SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);
337
338  return SVN_NO_ERROR;
339}
340
341/* Recurring code snippets.
342 */
343
344#define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,
345
346#define DEBUG_CACHE_MEMBUFFER_TAG tag,
347
348#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)                     \
349  entry_tag_t _tag;                                              \
350  entry_tag_t *tag = &_tag;                                      \
351  if (key)                                                       \
352    SVN_ERR(store_key_part(tag,                                  \
353                           &cache->prefix,                       \
354                           key,                                  \
355                           cache->key_len == APR_HASH_KEY_STRING \
356                               ? strlen((const char *) key)      \
357                               : cache->key_len,                 \
358                           pool));
359
360#else
361
362/* Don't generate any checks if consistency checks have not been enabled.
363 */
364#define DEBUG_CACHE_MEMBUFFER_TAG_ARG
365#define DEBUG_CACHE_MEMBUFFER_TAG
366#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)
367
368#endif /* SVN_DEBUG_CACHE_MEMBUFFER */
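/* Illustration only, not code from this file: a getter or setter built on the
 * macros above would typically look like the (hypothetical) sketch below,
 * where front_end_t stands for a cache front-end providing the PREFIX and
 * KEY_LEN members the tag macro expands against, and do_the_work() is a
 * placeholder for the actual locked operation:
 *
 *   static svn_error_t *
 *   some_accessor(front_end_t *cache, const void *key, apr_pool_t *pool)
 *   {
 *     DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)
 *     SVN_ERR(do_the_work(cache, key, DEBUG_CACHE_MEMBUFFER_TAG pool));
 *     return SVN_NO_ERROR;
 *   }
 *
 * With SVN_DEBUG_CACHE_MEMBUFFER undefined, all three macros expand to
 * nothing and the tag argument disappears from the call.
 */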
369
370/* A single dictionary entry. Since all entries will be allocated once
371 * during cache creation, those entries might be either used or unused.
372 * An entry is used if and only if it is contained in the doubly-linked
373 * list of used entries per cache level.
374 */
375typedef struct entry_t
376{
377  /* Identifying the data item. Only valid for used entries.
378   */
379  entry_key_t key;
380
381  /* The offset of the cached item's serialized data within the cache's
382   * DATA buffer.
383   */
384  apr_uint64_t offset;
385
386  /* Size of the serialized item data. May be 0.  The MAX_ITEM_SIZE macro
387   * above ensures that there will be no overflows.
388   * Only valid for used entries.
389   */
390  apr_size_t size;
391
392  /* Number of (read) hits for this entry. Will be reset upon write.
393   * Only valid for used entries.
394   */
395  svn_atomic_t hit_count;
396
397  /* Reference to the next used entry in the order defined by offset.
398   * NO_INDEX indicates the end of the list; this entry must be referenced
399   * by the cache's cache_level_t.last member.  NO_INDEX also implies that
400   * the data buffer is not used beyond offset+size.
401   * Only valid for used entries.
402   */
403  apr_uint32_t next;
404
405  /* Reference to the previous used entry in the order defined by offset.
406   * NO_INDEX indicates the end of the list; this entry must be referenced
407   * by the cache's cache_level_t.first member.
408   * Only valid for used entries.
409   */
410  apr_uint32_t previous;
411
412  /* Priority of this entry.  This entry will not be replaced by lower-
413   * priority items.
414   */
415  apr_uint32_t priority;
416#ifdef SVN_DEBUG_CACHE_MEMBUFFER
417  /* Remember type, content and key hashes.
418   */
419  entry_tag_t tag;
420#endif
421} entry_t;
422
423/* Group header struct.
424 */
425typedef struct group_header_t
426{
427  /* number of used entries, i.e. entries[0 .. used-1] are occupied */
428  apr_uint32_t used;
429
430  /* next group in the chain or NO_INDEX for the last.
431   * For recyclable unused spare groups, this points to the next
432   * unused spare group */
433  apr_uint32_t next;
434
435  /* previous group in the chain or NO_INDEX for the first */
436  apr_uint32_t previous;
437
438  /* number of elements in the chain from start to here.
439   * >= 1 for used groups, 0 for unused spare groups */
440  apr_uint32_t chain_length;
441
442} group_header_t;
443
444/* The size of the group struct should be a power of two to make sure it does
445 * not cross memory page boundaries.  Since we already access the cache
446 * randomly, having two page table lookups instead of one is bad.
447 */
448#define GROUP_BLOCK_SIZE 512
449
450/* A ~10-way associative cache seems to be a good compromise between
451 * performance (worst-case lookups) and efficiency-loss due to collisions.
452 *
453 * This value may be changed to any positive integer.
454 */
455#define GROUP_SIZE \
456          ((GROUP_BLOCK_SIZE - sizeof(group_header_t)) / sizeof(entry_t))
457
458/* Maximum number of groups in a chain, i.e. a cache index group can hold
459 * up to GROUP_SIZE * MAX_GROUP_CHAIN_LENGTH entries.
460 */
461#define MAX_GROUP_CHAIN_LENGTH 8
462
463/* We group dictionary entries to make this GROUP-SIZE-way associative.
464 */
465typedef struct entry_group_t
466{
467  /* group globals */
468  group_header_t header;
469
470  /* padding and also room for future extensions */
471  char padding[GROUP_BLOCK_SIZE - sizeof(group_header_t)
472               - sizeof(entry_t) * GROUP_SIZE];
473
474  /* the actual entries */
475  entry_t entries[GROUP_SIZE];
476
477} entry_group_t;
478
479/* Per-cache level header structure.  Instances of this are members of
480 * svn_membuffer_t and will use non-overlapping sections of its DATA buffer.
481 * All offset values are global / absolute to that whole buffer.
482 */
483typedef struct cache_level_t
484{
485  /* Reference to the first (defined by the order of content in the data
486   * buffer) dictionary entry used by any data item.
487   * NO_INDEX for an empty cache.
488   */
489  apr_uint32_t first;
490
491  /* Reference to the last (defined by the order of content in the data
492   * buffer) dictionary entry used by any data item.
493   * NO_INDEX for an empty cache.
494   */
495  apr_uint32_t last;
496
497  /* Reference to the first (defined by the order of content in the data
498   * buffer) used dictionary entry behind the insertion position
499   * (current_data). If NO_INDEX, the data buffer is free starting at the
500   * current_data offset.
501   */
502  apr_uint32_t next;
503
504
505  /* First offset in the cache's DATA buffer that belongs to this level.
506   */
507  apr_uint64_t start_offset;
508
509  /* Size of data buffer allocated to this level in bytes. Must be > 0.
510   */
511  apr_uint64_t size;
512
513  /* Offset in the data buffer where the next insertion shall occur.
514   */
515  apr_uint64_t current_data;
516
517} cache_level_t;
518
519/* The cache header structure.
520 */
521struct svn_membuffer_t
522{
523  /* Number of cache segments. Must be a power of 2.
524     Please note that this structure represents only one such segment
525     and that all segments must / will report the same values here. */
526  apr_uint32_t segment_count;
527
528  /* The dictionary, GROUP_SIZE * (group_count + spare_group_count)
529   * entries long.  Never NULL.
530   */
531  entry_group_t *directory;
532
533  /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
534   * Allows for efficiently marking groups as "not initialized".
535   */
536  unsigned char *group_initialized;
537
538  /* Size of dictionary in groups. Must be > 0.
539   */
540  apr_uint32_t group_count;
541
542  /* Total number of spare groups.
543   */
544  apr_uint32_t spare_group_count;
545
546  /* First recyclable spare group.
547   */
548  apr_uint32_t first_spare_group;
549
550  /* Maximum number of spare groups ever used.  I.e. group index
551   * group_count + max_spare_used is the first unused spare group
552   * if first_spare_group is NO_INDEX.
553   */
554  apr_uint32_t max_spare_used;
555
556  /* Pointer to the data buffer, data_size bytes long. Never NULL.
557   */
558  unsigned char *data;
559
560  /* Total number of data buffer bytes in use.
561   */
562  apr_uint64_t data_used;
563
564  /* Largest entry size that we would accept.  For total cache sizes
565   * less than 4TB (sic!), this is determined by the total cache size.
566   */
567  apr_uint64_t max_entry_size;
568
569  /* The cache levels, organized as sub-buffers.  Since entries in the
570   * DIRECTORY use offsets in DATA for addressing, a cache lookup does
571   * not need to know the cache level of a specific item.  Cache levels
572   * are only used to implement a hybrid insertion / eviction strategy.
573   */
574
575  /* First cache level, i.e. most insertions happen here.  Very large
576   * items might get inserted directly into L2.  L1 is a strict FIFO
577   * ring buffer that does not care about item priorities.  All evicted
578   * items get a chance to be promoted to L2.
579   */
580  cache_level_t l1;
581
582  /* Second cache level, i.e. data evicted from L1 will be added here
583   * if the item is "important" enough or the L2 insertion window is large
584   * enough.
585   */
586  cache_level_t l2;
587
588
589  /* Number of used dictionary entries, i.e. number of cached items.
590   * Purely statistical information that may be used for profiling only.
591   * Updates are not synchronized and values may be nonsensical on some
592   * platforms.
593   */
594  apr_uint32_t used_entries;
595
596  /* Total number of calls to membuffer_cache_get.
597   * Purely statistical information that may be used for profiling only.
598   * Updates are not synchronized and values may be nonsensical on some
599   * platforms.
600   */
601  apr_uint64_t total_reads;
602
603  /* Total number of calls to membuffer_cache_set.
604   * Purely statistical information that may be used for profiling only.
605   * Updates are not synchronized and values may be nonsensical on some
606   * platforms.
607   */
608  apr_uint64_t total_writes;
609
610  /* Total number of hits since the cache's creation.
611   * Purely statistical information that may be used for profiling only.
612   * Updates are not synchronized and values may be nonsensical on some
613   * platforms.
614   */
615  apr_uint64_t total_hits;
616
617#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
618  /* A lock for intra-process synchronization to the cache, or NULL if
619   * the cache's creator doesn't feel the cache needs to be
620   * thread-safe.
621   */
622  svn_mutex__t *lock;
623#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
624  /* Same for read-write lock. */
625  apr_thread_rwlock_t *lock;
626
627  /* If set, write accesses will wait until they get exclusive access.
628   * Otherwise, they will become no-ops if the segment is currently
629   * read-locked.  Only used when LOCK is an r/w lock.
630   */
631  svn_boolean_t allow_blocking_writes;
632#endif
633};
634
635/* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
636 */
637#define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)
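/* Illustrative values (ITEM_ALIGNMENT == 16):
 *   ALIGN_VALUE(0)  == 0
 *   ALIGN_VALUE(1)  == 16
 *   ALIGN_VALUE(16) == 16
 *   ALIGN_VALUE(17) == 32
 */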
638
639/* If locking is supported for CACHE, acquire a read lock for it.
640 */
641static svn_error_t *
642read_lock_cache(svn_membuffer_t *cache)
643{
644#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
645  return svn_mutex__lock(cache->lock);
646#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
647  if (cache->lock)
648  {
649    apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
650    if (status)
651      return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
652  }
653
654  return SVN_NO_ERROR;
655#else
656  return SVN_NO_ERROR;
657#endif
658}
659
660/* If locking is supported for CACHE, acquire a write lock for it.
661 * Set *SUCCESS to FALSE, if we couldn't acquire the write lock;
662 * leave it untouched otherwise.
663 */
664static svn_error_t *
665write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
666{
667#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
668  return svn_mutex__lock(cache->lock);
669#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
670  if (cache->lock)
671    {
672      apr_status_t status;
673      if (cache->allow_blocking_writes)
674        {
675          status = apr_thread_rwlock_wrlock(cache->lock);
676        }
677      else
678        {
679          status = apr_thread_rwlock_trywrlock(cache->lock);
680          if (SVN_LOCK_IS_BUSY(status))
681            {
682              *success = FALSE;
683              status = APR_SUCCESS;
684            }
685        }
686
687      if (status)
688        return svn_error_wrap_apr(status,
689                                  _("Can't write-lock cache mutex"));
690    }
691
692  return SVN_NO_ERROR;
693#else
694  return SVN_NO_ERROR;
695#endif
696}
697
698/* If locking is supported for CACHE, acquire an unconditional write lock
699 * for it.
700 */
701static svn_error_t *
702force_write_lock_cache(svn_membuffer_t *cache)
703{
704#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
705  return svn_mutex__lock(cache->lock);
706#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
707  apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
708  if (status)
709    return svn_error_wrap_apr(status,
710                              _("Can't write-lock cache mutex"));
711
712  return SVN_NO_ERROR;
713#else
714  return SVN_NO_ERROR;
715#endif
716}
717
718/* If locking is supported for CACHE, release the current lock
719 * (read or write).  Return ERR; if ERR is clean, return any unlock error.
720 */
721static svn_error_t *
722unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
723{
724#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
725  return svn_mutex__unlock(cache->lock, err);
726#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
727  if (cache->lock)
728  {
729    apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
730    if (err)
731      return err;
732
733    if (status)
734      return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
735  }
736
737  return err;
738#else
739  return err;
740#endif
741}
742
743/* If supported, guard the execution of EXPR with a read lock to CACHE.
744 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
745 */
746#define WITH_READ_LOCK(cache, expr)         \
747do {                                        \
748  SVN_ERR(read_lock_cache(cache));          \
749  SVN_ERR(unlock_cache(cache, (expr)));     \
750} while (0)
751
752/* If supported, guard the execution of EXPR with a write lock to CACHE.
753 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
754 *
755 * The write lock process is complicated if we don't allow waiting for
756 * the lock: If we didn't get the lock, we may still need to remove an
757 * existing entry for the given key because that content is now stale.
758 * Once we discover such an entry, we unconditionally do a blocking
759 * wait for the write lock.  In case no old content could be found, a
760 * failing lock attempt is simply a no-op and we exit the macro.
761 */
762#define WITH_WRITE_LOCK(cache, expr)                            \
763do {                                                            \
764  svn_boolean_t got_lock = TRUE;                                \
765  SVN_ERR(write_lock_cache(cache, &got_lock));                  \
766  if (!got_lock)                                                \
767    {                                                           \
768      svn_boolean_t exists;                                     \
769      SVN_ERR(entry_exists(cache, group_index, key, &exists));  \
770      if (exists)                                               \
771        SVN_ERR(force_write_lock_cache(cache));                 \
772      else                                                      \
773        break;                                                  \
774    }                                                           \
775  SVN_ERR(unlock_cache(cache, (expr)));                         \
776} while (0)
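/* Illustration only (hypothetical helper): the macros above wrap a single
 * locked expression, e.g.
 *
 *   WITH_READ_LOCK(cache, lookup_internal(cache, group_index, key, result));
 *
 * where lookup_internal() stands for any helper returning svn_error_t * that
 * must run with the segment lock held.  WITH_WRITE_LOCK additionally expects
 * GROUP_INDEX and KEY to be in scope, since it may probe entry_exists()
 * before blocking on the lock.
 */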
777
778/* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
779 * been initialized yet.  In that case, this group contains no data.  Otherwise,
780 * a non-zero value is returned.
781 */
782static APR_INLINE unsigned char
783is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
784{
785  unsigned char flags
786    = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
787  unsigned char bit_mask
788    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
789
790  return flags & bit_mask;
791}
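/* Worked example for the bit arithmetic above (GROUP_INIT_GRANULARITY == 32):
 * for group_index 100, the flag lives in byte 100 / 256 == 0 of
 * cache->group_initialized and its mask is 1 << ((100 / 32) % 8)
 * == 1 << 3 == 0x08.
 */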
792
793/* Initializes the section of the directory in CACHE that contains
794 * the entry group identified by GROUP_INDEX. */
795static void
796initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
797{
798  unsigned char bit_mask;
799  apr_uint32_t i;
800
801  /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
802  apr_uint32_t first_index =
803      (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
804  apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
805  if (last_index > cache->group_count + cache->spare_group_count)
806    last_index = cache->group_count + cache->spare_group_count;
807
808  for (i = first_index; i < last_index; ++i)
809    {
810      group_header_t *header = &cache->directory[i].header;
811      header->used = 0;
812      header->chain_length = 1;
813      header->next = NO_INDEX;
814      header->previous = NO_INDEX;
815    }
816
817  /* set the "initialized" bit for these groups */
818  bit_mask
819    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
820  cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
821    |= bit_mask;
822}
823
824/* Return the next available spare group from CACHE and mark it as used.
825 * May return NULL.
826 */
827static entry_group_t *
828allocate_spare_group(svn_membuffer_t *cache)
829{
830  entry_group_t *group = NULL;
831
832  /* is there some ready-to-use group? */
833  if (cache->first_spare_group != NO_INDEX)
834    {
835      group = &cache->directory[cache->first_spare_group];
836      cache->first_spare_group = group->header.next;
837    }
838
839  /* any so far untouched spares available? */
840  else if (cache->max_spare_used < cache->spare_group_count)
841    {
842      apr_uint32_t group_index = cache->group_count + cache->max_spare_used;
843      ++cache->max_spare_used;
844
845      if (!is_group_initialized(cache, group_index))
846        initialize_group(cache, group_index);
847
848      group = &cache->directory[group_index];
849    }
850
851  /* spare groups must be empty */
852  assert(!group || !group->header.used);
853  return group;
854}
855
856/* Mark previously allocated spare group GROUP in CACHE as "unused".
857 */
858static void
859free_spare_group(svn_membuffer_t *cache,
860                 entry_group_t *group)
861{
862  assert(group->header.used == 0);
863  assert(group->header.previous != NO_INDEX);
864  assert(group - cache->directory >= (apr_ssize_t)cache->group_count);
865
866  /* unchain */
867  cache->directory[group->header.previous].header.next = NO_INDEX;
868  group->header.chain_length = 0;
869  group->header.previous = NO_INDEX;
870
871  /* add to chain of spares */
872  group->header.next = cache->first_spare_group;
873  cache->first_spare_group = (apr_uint32_t) (group - cache->directory);
874}
875
876/* Follow the group chain from GROUP in CACHE to its end and return the last
877 * group.  May return GROUP.
878 */
879static entry_group_t *
880last_group_in_chain(svn_membuffer_t *cache,
881                    entry_group_t *group)
882{
883  while (group->header.next != NO_INDEX)
884    group = &cache->directory[group->header.next];
885
886  return group;
887}
888
889/* Return the CHAIN_INDEX-th element in the group chain starting from group
890 * START_GROUP_INDEX in CACHE.
891 */
892static entry_group_t *
893get_group(svn_membuffer_t *cache,
894          apr_uint32_t start_group_index,
895          apr_uint32_t chain_index)
896{
897  entry_group_t *group = &cache->directory[start_group_index];
898  for (; chain_index; --chain_index)
899    group = &cache->directory[group->header.next];
900
901  return group;
902}
903
904/* Resolve a dictionary entry reference, i.e. return the entry
905 * for the given IDX.
906 */
907static APR_INLINE entry_t *
908get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
909{
910  return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
911}
912
913/* Get the index of the given ENTRY.
914 */
915static APR_INLINE apr_uint32_t
916get_index(svn_membuffer_t *cache, entry_t *entry)
917{
918  apr_size_t group_index
919    = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);
920
921  return (apr_uint32_t)group_index * GROUP_SIZE
922       + (apr_uint32_t)(entry - cache->directory[group_index].entries);
923}
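/* Illustration of the index <-> entry mapping used by get_entry() and
 * get_index(): assuming GROUP_SIZE evaluates to 8 on a given build (it is
 * computed at compile time and varies with sizeof(entry_t)), entry 3 of
 * group 5 has the index 5 * 8 + 3 == 43, and get_entry(cache, 43) resolves
 * it back to directory[43 / 8].entries[43 % 8] == directory[5].entries[3].
 */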
924
925/* Return the cache level of ENTRY in CACHE.
926 */
927static cache_level_t *
928get_cache_level(svn_membuffer_t *cache, entry_t *entry)
929{
930  return entry->offset < cache->l1.size ? &cache->l1
931                                        : &cache->l2;
932}
933
934/* Insert ENTRY into the chain of items that belong to LEVEL in CACHE.  IDX
935 * is ENTRY's item index and is only given for efficiency.  The insertion
936 * takes place just before LEVEL->NEXT.  *CACHE will not be modified.
937 */
938static void
939chain_entry(svn_membuffer_t *cache,
940            cache_level_t *level,
941            entry_t *entry,
942            apr_uint32_t idx)
943{
944  /* insert ENTRY before this item */
945  entry_t *next = level->next == NO_INDEX
946                ? NULL
947                : get_entry(cache, level->next);
948  assert(idx == get_index(cache, entry));
949
950  /* update entry chain
951   */
952  entry->next = level->next;
953  if (level->first == NO_INDEX)
954    {
955      /* insert as the first and only entry in the chain
956       */
957      entry->previous = NO_INDEX;
958      level->last = idx;
959      level->first = idx;
960    }
961  else if (next == NULL)
962    {
963      /* insert as the last entry in the chain.
964       * Note that it cannot also be at the beginning of the chain.
965       */
966      entry->previous = level->last;
967      get_entry(cache, level->last)->next = idx;
968      level->last = idx;
969    }
970  else
971    {
972      /* insert either at the start of a non-empty list or
973       * somewhere in the middle
974       */
975      entry->previous = next->previous;
976      next->previous = idx;
977
978      if (entry->previous != NO_INDEX)
979        get_entry(cache, entry->previous)->next = idx;
980      else
981        level->first = idx;
982    }
983}
984
985/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE. IDX
986 * is ENTRY's item index and is only given for efficiency.  Please note
987 * that neither *CACHE nor *ENTRY will be modified.
988 */
989static void
990unchain_entry(svn_membuffer_t *cache,
991              cache_level_t *level,
992              entry_t *entry,
993              apr_uint32_t idx)
994{
995  assert(idx == get_index(cache, entry));
996
997  /* update
998   */
999  if (level->next == idx)
1000    level->next = entry->next;
1001
1002  /* unlink it from the chain of used entries
1003   */
1004  if (entry->previous == NO_INDEX)
1005    level->first = entry->next;
1006  else
1007    get_entry(cache, entry->previous)->next = entry->next;
1008
1009  if (entry->next == NO_INDEX)
1010    level->last = entry->previous;
1011  else
1012    get_entry(cache, entry->next)->previous = entry->previous;
1013}
1014
1015/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
1016 * In contrast to insertion, removal is possible for any entry.
1017 */
1018static void
1019drop_entry(svn_membuffer_t *cache, entry_t *entry)
1020{
1021  /* the group that ENTRY belongs to plus a number of useful index values
1022   */
1023  apr_uint32_t idx = get_index(cache, entry);
1024  apr_uint32_t group_index = idx / GROUP_SIZE;
1025  entry_group_t *last_group
1026    = last_group_in_chain(cache, &cache->directory[group_index]);
1027  apr_uint32_t last_in_group
1028    = (apr_uint32_t) ((last_group - cache->directory) * GROUP_SIZE
1029    + last_group->header.used - 1);
1030
1031  cache_level_t *level = get_cache_level(cache, entry);
1032
1033  /* update global cache usage counters
1034   */
1035  cache->used_entries--;
1036  cache->data_used -= entry->size;
1037
1038  /* extend the insertion window, if the entry happens to border it
1039   */
1040  if (idx == level->next)
1041    level->next = entry->next;
1042  else
1043    if (entry->next == level->next)
1044      {
1045        /* insertion window starts right behind the entry to remove
1046         */
1047        if (entry->previous == NO_INDEX)
1048          {
1049            /* remove the first entry -> insertion may start at pos 0, now */
1050            level->current_data = level->start_offset;
1051          }
1052        else
1053          {
1054            /* insertion may start right behind the previous entry */
1055            entry_t *previous = get_entry(cache, entry->previous);
1056            level->current_data = ALIGN_VALUE(  previous->offset
1057                                              + previous->size);
1058          }
1059      }
1060
1061  /* unlink it from the chain of used entries
1062   */
1063  unchain_entry(cache, level, entry, idx);
1064
1065  /* Move last entry into hole (if the removed one is not the last used).
1066   * We need to do this since all used entries are at the beginning of
1067   * the group's entries array.
1068   */
1069  if (idx != last_in_group)
1070    {
1071      /* copy the last used entry to the removed entry's index
1072       */
1073      *entry = last_group->entries[last_group->header.used-1];
1074
1075      /* this ENTRY may belong to a different cache level than the entry
1076       * we have just removed */
1077      level = get_cache_level(cache, entry);
1078
1079      /* update foreign links to new index
1080       */
1081      if (last_in_group == level->next)
1082        level->next = idx;
1083
1084      if (entry->previous == NO_INDEX)
1085        level->first = idx;
1086      else
1087        get_entry(cache, entry->previous)->next = idx;
1088
1089      if (entry->next == NO_INDEX)
1090        level->last = idx;
1091      else
1092        get_entry(cache, entry->next)->previous = idx;
1093    }
1094
1095  /* Update the number of used entries.
1096   */
1097  last_group->header.used--;
1098
1099  /* Release the last group in the chain if it is a spare group
1100   */
1101  if (!last_group->header.used && last_group->header.previous != NO_INDEX)
1102    free_spare_group(cache, last_group);
1103}
1104
1105/* Insert ENTRY into the chain of used dictionary entries. The entry's
1106 * offset and size members must already have been initialized. Also,
1107 * the offset must match the beginning of the insertion window.
1108 */
1109static void
1110insert_entry(svn_membuffer_t *cache, entry_t *entry)
1111{
1112  /* the group that ENTRY belongs to plus a number of useful index values
1113   */
1114  apr_uint32_t idx = get_index(cache, entry);
1115  apr_uint32_t group_index = idx / GROUP_SIZE;
1116  entry_group_t *group = &cache->directory[group_index];
1117  cache_level_t *level = get_cache_level(cache, entry);
1118
1119  /* The entry must start at the beginning of the insertion window.
1120   * It must also be the first unused entry in the group.
1121   */
1122  assert(entry->offset == level->current_data);
1123  assert(idx == group_index * GROUP_SIZE + group->header.used);
1124  level->current_data = ALIGN_VALUE(entry->offset + entry->size);
1125
1126  /* update usage counters
1127   */
1128  cache->used_entries++;
1129  cache->data_used += entry->size;
1130  entry->hit_count = 0;
1131  group->header.used++;
1132
1133  /* update entry chain
1134   */
1135  chain_entry(cache, level, entry, idx);
1136
1137  /* The current insertion position must never point outside our
1138   * data buffer.
1139   */
1140  assert(level->current_data <= level->start_offset + level->size);
1141}
1142
1143/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
1144 * respective item.
1145 */
1146static apr_uint32_t
1147get_group_index(svn_membuffer_t **cache,
1148                const entry_key_t *key)
1149{
1150  svn_membuffer_t *segment0 = *cache;
1151  apr_uint64_t key0 = key->fingerprint[0];
1152  apr_uint64_t key1 = key->fingerprint[1];
1153
1154  /* select the cache segment to use. They all have the same group_count.
1155   * Since the key may not be well-distributed, pre-fold it to a smaller but
1156   * "denser" range.  The modulus is a prime larger than the largest
1157   * possible counts. */
1158  *cache = &segment0[(key1 % APR_UINT64_C(2809637) + (key0 / 37))
1159                     & (segment0->segment_count - 1)];
1160  return (key0 % APR_UINT64_C(5030895599)) % segment0->group_count;
1161}
1162
1163/* Reduce the hit count of ENTRY, or halve its priority if the hit count
1164 * is already zero.
1165 */
1166static APR_INLINE void
1167let_entry_age(svn_membuffer_t *cache, entry_t *entry)
1168{
1169  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;
1170
1171  if (hits_removed)
1172    {
1173      entry->hit_count -= hits_removed;
1174    }
1175  else
1176    {
1177      entry->priority /= 2;
1178    }
1179}
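/* Example of the aging above: an entry with hit_count 5 loses
 * (5 + 1) >> 1 == 3 hits and keeps 2; once hit_count has dropped to 0,
 * further aging halves the entry's priority instead.
 */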
1180
1181/* Return whether the keys in LHS and RHS match.
1182 */
1183static svn_boolean_t
1184entry_keys_match(const entry_key_t *lhs,
1185                 const entry_key_t *rhs)
1186{
1187  return (lhs->fingerprint[0] == rhs->fingerprint[0])
1188      && (lhs->fingerprint[1] == rhs->fingerprint[1])
1189      && (lhs->key_len == rhs->key_len);
1190}
1191
1192/* Given the GROUP_INDEX that shall contain an entry with the hash key
1193 * TO_FIND, find that entry in the specified group.
1194 *
1195 * If FIND_EMPTY is not set, this function will return the one used entry
1196 * that actually matches the hash or NULL, if no such entry exists.
1197 *
1198 * If FIND_EMPTY has been set, this function will return either the one
1199 * used entry that actually matches the hash (dropped first, so it is fit
1200 * to be replaced with new content), an unused entry, or a forcibly removed
1201 * entry (if all group entries are currently in use).  The returned entry's
1202 * key will be initialized from TO_FIND.
1203 *
1204 * Note: This function requires the caller to appropriately lock the CACHE.
1205 * For FIND_EMPTY==FALSE, a read lock is required, for FIND_EMPTY==TRUE,
1206 * the write lock must have been acquired.
1207 */
1208static entry_t *
1209find_entry(svn_membuffer_t *cache,
1210           apr_uint32_t group_index,
1211           const full_key_t *to_find,
1212           svn_boolean_t find_empty)
1213{
1214  entry_group_t *group;
1215  entry_t *entry = NULL;
1216  apr_size_t i;
1217
1218  /* get the group that *must* contain the entry
1219   */
1220  group = &cache->directory[group_index];
1221
1222  /* If the entry group has not been initialized, yet, there is no data.
1223   */
1224  if (! is_group_initialized(cache, group_index))
1225    {
1226      if (find_empty)
1227        {
1228          initialize_group(cache, group_index);
1229          entry = &group->entries[0];
1230
1231          /* initialize entry for the new key */
1232          entry->key = to_find->entry_key;
1233        }
1234
1235      return entry;
1236    }
1237
1238  /* try to find the matching entry
1239   */
1240  while (1)
1241    {
1242      for (i = 0; i < group->header.used; ++i)
1243        if (entry_keys_match(&group->entries[i].key, &to_find->entry_key))
1244          {
1245            /* This is the only entry that _may_ contain the correct data. */
1246            entry = &group->entries[i];
1247
1248            /* If we want to preserve it, check that it is actually a match. */
1249            if (!find_empty)
1250              {
1251                /* If there is no full key to compare, we are done. */
1252                if (!entry->key.key_len)
1253                  return entry;
1254
1255                /* Compare the full key. */
1256                if (memcmp(to_find->full_key.data,
1257                           cache->data + entry->offset,
1258                           entry->key.key_len) == 0)
1259                  return entry;
1260
1261                /* Key conflict. The entry to find cannot be anywhere else.
1262                 * Therefore, it is not cached. */
1263                return NULL;
1264              }
1265
1266            /* need to empty that entry */
1267            drop_entry(cache, entry);
1268            if (group->header.used == GROUP_SIZE)
1269              group = last_group_in_chain(cache, group);
1270            else if (group->header.chain_length == 0)
1271              group = last_group_in_chain(cache,
1272                                          &cache->directory[group_index]);
1273
1274            /* No entry found (actually, none left to find). */
1275            entry = NULL;
1276            break;
1277          }
1278
1279      /* end of chain? */
1280      if (group->header.next == NO_INDEX)
1281        break;
1282
1283      /* only full groups may chain */
1284      assert(group->header.used == GROUP_SIZE);
1285      group = &cache->directory[group->header.next];
1286    }
1287
1288  /* None found. Are we looking for a free entry?
1289   */
1290  if (find_empty)
1291    {
1292      /* There is no empty entry in the chain, try chaining a spare group.
1293       */
1294      if (   group->header.used == GROUP_SIZE
1295          && group->header.chain_length < MAX_GROUP_CHAIN_LENGTH)
1296        {
1297          entry_group_t *new_group = allocate_spare_group(cache);
1298          if (new_group)
1299            {
1300              /* chain groups
1301               */
1302              new_group->header.chain_length = group->header.chain_length + 1;
1303              new_group->header.previous = (apr_uint32_t) (group -
1304                                                           cache->directory);
1305              new_group->header.next = NO_INDEX;
1306              group->header.next = (apr_uint32_t) (new_group -
1307                                                   cache->directory);
1308              group = new_group;
1309            }
1310        }
1311
1312      /* if GROUP is still filled, we need to remove a random entry */
1313      if (group->header.used == GROUP_SIZE)
1314        {
1315          /* every entry gets the same chance of being removed.
1316           * Otherwise, we free the first entry, fill it and
1317           * remove it again on the next occasion without considering
1318           * the other entries in this group.
1319           *
1320           * We hit only one random group instead of processing all
1321           * groups in the chain.
1322           */
1323          cache_level_t *entry_level;
1324          int to_remove = rand() % (GROUP_SIZE * group->header.chain_length);
1325          entry_group_t *to_shrink
1326            = get_group(cache, group_index, to_remove / GROUP_SIZE);
1327
1328          entry = &to_shrink->entries[to_remove % GROUP_SIZE];
1329          entry_level = get_cache_level(cache, entry);
1330          for (i = 0; i < GROUP_SIZE; ++i)
1331            {
1332              /* keep L1 entries whenever possible */
1333
1334              cache_level_t *level
1335                = get_cache_level(cache, &to_shrink->entries[i]);
1336              if (   (level != entry_level && entry_level == &cache->l1)
1337                  || (entry->hit_count > to_shrink->entries[i].hit_count))
1338                {
1339                  entry_level = level;
1340                  entry = &to_shrink->entries[i];
1341                }
1342            }
1343
1344          /* for the entries that have not been removed,
1345           * reduce their hit counts to put them at a relative
1346           * disadvantage the next time.
1347           */
1348          for (i = 0; i < GROUP_SIZE; ++i)
1349            if (entry != &to_shrink->entries[i])
1350              let_entry_age(cache, &to_shrink->entries[i]);
1351
1352          drop_entry(cache, entry);
1353        }
1354
1355      /* initialize entry for the new key
1356       */
1357      entry = &group->entries[group->header.used];
1358      entry->key = to_find->entry_key;
1359    }
1360
1361  return entry;
1362}
1363
1364/* Move a surviving ENTRY from just behind the insertion window to
1365 * its beginning and move the insertion window up accordingly.
1366 */
1367static void
1368move_entry(svn_membuffer_t *cache, entry_t *entry)
1369{
1370  apr_size_t size = ALIGN_VALUE(entry->size);
1371  cache_level_t *level = get_cache_level(cache, entry);
1372
1373  /* This entry survived this cleansing run.  Halve its hit count so that
1374   * its removal gets more likely in the next run, unless someone reads /
1375   * hits this entry in the meantime.
1376   */
1377  let_entry_age(cache, entry);
1378
1379  /* Move the entry to the start of the empty / insertion section
1380   * (if it isn't there already). Size-aligned moves are legal
1381   * since all offsets and block sizes share this same alignment.
1382   * Size-aligned moves tend to be faster than non-aligned ones
1383   * because no "odd" bytes at the end need special treatment.
1384   */
1385  if (entry->offset != level->current_data)
1386    {
1387      memmove(cache->data + level->current_data,
1388              cache->data + entry->offset,
1389              size);
1390      entry->offset = level->current_data;
1391    }
1392
1393  /* The insertion position is now directly behind this entry.
1394   */
1395  level->current_data = entry->offset + size;
1396  level->next = entry->next;
1397
1398  /* The current insertion position must never point outside our
1399   * data buffer.
1400   */
1401  assert(level->current_data <= level->start_offset + level->size);
1402}
1403
1404/* Move ENTRY in CACHE from L1 to L2.
1405 */
1406static void
1407promote_entry(svn_membuffer_t *cache, entry_t *entry)
1408{
1409  apr_uint32_t idx = get_index(cache, entry);
1410  apr_size_t size = ALIGN_VALUE(entry->size);
1411  assert(get_cache_level(cache, entry) == &cache->l1);
1412  assert(idx == cache->l1.next);
1413
1414  /* copy item from the current location in L1 to the start of L2's
1415   * insertion window */
1416  memmove(cache->data + cache->l2.current_data,
1417          cache->data + entry->offset,
1418          size);
1419  entry->offset = cache->l2.current_data;
1420
1421  /* The insertion position is now directly behind this entry.
1422   */
1423  cache->l2.current_data += size;
1424
1425  /* remove ENTRY from chain of L1 entries and put it into L2
1426   */
1427  unchain_entry(cache, &cache->l1, entry, idx);
1428  chain_entry(cache, &cache->l2, entry, idx);
1429}
1430
1431/* This function implements the cache insertion / eviction strategy for L2.
1432 *
1433 * If necessary, enlarge the insertion window of CACHE->L2 until it is at
1434 * least TO_FIT_IN->SIZE bytes long. TO_FIT_IN->SIZE must not exceed the
1435 * data buffer size allocated to CACHE->L2.
1437 *
1438 * Return TRUE if enough room could be found or made.  A FALSE result
1439 * indicates that the respective item shall not be added.
1440 */
1441static svn_boolean_t
1442ensure_data_insertable_l2(svn_membuffer_t *cache,
1443                          entry_t *to_fit_in)
1444{
1445  entry_t *entry;
1446
1447  /* accumulated size of the entries that have been moved to make
1448   * room for the new one.
1449   */
1450  apr_size_t moved_size = 0;
1451
1452  /* count the number of entries that got moved.  A single large entry
1453   * being moved is not enough to reject an insertion.
1454   */
1455  apr_size_t moved_count = 0;
1456
1457  /* accumulated "worth" of items dropped so far */
1458  apr_uint64_t drop_hits = 0;
1459
1460  /* estimated "worth" of the new entry */
1461  apr_uint64_t drop_hits_limit = (to_fit_in->hit_count + 1)
1462                               * (apr_uint64_t)to_fit_in->priority;
1463
1464  /* This loop will eventually terminate because every cache entry
1465   * would get dropped eventually:
1466   *
1467   * - the incoming entry is small enough to fit into L2
1468   * - every iteration either frees parts of L2 or counts the moved size
1469   * - eventually, we either moved too many items with too much total size
1470   *   to accept the new entry, or made enough room in L2 for the new entry
1471   *
1472   * Low-prio items get rejected even sooner.
1473   */
1474  while (1)
1475    {
1476      /* first offset behind the insertion window
1477       */
1478      apr_uint64_t end = cache->l2.next == NO_INDEX
1479                       ? cache->l2.start_offset + cache->l2.size
1480                       : get_entry(cache, cache->l2.next)->offset;
1481
1482      /* leave function as soon as the insertion window is large enough
1483       */
1484      if (end >= to_fit_in->size + cache->l2.current_data)
1485        return TRUE;
1486
1487      /* Don't be too eager to cache data.  If a lot of data has been moved
1488       * around, the current item probably has a relatively low priority.
1489       * We must also limit the effort spent here (even in case of faulty
1490       * heuristics).  Therefore, give up after some time.
1491       */
1492      if (moved_size > 4 * to_fit_in->size && moved_count > 7)
1493        return FALSE;
1494
1495      /* if the net worth (in weighted hits) of items removed is already
1496       * larger than what we want to insert, reject TO_FIT_IN because it
1497       * still does not fit in. */
1498      if (drop_hits > drop_hits_limit)
1499        return FALSE;
1500
1501      /* try to enlarge the insertion window
1502       */
1503      if (cache->l2.next == NO_INDEX)
1504        {
1505          /* We reached the end of the data buffer; restart at the beginning.
1506           * Due to the randomized nature of our LFU implementation, very
1507           * large data items may require multiple passes. Therefore, SIZE
1508           * should be restricted to significantly less than data_size.
1509           */
1510          cache->l2.current_data = cache->l2.start_offset;
1511          cache->l2.next = cache->l2.first;
1512        }
1513      else
1514        {
1515          svn_boolean_t keep;
1516          entry = get_entry(cache, cache->l2.next);
1517
1518          if (to_fit_in->priority < SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1519            {
1520              /* Low prio items can be accepted only if the current
1521               * entry is of even lower prio and has fewer hits.
1522               */
1523              if (   entry->priority > to_fit_in->priority
1524                  || entry->hit_count > to_fit_in->hit_count)
1525                return FALSE;
1526            }
1527
1528          if (entry->priority <= SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
1529            {
1530              /* Be quick to remove low-prio entries - even if the incoming
1531               * one is low-prio as well.  This makes room for more important
1532               * data and replaces existing data with newly read information.
1533               */
1534              keep = FALSE;
1535            }
1536          else
1537            {
1538              /* If the existing data is the same prio as the incoming data,
1539               * drop the existing entry if it had seen fewer (probably 0)
1540               * hits than the entry coming in from L1.  In case of different
1541               * priorities, keep the current entry if it has higher prio.
1542               * The new entry may still find room by ousting other entries.
1543               */
1544              keep = to_fit_in->priority == entry->priority
1545                   ? entry->hit_count >= to_fit_in->hit_count
1546                   : entry->priority > to_fit_in->priority;
1547            }
1548
1549          /* keepers or destroyers? */
1550          if (keep)
1551            {
1552              /* Moving entries around is not for free -> track costs. */
1553              moved_size += entry->size;
1554              moved_count++;
1555
1556              move_entry(cache, entry);
1557            }
1558          else
1559            {
1560              /* Drop the entry from the end of the insertion window.
1561               * Count the "hit importance" such that we are not sacrificing
1562               * too much of the high-hit contents.  However, don't count
1563               * low-priority hits because higher prio entries will often
1564               * provide the same data but in a further stage of processing.
1565               */
1566              if (entry->priority > SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
1567                drop_hits += entry->hit_count * (apr_uint64_t)entry->priority;
1568
1569              drop_entry(cache, entry);
1570            }
1571        }
1572    }
1573
1574  /* This will never be reached. But if it was, "can't insert" was the
1575   * right answer. */
1576}
1577
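/* A minimal, self-contained sketch (hypothetical helper, not used by the
 * cache code) of the eviction budget applied in ensure_data_insertable_l2
 * above: the incoming item is worth roughly (hit count + 1) * priority and,
 * once the accumulated worth of the entries dropped on its behalf exceeds
 * that budget, further eviction no longer pays off.
 */
static svn_boolean_t
eviction_still_pays_off(apr_uint64_t dropped_worth,
                        apr_uint32_t new_hit_count,
                        apr_uint32_t new_priority)
{
  /* Same formula as DROP_HITS_LIMIT above. */
  apr_uint64_t budget = (new_hit_count + 1) * (apr_uint64_t)new_priority;

  return dropped_worth <= budget;
}
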
1578/* This function implements the cache insertion / eviction strategy for L1.
1579 *
1580 * If necessary, enlarge the insertion window of CACHE->L1 by promoting
1581 * entries to L2 until it is at least SIZE bytes long.
1582 *
1583 * Return TRUE if enough room could be found or made.  A FALSE result
1584 * indicates that the respective item shall not be added because it is
1585 * too large.
1586 */
1587static svn_boolean_t
1588ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
1589{
1590  /* Guarantees that the while loop will terminate. */
1591  if (size > cache->l1.size)
1592    return FALSE;
1593
1594  /* This loop will eventually terminate because every cache entry
1595   * would get dropped eventually.
1596   */
1597  while (1)
1598    {
1599      /* first offset behind the insertion window
1600       */
1601      apr_uint32_t entry_index = cache->l1.next;
1602      entry_t *entry = get_entry(cache, entry_index);
1603      apr_uint64_t end = cache->l1.next == NO_INDEX
1604                       ? cache->l1.start_offset + cache->l1.size
1605                       : entry->offset;
1606
1607      /* leave function as soon as the insertion window is large enough
1608       */
1609      if (end >= size + cache->l1.current_data)
1610        return TRUE;
1611
1612      /* Enlarge the insertion window
1613       */
1614      if (cache->l1.next == NO_INDEX)
1615        {
1616          /* We reached the end of the data buffer; restart at the beginning.
1617           * Due to the randomized nature of our LFU implementation, very
1618           * large data items may require multiple passes. Therefore, SIZE
1619           * should be restricted to significantly less than data_size.
1620           */
1621          cache->l1.current_data = cache->l1.start_offset;
1622          cache->l1.next = cache->l1.first;
1623        }
1624      else
1625        {
1626          /* Remove the entry from the end of insertion window and promote
1627           * it to L2, if it is important enough.
1628           */
1629          svn_boolean_t keep = ensure_data_insertable_l2(cache, entry);
1630
1631          /* We might have touched the group that contains ENTRY. Recheck. */
1632          if (entry_index == cache->l1.next)
1633            {
1634              if (keep)
1635                promote_entry(cache, entry);
1636              else
1637                drop_entry(cache, entry);
1638            }
1639        }
1640    }
1641
1642  /* This will never be reached. But if it was, "can't insert" was the
1643   * right answer. */
1644}
1645
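/* A minimal sketch (hypothetical helper, not used by the cache code) of the
 * window check shared by ensure_data_insertable_l1 and
 * ensure_data_insertable_l2 above: the insertion window extends from the
 * level's CURRENT_DATA offset up to the offset of the next linked entry
 * (or the end of the level if there is none); an item fits iff the window
 * spans at least SIZE bytes.
 */
static svn_boolean_t
insertion_window_fits(apr_uint64_t current_data,
                      apr_uint64_t window_end,
                      apr_size_t size)
{
  /* Equivalent to the "end >= size + current_data" tests above. */
  return window_end >= current_data + size;
}
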
1646svn_error_t *
1647svn_cache__membuffer_cache_create(svn_membuffer_t **cache,
1648                                  apr_size_t total_size,
1649                                  apr_size_t directory_size,
1650                                  apr_size_t segment_count,
1651                                  svn_boolean_t thread_safe,
1652                                  svn_boolean_t allow_blocking_writes,
1653                                  apr_pool_t *pool)
1654{
1655  svn_membuffer_t *c;
1656
1657  apr_uint32_t seg;
1658  apr_uint32_t group_count;
1659  apr_uint32_t main_group_count;
1660  apr_uint32_t spare_group_count;
1661  apr_uint32_t group_init_size;
1662  apr_uint64_t data_size;
1663  apr_uint64_t max_entry_size;
1664
1665  /* Limit the total size (only relevant if we can address > 4GB)
1666   */
1667#if APR_SIZEOF_VOIDP > 4
1668  if (total_size > MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT)
1669    total_size = MAX_SEGMENT_SIZE * MAX_SEGMENT_COUNT;
1670#endif
1671
1672  /* Limit the segment count
1673   */
1674  if (segment_count > MAX_SEGMENT_COUNT)
1675    segment_count = MAX_SEGMENT_COUNT;
1676  if (segment_count * MIN_SEGMENT_SIZE > total_size)
1677    segment_count = total_size / MIN_SEGMENT_SIZE;
1678
1679  /* The segment count must be a power of two. Round it down as necessary
1680   * (illustrated by the stand-alone sketch after this function). */
1681  while ((segment_count & (segment_count-1)) != 0)
1682    segment_count &= segment_count-1;
1683
1684  /* if the caller hasn't provided a reasonable segment count or the above
1685   * limitations set it to 0, derive one from the absolute cache size
1686   */
1687  if (segment_count < 1)
1688    {
1689      /* Determine a reasonable number of cache segments. Segmentation is
1690       * only useful for multi-threaded / multi-core servers as it reduces
1691       * lock contention on these systems.
1692       *
1693       * But on these systems, we can assume that ample memory has been
1694       * allocated to this cache. Smaller caches should not be segmented
1695       * as this severely limits the maximum size of cachable items.
1696       *
1697       * Segments should not be smaller than 32MB and max. cachable item
1698       * size should grow as fast as segmentation.
1699       */
1700
1701      apr_uint32_t segment_count_shift = 0;
1702      while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
1703             < total_size)
1704        ++segment_count_shift;
1705
1706      segment_count = (apr_size_t)1 << segment_count_shift;
1707    }
1708
1709  /* If we have an extremely large cache (>512 GB), the default segment
1710   * size may exceed the amount allocatable as one chunk. In that case,
1711   * increase segmentation until we are under the threshold.
1712   */
1713  while (   total_size / segment_count > MAX_SEGMENT_SIZE
1714         && segment_count < MAX_SEGMENT_COUNT)
1715    segment_count *= 2;
1716
1717  /* allocate cache as an array of segments / cache objects */
1718  c = apr_palloc(pool, segment_count * sizeof(*c));
1719
1720  /* Split total cache size into segments of equal size
1721   */
1722  total_size /= segment_count;
1723  directory_size /= segment_count;
1724
1725  /* prevent pathological conditions: ensure a certain minimum cache size
1726   */
1727  if (total_size < 2 * sizeof(entry_group_t))
1728    total_size = 2 * sizeof(entry_group_t);
1729
1730  /* adapt the dictionary size accordingly, if necessary:
1731   * It must hold at least two groups and must not exceed the cache size.
1732   */
1733  if (directory_size > total_size - sizeof(entry_group_t))
1734    directory_size = total_size - sizeof(entry_group_t);
1735  if (directory_size < 2 * sizeof(entry_group_t))
1736    directory_size = 2 * sizeof(entry_group_t);
1737
1738  /* limit the data size to what we can address.
1739   * Note that this cannot overflow since all values are of size_t.
1740   * Also, make it a multiple of the item placement granularity to
1741   * prevent subtle overflows.
1742   */
1743  data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;
1744
1745  /* For cache sizes > 16TB, individual cache segments will be larger
1746   * than 32GB allowing for >4GB entries.  But caching chunks larger
1747   * than 4GB is simply not supported.
1748   */
1749  max_entry_size = data_size / 8 > MAX_ITEM_SIZE
1750                 ? MAX_ITEM_SIZE
1751                 : data_size / 8;
1752
1753  /* to keep the entries small, we use 32 bit indexes only
1754   * -> we need to ensure that no more than 4G entries exist.
1755   *
1756   * Note that this limit could only be exceeded in a very
1757   * theoretical setup with about 1EB of cache.
1758   */
1759  group_count = directory_size / sizeof(entry_group_t)
1760                    >= (APR_UINT32_MAX / GROUP_SIZE)
1761              ? (APR_UINT32_MAX / GROUP_SIZE) - 1
1762              : (apr_uint32_t)(directory_size / sizeof(entry_group_t));
1763
1764  /* set some of the index directory aside as over-flow (spare) buffers */
1765  spare_group_count = MAX(group_count / 4, 1);
1766  main_group_count = group_count - spare_group_count;
1767  assert(spare_group_count > 0 && main_group_count > 0);
1768
1769  group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY);
1770  for (seg = 0; seg < segment_count; ++seg)
1771    {
1772      /* allocate buffers and initialize cache members
1773       */
1774      c[seg].segment_count = (apr_uint32_t)segment_count;
1775
1776      c[seg].group_count = main_group_count;
1777      c[seg].spare_group_count = spare_group_count;
1778      c[seg].first_spare_group = NO_INDEX;
1779      c[seg].max_spare_used = 0;
1780
1781      c[seg].directory = apr_pcalloc(pool,
1782                                     group_count * sizeof(entry_group_t));
1783
1784      /* Allocate and initialize directory entries as "not initialized",
1785         hence "unused" */
1786      c[seg].group_initialized = apr_pcalloc(pool, group_init_size);
1787
1788      /* Allocate 1/4th of the data buffer to L1
1789       */
1790      c[seg].l1.first = NO_INDEX;
1791      c[seg].l1.last = NO_INDEX;
1792      c[seg].l1.next = NO_INDEX;
1793      c[seg].l1.start_offset = 0;
1794      c[seg].l1.size = ALIGN_VALUE(data_size / 4);
1795      c[seg].l1.current_data = 0;
1796
1797      /* The remaining 3/4th will be used as L2
1798       */
1799      c[seg].l2.first = NO_INDEX;
1800      c[seg].l2.last = NO_INDEX;
1801      c[seg].l2.next = NO_INDEX;
1802      c[seg].l2.start_offset = c[seg].l1.size;
1803      c[seg].l2.size = ALIGN_VALUE(data_size) - c[seg].l1.size;
1804      c[seg].l2.current_data = c[seg].l2.start_offset;
1805
1806      /* This cast is safe because DATA_SIZE <= MAX_SEGMENT_SIZE. */
1807      c[seg].data = apr_palloc(pool, (apr_size_t)ALIGN_VALUE(data_size));
1808      c[seg].data_used = 0;
1809      c[seg].max_entry_size = max_entry_size;
1810
1811      c[seg].used_entries = 0;
1812      c[seg].total_reads = 0;
1813      c[seg].total_writes = 0;
1814      c[seg].total_hits = 0;
1815
1816      /* Were the allocations successful?
1817       * If not, we are out of memory and give up right away.
1818       */
1819      if (c[seg].data == NULL || c[seg].directory == NULL)
1820        {
1821          /* We are OOM. There is no need to proceed with "half a cache".
1822           */
1823          return svn_error_wrap_apr(APR_ENOMEM, "OOM");
1824        }
1825
1826#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
1827      /* A lock for intra-process synchronization to the cache, or NULL if
1828       * the cache's creator doesn't feel the cache needs to be
1829       * thread-safe.
1830       */
1831      SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool));
1832#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
1833      /* Same for read-write lock. */
1834      c[seg].lock = NULL;
1835      if (thread_safe)
1836        {
1837          apr_status_t status =
1838              apr_thread_rwlock_create(&(c[seg].lock), pool);
1839          if (status)
1840            return svn_error_wrap_apr(status, _("Can't create cache mutex"));
1841        }
1842
1843      /* Select the behavior of write operations.
1844       */
1845      c[seg].allow_blocking_writes = allow_blocking_writes;
1846#endif
1847    }
1848
1849  /* done here
1850   */
1851  *cache = c;
1852  return SVN_NO_ERROR;
1853}
1854
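/* A stand-alone sketch (hypothetical helper, not used by the cache code) of
 * the power-of-two rounding performed on SEGMENT_COUNT in
 * svn_cache__membuffer_cache_create above: repeatedly clearing the lowest
 * set bit of a non-zero value leaves exactly its highest set bit, i.e. the
 * value rounded down to a power of two.
 */
static apr_size_t
round_down_to_power_of_two(apr_size_t value)
{
  /* VALUE & (VALUE - 1) clears the lowest set bit; a power of two has
   * only one bit set, so the loop stops exactly there. */
  while ((value & (value - 1)) != 0)
    value &= value - 1;

  return value;
}
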
1855svn_error_t *
1856svn_cache__membuffer_clear(svn_membuffer_t *cache)
1857{
1858  apr_size_t seg;
1859  apr_size_t segment_count = cache->segment_count;
1860
1861  /* Length of the group_initialized array in bytes.
1862     See also svn_cache__membuffer_cache_create(). */
1863  apr_size_t group_init_size
1864    = 1 + (cache->group_count + cache->spare_group_count)
1865            / (8 * GROUP_INIT_GRANULARITY);
1866
1867  /* Clear segment by segment.  This implies that other threads may read
1868     from and write to other segments after we cleared them and before the
1869     last segment is done.
1870
1871     However, that is no different from a write request coming through
1872     right after we cleared all segments because dependencies between
1873     cache entries (recursive lookup / access locks) are not allowed.
1874   */
1875  for (seg = 0; seg < segment_count; ++seg)
1876    {
1877      /* Unconditionally acquire the write lock. */
1878      SVN_ERR(force_write_lock_cache(&cache[seg]));
1879
1880      /* Mark all groups as "not initialized", which implies "empty". */
1881      cache[seg].first_spare_group = NO_INDEX;
1882      cache[seg].max_spare_used = 0;
1883
1884      memset(cache[seg].group_initialized, 0, group_init_size);
1885
1886      /* Unlink L1 contents. */
1887      cache[seg].l1.first = NO_INDEX;
1888      cache[seg].l1.last = NO_INDEX;
1889      cache[seg].l1.next = NO_INDEX;
1890      cache[seg].l1.current_data = cache[seg].l1.start_offset;
1891
1892      /* Unlink L2 contents. */
1893      cache[seg].l2.first = NO_INDEX;
1894      cache[seg].l2.last = NO_INDEX;
1895      cache[seg].l2.next = NO_INDEX;
1896      cache[seg].l2.current_data = cache[seg].l2.start_offset;
1897
1898      /* Reset content counters. */
1899      cache[seg].data_used = 0;
1900      cache[seg].used_entries = 0;
1901
1902      /* Segment may be used again. */
1903      SVN_ERR(unlock_cache(&cache[seg], SVN_NO_ERROR));
1904    }
1905
1906  /* done here */
1907  return SVN_NO_ERROR;
1908}
1909
1910/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1911 * by the hash value TO_FIND and set *FOUND accordingly.
1912 *
1913 * Note: This function requires the caller to serialize access.
1914 * Don't call it directly, call entry_exists instead.
1915 */
1916static svn_error_t *
1917entry_exists_internal(svn_membuffer_t *cache,
1918                      apr_uint32_t group_index,
1919                      const full_key_t *to_find,
1920                      svn_boolean_t *found)
1921{
1922  *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1923  return SVN_NO_ERROR;
1924}
1925
1926/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1927 * by the hash value TO_FIND and set *FOUND accordingly.
1928 */
1929static svn_error_t *
1930entry_exists(svn_membuffer_t *cache,
1931             apr_uint32_t group_index,
1932             const full_key_t *to_find,
1933             svn_boolean_t *found)
1934{
1935  WITH_READ_LOCK(cache,
1936                 entry_exists_internal(cache,
1937                                       group_index,
1938                                       to_find,
1939                                       found));
1940
1941  return SVN_NO_ERROR;
1942}
1943
1944/* Given the SIZE and PRIORITY of a new item, return the cache level
1945   (L1 or L2) in fragment CACHE that this item shall be inserted into.
1946   If we can neither find nor make enough room for the item, return NULL.
1947 */
1948static cache_level_t *
1949select_level(svn_membuffer_t *cache,
1950             apr_size_t size,
1951             apr_uint32_t priority)
1952{
1953  if (cache->max_entry_size >= size)
1954    {
1955      /* Small items go into L1. */
1956      return ensure_data_insertable_l1(cache, size)
1957           ? &cache->l1
1958           : NULL;
1959    }
1960  else if (   cache->l2.size >= size
1961           && MAX_ITEM_SIZE >= size
1962           && priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1963    {
1964      /* Large but important items go into L2. */
1965      entry_t dummy_entry = { { { 0 } } };
1966      dummy_entry.priority = priority;
1967      dummy_entry.size = size;
1968
1969      return ensure_data_insertable_l2(cache, &dummy_entry)
1970           ? &cache->l2
1971           : NULL;
1972    }
1973
1974  /* Don't cache large, unimportant items. */
1975  return NULL;
1976}
1977
1978/* Try to insert the serialized item given in BUFFER with ITEM_SIZE
1979 * into the group GROUP_INDEX of CACHE and uniquely identify it by
1980 * hash value TO_FIND.
1981 *
1982 * However, there is no guarantee that it will actually be put into
1983 * the cache. If there is already some data associated with TO_FIND,
1984 * it will be removed from the cache even if the new data cannot
1985 * be inserted.
1986 *
1987 * Note: This function requires the caller to serialize access.
1988 * Don't call it directly, call membuffer_cache_set instead.
1989 */
1990static svn_error_t *
1991membuffer_cache_set_internal(svn_membuffer_t *cache,
1992                             const full_key_t *to_find,
1993                             apr_uint32_t group_index,
1994                             char *buffer,
1995                             apr_size_t item_size,
1996                             apr_uint32_t priority,
1997                             DEBUG_CACHE_MEMBUFFER_TAG_ARG
1998                             apr_pool_t *scratch_pool)
1999{
2000  cache_level_t *level;
2001  apr_size_t size = item_size + to_find->entry_key.key_len;
2002
2003  /* first, look for a previous entry for the given key */
2004  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2005
2006  /* if there is an old version of that entry and the new data fits into
2007   * the old spot, just re-use that space. */
2008  if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
2009    {
2010      /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
2011       * lest we run into trouble with 32 bit underflow *not* treated as a
2012       * negative value.
2013       */
2014      cache->data_used += (apr_uint64_t)size - entry->size;
2015      entry->size = size;
2016      entry->priority = priority;
2017
2018#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2019
2020      /* Remember original content, type and key (hashes)
2021       */
2022      SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2023      memcpy(&entry->tag, tag, sizeof(*tag));
2024
2025#endif
2026
2027      if (entry->key.key_len)
2028        memcpy(cache->data + entry->offset, to_find->full_key.data,
2029               entry->key.key_len);
2030      if (item_size)
2031        memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2032               item_size);
2033
2034      cache->total_writes++;
2035      return SVN_NO_ERROR;
2036    }
2037
2038  /* if necessary, enlarge the insertion window.
2039   */
2040  level = buffer ? select_level(cache, size, priority) : NULL;
2041  if (level)
2042    {
2043      /* Remove old data for this key, if that exists.
2044       * Get an unused entry for the key and initialize it with
2045       * the serialized item's (future) position within data buffer.
2046       */
2047      entry = find_entry(cache, group_index, to_find, TRUE);
2048      entry->size = size;
2049      entry->offset = level->current_data;
2050      entry->priority = priority;
2051
2052#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2053
2054      /* Remember original content, type and key (hashes)
2055       */
2056      SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2057      memcpy(&entry->tag, tag, sizeof(*tag));
2058
2059#endif
2060
2061      /* Link the entry properly.
2062       */
2063      insert_entry(cache, entry);
2064
2065      /* Copy the serialized item data into the cache.
2066       */
2067      if (entry->key.key_len)
2068        memcpy(cache->data + entry->offset, to_find->full_key.data,
2069               entry->key.key_len);
2070      if (item_size)
2071        memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2072               item_size);
2073
2074      cache->total_writes++;
2075    }
2076  else
2077    {
2078      /* if there is already an entry for this key, drop it.
2079       * Since ensure_data_insertable may have removed entries from
2080       * ENTRY's group, re-do the lookup.
2081       */
2082      entry = find_entry(cache, group_index, to_find, FALSE);
2083      if (entry)
2084        drop_entry(cache, entry);
2085    }
2086
2087  return SVN_NO_ERROR;
2088}
2089
2090/* Try to insert the ITEM and use the KEY to uniquely identify it.
2091 * However, there is no guarantee that it will actually be put into
2092 * the cache. If there is already some data associated with the KEY,
2093 * it will be removed from the cache even if the new data cannot
2094 * be inserted.
2095 *
2096 * The SERIALIZER is called to transform the ITEM into a single,
2097 * flat data buffer. Temporary allocations may be done in POOL.
2098 */
2099static svn_error_t *
2100membuffer_cache_set(svn_membuffer_t *cache,
2101                    const full_key_t *key,
2102                    void *item,
2103                    svn_cache__serialize_func_t serializer,
2104                    apr_uint32_t priority,
2105                    DEBUG_CACHE_MEMBUFFER_TAG_ARG
2106                    apr_pool_t *scratch_pool)
2107{
2108  apr_uint32_t group_index;
2109  void *buffer = NULL;
2110  apr_size_t size = 0;
2111
2112  /* find the entry group that will hold the key.
2113   */
2114  group_index = get_group_index(&cache, &key->entry_key);
2115
2116  /* Serialize the data.
2117   */
2118  if (item)
2119    SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
2120
2121  /* The actual cache data access needs to be synchronized.
2122   */
2123  WITH_WRITE_LOCK(cache,
2124                  membuffer_cache_set_internal(cache,
2125                                               key,
2126                                               group_index,
2127                                               buffer,
2128                                               size,
2129                                               priority,
2130                                               DEBUG_CACHE_MEMBUFFER_TAG
2131                                               scratch_pool));
2132  return SVN_NO_ERROR;
2133}
2134
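/* A hypothetical serializer (illustration only, not used by the cache code)
 * matching the SERIALIZER contract described above for membuffer_cache_set:
 * it flattens a made-up fixed-size item type into a single buffer allocated
 * from RESULT_POOL.
 */
typedef struct example_item_t
{
  apr_uint64_t revision;
  apr_uint32_t flags;
} example_item_t;

static svn_error_t *
serialize_example_item(void **data,
                       apr_size_t *data_len,
                       void *in,
                       apr_pool_t *result_pool)
{
  /* A plain copy is enough because the struct contains no pointers. */
  *data_len = sizeof(example_item_t);
  *data = apr_palloc(result_pool, *data_len);
  memcpy(*data, in, *data_len);

  return SVN_NO_ERROR;
}
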
2135/* Count a hit in ENTRY within CACHE.
2136 */
2137static void
2138increment_hit_counters(svn_membuffer_t *cache, entry_t *entry)
2139{
2140  /* To minimize the memory footprint of the cache index, we limit local
2141   * hit counters to 32 bits.  These may overflow but we don't really
2142   * care because at worst, ENTRY will be dropped from cache once every
2143   * few billion hits. */
2144  svn_atomic_inc(&entry->hit_count);
2145
2146  /* That one is for stats only. */
2147  cache->total_hits++;
2148}
2149
2150/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2151 * by the hash value TO_FIND. If no item has been stored for KEY,
2152 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
2153 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
2154 * be done in POOL.
2155 *
2156 * Note: This function requires the caller to serialize access.
2157 * Don't call it directly, call membuffer_cache_get instead.
2158 */
2159static svn_error_t *
2160membuffer_cache_get_internal(svn_membuffer_t *cache,
2161                             apr_uint32_t group_index,
2162                             const full_key_t *to_find,
2163                             char **buffer,
2164                             apr_size_t *item_size,
2165                             DEBUG_CACHE_MEMBUFFER_TAG_ARG
2166                             apr_pool_t *result_pool)
2167{
2168  entry_t *entry;
2169  apr_size_t size;
2170
2171  /* The actual cache data access needs to be synchronized.
2172   */
2173  entry = find_entry(cache, group_index, to_find, FALSE);
2174  cache->total_reads++;
2175  if (entry == NULL)
2176    {
2177      /* no such entry found.
2178       */
2179      *buffer = NULL;
2180      *item_size = 0;
2181
2182      return SVN_NO_ERROR;
2183    }
2184
2185  size = ALIGN_VALUE(entry->size) - entry->key.key_len;
2186  *buffer = apr_palloc(result_pool, size);
2187  memcpy(*buffer, cache->data + entry->offset + entry->key.key_len, size);
2188
2189#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2190
2191  /* Check for overlapping entries.
2192   */
2193  SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2194                 entry->offset + size
2195                    <= get_entry(cache, entry->next)->offset);
2196
2197  /* Compare original content, type and key (hashes)
2198   */
2199  SVN_ERR(store_content_part(tag, *buffer, entry->size - entry->key.key_len,
2200                             result_pool));
2201  SVN_ERR(assert_equal_tags(&entry->tag, tag));
2202
2203#endif
2204
2205  /* update hit statistics
2206   */
2207  increment_hit_counters(cache, entry);
2208  *item_size = entry->size - entry->key.key_len;
2209
2210  return SVN_NO_ERROR;
2211}
2212
2213/* Look for the *ITEM identified by KEY. If no item has been stored
2214 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called
2215 * to re-construct the proper object from the serialized data.
2216 * Allocations will be done in POOL.
2217 */
2218static svn_error_t *
2219membuffer_cache_get(svn_membuffer_t *cache,
2220                    const full_key_t *key,
2221                    void **item,
2222                    svn_cache__deserialize_func_t deserializer,
2223                    DEBUG_CACHE_MEMBUFFER_TAG_ARG
2224                    apr_pool_t *result_pool)
2225{
2226  apr_uint32_t group_index;
2227  char *buffer;
2228  apr_size_t size;
2229
2230  /* find the entry group that will hold the key.
2231   */
2232  group_index = get_group_index(&cache, &key->entry_key);
2233  WITH_READ_LOCK(cache,
2234                 membuffer_cache_get_internal(cache,
2235                                              group_index,
2236                                              key,
2237                                              &buffer,
2238                                              &size,
2239                                              DEBUG_CACHE_MEMBUFFER_TAG
2240                                              result_pool));
2241
2242  /* re-construct the original data object from its serialized form.
2243   */
2244  if (buffer == NULL)
2245    {
2246      *item = NULL;
2247      return SVN_NO_ERROR;
2248    }
2249
2250  return deserializer(item, buffer, size, result_pool);
2251}
2252
2253/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2254 * by the hash value TO_FIND.  If no item has been stored for KEY, *FOUND
2255 * will be FALSE and TRUE otherwise.
2256 */
2257static svn_error_t *
2258membuffer_cache_has_key_internal(svn_membuffer_t *cache,
2259                                 apr_uint32_t group_index,
2260                                 const full_key_t *to_find,
2261                                 svn_boolean_t *found)
2262{
2263  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2264  if (entry)
2265    {
2266      /* This will often be called by "block read" when most data is already
2267         in L2 and only a few previously evicted items are added to L1
2268         again.  While items in L1 are well protected for a while, L2
2269         items may get evicted soon.  Thus, mark them all as "hit" to give
2270         them a higher chance of survival. */
2271      increment_hit_counters(cache, entry);
2272      *found = TRUE;
2273    }
2274  else
2275    {
2276      *found = FALSE;
2277    }
2278
2279  return SVN_NO_ERROR;
2280}
2281
2282/* Look for an entry identified by KEY.  If no item has been stored
2283 * for KEY, *FOUND will be set to FALSE and TRUE otherwise.
2284 *
2285 * Implements svn_cache__has_key for membuffer caches.
2286 */
2287static svn_error_t *
2288membuffer_cache_has_key(svn_membuffer_t *cache,
2289                        const full_key_t *key,
2290                        svn_boolean_t *found)
2291{
2292  /* find the entry group that will hold the key.
2293   */
2294  apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2295  cache->total_reads++;
2296
2297  WITH_READ_LOCK(cache,
2298                 membuffer_cache_has_key_internal(cache,
2299                                                  group_index,
2300                                                  key,
2301                                                  found));
2302
2303  return SVN_NO_ERROR;
2304}
2305
2306/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2307 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
2308 * If not found, *ITEM will be NULL.
2309 *
2310 * Otherwise, the DESERIALIZER is called with that entry and the BATON
2311 * provided and will extract the desired information. The result is set
2312 * in *ITEM. Allocations will be done in POOL.
2313 *
2314 * Note: This function requires the caller to serialize access.
2315 * Don't call it directly, call membuffer_cache_get_partial instead.
2316 */
2317static svn_error_t *
2318membuffer_cache_get_partial_internal(svn_membuffer_t *cache,
2319                                     apr_uint32_t group_index,
2320                                     const full_key_t *to_find,
2321                                     void **item,
2322                                     svn_boolean_t *found,
2323                                     svn_cache__partial_getter_func_t deserializer,
2324                                     void *baton,
2325                                     DEBUG_CACHE_MEMBUFFER_TAG_ARG
2326                                     apr_pool_t *result_pool)
2327{
2328  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2329  cache->total_reads++;
2330  if (entry == NULL)
2331    {
2332      *item = NULL;
2333      *found = FALSE;
2334
2335      return SVN_NO_ERROR;
2336    }
2337  else
2338    {
2339      const void *item_data = cache->data + entry->offset + entry->key.key_len;
2340      apr_size_t item_size = entry->size - entry->key.key_len;
2341      *found = TRUE;
2342      increment_hit_counters(cache, entry);
2343
2344#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2345
2346      /* Check for overlapping entries.
2347       */
2348      SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2349                     entry->offset + entry->size
2350                        <= get_entry(cache, entry->next)->offset);
2351
2352      /* Compare original content, type and key (hashes)
2353       */
2354      SVN_ERR(store_content_part(tag, item_data, item_size, result_pool));
2355      SVN_ERR(assert_equal_tags(&entry->tag, tag));
2356
2357#endif
2358
2359      return deserializer(item, item_data, item_size, baton, result_pool);
2360    }
2361}
2362
2363/* Look for the cache entry identified by KEY. FOUND indicates
2364 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise,
2365 * the DESERIALIZER is called with that entry and the BATON provided
2366 * and will extract the desired information. The result is set in *ITEM.
2367 * Allocations will be done in POOL.
2368 */
2369static svn_error_t *
2370membuffer_cache_get_partial(svn_membuffer_t *cache,
2371                            const full_key_t *key,
2372                            void **item,
2373                            svn_boolean_t *found,
2374                            svn_cache__partial_getter_func_t deserializer,
2375                            void *baton,
2376                            DEBUG_CACHE_MEMBUFFER_TAG_ARG
2377                            apr_pool_t *result_pool)
2378{
2379  apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2380
2381  WITH_READ_LOCK(cache,
2382                 membuffer_cache_get_partial_internal
2383                     (cache, group_index, key, item, found,
2384                      deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG
2385                      result_pool));
2386
2387  return SVN_NO_ERROR;
2388}
2389
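/* A hypothetical partial getter (illustration only, not used by the cache
 * code) matching the DESERIALIZER calls above: instead of reconstructing
 * the whole item, it extracts just the first few bytes of the serialized
 * representation, with the requested length passed in via BATON.
 */
static svn_error_t *
get_first_bytes(void **out,
                const void *data,
                apr_size_t data_len,
                void *baton,
                apr_pool_t *result_pool)
{
  apr_size_t wanted = *(const apr_size_t *)baton;

  /* Copy only the prefix we are interested in. */
  *out = svn_stringbuf_ncreate((const char *)data,
                               MIN(wanted, data_len),
                               result_pool);

  return SVN_NO_ERROR;
}
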
2390/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2391 * by the hash value TO_FIND. If no entry has been found, the function
2392 * returns without modifying the cache.
2393 *
2394 * Otherwise, FUNC is called with that entry and the BATON provided
2395 * and may modify the cache entry. Allocations will be done in POOL.
2396 *
2397 * Note: This function requires the caller to serialize access.
2398 * Don't call it directly, call membuffer_cache_set_partial instead.
2399 */
2400static svn_error_t *
2401membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
2402                                     apr_uint32_t group_index,
2403                                     const full_key_t *to_find,
2404                                     svn_cache__partial_setter_func_t func,
2405                                     void *baton,
2406                                     DEBUG_CACHE_MEMBUFFER_TAG_ARG
2407                                     apr_pool_t *scratch_pool)
2408{
2409  /* cache item lookup
2410   */
2411  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2412  cache->total_reads++;
2413
2414  /* this function is a no-op if the item is not in cache
2415   */
2416  if (entry != NULL)
2417    {
2418      svn_error_t *err;
2419
2420      /* access the serialized cache item */
2421      apr_size_t key_len = entry->key.key_len;
2422      void *item_data = cache->data + entry->offset + key_len;
2423      void *orig_data = item_data;
2424      apr_size_t item_size = entry->size - key_len;
2425
2426      increment_hit_counters(cache, entry);
2427      cache->total_writes++;
2428
2429#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2430
2431      /* Check for overlapping entries.
2432       */
2433      SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2434                     entry->offset + entry->size
2435                        <= get_entry(cache, entry->next)->offset);
2436
2437      /* Compare original content, type and key (hashes)
2438       */
2439      SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2440      SVN_ERR(assert_equal_tags(&entry->tag, tag));
2441
2442#endif
2443
2444      /* modify it, preferably in-situ.
2445       */
2446      err = func(&item_data, &item_size, baton, scratch_pool);
2447
2448      if (err)
2449        {
2450          /* Something somewhere went wrong while FUNC was modifying the
2451           * cached item. Thus, it might have become invalid / corrupted.
2452           * We had better drop it.
2453           */
2454          drop_entry(cache, entry);
2455
2456          return err;
2457        }
2458      else
2459        {
2460          /* if the modification caused a re-allocation, we need to remove
2461           * the old entry and to copy the new data back into cache.
2462           */
2463          if (item_data != orig_data)
2464            {
2465              /* Remove the old entry and try to make space for the new one.
2466               */
2467              drop_entry(cache, entry);
2468              if (   (cache->max_entry_size >= item_size + key_len)
2469                  && ensure_data_insertable_l1(cache, item_size + key_len))
2470                {
2471                  /* Write the new entry.
2472                   */
2473                  entry = find_entry(cache, group_index, to_find, TRUE);
2474                  entry->size = item_size + key_len;
2475                  entry->offset = cache->l1.current_data;
2476
2477                  if (key_len)
2478                    memcpy(cache->data + entry->offset,
2479                           to_find->full_key.data, key_len);
2480                  if (item_size)
2481                    memcpy(cache->data + entry->offset + key_len, item_data,
2482                           item_size);
2483
2484                  /* Link the entry properly.
2485                   */
2486                  insert_entry(cache, entry);
2487                }
2488            }
2489
2490#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2491
2492          /* Remember original content, type and key (hashes)
2493           */
2494          SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2495          memcpy(&entry->tag, tag, sizeof(*tag));
2496
2497#endif
2498        }
2499    }
2500
2501  return SVN_NO_ERROR;
2502}
2503
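/* A hypothetical partial setter (illustration only, not used by the cache
 * code) matching the FUNC calls in membuffer_cache_set_partial_internal
 * above: it patches the first byte of the serialized item in place.
 * Because it neither grows nor re-allocates the buffer, the in-situ path
 * above is taken and no re-insertion is necessary.
 */
static svn_error_t *
patch_first_byte(void **data,
                 apr_size_t *data_len,
                 void *baton,
                 apr_pool_t *scratch_pool)
{
  if (*data_len > 0)
    ((char *)*data)[0] = *(const char *)baton;

  return SVN_NO_ERROR;
}
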
2504/* Look for the cache entry identified by KEY. If no entry
2505 * has been found, the function returns without modifying the cache.
2506 * Otherwise, FUNC is called with that entry and the BATON provided
2507 * and may modify the cache entry. Allocations will be done in POOL.
2508 */
2509static svn_error_t *
2510membuffer_cache_set_partial(svn_membuffer_t *cache,
2511                            const full_key_t *key,
2512                            svn_cache__partial_setter_func_t func,
2513                            void *baton,
2514                            DEBUG_CACHE_MEMBUFFER_TAG_ARG
2515                            apr_pool_t *scratch_pool)
2516{
2517  /* cache item lookup
2518   */
2519  apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2520  WITH_WRITE_LOCK(cache,
2521                  membuffer_cache_set_partial_internal
2522                     (cache, group_index, key, func, baton,
2523                      DEBUG_CACHE_MEMBUFFER_TAG
2524                      scratch_pool));
2525
2526  /* done here -> unlock the cache
2527   */
2528  return SVN_NO_ERROR;
2529}
2530
2531/* Implement the svn_cache__t interface on top of a shared membuffer cache.
2532 *
2533 * Because membuffer caches tend to be very large, there will be rather few
2534 * of them (usually only one). Thus, the same instance shall be used as the
2535 * backend to many application-visible svn_cache__t instances. This should
2536 * also achieve global resource usage fairness.
2537 *
2538 * To accommodate items from multiple resources, the individual keys must be
2539 * unique over all sources. This is achieved by simply adding a prefix key
2540 * that unambiguously identifies the item's context (e.g. path to the
2541 * respective repository). The prefix will be set upon construction of the
2542 * svn_cache__t instance.
2543 */
2544
2545/* Internal cache structure (used in svn_cache__t.cache_internal) basically
2546 * holding the additional parameters needed to call the respective membuffer
2547 * functions.
2548 */
2549typedef struct svn_membuffer_cache_t
2550{
2551  /* this is where all our data will end up
2552   */
2553  svn_membuffer_t *membuffer;
2554
2555  /* use this conversion function when inserting an item into the memcache
2556   */
2557  svn_cache__serialize_func_t serializer;
2558
2559  /* use this conversion function when reading an item from the memcache
2560   */
2561  svn_cache__deserialize_func_t deserializer;
2562
2563  /* Prepend this byte sequence to any key passed to us.
2564   * This makes our keys different from all keys used by svn_membuffer_cache_t
2565   * instances that we don't want to share cached data with.
2566   */
2567  full_key_t prefix;
2568
2569  /* length of the keys that will be passed to us through the
2570   * svn_cache_t interface. May be APR_HASH_KEY_STRING.
2571   */
2572  apr_ssize_t key_len;
2573
2574  /* priority class for all items written through this interface */
2575  apr_uint32_t priority;
2576
2577  /* Temporary buffer containing the hash key for the current access
2578   */
2579  full_key_t combined_key;
2580
2581  /* if enabled, this will serialize the access to this instance.
2582   */
2583  svn_mutex__t *mutex;
2584} svn_membuffer_cache_t;
2585
2586/* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should
2587 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check.
2588 */
2589#define ALLOCATIONS_PER_POOL_CLEAR 10
2590
2591/* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2592 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2593 * This could replace combine_key() entirely but we actually use it only
2594 * when the quick path fails.
2595 */
2596static void
2597combine_long_key(svn_membuffer_cache_t *cache,
2598                 const void *key,
2599                 apr_ssize_t key_len)
2600{
2601  apr_uint32_t *digest_buffer;
2602  char *key_copy;
2603  apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2604  apr_size_t aligned_key_len;
2605
2606  /* handle variable-length keys */
2607  if (key_len == APR_HASH_KEY_STRING)
2608    key_len = strlen((const char *) key);
2609
2610  aligned_key_len = ALIGN_VALUE(key_len);
2611
2612  /* Combine keys. */
2613  svn_membuf__ensure(&cache->combined_key.full_key,
2614                     aligned_key_len + prefix_len);
2615
2616  key_copy = (char *)cache->combined_key.full_key.data + prefix_len;
2617  cache->combined_key.entry_key.key_len = aligned_key_len + prefix_len;
2618  memcpy(key_copy, key, key_len);
2619  memset(key_copy + key_len, 0, aligned_key_len - key_len);
2620
2621  /* Hash key into 16 bytes. */
2622  digest_buffer = (apr_uint32_t *)cache->combined_key.entry_key.fingerprint;
2623  svn__fnv1a_32x4_raw(digest_buffer, key, key_len);
2624
2625  /* Combine with prefix. */
2626  cache->combined_key.entry_key.fingerprint[0]
2627    ^= cache->prefix.entry_key.fingerprint[0];
2628  cache->combined_key.entry_key.fingerprint[1]
2629    ^= cache->prefix.entry_key.fingerprint[1];
2630}
2631
2632/* Basically calculate a hash value for KEY of length KEY_LEN, combine it
2633 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
2634 */
2635static void
2636combine_key(svn_membuffer_cache_t *cache,
2637            const void *key,
2638            apr_ssize_t key_len)
2639{
2640  /* short, fixed-size keys are the most common case */
2641  if (key_len != APR_HASH_KEY_STRING && key_len <= 16)
2642    {
2643      const apr_size_t prefix_len = cache->prefix.entry_key.key_len;
2644
2645      /* Copy of *key, padded with 0.
2646       * We put it just behind the prefix already copied into the COMBINED_KEY.
2647       * The buffer space has been allocated when the cache was created. */
2648      apr_uint64_t *data = (void *)((char *)cache->combined_key.full_key.data +
2649                                    prefix_len);
2650      assert(prefix_len <= cache->combined_key.full_key.size - 16);
2651      cache->combined_key.entry_key.key_len = prefix_len + 16;
2652
2653      data[0] = 0;
2654      data[1] = 0;
2655      memcpy(data, key, key_len);
2656
2657      /* scramble key DATA.  All of this must be reversible to prevent key
2658       * collisions.  So, we limit ourselves to xor and permutations. */
2659      data[1] = (data[1] << 27) | (data[1] >> 37);
2660      data[1] ^= data[0] & 0xffff;
2661      data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
2662
2663      /* combine with this cache's namespace */
2664      cache->combined_key.entry_key.fingerprint[0]
2665        = data[0] ^ cache->prefix.entry_key.fingerprint[0];
2666      cache->combined_key.entry_key.fingerprint[1]
2667        = data[1] ^ cache->prefix.entry_key.fingerprint[1];
2668    }
2669  else
2670    {
2671      /* longer or variably sized keys */
2672      combine_long_key(cache, key, key_len);
2673    }
2674}
2675
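/* A sketch (hypothetical helper, not used by the cache code) showing that
 * the short-key scrambling in combine_key above is indeed reversible and
 * therefore collision-free: undoing the xor steps in reverse order and
 * rotating back by 27 bits recovers the original, zero-padded 16-byte key.
 */
static void
unscramble_short_key(apr_uint64_t *data)
{
  /* Inverse of the three scrambling steps above, applied in reverse. */
  data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
  data[1] ^= data[0] & 0xffff;
  data[1] = (data[1] >> 27) | (data[1] << 37);
}
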
2676/* Implement svn_cache__vtable_t.get (not thread-safe)
2677 */
2678static svn_error_t *
2679svn_membuffer_cache_get(void **value_p,
2680                        svn_boolean_t *found,
2681                        void *cache_void,
2682                        const void *key,
2683                        apr_pool_t *result_pool)
2684{
2685  svn_membuffer_cache_t *cache = cache_void;
2686
2687  DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2688
2689  /* special case */
2690  if (key == NULL)
2691    {
2692      *value_p = NULL;
2693      *found = FALSE;
2694
2695      return SVN_NO_ERROR;
2696    }
2697
2698  /* construct the full, i.e. globally unique, key by adding
2699   * this cache instance's prefix
2700   */
2701  combine_key(cache, key, cache->key_len);
2702
2703  /* Look the item up. */
2704  SVN_ERR(membuffer_cache_get(cache->membuffer,
2705                              &cache->combined_key,
2706                              value_p,
2707                              cache->deserializer,
2708                              DEBUG_CACHE_MEMBUFFER_TAG
2709                              result_pool));
2710
2711  /* return result */
2712  *found = *value_p != NULL;
2713
2714  return SVN_NO_ERROR;
2715}
2716
2717/* Implement svn_cache__vtable_t.has_key (not thread-safe)
2718 */
2719static svn_error_t *
2720svn_membuffer_cache_has_key(svn_boolean_t *found,
2721                            void *cache_void,
2722                            const void *key,
2723                            apr_pool_t *scratch_pool)
2724{
2725  svn_membuffer_cache_t *cache = cache_void;
2726
2727  /* special case */
2728  if (key == NULL)
2729    {
2730      *found = FALSE;
2731
2732      return SVN_NO_ERROR;
2733    }
2734
2735  /* construct the full, i.e. globally unique, key by adding
2736   * this cache instance's prefix
2737   */
2738  combine_key(cache, key, cache->key_len);
2739
2740  /* Look the item up. */
2741  SVN_ERR(membuffer_cache_has_key(cache->membuffer,
2742                                  &cache->combined_key,
2743                                  found));
2744
2745  /* return result */
2746  return SVN_NO_ERROR;
2747}
2748
2749/* Implement svn_cache__vtable_t.set (not thread-safe)
2750 */
2751static svn_error_t *
2752svn_membuffer_cache_set(void *cache_void,
2753                        const void *key,
2754                        void *value,
2755                        apr_pool_t *scratch_pool)
2756{
2757  svn_membuffer_cache_t *cache = cache_void;
2758
2759  DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2760
2761  /* special case */
2762  if (key == NULL)
2763    return SVN_NO_ERROR;
2764
2765  /* construct the full, i.e. globally unique, key by adding
2766   * this cache instance's prefix
2767   */
2768  combine_key(cache, key, cache->key_len);
2769
2770  /* (probably) add the item to the cache. But there is no real guarantee
2771   * that the item will actually be cached afterwards.
2772   */
2773  return membuffer_cache_set(cache->membuffer,
2774                             &cache->combined_key,
2775                             value,
2776                             cache->serializer,
2777                             cache->priority,
2778                             DEBUG_CACHE_MEMBUFFER_TAG
2779                             scratch_pool);
2780}
2781
2782/* Implement svn_cache__vtable_t.iter as "not implemented"
2783 */
2784static svn_error_t *
2785svn_membuffer_cache_iter(svn_boolean_t *completed,
2786                          void *cache_void,
2787                          svn_iter_apr_hash_cb_t user_cb,
2788                          void *user_baton,
2789                          apr_pool_t *scratch_pool)
2790{
2791  return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL,
2792                          _("Can't iterate a membuffer-based cache"));
2793}
2794
2795/* Implement svn_cache__vtable_t.get_partial (not thread-safe)
2796 */
2797static svn_error_t *
2798svn_membuffer_cache_get_partial(void **value_p,
2799                                svn_boolean_t *found,
2800                                void *cache_void,
2801                                const void *key,
2802                                svn_cache__partial_getter_func_t func,
2803                                void *baton,
2804                                apr_pool_t *result_pool)
2805{
2806  svn_membuffer_cache_t *cache = cache_void;
2807
2808  DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool)
2809
2810  if (key == NULL)
2811    {
2812      *value_p = NULL;
2813      *found = FALSE;
2814
2815      return SVN_NO_ERROR;
2816    }
2817
2818  combine_key(cache, key, cache->key_len);
2819  SVN_ERR(membuffer_cache_get_partial(cache->membuffer,
2820                                      &cache->combined_key,
2821                                      value_p,
2822                                      found,
2823                                      func,
2824                                      baton,
2825                                      DEBUG_CACHE_MEMBUFFER_TAG
2826                                      result_pool));
2827
2828  return SVN_NO_ERROR;
2829}
2830
2831/* Implement svn_cache__vtable_t.set_partial (not thread-safe)
2832 */
2833static svn_error_t *
2834svn_membuffer_cache_set_partial(void *cache_void,
2835                                const void *key,
2836                                svn_cache__partial_setter_func_t func,
2837                                void *baton,
2838                                apr_pool_t *scratch_pool)
2839{
2840  svn_membuffer_cache_t *cache = cache_void;
2841
2842  DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2843
2844  if (key != NULL)
2845    {
2846      combine_key(cache, key, cache->key_len);
2847      SVN_ERR(membuffer_cache_set_partial(cache->membuffer,
2848                                          &cache->combined_key,
2849                                          func,
2850                                          baton,
2851                                          DEBUG_CACHE_MEMBUFFER_TAG
2852                                          scratch_pool));
2853    }
2854  return SVN_NO_ERROR;
2855}
2856
2857/* Implement svn_cache__vtable_t.is_cachable
2858 * (thread-safe even without mutex)
2859 */
2860static svn_boolean_t
2861svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size)
2862{
2863  /* Don't allow extremely large element sizes. Otherwise, the cache
2864   * might be thrashed by a few extremely large entries. And the size
2865   * must be small enough to be stored in a 32 bit value.
2866   */
2867  svn_membuffer_cache_t *cache = cache_void;
2868  return cache->priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY
2869       ? cache->membuffer->l2.size >= size && MAX_ITEM_SIZE >= size
2870       : size <= cache->membuffer->max_entry_size;
2871}
2872
2873/* Add statistics of SEGMENT to INFO.  If INCLUDE_HISTOGRAM is TRUE,
2874 * accumulate index bucket fill levels in INFO->HISTOGRAM.
2875 */
2876static svn_error_t *
2877svn_membuffer_get_segment_info(svn_membuffer_t *segment,
2878                               svn_cache__info_t *info,
2879                               svn_boolean_t include_histogram)
2880{
2881  apr_uint32_t i;
2882
2883  info->data_size += segment->l1.size + segment->l2.size;
2884  info->used_size += segment->data_used;
2885  info->total_size += segment->l1.size + segment->l2.size +
2886      segment->group_count * GROUP_SIZE * sizeof(entry_t);
2887
2888  info->used_entries += segment->used_entries;
2889  info->total_entries += segment->group_count * GROUP_SIZE;
2890
2891  if (include_histogram)
2892    for (i = 0; i < segment->group_count; ++i)
2893      if (is_group_initialized(segment, i))
2894        {
2895          entry_group_t *chain_end
2896            = last_group_in_chain(segment, &segment->directory[i]);
2897          apr_size_t use
2898            = MIN(chain_end->header.used,
2899                  sizeof(info->histogram) / sizeof(info->histogram[0]) - 1);
2900          info->histogram[use]++;
2901        }
2902
2903  return SVN_NO_ERROR;
2904}
2905
2906/* Implement svn_cache__vtable_t.get_info
2907 * (thread-safe even without mutex)
2908 */
2909static svn_error_t *
2910svn_membuffer_cache_get_info(void *cache_void,
2911                             svn_cache__info_t *info,
2912                             svn_boolean_t reset,
2913                             apr_pool_t *result_pool)
2914{
2915  svn_membuffer_cache_t *cache = cache_void;
2916  apr_uint32_t i;
2917
2918  /* cache front-end specific data */
2919
2920  info->id = apr_pstrdup(result_pool, cache->prefix.full_key.data);
2921
2922  /* collect info from shared cache back-end */
2923
2924  for (i = 0; i < cache->membuffer->segment_count; ++i)
2925    {
2926      svn_membuffer_t *segment = cache->membuffer + i;
2927      WITH_READ_LOCK(segment,
2928                     svn_membuffer_get_segment_info(segment, info, FALSE));
2929    }
2930
2931  return SVN_NO_ERROR;
2932}
2933
2934
2935/* the v-table for membuffer-based caches (single-threaded access)
2936 */
2937static svn_cache__vtable_t membuffer_cache_vtable = {
2938  svn_membuffer_cache_get,
2939  svn_membuffer_cache_has_key,
2940  svn_membuffer_cache_set,
2941  svn_membuffer_cache_iter,
2942  svn_membuffer_cache_is_cachable,
2943  svn_membuffer_cache_get_partial,
2944  svn_membuffer_cache_set_partial,
2945  svn_membuffer_cache_get_info
2946};
2947
2948/* Implement svn_cache__vtable_t.get and serialize all cache access.
2949 */
2950static svn_error_t *
2951svn_membuffer_cache_get_synced(void **value_p,
2952                               svn_boolean_t *found,
2953                               void *cache_void,
2954                               const void *key,
2955                               apr_pool_t *result_pool)
2956{
2957  svn_membuffer_cache_t *cache = cache_void;
2958  SVN_MUTEX__WITH_LOCK(cache->mutex,
2959                       svn_membuffer_cache_get(value_p,
2960                                               found,
2961                                               cache_void,
2962                                               key,
2963                                               result_pool));
2964
2965  return SVN_NO_ERROR;
2966}
2967
2968/* Implement svn_cache__vtable_t.has_key and serialize all cache access.
2969 */
2970static svn_error_t *
2971svn_membuffer_cache_has_key_synced(svn_boolean_t *found,
2972                                   void *cache_void,
2973                                   const void *key,
2974                                   apr_pool_t *result_pool)
2975{
2976  svn_membuffer_cache_t *cache = cache_void;
2977  SVN_MUTEX__WITH_LOCK(cache->mutex,
2978                       svn_membuffer_cache_has_key(found,
2979                                                   cache_void,
2980                                                   key,
2981                                                   result_pool));
2982
2983  return SVN_NO_ERROR;
2984}
2985
2986/* Implement svn_cache__vtable_t.set and serialize all cache access.
2987 */
2988static svn_error_t *
2989svn_membuffer_cache_set_synced(void *cache_void,
2990                               const void *key,
2991                               void *value,
2992                               apr_pool_t *scratch_pool)
2993{
2994  svn_membuffer_cache_t *cache = cache_void;
2995  SVN_MUTEX__WITH_LOCK(cache->mutex,
2996                       svn_membuffer_cache_set(cache_void,
2997                                               key,
2998                                               value,
2999                                               scratch_pool));
3000
3001  return SVN_NO_ERROR;
3002}
3003
3004/* Implement svn_cache__vtable_t.get_partial and serialize all cache access.
3005 */
3006static svn_error_t *
3007svn_membuffer_cache_get_partial_synced(void **value_p,
3008                                       svn_boolean_t *found,
3009                                       void *cache_void,
3010                                       const void *key,
3011                                       svn_cache__partial_getter_func_t func,
3012                                       void *baton,
3013                                       apr_pool_t *result_pool)
3014{
3015  svn_membuffer_cache_t *cache = cache_void;
3016  SVN_MUTEX__WITH_LOCK(cache->mutex,
3017                       svn_membuffer_cache_get_partial(value_p,
3018                                                       found,
3019                                                       cache_void,
3020                                                       key,
3021                                                       func,
3022                                                       baton,
3023                                                       result_pool));
3024
3025  return SVN_NO_ERROR;
3026}
3027
3028/* Implement svn_cache__vtable_t.set_partial and serialize all cache access.
3029 */
3030static svn_error_t *
3031svn_membuffer_cache_set_partial_synced(void *cache_void,
3032                                       const void *key,
3033                                       svn_cache__partial_setter_func_t func,
3034                                       void *baton,
3035                                       apr_pool_t *scratch_pool)
3036{
3037  svn_membuffer_cache_t *cache = cache_void;
3038  SVN_MUTEX__WITH_LOCK(cache->mutex,
3039                       svn_membuffer_cache_set_partial(cache_void,
3040                                                       key,
3041                                                       func,
3042                                                       baton,
3043                                                       scratch_pool));
3044
3045  return SVN_NO_ERROR;
3046}
3047
3048/* the v-table for membuffer-based caches (multi-threaded access)
3049 */
3050static svn_cache__vtable_t membuffer_cache_synced_vtable = {
3051  svn_membuffer_cache_get_synced,
3052  svn_membuffer_cache_has_key_synced,
3053  svn_membuffer_cache_set_synced,
3054  svn_membuffer_cache_iter,               /* no sync required */
3055  svn_membuffer_cache_is_cachable,        /* no sync required */
3056  svn_membuffer_cache_get_partial_synced,
3057  svn_membuffer_cache_set_partial_synced,
3058  svn_membuffer_cache_get_info            /* no sync required */
3059};
3060
3061/* standard serialization function for svn_stringbuf_t items.
3062 * Implements svn_cache__serialize_func_t.
3063 */
3064static svn_error_t *
3065serialize_svn_stringbuf(void **buffer,
3066                        apr_size_t *buffer_size,
3067                        void *item,
3068                        apr_pool_t *result_pool)
3069{
3070  svn_stringbuf_t *value_str = item;
3071
3072  *buffer = value_str->data;
3073  *buffer_size = value_str->len + 1;
3074
3075  return SVN_NO_ERROR;
3076}
3077
3078/* standard de-serialization function for svn_stringbuf_t items.
3079 * Implements svn_cache__deserialize_func_t.
3080 */
3081static svn_error_t *
3082deserialize_svn_stringbuf(void **item,
3083                          void *buffer,
3084                          apr_size_t buffer_size,
3085                          apr_pool_t *result_pool)
3086{
3087  svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t));
3088
3089  value_str->pool = result_pool;
3090  value_str->blocksize = buffer_size;
3091  value_str->data = buffer;
3092  value_str->len = buffer_size-1;
3093  *item = value_str;
3094
3095  return SVN_NO_ERROR;
3096}
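/* [Editorial illustration, not part of the original file]
 * A hypothetical serializer / deserializer pair for a pointer-free struct,
 * showing the minimal contract behind the two svn_stringbuf_t functions
 * above: the serializer returns a flat buffer plus its size; the
 * deserializer receives a writable copy of that buffer allocated in
 * RESULT_POOL and may reuse it directly.  All names below are made up for
 * the example.
 */
#if 0
typedef struct example_pair_t
{
  apr_int64_t revision;
  apr_int64_t size;
} example_pair_t;

/* Implements svn_cache__serialize_func_t for example_pair_t. */
static svn_error_t *
example_serialize_pair(void **buffer,
                       apr_size_t *buffer_size,
                       void *item,
                       apr_pool_t *result_pool)
{
  /* No embedded pointers, so the in-memory layout is already flat. */
  *buffer = item;
  *buffer_size = sizeof(example_pair_t);

  return SVN_NO_ERROR;
}

/* Implements svn_cache__deserialize_func_t for example_pair_t. */
static svn_error_t *
example_deserialize_pair(void **item,
                         void *buffer,
                         apr_size_t buffer_size,
                         apr_pool_t *result_pool)
{
  /* BUFFER is a private, writable copy; hand it back as the item. */
  *item = buffer;

  return SVN_NO_ERROR;
}
#endif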
3097
3098/* Construct a svn_cache__t front-end object on top of the shared membuffer cache.
3099 */
3100svn_error_t *
3101svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
3102                                  svn_membuffer_t *membuffer,
3103                                  svn_cache__serialize_func_t serializer,
3104                                  svn_cache__deserialize_func_t deserializer,
3105                                  apr_ssize_t klen,
3106                                  const char *prefix,
3107                                  apr_uint32_t priority,
3108                                  svn_boolean_t thread_safe,
3109                                  apr_pool_t *result_pool,
3110                                  apr_pool_t *scratch_pool)
3111{
3112  svn_checksum_t *checksum;
3113  apr_size_t prefix_len, prefix_orig_len;
3114
3115  /* allocate the cache header structures
3116   */
3117  svn_cache__t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
3118  svn_membuffer_cache_t *cache = apr_pcalloc(result_pool, sizeof(*cache));
3119
3120  /* initialize our internal cache header
3121   */
3122  cache->membuffer = membuffer;
3123  cache->serializer = serializer
3124                    ? serializer
3125                    : serialize_svn_stringbuf;
3126  cache->deserializer = deserializer
3127                      ? deserializer
3128                      : deserialize_svn_stringbuf;
3129  cache->priority = priority;
3130  cache->key_len = klen;
3131
3132  SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, result_pool));
3133
3134  /* Copy the prefix into the prefix full key. Align it to ITEM_ALIGNMENT.
3135   * Don't forget to include the terminating NUL. */
3136  prefix_orig_len = strlen(prefix) + 1;
3137  prefix_len = ALIGN_VALUE(prefix_orig_len);
3138
3139  svn_membuf__create(&cache->prefix.full_key, prefix_len, result_pool);
3140  memcpy((char *)cache->prefix.full_key.data, prefix, prefix_orig_len);
3141  memset((char *)cache->prefix.full_key.data + prefix_orig_len, 0,
3142         prefix_len - prefix_orig_len);
3143
3144  /* Construct the folded prefix key. */
3145  SVN_ERR(svn_checksum(&checksum,
3146                       svn_checksum_md5,
3147                       prefix,
3148                       strlen(prefix),
3149                       scratch_pool));
3150  memcpy(cache->prefix.entry_key.fingerprint, checksum->digest,
3151         sizeof(cache->prefix.entry_key.fingerprint));
3152  cache->prefix.entry_key.key_len = prefix_len;
3153
3154  /* Initialize the combined key. Pre-allocate some extra room in the full
3155   * key such that we probably don't need to re-alloc. */
3156  cache->combined_key.entry_key = cache->prefix.entry_key;
3157  svn_membuf__create(&cache->combined_key.full_key, prefix_len + 200,
3158                     result_pool);
3159  memcpy(cache->combined_key.full_key.data, cache->prefix.full_key.data,
3160         prefix_len);
3161
3162  /* initialize the generic cache wrapper
3163   */
3164  wrapper->vtable = thread_safe ? &membuffer_cache_synced_vtable
3165                                : &membuffer_cache_vtable;
3166  wrapper->cache_internal = cache;
3167  wrapper->error_handler = NULL;
3168  wrapper->error_baton = NULL;
3169  wrapper->pretend_empty = !!getenv("SVN_X_DOES_NOT_MARK_THE_SPOT");
3170
3171  *cache_p = wrapper;
3172  return SVN_NO_ERROR;
3173}
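/* [Editorial illustration, not part of the original file]
 * A hypothetical caller wiring a string-keyed cache to the process-global
 * membuffer.  svn_cache__get(), svn_cache__set() and
 * SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY are assumed from
 * private/svn_cache.h; treat this as a sketch rather than canonical usage.
 */
#if 0
static svn_error_t *
example_create_and_use(apr_pool_t *pool)
{
  svn_cache__t *cache;
  svn_stringbuf_t *value;
  svn_boolean_t found;

  /* NULL serializer / deserializer selects the svn_stringbuf_t defaults
   * above; APR_HASH_KEY_STRING declares the keys to be NUL-terminated
   * strings. */
  SVN_ERR(svn_cache__create_membuffer_cache(
            &cache,
            svn_cache__get_global_membuffer_cache(),
            NULL, NULL,
            APR_HASH_KEY_STRING,
            "example:",                            /* hypothetical prefix */
            SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY,
            TRUE,                                  /* thread-safe front-end */
            pool, pool));

  SVN_ERR(svn_cache__set(cache, "some-key",
                         svn_stringbuf_create("some value", pool), pool));
  SVN_ERR(svn_cache__get((void **)&value, &found, cache, "some-key", pool));

  return SVN_NO_ERROR;
}
#endif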
3174
3175static svn_error_t *
3176svn_membuffer_get_global_segment_info(svn_membuffer_t *segment,
3177                                      svn_cache__info_t *info)
3178{
3179  info->gets += segment->total_reads;
3180  info->sets += segment->total_writes;
3181  info->hits += segment->total_hits;
3182
3183  WITH_READ_LOCK(segment,
3184                  svn_membuffer_get_segment_info(segment, info, TRUE));
3185
3186  return SVN_NO_ERROR;
3187}
3188
3189svn_cache__info_t *
3190svn_cache__membuffer_get_global_info(apr_pool_t *pool)
3191{
3192  apr_uint32_t i;
3193
3194  svn_membuffer_t *membuffer = svn_cache__get_global_membuffer_cache();
3195  svn_cache__info_t *info = apr_pcalloc(pool, sizeof(*info));
3196
3197  /* cache front-end specific data */
3198
3199  info->id = "membuffer globals";
3200
3201  /* collect info from shared cache back-end */
3202
3203  for (i = 0; i < membuffer->segment_count; ++i)
3204    svn_error_clear(svn_membuffer_get_global_segment_info(membuffer + i,
3205                                                          info));
3206
3207  return info;
3208}
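/* [Editorial illustration, not part of the original file]
 * A hypothetical consumer of the statistics collected above, e.g. for a
 * debug log line.  Only svn_cache__info_t members already referenced in
 * this file (gets, hits) are used.
 */
#if 0
static double
example_global_hit_ratio(apr_pool_t *pool)
{
  svn_cache__info_t *info = svn_cache__membuffer_get_global_info(pool);

  /* Guard against division by zero on a cache that has not been read yet. */
  return info->gets ? (double)info->hits / (double)info->gets : 0.0;
}
#endif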
3209