cache-membuffer.c revision 299742
/*
 * cache-membuffer.c: in-memory caching for Subversion
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */

#include <assert.h>
#include <apr_md5.h>
#include <apr_thread_rwlock.h>

#include "svn_pools.h"
#include "svn_checksum.h"
#include "svn_private_config.h"
#include "svn_string.h"
#include "svn_sorts.h"  /* get the MIN macro */

#include "private/svn_atomic.h"
#include "private/svn_dep_compat.h"
#include "private/svn_mutex.h"
#include "private/svn_string_private.h"

#include "cache.h"
#include "fnv1a.h"

/*
 * This svn_cache__t implementation actually consists of two parts:
 * a shared (per-process) singleton membuffer cache instance and shallow
 * svn_cache__t front-end instances that each use different key spaces.
 * For data management, they all forward to the singleton membuffer cache.
 *
 * A membuffer cache consists of two parts:
 *
 * 1. A linear data buffer containing cached items in a serialized
 *    representation, prefixed by their full cache keys. There may be
 *    arbitrary gaps between entries. This buffer is sub-divided into
 *    (currently two) cache levels.
 *
 * 2. A directory of cache entries. This is organized similar to CPU
 *    data caches: for every possible key, there is exactly one group
 *    of entries that may contain the header info for an item with
 *    that given key. The result is a GROUP_SIZE+-way associative cache
 *    whose associativity can be dynamically increased.
 *
 * Only the start address of each of these two data parts is given as a
 * native pointer. All other references are expressed as offsets to these
 * pointers. With that design, it is relatively easy to share the same data
 * structure between different processes and / or to persist them on disk.
 * These out-of-process features have not been implemented, yet.
 *
 * Superficially, cache levels are being used as usual: insertion happens
 * into L1 and evictions will promote items to L2. But their whole point
 * is a different one. L1 uses a circular buffer, i.e. we have perfect
 * caching for the last N bytes where N is the size of L1. L2 uses a more
 * elaborate scheme based on priorities and hit counts as described below.
 *
 * The data buffer usage information is implicitly given by the directory
 * entries. Every USED entry has a reference to the previous and the next
 * used dictionary entry and this double-linked list is ordered by the
 * offsets of their item data within the data buffer.
 * So removing data, for instance, is done simply by unlinking it from the
 * chain, implicitly marking the entry as well as the data buffer section
 * previously associated to it as unused. First and last element of that
 * chain are being referenced from the respective cache level.
 *
 * Insertion can occur at only one, sliding position per cache level. It is
 * marked by its offset in the data buffer and the index of the first used
 * entry at or behind that position. If this gap is too small to accommodate
 * the new item (plus its full key), the insertion window is extended as
 * described below. The new entry will always be inserted at the bottom end
 * of the window and since the next used entry is known, properly sorted
 * insertion is possible.
 *
 * To make the cache perform robustly in a wide range of usage scenarios,
 * L2 uses a randomized variant of LFU (see ensure_data_insertable_l2 for
 * details). Every item holds a read hit counter and there is a global read
 * hit counter. The more hits an entry has in relation to the average, the
 * more likely it is to be kept using a rand()-based condition. The test is
 * applied only to the entry following the insertion window. If it doesn't
 * get evicted, it is moved to the beginning of that window and the window
 * is moved.
 *
 * Moreover, the entry's hits get halved to make that entry more likely to
 * be removed the next time the sliding insertion / removal window comes by.
 * As a result, frequently used entries are likely not to be dropped until
 * they have not been used for a while. Also, even in a cache thrashing
 * situation, about 50% of the content survives every time 50% of the cache
 * gets re-written with new entries. For details on the fine-tuning
 * involved, see the comments in ensure_data_insertable_l2().
 *
 * Due to the randomized mapping of keys to entry groups, some groups may
 * overflow. In that case, there are spare groups that can be chained to
 * an already used group to extend it.
 *
 * To limit the entry size and management overhead, not the actual item keys
 * but only their hashed "fingerprint" will be stored. These are reasonably
 * unique to prevent collisions, so we only need to support up to one entry
 * per entry key. To guarantee that there are no conflicts, however, we
 * store the actual full key immediately in front of the serialized item
 * data. That is, the entry offset actually points to the full key and the
 * key length stored in the entry acts as an additional offset to find the
 * actual item.
 *
 * All access to the cached data needs to be serialized. Because we want
 * to scale well despite that bottleneck, we simply segment the cache into
 * a number of independent caches (segments). Items will be multiplexed
 * based on their hash key.
 */

/* APR's read-write lock implementation on Windows is horribly inefficient.
 * Even with very low contention a runtime overhead of 35% has been
 * measured for 'svn-bench null-export' over ra_serf.
 *
 * Use a simple mutex on Windows.  Because there is one mutex per segment,
 * large machines should (and usually can) be configured with large caches
 * such that read contention is kept low.  This is basically the situation
 * we had before 1.8.
 */
#ifdef WIN32
#  define USE_SIMPLE_MUTEX 1
#else
#  define USE_SIMPLE_MUTEX 0
#endif

/* For more efficient copy operations, let's align all data items properly.
 * Since we can't portably align pointers, this is rather the item size
 * granularity which ensures *relative* alignment within the cache - still
 * giving us decent copy speeds on most machines.
 *
 * Must be a power of 2.
 */
#define ITEM_ALIGNMENT 16

/* By default, don't create cache segments smaller than this value unless
 * the total cache size itself is smaller.
 */
#define DEFAULT_MIN_SEGMENT_SIZE APR_UINT64_C(0x2000000)

/* The minimum segment size we will allow for multi-segmented caches
 */
#define MIN_SEGMENT_SIZE APR_UINT64_C(0x10000)

/* The maximum number of segments allowed. Larger numbers reduce the size
 * of each segment, in turn reducing the max size of a cachable item.
 * Also, each segment gets its own lock object. The actual number supported
 * by the OS may therefore be lower and svn_cache__membuffer_cache_create
 * may return an error.
 */
#define MAX_SEGMENT_COUNT 0x10000

/* As of today, APR won't allocate chunks of 4GB or more. So, limit the
 * segment size to slightly below that.
 */
#define MAX_SEGMENT_SIZE APR_UINT64_C(0xffff0000)

/* We don't mark the initialization status for every group but initialize
 * a number of groups at once. That will allow for a very small init flags
 * vector that is likely to fit into the CPU caches even for fairly large
 * membuffer caches. For instance, the default of 32 means 8x32 groups per
 * byte, i.e. 8 flags/byte x 32 groups/flag x 8 entries/group x 40 index
 * bytes/entry x 8 cache bytes/index byte = 1kB init vector / 640MB cache.
 */
#define GROUP_INIT_GRANULARITY 32

/* Invalid index reference value. Equivalent to APR_UINT32_T(-1)
 */
#define NO_INDEX APR_UINT32_MAX

/* To save space in our group structure, we only use 32 bit size values
 * and, therefore, limit the size of each entry to just below 4GB.
 * Supporting larger items is not a good idea as the data transfer
 * to and from the cache would block other threads for a very long time.
 */
#define MAX_ITEM_SIZE ((apr_uint32_t)(0 - ITEM_ALIGNMENT))

/* We use this structure to identify cache entries. There cannot be two
 * entries with the same entry key. However unlikely, though, two different
 * full keys (see full_key_t) may have the same entry key. That is a
 * collision and at most one of them can be stored in the cache at any time.
 */
typedef struct entry_key_t
{
  /* 16 byte finger print of the full key. */
  apr_uint64_t fingerprint[2];

  /* Length of the full key. This value is aligned to ITEM_ALIGNMENT to
   * make sure the subsequent item content is properly aligned. */
  apr_size_t key_len;
} entry_key_t;

/* A full key, i.e. the combination of the cache's key prefix with some
 * dynamic part appended to it.  It also contains its ENTRY_KEY.
 */
typedef struct full_key_t
{
  /* Reduced form identifying the cache entry (if such an entry exists). */
  entry_key_t entry_key;

  /* This contains the full combination.  Note that the SIZE element may
   * be larger than ENTRY_KEY.KEY_LEN, but only the latter determines the
   * valid key size. */
  svn_membuf_t full_key;
} full_key_t;
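/* Illustrative sketch (not part of the original file): the macro
 * arithmetic above can be sanity-checked.  With ITEM_ALIGNMENT == 16,
 * MAX_ITEM_SIZE evaluates to 0 - 16 modulo 2^32, i.e. 0xfffffff0, just
 * below 4GB.  The function below is hypothetical and excluded from the
 * build.
 */
#if 0
static void
example_macro_invariants(void)
{
  /* ITEM_ALIGNMENT must be a power of 2 for the alignment tricks to work. */
  assert((ITEM_ALIGNMENT & (ITEM_ALIGNMENT - 1)) == 0);

  /* 0 - 16, taken modulo 2^32, is 0xfffffff0. */
  assert(MAX_ITEM_SIZE == (apr_uint32_t)0xfffffff0);
}
#endif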
/* Debugging / corruption detection support.
 * If you define this macro, the getter functions will perform expensive
 * checks on the item data, requested keys and entry types. If there is
 * a mismatch found in any of them when being compared with the values
 * remembered in the setter function, an error will be returned.
 */
#ifdef SVN_DEBUG_CACHE_MEMBUFFER

/* The prefix passed to svn_cache__create_membuffer_cache() effectively
 * defines the type of all items stored by that cache instance. We'll take
 * the last 15 bytes + \0 as plaintext for easy identification by the dev.
 */
#define PREFIX_TAIL_LEN 16

/* This record will be attached to any cache entry. It tracks item data
 * (content), key and type as hash values and is the baseline against which
 * the getters will compare their results to detect inconsistencies.
 */
typedef struct entry_tag_t
{
  /* MD5 checksum over the serialized item data.
   */
  unsigned char content_hash[APR_MD5_DIGESTSIZE];

  /* Hash value of the svn_cache_t instance that wrote the item
   * (i.e. a combination of type and repository)
   */
  unsigned char prefix_hash[APR_MD5_DIGESTSIZE];

  /* Note that this only covers the variable part of the key,
   * i.e. it will be different from the full key hash used for
   * cache indexing.
   */
  unsigned char key_hash[APR_MD5_DIGESTSIZE];

  /* Last letters of the key in human readable format
   * (ends with the type identifier, e.g. "DAG")
   */
  char prefix_tail[PREFIX_TAIL_LEN];

  /* Length of the variable key part.
   */
  apr_size_t key_len;

} entry_tag_t;

/* Initialize all members of TAG except for the content hash.
 */
static svn_error_t *store_key_part(entry_tag_t *tag,
                                   const full_key_t *prefix_key,
                                   const void *key,
                                   apr_size_t key_len,
                                   apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  const char *prefix = prefix_key->full_key.data;
  apr_size_t prefix_len = strlen(prefix);

  /* Use ">=" here: the copy below transfers PREFIX_LEN + 1 bytes
   * (including the terminating NUL), so PREFIX_LEN must be clamped to
   * one less than the buffer size. */
  if (prefix_len >= sizeof(tag->prefix_tail))
    {
      prefix += prefix_len - (sizeof(tag->prefix_tail) - 1);
      prefix_len = sizeof(tag->prefix_tail) - 1;
    }

  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       key,
                       key_len,
                       pool));

  memcpy(tag->prefix_hash, prefix_key->entry_key.fingerprint,
         sizeof(tag->prefix_hash));
  memcpy(tag->key_hash, checksum->digest, sizeof(tag->key_hash));

  memset(tag->prefix_tail, 0, sizeof(tag->prefix_tail));
  memcpy(tag->prefix_tail, prefix, prefix_len + 1);

  tag->key_len = key_len;

  return SVN_NO_ERROR;
}

/* Initialize the content hash member of TAG.
 */
static svn_error_t* store_content_part(entry_tag_t *tag,
                                       const void *data,
                                       apr_size_t size,
                                       apr_pool_t *pool)
{
  svn_checksum_t *checksum;
  SVN_ERR(svn_checksum(&checksum,
                       svn_checksum_md5,
                       data,
                       size,
                       pool));

  memcpy(tag->content_hash, checksum->digest, sizeof(tag->content_hash));

  return SVN_NO_ERROR;
}
/* Compare two tags and fail with an assertion upon differences.
 */
static svn_error_t* assert_equal_tags(const entry_tag_t *lhs,
                                      const entry_tag_t *rhs)
{
  SVN_ERR_ASSERT(memcmp(lhs->content_hash, rhs->content_hash,
                        sizeof(lhs->content_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_hash, rhs->prefix_hash,
                        sizeof(lhs->prefix_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->key_hash, rhs->key_hash,
                        sizeof(lhs->key_hash)) == 0);
  SVN_ERR_ASSERT(memcmp(lhs->prefix_tail, rhs->prefix_tail,
                        sizeof(lhs->prefix_tail)) == 0);

  SVN_ERR_ASSERT(lhs->key_len == rhs->key_len);

  return SVN_NO_ERROR;
}

/* Reoccurring code snippets.
 */

#define DEBUG_CACHE_MEMBUFFER_TAG_ARG entry_tag_t *tag,

#define DEBUG_CACHE_MEMBUFFER_TAG tag,

#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)                     \
  entry_tag_t _tag;                                              \
  entry_tag_t *tag = &_tag;                                      \
  if (key)                                                       \
    SVN_ERR(store_key_part(tag,                                  \
                           &cache->prefix,                       \
                           key,                                  \
                           cache->key_len == APR_HASH_KEY_STRING \
                               ? strlen((const char *) key)      \
                               : cache->key_len,                 \
                           pool));

#else

/* Don't generate any checks if consistency checks have not been enabled.
 */
#define DEBUG_CACHE_MEMBUFFER_TAG_ARG
#define DEBUG_CACHE_MEMBUFFER_TAG
#define DEBUG_CACHE_MEMBUFFER_INIT_TAG(pool)

#endif /* SVN_DEBUG_CACHE_MEMBUFFER */
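/* Illustrative sketch (not part of the original file): how the tag
 * machinery above fits together when SVN_DEBUG_CACHE_MEMBUFFER is
 * defined.  A setter fills an entry_tag_t via store_key_part() and
 * store_content_part() and stores it next to the entry; the getter
 * recomputes both parts from what it found and compares against the
 * stored baseline with assert_equal_tags().  The function name is a
 * stand-in; this block is excluded from the build.
 */
#if 0
static svn_error_t *
example_tag_roundtrip(entry_tag_t *stored_tag,
                      const void *key, apr_size_t key_len,
                      const void *data, apr_size_t size,
                      const full_key_t *prefix, apr_pool_t *pool)
{
  entry_tag_t check;

  /* What the getter would recompute from the returned item. */
  SVN_ERR(store_key_part(&check, prefix, key, key_len, pool));
  SVN_ERR(store_content_part(&check, data, size, pool));

  /* Any corruption of key or content shows up here. */
  return assert_equal_tags(stored_tag, &check);
}
#endif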
/* A single dictionary entry. Since all entries will be allocated once
 * during cache creation, those entries might be either used or unused.
 * An entry is used if and only if it is contained in the doubly-linked
 * list of used entries per cache level.
 */
typedef struct entry_t
{
  /* Identifying the data item. Only valid for used entries.
   */
  entry_key_t key;

  /* The offset of the cached item's serialized data within the cache's
   * DATA buffer.
   */
  apr_uint64_t offset;

  /* Size of the serialized item data. May be 0. The MAX_ITEM_SIZE macro
   * above ensures that there will be no overflows.
   * Only valid for used entries.
   */
  apr_size_t size;

  /* Number of (read) hits for this entry. Will be reset upon write.
   * Only valid for used entries.
   */
  svn_atomic_t hit_count;

  /* Reference to the next used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.last member.  NO_INDEX also implies that
   * the data buffer is not used beyond offset+size.
   * Only valid for used entries.
   */
  apr_uint32_t next;

  /* Reference to the previous used entry in the order defined by offset.
   * NO_INDEX indicates the end of the list; this entry must be referenced
   * by the cache's cache_level_t.first member.
   * Only valid for used entries.
   */
  apr_uint32_t previous;

  /* Priority of this entry.  This entry will not be replaced by lower-
   * priority items.
   */
  apr_uint32_t priority;
#ifdef SVN_DEBUG_CACHE_MEMBUFFER
  /* Remember type, content and key hashes.
   */
  entry_tag_t tag;
#endif
} entry_t;

/* Group header struct.
 */
typedef struct group_header_t
{
  /* number of entries used [0 .. USED-1] */
  apr_uint32_t used;

  /* next group in the chain or NO_INDEX for the last.
   * For recyclable unused spare groups, this points to the next
   * unused spare group */
  apr_uint32_t next;

  /* previous group in the chain or NO_INDEX for the first */
  apr_uint32_t previous;

  /* number of elements in the chain from start to here.
   * >= 1 for used groups, 0 for unused spare groups */
  apr_uint32_t chain_length;

} group_header_t;

/* The size of the group struct should be a power of two to make sure it
 * does not cross memory page boundaries.  Since we already access the
 * cache randomly, having two page table lookups instead of one is bad.
 */
#define GROUP_BLOCK_SIZE 512

/* A ~10-way associative cache seems to be a good compromise between
 * performance (worst-case lookups) and efficiency-loss due to collisions.
 *
 * This value may be changed to any positive integer.
 */
#define GROUP_SIZE \
          ((GROUP_BLOCK_SIZE - sizeof(group_header_t)) / sizeof(entry_t))

/* Maximum number of groups in a chain, i.e. a cache index group can hold
 * up to GROUP_SIZE * MAX_GROUP_CHAIN_LENGTH entries.
 */
#define MAX_GROUP_CHAIN_LENGTH 8

/* We group dictionary entries to make this GROUP-SIZE-way associative.
 */
typedef struct entry_group_t
{
  /* group globals */
  group_header_t header;

  /* padding and also room for future extensions */
  char padding[GROUP_BLOCK_SIZE - sizeof(group_header_t)
               - sizeof(entry_t) * GROUP_SIZE];

  /* the actual entries */
  entry_t entries[GROUP_SIZE];

} entry_group_t;
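/* Illustrative arithmetic (not part of the original file): on a typical
 * LP64 build without SVN_DEBUG_CACHE_MEMBUFFER, sizeof(group_header_t)
 * is 16 and sizeof(entry_t) is 56, so GROUP_SIZE works out to
 * (512 - 16) / 56 = 8 entries per group, with the remaining
 * 512 - 16 - 8*56 = 48 bytes absorbed by the padding member.  The exact
 * numbers depend on the platform's type sizes and padding rules.
 */
#if 0
/* Hypothetical compile-time check mirroring the arithmetic above. */
typedef char example_group_block_has_room
    [GROUP_BLOCK_SIZE > sizeof(group_header_t) + sizeof(entry_t) ? 1 : -1];
#endif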
/* Per-cache level header structure. Instances of this are members of
 * svn_membuffer_t and will use non-overlapping sections of its DATA buffer.
 * All offset values are global / absolute to that whole buffer.
 */
typedef struct cache_level_t
{
  /* Reference to the first (defined by the order of their content in the
   * data buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t first;

  /* Reference to the last (defined by the order of their content in the
   * data buffer) dictionary entry used by any data item.
   * NO_INDEX for an empty cache.
   */
  apr_uint32_t last;

  /* Reference to the first (defined by the order of their content in the
   * data buffer) used dictionary entry behind the insertion position
   * (current_data). If NO_INDEX, the data buffer is free starting at the
   * current_data offset.
   */
  apr_uint32_t next;


  /* First offset in the cache's DATA buffer that belongs to this level.
   */
  apr_uint64_t start_offset;

  /* Size of data buffer allocated to this level in bytes. Must be > 0.
   */
  apr_uint64_t size;

  /* Offset in the data buffer where the next insertion shall occur.
   */
  apr_uint64_t current_data;

} cache_level_t;

/* The cache header structure.
 */
struct svn_membuffer_t
{
  /* Number of cache segments. Must be a power of 2.
     Please note that this structure represents only one such segment
     and that all segments must / will report the same values here. */
  apr_uint32_t segment_count;

  /* The dictionary, GROUP_SIZE * (group_count + spare_group_count)
   * entries long.  Never NULL.
   */
  entry_group_t *directory;

  /* Flag array with group_count / GROUP_INIT_GRANULARITY _bit_ elements.
   * Allows for efficiently marking groups as "not initialized".
   */
  unsigned char *group_initialized;

  /* Size of dictionary in groups. Must be > 0.
   */
  apr_uint32_t group_count;

  /* Total number of spare groups.
   */
  apr_uint32_t spare_group_count;

  /* First recyclable spare group.
   */
  apr_uint32_t first_spare_group;

  /* Maximum number of spare groups ever used.  I.e. group index
   * group_count + max_spare_used is the first unused spare group
   * if first_spare_group is NO_INDEX.
   */
  apr_uint32_t max_spare_used;

  /* Pointer to the data buffer, data_size bytes long. Never NULL.
   */
  unsigned char *data;

  /* Total number of data buffer bytes in use.
   */
  apr_uint64_t data_used;

  /* Largest entry size that we would accept.  For total cache sizes
   * less than 4TB (sic!), this is determined by the total cache size.
   */
  apr_uint64_t max_entry_size;

  /* The cache levels, organized as sub-buffers.  Since entries in the
   * DIRECTORY use offsets in DATA for addressing, a cache lookup does
   * not need to know the cache level of a specific item.  Cache levels
   * are only used to implement a hybrid insertion / eviction strategy.
   */

  /* First cache level, i.e. most insertions happen here.  Very large
   * items might get inserted directly into L2.  L1 is a strict FIFO
   * ring buffer that does not care about item priorities.  All evicted
   * items get a chance to be promoted to L2.
   */
  cache_level_t l1;

  /* Second cache level, i.e. data evicted from L1 will be added here
   * if the item is "important" enough or the L2 insertion window is large
   * enough.
   */
  cache_level_t l2;


  /* Number of used dictionary entries, i.e. number of cached items.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint32_t used_entries;

  /* Total number of calls to membuffer_cache_get.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_reads;

  /* Total number of calls to membuffer_cache_set.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_writes;

  /* Total number of hits since the cache's creation.
   * Purely statistical information that may be used for profiling only.
   * Updates are not synchronized and values may be nonsensical on some
   * platforms.
   */
  apr_uint64_t total_hits;

#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  /* A lock for intra-process synchronization to the cache, or NULL if
   * the cache's creator doesn't feel the cache needs to be
   * thread-safe.
   */
  svn_mutex__t *lock;
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  /* Same for read-write lock. */
  apr_thread_rwlock_t *lock;

  /* If set, write accesses will wait until they get exclusive access.
   * Otherwise, they will become no-ops if the segment is currently
   * read-locked.  Only used when LOCK is an r/w lock.
   */
  svn_boolean_t allow_blocking_writes;
#endif
};

/* Align integer VALUE to the next ITEM_ALIGNMENT boundary.
 */
#define ALIGN_VALUE(value) (((value) + ITEM_ALIGNMENT-1) & -ITEM_ALIGNMENT)
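/* Worked example (not part of the original file): with ITEM_ALIGNMENT
 * being 16, ALIGN_VALUE rounds up to the next multiple of 16 by adding
 * 15 and masking off the low bits:
 *
 *   ALIGN_VALUE(0)  == 0
 *   ALIGN_VALUE(1)  == 16
 *   ALIGN_VALUE(16) == 16
 *   ALIGN_VALUE(17) == 32
 *
 * This relies on ITEM_ALIGNMENT being a power of 2, so that
 * -ITEM_ALIGNMENT is the bitwise complement of ITEM_ALIGNMENT-1.
 */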
/* If locking is supported for CACHE, acquire a read lock for it.
 */
static svn_error_t *
read_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_rdlock(cache->lock);
      if (status)
        return svn_error_wrap_apr(status, _("Can't lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}

/* If locking is supported for CACHE, acquire a write lock for it.
 * Set *SUCCESS to FALSE, if we couldn't acquire the write lock;
 * leave it untouched otherwise.
 */
static svn_error_t *
write_lock_cache(svn_membuffer_t *cache, svn_boolean_t *success)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status;
      if (cache->allow_blocking_writes)
        {
          status = apr_thread_rwlock_wrlock(cache->lock);
        }
      else
        {
          status = apr_thread_rwlock_trywrlock(cache->lock);
          if (SVN_LOCK_IS_BUSY(status))
            {
              *success = FALSE;
              status = APR_SUCCESS;
            }
        }

      if (status)
        return svn_error_wrap_apr(status,
                                  _("Can't write-lock cache mutex"));
    }

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}

/* If locking is supported for CACHE, acquire an unconditional write lock
 * for it.
 */
static svn_error_t *
force_write_lock_cache(svn_membuffer_t *cache)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__lock(cache->lock);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  apr_status_t status = apr_thread_rwlock_wrlock(cache->lock);
  if (status)
    return svn_error_wrap_apr(status,
                              _("Can't write-lock cache mutex"));

  return SVN_NO_ERROR;
#else
  return SVN_NO_ERROR;
#endif
}

/* If locking is supported for CACHE, release the current lock
 * (read or write).  Return ERR upon success.
 */
static svn_error_t *
unlock_cache(svn_membuffer_t *cache, svn_error_t *err)
{
#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX)
  return svn_mutex__unlock(cache->lock, err);
#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX)
  if (cache->lock)
    {
      apr_status_t status = apr_thread_rwlock_unlock(cache->lock);
      if (err)
        return err;

      if (status)
        return svn_error_wrap_apr(status, _("Can't unlock cache mutex"));
    }

  return err;
#else
  return err;
#endif
}

/* If supported, guard the execution of EXPR with a read lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 */
#define WITH_READ_LOCK(cache, expr)         \
do {                                        \
  SVN_ERR(read_lock_cache(cache));          \
  SVN_ERR(unlock_cache(cache, (expr)));     \
} while (0)
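/* Illustrative sketch (not part of the original file): a reader uses the
 * macro above to wrap an unsynchronized helper.  EXPR is evaluated while
 * the lock is held and its svn_error_t* result is routed through
 * unlock_cache(), so the lock gets released on both success and error.
 * The helper name membuffer_cache_get_internal is a stand-in here; this
 * block is excluded from the build.
 */
#if 0
static svn_error_t *
example_locked_read(svn_membuffer_t *cache)
{
  WITH_READ_LOCK(cache,
                 membuffer_cache_get_internal(cache /* , key, ... */));
  return SVN_NO_ERROR;
}
#endif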
/* If supported, guard the execution of EXPR with a write lock to CACHE.
 * The macro has been modeled after SVN_MUTEX__WITH_LOCK.
 *
 * The write lock process is complicated if we don't allow waiting for
 * the lock: If we didn't get the lock, we may still need to remove an
 * existing entry for the given key because that content is now stale.
 * Once we discovered such an entry, we unconditionally do a blocking
 * wait for the write lock.  In case no old content could be found, a
 * failing lock attempt is simply a no-op and we exit the macro.
 */
#define WITH_WRITE_LOCK(cache, expr)                            \
do {                                                            \
  svn_boolean_t got_lock = TRUE;                                \
  SVN_ERR(write_lock_cache(cache, &got_lock));                  \
  if (!got_lock)                                                \
    {                                                           \
      svn_boolean_t exists;                                     \
      SVN_ERR(entry_exists(cache, group_index, key, &exists));  \
      if (exists)                                               \
        SVN_ERR(force_write_lock_cache(cache));                 \
      else                                                      \
        break;                                                  \
    }                                                           \
  SVN_ERR(unlock_cache(cache, (expr)));                         \
} while (0)

/* Returns 0 if the entry group identified by GROUP_INDEX in CACHE has not
 * been initialized, yet.  In that case, this group cannot hold any data.
 * Otherwise, a non-zero value is returned.
 */
static APR_INLINE unsigned char
is_group_initialized(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char flags
    = cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)];
  unsigned char bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));

  return flags & bit_mask;
}

/* Initializes the section of the directory in CACHE that contains
 * the entry group identified by GROUP_INDEX. */
static void
initialize_group(svn_membuffer_t *cache, apr_uint32_t group_index)
{
  unsigned char bit_mask;
  apr_uint32_t i;

  /* range of groups to initialize due to GROUP_INIT_GRANULARITY */
  apr_uint32_t first_index =
      (group_index / GROUP_INIT_GRANULARITY) * GROUP_INIT_GRANULARITY;
  apr_uint32_t last_index = first_index + GROUP_INIT_GRANULARITY;
  if (last_index > cache->group_count + cache->spare_group_count)
    last_index = cache->group_count + cache->spare_group_count;

  for (i = first_index; i < last_index; ++i)
    {
      group_header_t *header = &cache->directory[i].header;
      header->used = 0;
      header->chain_length = 1;
      header->next = NO_INDEX;
      header->previous = NO_INDEX;
    }

  /* set the "initialized" bit for these groups */
  bit_mask
    = (unsigned char)(1 << ((group_index / GROUP_INIT_GRANULARITY) % 8));
  cache->group_initialized[group_index / (8 * GROUP_INIT_GRANULARITY)]
    |= bit_mask;
}

/* Return the next available spare group from CACHE and mark it as used.
 * May return NULL.
 */
static entry_group_t *
allocate_spare_group(svn_membuffer_t *cache)
{
  entry_group_t *group = NULL;

  /* is there some ready-to-use group? */
  if (cache->first_spare_group != NO_INDEX)
    {
      group = &cache->directory[cache->first_spare_group];
      cache->first_spare_group = group->header.next;
    }

  /* any so far untouched spares available? */
  else if (cache->max_spare_used < cache->spare_group_count)
    {
      apr_uint32_t group_index = cache->group_count + cache->max_spare_used;
      ++cache->max_spare_used;

      if (!is_group_initialized(cache, group_index))
        initialize_group(cache, group_index);

      group = &cache->directory[group_index];
    }

  /* spare groups must be empty */
  assert(!group || !group->header.used);
  return group;
}
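/* Worked example (not part of the original file): with
 * GROUP_INIT_GRANULARITY == 32, group index 1000 is tracked in flag byte
 * 1000 / (8 * 32) = 3, bit (1000 / 32) % 8 = 7.  Initializing any group
 * of the block 992..1023 therefore initializes all of them and sets that
 * single bit.
 */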
/* Mark previously allocated spare group GROUP in CACHE as "unused".
 */
static void
free_spare_group(svn_membuffer_t *cache,
                 entry_group_t *group)
{
  assert(group->header.used == 0);
  assert(group->header.previous != NO_INDEX);
  assert(group - cache->directory >= (apr_ssize_t)cache->group_count);

  /* unchain */
  cache->directory[group->header.previous].header.next = NO_INDEX;
  group->header.chain_length = 0;
  group->header.previous = NO_INDEX;

  /* add to chain of spares */
  group->header.next = cache->first_spare_group;
  cache->first_spare_group = (apr_uint32_t) (group - cache->directory);
}

/* Follow the group chain from GROUP in CACHE to its end and return the last
 * group.  May return GROUP.
 */
static entry_group_t *
last_group_in_chain(svn_membuffer_t *cache,
                    entry_group_t *group)
{
  while (group->header.next != NO_INDEX)
    group = &cache->directory[group->header.next];

  return group;
}

/* Return the CHAIN_INDEX-th element in the group chain starting from group
 * START_GROUP_INDEX in CACHE.
 */
static entry_group_t *
get_group(svn_membuffer_t *cache,
          apr_uint32_t start_group_index,
          apr_uint32_t chain_index)
{
  entry_group_t *group = &cache->directory[start_group_index];
  for (; chain_index; --chain_index)
    group = &cache->directory[group->header.next];

  return group;
}

/* Resolve a dictionary entry reference, i.e. return the entry
 * for the given IDX.
 */
static APR_INLINE entry_t *
get_entry(svn_membuffer_t *cache, apr_uint32_t idx)
{
  return &cache->directory[idx / GROUP_SIZE].entries[idx % GROUP_SIZE];
}

/* Get the entry reference for the given ENTRY, i.e. the inverse of
 * get_entry().
 */
static APR_INLINE apr_uint32_t
get_index(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t group_index
    = ((char *)entry - (char *)cache->directory) / sizeof(entry_group_t);

  return (apr_uint32_t)group_index * GROUP_SIZE
       + (apr_uint32_t)(entry - cache->directory[group_index].entries);
}

/* Return the cache level of ENTRY in CACHE.
 */
static cache_level_t *
get_cache_level(svn_membuffer_t *cache, entry_t *entry)
{
  return entry->offset < cache->l1.size ? &cache->l1
                                        : &cache->l2;
}
/* Insert ENTRY to the chain of items that belong to LEVEL in CACHE.  IDX
 * is ENTRY's item index and is only given for efficiency.  The insertion
 * takes place just before LEVEL->NEXT.  *CACHE will not be modified.
 */
static void
chain_entry(svn_membuffer_t *cache,
            cache_level_t *level,
            entry_t *entry,
            apr_uint32_t idx)
{
  /* insert ENTRY before this item */
  entry_t *next = level->next == NO_INDEX
                ? NULL
                : get_entry(cache, level->next);
  assert(idx == get_index(cache, entry));

  /* update entry chain
   */
  entry->next = level->next;
  if (level->first == NO_INDEX)
    {
      /* insert as the first and only entry in the chain
       */
      entry->previous = NO_INDEX;
      level->last = idx;
      level->first = idx;
    }
  else if (next == NULL)
    {
      /* insert as the last entry in the chain.
       * Note that it cannot also be at the beginning of the chain.
       */
      entry->previous = level->last;
      get_entry(cache, level->last)->next = idx;
      level->last = idx;
    }
  else
    {
      /* insert either at the start of a non-empty list or
       * somewhere in the middle
       */
      entry->previous = next->previous;
      next->previous = idx;

      if (entry->previous != NO_INDEX)
        get_entry(cache, entry->previous)->next = idx;
      else
        level->first = idx;
    }
}

/* Remove ENTRY from the chain of items that belong to LEVEL in CACHE.
 * IDX is ENTRY's item index and is only given for efficiency.  Please
 * note that neither *CACHE nor *ENTRY will be modified.
 */
static void
unchain_entry(svn_membuffer_t *cache,
              cache_level_t *level,
              entry_t *entry,
              apr_uint32_t idx)
{
  assert(idx == get_index(cache, entry));

  /* update the insertion window, if it borders this entry
   */
  if (level->next == idx)
    level->next = entry->next;

  /* unlink it from the chain of used entries
   */
  if (entry->previous == NO_INDEX)
    level->first = entry->next;
  else
    get_entry(cache, entry->previous)->next = entry->next;

  if (entry->next == NO_INDEX)
    level->last = entry->previous;
  else
    get_entry(cache, entry->next)->previous = entry->previous;
}
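/* Illustrative sketch (not part of the original file): the two helpers
 * above maintain one invariant - walking LEVEL->first via entry->next
 * visits all used entries of that level in ascending offset order, with
 * no overlaps.  A debug-only walker could verify it like this
 * (hypothetical, excluded from the build):
 */
#if 0
static void
example_check_chain(svn_membuffer_t *cache, cache_level_t *level)
{
  apr_uint32_t idx = level->first;
  apr_uint64_t last_end = 0;

  while (idx != NO_INDEX)
    {
      entry_t *entry = get_entry(cache, idx);
      assert(entry->offset >= last_end);    /* sorted and non-overlapping */
      last_end = entry->offset + entry->size;
      idx = entry->next;
    }
}
#endif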
/* Remove the used ENTRY from the CACHE, i.e. make it "unused".
 * In contrast to insertion, removal is possible for any entry.
 */
static void
drop_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *last_group
    = last_group_in_chain(cache, &cache->directory[group_index]);
  apr_uint32_t last_in_group
    = (apr_uint32_t) ((last_group - cache->directory) * GROUP_SIZE
    + last_group->header.used - 1);

  cache_level_t *level = get_cache_level(cache, entry);

  /* update global cache usage counters
   */
  cache->used_entries--;
  cache->data_used -= entry->size;

  /* extend the insertion window, if the entry happens to border it
   */
  if (idx == level->next)
    level->next = entry->next;
  else
    if (entry->next == level->next)
      {
        /* insertion window starts right behind the entry to remove
         */
        if (entry->previous == NO_INDEX)
          {
            /* remove the first entry -> insertion may start at pos 0, now */
            level->current_data = level->start_offset;
          }
        else
          {
            /* insertion may start right behind the previous entry */
            entry_t *previous = get_entry(cache, entry->previous);
            level->current_data = ALIGN_VALUE(  previous->offset
                                              + previous->size);
          }
      }

  /* unlink it from the chain of used entries
   */
  unchain_entry(cache, level, entry, idx);

  /* Move last entry into hole (if the removed one is not the last used).
   * We need to do this since all used entries are at the beginning of
   * the group's entries array.
   */
  if (idx != last_in_group)
    {
      /* copy the last used entry to the removed entry's index
       */
      *entry = last_group->entries[last_group->header.used-1];

      /* this ENTRY may belong to a different cache level than the entry
       * we have just removed */
      level = get_cache_level(cache, entry);

      /* update foreign links to new index
       */
      if (last_in_group == level->next)
        level->next = idx;

      if (entry->previous == NO_INDEX)
        level->first = idx;
      else
        get_entry(cache, entry->previous)->next = idx;

      if (entry->next == NO_INDEX)
        level->last = idx;
      else
        get_entry(cache, entry->next)->previous = idx;
    }

  /* Update the number of used entries.
   */
  last_group->header.used--;

  /* Release the last group in the chain if it is a spare group
   */
  if (!last_group->header.used && last_group->header.previous != NO_INDEX)
    free_spare_group(cache, last_group);
}

/* Insert ENTRY into the chain of used dictionary entries. The entry's
 * offset and size members must already have been initialized. Also,
 * the offset must match the beginning of the insertion window.
 */
static void
insert_entry(svn_membuffer_t *cache, entry_t *entry)
{
  /* the group that ENTRY belongs to plus a number of useful index values
   */
  apr_uint32_t idx = get_index(cache, entry);
  apr_uint32_t group_index = idx / GROUP_SIZE;
  entry_group_t *group = &cache->directory[group_index];
  cache_level_t *level = get_cache_level(cache, entry);

  /* The entry must start at the beginning of the insertion window.
   * It must also be the first unused entry in the group.
   */
  assert(entry->offset == level->current_data);
  assert(idx == group_index * GROUP_SIZE + group->header.used);
  level->current_data = ALIGN_VALUE(entry->offset + entry->size);

  /* update usage counters
   */
  cache->used_entries++;
  cache->data_used += entry->size;
  entry->hit_count = 0;
  group->header.used++;

  /* update entry chain
   */
  chain_entry(cache, level, entry, idx);

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}

/* Map a KEY of 16 bytes to the CACHE and group that shall contain the
 * respective item.
 */
static apr_uint32_t
get_group_index(svn_membuffer_t **cache,
                const entry_key_t *key)
{
  svn_membuffer_t *segment0 = *cache;
  apr_uint64_t key0 = key->fingerprint[0];
  apr_uint64_t key1 = key->fingerprint[1];

  /* select the cache segment to use. they have all the same group_count.
   * Since key may not be well-distributed, pre-fold it to a smaller but
   * "denser" range.  The modulus is a prime larger than the largest
   * counts. */
  *cache = &segment0[(key1 % APR_UINT64_C(2809637) + (key0 / 37))
                     & (segment0->segment_count - 1)];
  return (key0 % APR_UINT64_C(5030895599)) % segment0->group_count;
}
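/* Illustrative sketch (not part of the original file): with, say, 4
 * segments and 1000 groups per segment, a key whose fingerprint is
 * {key0, key1} lands in segment (key1 % 2809637 + key0 / 37) & 3 and in
 * group (key0 % 5030895599) % 1000 of that segment.  Segment selection
 * is driven mainly by KEY1 and group selection by KEY0, so the two
 * choices stay largely independent.
 */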
/* Reduce the hit count of ENTRY and update the accumulated hit info
 * in CACHE accordingly.
 */
static APR_INLINE void
let_entry_age(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t hits_removed = (entry->hit_count + 1) >> 1;

  if (hits_removed)
    {
      entry->hit_count -= hits_removed;
    }
  else
    {
      entry->priority /= 2;
    }
}

/* Return whether the keys in LHS and RHS match.
 */
static svn_boolean_t
entry_keys_match(const entry_key_t *lhs,
                 const entry_key_t *rhs)
{
  return (lhs->fingerprint[0] == rhs->fingerprint[0])
      && (lhs->fingerprint[1] == rhs->fingerprint[1])
      && (lhs->key_len == rhs->key_len);
}
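/* Worked example (not part of the original file): let_entry_age() halves
 * the hit count, rounding up: a hit count of 5 loses (5+1)>>1 = 3 hits
 * and drops to 2; a hit count of 1 drops to 0.  Once the hit count is 0,
 * further aging halves the entry's priority instead, so even never-read
 * entries eventually become cheap to evict.
 */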
/* Given the GROUP_INDEX that shall contain an entry with the hash key
 * TO_FIND, find that entry in the specified group.
 *
 * If FIND_EMPTY is not set, this function will return the one used entry
 * that actually matches the hash or NULL, if no such entry exists.
 *
 * If FIND_EMPTY has been set, this function will return an entry that is
 * fit to be filled with new content: the one used entry that matches the
 * hash (after dropping its old content), an unused entry, or a forcibly
 * emptied entry (if all group entries are currently in use).  The entry's
 * hash value will be initialized with TO_FIND.
 *
 * Note: This function requires the caller to appropriately lock the CACHE.
 * For FIND_EMPTY==FALSE, a read lock is required, for FIND_EMPTY==TRUE,
 * the write lock must have been acquired.
 */
static entry_t *
find_entry(svn_membuffer_t *cache,
           apr_uint32_t group_index,
           const full_key_t *to_find,
           svn_boolean_t find_empty)
{
  entry_group_t *group;
  entry_t *entry = NULL;
  apr_size_t i;

  /* get the group that *must* contain the entry
   */
  group = &cache->directory[group_index];

  /* If the entry group has not been initialized yet, there is no data.
   */
  if (! is_group_initialized(cache, group_index))
    {
      if (find_empty)
        {
          initialize_group(cache, group_index);
          entry = &group->entries[0];

          /* initialize entry for the new key */
          entry->key = to_find->entry_key;
        }

      return entry;
    }

  /* try to find the matching entry
   */
  while (1)
    {
      for (i = 0; i < group->header.used; ++i)
        if (entry_keys_match(&group->entries[i].key, &to_find->entry_key))
          {
            /* This is the only entry that _may_ contain the correct data. */
            entry = &group->entries[i];

            /* If we want to preserve it, check that it is actually a
             * match. */
            if (!find_empty)
              {
                /* If there is no full key to compare, we are done. */
                if (!entry->key.key_len)
                  return entry;

                /* Compare the full key. */
                if (memcmp(to_find->full_key.data,
                           cache->data + entry->offset,
                           entry->key.key_len) == 0)
                  return entry;

                /* Key conflict. The entry to find cannot be anywhere else.
                 * Therefore, it is not cached. */
                return NULL;
              }

            /* need to empty that entry */
            drop_entry(cache, entry);
            if (group->header.used == GROUP_SIZE)
              group = last_group_in_chain(cache, group);
            else if (group->header.chain_length == 0)
              group = last_group_in_chain(cache,
                                          &cache->directory[group_index]);

            /* No entry found (actually, none left to find). */
            entry = NULL;
            break;
          }

      /* end of chain?
       */
      if (group->header.next == NO_INDEX)
        break;

      /* only full groups may chain */
      assert(group->header.used == GROUP_SIZE);
      group = &cache->directory[group->header.next];
    }

  /* None found. Are we looking for a free entry?
   */
  if (find_empty)
    {
      /* There is no empty entry in the chain, try chaining a spare group.
       */
      if (   group->header.used == GROUP_SIZE
          && group->header.chain_length < MAX_GROUP_CHAIN_LENGTH)
        {
          entry_group_t *new_group = allocate_spare_group(cache);
          if (new_group)
            {
              /* chain groups
               */
              new_group->header.chain_length = group->header.chain_length + 1;
              new_group->header.previous = (apr_uint32_t) (group -
                                                           cache->directory);
              new_group->header.next = NO_INDEX;
              group->header.next = (apr_uint32_t) (new_group -
                                                   cache->directory);
              group = new_group;
            }
        }

      /* if GROUP is still filled, we need to remove a random entry */
      if (group->header.used == GROUP_SIZE)
        {
          /* every entry gets the same chance of being removed.
           * Otherwise, we would free the first entry, fill it and
           * remove it again on the next occasion without considering
           * the other entries in this group.
           *
           * We hit only one random group instead of processing all
           * groups in the chain.
           */
          cache_level_t *entry_level;
          int to_remove = rand() % (GROUP_SIZE * group->header.chain_length);
          entry_group_t *to_shrink
            = get_group(cache, group_index, to_remove / GROUP_SIZE);

          entry = &to_shrink->entries[to_remove % GROUP_SIZE];
          entry_level = get_cache_level(cache, entry);
          for (i = 0; i < GROUP_SIZE; ++i)
            {
              /* keep L1 entries whenever possible */

              cache_level_t *level
                = get_cache_level(cache, &to_shrink->entries[i]);
              if (   (level != entry_level && entry_level == &cache->l1)
                  || (entry->hit_count > to_shrink->entries[i].hit_count))
                {
                  entry_level = level;
                  entry = &to_shrink->entries[i];
                }
            }

          /* for the entries that have not been removed,
           * reduce their hit counts to put them at a relative
           * disadvantage the next time.
           */
          for (i = 0; i < GROUP_SIZE; ++i)
            if (entry != &to_shrink->entries[i])
              let_entry_age(cache, &to_shrink->entries[i]);

          drop_entry(cache, entry);
        }

      /* initialize entry for the new key
       */
      entry = &group->entries[group->header.used];
      entry->key = to_find->entry_key;
    }

  return entry;
}
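/* Illustrative sketch (not part of the original file): typical call
 * patterns for find_entry(), per its doc comment above.  A reader holds
 * a read lock and treats NULL as a miss; a writer holds the write lock
 * and asks for an empty slot to fill.  The surrounding offset / data
 * handling is omitted; this fragment is excluded from the build.
 */
#if 0
  /* reader: */
  entry_t *entry = find_entry(cache, group_index, key, FALSE);
  if (entry == NULL)
    return SVN_NO_ERROR;                /* cache miss */

  /* writer: */
  entry_t *slot = find_entry(cache, group_index, key, TRUE);
  /* ... then initialize slot->offset, slot->size and copy the data. */
#endif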
/* Move a surviving ENTRY from just behind the insertion window to
 * its beginning and move the insertion window up accordingly.
 */
static void
move_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_size_t size = ALIGN_VALUE(entry->size);
  cache_level_t *level = get_cache_level(cache, entry);

  /* This entry survived this cleansing run. Reset half of its
   * hit count so that its removal gets more likely in the next
   * run unless someone read / hit this entry in the meantime.
   */
  let_entry_age(cache, entry);

  /* Move the entry to the start of the empty / insertion section
   * (if it isn't there already). Size-aligned moves are legal
   * since all offsets and block sizes share this same alignment.
   * Size-aligned moves tend to be faster than non-aligned ones
   * because no "odd" bytes at the end need special treatment.
   */
  if (entry->offset != level->current_data)
    {
      memmove(cache->data + level->current_data,
              cache->data + entry->offset,
              size);
      entry->offset = level->current_data;
    }

  /* The insertion position is now directly behind this entry.
   */
  level->current_data = entry->offset + size;
  level->next = entry->next;

  /* The current insertion position must never point outside our
   * data buffer.
   */
  assert(level->current_data <= level->start_offset + level->size);
}

/* Move ENTRY in CACHE from L1 to L2.
 */
static void
promote_entry(svn_membuffer_t *cache, entry_t *entry)
{
  apr_uint32_t idx = get_index(cache, entry);
  apr_size_t size = ALIGN_VALUE(entry->size);
  assert(get_cache_level(cache, entry) == &cache->l1);
  assert(idx == cache->l1.next);

  /* copy item from the current location in L1 to the start of L2's
   * insertion window */
  memmove(cache->data + cache->l2.current_data,
          cache->data + entry->offset,
          size);
  entry->offset = cache->l2.current_data;

  /* The insertion position is now directly behind this entry.
   */
  cache->l2.current_data += size;

  /* remove ENTRY from chain of L1 entries and put it into L2
   */
  unchain_entry(cache, &cache->l1, entry, idx);
  chain_entry(cache, &cache->l2, entry, idx);
}
/* This function implements the cache insertion / eviction strategy for L2.
 *
 * If necessary, enlarge the insertion window of CACHE->L2 until it is at
 * least TO_FIT_IN->SIZE bytes long. TO_FIT_IN->SIZE must not exceed the
 * data buffer size allocated to CACHE->L2.
 *
 * Return TRUE if enough room could be found or made.  A FALSE result
 * indicates that the respective item shall not be added.
 */
static svn_boolean_t
ensure_data_insertable_l2(svn_membuffer_t *cache,
                          entry_t *to_fit_in)
{
  entry_t *entry;

  /* accumulated size of the entries that have been removed to make
   * room for the new one.
   */
  apr_size_t moved_size = 0;

  /* count the number of entries that got moved.  A single large entry
   * being moved is not enough to reject an insertion.
   */
  apr_size_t moved_count = 0;

  /* accumulated "worth" of items dropped so far */
  apr_uint64_t drop_hits = 0;

  /* estimated "worth" of the new entry */
  apr_uint64_t drop_hits_limit = (to_fit_in->hit_count + 1)
                               * (apr_uint64_t)to_fit_in->priority;

  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually:
   *
   * - the incoming entry is small enough to fit into L2
   * - every iteration either frees parts of L2 or counts the moved size
   * - eventually, we either moved too many items with too much total size
   *   to accept the new entry, or made enough room in L2 for the new entry
   *
   * Low-prio items get rejected even sooner.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint64_t end = cache->l2.next == NO_INDEX
                       ? cache->l2.start_offset + cache->l2.size
                       : get_entry(cache, cache->l2.next)->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= to_fit_in->size + cache->l2.current_data)
        return TRUE;

      /* Don't be too eager to cache data.  If a lot of data has been
       * moved around, the current item probably has a relatively low
       * priority.  We must also limit the effort spent here (even in
       * case of faulty heuristics).  Therefore, give up after some time.
       */
      if (moved_size > 4 * to_fit_in->size && moved_count > 7)
        return FALSE;

      /* if the net worth (in weighted hits) of items removed is already
       * larger than what we want to insert, reject TO_FIT_IN because it
       * still does not fit in. */
      if (drop_hits > drop_hits_limit)
        return FALSE;

      /* try to enlarge the insertion window
       */
      if (cache->l2.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l2.current_data = cache->l2.start_offset;
          cache->l2.next = cache->l2.first;
        }
      else
        {
          svn_boolean_t keep;
          entry = get_entry(cache, cache->l2.next);

          if (to_fit_in->priority < SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
            {
              /* Low prio items can be accepted only if the current
               * entry is of no higher prio and has no more hits.
               */
              if (   entry->priority > to_fit_in->priority
                  || entry->hit_count > to_fit_in->hit_count)
                return FALSE;
            }

          if (entry->priority <= SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
            {
              /* Be quick to remove low-prio entries - even if the incoming
               * one is low-prio as well.  This makes room for more important
               * data and replaces existing data with newly read information.
               */
              keep = FALSE;
            }
          else
            {
              /* If the existing data is the same prio as the incoming data,
               * drop the existing entry if it had seen fewer (probably 0)
               * hits than the entry coming in from L1.  In case of different
               * priorities, keep the current entry if it has higher prio.
               * The new entry may still find room by ousting other entries.
               */
              keep = to_fit_in->priority == entry->priority
                   ? entry->hit_count >= to_fit_in->hit_count
                   : entry->priority > to_fit_in->priority;
            }

          /* keepers or destroyers? */
          if (keep)
            {
              /* Moving entries around is not for free -> track costs. */
              moved_size += entry->size;
              moved_count++;

              move_entry(cache, entry);
            }
          else
            {
              /* Drop the entry from the end of the insertion window.
               * Count the "hit importance" such that we are not sacrificing
               * too much of the high-hit contents.  However, don't count
               * low-priority hits because higher prio entries will often
               * provide the same data but in a further stage of processing.
               */
              if (entry->priority > SVN_CACHE__MEMBUFFER_LOW_PRIORITY)
                drop_hits += entry->hit_count * (apr_uint64_t)entry->priority;

              drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached. But if it was, "can't insert" was the
   * right answer. */
}
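/* Worked example (not part of the original file): the eviction budget in
 * ensure_data_insertable_l2() is (hit_count + 1) * priority of the
 * incoming item.  An entry with 3 hits at default priority may thus oust
 * entries worth up to 4 * SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY weighted
 * hits; once the accumulated drop_hits of evicted (non-low-priority)
 * entries exceeds that budget, the insertion is judged not worth its
 * cost and rejected.
 */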
/* This function implements the cache insertion / eviction strategy for L1.
 *
 * If necessary, enlarge the insertion window of CACHE->L1 by promoting
 * entries to L2 until it is at least SIZE bytes long.
 *
 * Return TRUE if enough room could be found or made.  A FALSE result
 * indicates that the respective item shall not be added because it is
 * too large.
 */
static svn_boolean_t
ensure_data_insertable_l1(svn_membuffer_t *cache, apr_size_t size)
{
  /* Guarantees that the while loop will terminate. */
  if (size > cache->l1.size)
    return FALSE;

  /* This loop will eventually terminate because every cache entry
   * would get dropped eventually.
   */
  while (1)
    {
      /* first offset behind the insertion window
       */
      apr_uint32_t entry_index = cache->l1.next;
      entry_t *entry = get_entry(cache, entry_index);
      apr_uint64_t end = cache->l1.next == NO_INDEX
                       ? cache->l1.start_offset + cache->l1.size
                       : entry->offset;

      /* leave function as soon as the insertion window is large enough
       */
      if (end >= size + cache->l1.current_data)
        return TRUE;

      /* Enlarge the insertion window
       */
      if (cache->l1.next == NO_INDEX)
        {
          /* We reached the end of the data buffer; restart at the beginning.
           * Due to the randomized nature of our LFU implementation, very
           * large data items may require multiple passes. Therefore, SIZE
           * should be restricted to significantly less than data_size.
           */
          cache->l1.current_data = cache->l1.start_offset;
          cache->l1.next = cache->l1.first;
        }
      else
        {
          /* Remove the entry from the end of insertion window and promote
           * it to L2, if it is important enough.
           */
          svn_boolean_t keep = ensure_data_insertable_l2(cache, entry);

          /* We might have touched the group that contains ENTRY. Recheck. */
          if (entry_index == cache->l1.next)
            {
              if (keep)
                promote_entry(cache, entry);
              else
                drop_entry(cache, entry);
            }
        }
    }

  /* This will never be reached. But if it was, "can't insert" was the
   * right answer. */
}
Smaller caches should not be segmented
1695        * as this severely limits the maximum size of cachable items.
1696        *
1697        * Segments should not be smaller than 32MB and max. cachable item
1698        * size should grow as fast as segmentation.
1699        */
1700
1701      apr_uint32_t segment_count_shift = 0;
1702      while (((2 * DEFAULT_MIN_SEGMENT_SIZE) << (2 * segment_count_shift))
1703             < total_size)
1704        ++segment_count_shift;
1705
1706      segment_count = (apr_size_t)1 << segment_count_shift;
1707    }
1708
1709  /* If we have an extremely large cache (>512 GB), the default segment
1710   * size may exceed the amount allocatable as one chunk. In that case,
1711   * increase segmentation until we are under the threshold.
1712   */
1713  while (   total_size / segment_count > MAX_SEGMENT_SIZE
1714         && segment_count < MAX_SEGMENT_COUNT)
1715    segment_count *= 2;
1716
1717  /* allocate cache as an array of segments / cache objects */
1718  c = apr_palloc(pool, segment_count * sizeof(*c));
1719
1720  /* Split total cache size into segments of equal size
1721   */
1722  total_size /= segment_count;
1723  directory_size /= segment_count;
1724
1725  /* prevent pathological conditions: ensure a certain minimum cache size
1726   */
1727  if (total_size < 2 * sizeof(entry_group_t))
1728    total_size = 2 * sizeof(entry_group_t);
1729
1730  /* adapt the dictionary size accordingly, if necessary:
1731   * It must hold at least one group and must not exceed the cache size.
1732   */
1733  if (directory_size > total_size - sizeof(entry_group_t))
1734    directory_size = total_size - sizeof(entry_group_t);
1735  if (directory_size < 2 * sizeof(entry_group_t))
1736    directory_size = 2 * sizeof(entry_group_t);
1737
1738  /* limit the data size to what we can address.
1739   * Note that this cannot overflow since all values are of size_t.
1740   * Also, make it a multiple of the item placement granularity to
1741   * prevent subtle overflows.
1742   */
1743  data_size = ALIGN_VALUE(total_size - directory_size + 1) - ITEM_ALIGNMENT;
1744
1745  /* For cache sizes > 16TB, individual cache segments will be larger
1746   * than 32GB allowing for >4GB entries. But caching chunks larger
1747   * than 4GB is simply not supported.
1748   */
1749  max_entry_size = data_size / 8 > MAX_ITEM_SIZE
1750                 ? MAX_ITEM_SIZE
1751                 : data_size / 8;
1752
1753  /* to keep the entries small, we use 32 bit indexes only
1754   * -> we need to ensure that no more than 4G entries exist.
1755   *
1756   * Note that this limit could only be exceeded in a very
1757   * theoretical setup with about 1EB of cache.
1758   */
1759  group_count = directory_size / sizeof(entry_group_t)
1760                    >= (APR_UINT32_MAX / GROUP_SIZE)
1761              ?
(APR_UINT32_MAX / GROUP_SIZE) - 1 1762 : (apr_uint32_t)(directory_size / sizeof(entry_group_t)); 1763 1764 /* set some of the index directory aside as over-flow (spare) buffers */ 1765 spare_group_count = MAX(group_count / 4, 1); 1766 main_group_count = group_count - spare_group_count; 1767 assert(spare_group_count > 0 && main_group_count > 0); 1768 1769 group_init_size = 1 + group_count / (8 * GROUP_INIT_GRANULARITY); 1770 for (seg = 0; seg < segment_count; ++seg) 1771 { 1772 /* allocate buffers and initialize cache members 1773 */ 1774 c[seg].segment_count = (apr_uint32_t)segment_count; 1775 1776 c[seg].group_count = main_group_count; 1777 c[seg].spare_group_count = spare_group_count; 1778 c[seg].first_spare_group = NO_INDEX; 1779 c[seg].max_spare_used = 0; 1780 1781 c[seg].directory = apr_pcalloc(pool, 1782 group_count * sizeof(entry_group_t)); 1783 1784 /* Allocate and initialize directory entries as "not initialized", 1785 hence "unused" */ 1786 c[seg].group_initialized = apr_pcalloc(pool, group_init_size); 1787 1788 /* Allocate 1/4th of the data buffer to L1 1789 */ 1790 c[seg].l1.first = NO_INDEX; 1791 c[seg].l1.last = NO_INDEX; 1792 c[seg].l1.next = NO_INDEX; 1793 c[seg].l1.start_offset = 0; 1794 c[seg].l1.size = ALIGN_VALUE(data_size / 4); 1795 c[seg].l1.current_data = 0; 1796 1797 /* The remaining 3/4th will be used as L2 1798 */ 1799 c[seg].l2.first = NO_INDEX; 1800 c[seg].l2.last = NO_INDEX; 1801 c[seg].l2.next = NO_INDEX; 1802 c[seg].l2.start_offset = c[seg].l1.size; 1803 c[seg].l2.size = ALIGN_VALUE(data_size) - c[seg].l1.size; 1804 c[seg].l2.current_data = c[seg].l2.start_offset; 1805 1806 /* This cast is safe because DATA_SIZE <= MAX_SEGMENT_SIZE. */ 1807 c[seg].data = apr_palloc(pool, (apr_size_t)ALIGN_VALUE(data_size)); 1808 c[seg].data_used = 0; 1809 c[seg].max_entry_size = max_entry_size; 1810 1811 c[seg].used_entries = 0; 1812 c[seg].total_reads = 0; 1813 c[seg].total_writes = 0; 1814 c[seg].total_hits = 0; 1815 1816 /* were allocations successful? 1817 * If not, initialize a minimal cache structure. 1818 */ 1819 if (c[seg].data == NULL || c[seg].directory == NULL) 1820 { 1821 /* We are OOM. There is no need to proceed with "half a cache". 1822 */ 1823 return svn_error_wrap_apr(APR_ENOMEM, "OOM"); 1824 } 1825 1826#if (APR_HAS_THREADS && USE_SIMPLE_MUTEX) 1827 /* A lock for intra-process synchronization to the cache, or NULL if 1828 * the cache's creator doesn't feel the cache needs to be 1829 * thread-safe. 1830 */ 1831 SVN_ERR(svn_mutex__init(&c[seg].lock, thread_safe, pool)); 1832#elif (APR_HAS_THREADS && !USE_SIMPLE_MUTEX) 1833 /* Same for read-write lock. */ 1834 c[seg].lock = NULL; 1835 if (thread_safe) 1836 { 1837 apr_status_t status = 1838 apr_thread_rwlock_create(&(c[seg].lock), pool); 1839 if (status) 1840 return svn_error_wrap_apr(status, _("Can't create cache mutex")); 1841 } 1842 1843 /* Select the behavior of write operations. 1844 */ 1845 c[seg].allow_blocking_writes = allow_blocking_writes; 1846#endif 1847 } 1848 1849 /* done here 1850 */ 1851 *cache = c; 1852 return SVN_NO_ERROR; 1853} 1854 1855svn_error_t * 1856svn_cache__membuffer_clear(svn_membuffer_t *cache) 1857{ 1858 apr_size_t seg; 1859 apr_size_t segment_count = cache->segment_count; 1860 1861 /* Length of the group_initialized array in bytes. 1862 See also svn_cache__membuffer_cache_create(). */ 1863 apr_size_t group_init_size 1864 = 1 + (cache->group_count + cache->spare_group_count) 1865 / (8 * GROUP_INIT_GRANULARITY); 1866 1867 /* Clear segment by segment. 
This implies that other threads may read
1868     and write to other segments after we cleared them and before the
1869     last segment is done.
1870
1871     However, that is no different from a write request coming through
1872     right after we cleared all segments because dependencies between
1873     cache entries (recursive lookup / access locks) are not allowed.
1874   */
1875  for (seg = 0; seg < segment_count; ++seg)
1876    {
1877      /* Unconditionally acquire the write lock. */
1878      SVN_ERR(force_write_lock_cache(&cache[seg]));
1879
1880      /* Mark all groups as "not initialized", which implies "empty". */
1881      cache[seg].first_spare_group = NO_INDEX;
1882      cache[seg].max_spare_used = 0;
1883
1884      memset(cache[seg].group_initialized, 0, group_init_size);
1885
1886      /* Unlink L1 contents. */
1887      cache[seg].l1.first = NO_INDEX;
1888      cache[seg].l1.last = NO_INDEX;
1889      cache[seg].l1.next = NO_INDEX;
1890      cache[seg].l1.current_data = cache[seg].l1.start_offset;
1891
1892      /* Unlink L2 contents. */
1893      cache[seg].l2.first = NO_INDEX;
1894      cache[seg].l2.last = NO_INDEX;
1895      cache[seg].l2.next = NO_INDEX;
1896      cache[seg].l2.current_data = cache[seg].l2.start_offset;
1897
1898      /* Reset content counters. */
1899      cache[seg].data_used = 0;
1900      cache[seg].used_entries = 0;
1901
1902      /* Segment may be used again. */
1903      SVN_ERR(unlock_cache(&cache[seg], SVN_NO_ERROR));
1904    }
1905
1906  /* done here */
1907  return SVN_NO_ERROR;
1908}
1909
1910/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1911 * by the hash value TO_FIND and set *FOUND accordingly.
1912 *
1913 * Note: This function requires the caller to serialize access.
1914 * Don't call it directly, call entry_exists instead.
1915 */
1916static svn_error_t *
1917entry_exists_internal(svn_membuffer_t *cache,
1918                      apr_uint32_t group_index,
1919                      const full_key_t *to_find,
1920                      svn_boolean_t *found)
1921{
1922  *found = find_entry(cache, group_index, to_find, FALSE) != NULL;
1923  return SVN_NO_ERROR;
1924}
1925
1926/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
1927 * by the hash value TO_FIND and set *FOUND accordingly.
1928 */
1929static svn_error_t *
1930entry_exists(svn_membuffer_t *cache,
1931             apr_uint32_t group_index,
1932             const full_key_t *to_find,
1933             svn_boolean_t *found)
1934{
1935  WITH_READ_LOCK(cache,
1936                 entry_exists_internal(cache,
1937                                       group_index,
1938                                       to_find,
1939                                       found));
1940
1941  return SVN_NO_ERROR;
1942}
1943
1944/* Given the SIZE and PRIORITY of a new item, return the cache level
1945   (L1 or L2) in fragment CACHE that this item shall be inserted into.
1946   If we can neither find nor make enough room for the item, return NULL.
1947 */
1948static cache_level_t *
1949select_level(svn_membuffer_t *cache,
1950             apr_size_t size,
1951             apr_uint32_t priority)
1952{
1953  if (cache->max_entry_size >= size)
1954    {
1955      /* Small items go into L1. */
1956      return ensure_data_insertable_l1(cache, size)
1957           ? &cache->l1
1958           : NULL;
1959    }
1960  else if (   cache->l2.size >= size
1961           && MAX_ITEM_SIZE >= size
1962           && priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY)
1963    {
1964      /* Large but important items go into L2. */
1965      entry_t dummy_entry = { { { 0 } } };
1966      dummy_entry.priority = priority;
1967      dummy_entry.size = size;
1968
1969      return ensure_data_insertable_l2(cache, &dummy_entry)
1970           ? &cache->l2
1971           : NULL;
1972    }
1973
1974  /* Don't cache large, unimportant items.
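 *
 * In summary, the level selection above is:
 *   SIZE <= max_entry_size                    -> L1, if room can be made
 *   SIZE <= l2.size, above-default priority   -> L2, if room can be made
 *   anything else                             -> not cached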
 */
1975  return NULL;
1976}
1977
1978/* Try to insert the serialized item given in BUFFER with ITEM_SIZE
1979 * into the group GROUP_INDEX of CACHE and uniquely identify it by
1980 * hash value TO_FIND.
1981 *
1982 * However, there is no guarantee that it will actually be put into
1983 * the cache. If there is already some data associated with TO_FIND,
1984 * it will be removed from the cache even if the new data cannot
1985 * be inserted.
1986 *
1987 * Note: This function requires the caller to serialize access.
1988 * Don't call it directly, call membuffer_cache_set instead.
1989 */
1990static svn_error_t *
1991membuffer_cache_set_internal(svn_membuffer_t *cache,
1992                             const full_key_t *to_find,
1993                             apr_uint32_t group_index,
1994                             char *buffer,
1995                             apr_size_t item_size,
1996                             apr_uint32_t priority,
1997                             DEBUG_CACHE_MEMBUFFER_TAG_ARG
1998                             apr_pool_t *scratch_pool)
1999{
2000  cache_level_t *level;
2001  apr_size_t size = item_size + to_find->entry_key.key_len;
2002
2003  /* first, look for a previous entry for the given key */
2004  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2005
2006  /* if there is an old version of that entry and the new data fits into
2007   * the old spot, just re-use that space. */
2008  if (entry && ALIGN_VALUE(entry->size) >= size && buffer)
2009    {
2010      /* Careful! We need to cast SIZE to the full width of CACHE->DATA_USED
2011       * lest we run into trouble with 32 bit underflow *not* treated as a
2012       * negative value.
2013       */
2014      cache->data_used += (apr_uint64_t)size - entry->size;
2015      entry->size = size;
2016      entry->priority = priority;
2017
2018#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2019
2020      /* Remember original content, type and key (hashes)
2021       */
2022      SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2023      memcpy(&entry->tag, tag, sizeof(*tag));
2024
2025#endif
2026
2027      if (entry->key.key_len)
2028        memcpy(cache->data + entry->offset, to_find->full_key.data,
2029               entry->key.key_len);
2030      if (item_size)
2031        memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2032               item_size);
2033
2034      cache->total_writes++;
2035      return SVN_NO_ERROR;
2036    }
2037
2038  /* if necessary, enlarge the insertion window.
2039   */
2040  level = buffer ? select_level(cache, size, priority) : NULL;
2041  if (level)
2042    {
2043      /* Remove old data for this key, if that exists.
2044       * Get an unused entry for the key and initialize it with
2045       * the serialized item's (future) position within the data buffer.
2046       */
2047      entry = find_entry(cache, group_index, to_find, TRUE);
2048      entry->size = size;
2049      entry->offset = level->current_data;
2050      entry->priority = priority;
2051
2052#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2053
2054      /* Remember original content, type and key (hashes)
2055       */
2056      SVN_ERR(store_content_part(tag, buffer, item_size, scratch_pool));
2057      memcpy(&entry->tag, tag, sizeof(*tag));
2058
2059#endif
2060
2061      /* Link the entry properly.
2062       */
2063      insert_entry(cache, entry);
2064
2065      /* Copy the serialized item data into the cache.
2066       */
2067      if (entry->key.key_len)
2068        memcpy(cache->data + entry->offset, to_find->full_key.data,
2069               entry->key.key_len);
2070      if (item_size)
2071        memcpy(cache->data + entry->offset + entry->key.key_len, buffer,
2072               item_size);
2073
2074      cache->total_writes++;
2075    }
2076  else
2077    {
2078      /* if there is already an entry for this key, drop it.
2079       * Since ensure_data_insertable may have removed entries from
2080       * ENTRY's group, re-do the lookup.
2081       */
2082      entry = find_entry(cache, group_index, to_find, FALSE);
2083      if (entry)
2084        drop_entry(cache, entry);
2085    }
2086
2087  return SVN_NO_ERROR;
2088}
2089
2090/* Try to insert the ITEM and use the KEY to uniquely identify it.
2091 * However, there is no guarantee that it will actually be put into
2092 * the cache. If there is already some data associated with the KEY,
2093 * it will be removed from the cache even if the new data cannot
2094 * be inserted.
2095 *
2096 * The SERIALIZER is called to transform the ITEM into a single,
2097 * flat data buffer. Temporary allocations may be done in POOL.
2098 */
2099static svn_error_t *
2100membuffer_cache_set(svn_membuffer_t *cache,
2101                    const full_key_t *key,
2102                    void *item,
2103                    svn_cache__serialize_func_t serializer,
2104                    apr_uint32_t priority,
2105                    DEBUG_CACHE_MEMBUFFER_TAG_ARG
2106                    apr_pool_t *scratch_pool)
2107{
2108  apr_uint32_t group_index;
2109  void *buffer = NULL;
2110  apr_size_t size = 0;
2111
2112  /* find the entry group that will hold the key.
2113   */
2114  group_index = get_group_index(&cache, &key->entry_key);
2115
2116  /* Serialize the data.
2117   */
2118  if (item)
2119    SVN_ERR(serializer(&buffer, &size, item, scratch_pool));
2120
2121  /* The actual cache data access needs to be sync'ed.
2122   */
2123  WITH_WRITE_LOCK(cache,
2124                  membuffer_cache_set_internal(cache,
2125                                               key,
2126                                               group_index,
2127                                               buffer,
2128                                               size,
2129                                               priority,
2130                                               DEBUG_CACHE_MEMBUFFER_TAG
2131                                               scratch_pool));
2132  return SVN_NO_ERROR;
2133}
2134
2135/* Count a hit in ENTRY within CACHE.
2136 */
2137static void
2138increment_hit_counters(svn_membuffer_t *cache, entry_t *entry)
2139{
2140  /* To minimize the memory footprint of the cache index, we limit local
2141   * hit counters to 32 bits. These may overflow but we don't really
2142   * care because at worst, ENTRY will be dropped from cache once every
2143   * few billion hits. */
2144  svn_atomic_inc(&entry->hit_count);
2145
2146  /* That one is for stats only. */
2147  cache->total_hits++;
2148}
2149
2150/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2151 * by the hash value TO_FIND. If no item has been stored for KEY,
2152 * *BUFFER will be NULL. Otherwise, return a copy of the serialized
2153 * data in *BUFFER and return its size in *ITEM_SIZE. Allocations will
2154 * be done in POOL.
2155 *
2156 * Note: This function requires the caller to serialize access.
2157 * Don't call it directly, call membuffer_cache_get instead.
2158 */
2159static svn_error_t *
2160membuffer_cache_get_internal(svn_membuffer_t *cache,
2161                             apr_uint32_t group_index,
2162                             const full_key_t *to_find,
2163                             char **buffer,
2164                             apr_size_t *item_size,
2165                             DEBUG_CACHE_MEMBUFFER_TAG_ARG
2166                             apr_pool_t *result_pool)
2167{
2168  entry_t *entry;
2169  apr_size_t size;
2170
2171  /* The actual cache data access needs to be sync'ed.
2172   */
2173  entry = find_entry(cache, group_index, to_find, FALSE);
2174  cache->total_reads++;
2175  if (entry == NULL)
2176    {
2177      /* no such entry found.
2178       */
2179      *buffer = NULL;
2180      *item_size = 0;
2181
2182      return SVN_NO_ERROR;
2183    }
2184
2185  size = ALIGN_VALUE(entry->size) - entry->key.key_len;
2186  *buffer = apr_palloc(result_pool, size);
2187  memcpy(*buffer, cache->data + entry->offset + entry->key.key_len, size);
2188
2189#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2190
2191  /* Check for overlapping entries.
2192   */
2193  SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2194                 entry->offset + size
2195                    <= get_entry(cache, entry->next)->offset);
2196
2197  /* Compare original content, type and key (hashes)
2198   */
2199  SVN_ERR(store_content_part(tag, *buffer, entry->size - entry->key.key_len,
2200                             result_pool));
2201  SVN_ERR(assert_equal_tags(&entry->tag, tag));
2202
2203#endif
2204
2205  /* update hit statistics
2206   */
2207  increment_hit_counters(cache, entry);
2208  *item_size = entry->size - entry->key.key_len;
2209
2210  return SVN_NO_ERROR;
2211}
2212
2213/* Look for the *ITEM identified by KEY. If no item has been stored
2214 * for KEY, *ITEM will be NULL. Otherwise, the DESERIALIZER is called to
2215 * re-construct the proper object from the serialized data.
2216 * Allocations will be done in POOL.
2217 */
2218static svn_error_t *
2219membuffer_cache_get(svn_membuffer_t *cache,
2220                    const full_key_t *key,
2221                    void **item,
2222                    svn_cache__deserialize_func_t deserializer,
2223                    DEBUG_CACHE_MEMBUFFER_TAG_ARG
2224                    apr_pool_t *result_pool)
2225{
2226  apr_uint32_t group_index;
2227  char *buffer;
2228  apr_size_t size;
2229
2230  /* find the entry group that will hold the key.
2231   */
2232  group_index = get_group_index(&cache, &key->entry_key);
2233  WITH_READ_LOCK(cache,
2234                 membuffer_cache_get_internal(cache,
2235                                              group_index,
2236                                              key,
2237                                              &buffer,
2238                                              &size,
2239                                              DEBUG_CACHE_MEMBUFFER_TAG
2240                                              result_pool));
2241
2242  /* re-construct the original data object from its serialized form.
2243   */
2244  if (buffer == NULL)
2245    {
2246      *item = NULL;
2247      return SVN_NO_ERROR;
2248    }
2249
2250  return deserializer(item, buffer, size, result_pool);
2251}
2252
2253/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2254 * by the hash value TO_FIND. Set *FOUND to TRUE if an item has been
2255 * stored for KEY and to FALSE otherwise.
2256 */
2257static svn_error_t *
2258membuffer_cache_has_key_internal(svn_membuffer_t *cache,
2259                                 apr_uint32_t group_index,
2260                                 const full_key_t *to_find,
2261                                 svn_boolean_t *found)
2262{
2263  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2264  if (entry)
2265    {
2266      /* This will often be called by "block read" when most data is
2267         already in L2 and only a few previously evicted items are added
2268         to L1 again. While items in L1 are well protected for a while,
2269         L2 items may get evicted soon. Thus, mark them all as "hit" to
2270         give them a higher chance of survival. */
2271      increment_hit_counters(cache, entry);
2272      *found = TRUE;
2273    }
2274  else
2275    {
2276      *found = FALSE;
2277    }
2278
2279  return SVN_NO_ERROR;
2280}
2281
2282/* Look for an entry identified by KEY. Set *FOUND to TRUE if an item
2283 * has been stored for KEY and to FALSE otherwise.
2284 */
2285/* Implements svn_cache__has_key for membuffer caches.
2286 */
2287static svn_error_t *
2288membuffer_cache_has_key(svn_membuffer_t *cache,
2289                        const full_key_t *key,
2290                        svn_boolean_t *found)
2291{
2292  /* find the entry group that will hold the key.
2293   */
2294  apr_uint32_t group_index = get_group_index(&cache, &key->entry_key);
2295  cache->total_reads++;
2296
2297  WITH_READ_LOCK(cache,
2298                 membuffer_cache_has_key_internal(cache,
2299                                                  group_index,
2300                                                  key,
2301                                                  found));
2302
2303  return SVN_NO_ERROR;
2304}
2305
2306/* Look for the cache entry in group GROUP_INDEX of CACHE, identified
2307 * by the hash value TO_FIND. FOUND indicates whether that entry exists.
2308 * If not found, *ITEM will be NULL.
2309 * 2310 * Otherwise, the DESERIALIZER is called with that entry and the BATON 2311 * provided and will extract the desired information. The result is set 2312 * in *ITEM. Allocations will be done in POOL. 2313 * 2314 * Note: This function requires the caller to serialization access. 2315 * Don't call it directly, call membuffer_cache_get_partial instead. 2316 */ 2317static svn_error_t * 2318membuffer_cache_get_partial_internal(svn_membuffer_t *cache, 2319 apr_uint32_t group_index, 2320 const full_key_t *to_find, 2321 void **item, 2322 svn_boolean_t *found, 2323 svn_cache__partial_getter_func_t deserializer, 2324 void *baton, 2325 DEBUG_CACHE_MEMBUFFER_TAG_ARG 2326 apr_pool_t *result_pool) 2327{ 2328 entry_t *entry = find_entry(cache, group_index, to_find, FALSE); 2329 cache->total_reads++; 2330 if (entry == NULL) 2331 { 2332 *item = NULL; 2333 *found = FALSE; 2334 2335 return SVN_NO_ERROR; 2336 } 2337 else 2338 { 2339 const void *item_data = cache->data + entry->offset + entry->key.key_len; 2340 apr_size_t item_size = entry->size - entry->key.key_len; 2341 *found = TRUE; 2342 increment_hit_counters(cache, entry); 2343 2344#ifdef SVN_DEBUG_CACHE_MEMBUFFER 2345 2346 /* Check for overlapping entries. 2347 */ 2348 SVN_ERR_ASSERT(entry->next == NO_INDEX || 2349 entry->offset + entry->size 2350 <= get_entry(cache, entry->next)->offset); 2351 2352 /* Compare original content, type and key (hashes) 2353 */ 2354 SVN_ERR(store_content_part(tag, item_data, item_size, result_pool)); 2355 SVN_ERR(assert_equal_tags(&entry->tag, tag)); 2356 2357#endif 2358 2359 return deserializer(item, item_data, item_size, baton, result_pool); 2360 } 2361} 2362 2363/* Look for the cache entry identified by KEY. FOUND indicates 2364 * whether that entry exists. If not found, *ITEM will be NULL. Otherwise, 2365 * the DESERIALIZER is called with that entry and the BATON provided 2366 * and will extract the desired information. The result is set in *ITEM. 2367 * Allocations will be done in POOL. 2368 */ 2369static svn_error_t * 2370membuffer_cache_get_partial(svn_membuffer_t *cache, 2371 const full_key_t *key, 2372 void **item, 2373 svn_boolean_t *found, 2374 svn_cache__partial_getter_func_t deserializer, 2375 void *baton, 2376 DEBUG_CACHE_MEMBUFFER_TAG_ARG 2377 apr_pool_t *result_pool) 2378{ 2379 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key); 2380 2381 WITH_READ_LOCK(cache, 2382 membuffer_cache_get_partial_internal 2383 (cache, group_index, key, item, found, 2384 deserializer, baton, DEBUG_CACHE_MEMBUFFER_TAG 2385 result_pool)); 2386 2387 return SVN_NO_ERROR; 2388} 2389 2390/* Look for the cache entry in group GROUP_INDEX of CACHE, identified 2391 * by the hash value TO_FIND. If no entry has been found, the function 2392 * returns without modifying the cache. 2393 * 2394 * Otherwise, FUNC is called with that entry and the BATON provided 2395 * and may modify the cache entry. Allocations will be done in POOL. 2396 * 2397 * Note: This function requires the caller to serialization access. 2398 * Don't call it directly, call membuffer_cache_set_partial instead. 
2399 */
2400static svn_error_t *
2401membuffer_cache_set_partial_internal(svn_membuffer_t *cache,
2402                                     apr_uint32_t group_index,
2403                                     const full_key_t *to_find,
2404                                     svn_cache__partial_setter_func_t func,
2405                                     void *baton,
2406                                     DEBUG_CACHE_MEMBUFFER_TAG_ARG
2407                                     apr_pool_t *scratch_pool)
2408{
2409  /* cache item lookup
2410   */
2411  entry_t *entry = find_entry(cache, group_index, to_find, FALSE);
2412  cache->total_reads++;
2413
2414  /* this function is a no-op if the item is not in cache
2415   */
2416  if (entry != NULL)
2417    {
2418      svn_error_t *err;
2419
2420      /* access the serialized cache item */
2421      apr_size_t key_len = entry->key.key_len;
2422      void *item_data = cache->data + entry->offset + key_len;
2423      void *orig_data = item_data;
2424      apr_size_t item_size = entry->size - key_len;
2425
2426      increment_hit_counters(cache, entry);
2427      cache->total_writes++;
2428
2429#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2430
2431      /* Check for overlapping entries.
2432       */
2433      SVN_ERR_ASSERT(entry->next == NO_INDEX ||
2434                     entry->offset + entry->size
2435                        <= get_entry(cache, entry->next)->offset);
2436
2437      /* Compare original content, type and key (hashes)
2438       */
2439      SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2440      SVN_ERR(assert_equal_tags(&entry->tag, tag));
2441
2442#endif
2443
2444      /* modify it, preferably in-situ.
2445       */
2446      err = func(&item_data, &item_size, baton, scratch_pool);
2447
2448      if (err)
2449        {
2450          /* Something somewhere went wrong while FUNC was modifying the
2451           * changed item. Thus, it might have become invalid / corrupted.
2452           * We better drop that.
2453           */
2454          drop_entry(cache, entry);
2455
2456          return err;
2457        }
2458      else
2459        {
2460          /* if the modification caused a re-allocation, we need to remove
2461           * the old entry and to copy the new data back into cache.
2462           */
2463          if (item_data != orig_data)
2464            {
2465              /* Remove the old entry and try to make space for the new one.
2466               */
2467              drop_entry(cache, entry);
2468              if (   (cache->max_entry_size >= item_size + key_len)
2469                  && ensure_data_insertable_l1(cache, item_size + key_len))
2470                {
2471                  /* Write the new entry.
2472                   */
2473                  entry = find_entry(cache, group_index, to_find, TRUE);
2474                  entry->size = item_size + key_len;
2475                  entry->offset = cache->l1.current_data;
2476
2477                  if (key_len)
2478                    memcpy(cache->data + entry->offset,
2479                           to_find->full_key.data, key_len);
2480                  if (item_size)
2481                    memcpy(cache->data + entry->offset + key_len, item_data,
2482                           item_size);
2483
2484                  /* Link the entry properly.
2485                   */
2486                  insert_entry(cache, entry);
2487                }
2488            }
2489
2490#ifdef SVN_DEBUG_CACHE_MEMBUFFER
2491
2492          /* Remember original content, type and key (hashes)
2493           */
2494          SVN_ERR(store_content_part(tag, item_data, item_size, scratch_pool));
2495          memcpy(&entry->tag, tag, sizeof(*tag));
2496
2497#endif
2498        }
2499    }
2500
2501  return SVN_NO_ERROR;
2502}
2503
2504/* Look for the cache entry identified by KEY. If no entry
2505 * has been found, the function returns without modifying the cache.
2506 * Otherwise, FUNC is called with that entry and the BATON provided
2507 * and may modify the cache entry. Allocations will be done in POOL.
2508 */ 2509static svn_error_t * 2510membuffer_cache_set_partial(svn_membuffer_t *cache, 2511 const full_key_t *key, 2512 svn_cache__partial_setter_func_t func, 2513 void *baton, 2514 DEBUG_CACHE_MEMBUFFER_TAG_ARG 2515 apr_pool_t *scratch_pool) 2516{ 2517 /* cache item lookup 2518 */ 2519 apr_uint32_t group_index = get_group_index(&cache, &key->entry_key); 2520 WITH_WRITE_LOCK(cache, 2521 membuffer_cache_set_partial_internal 2522 (cache, group_index, key, func, baton, 2523 DEBUG_CACHE_MEMBUFFER_TAG 2524 scratch_pool)); 2525 2526 /* done here -> unlock the cache 2527 */ 2528 return SVN_NO_ERROR; 2529} 2530 2531/* Implement the svn_cache__t interface on top of a shared membuffer cache. 2532 * 2533 * Because membuffer caches tend to be very large, there will be rather few 2534 * of them (usually only one). Thus, the same instance shall be used as the 2535 * backend to many application-visible svn_cache__t instances. This should 2536 * also achieve global resource usage fairness. 2537 * 2538 * To accommodate items from multiple resources, the individual keys must be 2539 * unique over all sources. This is achieved by simply adding a prefix key 2540 * that unambiguously identifies the item's context (e.g. path to the 2541 * respective repository). The prefix will be set upon construction of the 2542 * svn_cache__t instance. 2543 */ 2544 2545/* Internal cache structure (used in svn_cache__t.cache_internal) basically 2546 * holding the additional parameters needed to call the respective membuffer 2547 * functions. 2548 */ 2549typedef struct svn_membuffer_cache_t 2550{ 2551 /* this is where all our data will end up in 2552 */ 2553 svn_membuffer_t *membuffer; 2554 2555 /* use this conversion function when inserting an item into the memcache 2556 */ 2557 svn_cache__serialize_func_t serializer; 2558 2559 /* use this conversion function when reading an item from the memcache 2560 */ 2561 svn_cache__deserialize_func_t deserializer; 2562 2563 /* Prepend this byte sequence to any key passed to us. 2564 * This makes our keys different from all keys used by svn_membuffer_cache_t 2565 * instances that we don't want to share cached data with. 2566 */ 2567 full_key_t prefix; 2568 2569 /* length of the keys that will be passed to us through the 2570 * svn_cache_t interface. May be APR_HASH_KEY_STRING. 2571 */ 2572 apr_ssize_t key_len; 2573 2574 /* priority class for all items written through this interface */ 2575 apr_uint32_t priority; 2576 2577 /* Temporary buffer containing the hash key for the current access 2578 */ 2579 full_key_t combined_key; 2580 2581 /* if enabled, this will serialize the access to this instance. 2582 */ 2583 svn_mutex__t *mutex; 2584} svn_membuffer_cache_t; 2585 2586/* After an estimated ALLOCATIONS_PER_POOL_CLEAR allocations, we should 2587 * clear the svn_membuffer_cache_t.pool to keep memory consumption in check. 2588 */ 2589#define ALLOCATIONS_PER_POOL_CLEAR 10 2590 2591/* Basically calculate a hash value for KEY of length KEY_LEN, combine it 2592 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY. 2593 * This could replace combine_key() entirely but we actually use it only 2594 * when the quick path failed. 
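 *
 * For example - with illustrative values only: given the prefix
 * "fsfs:/repo:" and KEY "rev-17", the combined full key contains the
 * prefix followed by the NUL-padded key, and the fingerprint is the
 * FNV-1a hash of KEY XORed with the prefix fingerprint (see the code
 * below).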
2595 */ 2596static void 2597combine_long_key(svn_membuffer_cache_t *cache, 2598 const void *key, 2599 apr_ssize_t key_len) 2600{ 2601 apr_uint32_t *digest_buffer; 2602 char *key_copy; 2603 apr_size_t prefix_len = cache->prefix.entry_key.key_len; 2604 apr_size_t aligned_key_len; 2605 2606 /* handle variable-length keys */ 2607 if (key_len == APR_HASH_KEY_STRING) 2608 key_len = strlen((const char *) key); 2609 2610 aligned_key_len = ALIGN_VALUE(key_len); 2611 2612 /* Combine keys. */ 2613 svn_membuf__ensure(&cache->combined_key.full_key, 2614 aligned_key_len + prefix_len); 2615 2616 key_copy = (char *)cache->combined_key.full_key.data + prefix_len; 2617 cache->combined_key.entry_key.key_len = aligned_key_len + prefix_len; 2618 memcpy(key_copy, key, key_len); 2619 memset(key_copy + key_len, 0, aligned_key_len - key_len); 2620 2621 /* Hash key into 16 bytes. */ 2622 digest_buffer = (apr_uint32_t *)cache->combined_key.entry_key.fingerprint; 2623 svn__fnv1a_32x4_raw(digest_buffer, key, key_len); 2624 2625 /* Combine with prefix. */ 2626 cache->combined_key.entry_key.fingerprint[0] 2627 ^= cache->prefix.entry_key.fingerprint[0]; 2628 cache->combined_key.entry_key.fingerprint[1] 2629 ^= cache->prefix.entry_key.fingerprint[1]; 2630} 2631 2632/* Basically calculate a hash value for KEY of length KEY_LEN, combine it 2633 * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY. 2634 */ 2635static void 2636combine_key(svn_membuffer_cache_t *cache, 2637 const void *key, 2638 apr_ssize_t key_len) 2639{ 2640 /* short, fixed-size keys are the most common case */ 2641 if (key_len != APR_HASH_KEY_STRING && key_len <= 16) 2642 { 2643 const apr_size_t prefix_len = cache->prefix.entry_key.key_len; 2644 2645 /* Copy of *key, padded with 0. 2646 * We put it just behind the prefix already copied into the COMBINED_KEY. 2647 * The buffer space has been allocated when the cache was created. */ 2648 apr_uint64_t *data = (void *)((char *)cache->combined_key.full_key.data + 2649 prefix_len); 2650 assert(prefix_len <= cache->combined_key.full_key.size - 16); 2651 cache->combined_key.entry_key.key_len = prefix_len + 16; 2652 2653 data[0] = 0; 2654 data[1] = 0; 2655 memcpy(data, key, key_len); 2656 2657 /* scramble key DATA. All of this must be reversible to prevent key 2658 * collisions. So, we limit ourselves to xor and permutations. */ 2659 data[1] = (data[1] << 27) | (data[1] >> 37); 2660 data[1] ^= data[0] & 0xffff; 2661 data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000); 2662 2663 /* combine with this cache's namespace */ 2664 cache->combined_key.entry_key.fingerprint[0] 2665 = data[0] ^ cache->prefix.entry_key.fingerprint[0]; 2666 cache->combined_key.entry_key.fingerprint[1] 2667 = data[1] ^ cache->prefix.entry_key.fingerprint[1]; 2668 } 2669 else 2670 { 2671 /* longer or variably sized keys */ 2672 combine_long_key(cache, key, key_len); 2673 } 2674} 2675 2676/* Implement svn_cache__vtable_t.get (not thread-safe) 2677 */ 2678static svn_error_t * 2679svn_membuffer_cache_get(void **value_p, 2680 svn_boolean_t *found, 2681 void *cache_void, 2682 const void *key, 2683 apr_pool_t *result_pool) 2684{ 2685 svn_membuffer_cache_t *cache = cache_void; 2686 2687 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool) 2688 2689 /* special case */ 2690 if (key == NULL) 2691 { 2692 *value_p = NULL; 2693 *found = FALSE; 2694 2695 return SVN_NO_ERROR; 2696 } 2697 2698 /* construct the full, i.e. 
globally unique, key by adding
2699   * this cache instance's prefix
2700   */
2701  combine_key(cache, key, cache->key_len);
2702
2703  /* Look the item up. */
2704  SVN_ERR(membuffer_cache_get(cache->membuffer,
2705                              &cache->combined_key,
2706                              value_p,
2707                              cache->deserializer,
2708                              DEBUG_CACHE_MEMBUFFER_TAG
2709                              result_pool));
2710
2711  /* return result */
2712  *found = *value_p != NULL;
2713
2714  return SVN_NO_ERROR;
2715}
2716
2717/* Implement svn_cache__vtable_t.has_key (not thread-safe)
2718 */
2719static svn_error_t *
2720svn_membuffer_cache_has_key(svn_boolean_t *found,
2721                            void *cache_void,
2722                            const void *key,
2723                            apr_pool_t *scratch_pool)
2724{
2725  svn_membuffer_cache_t *cache = cache_void;
2726
2727  /* special case */
2728  if (key == NULL)
2729    {
2730      *found = FALSE;
2731
2732      return SVN_NO_ERROR;
2733    }
2734
2735  /* construct the full, i.e. globally unique, key by adding
2736   * this cache instance's prefix
2737   */
2738  combine_key(cache, key, cache->key_len);
2739
2740  /* Look the item up. */
2741  SVN_ERR(membuffer_cache_has_key(cache->membuffer,
2742                                  &cache->combined_key,
2743                                  found));
2744
2745  /* return result */
2746  return SVN_NO_ERROR;
2747}
2748
2749/* Implement svn_cache__vtable_t.set (not thread-safe)
2750 */
2751static svn_error_t *
2752svn_membuffer_cache_set(void *cache_void,
2753                        const void *key,
2754                        void *value,
2755                        apr_pool_t *scratch_pool)
2756{
2757  svn_membuffer_cache_t *cache = cache_void;
2758
2759  DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool)
2760
2761  /* special case */
2762  if (key == NULL)
2763    return SVN_NO_ERROR;
2764
2765  /* construct the full, i.e. globally unique, key by adding
2766   * this cache instance's prefix
2767   */
2768  combine_key(cache, key, cache->key_len);
2769
2770  /* (probably) add the item to the cache. But there is no real guarantee
2771   * that the item will actually be cached afterwards.
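 * (For instance, select_level() above will decline any low-priority
 * item larger than max_entry_size, and eviction may remove the item
 * again at any time.)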
2772 */ 2773 return membuffer_cache_set(cache->membuffer, 2774 &cache->combined_key, 2775 value, 2776 cache->serializer, 2777 cache->priority, 2778 DEBUG_CACHE_MEMBUFFER_TAG 2779 scratch_pool); 2780} 2781 2782/* Implement svn_cache__vtable_t.iter as "not implemented" 2783 */ 2784static svn_error_t * 2785svn_membuffer_cache_iter(svn_boolean_t *completed, 2786 void *cache_void, 2787 svn_iter_apr_hash_cb_t user_cb, 2788 void *user_baton, 2789 apr_pool_t *scratch_pool) 2790{ 2791 return svn_error_create(SVN_ERR_UNSUPPORTED_FEATURE, NULL, 2792 _("Can't iterate a membuffer-based cache")); 2793} 2794 2795/* Implement svn_cache__vtable_t.get_partial (not thread-safe) 2796 */ 2797static svn_error_t * 2798svn_membuffer_cache_get_partial(void **value_p, 2799 svn_boolean_t *found, 2800 void *cache_void, 2801 const void *key, 2802 svn_cache__partial_getter_func_t func, 2803 void *baton, 2804 apr_pool_t *result_pool) 2805{ 2806 svn_membuffer_cache_t *cache = cache_void; 2807 2808 DEBUG_CACHE_MEMBUFFER_INIT_TAG(result_pool) 2809 2810 if (key == NULL) 2811 { 2812 *value_p = NULL; 2813 *found = FALSE; 2814 2815 return SVN_NO_ERROR; 2816 } 2817 2818 combine_key(cache, key, cache->key_len); 2819 SVN_ERR(membuffer_cache_get_partial(cache->membuffer, 2820 &cache->combined_key, 2821 value_p, 2822 found, 2823 func, 2824 baton, 2825 DEBUG_CACHE_MEMBUFFER_TAG 2826 result_pool)); 2827 2828 return SVN_NO_ERROR; 2829} 2830 2831/* Implement svn_cache__vtable_t.set_partial (not thread-safe) 2832 */ 2833static svn_error_t * 2834svn_membuffer_cache_set_partial(void *cache_void, 2835 const void *key, 2836 svn_cache__partial_setter_func_t func, 2837 void *baton, 2838 apr_pool_t *scratch_pool) 2839{ 2840 svn_membuffer_cache_t *cache = cache_void; 2841 2842 DEBUG_CACHE_MEMBUFFER_INIT_TAG(scratch_pool) 2843 2844 if (key != NULL) 2845 { 2846 combine_key(cache, key, cache->key_len); 2847 SVN_ERR(membuffer_cache_set_partial(cache->membuffer, 2848 &cache->combined_key, 2849 func, 2850 baton, 2851 DEBUG_CACHE_MEMBUFFER_TAG 2852 scratch_pool)); 2853 } 2854 return SVN_NO_ERROR; 2855} 2856 2857/* Implement svn_cache__vtable_t.is_cachable 2858 * (thread-safe even without mutex) 2859 */ 2860static svn_boolean_t 2861svn_membuffer_cache_is_cachable(void *cache_void, apr_size_t size) 2862{ 2863 /* Don't allow extremely large element sizes. Otherwise, the cache 2864 * might by thrashed by a few extremely large entries. And the size 2865 * must be small enough to be stored in a 32 bit value. 2866 */ 2867 svn_membuffer_cache_t *cache = cache_void; 2868 return cache->priority > SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY 2869 ? cache->membuffer->l2.size >= size && MAX_ITEM_SIZE >= size 2870 : size <= cache->membuffer->max_entry_size; 2871} 2872 2873/* Add statistics of SEGMENT to INFO. If INCLUDE_HISTOGRAM is TRUE, 2874 * accumulate index bucket fill levels in INFO->HISTOGRAM. 
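 *
 * That is, after this call, INFO->HISTOGRAM[N] counts the groups whose
 * last group in the chain holds N entries, with N capped at the last
 * histogram bucket (see the MIN() below).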
2875 */ 2876static svn_error_t * 2877svn_membuffer_get_segment_info(svn_membuffer_t *segment, 2878 svn_cache__info_t *info, 2879 svn_boolean_t include_histogram) 2880{ 2881 apr_uint32_t i; 2882 2883 info->data_size += segment->l1.size + segment->l2.size; 2884 info->used_size += segment->data_used; 2885 info->total_size += segment->l1.size + segment->l2.size + 2886 segment->group_count * GROUP_SIZE * sizeof(entry_t); 2887 2888 info->used_entries += segment->used_entries; 2889 info->total_entries += segment->group_count * GROUP_SIZE; 2890 2891 if (include_histogram) 2892 for (i = 0; i < segment->group_count; ++i) 2893 if (is_group_initialized(segment, i)) 2894 { 2895 entry_group_t *chain_end 2896 = last_group_in_chain(segment, &segment->directory[i]); 2897 apr_size_t use 2898 = MIN(chain_end->header.used, 2899 sizeof(info->histogram) / sizeof(info->histogram[0]) - 1); 2900 info->histogram[use]++; 2901 } 2902 2903 return SVN_NO_ERROR; 2904} 2905 2906/* Implement svn_cache__vtable_t.get_info 2907 * (thread-safe even without mutex) 2908 */ 2909static svn_error_t * 2910svn_membuffer_cache_get_info(void *cache_void, 2911 svn_cache__info_t *info, 2912 svn_boolean_t reset, 2913 apr_pool_t *result_pool) 2914{ 2915 svn_membuffer_cache_t *cache = cache_void; 2916 apr_uint32_t i; 2917 2918 /* cache front-end specific data */ 2919 2920 info->id = apr_pstrdup(result_pool, cache->prefix.full_key.data); 2921 2922 /* collect info from shared cache back-end */ 2923 2924 for (i = 0; i < cache->membuffer->segment_count; ++i) 2925 { 2926 svn_membuffer_t *segment = cache->membuffer + i; 2927 WITH_READ_LOCK(segment, 2928 svn_membuffer_get_segment_info(segment, info, FALSE)); 2929 } 2930 2931 return SVN_NO_ERROR; 2932} 2933 2934 2935/* the v-table for membuffer-based caches (single-threaded access) 2936 */ 2937static svn_cache__vtable_t membuffer_cache_vtable = { 2938 svn_membuffer_cache_get, 2939 svn_membuffer_cache_has_key, 2940 svn_membuffer_cache_set, 2941 svn_membuffer_cache_iter, 2942 svn_membuffer_cache_is_cachable, 2943 svn_membuffer_cache_get_partial, 2944 svn_membuffer_cache_set_partial, 2945 svn_membuffer_cache_get_info 2946}; 2947 2948/* Implement svn_cache__vtable_t.get and serialize all cache access. 2949 */ 2950static svn_error_t * 2951svn_membuffer_cache_get_synced(void **value_p, 2952 svn_boolean_t *found, 2953 void *cache_void, 2954 const void *key, 2955 apr_pool_t *result_pool) 2956{ 2957 svn_membuffer_cache_t *cache = cache_void; 2958 SVN_MUTEX__WITH_LOCK(cache->mutex, 2959 svn_membuffer_cache_get(value_p, 2960 found, 2961 cache_void, 2962 key, 2963 result_pool)); 2964 2965 return SVN_NO_ERROR; 2966} 2967 2968/* Implement svn_cache__vtable_t.has_key and serialize all cache access. 2969 */ 2970static svn_error_t * 2971svn_membuffer_cache_has_key_synced(svn_boolean_t *found, 2972 void *cache_void, 2973 const void *key, 2974 apr_pool_t *result_pool) 2975{ 2976 svn_membuffer_cache_t *cache = cache_void; 2977 SVN_MUTEX__WITH_LOCK(cache->mutex, 2978 svn_membuffer_cache_has_key(found, 2979 cache_void, 2980 key, 2981 result_pool)); 2982 2983 return SVN_NO_ERROR; 2984} 2985 2986/* Implement svn_cache__vtable_t.set and serialize all cache access. 
2987 */ 2988static svn_error_t * 2989svn_membuffer_cache_set_synced(void *cache_void, 2990 const void *key, 2991 void *value, 2992 apr_pool_t *scratch_pool) 2993{ 2994 svn_membuffer_cache_t *cache = cache_void; 2995 SVN_MUTEX__WITH_LOCK(cache->mutex, 2996 svn_membuffer_cache_set(cache_void, 2997 key, 2998 value, 2999 scratch_pool)); 3000 3001 return SVN_NO_ERROR; 3002} 3003 3004/* Implement svn_cache__vtable_t.get_partial and serialize all cache access. 3005 */ 3006static svn_error_t * 3007svn_membuffer_cache_get_partial_synced(void **value_p, 3008 svn_boolean_t *found, 3009 void *cache_void, 3010 const void *key, 3011 svn_cache__partial_getter_func_t func, 3012 void *baton, 3013 apr_pool_t *result_pool) 3014{ 3015 svn_membuffer_cache_t *cache = cache_void; 3016 SVN_MUTEX__WITH_LOCK(cache->mutex, 3017 svn_membuffer_cache_get_partial(value_p, 3018 found, 3019 cache_void, 3020 key, 3021 func, 3022 baton, 3023 result_pool)); 3024 3025 return SVN_NO_ERROR; 3026} 3027 3028/* Implement svn_cache__vtable_t.set_partial and serialize all cache access. 3029 */ 3030static svn_error_t * 3031svn_membuffer_cache_set_partial_synced(void *cache_void, 3032 const void *key, 3033 svn_cache__partial_setter_func_t func, 3034 void *baton, 3035 apr_pool_t *scratch_pool) 3036{ 3037 svn_membuffer_cache_t *cache = cache_void; 3038 SVN_MUTEX__WITH_LOCK(cache->mutex, 3039 svn_membuffer_cache_set_partial(cache_void, 3040 key, 3041 func, 3042 baton, 3043 scratch_pool)); 3044 3045 return SVN_NO_ERROR; 3046} 3047 3048/* the v-table for membuffer-based caches with multi-threading support) 3049 */ 3050static svn_cache__vtable_t membuffer_cache_synced_vtable = { 3051 svn_membuffer_cache_get_synced, 3052 svn_membuffer_cache_has_key_synced, 3053 svn_membuffer_cache_set_synced, 3054 svn_membuffer_cache_iter, /* no sync required */ 3055 svn_membuffer_cache_is_cachable, /* no sync required */ 3056 svn_membuffer_cache_get_partial_synced, 3057 svn_membuffer_cache_set_partial_synced, 3058 svn_membuffer_cache_get_info /* no sync required */ 3059}; 3060 3061/* standard serialization function for svn_stringbuf_t items. 3062 * Implements svn_cache__serialize_func_t. 3063 */ 3064static svn_error_t * 3065serialize_svn_stringbuf(void **buffer, 3066 apr_size_t *buffer_size, 3067 void *item, 3068 apr_pool_t *result_pool) 3069{ 3070 svn_stringbuf_t *value_str = item; 3071 3072 *buffer = value_str->data; 3073 *buffer_size = value_str->len + 1; 3074 3075 return SVN_NO_ERROR; 3076} 3077 3078/* standard de-serialization function for svn_stringbuf_t items. 3079 * Implements svn_cache__deserialize_func_t. 3080 */ 3081static svn_error_t * 3082deserialize_svn_stringbuf(void **item, 3083 void *buffer, 3084 apr_size_t buffer_size, 3085 apr_pool_t *result_pool) 3086{ 3087 svn_stringbuf_t *value_str = apr_palloc(result_pool, sizeof(svn_stringbuf_t)); 3088 3089 value_str->pool = result_pool; 3090 value_str->blocksize = buffer_size; 3091 value_str->data = buffer; 3092 value_str->len = buffer_size-1; 3093 *item = value_str; 3094 3095 return SVN_NO_ERROR; 3096} 3097 3098/* Construct a svn_cache__t object on top of a shared memcache. 
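 *
 * Typical usage, as a sketch - the prefix string and pool names are
 * illustrative:
 *
 *   svn_cache__t *my_cache;
 *   SVN_ERR(svn_cache__create_membuffer_cache(
 *             &my_cache,
 *             svn_cache__get_global_membuffer_cache(),
 *             NULL, NULL,              <- default stringbuf serializers
 *             APR_HASH_KEY_STRING,     <- NUL-terminated string keys
 *             "my-repo:",              <- key namespace prefix
 *             SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY,
 *             TRUE,                    <- thread-safe
 *             result_pool, scratch_pool));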
3099 */
3100svn_error_t *
3101svn_cache__create_membuffer_cache(svn_cache__t **cache_p,
3102                                  svn_membuffer_t *membuffer,
3103                                  svn_cache__serialize_func_t serializer,
3104                                  svn_cache__deserialize_func_t deserializer,
3105                                  apr_ssize_t klen,
3106                                  const char *prefix,
3107                                  apr_uint32_t priority,
3108                                  svn_boolean_t thread_safe,
3109                                  apr_pool_t *result_pool,
3110                                  apr_pool_t *scratch_pool)
3111{
3112  svn_checksum_t *checksum;
3113  apr_size_t prefix_len, prefix_orig_len;
3114
3115  /* allocate the cache header structures
3116   */
3117  svn_cache__t *wrapper = apr_pcalloc(result_pool, sizeof(*wrapper));
3118  svn_membuffer_cache_t *cache = apr_pcalloc(result_pool, sizeof(*cache));
3119
3120  /* initialize our internal cache header
3121   */
3122  cache->membuffer = membuffer;
3123  cache->serializer = serializer
3124                    ? serializer
3125                    : serialize_svn_stringbuf;
3126  cache->deserializer = deserializer
3127                      ? deserializer
3128                      : deserialize_svn_stringbuf;
3129  cache->priority = priority;
3130  cache->key_len = klen;
3131
3132  SVN_ERR(svn_mutex__init(&cache->mutex, thread_safe, result_pool));
3133
3134  /* Copy the prefix into the prefix full key. Align it to ITEM_ALIGNMENT.
3135   * Don't forget to include the terminating NUL. */
3136  prefix_orig_len = strlen(prefix) + 1;
3137  prefix_len = ALIGN_VALUE(prefix_orig_len);
3138
3139  svn_membuf__create(&cache->prefix.full_key, prefix_len, result_pool);
3140  memcpy((char *)cache->prefix.full_key.data, prefix, prefix_orig_len);
3141  memset((char *)cache->prefix.full_key.data + prefix_orig_len, 0,
3142         prefix_len - prefix_orig_len);
3143
3144  /* Construct the folded prefix key. */
3145  SVN_ERR(svn_checksum(&checksum,
3146                       svn_checksum_md5,
3147                       prefix,
3148                       strlen(prefix),
3149                       scratch_pool));
3150  memcpy(cache->prefix.entry_key.fingerprint, checksum->digest,
3151         sizeof(cache->prefix.entry_key.fingerprint));
3152  cache->prefix.entry_key.key_len = prefix_len;
3153
3154  /* Initialize the combined key. Pre-allocate some extra room in the full
3155   * key such that we probably don't need to re-alloc. */
3156  cache->combined_key.entry_key = cache->prefix.entry_key;
3157  svn_membuf__create(&cache->combined_key.full_key, prefix_len + 200,
3158                     result_pool);
3159  memcpy(cache->combined_key.full_key.data, cache->prefix.full_key.data,
3160         prefix_len);
3161
3162  /* initialize the generic cache wrapper
3163   */
3164  wrapper->vtable = thread_safe ?
&membuffer_cache_synced_vtable 3165 : &membuffer_cache_vtable; 3166 wrapper->cache_internal = cache; 3167 wrapper->error_handler = 0; 3168 wrapper->error_baton = 0; 3169 wrapper->pretend_empty = !!getenv("SVN_X_DOES_NOT_MARK_THE_SPOT"); 3170 3171 *cache_p = wrapper; 3172 return SVN_NO_ERROR; 3173} 3174 3175static svn_error_t * 3176svn_membuffer_get_global_segment_info(svn_membuffer_t *segment, 3177 svn_cache__info_t *info) 3178{ 3179 info->gets += segment->total_reads; 3180 info->sets += segment->total_writes; 3181 info->hits += segment->total_hits; 3182 3183 WITH_READ_LOCK(segment, 3184 svn_membuffer_get_segment_info(segment, info, TRUE)); 3185 3186 return SVN_NO_ERROR; 3187} 3188 3189svn_cache__info_t * 3190svn_cache__membuffer_get_global_info(apr_pool_t *pool) 3191{ 3192 apr_uint32_t i; 3193 3194 svn_membuffer_t *membuffer = svn_cache__get_global_membuffer_cache(); 3195 svn_cache__info_t *info = apr_pcalloc(pool, sizeof(*info)); 3196 3197 /* cache front-end specific data */ 3198 3199 info->id = "membuffer globals"; 3200 3201 /* collect info from shared cache back-end */ 3202 3203 for (i = 0; i < membuffer->segment_count; ++i) 3204 svn_error_clear(svn_membuffer_get_global_segment_info(membuffer + i, 3205 info)); 3206 3207 return info; 3208} 3209
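
/* Illustrative only - how a caller might evaluate the global statistics
 * gathered above (the hit-rate computation is the caller's, not part of
 * this file):
 *
 *   svn_cache__info_t *info = svn_cache__membuffer_get_global_info(pool);
 *   double hit_rate = info->gets ? (double)info->hits / info->gets : 0.0;
 */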