/*
 * Copyright (c) 2006-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Memory allocator with per-CPU caching, derived from the kmem magazine
 * concept and implementation as described in the following paper:
 * http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick.pdf
 * That implementation is Copyright 2006 Sun Microsystems, Inc. All rights
 * reserved. Use is subject to license terms.
 *
 * There are several major differences between this and the original kmem
 * magazine: this derivative implementation allows multiple objects to be
 * allocated and freed from/to the object cache in a single call; in
 * addition, it provides greater flexibility by letting the user define a
 * custom slab allocator (instead of the default zone allocator). Finally,
 * no object construction/destruction takes place at the moment, although
 * this could be added in the future to improve efficiency.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/systm.h>

#include <kern/debug.h>
#include <kern/zalloc.h>
#include <kern/cpu_number.h>
#include <kern/locks.h>

#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/vm_param.h>
#include <machine/limits.h>
#include <machine/machine_routines.h>

#include <string.h>

#include <sys/mcache.h>

#define MCACHE_SIZE(n) \
        ((size_t)(&((mcache_t *)0)->mc_cpu[n]))

/* Allocate extra in case we need to manually align the pointer */
#define MCACHE_ALLOC_SIZE \
        (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_SIZE)

#define MCACHE_CPU(c) \
        (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number())))
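/*
 * Illustrative sketch (not part of the original interface): a single
 * mcache allocation holds the cache header followed by one mcache_cpu_t
 * per CPU, so MCACHE_SIZE(n) is simply the offset of mc_cpu[n] from the
 * start of the structure.  On a hypothetical 2-CPU system:
 *
 *      +-----------------+----------------+----------------+
 *      | mcache_t header | mc_cpu[0]      | mc_cpu[1]      |
 *      +-----------------+----------------+----------------+
 *      ^cp               ^MCACHE_CPU(cp) when running on CPU 0
 *
 * MCACHE_CPU(cp) evaluates cpu_number() at run time, so a thread that
 * migrates between CPUs may observe different per-CPU structures across
 * successive calls; callers take the per-CPU lock (cc_lock) before
 * touching the structure.
 */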
/*
 * MCACHE_LIST_LOCK() and MCACHE_LIST_UNLOCK() are macros used
 * to serialize accesses to the global list of caches in the system.
 * They also record the thread currently running in the critical
 * section, so that we can avoid recursive requests to reap the
 * caches when memory runs low.
 */
#define MCACHE_LIST_LOCK() {                            \
        lck_mtx_lock(mcache_llock);                     \
        mcache_llock_owner = current_thread();          \
}

#define MCACHE_LIST_UNLOCK() {                          \
        mcache_llock_owner = NULL;                      \
        lck_mtx_unlock(mcache_llock);                   \
}

#define MCACHE_LOCK(l)          lck_mtx_lock(l)
#define MCACHE_UNLOCK(l)        lck_mtx_unlock(l)
#define MCACHE_LOCK_TRY(l)      lck_mtx_try_lock(l)

static int ncpu;
static lck_mtx_t *mcache_llock;
static struct thread *mcache_llock_owner;
static lck_attr_t *mcache_llock_attr;
static lck_grp_t *mcache_llock_grp;
static lck_grp_attr_t *mcache_llock_grp_attr;
static struct zone *mcache_zone;
static unsigned int mcache_reap_interval;
static UInt32 mcache_reaping;
static int mcache_ready;
static int mcache_updating;

static int mcache_bkt_contention = 3;
#if DEBUG
static unsigned int mcache_flags = MCF_DEBUG;
#else
static unsigned int mcache_flags = 0;
#endif

#define DUMP_MCA_BUF_SIZE       512
static char *mca_dump_buf;

static mcache_bkttype_t mcache_bkttype[] = {
        { 1,    4096,   32768,  NULL },
        { 3,    2048,   16384,  NULL },
        { 7,    1024,   12288,  NULL },
        { 15,   256,    8192,   NULL },
        { 31,   64,     4096,   NULL },
        { 47,   0,      2048,   NULL },
        { 63,   0,      1024,   NULL },
        { 95,   0,      512,    NULL },
        { 143,  0,      256,    NULL },
        { 165,  0,      0,      NULL },
};

static mcache_t *mcache_create_common(const char *, size_t, size_t,
    mcache_allocfn_t, mcache_freefn_t, mcache_auditfn_t, mcache_logfn_t,
    mcache_notifyfn_t, void *, u_int32_t, int, int);
static unsigned int mcache_slab_alloc(void *, mcache_obj_t ***,
    unsigned int, int);
static void mcache_slab_free(void *, mcache_obj_t *, boolean_t);
static void mcache_slab_audit(void *, mcache_obj_t *, boolean_t);
static void mcache_cpu_refill(mcache_cpu_t *, mcache_bkt_t *, int);
static mcache_bkt_t *mcache_bkt_alloc(mcache_t *, mcache_bktlist_t *,
    mcache_bkttype_t **);
static void mcache_bkt_free(mcache_t *, mcache_bktlist_t *, mcache_bkt_t *);
static void mcache_cache_bkt_enable(mcache_t *);
static void mcache_bkt_purge(mcache_t *);
static void mcache_bkt_destroy(mcache_t *, mcache_bkttype_t *,
    mcache_bkt_t *, int);
static void mcache_bkt_ws_update(mcache_t *);
static void mcache_bkt_ws_reap(mcache_t *);
static void mcache_dispatch(void (*)(void *), void *);
static void mcache_cache_reap(mcache_t *);
static void mcache_cache_update(mcache_t *);
static void mcache_cache_bkt_resize(void *);
static void mcache_cache_enable(void *);
static void mcache_update(void *);
static void mcache_update_timeout(void *);
static void mcache_applyall(void (*)(mcache_t *));
static void mcache_reap_start(void *);
static void mcache_reap_done(void *);
static void mcache_reap_timeout(void *);
static void mcache_notify(mcache_t *, u_int32_t);
static void mcache_purge(void *);

static LIST_HEAD(, mcache) mcache_head;
mcache_t *mcache_audit_cache;
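/*
 * Illustrative usage sketch (hypothetical client, not part of this file):
 * a subsystem typically creates a cache once, then allocates and frees
 * objects through it.
 *
 *      static mcache_t *foo_cache;
 *
 *      void
 *      foo_init(void)
 *      {
 *              foo_cache = mcache_create("foo", sizeof (struct foo),
 *                  0, 0, MCR_SLEEP);
 *      }
 *
 *      struct foo *
 *      foo_alloc(int how)
 *      {
 *              return (mcache_alloc(foo_cache, how));
 *      }
 *
 *      void
 *      foo_free(struct foo *fp)
 *      {
 *              mcache_free(foo_cache, fp);
 *      }
 *
 * "foo", struct foo and the wrapper names are made up for illustration;
 * MCR_SLEEP vs. MCR_NOSLEEP selects blocking vs. non-blocking behavior.
 */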
/*
 * Initialize the framework; this is currently called as part of BSD init.
 */
__private_extern__ void
mcache_init(void)
{
        mcache_bkttype_t *btp;
        unsigned int i;
        char name[32];

        ncpu = ml_get_max_cpus();

        mcache_llock_grp_attr = lck_grp_attr_alloc_init();
        mcache_llock_grp = lck_grp_alloc_init("mcache.list",
            mcache_llock_grp_attr);
        mcache_llock_attr = lck_attr_alloc_init();
        mcache_llock = lck_mtx_alloc_init(mcache_llock_grp, mcache_llock_attr);

        mcache_zone = zinit(MCACHE_ALLOC_SIZE, 256 * MCACHE_ALLOC_SIZE,
            PAGE_SIZE, "mcache");
        if (mcache_zone == NULL)
                panic("mcache_init: failed to allocate mcache zone\n");
        zone_change(mcache_zone, Z_CALLERACCT, FALSE);

        LIST_INIT(&mcache_head);

        for (i = 0; i < sizeof (mcache_bkttype) / sizeof (*btp); i++) {
                btp = &mcache_bkttype[i];
                (void) snprintf(name, sizeof (name), "bkt_%d",
                    btp->bt_bktsize);
                btp->bt_cache = mcache_create(name,
                    (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
        }

        PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags));
        mcache_flags &= MCF_FLAGS_MASK;

        mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
            0, 0, MCR_SLEEP);

        mcache_reap_interval = 15 * hz;
        mcache_applyall(mcache_cache_bkt_enable);
        mcache_ready = 1;
}

/*
 * Return the global mcache flags.
 */
__private_extern__ unsigned int
mcache_getflags(void)
{
        return (mcache_flags);
}

/*
 * Create a cache using the zone allocator as the backend slab allocator.
 * The caller may specify any alignment for the object; if it specifies 0
 * the default alignment (MCACHE_ALIGN) will be used.
 */
__private_extern__ mcache_t *
mcache_create(const char *name, size_t bufsize, size_t align,
    u_int32_t flags, int wait)
{
        return (mcache_create_common(name, bufsize, align, mcache_slab_alloc,
            mcache_slab_free, mcache_slab_audit, NULL, NULL, NULL, flags, 1,
            wait));
}

/*
 * Create a cache using a custom backend slab allocator. Since the caller
 * is responsible for allocation, no alignment guarantee will be provided
 * by this framework.
 */
__private_extern__ mcache_t *
mcache_create_ext(const char *name, size_t bufsize,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int wait)
{
        return (mcache_create_common(name, bufsize, 0, allocfn,
            freefn, auditfn, logfn, notifyfn, arg, flags, 0, wait));
}

/*
 * Common cache creation routine.
 */
static mcache_t *
mcache_create_common(const char *name, size_t bufsize, size_t align,
    mcache_allocfn_t allocfn, mcache_freefn_t freefn, mcache_auditfn_t auditfn,
    mcache_logfn_t logfn, mcache_notifyfn_t notifyfn, void *arg,
    u_int32_t flags, int need_zone, int wait)
{
        mcache_bkttype_t *btp;
        mcache_t *cp = NULL;
        size_t chunksize;
        void *buf, **pbuf;
        int c;
        char lck_name[64];

        /* If auditing is on and print buffer is NULL, allocate it now */
        if ((flags & MCF_DEBUG) && mca_dump_buf == NULL) {
                int malloc_wait = (wait & MCR_NOSLEEP) ?
                    M_NOWAIT : M_WAITOK;
                MALLOC(mca_dump_buf, char *, DUMP_MCA_BUF_SIZE, M_TEMP,
                    malloc_wait | M_ZERO);
                if (mca_dump_buf == NULL)
                        return (NULL);
        }

        if (!(wait & MCR_NOSLEEP))
                buf = zalloc(mcache_zone);
        else
                buf = zalloc_noblock(mcache_zone);

        if (buf == NULL)
                goto fail;

        bzero(buf, MCACHE_ALLOC_SIZE);

        /*
         * In case we didn't get cache-aligned memory, round it up
         * accordingly. This is needed in order to get the rest of the
         * structure members aligned properly. It also means that
         * the memory span gets shifted due to the round up, but it
         * is okay since we've allocated extra space for this.
         */
        cp = (mcache_t *)
            P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_SIZE);
        pbuf = (void **)((intptr_t)cp - sizeof (void *));
        *pbuf = buf;

        /*
         * Guaranteed alignment is valid only when we use the internal
         * slab allocator (currently set to use the zone allocator).
         */
        if (!need_zone)
                align = 1;
        else if (align == 0)
                align = MCACHE_ALIGN;

        if ((align & (align - 1)) != 0)
                panic("mcache_create: bad alignment %lu", align);

        cp->mc_align = align;
        cp->mc_slab_alloc = allocfn;
        cp->mc_slab_free = freefn;
        cp->mc_slab_audit = auditfn;
        cp->mc_slab_log = logfn;
        cp->mc_slab_notify = notifyfn;
        cp->mc_private = need_zone ? cp : arg;
        cp->mc_bufsize = bufsize;
        cp->mc_flags = (flags & MCF_FLAGS_MASK) | mcache_flags;

        (void) snprintf(cp->mc_name, sizeof (cp->mc_name), "mcache.%s", name);

        (void) snprintf(lck_name, sizeof (lck_name), "%s.cpu", cp->mc_name);
        cp->mc_cpu_lock_grp_attr = lck_grp_attr_alloc_init();
        cp->mc_cpu_lock_grp = lck_grp_alloc_init(lck_name,
            cp->mc_cpu_lock_grp_attr);
        cp->mc_cpu_lock_attr = lck_attr_alloc_init();

        /*
         * Allocation chunk size is the object's size plus any extra size
         * needed to satisfy the object's alignment. It is enforced to be
         * at least the size of an LP64 pointer to simplify auditing and to
         * handle multiple-element allocation requests, where the elements
         * returned are linked together in a list.
         */
        chunksize = MAX(bufsize, sizeof (u_int64_t));
        if (need_zone) {
                /* Enforce 64-bit minimum alignment for zone-based buffers */
                align = MAX(align, sizeof (u_int64_t));
                chunksize += sizeof (void *) + align;
                chunksize = P2ROUNDUP(chunksize, align);
                if ((cp->mc_slab_zone = zinit(chunksize, 64 * 1024 * ncpu,
                    PAGE_SIZE, cp->mc_name)) == NULL)
                        goto fail;
                zone_change(cp->mc_slab_zone, Z_EXPAND, TRUE);
        }
        cp->mc_chunksize = chunksize;
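        /*
         * Illustrative worked example (assuming LP64 and an explicit
         * 16-byte alignment requested by a hypothetical caller):
         *
         *      bufsize   = 256
         *      chunksize = MAX(256, 8)                 = 256
         *      align     = MAX(16, 8)                  = 16
         *      chunksize = 256 + sizeof (void *) + 16  = 280
         *      chunksize = P2ROUNDUP(280, 16)          = 288
         *
         * The bucket type selection loop below walks mcache_bkttype[]
         * until it reaches the first entry whose bt_minbuf is smaller
         * than the chunk size; a chunk size of 288 stops at
         * { 15, 256, 8192 }, so each bucket for this cache would hold
         * up to 15 objects.
         */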
        /*
         * Initialize the bucket layer.
         */
        (void) snprintf(lck_name, sizeof (lck_name), "%s.bkt", cp->mc_name);
        cp->mc_bkt_lock_grp_attr = lck_grp_attr_alloc_init();
        cp->mc_bkt_lock_grp = lck_grp_alloc_init(lck_name,
            cp->mc_bkt_lock_grp_attr);
        cp->mc_bkt_lock_attr = lck_attr_alloc_init();
        lck_mtx_init(&cp->mc_bkt_lock, cp->mc_bkt_lock_grp,
            cp->mc_bkt_lock_attr);

        (void) snprintf(lck_name, sizeof (lck_name), "%s.sync", cp->mc_name);
        cp->mc_sync_lock_grp_attr = lck_grp_attr_alloc_init();
        cp->mc_sync_lock_grp = lck_grp_alloc_init(lck_name,
            cp->mc_sync_lock_grp_attr);
        cp->mc_sync_lock_attr = lck_attr_alloc_init();
        lck_mtx_init(&cp->mc_sync_lock, cp->mc_sync_lock_grp,
            cp->mc_sync_lock_attr);

        for (btp = mcache_bkttype; chunksize <= btp->bt_minbuf; btp++)
                continue;

        cp->cache_bkttype = btp;

        /*
         * Initialize the CPU layer. Each per-CPU structure is aligned
         * on the CPU cache line boundary to prevent false sharing.
         */
        for (c = 0; c < ncpu; c++) {
                mcache_cpu_t *ccp = &cp->mc_cpu[c];

                VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_SIZE));
                lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp,
                    cp->mc_cpu_lock_attr);
                ccp->cc_objs = -1;
                ccp->cc_pobjs = -1;
        }

        if (mcache_ready)
                mcache_cache_bkt_enable(cp);

        /* TODO: dynamically create sysctl for stats */

        MCACHE_LIST_LOCK();
        LIST_INSERT_HEAD(&mcache_head, cp, mc_list);
        MCACHE_LIST_UNLOCK();

        /*
         * If cache buckets are enabled and this is the first cache
         * created, start the periodic cache update.
         */
        if (!(mcache_flags & MCF_NOCPUCACHE) && !mcache_updating) {
                mcache_updating = 1;
                mcache_update_timeout(NULL);
        }
        if (cp->mc_flags & MCF_DEBUG) {
                printf("mcache_create: %s (%s) arg %p bufsize %lu align %lu "
                    "chunksize %lu bktsize %d\n", name, need_zone ? "i" : "e",
                    arg, bufsize, cp->mc_align, chunksize, btp->bt_bktsize);
        }
        return (cp);

fail:
        if (buf != NULL)
                zfree(mcache_zone, buf);
        return (NULL);
}

/*
 * Allocate one or more objects from a cache.
 */
__private_extern__ unsigned int
mcache_alloc_ext(mcache_t *cp, mcache_obj_t **list, unsigned int num, int wait)
{
        mcache_cpu_t *ccp;
        mcache_obj_t **top = &(*list);
        mcache_bkt_t *bkt;
        unsigned int need = num;
        boolean_t nwretry = FALSE;

        /* MCR_NOSLEEP and MCR_FAILOK are mutually exclusive */
        VERIFY((wait & (MCR_NOSLEEP|MCR_FAILOK)) != (MCR_NOSLEEP|MCR_FAILOK));

        ASSERT(list != NULL);
        *list = NULL;

        if (num == 0)
                return (0);

retry_alloc:
        /* We may not always be running in the same CPU in case of retries */
        ccp = MCACHE_CPU(cp);

        MCACHE_LOCK(&ccp->cc_lock);
        for (;;) {
                /*
                 * If we have an object in the current CPU's filled bucket,
                 * chain the object to any previous objects and return if
                 * we've satisfied the number of requested objects.
                 */
                if (ccp->cc_objs > 0) {
                        mcache_obj_t *tail;
                        int objs;

                        /*
                         * Objects in the bucket are already linked together
                         * with the most recently freed object at the head of
                         * the list; grab as many objects as we can.
                         */
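                        /*
                         * For example (illustrative numbers): with
                         * cc_objs == 4 and need == 2, the head handed to
                         * the caller is bkt_obj[3], cc_objs drops to 2,
                         * and bkt_obj[2] becomes the tail whose obj_next
                         * is terminated below.  bkt_obj[3] already points
                         * to bkt_obj[2] via obj_next, so the transferred
                         * objects need no relinking.
                         */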
                        objs = MIN((unsigned int)ccp->cc_objs, need);
                        *list = ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
                        ccp->cc_objs -= objs;
                        ccp->cc_alloc += objs;

                        tail = ccp->cc_filled->bkt_obj[ccp->cc_objs];
                        list = &tail->obj_next;
                        *list = NULL;

                        /* If we got them all, return to caller */
                        if ((need -= objs) == 0) {
                                MCACHE_UNLOCK(&ccp->cc_lock);

                                if (!(cp->mc_flags & MCF_NOLEAKLOG) &&
                                    cp->mc_slab_log != NULL)
                                        (*cp->mc_slab_log)(num, *top, TRUE);

                                if (cp->mc_flags & MCF_DEBUG)
                                        goto debug_alloc;

                                return (num);
                        }
                }

                /*
                 * The CPU's filled bucket is empty. If the previous filled
                 * bucket was full, exchange and try again.
                 */
                if (ccp->cc_pobjs > 0) {
                        mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
                        continue;
                }

                /*
                 * If the bucket layer is disabled, allocate from slab. This
                 * can happen either because MCF_NOCPUCACHE is set, or because
                 * the bucket layer is currently being resized.
                 */
                if (ccp->cc_bktsize == 0)
                        break;

                /*
                 * Both of the CPU's buckets are empty; try to get a full
                 * bucket from the bucket layer. Upon success, refill this
                 * CPU and place any empty bucket into the empty list.
                 */
                bkt = mcache_bkt_alloc(cp, &cp->mc_full, NULL);
                if (bkt != NULL) {
                        if (ccp->cc_pfilled != NULL)
                                mcache_bkt_free(cp, &cp->mc_empty,
                                    ccp->cc_pfilled);
                        mcache_cpu_refill(ccp, bkt, ccp->cc_bktsize);
                        continue;
                }

                /*
                 * The bucket layer has no full buckets; allocate the
                 * object(s) directly from the slab layer.
                 */
                break;
        }
        MCACHE_UNLOCK(&ccp->cc_lock);

        need -= (*cp->mc_slab_alloc)(cp->mc_private, &list, need, wait);

        /*
         * If this is a blocking allocation, or if it is non-blocking and
         * the cache's full bucket is non-empty, then retry the allocation.
         */
        if (need > 0) {
                if (!(wait & MCR_NONBLOCKING)) {
                        atomic_add_32(&cp->mc_wretry_cnt, 1);
                        goto retry_alloc;
                } else if ((wait & (MCR_NOSLEEP | MCR_TRYHARD)) &&
                    !mcache_bkt_isempty(cp)) {
                        if (!nwretry)
                                nwretry = TRUE;
                        atomic_add_32(&cp->mc_nwretry_cnt, 1);
                        goto retry_alloc;
                } else if (nwretry) {
                        atomic_add_32(&cp->mc_nwfail_cnt, 1);
                }
        }

        if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
                (*cp->mc_slab_log)((num - need), *top, TRUE);

        if (!(cp->mc_flags & MCF_DEBUG))
                return (num - need);

debug_alloc:
        if (cp->mc_flags & MCF_DEBUG) {
                mcache_obj_t **o = top;
                unsigned int n;

                n = 0;
                /*
                 * Verify that the chain of objects has the same count as
                 * what we are about to report to the caller. Any mismatch
                 * here means that the object list is insanely broken and
                 * therefore we must panic.
                 */
                while (*o != NULL) {
                        o = &(*o)->obj_next;
                        ++n;
                }
                if (n != (num - need)) {
                        panic("mcache_alloc_ext: %s cp %p corrupted list "
                            "(got %d actual %d)\n", cp->mc_name,
                            (void *)cp, num - need, n);
                }
        }

        /* Invoke the slab layer audit callback if auditing is enabled */
        if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
                (*cp->mc_slab_audit)(cp->mc_private, *top, TRUE);

        return (num - need);
}
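/*
 * Illustrative batch allocation sketch (hypothetical caller, not part of
 * this file): mcache_alloc_ext() may return fewer objects than requested,
 * and the objects it does return are chained through obj_next.
 *
 *      mcache_obj_t *list, *o;
 *      unsigned int i, got;
 *
 *      got = mcache_alloc_ext(foo_cache, &list, 32, MCR_NOSLEEP);
 *      for (o = list, i = 0; o != NULL; o = o->obj_next, i++)
 *              use_object(o);
 *
 * After the loop, i equals got.  foo_cache and use_object() are made-up
 * names; a real caller must be prepared for got < 32 when MCR_NOSLEEP
 * (or MCR_TRYHARD) is specified.
 */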
/*
 * Allocate a single object from a cache.
 */
__private_extern__ void *
mcache_alloc(mcache_t *cp, int wait)
{
        mcache_obj_t *buf;

        (void) mcache_alloc_ext(cp, &buf, 1, wait);
        return (buf);
}

__private_extern__ void
mcache_waiter_inc(mcache_t *cp)
{
        atomic_add_32(&cp->mc_waiter_cnt, 1);
}

__private_extern__ void
mcache_waiter_dec(mcache_t *cp)
{
        atomic_add_32(&cp->mc_waiter_cnt, -1);
}

__private_extern__ boolean_t
mcache_bkt_isempty(mcache_t *cp)
{
        /*
         * This isn't meant to accurately tell whether there are
         * any full buckets in the cache; it is simply a way to
         * obtain "hints" about the state of the cache.
         */
        return (cp->mc_full.bl_total == 0);
}

/*
 * Notify the slab layer about an event.
 */
static void
mcache_notify(mcache_t *cp, u_int32_t event)
{
        if (cp->mc_slab_notify != NULL)
                (*cp->mc_slab_notify)(cp->mc_private, event);
}

/*
 * Purge the cache and disable its buckets.
 */
static void
mcache_purge(void *arg)
{
        mcache_t *cp = arg;

        mcache_bkt_purge(cp);
        /*
         * We cannot simply call mcache_cache_bkt_enable() from here as
         * a bucket resize may be in flight and we would cause the CPU
         * layers of the cache to point to different sizes. Therefore,
         * we simply increment the enable count so that during the next
         * periodic cache update the buckets can be reenabled.
         */
        lck_mtx_lock_spin(&cp->mc_sync_lock);
        cp->mc_enable_cnt++;
        lck_mtx_unlock(&cp->mc_sync_lock);
}

__private_extern__ boolean_t
mcache_purge_cache(mcache_t *cp)
{
        /*
         * Purging a cache that has no per-CPU caches or is already
         * in the process of being purged is rather pointless.
         */
        if (cp->mc_flags & MCF_NOCPUCACHE)
                return (FALSE);

        lck_mtx_lock_spin(&cp->mc_sync_lock);
        if (cp->mc_purge_cnt > 0) {
                lck_mtx_unlock(&cp->mc_sync_lock);
                return (FALSE);
        }
        cp->mc_purge_cnt++;
        lck_mtx_unlock(&cp->mc_sync_lock);

        mcache_dispatch(mcache_purge, cp);

        return (TRUE);
}

/*
 * Free a single object to a cache.
 */
__private_extern__ void
mcache_free(mcache_t *cp, void *buf)
{
        ((mcache_obj_t *)buf)->obj_next = NULL;
        mcache_free_ext(cp, (mcache_obj_t *)buf);
}

/*
 * Free one or more objects to a cache.
 */
__private_extern__ void
mcache_free_ext(mcache_t *cp, mcache_obj_t *list)
{
        mcache_cpu_t *ccp = MCACHE_CPU(cp);
        mcache_bkttype_t *btp;
        mcache_obj_t *nlist;
        mcache_bkt_t *bkt;

        if (!(cp->mc_flags & MCF_NOLEAKLOG) && cp->mc_slab_log != NULL)
                (*cp->mc_slab_log)(0, list, FALSE);

        /* Invoke the slab layer audit callback if auditing is enabled */
        if ((cp->mc_flags & MCF_DEBUG) && cp->mc_slab_audit != NULL)
                (*cp->mc_slab_audit)(cp->mc_private, list, FALSE);

        MCACHE_LOCK(&ccp->cc_lock);
        for (;;) {
                /*
                 * If there is space in the current CPU's filled bucket, put
                 * the object there and return once all objects are freed.
                 * Note the cast to unsigned integer takes care of the case
                 * where the bucket layer is disabled (when cc_objs is -1).
                 */
                if ((unsigned int)ccp->cc_objs <
                    (unsigned int)ccp->cc_bktsize) {
                        /*
                         * Reverse the list while we place the object into the
                         * bucket; this effectively causes the most recently
                         * freed object(s) to be reused during allocation.
                         */
                        nlist = list->obj_next;
                        list->obj_next = (ccp->cc_objs == 0) ? NULL :
                            ccp->cc_filled->bkt_obj[ccp->cc_objs - 1];
                        ccp->cc_filled->bkt_obj[ccp->cc_objs++] = list;
                        ccp->cc_free++;

                        if ((list = nlist) != NULL)
                                continue;

                        /* We are done; return to caller */
                        MCACHE_UNLOCK(&ccp->cc_lock);

                        /* If there is a waiter below, notify it */
                        if (cp->mc_waiter_cnt > 0)
                                mcache_notify(cp, MCN_RETRYALLOC);
                        return;
                }

                /*
                 * The CPU's filled bucket is full. If the previous filled
                 * bucket was empty, exchange and try again.
                 */
                if (ccp->cc_pobjs == 0) {
                        mcache_cpu_refill(ccp, ccp->cc_pfilled, ccp->cc_pobjs);
                        continue;
                }

                /*
                 * If the bucket layer is disabled, free to slab. This can
                 * happen either because MCF_NOCPUCACHE is set, or because
                 * the bucket layer is currently being resized.
                 */
                if (ccp->cc_bktsize == 0)
                        break;

                /*
                 * Both of the CPU's buckets are full; try to get an empty
                 * bucket from the bucket layer. Upon success, empty this
                 * CPU and place any full bucket into the full list.
                 */
                bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp);
                if (bkt != NULL) {
                        if (ccp->cc_pfilled != NULL)
                                mcache_bkt_free(cp, &cp->mc_full,
                                    ccp->cc_pfilled);
                        mcache_cpu_refill(ccp, bkt, 0);
                        continue;
                }

                /*
                 * We need an empty bucket to put our freed objects into
                 * but couldn't get an empty bucket from the bucket layer;
                 * attempt to allocate one. We do not want to block for
                 * allocation here, and if the bucket allocation fails
                 * we will simply fall through to the slab layer.
                 */
                MCACHE_UNLOCK(&ccp->cc_lock);
                bkt = mcache_alloc(btp->bt_cache, MCR_NOSLEEP);
                MCACHE_LOCK(&ccp->cc_lock);

                if (bkt != NULL) {
                        /*
                         * We have an empty bucket, but since we drop the
                         * CPU lock above, the cache's bucket size may have
                         * changed. If so, free the bucket and try again.
                         */
                        if (ccp->cc_bktsize != btp->bt_bktsize) {
                                MCACHE_UNLOCK(&ccp->cc_lock);
                                mcache_free(btp->bt_cache, bkt);
                                MCACHE_LOCK(&ccp->cc_lock);
                                continue;
                        }

                        /*
                         * We have an empty bucket of the right size;
                         * add it to the bucket layer and try again.
                         */
                        mcache_bkt_free(cp, &cp->mc_empty, bkt);
                        continue;
                }

                /*
                 * The bucket layer has no empty buckets; free the
                 * object(s) directly to the slab layer.
                 */
                break;
        }
        MCACHE_UNLOCK(&ccp->cc_lock);

        /* If there is a waiter below, notify it */
        if (cp->mc_waiter_cnt > 0)
                mcache_notify(cp, MCN_RETRYALLOC);

        /* Advise the slab layer to purge the object(s) */
        (*cp->mc_slab_free)(cp->mc_private, list,
            (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
}
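/*
 * Illustrative batch free sketch (hypothetical caller, not part of this
 * file): to return several objects in one call, the caller chains them
 * through obj_next and hands the head to mcache_free_ext().
 *
 *      mcache_obj_t *head = NULL;
 *      unsigned int i;
 *
 *      for (i = 0; i < n; i++) {
 *              mcache_obj_t *o = (mcache_obj_t *)objs[i];
 *              o->obj_next = head;
 *              head = o;
 *      }
 *      mcache_free_ext(foo_cache, head);
 *
 * foo_cache, objs[] and n are made-up names; mcache_free() is simply the
 * single-object form that NULL-terminates obj_next before calling this
 * routine.
 */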
/*
 * Cache destruction routine.
 */
__private_extern__ void
mcache_destroy(mcache_t *cp)
{
        void **pbuf;

        MCACHE_LIST_LOCK();
        LIST_REMOVE(cp, mc_list);
        MCACHE_LIST_UNLOCK();

        mcache_bkt_purge(cp);

        /*
         * This cache is dead; there should be no further transaction.
         * If it's still invoked, make sure that it induces a fault.
         */
        cp->mc_slab_alloc = NULL;
        cp->mc_slab_free = NULL;
        cp->mc_slab_audit = NULL;

        lck_attr_free(cp->mc_bkt_lock_attr);
        lck_grp_free(cp->mc_bkt_lock_grp);
        lck_grp_attr_free(cp->mc_bkt_lock_grp_attr);

        lck_attr_free(cp->mc_cpu_lock_attr);
        lck_grp_free(cp->mc_cpu_lock_grp);
        lck_grp_attr_free(cp->mc_cpu_lock_grp_attr);

        lck_attr_free(cp->mc_sync_lock_attr);
        lck_grp_free(cp->mc_sync_lock_grp);
        lck_grp_attr_free(cp->mc_sync_lock_grp_attr);

        /*
         * TODO: We need to destroy the zone here, but cannot do it
         * because there is no such way to achieve that. Until then
         * the memory allocated for the zone structure is leaked.
         * Once it is achievable, uncomment these lines:
         *
         * if (cp->mc_slab_zone != NULL) {
         *      zdestroy(cp->mc_slab_zone);
         *      cp->mc_slab_zone = NULL;
         * }
         */

        /* Get the original address since we're about to free it */
        pbuf = (void **)((intptr_t)cp - sizeof (void *));

        zfree(mcache_zone, *pbuf);
}

/*
 * Internal slab allocator used as a backend for simple caches. The current
 * implementation uses the zone allocator for simplicity reasons.
 */
static unsigned int
mcache_slab_alloc(void *arg, mcache_obj_t ***plist, unsigned int num, int wait)
{
        mcache_t *cp = arg;
        unsigned int need = num;
        size_t offset = 0;
        size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
        u_int32_t flags = cp->mc_flags;
        void *buf, *base, **pbuf;
        mcache_obj_t **list = *plist;

        *list = NULL;

        /*
         * The address of the object returned to the caller is an
         * offset from the 64-bit aligned base address only if the
         * cache's alignment requirement is neither 1 nor 8 bytes.
         */
        if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
                offset = cp->mc_align;

        for (;;) {
                if (!(wait & MCR_NOSLEEP))
                        buf = zalloc(cp->mc_slab_zone);
                else
                        buf = zalloc_noblock(cp->mc_slab_zone);

                if (buf == NULL)
                        break;

                /* Get the 64-bit aligned base address for this object */
                base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
                    sizeof (u_int64_t));

                /*
                 * Wind back a pointer size from the aligned base and
                 * save the original address so we can free it later.
                 */
                pbuf = (void **)((intptr_t)base - sizeof (void *));
                *pbuf = buf;

                /*
                 * If auditing is enabled, patternize the contents of
                 * the buffer starting from the 64-bit aligned base to
                 * the end of the buffer; the length is rounded up to
                 * the nearest 64-bit multiple; this is because we use
                 * 64-bit memory access to set/check the pattern.
                 */
                if (flags & MCF_DEBUG) {
                        VERIFY(((intptr_t)base + rsize) <=
                            ((intptr_t)buf + cp->mc_chunksize));
                        mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
                }

                /*
                 * Fix up the object's address to fulfill the cache's
                 * alignment requirement (if needed) and return this
                 * to the caller.
                 */
                VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
                    ((intptr_t)buf + cp->mc_chunksize));
                *list = (mcache_obj_t *)((intptr_t)base + offset);

                (*list)->obj_next = NULL;
                list = *plist = &(*list)->obj_next;

                /* If we got them all, return to mcache */
                if (--need == 0)
                        break;
        }

        return (num - need);
}
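/*
 * Illustrative layout of a single chunk handed out by mcache_slab_alloc()
 * (assuming LP64 and an 8-byte-aligned zone buffer; "offset" is zero
 * unless the cache alignment is neither 1 nor 8 bytes):
 *
 *      buf              base            base + offset
 *       |                |               |
 *       v                v               v
 *      +----------------+---------------+----------------------+- ... -+
 *      | *pbuf == buf   | alignment     | object returned to   | spare |
 *      | (zalloc addr)  | padding       | caller (mc_bufsize)  |       |
 *      +----------------+---------------+----------------------+- ... -+
 *      |<--------------------- mc_chunksize -------------------------->|
 *
 * The pointer-sized word just below "base" stores the original zalloc()
 * address; mcache_slab_free() reads it back and passes it to zfree().
 */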
/*
 * Internal slab deallocator used as a backend for simple caches.
 */
static void
mcache_slab_free(void *arg, mcache_obj_t *list, __unused boolean_t purged)
{
        mcache_t *cp = arg;
        mcache_obj_t *nlist;
        size_t offset = 0;
        size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
        u_int32_t flags = cp->mc_flags;
        void *base;
        void **pbuf;

        /*
         * The address of the object is an offset from a 64-bit
         * aligned base address only if the cache's alignment
         * requirement is neither 1 nor 8 bytes.
         */
        if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
                offset = cp->mc_align;

        for (;;) {
                nlist = list->obj_next;
                list->obj_next = NULL;

                /* Get the 64-bit aligned base address of this object */
                base = (void *)((intptr_t)list - offset);
                VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

                /* Get the original address since we're about to free it */
                pbuf = (void **)((intptr_t)base - sizeof (void *));

                if (flags & MCF_DEBUG) {
                        VERIFY(((intptr_t)base + rsize) <=
                            ((intptr_t)*pbuf + cp->mc_chunksize));
                        mcache_audit_free_verify(NULL, base, offset, rsize);
                }

                /* Free it to zone */
                VERIFY(((intptr_t)base + offset + cp->mc_bufsize) <=
                    ((intptr_t)*pbuf + cp->mc_chunksize));
                zfree(cp->mc_slab_zone, *pbuf);

                /* No more objects to free; return to mcache */
                if ((list = nlist) == NULL)
                        break;
        }
}

/*
 * Internal slab auditor for simple caches.
 */
static void
mcache_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc)
{
        mcache_t *cp = arg;
        size_t offset = 0;
        size_t rsize = P2ROUNDUP(cp->mc_bufsize, sizeof (u_int64_t));
        void *base, **pbuf;

        /*
         * The address of the object returned to the caller is an
         * offset from the 64-bit aligned base address only if the
         * cache's alignment requirement is neither 1 nor 8 bytes.
         */
        if (cp->mc_align != 1 && cp->mc_align != sizeof (u_int64_t))
                offset = cp->mc_align;

        while (list != NULL) {
                mcache_obj_t *next = list->obj_next;

                /* Get the 64-bit aligned base address of this object */
                base = (void *)((intptr_t)list - offset);
                VERIFY(IS_P2ALIGNED(base, sizeof (u_int64_t)));

                /* Get the original address */
                pbuf = (void **)((intptr_t)base - sizeof (void *));

                VERIFY(((intptr_t)base + rsize) <=
                    ((intptr_t)*pbuf + cp->mc_chunksize));

                if (!alloc)
                        mcache_set_pattern(MCACHE_FREE_PATTERN, base, rsize);
                else
                        mcache_audit_free_verify_set(NULL, base, offset, rsize);

                list = list->obj_next = next;
        }
}

/*
 * Refill the CPU's filled bucket with bkt and save the previous one.
 */
static void
mcache_cpu_refill(mcache_cpu_t *ccp, mcache_bkt_t *bkt, int objs)
{
        ASSERT((ccp->cc_filled == NULL && ccp->cc_objs == -1) ||
            (ccp->cc_filled && ccp->cc_objs + objs == ccp->cc_bktsize));
        ASSERT(ccp->cc_bktsize > 0);

        ccp->cc_pfilled = ccp->cc_filled;
        ccp->cc_pobjs = ccp->cc_objs;
        ccp->cc_filled = bkt;
        ccp->cc_objs = objs;
}

/*
 * Allocate a bucket from the bucket layer.
 */
static mcache_bkt_t *
mcache_bkt_alloc(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkttype_t **btp)
{
        mcache_bkt_t *bkt;

        if (!MCACHE_LOCK_TRY(&cp->mc_bkt_lock)) {
                /*
                 * The bucket layer lock is held by another CPU; increase
                 * the contention count so that we can later resize the
                 * bucket size accordingly.
                 */
                MCACHE_LOCK(&cp->mc_bkt_lock);
                cp->mc_bkt_contention++;
        }

        if ((bkt = blp->bl_list) != NULL) {
                blp->bl_list = bkt->bkt_next;
                if (--blp->bl_total < blp->bl_min)
                        blp->bl_min = blp->bl_total;
                blp->bl_alloc++;
        }

        if (btp != NULL)
                *btp = cp->cache_bkttype;

        MCACHE_UNLOCK(&cp->mc_bkt_lock);

        return (bkt);
}

/*
 * Free a bucket to the bucket layer.
 */
static void
mcache_bkt_free(mcache_t *cp, mcache_bktlist_t *blp, mcache_bkt_t *bkt)
{
        MCACHE_LOCK(&cp->mc_bkt_lock);

        bkt->bkt_next = blp->bl_list;
        blp->bl_list = bkt;
        blp->bl_total++;

        MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Enable the bucket layer of a cache.
 */
static void
mcache_cache_bkt_enable(mcache_t *cp)
{
        mcache_cpu_t *ccp;
        int cpu;

        if (cp->mc_flags & MCF_NOCPUCACHE)
                return;

        for (cpu = 0; cpu < ncpu; cpu++) {
                ccp = &cp->mc_cpu[cpu];
                MCACHE_LOCK(&ccp->cc_lock);
                ccp->cc_bktsize = cp->cache_bkttype->bt_bktsize;
                MCACHE_UNLOCK(&ccp->cc_lock);
        }
}

/*
 * Purge all buckets from a cache and disable its bucket layer.
 */
static void
mcache_bkt_purge(mcache_t *cp)
{
        mcache_cpu_t *ccp;
        mcache_bkt_t *bp, *pbp;
        mcache_bkttype_t *btp;
        int cpu, objs, pobjs;

        for (cpu = 0; cpu < ncpu; cpu++) {
                ccp = &cp->mc_cpu[cpu];

                MCACHE_LOCK(&ccp->cc_lock);

                btp = cp->cache_bkttype;
                bp = ccp->cc_filled;
                pbp = ccp->cc_pfilled;
                objs = ccp->cc_objs;
                pobjs = ccp->cc_pobjs;
                ccp->cc_filled = NULL;
                ccp->cc_pfilled = NULL;
                ccp->cc_objs = -1;
                ccp->cc_pobjs = -1;
                ccp->cc_bktsize = 0;

                MCACHE_UNLOCK(&ccp->cc_lock);

                if (bp != NULL)
                        mcache_bkt_destroy(cp, btp, bp, objs);
                if (pbp != NULL)
                        mcache_bkt_destroy(cp, btp, pbp, pobjs);
        }

        /*
         * Updating the working set back to back essentially sets
         * the working set size to zero, so everything is reapable.
         */
        mcache_bkt_ws_update(cp);
        mcache_bkt_ws_update(cp);

        mcache_bkt_ws_reap(cp);
}

/*
 * Free one or more objects in the bucket to the slab layer,
 * and also free the bucket itself.
 */
static void
mcache_bkt_destroy(mcache_t *cp, mcache_bkttype_t *btp, mcache_bkt_t *bkt,
    int nobjs)
{
        if (nobjs > 0) {
                mcache_obj_t *top = bkt->bkt_obj[nobjs - 1];

                if (cp->mc_flags & MCF_DEBUG) {
                        mcache_obj_t *o = top;
                        int cnt = 0;

                        /*
                         * Verify that the chain of objects in the bucket is
                         * valid. Any mismatch here means a mistake when the
                         * object(s) were freed to the CPU layer, so we panic.
                         */
                        while (o != NULL) {
                                o = o->obj_next;
                                ++cnt;
                        }
                        if (cnt != nobjs) {
                                panic("mcache_bkt_destroy: %s cp %p corrupted "
                                    "list in bkt %p (nobjs %d actual %d)\n",
                                    cp->mc_name, (void *)cp, (void *)bkt,
                                    nobjs, cnt);
                        }
                }

                /* Advise the slab layer to purge the object(s) */
                (*cp->mc_slab_free)(cp->mc_private, top,
                    (cp->mc_flags & MCF_DEBUG) || cp->mc_purge_cnt);
        }
        mcache_free(btp->bt_cache, bkt);
}
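/*
 * Illustrative working-set example (made-up numbers): suppose the full
 * bucket list has bl_total == 10 and, since the last update, never dipped
 * below bl_min == 4.  mcache_bkt_ws_update() then sets bl_reaplimit = 4
 * and resets bl_min = 10, so a subsequent mcache_bkt_ws_reap() may free
 * up to MIN(bl_reaplimit, bl_min) buckets, i.e. the portion that was not
 * touched during the interval.  Calling the update twice in a row (as
 * mcache_bkt_purge() does above) drives the working set to zero, making
 * every bucket reapable.
 */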
/*
 * Update the bucket layer working set statistics.
 */
static void
mcache_bkt_ws_update(mcache_t *cp)
{
        MCACHE_LOCK(&cp->mc_bkt_lock);

        cp->mc_full.bl_reaplimit = cp->mc_full.bl_min;
        cp->mc_full.bl_min = cp->mc_full.bl_total;
        cp->mc_empty.bl_reaplimit = cp->mc_empty.bl_min;
        cp->mc_empty.bl_min = cp->mc_empty.bl_total;

        MCACHE_UNLOCK(&cp->mc_bkt_lock);
}

/*
 * Reap all buckets that are beyond the working set.
 */
static void
mcache_bkt_ws_reap(mcache_t *cp)
{
        long reap;
        mcache_bkt_t *bkt;
        mcache_bkttype_t *btp;

        reap = MIN(cp->mc_full.bl_reaplimit, cp->mc_full.bl_min);
        while (reap-- &&
            (bkt = mcache_bkt_alloc(cp, &cp->mc_full, &btp)) != NULL)
                mcache_bkt_destroy(cp, btp, bkt, btp->bt_bktsize);

        reap = MIN(cp->mc_empty.bl_reaplimit, cp->mc_empty.bl_min);
        while (reap-- &&
            (bkt = mcache_bkt_alloc(cp, &cp->mc_empty, &btp)) != NULL)
                mcache_bkt_destroy(cp, btp, bkt, 0);
}

static void
mcache_reap_timeout(void *arg)
{
        volatile UInt32 *flag = arg;

        ASSERT(flag == &mcache_reaping);

        *flag = 0;
}

static void
mcache_reap_done(void *flag)
{
        timeout(mcache_reap_timeout, flag, mcache_reap_interval);
}

static void
mcache_reap_start(void *arg)
{
        UInt32 *flag = arg;

        ASSERT(flag == &mcache_reaping);

        mcache_applyall(mcache_cache_reap);
        mcache_dispatch(mcache_reap_done, flag);
}

__private_extern__ void
mcache_reap(void)
{
        UInt32 *flag = &mcache_reaping;

        if (mcache_llock_owner == current_thread() ||
            !OSCompareAndSwap(0, 1, flag))
                return;

        mcache_dispatch(mcache_reap_start, flag);
}

static void
mcache_cache_reap(mcache_t *cp)
{
        mcache_bkt_ws_reap(cp);
}

/*
 * Performs periodic maintenance on a cache.
 */
static void
mcache_cache_update(mcache_t *cp)
{
        int need_bkt_resize = 0;
        int need_bkt_reenable = 0;

        lck_mtx_assert(mcache_llock, LCK_MTX_ASSERT_OWNED);

        mcache_bkt_ws_update(cp);

        /*
         * Cache resize and post-purge reenable are mutually exclusive.
         * If the cache was previously purged, there is no point in
         * increasing the bucket size as there was an indication of
         * memory pressure on the system.
         */
        lck_mtx_lock_spin(&cp->mc_sync_lock);
        if (!(cp->mc_flags & MCF_NOCPUCACHE) && cp->mc_enable_cnt)
                need_bkt_reenable = 1;
        lck_mtx_unlock(&cp->mc_sync_lock);

        MCACHE_LOCK(&cp->mc_bkt_lock);
        /*
         * If the contention count is greater than the threshold, and if
         * we are not already at the maximum bucket size, increase it.
         * Otherwise, if this cache was previously purged by the user
         * then we simply reenable it.
         */
        if ((unsigned int)cp->mc_chunksize < cp->cache_bkttype->bt_maxbuf &&
            (int)(cp->mc_bkt_contention - cp->mc_bkt_contention_prev) >
            mcache_bkt_contention && !need_bkt_reenable)
                need_bkt_resize = 1;

        cp->mc_bkt_contention_prev = cp->mc_bkt_contention;
        MCACHE_UNLOCK(&cp->mc_bkt_lock);

        if (need_bkt_resize)
                mcache_dispatch(mcache_cache_bkt_resize, cp);
        else if (need_bkt_reenable)
                mcache_dispatch(mcache_cache_enable, cp);
}
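/*
 * Illustrative example (made-up numbers): with mcache_bkt_contention set
 * to 3, a cache whose mc_bkt_contention counter grew from 10 to 17 since
 * the previous update has seen 7 lock-contention events in the interval,
 * which exceeds the threshold; mcache_cache_update() then dispatches
 * mcache_cache_bkt_resize() to move the cache to the next, larger bucket
 * type (provided the chunk size is still below bt_maxbuf and the cache
 * was not just purged).
 */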
/*
 * Recompute a cache's bucket size. This is an expensive operation
 * and should not be done frequently; larger buckets provide a higher
 * transfer rate with the bucket layer, while smaller buckets reduce
 * memory consumption.
 */
static void
mcache_cache_bkt_resize(void *arg)
{
        mcache_t *cp = arg;
        mcache_bkttype_t *btp = cp->cache_bkttype;

        if ((unsigned int)cp->mc_chunksize < btp->bt_maxbuf) {
                mcache_bkt_purge(cp);

                /*
                 * Upgrade to the next bucket type with larger bucket size;
                 * temporarily set the previous contention snapshot to a
                 * negative number to prevent unnecessary resize requests.
                 */
                MCACHE_LOCK(&cp->mc_bkt_lock);
                cp->cache_bkttype = ++btp;
                cp->mc_bkt_contention_prev = cp->mc_bkt_contention + INT_MAX;
                MCACHE_UNLOCK(&cp->mc_bkt_lock);

                mcache_cache_enable(cp);
        }
}

/*
 * Reenable a previously disabled cache due to purge.
 */
static void
mcache_cache_enable(void *arg)
{
        mcache_t *cp = arg;

        lck_mtx_lock_spin(&cp->mc_sync_lock);
        cp->mc_purge_cnt = 0;
        cp->mc_enable_cnt = 0;
        lck_mtx_unlock(&cp->mc_sync_lock);

        mcache_cache_bkt_enable(cp);
}

static void
mcache_update_timeout(__unused void *arg)
{
        timeout(mcache_update, NULL, mcache_reap_interval);
}

static void
mcache_update(__unused void *arg)
{
        mcache_applyall(mcache_cache_update);
        mcache_dispatch(mcache_update_timeout, NULL);
}

static void
mcache_applyall(void (*func)(mcache_t *))
{
        mcache_t *cp;

        MCACHE_LIST_LOCK();
        LIST_FOREACH(cp, &mcache_head, mc_list) {
                func(cp);
        }
        MCACHE_LIST_UNLOCK();
}

static void
mcache_dispatch(void (*func)(void *), void *arg)
{
        ASSERT(func != NULL);
        timeout(func, arg, hz/1000);
}

__private_extern__ void
mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp)
{
        mca->mca_addr = addr;
        mca->mca_cache = cp;
        mca->mca_pthread = mca->mca_thread;
        mca->mca_thread = current_thread();
        bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack));
        mca->mca_pdepth = mca->mca_depth;
        bzero(mca->mca_stack, sizeof (mca->mca_stack));
        mca->mca_depth = OSBacktrace(mca->mca_stack, MCACHE_STACK_DEPTH);
}

__private_extern__ void
mcache_set_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
        u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
        u_int64_t *buf = (u_int64_t *)buf_arg;

        VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
        VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

        while (buf < buf_end)
                *buf++ = pattern;
}

__private_extern__ void *
mcache_verify_pattern(u_int64_t pattern, void *buf_arg, size_t size)
{
        u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
        u_int64_t *buf;

        VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
        VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));

        for (buf = buf_arg; buf < buf_end; buf++) {
                if (*buf != pattern)
                        return (buf);
        }
        return (NULL);
}

__private_extern__ void *
mcache_verify_set_pattern(u_int64_t old, u_int64_t new, void *buf_arg,
    size_t size)
{
        u_int64_t *buf_end = (u_int64_t *)((void *)((char *)buf_arg + size));
        u_int64_t *buf;

        VERIFY(IS_P2ALIGNED(buf_arg, sizeof (u_int64_t)));
        VERIFY(IS_P2ALIGNED(size, sizeof (u_int64_t)));
        for (buf = buf_arg; buf < buf_end; buf++) {
                if (*buf != old) {
                        mcache_set_pattern(old, buf_arg,
                            (uintptr_t)buf - (uintptr_t)buf_arg);
                        return (buf);
                }
                *buf = new;
        }
        return (NULL);
}

__private_extern__ void
mcache_audit_free_verify(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
        void *addr;
        u_int64_t *oaddr64;
        mcache_obj_t *next;

        addr = (void *)((uintptr_t)base + offset);
        next = ((mcache_obj_t *)addr)->obj_next;

        /* For the "obj_next" pointer in the buffer */
        oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
        *oaddr64 = MCACHE_FREE_PATTERN;

        if ((oaddr64 = mcache_verify_pattern(MCACHE_FREE_PATTERN,
            (caddr_t)base, size)) != NULL) {
                mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
                    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
                /* NOTREACHED */
        }
        ((mcache_obj_t *)addr)->obj_next = next;
}

__private_extern__ void
mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset,
    size_t size)
{
        void *addr;
        u_int64_t *oaddr64;
        mcache_obj_t *next;

        addr = (void *)((uintptr_t)base + offset);
        next = ((mcache_obj_t *)addr)->obj_next;

        /* For the "obj_next" pointer in the buffer */
        oaddr64 = (u_int64_t *)P2ROUNDDOWN(addr, sizeof (u_int64_t));
        *oaddr64 = MCACHE_FREE_PATTERN;

        if ((oaddr64 = mcache_verify_set_pattern(MCACHE_FREE_PATTERN,
            MCACHE_UNINITIALIZED_PATTERN, (caddr_t)base, size)) != NULL) {
                mcache_audit_panic(mca, addr, (caddr_t)oaddr64 - (caddr_t)base,
                    (int64_t)MCACHE_FREE_PATTERN, (int64_t)*oaddr64);
                /* NOTREACHED */
        }
        ((mcache_obj_t *)addr)->obj_next = next;
}

#undef panic

__private_extern__ char *
mcache_dump_mca(mcache_audit_t *mca)
{
        if (mca_dump_buf == NULL)
                return (NULL);

        snprintf(mca_dump_buf, DUMP_MCA_BUF_SIZE,
            "mca %p: addr %p, cache %p (%s)\n"
            "last transaction; thread %p, saved PC stack (%d deep):\n"
            "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
            "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
            "previous transaction; thread %p, saved PC stack (%d deep):\n"
            "\t%p, %p, %p, %p, %p, %p, %p, %p\n"
            "\t%p, %p, %p, %p, %p, %p, %p, %p\n",
            mca, mca->mca_addr, mca->mca_cache,
            mca->mca_cache ? mca->mca_cache->mc_name : "?",
            mca->mca_thread, mca->mca_depth,
            mca->mca_stack[0], mca->mca_stack[1], mca->mca_stack[2],
            mca->mca_stack[3], mca->mca_stack[4], mca->mca_stack[5],
            mca->mca_stack[6], mca->mca_stack[7], mca->mca_stack[8],
            mca->mca_stack[9], mca->mca_stack[10], mca->mca_stack[11],
            mca->mca_stack[12], mca->mca_stack[13], mca->mca_stack[14],
            mca->mca_stack[15],
            mca->mca_pthread, mca->mca_pdepth,
            mca->mca_pstack[0], mca->mca_pstack[1], mca->mca_pstack[2],
            mca->mca_pstack[3], mca->mca_pstack[4], mca->mca_pstack[5],
            mca->mca_pstack[6], mca->mca_pstack[7], mca->mca_pstack[8],
            mca->mca_pstack[9], mca->mca_pstack[10], mca->mca_pstack[11],
            mca->mca_pstack[12], mca->mca_pstack[13], mca->mca_pstack[14],
            mca->mca_pstack[15]);

        return (mca_dump_buf);
}

__private_extern__ void
mcache_audit_panic(mcache_audit_t *mca, void *addr, size_t offset,
    int64_t expected, int64_t got)
{
        if (mca == NULL) {
                panic("mcache_audit: buffer %p modified after free at "
                    "offset 0x%lx (0x%llx instead of 0x%llx)\n", addr,
                    offset, got, expected);
                /* NOTREACHED */
        }

        panic("mcache_audit: buffer %p modified after free at offset 0x%lx "
            "(0x%llx instead of 0x%llx)\n%s\n",
            addr, offset, got, expected, mcache_dump_mca(mca));
        /* NOTREACHED */
}

__private_extern__ int
assfail(const char *a, const char *f, int l)
{
        panic("assertion failed: %s, file: %s, line: %d", a, f, l);
        return (0);
}