1139825Simp/*-
2251709Sjeff * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
3148078Srwatson * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4163702Srwatson * Copyright (c) 2004-2006 Robert N. M. Watson
5148078Srwatson * All rights reserved.
692654Sjeff *
792654Sjeff * Redistribution and use in source and binary forms, with or without
892654Sjeff * modification, are permitted provided that the following conditions
992654Sjeff * are met:
1092654Sjeff * 1. Redistributions of source code must retain the above copyright
1192654Sjeff *    notice unmodified, this list of conditions, and the following
1292654Sjeff *    disclaimer.
1392654Sjeff * 2. Redistributions in binary form must reproduce the above copyright
1492654Sjeff *    notice, this list of conditions and the following disclaimer in the
1592654Sjeff *    documentation and/or other materials provided with the distribution.
1692654Sjeff *
1792654Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
1892654Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1992654Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
2092654Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
2192654Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2292654Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
2392654Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
2492654Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2592654Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2692654Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2792654Sjeff */
2892654Sjeff
2992654Sjeff/*
3092654Sjeff * uma_core.c  Implementation of the Universal Memory Allocator
3192654Sjeff *
3292654Sjeff * This allocator is intended to replace the multitude of similar object caches
3392654Sjeff * in the standard FreeBSD kernel.  The intent is to be flexible as well as
3492654Sjeff * efficient.  A primary design goal is to return unused memory to the rest of
35125246Sdes * the system.  This will make the system as a whole more flexible due to the
3692654Sjeff * ability to move memory to subsystems which most need it instead of leaving
3792654Sjeff * pools of reserved memory unused.
3892654Sjeff *
3992654Sjeff * The basic ideas stem from similar slab/zone based allocators whose algorithms
4092654Sjeff * are well known.
4192654Sjeff *
4292654Sjeff */
4392654Sjeff
4492654Sjeff/*
4592654Sjeff * TODO:
4692654Sjeff *	- Improve memory usage for large allocations
4792654Sjeff *	- Investigate cache size adjustments
4892654Sjeff */
4992654Sjeff
50116226Sobrien#include <sys/cdefs.h>
51116226Sobrien__FBSDID("$FreeBSD$");
52116226Sobrien
5392654Sjeff/* I should really use ktr.. */
5492654Sjeff/*
5592654Sjeff#define UMA_DEBUG 1
5692654Sjeff#define UMA_DEBUG_ALLOC 1
5792654Sjeff#define UMA_DEBUG_ALLOC_1 1
5892654Sjeff*/
5992654Sjeff
60151516Srwatson#include "opt_ddb.h"
6192654Sjeff#include "opt_param.h"
62226313Sglebius#include "opt_vm.h"
63151516Srwatson
6492654Sjeff#include <sys/param.h>
6592654Sjeff#include <sys/systm.h>
66251709Sjeff#include <sys/bitset.h>
6792654Sjeff#include <sys/kernel.h>
6892654Sjeff#include <sys/types.h>
6992654Sjeff#include <sys/queue.h>
7092654Sjeff#include <sys/malloc.h>
71133230Srwatson#include <sys/ktr.h>
7292654Sjeff#include <sys/lock.h>
7392654Sjeff#include <sys/sysctl.h>
7492654Sjeff#include <sys/mutex.h>
7597007Sjhb#include <sys/proc.h>
76248084Sattilio#include <sys/rwlock.h>
77147996Srwatson#include <sys/sbuf.h>
78260303Smav#include <sys/sched.h>
7992654Sjeff#include <sys/smp.h>
8094165Sjeff#include <sys/vmmeter.h>
8192654Sjeff
8292654Sjeff#include <vm/vm.h>
8392654Sjeff#include <vm/vm_object.h>
8492654Sjeff#include <vm/vm_page.h>
85247360Sattilio#include <vm/vm_pageout.h>
8692654Sjeff#include <vm/vm_param.h>
8792654Sjeff#include <vm/vm_map.h>
8892654Sjeff#include <vm/vm_kern.h>
8992654Sjeff#include <vm/vm_extern.h>
9092654Sjeff#include <vm/uma.h>
9192654Sjeff#include <vm/uma_int.h>
9295899Sjeff#include <vm/uma_dbg.h>
9392654Sjeff
94151516Srwatson#include <ddb/ddb.h>
95151516Srwatson
96226313Sglebius#ifdef DEBUG_MEMGUARD
97226313Sglebius#include <vm/memguard.h>
98226313Sglebius#endif
99226313Sglebius
10092654Sjeff/*
101129906Sbmilekic * This is the zone and keg from which all zones are spawned.  The idea is that
102129906Sbmilekic * even the zone & keg heads are allocated from the allocator, so we use the
103129906Sbmilekic * bss section to bootstrap us.
10492654Sjeff */
105129906Sbmilekicstatic struct uma_keg masterkeg;
106129906Sbmilekicstatic struct uma_zone masterzone_k;
107129906Sbmilekicstatic struct uma_zone masterzone_z;
108129906Sbmilekicstatic uma_zone_t kegs = &masterzone_k;
109129906Sbmilekicstatic uma_zone_t zones = &masterzone_z;
11092654Sjeff
11192654Sjeff/* This is the zone from which all of uma_slab_t's are allocated. */
11292654Sjeffstatic uma_zone_t slabzone;
113129906Sbmilekicstatic uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */
11492654Sjeff
11592654Sjeff/*
11692654Sjeff * The initial hash tables come out of this zone so they can be allocated
11792654Sjeff * prior to malloc coming up.
11892654Sjeff */
11992654Sjeffstatic uma_zone_t hashzone;
12092654Sjeff
121166654Srwatson/* The boot-time adjusted value for cache line alignment. */
122219819Sjeffint uma_align_cache = 64 - 1;
123166654Srwatson
124120221Sjeffstatic MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
125120221Sjeff
12692654Sjeff/*
12794165Sjeff * Are we allowed to allocate buckets?
12894165Sjeff */
12994165Sjeffstatic int bucketdisable = 1;
13094165Sjeff
131129906Sbmilekic/* Linked list of all kegs in the system */
132201145Santoinestatic LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
13392654Sjeff
134260306Smav/* Linked list of all cache-only zones in the system */
135260306Smavstatic LIST_HEAD(,uma_zone) uma_cachezones =
136260306Smav    LIST_HEAD_INITIALIZER(uma_cachezones);
137260306Smav
138129906Sbmilekic/* This mutex protects the keg list */
139251826Sjeffstatic struct mtx_padalign uma_mtx;
14092654Sjeff
14192654Sjeff/* Linked list of boot time pages */
14292654Sjeffstatic LIST_HEAD(,uma_slab) uma_boot_pages =
143201145Santoine    LIST_HEAD_INITIALIZER(uma_boot_pages);
14492654Sjeff
145149900Salc/* This mutex protects the boot time pages list */
146251826Sjeffstatic struct mtx_padalign uma_boot_pages_mtx;
14792654Sjeff
14892654Sjeff/* Is the VM done starting up? */
14992654Sjeffstatic int booted = 0;
150222163Salc#define	UMA_STARTUP	1
151222163Salc#define	UMA_STARTUP2	2
15292654Sjeff
153120262Sjeff/*
154251709Sjeff * Only mbuf clusters use ref zones.  Just provide enough references
155251709Sjeff * to support the one user.  New code should not use the ref facility.
156251709Sjeff */
157251709Sjeffstatic const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES;
158251709Sjeff
159251709Sjeff/*
160120262Sjeff * This is the handle used to schedule events that need to happen
161120262Sjeff * outside of the allocation fast path.
162120262Sjeff */
16392654Sjeffstatic struct callout uma_callout;
164120262Sjeff#define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
16592654Sjeff
16692654Sjeff/*
16792654Sjeff * This structure is passed as the zone ctor arg so that I don't have to create
16892654Sjeff * a special allocation function just for zones.
16992654Sjeff */
17092654Sjeffstruct uma_zctor_args {
171242152Smdf	const char *name;
17295925Sarr	size_t size;
17392654Sjeff	uma_ctor ctor;
17492654Sjeff	uma_dtor dtor;
17592654Sjeff	uma_init uminit;
17692654Sjeff	uma_fini fini;
177251826Sjeff	uma_import import;
178251826Sjeff	uma_release release;
179251826Sjeff	void *arg;
180129906Sbmilekic	uma_keg_t keg;
18192654Sjeff	int align;
182249313Sglebius	uint32_t flags;
18392654Sjeff};
18492654Sjeff
185129906Sbmilekicstruct uma_kctor_args {
186129906Sbmilekic	uma_zone_t zone;
187129906Sbmilekic	size_t size;
188129906Sbmilekic	uma_init uminit;
189129906Sbmilekic	uma_fini fini;
190129906Sbmilekic	int align;
191249313Sglebius	uint32_t flags;
192129906Sbmilekic};
193129906Sbmilekic
194120218Sjeffstruct uma_bucket_zone {
195120218Sjeff	uma_zone_t	ubz_zone;
196120218Sjeff	char		*ubz_name;
197251894Sjeff	int		ubz_entries;	/* Number of items it can hold. */
198251894Sjeff	int		ubz_maxsize;	/* Maximum allocation size per-item. */
199120218Sjeff};
200120218Sjeff
201251894Sjeff/*
202251894Sjeff * Compute the actual number of bucket entries so that the buckets pack
203251894Sjeff * into power-of-two allocation sizes for more efficient space utilization.
204251894Sjeff */
205251894Sjeff#define	BUCKET_SIZE(n)						\
206251894Sjeff    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
207120218Sjeff
208267751Smav#define	BUCKET_MAX	BUCKET_SIZE(256)
209251894Sjeff
210120218Sjeffstruct uma_bucket_zone bucket_zones[] = {
211252226Sjeff	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
212260301Smav	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
213252226Sjeff	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
214260301Smav	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
215252226Sjeff	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
216251894Sjeff	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
217251894Sjeff	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
218251894Sjeff	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
219267751Smav	{ NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
220120218Sjeff	{ NULL, NULL, 0}
221120218Sjeff};
222120218Sjeff
223137305Srwatson/*
224148070Srwatson * Flags and enumerations to be passed to internal functions.
225148070Srwatson */
226251709Sjeffenum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
227132987Sgreen
22892654Sjeff/* Prototypes.. */
22992654Sjeff
230249313Sglebiusstatic void *noobj_alloc(uma_zone_t, int, uint8_t *, int);
231249313Sglebiusstatic void *page_alloc(uma_zone_t, int, uint8_t *, int);
232249313Sglebiusstatic void *startup_alloc(uma_zone_t, int, uint8_t *, int);
233249313Sglebiusstatic void page_free(void *, int, uint8_t);
234187681Sjeffstatic uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
235120262Sjeffstatic void cache_drain(uma_zone_t);
23692654Sjeffstatic void bucket_drain(uma_zone_t, uma_bucket_t);
237125294Sjeffstatic void bucket_cache_drain(uma_zone_t zone);
238132987Sgreenstatic int keg_ctor(void *, int, void *, int);
239129906Sbmilekicstatic void keg_dtor(void *, int, void *);
240132987Sgreenstatic int zone_ctor(void *, int, void *, int);
24194161Sjeffstatic void zone_dtor(void *, int, void *);
242132987Sgreenstatic int zero_init(void *, int, int);
243187681Sjeffstatic void keg_small_init(uma_keg_t keg);
244187681Sjeffstatic void keg_large_init(uma_keg_t keg);
24592654Sjeffstatic void zone_foreach(void (*zfunc)(uma_zone_t));
24692654Sjeffstatic void zone_timeout(uma_zone_t zone);
24796493Sjeffstatic int hash_alloc(struct uma_hash *);
24896493Sjeffstatic int hash_expand(struct uma_hash *, struct uma_hash *);
24996493Sjeffstatic void hash_free(struct uma_hash *hash);
25092654Sjeffstatic void uma_timeout(void *);
25192654Sjeffstatic void uma_startup3(void);
252187681Sjeffstatic void *zone_alloc_item(uma_zone_t, void *, int);
253251826Sjeffstatic void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
25494165Sjeffstatic void bucket_enable(void);
255120218Sjeffstatic void bucket_init(void);
256252226Sjeffstatic uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
257252226Sjeffstatic void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
258120218Sjeffstatic void bucket_zone_drain(void);
259252226Sjeffstatic uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
260187681Sjeffstatic uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
261187681Sjeffstatic uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
262251826Sjeffstatic void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
263251826Sjeffstatic void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
264187681Sjeffstatic uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
265249313Sglebius    uma_fini fini, int align, uint32_t flags);
266251826Sjeffstatic int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
267251826Sjeffstatic void zone_release(uma_zone_t zone, void **bucket, int cnt);
268262739Sglebiusstatic void uma_zero_item(void *item, uma_zone_t zone);
269105853Sjeff
27092654Sjeffvoid uma_print_zone(uma_zone_t);
27192654Sjeffvoid uma_print_stats(void);
272147996Srwatsonstatic int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
273147996Srwatsonstatic int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
27492654Sjeff
27592654SjeffSYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
27692654Sjeff
277147996SrwatsonSYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
278147996Srwatson    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
279147996Srwatson
280147996SrwatsonSYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
281147996Srwatson    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
282147996Srwatson
283243998Spjdstatic int zone_warnings = 1;
284243998SpjdTUNABLE_INT("vm.zone_warnings", &zone_warnings);
285243998SpjdSYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RW, &zone_warnings, 0,
286243998Spjd    "Warn when a UMA zone becomes full");
287243998Spjd
28894165Sjeff/*
28994165Sjeff * This routine checks to see whether or not it's safe to enable buckets.
29094165Sjeff */
29194165Sjeffstatic void
29294165Sjeffbucket_enable(void)
29394165Sjeff{
294235854Semax	bucketdisable = vm_page_count_min();
29594165Sjeff}
29694165Sjeff
297137309Srwatson/*
298137309Srwatson * Initialize bucket_zones, the array of zones of buckets of various sizes.
299137309Srwatson *
300137309Srwatson * For each zone, calculate the memory required for each bucket, consisting
301251894Sjeff * of the header and an array of pointers.
302137309Srwatson */
303120218Sjeffstatic void
304120218Sjeffbucket_init(void)
305120218Sjeff{
306120218Sjeff	struct uma_bucket_zone *ubz;
307251894Sjeff	int size;
308120218Sjeff	int i;
30994165Sjeff
310251894Sjeff	for (i = 0, ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
311120218Sjeff		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
312120218Sjeff		size += sizeof(void *) * ubz->ubz_entries;
313120218Sjeff		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
314187681Sjeff		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
315252226Sjeff		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
316120218Sjeff	}
317120218Sjeff}
318120218Sjeff
319137309Srwatson/*
320137309Srwatson * Given a desired number of entries for a bucket, return the zone from which
321137309Srwatson * to allocate the bucket.
322137309Srwatson */
323137309Srwatsonstatic struct uma_bucket_zone *
324137309Srwatsonbucket_zone_lookup(int entries)
325137309Srwatson{
326251894Sjeff	struct uma_bucket_zone *ubz;
327137309Srwatson
328251894Sjeff	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
329251894Sjeff		if (ubz->ubz_entries >= entries)
330251894Sjeff			return (ubz);
331251894Sjeff	ubz--;
332251894Sjeff	return (ubz);
333137309Srwatson}
334137309Srwatson
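/*
 * Given a single item's allocation size, select the number of entries per
 * bucket: the largest standard bucket whose per-item size limit still
 * accommodates the item, scaled down for oversized items.
 */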
335251894Sjeffstatic int
336251894Sjeffbucket_select(int size)
337251894Sjeff{
338251894Sjeff	struct uma_bucket_zone *ubz;
339251894Sjeff
340251894Sjeff	ubz = &bucket_zones[0];
341251894Sjeff	if (size > ubz->ubz_maxsize)
342251894Sjeff		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
343251894Sjeff
344251894Sjeff	for (; ubz->ubz_entries != 0; ubz++)
345251894Sjeff		if (ubz->ubz_maxsize < size)
346251894Sjeff			break;
347251894Sjeff	ubz--;
348251894Sjeff	return (ubz->ubz_entries);
349251894Sjeff}
350251894Sjeff
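/*
 * Allocate a bucket for the given zone, encoding the zone's flags into the
 * udata cookie to limit bucket recursion (see the comment in the body).
 * Returns NULL if buckets are disabled or the allocation fails.
 */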
351120218Sjeffstatic uma_bucket_t
352252226Sjeffbucket_alloc(uma_zone_t zone, void *udata, int flags)
353120218Sjeff{
354120218Sjeff	struct uma_bucket_zone *ubz;
355120218Sjeff	uma_bucket_t bucket;
356120218Sjeff
357120218Sjeff	/*
358120218Sjeff	 * This is to stop us from allocating per cpu buckets while we're
359151104Sdes	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
360120218Sjeff	 * boot pages.  This also prevents us from allocating buckets in
361120218Sjeff	 * low memory situations.
362120218Sjeff	 */
363120218Sjeff	if (bucketdisable)
364120218Sjeff		return (NULL);
365252226Sjeff	/*
366252226Sjeff	 * To limit bucket recursion we store the original zone flags
367252226Sjeff	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
368252226Sjeff	 * NOVM flag to persist even through deep recursions.  We also
369252226Sjeff	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
370252226Sjeff	 * a bucket for a bucket zone so we do not allow infinite bucket
371252226Sjeff	 * recursion.  This cookie will even persist to frees of unused
372252226Sjeff	 * buckets via the allocation path or bucket allocations in the
373252226Sjeff	 * free path.
374252226Sjeff	 */
375252226Sjeff	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
376252226Sjeff		udata = (void *)(uintptr_t)zone->uz_flags;
377260280Sglebius	else {
378260280Sglebius		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
379260280Sglebius			return (NULL);
380252226Sjeff		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
381260280Sglebius	}
382252226Sjeff	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
383252040Sjeff		flags |= M_NOVM;
384252040Sjeff	ubz = bucket_zone_lookup(zone->uz_count);
385267750Smav	if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
386267750Smav		ubz++;
387252226Sjeff	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
388120218Sjeff	if (bucket) {
389120218Sjeff#ifdef INVARIANTS
390120218Sjeff		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
391120218Sjeff#endif
392120218Sjeff		bucket->ub_cnt = 0;
393120218Sjeff		bucket->ub_entries = ubz->ubz_entries;
394120218Sjeff	}
395120218Sjeff
396120218Sjeff	return (bucket);
397120218Sjeff}
398120218Sjeff
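/*
 * Return an empty bucket to its bucket zone, reconstructing the udata
 * cookie for non-bucket zones so the free follows the same path as the
 * original allocation.
 */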
399120218Sjeffstatic void
400252226Sjeffbucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
401120218Sjeff{
402120218Sjeff	struct uma_bucket_zone *ubz;
403120218Sjeff
404251894Sjeff	KASSERT(bucket->ub_cnt == 0,
405251894Sjeff	    ("bucket_free: Freeing a non free bucket."));
406252226Sjeff	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
407252226Sjeff		udata = (void *)(uintptr_t)zone->uz_flags;
408137309Srwatson	ubz = bucket_zone_lookup(bucket->ub_entries);
409252226Sjeff	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
410120218Sjeff}
411120218Sjeff
412120218Sjeffstatic void
413120218Sjeffbucket_zone_drain(void)
414120218Sjeff{
415120218Sjeff	struct uma_bucket_zone *ubz;
416120218Sjeff
417120218Sjeff	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
418120218Sjeff		zone_drain(ubz->ubz_zone);
419120218Sjeff}
420120218Sjeff
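/*
 * Print the zone's warning message, if one is set, rate-limited to once
 * every five minutes per zone.
 */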
421243998Spjdstatic void
422243998Spjdzone_log_warning(uma_zone_t zone)
423243998Spjd{
424243998Spjd	static const struct timeval warninterval = { 300, 0 };
425243998Spjd
426243998Spjd	if (!zone_warnings || zone->uz_warning == NULL)
427243998Spjd		return;
428243998Spjd
429243998Spjd	if (ratecheck(&zone->uz_ratecheck, &warninterval))
430243998Spjd		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
431243998Spjd}
432243998Spjd
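/*
 * Apply a function to each keg linked to the given zone.
 */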
433187681Sjeffstatic void
434187681Sjeffzone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
435187681Sjeff{
436187681Sjeff	uma_klink_t klink;
437187681Sjeff
438187681Sjeff	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
439187681Sjeff		kegfn(klink->kl_keg);
440187681Sjeff}
441187681Sjeff
44292654Sjeff/*
44392654Sjeff * Routine called by timeout which is used to fire off some time interval
444120262Sjeff * based calculations.  (stats, hash size, etc.)
44592654Sjeff *
44692654Sjeff * Arguments:
44792654Sjeff *	arg   Unused
448125246Sdes *
44992654Sjeff * Returns:
45092654Sjeff *	Nothing
45192654Sjeff */
45292654Sjeffstatic void
45392654Sjeffuma_timeout(void *unused)
45492654Sjeff{
45594165Sjeff	bucket_enable();
45692654Sjeff	zone_foreach(zone_timeout);
45792654Sjeff
45892654Sjeff	/* Reschedule this event */
459120262Sjeff	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
46092654Sjeff}
46192654Sjeff
46292654Sjeff/*
463120262Sjeff * Routine to perform timeout driven calculations.  This expands the
464120262Sjeff * keg hash tables if they have become too small for the number of slabs.
46592654Sjeff *
466187681Sjeff *  Returns nothing.
46792654Sjeff */
46892654Sjeffstatic void
469187681Sjeffkeg_timeout(uma_keg_t keg)
47092654Sjeff{
47192654Sjeff
472187681Sjeff	KEG_LOCK(keg);
47392654Sjeff	/*
474187681Sjeff	 * Expand the keg hash table.
475125246Sdes	 *
47692654Sjeff	 * This is done if the number of slabs is larger than the hash size.
47792654Sjeff	 * What I'm trying to do here is eliminate collisions entirely.  This
47892654Sjeff	 * may be a little aggressive.  Should I allow for two collisions max?
47992654Sjeff	 */
480129906Sbmilekic	if (keg->uk_flags & UMA_ZONE_HASH &&
481129906Sbmilekic	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
482103531Sjeff		struct uma_hash newhash;
483103531Sjeff		struct uma_hash oldhash;
484103531Sjeff		int ret;
48594653Sjeff
486103531Sjeff		/*
487125246Sdes		 * This is so involved because allocating and freeing
488187681Sjeff		 * while the keg lock is held will lead to deadlock.
489103531Sjeff		 * I have to do everything in stages and check for
490103531Sjeff		 * races.
491103531Sjeff		 */
492129906Sbmilekic		newhash = keg->uk_hash;
493187681Sjeff		KEG_UNLOCK(keg);
494103531Sjeff		ret = hash_alloc(&newhash);
495187681Sjeff		KEG_LOCK(keg);
496103531Sjeff		if (ret) {
497129906Sbmilekic			if (hash_expand(&keg->uk_hash, &newhash)) {
498129906Sbmilekic				oldhash = keg->uk_hash;
499129906Sbmilekic				keg->uk_hash = newhash;
500103531Sjeff			} else
501103531Sjeff				oldhash = newhash;
502103531Sjeff
503187681Sjeff			KEG_UNLOCK(keg);
504103531Sjeff			hash_free(&oldhash);
505252358Sdavide			return;
50694653Sjeff		}
50792654Sjeff	}
508187681Sjeff	KEG_UNLOCK(keg);
50992654Sjeff}
51092654Sjeff
511187681Sjeffstatic void
512187681Sjeffzone_timeout(uma_zone_t zone)
513187681Sjeff{
514187681Sjeff
515187681Sjeff	zone_foreach_keg(zone, &keg_timeout);
516187681Sjeff}
517187681Sjeff
51892654Sjeff/*
51994653Sjeff * Allocate and zero fill the next sized hash table from the appropriate
52094653Sjeff * backing store.
52194653Sjeff *
52294653Sjeff * Arguments:
52396493Sjeff *	hash  A new hash structure with the old hash size in uh_hashsize
52494653Sjeff *
52594653Sjeff * Returns:
52696493Sjeff *	1 on success and 0 on failure.
52794653Sjeff */
528104094Sphkstatic int
52996493Sjeffhash_alloc(struct uma_hash *hash)
53094653Sjeff{
53196493Sjeff	int oldsize;
53294653Sjeff	int alloc;
53394653Sjeff
53496493Sjeff	oldsize = hash->uh_hashsize;
53596493Sjeff
53694653Sjeff	/* We're just going to go to a power of two greater */
53796493Sjeff	if (oldsize)  {
53896493Sjeff		hash->uh_hashsize = oldsize * 2;
53996493Sjeff		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
54096493Sjeff		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
541120221Sjeff		    M_UMAHASH, M_NOWAIT);
54294653Sjeff	} else {
54396493Sjeff		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
544187681Sjeff		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
545111119Simp		    M_WAITOK);
54696493Sjeff		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
54794653Sjeff	}
54896493Sjeff	if (hash->uh_slab_hash) {
54996493Sjeff		bzero(hash->uh_slab_hash, alloc);
55096493Sjeff		hash->uh_hashmask = hash->uh_hashsize - 1;
55196493Sjeff		return (1);
55296493Sjeff	}
55394653Sjeff
55496493Sjeff	return (0);
55594653Sjeff}
55694653Sjeff
55794653Sjeff/*
558120249Sjeff * Expands the hash table for HASH zones.  This is done from zone_timeout
559120249Sjeff * to reduce collisions.  This must not be done in the regular allocation
560120249Sjeff * path, otherwise, we can recurse on the vm while allocating pages.
56192654Sjeff *
56292654Sjeff * Arguments:
563125246Sdes *	oldhash  The hash you want to expand
56496493Sjeff *	newhash  The hash structure for the new table
56592654Sjeff *
56692654Sjeff * Returns:
567125246Sdes *	1 if the entries were successfully migrated and 0 otherwise
57092654Sjeff */
57196493Sjeffstatic int
57296493Sjeffhash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
57392654Sjeff{
57492654Sjeff	uma_slab_t slab;
57592654Sjeff	int hval;
57692654Sjeff	int i;
57792654Sjeff
57896493Sjeff	if (!newhash->uh_slab_hash)
57996493Sjeff		return (0);
58092654Sjeff
58196493Sjeff	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
58296493Sjeff		return (0);
58392654Sjeff
58492654Sjeff	/*
58592654Sjeff	 * I need to investigate hash algorithms for resizing without a
58692654Sjeff	 * full rehash.
58792654Sjeff	 */
58892654Sjeff
58996493Sjeff	for (i = 0; i < oldhash->uh_hashsize; i++)
59096493Sjeff		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
59196493Sjeff			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
59296493Sjeff			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
59396493Sjeff			hval = UMA_HASH(newhash, slab->us_data);
59496493Sjeff			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
59596493Sjeff			    slab, us_hlink);
59692654Sjeff		}
59792654Sjeff
59896493Sjeff	return (1);
59992654Sjeff}
60092654Sjeff
60194653Sjeff/*
60294653Sjeff * Free the hash bucket to the appropriate backing store.
60394653Sjeff *
60494653Sjeff * Arguments:
60594653Sjeff *	hash  The hash structure whose table we're freeing
60794653Sjeff *
60894653Sjeff * Returns:
60994653Sjeff *	Nothing
61094653Sjeff */
61194161Sjeffstatic void
61296493Sjeffhash_free(struct uma_hash *hash)
61394161Sjeff{
61496493Sjeff	if (hash->uh_slab_hash == NULL)
61596493Sjeff		return;
61696493Sjeff	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
617251826Sjeff		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
61894161Sjeff	else
619120221Sjeff		free(hash->uh_slab_hash, M_UMAHASH);
62094161Sjeff}
62194161Sjeff
62292654Sjeff/*
62392654Sjeff * Frees all outstanding items in a bucket
62492654Sjeff *
62592654Sjeff * Arguments:
62692654Sjeff *	zone   The zone to free to, must be unlocked.
62792654Sjeff *	bucket The free/alloc bucket with items.
62892654Sjeff *
62992654Sjeff * Returns:
63092654Sjeff *	Nothing
63192654Sjeff */
63292654Sjeff
63392654Sjeffstatic void
63492654Sjeffbucket_drain(uma_zone_t zone, uma_bucket_t bucket)
63592654Sjeff{
636251826Sjeff	int i;
63792654Sjeff
63892654Sjeff	if (bucket == NULL)
63992654Sjeff		return;
64092654Sjeff
641251826Sjeff	if (zone->uz_fini)
642251826Sjeff		for (i = 0; i < bucket->ub_cnt; i++)
643251826Sjeff			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
644251826Sjeff	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
645251826Sjeff	bucket->ub_cnt = 0;
64692654Sjeff}
64792654Sjeff
64892654Sjeff/*
64992654Sjeff * Drains the per cpu caches for a zone.
65092654Sjeff *
651145686Srwatson * NOTE: This may only be called while the zone is being torn down, and not
652145686Srwatson * during normal operation.  This is necessary in order that we do not have
653145686Srwatson * to migrate CPUs to drain the per-CPU caches.
654145686Srwatson *
65592654Sjeff * Arguments:
656118221Sbmilekic *	zone     The zone to drain, must be unlocked.
65792654Sjeff *
65892654Sjeff * Returns:
65992654Sjeff *	Nothing
66092654Sjeff */
66192654Sjeffstatic void
662120262Sjeffcache_drain(uma_zone_t zone)
66392654Sjeff{
66492654Sjeff	uma_cache_t cache;
66592654Sjeff	int cpu;
66692654Sjeff
66792654Sjeff	/*
668145686Srwatson	 * XXX: It is safe to not lock the per-CPU caches, because we're
669145686Srwatson	 * tearing down the zone anyway.  I.e., there will be no further use
670145686Srwatson	 * of the caches at this point.
671145686Srwatson	 *
672145686Srwatson	 * XXX: It would be good to be able to assert that the zone is being
673145686Srwatson	 * torn down to prevent improper use of cache_drain().
674145686Srwatson	 *
675145686Srwatson	 * XXX: We lock the zone before passing into bucket_cache_drain() as
676145686Srwatson	 * it is used elsewhere.  Should the tear-down path be made special
677145686Srwatson	 * there in some form?
67892654Sjeff	 */
679209059Sjhb	CPU_FOREACH(cpu) {
68092654Sjeff		cache = &zone->uz_cpu[cpu];
68192654Sjeff		bucket_drain(zone, cache->uc_allocbucket);
68292654Sjeff		bucket_drain(zone, cache->uc_freebucket);
683120262Sjeff		if (cache->uc_allocbucket != NULL)
684252226Sjeff			bucket_free(zone, cache->uc_allocbucket, NULL);
685120262Sjeff		if (cache->uc_freebucket != NULL)
686252226Sjeff			bucket_free(zone, cache->uc_freebucket, NULL);
687120262Sjeff		cache->uc_allocbucket = cache->uc_freebucket = NULL;
68892654Sjeff	}
689125294Sjeff	ZONE_LOCK(zone);
690125294Sjeff	bucket_cache_drain(zone);
691125294Sjeff	ZONE_UNLOCK(zone);
692125294Sjeff}
69392654Sjeff
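/*
 * Drop the zone's target bucket size halfway back towards its minimum to
 * reduce caching when memory is scarce.  Internal zones are left alone.
 */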
694260303Smavstatic void
695260303Smavcache_shrink(uma_zone_t zone)
696260303Smav{
697260303Smav
698260303Smav	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
699260303Smav		return;
700260303Smav
701260303Smav	ZONE_LOCK(zone);
702260303Smav	zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
703260303Smav	ZONE_UNLOCK(zone);
704260303Smav}
705260303Smav
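/*
 * Move the current CPU's full cache buckets onto the zone's bucket list
 * and free its empty ones.  Must run on the CPU being drained; see
 * cache_drain_safe() below.
 */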
706260303Smavstatic void
707260303Smavcache_drain_safe_cpu(uma_zone_t zone)
708260303Smav{
709260303Smav	uma_cache_t cache;
710260303Smav	uma_bucket_t b1, b2;
711260303Smav
712260303Smav	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
713260303Smav		return;
714260303Smav
715260303Smav	b1 = b2 = NULL;
716260303Smav	ZONE_LOCK(zone);
717260303Smav	critical_enter();
718260303Smav	cache = &zone->uz_cpu[curcpu];
719260303Smav	if (cache->uc_allocbucket) {
720260303Smav		if (cache->uc_allocbucket->ub_cnt != 0)
721260303Smav			LIST_INSERT_HEAD(&zone->uz_buckets,
722260303Smav			    cache->uc_allocbucket, ub_link);
723260303Smav		else
724260303Smav			b1 = cache->uc_allocbucket;
725260303Smav		cache->uc_allocbucket = NULL;
726260303Smav	}
727260303Smav	if (cache->uc_freebucket) {
728260303Smav		if (cache->uc_freebucket->ub_cnt != 0)
729260303Smav			LIST_INSERT_HEAD(&zone->uz_buckets,
730260303Smav			    cache->uc_freebucket, ub_link);
731260303Smav		else
732260303Smav			b2 = cache->uc_freebucket;
733260303Smav		cache->uc_freebucket = NULL;
734260303Smav	}
735260303Smav	critical_exit();
736260303Smav	ZONE_UNLOCK(zone);
737260303Smav	if (b1)
738260303Smav		bucket_free(zone, b1, NULL);
739260303Smav	if (b2)
740260303Smav		bucket_free(zone, b2, NULL);
741260303Smav}
742260303Smav
743125294Sjeff/*
744260303Smav * Safely drain the per-CPU caches of a zone (or of all zones) into the
745260303Smav * zone bucket cache.  This is an expensive call because it needs to bind
746260303Smav * to all CPUs one by one and enter a critical section on each of them in
747260303Smav * order to safely access their cache buckets.
748260303Smav * The zone lock must not be held when calling this function.
749260303Smav */
750260303Smavstatic void
751260303Smavcache_drain_safe(uma_zone_t zone)
752260303Smav{
753260303Smav	int cpu;
754260303Smav
755260303Smav	/*
756260303Smav	 * Politely shrinking the bucket sizes was not enough; shrink aggressively.
757260303Smav	 */
758260303Smav	if (zone)
759260303Smav		cache_shrink(zone);
760260303Smav	else
761260303Smav		zone_foreach(cache_shrink);
762260303Smav
763260303Smav	CPU_FOREACH(cpu) {
764260303Smav		thread_lock(curthread);
765260303Smav		sched_bind(curthread, cpu);
766260303Smav		thread_unlock(curthread);
767260303Smav
768260303Smav		if (zone)
769260303Smav			cache_drain_safe_cpu(zone);
770260303Smav		else
771260303Smav			zone_foreach(cache_drain_safe_cpu);
772260303Smav	}
773260303Smav	thread_lock(curthread);
774260303Smav	sched_unbind(curthread);
775260303Smav	thread_unlock(curthread);
776260303Smav}
777260303Smav
778260303Smav/*
779125294Sjeff * Drain the cached buckets from a zone.  Expects a locked zone on entry.
780125294Sjeff */
781125294Sjeffstatic void
782125294Sjeffbucket_cache_drain(uma_zone_t zone)
783125294Sjeff{
784125294Sjeff	uma_bucket_t bucket;
785125294Sjeff
78692654Sjeff	/*
78792654Sjeff	 * Drain the bucket queues and free the buckets, we just keep two per
78892654Sjeff	 * cpu (alloc/free).
78992654Sjeff	 */
790251894Sjeff	while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
79192654Sjeff		LIST_REMOVE(bucket, ub_link);
79292654Sjeff		ZONE_UNLOCK(zone);
79392654Sjeff		bucket_drain(zone, bucket);
794252226Sjeff		bucket_free(zone, bucket, NULL);
79592654Sjeff		ZONE_LOCK(zone);
79692654Sjeff	}
797260300Smav
798260300Smav	/*
799260300Smav	 * Shrink the bucket size further.  The price of a single zone lock
800260300Smav	 * collision is probably lower than the price of a global cache drain.
801260300Smav	 */
802260300Smav	if (zone->uz_count > zone->uz_count_min)
803260300Smav		zone->uz_count--;
804251894Sjeff}
80592654Sjeff
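/*
 * Run the keg's fini on the first 'start' items of a slab, then return the
 * slab's pages (and any offpage slab header) to the backing allocator.
 */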
806251894Sjeffstatic void
807251894Sjeffkeg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
808251894Sjeff{
809251894Sjeff	uint8_t *mem;
810251894Sjeff	int i;
811251894Sjeff	uint8_t flags;
812251894Sjeff
813251894Sjeff	mem = slab->us_data;
814251894Sjeff	flags = slab->us_flags;
815251894Sjeff	i = start;
816251894Sjeff	if (keg->uk_fini != NULL) {
817251894Sjeff		for (i--; i > -1; i--)
818251894Sjeff			keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
819251894Sjeff			    keg->uk_size);
82092654Sjeff	}
821251894Sjeff	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
822251894Sjeff		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
823251894Sjeff#ifdef UMA_DEBUG
824251894Sjeff	printf("%s: Returning %d bytes.\n", keg->uk_name,
825251894Sjeff	    PAGE_SIZE * keg->uk_ppera);
826251894Sjeff#endif
827251894Sjeff	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
82892654Sjeff}
82992654Sjeff
83092654Sjeff/*
831187681Sjeff * Frees pages from a keg back to the system.  This is done on demand from
83292654Sjeff * the pageout daemon.
83392654Sjeff *
834187681Sjeff * Returns nothing.
83592654Sjeff */
836187681Sjeffstatic void
837187681Sjeffkeg_drain(uma_keg_t keg)
83892654Sjeff{
839139996Sstefanf	struct slabhead freeslabs = { 0 };
84092654Sjeff	uma_slab_t slab;
84192654Sjeff	uma_slab_t n;
84292654Sjeff
84392654Sjeff	/*
844187681Sjeff	 * We don't want to take pages from statically allocated kegs at this
84592654Sjeff	 * time
84692654Sjeff	 */
847129906Sbmilekic	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
84892654Sjeff		return;
84992654Sjeff
85092654Sjeff#ifdef UMA_DEBUG
851187681Sjeff	printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
85292654Sjeff#endif
853187681Sjeff	KEG_LOCK(keg);
854129906Sbmilekic	if (keg->uk_free == 0)
85592654Sjeff		goto finished;
85692654Sjeff
857129906Sbmilekic	slab = LIST_FIRST(&keg->uk_free_slab);
858120262Sjeff	while (slab) {
85992654Sjeff		n = LIST_NEXT(slab, us_link);
86092654Sjeff
86192654Sjeff		/* We have nowhere to free these to */
86292654Sjeff		if (slab->us_flags & UMA_SLAB_BOOT) {
86392654Sjeff			slab = n;
86492654Sjeff			continue;
86592654Sjeff		}
86692654Sjeff
86792654Sjeff		LIST_REMOVE(slab, us_link);
868129906Sbmilekic		keg->uk_pages -= keg->uk_ppera;
869129906Sbmilekic		keg->uk_free -= keg->uk_ipers;
87096496Sjeff
871129906Sbmilekic		if (keg->uk_flags & UMA_ZONE_HASH)
872129906Sbmilekic			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
87396496Sjeff
87496496Sjeff		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
87596496Sjeff
87696496Sjeff		slab = n;
87796496Sjeff	}
87896496Sjefffinished:
879187681Sjeff	KEG_UNLOCK(keg);
88096496Sjeff
88196496Sjeff	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
88296496Sjeff		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
883255097Smckusick		keg_free_slab(keg, slab, keg->uk_ipers);
88492654Sjeff	}
88592654Sjeff}
88692654Sjeff
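/*
 * Drain the zone's bucket cache and keg(s).  If waitok is M_WAITOK, wait
 * for a concurrent drain to finish instead of returning early.
 */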
887187681Sjeffstatic void
888187681Sjeffzone_drain_wait(uma_zone_t zone, int waitok)
889187681Sjeff{
890187681Sjeff
891187681Sjeff	/*
892187681Sjeff	 * Set draining to interlock with zone_dtor() so we can release our
893187681Sjeff	 * locks as we go.  Only dtor() should do a WAITOK call since it
894187681Sjeff	 * is the only call that knows the structure will still be available
895187681Sjeff	 * when it wakes up.
896187681Sjeff	 */
897187681Sjeff	ZONE_LOCK(zone);
898187681Sjeff	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
899187681Sjeff		if (waitok == M_NOWAIT)
900187681Sjeff			goto out;
901187681Sjeff		mtx_unlock(&uma_mtx);
902252040Sjeff		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
903187681Sjeff		mtx_lock(&uma_mtx);
904187681Sjeff	}
905187681Sjeff	zone->uz_flags |= UMA_ZFLAG_DRAINING;
906187681Sjeff	bucket_cache_drain(zone);
907187681Sjeff	ZONE_UNLOCK(zone);
908187681Sjeff	/*
909187681Sjeff	 * The DRAINING flag protects us from being freed while
910187681Sjeff	 * we're running.  Normally the uma_mtx would protect us but we
911187681Sjeff	 * must be able to release and acquire the right lock for each keg.
912187681Sjeff	 */
913187681Sjeff	zone_foreach_keg(zone, &keg_drain);
914187681Sjeff	ZONE_LOCK(zone);
915187681Sjeff	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
916187681Sjeff	wakeup(zone);
917187681Sjeffout:
918187681Sjeff	ZONE_UNLOCK(zone);
919187681Sjeff}
920187681Sjeff
921187681Sjeffvoid
922187681Sjeffzone_drain(uma_zone_t zone)
923187681Sjeff{
924187681Sjeff
925187681Sjeff	zone_drain_wait(zone, M_NOWAIT);
926187681Sjeff}
927187681Sjeff
92892654Sjeff/*
929187681Sjeff * Allocate a new slab for a keg.  This does not insert the slab onto a list.
93092654Sjeff *
93192654Sjeff * Arguments:
93292654Sjeff *	wait  Shall we wait?
93392654Sjeff *
93492654Sjeff * Returns:
93592654Sjeff *	The slab that was allocated or NULL if there is no memory and the
93692654Sjeff *	caller specified M_NOWAIT.
93792654Sjeff */
938125246Sdesstatic uma_slab_t
939187681Sjeffkeg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
94092654Sjeff{
941129906Sbmilekic	uma_slabrefcnt_t slabref;
942187681Sjeff	uma_alloc allocf;
943129906Sbmilekic	uma_slab_t slab;
944249313Sglebius	uint8_t *mem;
945249313Sglebius	uint8_t flags;
94692654Sjeff	int i;
94792654Sjeff
948187681Sjeff	mtx_assert(&keg->uk_lock, MA_OWNED);
94994159Sjeff	slab = NULL;
950251894Sjeff	mem = NULL;
95194159Sjeff
95292654Sjeff#ifdef UMA_DEBUG
953251826Sjeff	printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
95492654Sjeff#endif
955187681Sjeff	allocf = keg->uk_allocf;
956187681Sjeff	KEG_UNLOCK(keg);
95794159Sjeff
958129906Sbmilekic	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
959187681Sjeff		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
960251894Sjeff		if (slab == NULL)
961251894Sjeff			goto out;
96294159Sjeff	}
96394159Sjeff
96498451Sjeff	/*
96598451Sjeff	 * This reproduces the old vm_zone behavior of zero filling pages the
96698451Sjeff	 * first time they are added to a zone.
96798451Sjeff	 *
96898451Sjeff	 * Malloced items are zeroed in uma_zalloc.
96998451Sjeff	 */
97098451Sjeff
971129906Sbmilekic	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
97298451Sjeff		wait |= M_ZERO;
97398451Sjeff	else
97498451Sjeff		wait &= ~M_ZERO;
97598451Sjeff
976230623Skmacy	if (keg->uk_flags & UMA_ZONE_NODUMP)
977230623Skmacy		wait |= M_NODUMP;
978230623Skmacy
979187681Sjeff	/* zone is passed for legacy reasons. */
980249264Sglebius	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
981120224Sjeff	if (mem == NULL) {
982132987Sgreen		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
983251826Sjeff			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
984251894Sjeff		slab = NULL;
985251894Sjeff		goto out;
98692654Sjeff	}
98792654Sjeff
98898822Sjeff	/* Point the slab into the allocated memory */
989129906Sbmilekic	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
990129906Sbmilekic		slab = (uma_slab_t )(mem + keg->uk_pgoff);
99198822Sjeff
992187681Sjeff	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
993129906Sbmilekic		for (i = 0; i < keg->uk_ppera; i++)
994103531Sjeff			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
99592654Sjeff
996129906Sbmilekic	slab->us_keg = keg;
99792654Sjeff	slab->us_data = mem;
998129906Sbmilekic	slab->us_freecount = keg->uk_ipers;
99992654Sjeff	slab->us_flags = flags;
1000251709Sjeff	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
1001251709Sjeff#ifdef INVARIANTS
1002251709Sjeff	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1003251709Sjeff#endif
1004129906Sbmilekic	if (keg->uk_flags & UMA_ZONE_REFCNT) {
1005129906Sbmilekic		slabref = (uma_slabrefcnt_t)slab;
1006129906Sbmilekic		for (i = 0; i < keg->uk_ipers; i++)
1007251709Sjeff			slabref->us_refcnt[i] = 0;
1008129906Sbmilekic	}
1009129906Sbmilekic
1010132987Sgreen	if (keg->uk_init != NULL) {
1011129906Sbmilekic		for (i = 0; i < keg->uk_ipers; i++)
1012132987Sgreen			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
1013132987Sgreen			    keg->uk_size, wait) != 0)
1014132987Sgreen				break;
1015132987Sgreen		if (i != keg->uk_ipers) {
1016251894Sjeff			keg_free_slab(keg, slab, i);
1017251894Sjeff			slab = NULL;
1018251894Sjeff			goto out;
1019132987Sgreen		}
1020132987Sgreen	}
1021251894Sjeffout:
1022187681Sjeff	KEG_LOCK(keg);
102392654Sjeff
1024251894Sjeff	if (slab != NULL) {
1025251894Sjeff		if (keg->uk_flags & UMA_ZONE_HASH)
1026251894Sjeff			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
102798822Sjeff
1028251894Sjeff		keg->uk_pages += keg->uk_ppera;
1029251894Sjeff		keg->uk_free += keg->uk_ipers;
1030251894Sjeff	}
103192654Sjeff
103292654Sjeff	return (slab);
103392654Sjeff}
103492654Sjeff
103592654Sjeff/*
1036120311Sjeff * This function is intended to be used early on in place of page_alloc() so
1037120311Sjeff * that we may use the boot time page cache to satisfy allocations before
1038120311Sjeff * the VM is ready.
1039120311Sjeff */
1040120311Sjeffstatic void *
1041249313Sglebiusstartup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
1042120311Sjeff{
1043129906Sbmilekic	uma_keg_t keg;
1044149900Salc	uma_slab_t tmps;
1045214782Sjhb	int pages, check_pages;
1046129906Sbmilekic
1047187681Sjeff	keg = zone_first_keg(zone);
1048214782Sjhb	pages = howmany(bytes, PAGE_SIZE);
1049214782Sjhb	check_pages = pages - 1;
1050214782Sjhb	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
1051129906Sbmilekic
1052120311Sjeff	/*
1053120311Sjeff	 * Check our small startup cache to see if it has pages remaining.
1054120311Sjeff	 */
1055149900Salc	mtx_lock(&uma_boot_pages_mtx);
1056214782Sjhb
1057214782Sjhb	/* First check if we have enough room. */
1058214782Sjhb	tmps = LIST_FIRST(&uma_boot_pages);
1059214782Sjhb	while (tmps != NULL && check_pages-- > 0)
1060214782Sjhb		tmps = LIST_NEXT(tmps, us_link);
1061214782Sjhb	if (tmps != NULL) {
1062214782Sjhb		/*
1063214782Sjhb		 * It's ok to lose tmps references.  The last one will
1064214782Sjhb		 * have tmps->us_data pointing to the start address of
1065214782Sjhb		 * "pages" contiguous pages of memory.
1066214782Sjhb		 */
1067214782Sjhb		while (pages-- > 0) {
1068214782Sjhb			tmps = LIST_FIRST(&uma_boot_pages);
1069214782Sjhb			LIST_REMOVE(tmps, us_link);
1070214782Sjhb		}
1071149900Salc		mtx_unlock(&uma_boot_pages_mtx);
1072120311Sjeff		*pflag = tmps->us_flags;
1073120311Sjeff		return (tmps->us_data);
1074120311Sjeff	}
1075149900Salc	mtx_unlock(&uma_boot_pages_mtx);
1076222163Salc	if (booted < UMA_STARTUP2)
1077151104Sdes		panic("UMA: Increase vm.boot_pages");
1078120311Sjeff	/*
1079120311Sjeff	 * Now that we've booted reset these users to their real allocator.
1080120311Sjeff	 */
1081120311Sjeff#ifdef UMA_MD_SMALL_ALLOC
1082214782Sjhb	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1083120311Sjeff#else
1084129906Sbmilekic	keg->uk_allocf = page_alloc;
1085120311Sjeff#endif
1086129906Sbmilekic	return keg->uk_allocf(zone, bytes, pflag, wait);
1087120311Sjeff}
1088120311Sjeff
1089120311Sjeff/*
109092654Sjeff * Allocates a number of pages from the system
109192654Sjeff *
109292654Sjeff * Arguments:
109392654Sjeff *	bytes  The number of bytes requested
109492654Sjeff *	wait  Shall we wait?
109592654Sjeff *
109692654Sjeff * Returns:
1097125246Sdes *	A pointer to the allocated memory or possibly
109892654Sjeff *	NULL if M_NOWAIT is set.
109992654Sjeff */
110092654Sjeffstatic void *
1101249313Sglebiuspage_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
110292654Sjeff{
110392654Sjeff	void *p;	/* Returned page */
110492654Sjeff
110598451Sjeff	*pflag = UMA_SLAB_KMEM;
1106254025Sjeff	p = (void *) kmem_malloc(kmem_arena, bytes, wait);
1107125246Sdes
110892654Sjeff	return (p);
110992654Sjeff}
111092654Sjeff
111192654Sjeff/*
111292654Sjeff * Allocates a number of pages not belonging to a VM object
111392654Sjeff *
111492654Sjeff * Arguments:
111592654Sjeff *	bytes  The number of bytes requested
111692654Sjeff *	wait   Shall we wait?
111792654Sjeff *
111892654Sjeff * Returns:
1119125246Sdes *	A pointer to the allocated memory or possibly
112092654Sjeff *	NULL if M_NOWAIT is set.
112192654Sjeff */
112292654Sjeffstatic void *
1123249313Sglebiusnoobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
112492654Sjeff{
1125247360Sattilio	TAILQ_HEAD(, vm_page) alloctail;
1126247360Sattilio	u_long npages;
1127118380Salc	vm_offset_t retkva, zkva;
1128247360Sattilio	vm_page_t p, p_next;
1129187681Sjeff	uma_keg_t keg;
113092654Sjeff
1131247360Sattilio	TAILQ_INIT(&alloctail);
1132187681Sjeff	keg = zone_first_keg(zone);
113392654Sjeff
1134247360Sattilio	npages = howmany(bytes, PAGE_SIZE);
1135247360Sattilio	while (npages > 0) {
1136247360Sattilio		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1137247360Sattilio		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1138247360Sattilio		if (p != NULL) {
1139247360Sattilio			/*
1140247360Sattilio			 * Since the page does not belong to an object, its
1141247360Sattilio			 * listq is unused.
1142247360Sattilio			 */
1143247360Sattilio			TAILQ_INSERT_TAIL(&alloctail, p, listq);
1144247360Sattilio			npages--;
1145247360Sattilio			continue;
1146118380Salc		}
1147247360Sattilio		if (wait & M_WAITOK) {
1148247360Sattilio			VM_WAIT;
1149247360Sattilio			continue;
1150247360Sattilio		}
1151247360Sattilio
1152247360Sattilio		/*
1153247360Sattilio		 * Page allocation failed, free intermediate pages and
1154247360Sattilio		 * exit.
1155247360Sattilio		 */
1156247360Sattilio		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1157247360Sattilio			vm_page_unwire(p, 0);
1158247360Sattilio			vm_page_free(p);
1159247360Sattilio		}
1160247360Sattilio		return (NULL);
1161247360Sattilio	}
1162247360Sattilio	*flags = UMA_SLAB_PRIV;
1163247360Sattilio	zkva = keg->uk_kva +
1164247360Sattilio	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1165247360Sattilio	retkva = zkva;
1166247360Sattilio	TAILQ_FOREACH(p, &alloctail, listq) {
1167118380Salc		pmap_qenter(zkva, &p, 1);
1168118380Salc		zkva += PAGE_SIZE;
116992654Sjeff	}
117092654Sjeff
117192654Sjeff	return ((void *)retkva);
117292654Sjeff}
117392654Sjeff
117492654Sjeff/*
117592654Sjeff * Frees a number of pages to the system
1176125246Sdes *
117792654Sjeff * Arguments:
117892654Sjeff *	mem   A pointer to the memory to be freed
117992654Sjeff *	size  The size of the memory being freed
118092654Sjeff *	flags The original p->us_flags field
118192654Sjeff *
118292654Sjeff * Returns:
118392654Sjeff *	Nothing
118492654Sjeff */
118592654Sjeffstatic void
1186249313Sglebiuspage_free(void *mem, int size, uint8_t flags)
118792654Sjeff{
1188254025Sjeff	struct vmem *vmem;
118998451Sjeff
119092654Sjeff	if (flags & UMA_SLAB_KMEM)
1191254025Sjeff		vmem = kmem_arena;
1192194429Salc	else if (flags & UMA_SLAB_KERNEL)
1193254025Sjeff		vmem = kernel_arena;
119492654Sjeff	else
1195194429Salc		panic("UMA: page_free used with invalid flags %d", flags);
119692654Sjeff
1197254025Sjeff	kmem_free(vmem, (vm_offset_t)mem, size);
119892654Sjeff}
119992654Sjeff
120092654Sjeff/*
120192654Sjeff * Zero fill initializer
120292654Sjeff *
120392654Sjeff * Arguments/Returns follow uma_init specifications
120492654Sjeff */
1205132987Sgreenstatic int
1206132987Sgreenzero_init(void *mem, int size, int flags)
120792654Sjeff{
120892654Sjeff	bzero(mem, size);
1209132987Sgreen	return (0);
121092654Sjeff}
121192654Sjeff
121292654Sjeff/*
1213187681Sjeff * Finish creating a small uma keg.  This calculates ipers, and the keg size.
121492654Sjeff *
121592654Sjeff * Arguments
121692654Sjeff *	keg  The keg we should initialize
121792654Sjeff *
121892654Sjeff * Returns
121992654Sjeff *	Nothing
122092654Sjeff */
122192654Sjeffstatic void
1222187681Sjeffkeg_small_init(uma_keg_t keg)
122392654Sjeff{
1224132842Sbmilekic	u_int rsize;
1225132842Sbmilekic	u_int memused;
1226132842Sbmilekic	u_int wastedspace;
1227132842Sbmilekic	u_int shsize;
122892654Sjeff
1229249264Sglebius	if (keg->uk_flags & UMA_ZONE_PCPU) {
1230253565Sglebius		u_int ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
1231253565Sglebius
1232249264Sglebius		keg->uk_slabsize = sizeof(struct pcpu);
1233253565Sglebius		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
1234249264Sglebius		    PAGE_SIZE);
1235249264Sglebius	} else {
1236249264Sglebius		keg->uk_slabsize = UMA_SLAB_SIZE;
1237249264Sglebius		keg->uk_ppera = 1;
1238249264Sglebius	}
1239249264Sglebius
1240251709Sjeff	/*
1241251709Sjeff	 * Calculate the size of each allocation (rsize) according to
1242251709Sjeff	 * alignment.  If the requested size is smaller than we have
1243251709Sjeff	 * allocation bits for we round it up.
1244251709Sjeff	 */
1245129906Sbmilekic	rsize = keg->uk_size;
1246251709Sjeff	if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
1247251709Sjeff		rsize = keg->uk_slabsize / SLAB_SETSIZE;
1248129906Sbmilekic	if (rsize & keg->uk_align)
1249129906Sbmilekic		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1250129906Sbmilekic	keg->uk_rsize = rsize;
125192654Sjeff
1252249264Sglebius	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1253249264Sglebius	    keg->uk_rsize < sizeof(struct pcpu),
1254249264Sglebius	    ("%s: size %u too large", __func__, keg->uk_rsize));
1255249264Sglebius
1256251709Sjeff	if (keg->uk_flags & UMA_ZONE_REFCNT)
1257251709Sjeff		rsize += sizeof(uint32_t);
1258251709Sjeff
1259251709Sjeff	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1260240676Sglebius		shsize = 0;
1261251709Sjeff	else
1262132842Sbmilekic		shsize = sizeof(struct uma_slab);
126392654Sjeff
1264249264Sglebius	keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
1265251709Sjeff	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1266249264Sglebius	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1267249264Sglebius
1268132842Sbmilekic	memused = keg->uk_ipers * rsize + shsize;
1269249264Sglebius	wastedspace = keg->uk_slabsize - memused;
1270132842Sbmilekic
1271132842Sbmilekic	/*
1272132842Sbmilekic	 * We can't do OFFPAGE if we're internal or if we've been
1273132842Sbmilekic	 * asked to not go to the VM for buckets.  If we do this we
1274252226Sjeff	 * may end up going to the VM for slabs which we do not
1275252226Sjeff	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
1276252226Sjeff	 * of UMA_ZONE_VM, which clearly forbids it.
1277132842Sbmilekic	 */
1278132842Sbmilekic	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1279132842Sbmilekic	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1280132842Sbmilekic		return;
1281132842Sbmilekic
1282251709Sjeff	/*
1283251709Sjeff	 * See if using an OFFPAGE slab will limit our waste.  Only do
1284251709Sjeff	 * this if it permits more items per-slab.
1285251709Sjeff	 *
1286251709Sjeff	 * XXX We could try growing slabsize to limit max waste as well.
1287251709Sjeff	 * Historically this was not done because the VM could not
1288251709Sjeff	 * efficiently handle contiguous allocations.
1289251709Sjeff	 */
1290249264Sglebius	if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
1291249264Sglebius	    (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
1292249264Sglebius		keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
1293251709Sjeff		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1294249264Sglebius		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1295132842Sbmilekic#ifdef UMA_DEBUG
1296132842Sbmilekic		printf("UMA decided we need offpage slab headers for "
1297187681Sjeff		    "keg: %s, calculated wastedspace = %d, "
1298132842Sbmilekic		    "maximum wasted space allowed = %d, "
1299132842Sbmilekic		    "calculated ipers = %d, "
1300187681Sjeff		    "new wasted space = %d\n", keg->uk_name, wastedspace,
1301249264Sglebius		    keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1302249264Sglebius		    keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
1303132842Sbmilekic#endif
1304132842Sbmilekic		keg->uk_flags |= UMA_ZONE_OFFPAGE;
130592654Sjeff	}
1306249264Sglebius
1307249264Sglebius	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1308249264Sglebius	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1309249264Sglebius		keg->uk_flags |= UMA_ZONE_HASH;
131092654Sjeff}
131192654Sjeff
131292654Sjeff/*
1313187681Sjeff * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
131492654Sjeff * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
131592654Sjeff * more complicated.
131692654Sjeff *
131792654Sjeff * Arguments
1318187681Sjeff *	keg  The keg we should initialize
131992654Sjeff *
132092654Sjeff * Returns
132192654Sjeff *	Nothing
132292654Sjeff */
132392654Sjeffstatic void
1324187681Sjeffkeg_large_init(uma_keg_t keg)
1325125246Sdes{
1326260305Smav	u_int shsize;
132792654Sjeff
1328187681Sjeff	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1329129906Sbmilekic	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1330187681Sjeff	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1331249264Sglebius	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1332249264Sglebius	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1333118795Sbmilekic
1334249264Sglebius	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1335249264Sglebius	keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
1336129906Sbmilekic	keg->uk_ipers = 1;
1337214782Sjhb	keg->uk_rsize = keg->uk_size;
133892654Sjeff
1339214782Sjhb	/* We can't do OFFPAGE if we're internal, bail out here. */
1340214782Sjhb	if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1341214782Sjhb		return;
1342214782Sjhb
1343260305Smav	/* Check whether we have enough space to not do OFFPAGE. */
1344260305Smav	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1345260305Smav		shsize = sizeof(struct uma_slab);
1346260305Smav		if (keg->uk_flags & UMA_ZONE_REFCNT)
1347260305Smav			shsize += keg->uk_ipers * sizeof(uint32_t);
1348260305Smav		if (shsize & UMA_ALIGN_PTR)
1349260305Smav			shsize = (shsize & ~UMA_ALIGN_PTR) +
1350260305Smav			    (UMA_ALIGN_PTR + 1);
1351260305Smav
1352260305Smav		if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
1353260305Smav			keg->uk_flags |= UMA_ZONE_OFFPAGE;
1354260305Smav	}
1355260305Smav
1356260305Smav	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1357260305Smav	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1358129906Sbmilekic		keg->uk_flags |= UMA_ZONE_HASH;
135992654Sjeff}
136092654Sjeff
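/*
 * Initialize a keg for the CACHESPREAD policy: size items so that
 * consecutive items start on successive alignment boundaries, using
 * multi-page, offpage slabs.
 */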
1361187681Sjeffstatic void
1362187681Sjeffkeg_cachespread_init(uma_keg_t keg)
1363187681Sjeff{
1364187681Sjeff	int alignsize;
1365187681Sjeff	int trailer;
1366187681Sjeff	int pages;
1367187681Sjeff	int rsize;
1368187681Sjeff
1369249264Sglebius	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1370249264Sglebius	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1371249264Sglebius
1372187681Sjeff	alignsize = keg->uk_align + 1;
1373187681Sjeff	rsize = keg->uk_size;
1374187681Sjeff	/*
1375187681Sjeff	 * We want one item to start on every align boundary in a page.  To
1376187681Sjeff	 * do this we will span pages.  We will also extend the item by the
1377187681Sjeff	 * size of align if it is an even multiple of align.  Otherwise, it
1378187681Sjeff	 * would fall on the same boundary every time.
1379187681Sjeff	 */
1380187681Sjeff	if (rsize & keg->uk_align)
1381187681Sjeff		rsize = (rsize & ~keg->uk_align) + alignsize;
1382187681Sjeff	if ((rsize & alignsize) == 0)
1383187681Sjeff		rsize += alignsize;
1384187681Sjeff	trailer = rsize - keg->uk_size;
1385187681Sjeff	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1386187681Sjeff	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1387187681Sjeff	keg->uk_rsize = rsize;
1388187681Sjeff	keg->uk_ppera = pages;
1389249264Sglebius	keg->uk_slabsize = UMA_SLAB_SIZE;
1390187681Sjeff	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1391187681Sjeff	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1392262127Sdim	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1393239710Sglebius	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1394187681Sjeff	    keg->uk_ipers));
1395187681Sjeff}
1396187681Sjeff
1397125246Sdes/*
1398129906Sbmilekic * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1399129906Sbmilekic * the keg onto the global keg list.
140092654Sjeff *
140192654Sjeff * Arguments/Returns follow uma_ctor specifications
1402129906Sbmilekic *	udata  Actually uma_kctor_args
140392654Sjeff */
1404132987Sgreenstatic int
1405132987Sgreenkeg_ctor(void *mem, int size, void *udata, int flags)
140692654Sjeff{
1407129906Sbmilekic	struct uma_kctor_args *arg = udata;
1408129906Sbmilekic	uma_keg_t keg = mem;
1409129906Sbmilekic	uma_zone_t zone;
141092654Sjeff
1411129906Sbmilekic	bzero(keg, size);
1412129906Sbmilekic	keg->uk_size = arg->size;
1413129906Sbmilekic	keg->uk_init = arg->uminit;
1414129906Sbmilekic	keg->uk_fini = arg->fini;
1415129906Sbmilekic	keg->uk_align = arg->align;
1416129906Sbmilekic	keg->uk_free = 0;
1417252226Sjeff	keg->uk_reserve = 0;
1418129906Sbmilekic	keg->uk_pages = 0;
1419129906Sbmilekic	keg->uk_flags = arg->flags;
1420129906Sbmilekic	keg->uk_allocf = page_alloc;
1421129906Sbmilekic	keg->uk_freef = page_free;
1422129906Sbmilekic	keg->uk_slabzone = NULL;
142392654Sjeff
1424129906Sbmilekic	/*
1425129906Sbmilekic	 * The master zone is passed to us at keg-creation time.
1426129906Sbmilekic	 */
1427129906Sbmilekic	zone = arg->zone;
1428187681Sjeff	keg->uk_name = zone->uz_name;
142992654Sjeff
143098361Sjeff	if (arg->flags & UMA_ZONE_VM)
1431129906Sbmilekic		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
143298361Sjeff
1433129906Sbmilekic	if (arg->flags & UMA_ZONE_ZINIT)
1434129906Sbmilekic		keg->uk_init = zero_init;
1435129906Sbmilekic
1436187681Sjeff	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1437187681Sjeff		keg->uk_flags |= UMA_ZONE_VTOSLAB;
1438187681Sjeff
1439249264Sglebius	if (arg->flags & UMA_ZONE_PCPU)
1440249264Sglebius#ifdef SMP
1441249264Sglebius		keg->uk_flags |= UMA_ZONE_OFFPAGE;
1442249264Sglebius#else
1443249264Sglebius		keg->uk_flags &= ~UMA_ZONE_PCPU;
1444249264Sglebius#endif
1445249264Sglebius
1446251709Sjeff	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1447251709Sjeff		keg_cachespread_init(keg);
1448251709Sjeff	} else if (keg->uk_flags & UMA_ZONE_REFCNT) {
1449251709Sjeff		if (keg->uk_size >
1450251709Sjeff		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
1451251709Sjeff		    sizeof(uint32_t)))
1452187681Sjeff			keg_large_init(keg);
1453132842Sbmilekic		else
1454187681Sjeff			keg_small_init(keg);
1455132842Sbmilekic	} else {
1456251709Sjeff		if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1457187681Sjeff			keg_large_init(keg);
1458132842Sbmilekic		else
1459187681Sjeff			keg_small_init(keg);
1460132842Sbmilekic	}
1461129906Sbmilekic
1462132842Sbmilekic	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1463251709Sjeff		if (keg->uk_flags & UMA_ZONE_REFCNT) {
1464251709Sjeff			if (keg->uk_ipers > uma_max_ipers_ref)
1465251709Sjeff				panic("Too many ref items per zone: %d > %d\n",
1466251709Sjeff				    keg->uk_ipers, uma_max_ipers_ref);
1467132842Sbmilekic			keg->uk_slabzone = slabrefzone;
1468251709Sjeff		} else
1469132842Sbmilekic			keg->uk_slabzone = slabzone;
1470132842Sbmilekic	}
1471129906Sbmilekic
1472120311Sjeff	/*
1473120311Sjeff	 * If we haven't booted yet, we need allocations to go through the
1474120311Sjeff	 * startup cache until the VM is ready.
1475120311Sjeff	 */
1476129906Sbmilekic	if (keg->uk_ppera == 1) {
1477120311Sjeff#ifdef UMA_MD_SMALL_ALLOC
1478129906Sbmilekic		keg->uk_allocf = uma_small_alloc;
1479129906Sbmilekic		keg->uk_freef = uma_small_free;
1480222184Salc
1481222163Salc		if (booted < UMA_STARTUP)
1482129906Sbmilekic			keg->uk_allocf = startup_alloc;
1483222184Salc#else
1484222184Salc		if (booted < UMA_STARTUP2)
1485222184Salc			keg->uk_allocf = startup_alloc;
1486222184Salc#endif
1487222163Salc	} else if (booted < UMA_STARTUP2 &&
1488222163Salc	    (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1489214782Sjhb		keg->uk_allocf = startup_alloc;
1490129906Sbmilekic
1491129906Sbmilekic	/*
1492252040Sjeff	 * Initialize the keg's lock.
1493129906Sbmilekic	 */
1494252040Sjeff	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
149595758Sjeff
149692654Sjeff	/*
149792654Sjeff	 * If we're putting the slab header in the actual page, we need to
1498125246Sdes	 * figure out where in each page it goes.  This calculates a
1499108533Sschweikh	 * right-justified offset into the memory on an ALIGN_PTR boundary.
150092654Sjeff	 */
1501129906Sbmilekic	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1502132842Sbmilekic		u_int totsize;
150392654Sjeff
150492654Sjeff		/* Size of the slab struct and free list */
1505251709Sjeff		totsize = sizeof(struct uma_slab);
1506251709Sjeff
1507251709Sjeff		/* Size of the reference counts. */
1508132842Sbmilekic		if (keg->uk_flags & UMA_ZONE_REFCNT)
1509251709Sjeff			totsize += keg->uk_ipers * sizeof(uint32_t);
1510132842Sbmilekic
151192654Sjeff		if (totsize & UMA_ALIGN_PTR)
151292654Sjeff			totsize = (totsize & ~UMA_ALIGN_PTR) +
151392654Sjeff			    (UMA_ALIGN_PTR + 1);
1514249264Sglebius		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1515132842Sbmilekic
1516132842Sbmilekic		/*
1517132842Sbmilekic		 * The only way the following is possible is if our
1518132842Sbmilekic		 * UMA_ALIGN_PTR adjustments have made us bigger than
1519132842Sbmilekic		 * UMA_SLAB_SIZE.  I haven't checked whether this is
1520132842Sbmilekic		 * mathematically possible for all cases, so we make
1521132842Sbmilekic		 * sure here anyway.
1522132842Sbmilekic		 */
1523251709Sjeff		totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1524251709Sjeff		if (keg->uk_flags & UMA_ZONE_REFCNT)
1525251709Sjeff			totsize += keg->uk_ipers * sizeof(uint32_t);
1526249264Sglebius		if (totsize > PAGE_SIZE * keg->uk_ppera) {
152792654Sjeff			printf("zone %s ipers %d rsize %d size %d\n",
1528129906Sbmilekic			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1529129906Sbmilekic			    keg->uk_size);
1530194429Salc			panic("UMA slab won't fit.");
153192654Sjeff		}
153292654Sjeff	}
153392654Sjeff
1534129906Sbmilekic	if (keg->uk_flags & UMA_ZONE_HASH)
1535129906Sbmilekic		hash_alloc(&keg->uk_hash);
1536103531Sjeff
153792654Sjeff#ifdef UMA_DEBUG
1538241825Seadler	printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1539187681Sjeff	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1540187681Sjeff	    keg->uk_ipers, keg->uk_ppera,
1541187681Sjeff	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
154292654Sjeff#endif
154392654Sjeff
1544129906Sbmilekic	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1545129906Sbmilekic
154692654Sjeff	mtx_lock(&uma_mtx);
1547129906Sbmilekic	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
154892654Sjeff	mtx_unlock(&uma_mtx);
1549132987Sgreen	return (0);
1550129906Sbmilekic}
155192654Sjeff
1552129906Sbmilekic/*
1553129906Sbmilekic * Zone header ctor.  This initializes all fields, locks, etc.
1554129906Sbmilekic *
1555129906Sbmilekic * Arguments/Returns follow uma_ctor specifications
1556129906Sbmilekic *	udata  Actually uma_zctor_args
1557129906Sbmilekic */
1558132987Sgreenstatic int
1559132987Sgreenzone_ctor(void *mem, int size, void *udata, int flags)
1560129906Sbmilekic{
1561129906Sbmilekic	struct uma_zctor_args *arg = udata;
1562129906Sbmilekic	uma_zone_t zone = mem;
1563129906Sbmilekic	uma_zone_t z;
1564129906Sbmilekic	uma_keg_t keg;
1565129906Sbmilekic
1566129906Sbmilekic	bzero(zone, size);
1567129906Sbmilekic	zone->uz_name = arg->name;
1568129906Sbmilekic	zone->uz_ctor = arg->ctor;
1569129906Sbmilekic	zone->uz_dtor = arg->dtor;
1570187681Sjeff	zone->uz_slab = zone_fetch_slab;
1571129906Sbmilekic	zone->uz_init = NULL;
1572129906Sbmilekic	zone->uz_fini = NULL;
1573129906Sbmilekic	zone->uz_allocs = 0;
1574147995Srwatson	zone->uz_frees = 0;
1575148070Srwatson	zone->uz_fails = 0;
1576209215Ssbruno	zone->uz_sleeps = 0;
1577251894Sjeff	zone->uz_count = 0;
1578260300Smav	zone->uz_count_min = 0;
1579187681Sjeff	zone->uz_flags = 0;
1580243998Spjd	zone->uz_warning = NULL;
1581243998Spjd	timevalclear(&zone->uz_ratecheck);
1582187681Sjeff	keg = arg->keg;
1583129906Sbmilekic
1584252040Sjeff	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1585252040Sjeff
1586251826Sjeff	/*
1587251826Sjeff	 * This is a pure cache zone, no kegs.
1588251826Sjeff	 */
1589251826Sjeff	if (arg->import) {
1590252226Sjeff		if (arg->flags & UMA_ZONE_VM)
1591252226Sjeff			arg->flags |= UMA_ZFLAG_CACHEONLY;
1592252226Sjeff		zone->uz_flags = arg->flags;
1593252040Sjeff		zone->uz_size = arg->size;
1594251826Sjeff		zone->uz_import = arg->import;
1595251826Sjeff		zone->uz_release = arg->release;
1596251826Sjeff		zone->uz_arg = arg->arg;
1597252040Sjeff		zone->uz_lockptr = &zone->uz_lock;
1598260306Smav		mtx_lock(&uma_mtx);
1599260306Smav		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
1600260306Smav		mtx_unlock(&uma_mtx);
1601252040Sjeff		goto out;
1602251826Sjeff	}
1603251826Sjeff
1604251826Sjeff	/*
1605251826Sjeff	 * Use the regular zone/keg/slab allocator.
1606251826Sjeff	 */
1607251826Sjeff	zone->uz_import = (uma_import)zone_import;
1608251826Sjeff	zone->uz_release = (uma_release)zone_release;
1609251826Sjeff	zone->uz_arg = zone;
1610251826Sjeff
1611129906Sbmilekic	if (arg->flags & UMA_ZONE_SECONDARY) {
1612129906Sbmilekic		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1613129906Sbmilekic		zone->uz_init = arg->uminit;
1614129906Sbmilekic		zone->uz_fini = arg->fini;
1615252040Sjeff		zone->uz_lockptr = &keg->uk_lock;
1616187681Sjeff		zone->uz_flags |= UMA_ZONE_SECONDARY;
1617129906Sbmilekic		mtx_lock(&uma_mtx);
1618129906Sbmilekic		ZONE_LOCK(zone);
1619129906Sbmilekic		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1620129906Sbmilekic			if (LIST_NEXT(z, uz_link) == NULL) {
1621129906Sbmilekic				LIST_INSERT_AFTER(z, zone, uz_link);
1622129906Sbmilekic				break;
1623129906Sbmilekic			}
1624129906Sbmilekic		}
1625129906Sbmilekic		ZONE_UNLOCK(zone);
1626129906Sbmilekic		mtx_unlock(&uma_mtx);
1627187681Sjeff	} else if (keg == NULL) {
1628187681Sjeff		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1629187681Sjeff		    arg->align, arg->flags)) == NULL)
1630132987Sgreen			return (ENOMEM);
1631129906Sbmilekic	} else {
1632129906Sbmilekic		struct uma_kctor_args karg;
1633132987Sgreen		int error;
1634129906Sbmilekic
1635129906Sbmilekic		/* We should only be here from uma_startup() */
1636129906Sbmilekic		karg.size = arg->size;
1637129906Sbmilekic		karg.uminit = arg->uminit;
1638129906Sbmilekic		karg.fini = arg->fini;
1639129906Sbmilekic		karg.align = arg->align;
1640129906Sbmilekic		karg.flags = arg->flags;
1641129906Sbmilekic		karg.zone = zone;
1642132987Sgreen		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1643132987Sgreen		    flags);
1644132987Sgreen		if (error)
1645132987Sgreen			return (error);
1646129906Sbmilekic	}
1647251826Sjeff
1648187681Sjeff	/*
1649187681Sjeff	 * Link in the first keg.
1650187681Sjeff	 */
1651187681Sjeff	zone->uz_klink.kl_keg = keg;
1652187681Sjeff	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1653252040Sjeff	zone->uz_lockptr = &keg->uk_lock;
1654187681Sjeff	zone->uz_size = keg->uk_size;
1655187681Sjeff	zone->uz_flags |= (keg->uk_flags &
1656187681Sjeff	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1657129906Sbmilekic
165892654Sjeff	/*
165992654Sjeff	 * Some internal zones don't have room allocated for the per-CPU
166092654Sjeff	 * caches.  If we're internal, bail out here.
166192654Sjeff	 */
1662129906Sbmilekic	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1663187681Sjeff		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1664129906Sbmilekic		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1665132987Sgreen		return (0);
1666129906Sbmilekic	}
166792654Sjeff
1668252040Sjeffout:
1669252040Sjeff	if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
1670252040Sjeff		zone->uz_count = bucket_select(zone->uz_size);
167194159Sjeff	else
1672120218Sjeff		zone->uz_count = BUCKET_MAX;
1673260300Smav	zone->uz_count_min = zone->uz_count;
1674251894Sjeff
1675132987Sgreen	return (0);
167692654Sjeff}
167792654Sjeff
1678125246Sdes/*
1679129906Sbmilekic * Keg header dtor.  This frees all data, destroys locks, frees the hash
1680129906Sbmilekic * table and removes the keg from the global list.
168194161Sjeff *
168294161Sjeff * Arguments/Returns follow uma_dtor specifications
168394161Sjeff *	udata  unused
168494161Sjeff */
1685129906Sbmilekicstatic void
1686129906Sbmilekickeg_dtor(void *arg, int size, void *udata)
1687129906Sbmilekic{
1688129906Sbmilekic	uma_keg_t keg;
168994161Sjeff
1690129906Sbmilekic	keg = (uma_keg_t)arg;
1691187681Sjeff	KEG_LOCK(keg);
1692129906Sbmilekic	if (keg->uk_free != 0) {
1693258911Srodrigc		printf("Freed UMA keg (%s) was not empty (%d items). "
1694129906Sbmilekic		    "Lost %d pages of memory.\n",
1695258911Srodrigc		    keg->uk_name ? keg->uk_name : "",
1696129906Sbmilekic		    keg->uk_free, keg->uk_pages);
1697129906Sbmilekic	}
1698187681Sjeff	KEG_UNLOCK(keg);
1699129906Sbmilekic
1700187681Sjeff	hash_free(&keg->uk_hash);
1701129906Sbmilekic
1702187681Sjeff	KEG_LOCK_FINI(keg);
1703129906Sbmilekic}
1704129906Sbmilekic
1705129906Sbmilekic/*
1706129906Sbmilekic * Zone header dtor.
1707129906Sbmilekic *
1708129906Sbmilekic * Arguments/Returns follow uma_dtor specifications
1709129906Sbmilekic *	udata  unused
1710129906Sbmilekic */
171194161Sjeffstatic void
171294161Sjeffzone_dtor(void *arg, int size, void *udata)
171394161Sjeff{
1714187681Sjeff	uma_klink_t klink;
171594161Sjeff	uma_zone_t zone;
1716129906Sbmilekic	uma_keg_t keg;
171794161Sjeff
171894161Sjeff	zone = (uma_zone_t)arg;
1719187681Sjeff	keg = zone_first_keg(zone);
1720120262Sjeff
1721187681Sjeff	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1722120262Sjeff		cache_drain(zone);
1723129906Sbmilekic
172499472Sjeff	mtx_lock(&uma_mtx);
1725187681Sjeff	LIST_REMOVE(zone, uz_link);
1726187681Sjeff	mtx_unlock(&uma_mtx);
1727187681Sjeff	/*
1728187681Sjeff	 * XXX there are some races here where the zone can be
1729187681Sjeff	 * drained but the zone lock released and then refilled
1730187681Sjeff	 * before we remove it... we don't care for now.
1732187681Sjeff	 */
1733187681Sjeff	zone_drain_wait(zone, M_WAITOK);
1734187681Sjeff	/*
1735187681Sjeff	 * Unlink all of our kegs.
1736187681Sjeff	 */
1737187681Sjeff	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1738187681Sjeff		klink->kl_keg = NULL;
1739187681Sjeff		LIST_REMOVE(klink, kl_link);
1740187681Sjeff		if (klink == &zone->uz_klink)
1741187681Sjeff			continue;
1742187681Sjeff		free(klink, M_TEMP);
1743187681Sjeff	}
1744187681Sjeff	/*
1745187681Sjeff	 * We only destroy kegs from non-secondary zones.
1746187681Sjeff	 */
1747251826Sjeff	if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1748187681Sjeff		mtx_lock(&uma_mtx);
1749129906Sbmilekic		LIST_REMOVE(keg, uk_link);
1750129906Sbmilekic		mtx_unlock(&uma_mtx);
1751251826Sjeff		zone_free_item(kegs, keg, NULL, SKIP_NONE);
1752123057Sjeff	}
1753252040Sjeff	ZONE_LOCK_FINI(zone);
1754129906Sbmilekic}
175594161Sjeff
175692654Sjeff/*
175792654Sjeff * Traverses every zone in the system and calls a callback
175892654Sjeff *
175992654Sjeff * Arguments:
176092654Sjeff *	zfunc  A pointer to a function which accepts a zone
176192654Sjeff *		as an argument.
1762125246Sdes *
176392654Sjeff * Returns:
176492654Sjeff *	Nothing
176592654Sjeff */
1766125246Sdesstatic void
176792654Sjeffzone_foreach(void (*zfunc)(uma_zone_t))
176892654Sjeff{
1769129906Sbmilekic	uma_keg_t keg;
177092654Sjeff	uma_zone_t zone;
177192654Sjeff
177292654Sjeff	mtx_lock(&uma_mtx);
1773129906Sbmilekic	LIST_FOREACH(keg, &uma_kegs, uk_link) {
1774129906Sbmilekic		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1775129906Sbmilekic			zfunc(zone);
1776129906Sbmilekic	}
177792654Sjeff	mtx_unlock(&uma_mtx);
177892654Sjeff}
177992654Sjeff
178092654Sjeff/* Public functions */
178192654Sjeff/* See uma.h */
178292654Sjeffvoid
1783151104Sdesuma_startup(void *bootmem, int boot_pages)
178492654Sjeff{
178592654Sjeff	struct uma_zctor_args args;
178692654Sjeff	uma_slab_t slab;
1787132842Sbmilekic	u_int slabsize;
178892654Sjeff	int i;
178992654Sjeff
179092654Sjeff#ifdef UMA_DEBUG
1791129906Sbmilekic	printf("Creating uma keg headers zone and keg.\n");
179292654Sjeff#endif
1793149900Salc	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1794129906Sbmilekic
1795129906Sbmilekic	/* "manually" create the initial zone */
1796251826Sjeff	memset(&args, 0, sizeof(args));
1797129906Sbmilekic	args.name = "UMA Kegs";
1798129906Sbmilekic	args.size = sizeof(struct uma_keg);
1799129906Sbmilekic	args.ctor = keg_ctor;
1800129906Sbmilekic	args.dtor = keg_dtor;
180192654Sjeff	args.uminit = zero_init;
180292654Sjeff	args.fini = NULL;
1803129906Sbmilekic	args.keg = &masterkeg;
180492654Sjeff	args.align = 32 - 1;
1805120223Sjeff	args.flags = UMA_ZFLAG_INTERNAL;
180692654Sjeff	/* The initial zone has no per-CPU queues so it's smaller */
1807132987Sgreen	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
180892654Sjeff
180992654Sjeff#ifdef UMA_DEBUG
181092654Sjeff	printf("Filling boot free list.\n");
181192654Sjeff#endif
1812151104Sdes	for (i = 0; i < boot_pages; i++) {
1813249313Sglebius		slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1814249313Sglebius		slab->us_data = (uint8_t *)slab;
181592654Sjeff		slab->us_flags = UMA_SLAB_BOOT;
181692654Sjeff		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
181792654Sjeff	}
1818149900Salc	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
181992654Sjeff
182092654Sjeff#ifdef UMA_DEBUG
1821129906Sbmilekic	printf("Creating uma zone headers zone and keg.\n");
182292654Sjeff#endif
1823129906Sbmilekic	args.name = "UMA Zones";
1824129906Sbmilekic	args.size = sizeof(struct uma_zone) +
1825129906Sbmilekic	    (sizeof(struct uma_cache) * (mp_maxid + 1));
1826129906Sbmilekic	args.ctor = zone_ctor;
1827129906Sbmilekic	args.dtor = zone_dtor;
1828129906Sbmilekic	args.uminit = zero_init;
1829129906Sbmilekic	args.fini = NULL;
1830129906Sbmilekic	args.keg = NULL;
1831129906Sbmilekic	args.align = 32 - 1;
1832129906Sbmilekic	args.flags = UMA_ZFLAG_INTERNAL;
1833129906Sbmilekic	/* The initial zone has no per-CPU queues so it's smaller */
1834132987Sgreen	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
183592654Sjeff
1836129906Sbmilekic#ifdef UMA_DEBUG
1837129906Sbmilekic	printf("Initializing pcpu cache locks.\n");
1838129906Sbmilekic#endif
1839129906Sbmilekic#ifdef UMA_DEBUG
1840129906Sbmilekic	printf("Creating slab and hash zones.\n");
1841129906Sbmilekic#endif
1842129906Sbmilekic
184392654Sjeff	/* Now make a zone for slab headers */
184492654Sjeff	slabzone = uma_zcreate("UMA Slabs",
1845251709Sjeff				sizeof(struct uma_slab),
184692654Sjeff				NULL, NULL, NULL, NULL,
1847120223Sjeff				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
184892654Sjeff
1849129906Sbmilekic	/*
1850129906Sbmilekic	 * We also create a zone for the bigger slabs with reference
1851129906Sbmilekic	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1852129906Sbmilekic	 */
1853251709Sjeff	slabsize = sizeof(struct uma_slab_refcnt);
1854251709Sjeff	slabsize += uma_max_ipers_ref * sizeof(uint32_t);
1855129906Sbmilekic	slabrefzone = uma_zcreate("UMA RCntSlabs",
1856129906Sbmilekic				  slabsize,
1857129906Sbmilekic				  NULL, NULL, NULL, NULL,
1858130278Sbmilekic				  UMA_ALIGN_PTR,
1859130283Sbmilekic				  UMA_ZFLAG_INTERNAL);
1860129906Sbmilekic
186192654Sjeff	hashzone = uma_zcreate("UMA Hash",
186292654Sjeff	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
186392654Sjeff	    NULL, NULL, NULL, NULL,
1864120223Sjeff	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
186592654Sjeff
1866120218Sjeff	bucket_init();
186792654Sjeff
1868222163Salc	booted = UMA_STARTUP;
186992654Sjeff
187092654Sjeff#ifdef UMA_DEBUG
187192654Sjeff	printf("UMA startup complete.\n");
187292654Sjeff#endif
187392654Sjeff}
187492654Sjeff
187592654Sjeff/* see uma.h */
187692654Sjeffvoid
1877103531Sjeffuma_startup2(void)
187892654Sjeff{
1879222163Salc	booted = UMA_STARTUP2;
188094165Sjeff	bucket_enable();
188192654Sjeff#ifdef UMA_DEBUG
188292654Sjeff	printf("UMA startup2 complete.\n");
188392654Sjeff#endif
188492654Sjeff}
188592654Sjeff
188692654Sjeff/*
188792654Sjeff * Initialize our callout handle.
188992654Sjeff */
189092654Sjeff
189192654Sjeffstatic void
189292654Sjeffuma_startup3(void)
189392654Sjeff{
189492654Sjeff#ifdef UMA_DEBUG
189592654Sjeff	printf("Starting callout.\n");
189692654Sjeff#endif
1897126714Srwatson	callout_init(&uma_callout, CALLOUT_MPSAFE);
1898120262Sjeff	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
189992654Sjeff#ifdef UMA_DEBUG
190092654Sjeff	printf("UMA startup3 complete.\n");
190192654Sjeff#endif
190292654Sjeff}
190392654Sjeff
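/*
 * Allocate and construct a keg backing the given zone.  This packs the
 * caller's arguments into a uma_kctor_args structure and allocates the
 * keg from the internal kegs zone, so keg_ctor() does the real work.
 */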
1904187681Sjeffstatic uma_keg_t
1905129906Sbmilekicuma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1906249313Sglebius		int align, uint32_t flags)
1907129906Sbmilekic{
1908129906Sbmilekic	struct uma_kctor_args args;
1909129906Sbmilekic
1910129906Sbmilekic	args.size = size;
1911129906Sbmilekic	args.uminit = uminit;
1912129906Sbmilekic	args.fini = fini;
1913166654Srwatson	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1914129906Sbmilekic	args.flags = flags;
1915129906Sbmilekic	args.zone = zone;
1916187681Sjeff	return (zone_alloc_item(kegs, &args, M_WAITOK));
1917129906Sbmilekic}
1918129906Sbmilekic
191992654Sjeff/* See uma.h */
1920166654Srwatsonvoid
1921166654Srwatsonuma_set_align(int align)
1922166654Srwatson{
1923166654Srwatson
1924166654Srwatson	if (align != UMA_ALIGN_CACHE)
1925166654Srwatson		uma_align_cache = align;
1926166654Srwatson}
1927166654Srwatson
1928166654Srwatson/* See uma.h */
1929125246Sdesuma_zone_t
1930242152Smdfuma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1931249313Sglebius		uma_init uminit, uma_fini fini, int align, uint32_t flags)
1932125246Sdes
193392654Sjeff{
193492654Sjeff	struct uma_zctor_args args;
193592654Sjeff
193692654Sjeff	/* This stuff is essential for the zone ctor */
1937251826Sjeff	memset(&args, 0, sizeof(args));
193892654Sjeff	args.name = name;
193992654Sjeff	args.size = size;
194092654Sjeff	args.ctor = ctor;
194192654Sjeff	args.dtor = dtor;
194292654Sjeff	args.uminit = uminit;
194392654Sjeff	args.fini = fini;
194492654Sjeff	args.align = align;
194592654Sjeff	args.flags = flags;
1946129906Sbmilekic	args.keg = NULL;
194792654Sjeff
1948187681Sjeff	return (zone_alloc_item(zones, &args, M_WAITOK));
194992654Sjeff}
195092654Sjeff
195192654Sjeff/* See uma.h */
1952129906Sbmilekicuma_zone_t
1953129906Sbmilekicuma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1954129906Sbmilekic		    uma_init zinit, uma_fini zfini, uma_zone_t master)
1955129906Sbmilekic{
1956129906Sbmilekic	struct uma_zctor_args args;
1957187681Sjeff	uma_keg_t keg;
1958129906Sbmilekic
1959187681Sjeff	keg = zone_first_keg(master);
1960251826Sjeff	memset(&args, 0, sizeof(args));
1961129906Sbmilekic	args.name = name;
1962187681Sjeff	args.size = keg->uk_size;
1963129906Sbmilekic	args.ctor = ctor;
1964129906Sbmilekic	args.dtor = dtor;
1965129906Sbmilekic	args.uminit = zinit;
1966129906Sbmilekic	args.fini = zfini;
1967187681Sjeff	args.align = keg->uk_align;
1968187681Sjeff	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1969187681Sjeff	args.keg = keg;
1970129906Sbmilekic
1971187681Sjeff	/* XXX Attaches only one keg of potentially many. */
1972187681Sjeff	return (zone_alloc_item(zones, &args, M_WAITOK));
1973129906Sbmilekic}
1974129906Sbmilekic
1975251826Sjeff/* See uma.h */
1976251826Sjeffuma_zone_t
1977252040Sjeffuma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
1978252040Sjeff		    uma_init zinit, uma_fini zfini, uma_import zimport,
1979252040Sjeff		    uma_release zrelease, void *arg, int flags)
1980251826Sjeff{
1981251826Sjeff	struct uma_zctor_args args;
1982251826Sjeff
1983251826Sjeff	memset(&args, 0, sizeof(args));
1984251826Sjeff	args.name = name;
1985252040Sjeff	args.size = size;
1986251826Sjeff	args.ctor = ctor;
1987251826Sjeff	args.dtor = dtor;
1988251826Sjeff	args.uminit = zinit;
1989251826Sjeff	args.fini = zfini;
1990251826Sjeff	args.import = zimport;
1991251826Sjeff	args.release = zrelease;
1992251826Sjeff	args.arg = arg;
1993251826Sjeff	args.align = 0;
1994251826Sjeff	args.flags = flags;
1995251826Sjeff
1996251826Sjeff	return (zone_alloc_item(zones, &args, M_WAITOK));
1997251826Sjeff}
1998251826Sjeff
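/*
 * Lock two zones in a consistent order (lowest address first) so that
 * concurrent callers cannot deadlock.  MTX_DUPOK is needed since both
 * locks may belong to the same lock class.
 */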
1999187681Sjeffstatic void
2000187681Sjeffzone_lock_pair(uma_zone_t a, uma_zone_t b)
2001187681Sjeff{
2002187681Sjeff	if (a < b) {
2003187681Sjeff		ZONE_LOCK(a);
2004252040Sjeff		mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2005187681Sjeff	} else {
2006187681Sjeff		ZONE_LOCK(b);
2007252040Sjeff		mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2008187681Sjeff	}
2009187681Sjeff}
2010187681Sjeff
2011187681Sjeffstatic void
2012187681Sjeffzone_unlock_pair(uma_zone_t a, uma_zone_t b)
2013187681Sjeff{
2014187681Sjeff
2015187681Sjeff	ZONE_UNLOCK(a);
2016187681Sjeff	ZONE_UNLOCK(b);
2017187681Sjeff}
2018187681Sjeff
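/*
 * Attach the master zone's keg to an existing secondary zone so that
 * allocations may be satisfied from multiple kegs.  Both zones must use
 * vtoslab(), agree on UMA_ZONE_REFCNT, and have the same item size.
 */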
2019187681Sjeffint
2020187681Sjeffuma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2021187681Sjeff{
2022187681Sjeff	uma_klink_t klink;
2023187681Sjeff	uma_klink_t kl;
2024187681Sjeff	int error;
2025187681Sjeff
2026187681Sjeff	error = 0;
2027187681Sjeff	klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2028187681Sjeff
2029187681Sjeff	zone_lock_pair(zone, master);
2030187681Sjeff	/*
2031187681Sjeff	 * zone must use vtoslab() to resolve objects and must already be
2032187681Sjeff	 * a secondary.
2033187681Sjeff	 */
2034187681Sjeff	if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2035187681Sjeff	    != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2036187681Sjeff		error = EINVAL;
2037187681Sjeff		goto out;
2038187681Sjeff	}
2039187681Sjeff	/*
2040187681Sjeff	 * The new master must also use vtoslab().
2041187681Sjeff	 */
2042187681Sjeff	if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2043187681Sjeff		error = EINVAL;
2044187681Sjeff		goto out;
2045187681Sjeff	}
2046187681Sjeff	/*
2047187681Sjeff	 * Both must either be refcnt, or not be refcnt.
2048187681Sjeff	 */
2049187681Sjeff	if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
2050187681Sjeff	    (master->uz_flags & UMA_ZONE_REFCNT)) {
2051187681Sjeff		error = EINVAL;
2052187681Sjeff		goto out;
2053187681Sjeff	}
2054187681Sjeff	/*
2055187681Sjeff	 * The underlying object must be the same size.  rsize
2056187681Sjeff	 * may be different.
2057187681Sjeff	 */
2058187681Sjeff	if (master->uz_size != zone->uz_size) {
2059187681Sjeff		error = E2BIG;
2060187681Sjeff		goto out;
2061187681Sjeff	}
2062187681Sjeff	/*
2063187681Sjeff	 * Put it at the end of the list.
2064187681Sjeff	 */
2065187681Sjeff	klink->kl_keg = zone_first_keg(master);
2066187681Sjeff	LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2067187681Sjeff		if (LIST_NEXT(kl, kl_link) == NULL) {
2068187681Sjeff			LIST_INSERT_AFTER(kl, klink, kl_link);
2069187681Sjeff			break;
2070187681Sjeff		}
2071187681Sjeff	}
2072187681Sjeff	klink = NULL;
2073187681Sjeff	zone->uz_flags |= UMA_ZFLAG_MULTI;
2074187681Sjeff	zone->uz_slab = zone_fetch_slab_multi;
2075187681Sjeff
2076187681Sjeffout:
2077187681Sjeff	zone_unlock_pair(zone, master);
2078187681Sjeff	if (klink != NULL)
2079187681Sjeff		free(klink, M_TEMP);
2080187681Sjeff
2081187681Sjeff	return (error);
2082187681Sjeff}
2083187681Sjeff
2084187681Sjeff
2085129906Sbmilekic/* See uma.h */
208694161Sjeffvoid
208794161Sjeffuma_zdestroy(uma_zone_t zone)
208894161Sjeff{
2089148194Srwatson
2090251826Sjeff	zone_free_item(zones, zone, NULL, SKIP_NONE);
209194161Sjeff}
209294161Sjeff
209394161Sjeff/* See uma.h */
209492654Sjeffvoid *
209595766Sjeffuma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
209692654Sjeff{
209792654Sjeff	void *item;
209892654Sjeff	uma_cache_t cache;
209992654Sjeff	uma_bucket_t bucket;
2100251894Sjeff	int lockfail;
210192654Sjeff	int cpu;
210292654Sjeff
210392654Sjeff	/* This is the fast path allocation */
210492654Sjeff#ifdef UMA_DEBUG_ALLOC_1
210592654Sjeff	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
210692654Sjeff#endif
2107133230Srwatson	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2108133230Srwatson	    zone->uz_name, flags);
210994159Sjeff
2110165928Srwatson	if (flags & M_WAITOK) {
2111165928Srwatson		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2112165928Srwatson		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
211397007Sjhb	}
2114226313Sglebius#ifdef DEBUG_MEMGUARD
2115226313Sglebius	if (memguard_cmp_zone(zone)) {
2116226313Sglebius		item = memguard_alloc(zone->uz_size, flags);
2117226313Sglebius		if (item != NULL) {
2118226313Sglebius			/*
2119226313Sglebius			 * Avoid conflict with the use-after-free
2120226313Sglebius			 * protecting infrastructure from INVARIANTS.
2121226313Sglebius			 */
2122226313Sglebius			if (zone->uz_init != NULL &&
2123226313Sglebius			    zone->uz_init != mtrash_init &&
2124226313Sglebius			    zone->uz_init(item, zone->uz_size, flags) != 0)
2125226313Sglebius				return (NULL);
2126226313Sglebius			if (zone->uz_ctor != NULL &&
2127226313Sglebius			    zone->uz_ctor != mtrash_ctor &&
2128251894Sjeff			    zone->uz_ctor(item, zone->uz_size, udata,
2129251894Sjeff			    flags) != 0) {
2130226313Sglebius			    	zone->uz_fini(item, zone->uz_size);
2131226313Sglebius				return (NULL);
2132226313Sglebius			}
2133226313Sglebius			return (item);
2134226313Sglebius		}
2135226313Sglebius		/* This is unfortunate but should not be fatal. */
2136226313Sglebius	}
2137226313Sglebius#endif
2138145686Srwatson	/*
2139145686Srwatson	 * If possible, allocate from the per-CPU cache.  There are two
2140145686Srwatson	 * requirements for safe access to the per-CPU cache: (1) the thread
2141145686Srwatson	 * accessing the cache must not be preempted or yield during access,
2142145686Srwatson	 * and (2) the thread must not migrate CPUs without switching which
2143145686Srwatson	 * cache it accesses.  We rely on a critical section to prevent
2144145686Srwatson	 * preemption and migration.  We release the critical section in
2145145686Srwatson	 * order to acquire the zone mutex if we are unable to allocate from
2146145686Srwatson	 * the current cache; when we re-acquire the critical section, we
2147145686Srwatson	 * must detect and handle migration if it has occurred.
2148145686Srwatson	 */
2149145686Srwatson	critical_enter();
2150145686Srwatson	cpu = curcpu;
215192654Sjeff	cache = &zone->uz_cpu[cpu];
215292654Sjeff
215392654Sjeffzalloc_start:
215492654Sjeff	bucket = cache->uc_allocbucket;
2155251894Sjeff	if (bucket != NULL && bucket->ub_cnt > 0) {
2156251894Sjeff		bucket->ub_cnt--;
2157251894Sjeff		item = bucket->ub_bucket[bucket->ub_cnt];
215892654Sjeff#ifdef INVARIANTS
2159251894Sjeff		bucket->ub_bucket[bucket->ub_cnt] = NULL;
216092654Sjeff#endif
2161251894Sjeff		KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2162251894Sjeff		cache->uc_allocs++;
2163251894Sjeff		critical_exit();
2164251894Sjeff		if (zone->uz_ctor != NULL &&
2165251894Sjeff		    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2166251894Sjeff			atomic_add_long(&zone->uz_fails, 1);
2167251894Sjeff			zone_free_item(zone, item, udata, SKIP_DTOR);
2168251894Sjeff			return (NULL);
2169251894Sjeff		}
2170251709Sjeff#ifdef INVARIANTS
2171251894Sjeff		uma_dbg_alloc(zone, NULL, item);
2172251709Sjeff#endif
2173251894Sjeff		if (flags & M_ZERO)
2174262739Sglebius			uma_zero_item(item, zone);
2175251894Sjeff		return (item);
2176251894Sjeff	}
2177251894Sjeff
2178251894Sjeff	/*
2179251894Sjeff	 * We have run out of items in our alloc bucket.
2180251894Sjeff	 * See if we can switch with our free bucket.
2181251894Sjeff	 */
2182251894Sjeff	bucket = cache->uc_freebucket;
2183251894Sjeff	if (bucket != NULL && bucket->ub_cnt > 0) {
218492654Sjeff#ifdef UMA_DEBUG_ALLOC
2185251894Sjeff		printf("uma_zalloc: Swapping empty with alloc.\n");
218692654Sjeff#endif
2187251894Sjeff		cache->uc_freebucket = cache->uc_allocbucket;
2188251894Sjeff		cache->uc_allocbucket = bucket;
2189251894Sjeff		goto zalloc_start;
2190251894Sjeff	}
219192654Sjeff
2192145686Srwatson	/*
2193251894Sjeff	 * Discard any empty allocation bucket while we hold no locks.
2194251894Sjeff	 */
2195251894Sjeff	bucket = cache->uc_allocbucket;
2196251894Sjeff	cache->uc_allocbucket = NULL;
2197251894Sjeff	critical_exit();
2198251894Sjeff	if (bucket != NULL)
2199252226Sjeff		bucket_free(zone, bucket, udata);
2200251894Sjeff
2201251894Sjeff	/* Short-circuit for zones without buckets and low memory. */
2202251894Sjeff	if (zone->uz_count == 0 || bucketdisable)
2203251894Sjeff		goto zalloc_item;
2204251894Sjeff
2205251894Sjeff	/*
2206145686Srwatson	 * The attempt to retrieve the item from the per-CPU cache has failed, so
2207145686Srwatson	 * we must go back to the zone.  This requires the zone lock, so we
2208145686Srwatson	 * must drop the critical section, then re-acquire it when we go back
2209145686Srwatson	 * to the cache.  Since the critical section is released, we may be
2210145686Srwatson	 * preempted or migrate.  As such, make sure not to maintain any
2211145686Srwatson	 * thread-local state specific to the cache from prior to releasing
2212145686Srwatson	 * the critical section.
2213145686Srwatson	 */
2214251894Sjeff	lockfail = 0;
2215251894Sjeff	if (ZONE_TRYLOCK(zone) == 0) {
2216251894Sjeff		/* Record contention to size the buckets. */
2217251894Sjeff		ZONE_LOCK(zone);
2218251894Sjeff		lockfail = 1;
2219251894Sjeff	}
2220145686Srwatson	critical_enter();
2221145686Srwatson	cpu = curcpu;
2222145686Srwatson	cache = &zone->uz_cpu[cpu];
2223145686Srwatson
2224251894Sjeff	/*
2225251894Sjeff	 * Since we have locked the zone we may as well send back our stats.
2226251894Sjeff	 */
2227251826Sjeff	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2228251826Sjeff	atomic_add_long(&zone->uz_frees, cache->uc_frees);
222994159Sjeff	cache->uc_allocs = 0;
2230147995Srwatson	cache->uc_frees = 0;
223194159Sjeff
2232251894Sjeff	/* See if we lost the race to fill the cache. */
2233251894Sjeff	if (cache->uc_allocbucket != NULL) {
2234251894Sjeff		ZONE_UNLOCK(zone);
2235251894Sjeff		goto zalloc_start;
223694159Sjeff	}
223794159Sjeff
2238251894Sjeff	/*
2239251894Sjeff	 * Check the zone's cache of buckets.
2240251894Sjeff	 */
2241251894Sjeff	if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
2242120218Sjeff		KASSERT(bucket->ub_cnt != 0,
224394159Sjeff		    ("uma_zalloc_arg: Returning an empty bucket."));
224494159Sjeff
224594159Sjeff		LIST_REMOVE(bucket, ub_link);
224694159Sjeff		cache->uc_allocbucket = bucket;
224794159Sjeff		ZONE_UNLOCK(zone);
224894159Sjeff		goto zalloc_start;
2249125246Sdes	}
2250145686Srwatson	/* We are no longer associated with this CPU. */
2251145686Srwatson	critical_exit();
2252105853Sjeff
2253251894Sjeff	/*
2254251894Sjeff	 * We bump the uz count when the cache size is insufficient to
2255251894Sjeff	 * handle the working set.
2256251894Sjeff	 */
2257252226Sjeff	if (lockfail && zone->uz_count < BUCKET_MAX)
225894159Sjeff		zone->uz_count++;
2259251894Sjeff	ZONE_UNLOCK(zone);
2260129906Sbmilekic
226192654Sjeff	/*
226294159Sjeff	 * Now let's just fill a bucket and put it on the free list.  If that
2263251894Sjeff	 * works we'll restart the allocation from the beginning and it
2264251894Sjeff	 * will use the just-filled bucket.
2265105853Sjeff	 */
2266252226Sjeff	bucket = zone_alloc_bucket(zone, udata, flags);
2267251894Sjeff	if (bucket != NULL) {
2268251894Sjeff		ZONE_LOCK(zone);
2269251894Sjeff		critical_enter();
2270251894Sjeff		cpu = curcpu;
2271251894Sjeff		cache = &zone->uz_cpu[cpu];
2272251894Sjeff		/*
2273251894Sjeff		 * See if we lost the race or were migrated.  Cache the
2274251894Sjeff		 * initialized bucket to make this less likely or claim
2275251894Sjeff		 * the memory directly.
2276251894Sjeff		 */
2277251894Sjeff		if (cache->uc_allocbucket == NULL)
2278251894Sjeff			cache->uc_allocbucket = bucket;
2279251894Sjeff		else
2280251894Sjeff			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2281105853Sjeff		ZONE_UNLOCK(zone);
2282251894Sjeff		goto zalloc_start;
2283105853Sjeff	}
2284251894Sjeff
2285105853Sjeff	/*
2286105853Sjeff	 * We may not be able to get a bucket so return an actual item.
2287105853Sjeff	 */
2288105853Sjeff#ifdef UMA_DEBUG
2289105853Sjeff	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2290105853Sjeff#endif
2291105853Sjeff
2292251894Sjeffzalloc_item:
2293187681Sjeff	item = zone_alloc_item(zone, udata, flags);
2294251894Sjeff
2295187681Sjeff	return (item);
2296105853Sjeff}
2297105853Sjeff
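/*
 * Fetch a slab with free items from the keg, allocating a fresh slab if
 * none is available.  Called and returns with the keg lock held; may
 * sleep on the keg when it is at its page limit unless M_NOWAIT is set.
 */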
2298105853Sjeffstatic uma_slab_t
2299187681Sjeffkeg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2300105853Sjeff{
2301105853Sjeff	uma_slab_t slab;
2302252226Sjeff	int reserve;
2303105853Sjeff
2304187681Sjeff	mtx_assert(&keg->uk_lock, MA_OWNED);
2305105853Sjeff	slab = NULL;
2306252226Sjeff	reserve = 0;
2307252226Sjeff	if ((flags & M_USE_RESERVE) == 0)
2308252226Sjeff		reserve = keg->uk_reserve;
2309105853Sjeff
2310105853Sjeff	for (;;) {
2311105853Sjeff		/*
2312105853Sjeff		 * Find a slab with some space.  Prefer slabs that are partially
2313105853Sjeff		 * used over those that are totally full.  This helps to reduce
2314105853Sjeff		 * fragmentation.
2315105853Sjeff		 */
2316252226Sjeff		if (keg->uk_free > reserve) {
2317129906Sbmilekic			if (!LIST_EMPTY(&keg->uk_part_slab)) {
2318129906Sbmilekic				slab = LIST_FIRST(&keg->uk_part_slab);
2319105853Sjeff			} else {
2320129906Sbmilekic				slab = LIST_FIRST(&keg->uk_free_slab);
2321105853Sjeff				LIST_REMOVE(slab, us_link);
2322129906Sbmilekic				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2323129906Sbmilekic				    us_link);
2324105853Sjeff			}
2325187681Sjeff			MPASS(slab->us_keg == keg);
2326105853Sjeff			return (slab);
2327105853Sjeff		}
2328105853Sjeff
2329105853Sjeff		/*
2330105853Sjeff		 * M_NOVM means don't ask at all!
2331105853Sjeff		 */
2332105853Sjeff		if (flags & M_NOVM)
2333105853Sjeff			break;
2334105853Sjeff
2335187681Sjeff		if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2336129906Sbmilekic			keg->uk_flags |= UMA_ZFLAG_FULL;
2337187681Sjeff			/*
2338187681Sjeff			 * If this is not a multi-zone, set the FULL bit.
2339187681Sjeff			 * Otherwise slab_multi() takes care of it.
2340187681Sjeff			 */
2341243998Spjd			if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
2342187681Sjeff				zone->uz_flags |= UMA_ZFLAG_FULL;
2343243998Spjd				zone_log_warning(zone);
2344243998Spjd			}
2345109548Sjeff			if (flags & M_NOWAIT)
2346109548Sjeff				break;
2347238206Seadler			zone->uz_sleeps++;
2348187681Sjeff			msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2349105853Sjeff			continue;
2350105853Sjeff		}
2351187681Sjeff		slab = keg_alloc_slab(keg, zone, flags);
2352125246Sdes		/*
2353105853Sjeff		 * If we got a slab here it's safe to mark it partially used
2354105853Sjeff		 * and return.  We assume that the caller is going to remove
2355105853Sjeff		 * at least one item.
2356105853Sjeff		 */
2357105853Sjeff		if (slab) {
2358187681Sjeff			MPASS(slab->us_keg == keg);
2359129906Sbmilekic			LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2360105853Sjeff			return (slab);
2361105853Sjeff		}
2362125246Sdes		/*
2363105853Sjeff		 * We might not have been able to get a slab but another cpu
2364105853Sjeff		 * could have while we were unlocked.  Check again before we
2365105853Sjeff		 * fail.
2366105853Sjeff		 */
2367187681Sjeff		flags |= M_NOVM;
2368105853Sjeff	}
2369105853Sjeff	return (slab);
2370105853Sjeff}
2371105853Sjeff
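/*
 * Fetch a slab for a zone backed by a single keg.  If no keg is passed
 * in, the zone's first keg is used and locked here.  Loops until a slab
 * is found unless the request cannot wait (M_NOWAIT/M_NOVM).
 */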
2372187681Sjeffstatic uma_slab_t
2373187681Sjeffzone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2374187681Sjeff{
2375187681Sjeff	uma_slab_t slab;
2376187681Sjeff
2377252040Sjeff	if (keg == NULL) {
2378187681Sjeff		keg = zone_first_keg(zone);
2379252040Sjeff		KEG_LOCK(keg);
2380252040Sjeff	}
2381187681Sjeff
2382187681Sjeff	for (;;) {
2383187681Sjeff		slab = keg_fetch_slab(keg, zone, flags);
2384187681Sjeff		if (slab)
2385187681Sjeff			return (slab);
2386187681Sjeff		if (flags & (M_NOWAIT | M_NOVM))
2387187681Sjeff			break;
2388187681Sjeff	}
2389252040Sjeff	KEG_UNLOCK(keg);
2390187681Sjeff	return (NULL);
2391187681Sjeff}
2392187681Sjeff
2393187681Sjeff/*
2394187681Sjeff * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2395252040Sjeff * with the keg locked.  If NULL is returned, no lock is held.
2396187681Sjeff *
2397187681Sjeff * The last pointer is used to seed the search.  It is not required.
2398187681Sjeff */
2399187681Sjeffstatic uma_slab_t
2400187681Sjeffzone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2401187681Sjeff{
2402187681Sjeff	uma_klink_t klink;
2403187681Sjeff	uma_slab_t slab;
2404187681Sjeff	uma_keg_t keg;
2405187681Sjeff	int flags;
2406187681Sjeff	int empty;
2407187681Sjeff	int full;
2408187681Sjeff
2409187681Sjeff	/*
2410187681Sjeff	 * Don't wait on the first pass.  This will skip limit tests
2411187681Sjeff	 * as well.  We don't want to block if we can find a provider
2412187681Sjeff	 * without blocking.
2413187681Sjeff	 */
2414187681Sjeff	flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2415187681Sjeff	/*
2416187681Sjeff	 * Use the last slab allocated as a hint for where to start
2417187681Sjeff	 * the search.
2418187681Sjeff	 */
2419252040Sjeff	if (last != NULL) {
2420187681Sjeff		slab = keg_fetch_slab(last, zone, flags);
2421187681Sjeff		if (slab)
2422187681Sjeff			return (slab);
2423252040Sjeff		KEG_UNLOCK(last);
2424187681Sjeff	}
2425187681Sjeff	/*
2426187681Sjeff	 * Loop until we have a slab in case of transient failures
2427187681Sjeff	 * while M_WAITOK is specified.  I'm not sure this is 100%
2428187681Sjeff	 * required but we've done it for so long now.
2429187681Sjeff	 */
2430187681Sjeff	for (;;) {
2431187681Sjeff		empty = 0;
2432187681Sjeff		full = 0;
2433187681Sjeff		/*
2434187681Sjeff		 * Search the available kegs for slabs.  Be careful to hold the
2435187681Sjeff		 * correct lock while calling into the keg layer.
2436187681Sjeff		 */
2437187681Sjeff		LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2438187681Sjeff			keg = klink->kl_keg;
2439252040Sjeff			KEG_LOCK(keg);
2440187681Sjeff			if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2441187681Sjeff				slab = keg_fetch_slab(keg, zone, flags);
2442187681Sjeff				if (slab)
2443187681Sjeff					return (slab);
2444187681Sjeff			}
2445187681Sjeff			if (keg->uk_flags & UMA_ZFLAG_FULL)
2446187681Sjeff				full++;
2447187681Sjeff			else
2448187681Sjeff				empty++;
2449252040Sjeff			KEG_UNLOCK(keg);
2450187681Sjeff		}
2451187681Sjeff		if (rflags & (M_NOWAIT | M_NOVM))
2452187681Sjeff			break;
2453187681Sjeff		flags = rflags;
2454187681Sjeff		/*
2455187681Sjeff		 * All kegs are full.  XXX We can't atomically check all kegs
2456187681Sjeff		 * and sleep so just sleep for a short period and retry.
2457187681Sjeff		 */
2458187681Sjeff		if (full && !empty) {
2459252040Sjeff			ZONE_LOCK(zone);
2460187681Sjeff			zone->uz_flags |= UMA_ZFLAG_FULL;
2461209215Ssbruno			zone->uz_sleeps++;
2462243998Spjd			zone_log_warning(zone);
2463252040Sjeff			msleep(zone, zone->uz_lockptr, PVM,
2464252040Sjeff			    "zonelimit", hz/100);
2465187681Sjeff			zone->uz_flags &= ~UMA_ZFLAG_FULL;
2466252040Sjeff			ZONE_UNLOCK(zone);
2467187681Sjeff			continue;
2468187681Sjeff		}
2469187681Sjeff	}
2470187681Sjeff	return (NULL);
2471187681Sjeff}
2472187681Sjeff
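/*
 * Take one item from the given slab: clear its bit in the free set,
 * adjust the free counts, and move the slab to the full list when it is
 * exhausted.  The keg lock must be held.
 */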
2473118221Sbmilekicstatic void *
2474251826Sjeffslab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2475105853Sjeff{
2476105853Sjeff	void *item;
2477249313Sglebius	uint8_t freei;
2478125246Sdes
2479251826Sjeff	MPASS(keg == slab->us_keg);
2480187681Sjeff	mtx_assert(&keg->uk_lock, MA_OWNED);
2481129906Sbmilekic
2482251709Sjeff	freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2483251709Sjeff	BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2484129906Sbmilekic	item = slab->us_data + (keg->uk_rsize * freei);
2485105853Sjeff	slab->us_freecount--;
2486129906Sbmilekic	keg->uk_free--;
2487251709Sjeff
2488105853Sjeff	/* Move this slab to the full list */
2489105853Sjeff	if (slab->us_freecount == 0) {
2490105853Sjeff		LIST_REMOVE(slab, us_link);
2491129906Sbmilekic		LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2492105853Sjeff	}
2493105853Sjeff
2494105853Sjeff	return (item);
2495105853Sjeff}
2496105853Sjeff
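/*
 * Import routine for keg-backed zones: fill the bucket array with up to
 * 'max' items taken straight from slabs, stopping early if the keg's
 * reserve would be consumed.  Returns the number of items imported.
 */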
2497105853Sjeffstatic int
2498251826Sjeffzone_import(uma_zone_t zone, void **bucket, int max, int flags)
2499105853Sjeff{
2500105853Sjeff	uma_slab_t slab;
2501187681Sjeff	uma_keg_t keg;
2502251826Sjeff	int i;
2503105853Sjeff
2504251826Sjeff	slab = NULL;
2505251826Sjeff	keg = NULL;
2506252040Sjeff	/* Try to keep the buckets totally full */
2507251826Sjeff	for (i = 0; i < max; ) {
2508251826Sjeff		if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2509251826Sjeff			break;
2510251826Sjeff		keg = slab->us_keg;
2511252226Sjeff		while (slab->us_freecount && i < max) {
2512251826Sjeff			bucket[i++] = slab_alloc_item(keg, slab);
2513252226Sjeff			if (keg->uk_free <= keg->uk_reserve)
2514252226Sjeff				break;
2515252226Sjeff		}
2516252226Sjeff		/* Don't grab more than one slab at a time. */
2517251826Sjeff		flags &= ~M_WAITOK;
2518251826Sjeff		flags |= M_NOWAIT;
2519251826Sjeff	}
2520251826Sjeff	if (slab != NULL)
2521251826Sjeff		KEG_UNLOCK(keg);
252292654Sjeff
2523251826Sjeff	return i;
2524251826Sjeff}
2525105853Sjeff
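/*
 * Allocate a bucket and fill it with items from the zone's import
 * routine, running the zone's init on each item when one is set.  On
 * failure the empty bucket is freed and NULL is returned.
 */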
2526251894Sjeffstatic uma_bucket_t
2527252226Sjeffzone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
2528251826Sjeff{
2529251826Sjeff	uma_bucket_t bucket;
2530251826Sjeff	int max;
2531251826Sjeff
2532252226Sjeff	/* Don't wait for buckets, preserve caller's NOVM setting. */
2533252226Sjeff	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
2534251894Sjeff	if (bucket == NULL)
2535260304Smav		return (NULL);
253694159Sjeff
2537252040Sjeff	max = MIN(bucket->ub_entries, zone->uz_count);
2538251826Sjeff	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
2539251826Sjeff	    max, flags);
2540251826Sjeff
2541129906Sbmilekic	/*
2542251826Sjeff	 * Initialize the memory if necessary.
2543129906Sbmilekic	 */
2544251826Sjeff	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2545129906Sbmilekic		int i;
2546129906Sbmilekic
2547251826Sjeff		for (i = 0; i < bucket->ub_cnt; i++)
2548187681Sjeff			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2549251826Sjeff			    flags) != 0)
2550132987Sgreen				break;
2551132987Sgreen		/*
2552132987Sgreen		 * If we couldn't initialize the whole bucket, put the
2553132987Sgreen		 * rest back onto the freelist.
2554132987Sgreen		 */
2555132987Sgreen		if (i != bucket->ub_cnt) {
2556252040Sjeff			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
2557251826Sjeff			    bucket->ub_cnt - i);
2558137001Sbmilekic#ifdef INVARIANTS
2559251826Sjeff			bzero(&bucket->ub_bucket[i],
2560251826Sjeff			    sizeof(void *) * (bucket->ub_cnt - i));
2561137001Sbmilekic#endif
2562132987Sgreen			bucket->ub_cnt = i;
2563132987Sgreen		}
2564129906Sbmilekic	}
2565129906Sbmilekic
2566260304Smav	if (bucket->ub_cnt == 0) {
2567260304Smav		bucket_free(zone, bucket, udata);
2568251894Sjeff		atomic_add_long(&zone->uz_fails, 1);
2569251894Sjeff		return (NULL);
2570105853Sjeff	}
2571105853Sjeff
2572251894Sjeff	return (bucket);
257392654Sjeff}
2574251894Sjeff
257592654Sjeff/*
2576251826Sjeff * Allocates a single item from a zone.
257792654Sjeff *
257892654Sjeff * Arguments
257992654Sjeff *	zone   The zone to alloc for.
258092654Sjeff *	udata  The data to be passed to the constructor.
2581111119Simp *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
258292654Sjeff *
258392654Sjeff * Returns
258492654Sjeff *	NULL if there is no memory and M_NOWAIT is set
2585105853Sjeff *	An item if successful
258692654Sjeff */
258792654Sjeff
258892654Sjeffstatic void *
2589187681Sjeffzone_alloc_item(uma_zone_t zone, void *udata, int flags)
259092654Sjeff{
259192654Sjeff	void *item;
259292654Sjeff
259392654Sjeff	item = NULL;
259492654Sjeff
259592654Sjeff#ifdef UMA_DEBUG_ALLOC
259692654Sjeff	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
259792654Sjeff#endif
2598251826Sjeff	if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
2599251826Sjeff		goto fail;
2600251826Sjeff	atomic_add_long(&zone->uz_allocs, 1);
260192654Sjeff
2602129906Sbmilekic	/*
2603129906Sbmilekic	 * We have to call both the zone's init (not the keg's init)
2604129906Sbmilekic	 * and the zone's ctor.  This is because the item is going from
2605129906Sbmilekic	 * a keg slab directly to the user, and the user is expecting it
2606129906Sbmilekic	 * to be both zone-init'd as well as zone-ctor'd.
2607129906Sbmilekic	 */
2608132987Sgreen	if (zone->uz_init != NULL) {
2609187681Sjeff		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2610251826Sjeff			zone_free_item(zone, item, udata, SKIP_FINI);
2611251826Sjeff			goto fail;
2612132987Sgreen		}
2613132987Sgreen	}
2614132987Sgreen	if (zone->uz_ctor != NULL) {
2615187681Sjeff		if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2616251826Sjeff			zone_free_item(zone, item, udata, SKIP_DTOR);
2617251826Sjeff			goto fail;
2618132987Sgreen		}
2619132987Sgreen	}
2620251709Sjeff#ifdef INVARIANTS
2621251826Sjeff	uma_dbg_alloc(zone, NULL, item);
2622251709Sjeff#endif
2623105853Sjeff	if (flags & M_ZERO)
2624262739Sglebius		uma_zero_item(item, zone);
262592654Sjeff
262692654Sjeff	return (item);
2627251826Sjeff
2628251826Sjefffail:
2629251826Sjeff	atomic_add_long(&zone->uz_fails, 1);
2630251826Sjeff	return (NULL);
263192654Sjeff}
263292654Sjeff
263392654Sjeff/* See uma.h */
263492654Sjeffvoid
263592654Sjeffuma_zfree_arg(uma_zone_t zone, void *item, void *udata)
263692654Sjeff{
263792654Sjeff	uma_cache_t cache;
263892654Sjeff	uma_bucket_t bucket;
2639260302Smav	int lockfail;
264092654Sjeff	int cpu;
264192654Sjeff
264292654Sjeff#ifdef UMA_DEBUG_ALLOC_1
264392654Sjeff	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
264492654Sjeff#endif
2645133230Srwatson	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2646133230Srwatson	    zone->uz_name);
2647133230Srwatson
2648214062Smdf	/* uma_zfree(..., NULL) does nothing, to match free(9). */
2649214062Smdf	if (item == NULL)
2650214062Smdf		return;
2651226313Sglebius#ifdef DEBUG_MEMGUARD
2652226313Sglebius	if (is_memguard_addr(item)) {
2653226313Sglebius		if (zone->uz_dtor != NULL && zone->uz_dtor != mtrash_dtor)
2654226313Sglebius			zone->uz_dtor(item, zone->uz_size, udata);
2655226313Sglebius		if (zone->uz_fini != NULL && zone->uz_fini != mtrash_fini)
2656226313Sglebius			zone->uz_fini(item, zone->uz_size);
2657226313Sglebius		memguard_free(item);
2658226313Sglebius		return;
2659226313Sglebius	}
2660226313Sglebius#endif
2661145686Srwatson#ifdef INVARIANTS
2662187681Sjeff	if (zone->uz_flags & UMA_ZONE_MALLOC)
2663145686Srwatson		uma_dbg_free(zone, udata, item);
2664145686Srwatson	else
2665145686Srwatson		uma_dbg_free(zone, NULL, item);
2666145686Srwatson#endif
2667251894Sjeff	if (zone->uz_dtor != NULL)
2668251709Sjeff		zone->uz_dtor(item, zone->uz_size, udata);
2669251709Sjeff
267094631Sjeff	/*
267194631Sjeff	 * The race here is acceptable.  If we miss it we'll just have to wait
267294631Sjeff	 * a little longer for the limits to be reset.
267394631Sjeff	 */
2674187681Sjeff	if (zone->uz_flags & UMA_ZFLAG_FULL)
2675251894Sjeff		goto zfree_item;
267694631Sjeff
2677145686Srwatson	/*
2678145686Srwatson	 * If possible, free to the per-CPU cache.  There are two
2679145686Srwatson	 * requirements for safe access to the per-CPU cache: (1) the thread
2680145686Srwatson	 * accessing the cache must not be preempted or yield during access,
2681145686Srwatson	 * and (2) the thread must not migrate CPUs without switching which
2682145686Srwatson	 * cache it accesses.  We rely on a critical section to prevent
2683145686Srwatson	 * preemption and migration.  We release the critical section in
2684145686Srwatson	 * order to acquire the zone mutex if we are unable to free to the
2685145686Srwatson	 * current cache; when we re-acquire the critical section, we must
2686145686Srwatson	 * detect and handle migration if it has occurred.
2687145686Srwatson	 */
268894159Sjeffzfree_restart:
2689145686Srwatson	critical_enter();
2690145686Srwatson	cpu = curcpu;
269192654Sjeff	cache = &zone->uz_cpu[cpu];
269292654Sjeff
269392654Sjeffzfree_start:
2694251894Sjeff	/*
2695251894Sjeff	 * Try to free into the allocbucket first to give LIFO ordering
2696251894Sjeff	 * for cache-hot data structures.  Spill over into the freebucket
2697251894Sjeff	 * if necessary.  Alloc will swap them if one runs dry.
2698251894Sjeff	 */
2699251894Sjeff	bucket = cache->uc_allocbucket;
2700251894Sjeff	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
2701251894Sjeff		bucket = cache->uc_freebucket;
2702251894Sjeff	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2703251894Sjeff		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2704251894Sjeff		    ("uma_zfree: Freeing to non free bucket index."));
2705251894Sjeff		bucket->ub_bucket[bucket->ub_cnt] = item;
2706251894Sjeff		bucket->ub_cnt++;
2707251894Sjeff		cache->uc_frees++;
2708251894Sjeff		critical_exit();
2709251894Sjeff		return;
2710251894Sjeff	}
271192654Sjeff
271292654Sjeff	/*
2713145686Srwatson	 * We must go back to the zone, which requires acquiring the zone lock,
2714145686Srwatson	 * which in turn means we must release and re-acquire the critical
2715145686Srwatson	 * section.  Since the critical section is released, we may be
2716145686Srwatson	 * preempted or migrate.  As such, make sure not to maintain any
2717145686Srwatson	 * thread-local state specific to the cache from prior to releasing
2718145686Srwatson	 * the critical section.
271992654Sjeff	 */
2720145686Srwatson	critical_exit();
2721251894Sjeff	if (zone->uz_count == 0 || bucketdisable)
2722251894Sjeff		goto zfree_item;
2723251894Sjeff
2724260302Smav	lockfail = 0;
2725260302Smav	if (ZONE_TRYLOCK(zone) == 0) {
2726260302Smav		/* Record contention to size the buckets. */
2727260302Smav		ZONE_LOCK(zone);
2728260302Smav		lockfail = 1;
2729260302Smav	}
2730145686Srwatson	critical_enter();
2731145686Srwatson	cpu = curcpu;
2732145686Srwatson	cache = &zone->uz_cpu[cpu];
273392654Sjeff
2734251894Sjeff	/*
2735251894Sjeff	 * Since we have locked the zone we may as well send back our stats.
2736251894Sjeff	 */
2737251826Sjeff	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2738251826Sjeff	atomic_add_long(&zone->uz_frees, cache->uc_frees);
2739148194Srwatson	cache->uc_allocs = 0;
2740148194Srwatson	cache->uc_frees = 0;
2741148194Srwatson
274294159Sjeff	bucket = cache->uc_freebucket;
2743251894Sjeff	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2744251894Sjeff		ZONE_UNLOCK(zone);
2745251894Sjeff		goto zfree_start;
2746251894Sjeff	}
274794159Sjeff	cache->uc_freebucket = NULL;
274892654Sjeff
274994159Sjeff	/* Can we throw this on the zone full list? */
275094159Sjeff	if (bucket != NULL) {
275192654Sjeff#ifdef UMA_DEBUG_ALLOC
275294159Sjeff		printf("uma_zfree: Putting old bucket on the free list.\n");
275392654Sjeff#endif
2754120218Sjeff		/* ub_cnt is pointing to the last free item */
2755120218Sjeff		KASSERT(bucket->ub_cnt != 0,
275694159Sjeff		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2757251894Sjeff		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
275894159Sjeff	}
2759251894Sjeff
2760145686Srwatson	/* We are no longer associated with this CPU. */
2761145686Srwatson	critical_exit();
276294159Sjeff
2763260302Smav	/*
2764260302Smav	 * We bump the uz count when the cache size is insufficient to
2765260302Smav	 * handle the working set.
2766260302Smav	 */
2767260302Smav	if (lockfail && zone->uz_count < BUCKET_MAX)
2768260302Smav		zone->uz_count++;
276994159Sjeff	ZONE_UNLOCK(zone);
277094159Sjeff
277192654Sjeff#ifdef UMA_DEBUG_ALLOC
277294159Sjeff	printf("uma_zfree: Allocating new free bucket.\n");
277392654Sjeff#endif
2774252226Sjeff	bucket = bucket_alloc(zone, udata, M_NOWAIT);
277594159Sjeff	if (bucket) {
2776251894Sjeff		critical_enter();
2777251894Sjeff		cpu = curcpu;
2778251894Sjeff		cache = &zone->uz_cpu[cpu];
2779251894Sjeff		if (cache->uc_freebucket == NULL) {
2780251894Sjeff			cache->uc_freebucket = bucket;
2781251894Sjeff			goto zfree_start;
2782251894Sjeff		}
2783251894Sjeff		/*
2784251894Sjeff		 * We lost the race, start over.  We have to drop our
2785251894Sjeff		 * critical section to free the bucket.
2786251894Sjeff		 */
2787251894Sjeff		critical_exit();
2788252226Sjeff		bucket_free(zone, bucket, udata);
278994159Sjeff		goto zfree_restart;
279094159Sjeff	}
279192654Sjeff
279294159Sjeff	/*
279394159Sjeff	 * If nothing else caught this, we'll just do an internal free.
279494159Sjeff	 */
2795251894Sjeffzfree_item:
2796251826Sjeff	zone_free_item(zone, item, udata, SKIP_DTOR);
279794631Sjeff
279892654Sjeff	return;
279992654Sjeff}
280092654Sjeff
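/*
 * Return one item to its slab: mark it free in the slab's bit set and
 * move the slab between the keg's full, partial, and free lists as
 * needed.  The keg lock must be held.
 */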
280192654Sjeffstatic void
2802251826Sjeffslab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
280392654Sjeff{
2804249313Sglebius	uint8_t freei;
280592654Sjeff
2806251826Sjeff	mtx_assert(&keg->uk_lock, MA_OWNED);
2807187681Sjeff	MPASS(keg == slab->us_keg);
280892654Sjeff
280992654Sjeff	/* Do we need to remove from any lists? */
2810129906Sbmilekic	if (slab->us_freecount+1 == keg->uk_ipers) {
2811116131Sphk		LIST_REMOVE(slab, us_link);
2812129906Sbmilekic		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
281392654Sjeff	} else if (slab->us_freecount == 0) {
281492654Sjeff		LIST_REMOVE(slab, us_link);
2815129906Sbmilekic		LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
281692654Sjeff	}
281792654Sjeff
2818251709Sjeff	/* Slab management. */
2819251709Sjeff	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2820251709Sjeff	BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
282192654Sjeff	slab->us_freecount++;
282292654Sjeff
2823251709Sjeff	/* Keg statistics. */
2824129906Sbmilekic	keg->uk_free++;
2825251826Sjeff}
282692654Sjeff
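/*
 * Return a bucket's worth of items to their kegs.  Each item's slab is found
 * from its page address, the keg hash, or vtoslab(), and any keg that was
 * blocked waiting for pages is woken once it can satisfy allocations again.
 */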
2827251826Sjeffstatic void
2828251826Sjeffzone_release(uma_zone_t zone, void **bucket, int cnt)
2829251826Sjeff{
2830251826Sjeff	void *item;
2831251826Sjeff	uma_slab_t slab;
2832251826Sjeff	uma_keg_t keg;
2833251826Sjeff	uint8_t *mem;
2834251826Sjeff	int clearfull;
2835251826Sjeff	int i;
2836251826Sjeff
2837187681Sjeff	clearfull = 0;
2838251826Sjeff	keg = zone_first_keg(zone);
2839252040Sjeff	KEG_LOCK(keg);
2840251826Sjeff	for (i = 0; i < cnt; i++) {
2841251826Sjeff		item = bucket[i];
2842251826Sjeff		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2843251826Sjeff			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
2844251826Sjeff			if (zone->uz_flags & UMA_ZONE_HASH) {
2845251826Sjeff				slab = hash_sfind(&keg->uk_hash, mem);
2846251826Sjeff			} else {
2847251826Sjeff				mem += keg->uk_pgoff;
2848251826Sjeff				slab = (uma_slab_t)mem;
2849251826Sjeff			}
2850251826Sjeff		} else {
2851251826Sjeff			slab = vtoslab((vm_offset_t)item);
2852251826Sjeff			if (slab->us_keg != keg) {
2853251826Sjeff				KEG_UNLOCK(keg);
2854251826Sjeff				keg = slab->us_keg;
2855251826Sjeff				KEG_LOCK(keg);
2856251826Sjeff			}
2857187681Sjeff		}
2858251826Sjeff		slab_free_item(keg, slab, item);
2859251826Sjeff		if (keg->uk_flags & UMA_ZFLAG_FULL) {
2860251826Sjeff			if (keg->uk_pages < keg->uk_maxpages) {
2861251826Sjeff				keg->uk_flags &= ~UMA_ZFLAG_FULL;
2862251826Sjeff				clearfull = 1;
2863251826Sjeff			}
286494631Sjeff
2865251826Sjeff			/*
2866251826Sjeff			 * We can handle one more allocation. Since we're
2867251826Sjeff			 * clearing ZFLAG_FULL, wake up all procs blocked
2868251826Sjeff			 * on pages. This should be uncommon, so keeping this
2869251826Sjeff			 * simple for now (rather than adding count of blocked
2870251826Sjeff			 * threads etc).
2871251826Sjeff			 */
2872251826Sjeff			wakeup(keg);
2873251826Sjeff		}
287494631Sjeff	}
2875252040Sjeff	KEG_UNLOCK(keg);
2876187681Sjeff	if (clearfull) {
2877252040Sjeff		ZONE_LOCK(zone);
2878187681Sjeff		zone->uz_flags &= ~UMA_ZFLAG_FULL;
2879187681Sjeff		wakeup(zone);
2880252040Sjeff		ZONE_UNLOCK(zone);
2881251826Sjeff	}
2882251709Sjeff
288392654Sjeff}
288492654Sjeff
2885251826Sjeff/*
2886251826Sjeff * Frees a single item to any zone.
2887251826Sjeff *
2888251826Sjeff * Arguments:
2889251826Sjeff *	zone   The zone to free to
2890251826Sjeff *	item   The item we're freeing
2891251826Sjeff *	udata  User supplied data for the dtor
2892251826Sjeff *	skip   Skip dtors and finis
2893251826Sjeff */
2894251826Sjeffstatic void
2895251826Sjeffzone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2896251826Sjeff{
2897251826Sjeff
2898251826Sjeff#ifdef INVARIANTS
2899251826Sjeff	if (skip == SKIP_NONE) {
2900251826Sjeff		if (zone->uz_flags & UMA_ZONE_MALLOC)
2901251826Sjeff			uma_dbg_free(zone, udata, item);
2902251826Sjeff		else
2903251826Sjeff			uma_dbg_free(zone, NULL, item);
2904251826Sjeff	}
2905251826Sjeff#endif
2906251826Sjeff	if (skip < SKIP_DTOR && zone->uz_dtor)
2907251826Sjeff		zone->uz_dtor(item, zone->uz_size, udata);
2908251826Sjeff
2909251826Sjeff	if (skip < SKIP_FINI && zone->uz_fini)
2910251826Sjeff		zone->uz_fini(item, zone->uz_size);
2911251826Sjeff
2912251826Sjeff	atomic_add_long(&zone->uz_frees, 1);
2913251826Sjeff	zone->uz_release(zone->uz_arg, &item, 1);
2914251826Sjeff}
2915251826Sjeff
291692654Sjeff/* See uma.h */
2917213911Slstewartint
291892758Sjeffuma_zone_set_max(uma_zone_t zone, int nitems)
291992758Sjeff{
2920129906Sbmilekic	uma_keg_t keg;
2921129906Sbmilekic
2922251826Sjeff	keg = zone_first_keg(zone);
2923251826Sjeff	if (keg == NULL)
2924251826Sjeff		return (0);
2925252040Sjeff	KEG_LOCK(keg);
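	/*
	 * Round the requested limit up to a whole number of slabs and hand
	 * the effective (possibly larger) limit back to the caller.
	 */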
2926187681Sjeff	keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2927129906Sbmilekic	if (keg->uk_maxpages * keg->uk_ipers < nitems)
2928187681Sjeff		keg->uk_maxpages += keg->uk_ppera;
2929213911Slstewart	nitems = keg->uk_maxpages * keg->uk_ipers;
2930252040Sjeff	KEG_UNLOCK(keg);
293195758Sjeff
2932213911Slstewart	return (nitems);
293392758Sjeff}
293492758Sjeff
293592758Sjeff/* See uma.h */
2936211396Sandreint
2937211396Sandreuma_zone_get_max(uma_zone_t zone)
2938211396Sandre{
2939211396Sandre	int nitems;
2940211396Sandre	uma_keg_t keg;
2941211396Sandre
2942251826Sjeff	keg = zone_first_keg(zone);
2943251826Sjeff	if (keg == NULL)
2944251826Sjeff		return (0);
2945252040Sjeff	KEG_LOCK(keg);
2946213910Slstewart	nitems = keg->uk_maxpages * keg->uk_ipers;
2947252040Sjeff	KEG_UNLOCK(keg);
2948211396Sandre
2949211396Sandre	return (nitems);
2950211396Sandre}
2951211396Sandre
2952211396Sandre/* See uma.h */
2953243998Spjdvoid
2954243998Spjduma_zone_set_warning(uma_zone_t zone, const char *warning)
2955243998Spjd{
2956243998Spjd
2957243998Spjd	ZONE_LOCK(zone);
2958243998Spjd	zone->uz_warning = warning;
2959243998Spjd	ZONE_UNLOCK(zone);
2960243998Spjd}
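
/*
 * Typical use (an illustrative sketch; the zone and the message are
 * hypothetical, not part of this file):
 *
 *	uma_zone_set_warning(foo_zone, "kern.foo limit reached");
 *
 * The string is printed, rate-limited, when the zone cannot satisfy an
 * allocation because it has reached its limit.
 */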
2961243998Spjd
2962243998Spjd/* See uma.h */
2963213910Slstewartint
2964213910Slstewartuma_zone_get_cur(uma_zone_t zone)
2965213910Slstewart{
2966213910Slstewart	int64_t nitems;
2967213910Slstewart	u_int i;
2968213910Slstewart
2969213910Slstewart	ZONE_LOCK(zone);
2970213910Slstewart	nitems = zone->uz_allocs - zone->uz_frees;
2971213910Slstewart	CPU_FOREACH(i) {
2972213910Slstewart		/*
2973213910Slstewart		 * See the comment in sysctl_vm_zone_stats() regarding the
2974213910Slstewart		 * safety of accessing the per-cpu caches. With the zone lock
2975213910Slstewart		 * held, it is safe, but can potentially result in stale data.
2976213910Slstewart		 */
2977213910Slstewart		nitems += zone->uz_cpu[i].uc_allocs -
2978213910Slstewart		    zone->uz_cpu[i].uc_frees;
2979213910Slstewart	}
2980213910Slstewart	ZONE_UNLOCK(zone);
2981213910Slstewart
2982213910Slstewart	return (nitems < 0 ? 0 : nitems);
2983213910Slstewart}
2984213910Slstewart
2985213910Slstewart/* See uma.h */
298692758Sjeffvoid
2987129906Sbmilekicuma_zone_set_init(uma_zone_t zone, uma_init uminit)
2988129906Sbmilekic{
2989187681Sjeff	uma_keg_t keg;
2990187681Sjeff
2991187681Sjeff	keg = zone_first_keg(zone);
2992251826Sjeff	KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
2993252040Sjeff	KEG_LOCK(keg);
2994187681Sjeff	KASSERT(keg->uk_pages == 0,
2995129906Sbmilekic	    ("uma_zone_set_init on non-empty keg"));
2996187681Sjeff	keg->uk_init = uminit;
2997252040Sjeff	KEG_UNLOCK(keg);
2998129906Sbmilekic}
2999129906Sbmilekic
3000129906Sbmilekic/* See uma.h */
3001129906Sbmilekicvoid
3002129906Sbmilekicuma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3003129906Sbmilekic{
3004187681Sjeff	uma_keg_t keg;
3005187681Sjeff
3006187681Sjeff	keg = zone_first_keg(zone);
3007251826Sjeff	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
3008252040Sjeff	KEG_LOCK(keg);
3009187681Sjeff	KASSERT(keg->uk_pages == 0,
3010129906Sbmilekic	    ("uma_zone_set_fini on non-empty keg"));
3011187681Sjeff	keg->uk_fini = fini;
3012252040Sjeff	KEG_UNLOCK(keg);
3013129906Sbmilekic}
3014129906Sbmilekic
3015129906Sbmilekic/* See uma.h */
3016129906Sbmilekicvoid
3017129906Sbmilekicuma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3018129906Sbmilekic{
3019252040Sjeff
3020129906Sbmilekic	ZONE_LOCK(zone);
3021187681Sjeff	KASSERT(zone_first_keg(zone)->uk_pages == 0,
3022129906Sbmilekic	    ("uma_zone_set_zinit on non-empty keg"));
3023129906Sbmilekic	zone->uz_init = zinit;
3024129906Sbmilekic	ZONE_UNLOCK(zone);
3025129906Sbmilekic}
3026129906Sbmilekic
3027129906Sbmilekic/* See uma.h */
3028129906Sbmilekicvoid
3029129906Sbmilekicuma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3030129906Sbmilekic{
3031252040Sjeff
3032129906Sbmilekic	ZONE_LOCK(zone);
3033187681Sjeff	KASSERT(zone_first_keg(zone)->uk_pages == 0,
3034129906Sbmilekic	    ("uma_zone_set_zfini on non-empty keg"));
3035129906Sbmilekic	zone->uz_fini = zfini;
3036129906Sbmilekic	ZONE_UNLOCK(zone);
3037129906Sbmilekic}
3038129906Sbmilekic
3039129906Sbmilekic/* See uma.h */
3040132987Sgreen/* XXX uk_freef is not actually used with the zone locked */
3041129906Sbmilekicvoid
304292654Sjeffuma_zone_set_freef(uma_zone_t zone, uma_free freef)
304392654Sjeff{
3044251826Sjeff	uma_keg_t keg;
3045187681Sjeff
3046251826Sjeff	keg = zone_first_keg(zone);
3047251826Sjeff	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3048252040Sjeff	KEG_LOCK(keg);
3049251826Sjeff	keg->uk_freef = freef;
3050252040Sjeff	KEG_UNLOCK(keg);
305192654Sjeff}
305292654Sjeff
305392654Sjeff/* See uma.h */
3054132987Sgreen/* XXX uk_allocf is not actually used with the zone locked */
305592654Sjeffvoid
305692654Sjeffuma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
305792654Sjeff{
3058187681Sjeff	uma_keg_t keg;
3059187681Sjeff
3060187681Sjeff	keg = zone_first_keg(zone);
3061252040Sjeff	KEG_LOCK(keg);
3062187681Sjeff	keg->uk_allocf = allocf;
3063252040Sjeff	KEG_UNLOCK(keg);
306492654Sjeff}
306592654Sjeff
306692654Sjeff/* See uma.h */
3067252226Sjeffvoid
3068252226Sjeffuma_zone_reserve(uma_zone_t zone, int items)
3069252226Sjeff{
3070252226Sjeff	uma_keg_t keg;
3071252226Sjeff
3072252226Sjeff	keg = zone_first_keg(zone);
3073252226Sjeff	if (keg == NULL)
3074252226Sjeff		return;
3075252226Sjeff	KEG_LOCK(keg);
3076252226Sjeff	keg->uk_reserve = items;
3077252226Sjeff	KEG_UNLOCK(keg);
3080252226Sjeff}
3081252226Sjeff
3082252226Sjeff/* See uma.h */
308392654Sjeffint
3084247360Sattiliouma_zone_reserve_kva(uma_zone_t zone, int count)
308592654Sjeff{
3086129906Sbmilekic	uma_keg_t keg;
3087129906Sbmilekic	vm_offset_t kva;
308892654Sjeff	int pages;
308992654Sjeff
3090187681Sjeff	keg = zone_first_keg(zone);
3091251826Sjeff	if (keg == NULL)
3092251826Sjeff		return (0);
3093129906Sbmilekic	pages = count / keg->uk_ipers;
309492654Sjeff
3095129906Sbmilekic	if (pages * keg->uk_ipers < count)
309692654Sjeff		pages++;
309794159Sjeff
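	/*
	 * With UMA_MD_SMALL_ALLOC, single-page slabs are served from the
	 * direct map and need no KVA; only multipage slabs require a
	 * reserved range.  Without it, KVA is always reserved up front.
	 */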
3098247360Sattilio#ifdef UMA_MD_SMALL_ALLOC
3099247360Sattilio	if (keg->uk_ppera > 1) {
3100247360Sattilio#else
3101247360Sattilio	if (1) {
3102247360Sattilio#endif
3103254025Sjeff		kva = kva_alloc(pages * UMA_SLAB_SIZE);
3104247360Sattilio		if (kva == 0)
3105247360Sattilio			return (0);
3106247360Sattilio	} else
3107247360Sattilio		kva = 0;
3108252040Sjeff	KEG_LOCK(keg);
3109129906Sbmilekic	keg->uk_kva = kva;
3110247360Sattilio	keg->uk_offset = 0;
3111129906Sbmilekic	keg->uk_maxpages = pages;
3112247360Sattilio#ifdef UMA_MD_SMALL_ALLOC
3113247360Sattilio	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3114247360Sattilio#else
3115247360Sattilio	keg->uk_allocf = noobj_alloc;
3116247360Sattilio#endif
3117252226Sjeff	keg->uk_flags |= UMA_ZONE_NOFREE;
3118252040Sjeff	KEG_UNLOCK(keg);
3119252040Sjeff
312092654Sjeff	return (1);
312192654Sjeff}
312292654Sjeff
312392654Sjeff/* See uma.h */
312492654Sjeffvoid
312592654Sjeffuma_prealloc(uma_zone_t zone, int items)
312692654Sjeff{
312792654Sjeff	int slabs;
312892654Sjeff	uma_slab_t slab;
3129129906Sbmilekic	uma_keg_t keg;
313092654Sjeff
3131187681Sjeff	keg = zone_first_keg(zone);
3132251826Sjeff	if (keg == NULL)
3133251826Sjeff		return;
3134252040Sjeff	KEG_LOCK(keg);
3135129906Sbmilekic	slabs = items / keg->uk_ipers;
3136129906Sbmilekic	if (slabs * keg->uk_ipers < items)
313792654Sjeff		slabs++;
313892654Sjeff	while (slabs > 0) {
3139187681Sjeff		slab = keg_alloc_slab(keg, zone, M_WAITOK);
3140187681Sjeff		if (slab == NULL)
3141187681Sjeff			break;
3142187681Sjeff		MPASS(slab->us_keg == keg);
3143129906Sbmilekic		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
314492654Sjeff		slabs--;
314592654Sjeff	}
3146252040Sjeff	KEG_UNLOCK(keg);
314792654Sjeff}
314892654Sjeff
314992654Sjeff/* See uma.h */
3150249313Sglebiusuint32_t *
3151129906Sbmilekicuma_find_refcnt(uma_zone_t zone, void *item)
3152129906Sbmilekic{
3153136276Sgreen	uma_slabrefcnt_t slabref;
3154251709Sjeff	uma_slab_t slab;
3155129906Sbmilekic	uma_keg_t keg;
3156249313Sglebius	uint32_t *refcnt;
3157129906Sbmilekic	int idx;
3158129906Sbmilekic
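	/*
	 * Recover the slab from the item's page, then use the item's offset
	 * within the slab to index the reference count array kept in the
	 * slab header.
	 */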
3159251709Sjeff	slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
3160251709Sjeff	slabref = (uma_slabrefcnt_t)slab;
3161251709Sjeff	keg = slab->us_keg;
3162251709Sjeff	KASSERT(keg->uk_flags & UMA_ZONE_REFCNT,
3163129906Sbmilekic	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
3164251709Sjeff	idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3165251709Sjeff	refcnt = &slabref->us_refcnt[idx];
3166129906Sbmilekic	return (refcnt);
3167129906Sbmilekic}
3168129906Sbmilekic
3169129906Sbmilekic/* See uma.h */
317092654Sjeffvoid
317192654Sjeffuma_reclaim(void)
317292654Sjeff{
317392654Sjeff#ifdef UMA_DEBUG
317492654Sjeff	printf("UMA: vm asked us to release pages!\n");
317592654Sjeff#endif
317694165Sjeff	bucket_enable();
317792654Sjeff	zone_foreach(zone_drain);
3178260303Smav	if (vm_page_count_min()) {
3179260303Smav		cache_drain_safe(NULL);
3180260303Smav		zone_foreach(zone_drain);
3181260303Smav	}
318292654Sjeff	/*
318392654Sjeff	 * The slab zones were visited early in the walk above, so drain them
318492654Sjeff	 * again to free pages that only became empty once the other zones
318592654Sjeff	 * were drained.  We have to do the same for the buckets.
318692654Sjeff	 */
3187120262Sjeff	zone_drain(slabzone);
3188129906Sbmilekic	zone_drain(slabrefzone);
3189120218Sjeff	bucket_zone_drain();
319092654Sjeff}
319192654Sjeff
3192165809Sjhb/* See uma.h */
3193165809Sjhbint
3194165809Sjhbuma_zone_exhausted(uma_zone_t zone)
3195165809Sjhb{
3196165809Sjhb	int full;
3197165809Sjhb
3198165809Sjhb	ZONE_LOCK(zone);
3199187681Sjeff	full = (zone->uz_flags & UMA_ZFLAG_FULL);
3200165809Sjhb	ZONE_UNLOCK(zone);
3201165809Sjhb	return (full);
3202165809Sjhb}
3203165809Sjhb
3204166213Smohansint
3205166213Smohansuma_zone_exhausted_nolock(uma_zone_t zone)
3206166213Smohans{
3207187681Sjeff	return (zone->uz_flags & UMA_ZFLAG_FULL);
3208166213Smohans}
3209166213Smohans
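/*
 * Large (multi-page) mallocs are backed directly by page_alloc() and tracked
 * by a slab header from slabzone so that uma_large_free() can later recover
 * the size and flags needed to release the pages.
 */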
321092654Sjeffvoid *
321192654Sjeffuma_large_malloc(int size, int wait)
321292654Sjeff{
321392654Sjeff	void *mem;
321492654Sjeff	uma_slab_t slab;
3215249313Sglebius	uint8_t flags;
321692654Sjeff
3217187681Sjeff	slab = zone_alloc_item(slabzone, NULL, wait);
321892654Sjeff	if (slab == NULL)
321992654Sjeff		return (NULL);
3220118040Salc	mem = page_alloc(NULL, size, &flags, wait);
322192654Sjeff	if (mem) {
3222103531Sjeff		vsetslab((vm_offset_t)mem, slab);
322392654Sjeff		slab->us_data = mem;
322492654Sjeff		slab->us_flags = flags | UMA_SLAB_MALLOC;
322592654Sjeff		slab->us_size = size;
322692654Sjeff	} else {
3227251826Sjeff		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
322892654Sjeff	}
322992654Sjeff
323092654Sjeff	return (mem);
323192654Sjeff}
323292654Sjeff
323392654Sjeffvoid
323492654Sjeffuma_large_free(uma_slab_t slab)
323592654Sjeff{
3236254182Skib
3237126793Salc	page_free(slab->us_data, slab->us_size, slab->us_flags);
3238251826Sjeff	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
323992654Sjeff}
324092654Sjeff
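/*
 * Zero a newly allocated item, handling per-CPU (UMA_ZONE_PCPU) zones, where
 * each CPU's private copy of the item must be zeroed individually.
 */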
3241262739Sglebiusstatic void
3242262739Sglebiusuma_zero_item(void *item, uma_zone_t zone)
3243262739Sglebius{
3244262739Sglebius
3245262739Sglebius	if (zone->uz_flags & UMA_ZONE_PCPU) {
3246262739Sglebius		for (int i = 0; i < mp_ncpus; i++)
3247262739Sglebius			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
3248262739Sglebius	} else
3249262739Sglebius		bzero(item, zone->uz_size);
3250262739Sglebius}
3251262739Sglebius
325292654Sjeffvoid
325392654Sjeffuma_print_stats(void)
325492654Sjeff{
325592654Sjeff	zone_foreach(uma_print_zone);
325692654Sjeff}
325792654Sjeff
3258123057Sjeffstatic void
3259123057Sjeffslab_print(uma_slab_t slab)
3260123057Sjeff{
3261251709Sjeff	printf("slab: keg %p, data %p, freecount %d\n",
3262251709Sjeff		slab->us_keg, slab->us_data, slab->us_freecount);
3263123057Sjeff}
3264123057Sjeff
3265123057Sjeffstatic void
3266123057Sjeffcache_print(uma_cache_t cache)
3267123057Sjeff{
3268125246Sdes	printf("alloc: %p(%d), free: %p(%d)\n",
3269123057Sjeff		cache->uc_allocbucket,
3270123057Sjeff		cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
3271123057Sjeff		cache->uc_freebucket,
3272123057Sjeff		cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
3273123057Sjeff}
3274123057Sjeff
3275187681Sjeffstatic void
3276187681Sjeffuma_print_keg(uma_keg_t keg)
327792654Sjeff{
3278123057Sjeff	uma_slab_t slab;
3279123057Sjeff
3280241825Seadler	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3281187681Sjeff	    "out %d free %d limit %d\n",
3282187681Sjeff	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3283129906Sbmilekic	    keg->uk_ipers, keg->uk_ppera,
3284187681Sjeff	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
3285187681Sjeff	    (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
3286123057Sjeff	printf("Part slabs:\n");
3287129906Sbmilekic	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
3288123057Sjeff		slab_print(slab);
3289123057Sjeff	printf("Free slabs:\n");
3290129906Sbmilekic	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
3291123057Sjeff		slab_print(slab);
3292123057Sjeff	printf("Full slabs:\n");
3293129906Sbmilekic	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
3294123057Sjeff		slab_print(slab);
3295187681Sjeff}
3296187681Sjeff
3297187681Sjeffvoid
3298187681Sjeffuma_print_zone(uma_zone_t zone)
3299187681Sjeff{
3300187681Sjeff	uma_cache_t cache;
3301187681Sjeff	uma_klink_t kl;
3302187681Sjeff	int i;
3303187681Sjeff
3304241825Seadler	printf("zone: %s(%p) size %d flags %#x\n",
3305187681Sjeff	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
3306187681Sjeff	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
3307187681Sjeff		uma_print_keg(kl->kl_keg);
3308209059Sjhb	CPU_FOREACH(i) {
3309123057Sjeff		cache = &zone->uz_cpu[i];
3310123057Sjeff		printf("CPU %d Cache:\n", i);
3311123057Sjeff		cache_print(cache);
3312123057Sjeff	}
331392654Sjeff}
331492654Sjeff
3315160460Srwatson#ifdef DDB
331692654Sjeff/*
3317147996Srwatson * Generate statistics across both the zone and its per-CPU caches.  Each
3318147996Srwatson * statistic is returned via its pointer argument if that pointer is non-NULL.
3319147996Srwatson *
3320147996Srwatson * Note: does not update the zone statistics, as it can't safely clear the
3321147996Srwatson * per-CPU cache statistics.
3322147996Srwatson *
3323147996Srwatson * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3324147996Srwatson * safe from off-CPU; we should modify the caches to track this information
3325147996Srwatson * directly so that we don't have to.
3326147996Srwatson */
3327147996Srwatsonstatic void
3328249313Sglebiusuma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
3329249313Sglebius    uint64_t *freesp, uint64_t *sleepsp)
3330147996Srwatson{
3331147996Srwatson	uma_cache_t cache;
3332249313Sglebius	uint64_t allocs, frees, sleeps;
3333147996Srwatson	int cachefree, cpu;
3334147996Srwatson
3335209215Ssbruno	allocs = frees = sleeps = 0;
3336147996Srwatson	cachefree = 0;
3337209059Sjhb	CPU_FOREACH(cpu) {
3338147996Srwatson		cache = &z->uz_cpu[cpu];
3339147996Srwatson		if (cache->uc_allocbucket != NULL)
3340147996Srwatson			cachefree += cache->uc_allocbucket->ub_cnt;
3341147996Srwatson		if (cache->uc_freebucket != NULL)
3342147996Srwatson			cachefree += cache->uc_freebucket->ub_cnt;
3343147996Srwatson		allocs += cache->uc_allocs;
3344147996Srwatson		frees += cache->uc_frees;
3345147996Srwatson	}
3346147996Srwatson	allocs += z->uz_allocs;
3347147996Srwatson	frees += z->uz_frees;
3348209215Ssbruno	sleeps += z->uz_sleeps;
3349147996Srwatson	if (cachefreep != NULL)
3350147996Srwatson		*cachefreep = cachefree;
3351147996Srwatson	if (allocsp != NULL)
3352147996Srwatson		*allocsp = allocs;
3353147996Srwatson	if (freesp != NULL)
3354147996Srwatson		*freesp = frees;
3355209215Ssbruno	if (sleepsp != NULL)
3356209215Ssbruno		*sleepsp = sleeps;
3357147996Srwatson}
3358160460Srwatson#endif /* DDB */
3359147996Srwatson
336092654Sjeffstatic int
3361147996Srwatsonsysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3362147996Srwatson{
3363147996Srwatson	uma_keg_t kz;
3364147996Srwatson	uma_zone_t z;
3365147996Srwatson	int count;
3366147996Srwatson
3367147996Srwatson	count = 0;
3368147996Srwatson	mtx_lock(&uma_mtx);
3369147996Srwatson	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3370147996Srwatson		LIST_FOREACH(z, &kz->uk_zones, uz_link)
3371147996Srwatson			count++;
3372147996Srwatson	}
3373147996Srwatson	mtx_unlock(&uma_mtx);
3374147996Srwatson	return (sysctl_handle_int(oidp, &count, 0, req));
3375147996Srwatson}
3376147996Srwatson
3377147996Srwatsonstatic int
3378147996Srwatsonsysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
3379147996Srwatson{
3380147996Srwatson	struct uma_stream_header ush;
3381147996Srwatson	struct uma_type_header uth;
3382147996Srwatson	struct uma_percpu_stat ups;
3383147996Srwatson	uma_bucket_t bucket;
3384147996Srwatson	struct sbuf sbuf;
3385147996Srwatson	uma_cache_t cache;
3386187681Sjeff	uma_klink_t kl;
3387147996Srwatson	uma_keg_t kz;
3388147996Srwatson	uma_zone_t z;
3389187681Sjeff	uma_keg_t k;
3390212750Smdf	int count, error, i;
3391147996Srwatson
3392217916Smdf	error = sysctl_wire_old_buffer(req, 0);
3393217916Smdf	if (error != 0)
3394217916Smdf		return (error);
3395212750Smdf	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
3396212750Smdf
3397212750Smdf	count = 0;
3398212572Smdf	mtx_lock(&uma_mtx);
3399147996Srwatson	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3400147996Srwatson		LIST_FOREACH(z, &kz->uk_zones, uz_link)
3401147996Srwatson			count++;
3402147996Srwatson	}
3403147996Srwatson
3404147996Srwatson	/*
3405147996Srwatson	 * Insert stream header.
3406147996Srwatson	 */
3407147996Srwatson	bzero(&ush, sizeof(ush));
3408147996Srwatson	ush.ush_version = UMA_STREAM_VERSION;
3409148079Srwatson	ush.ush_maxcpus = (mp_maxid + 1);
3410147996Srwatson	ush.ush_count = count;
3411212750Smdf	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
3412147996Srwatson
3413147996Srwatson	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3414147996Srwatson		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3415147996Srwatson			bzero(&uth, sizeof(uth));
3416147996Srwatson			ZONE_LOCK(z);
3417148371Srwatson			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
3418147996Srwatson			uth.uth_align = kz->uk_align;
3419147996Srwatson			uth.uth_size = kz->uk_size;
3420147996Srwatson			uth.uth_rsize = kz->uk_rsize;
3421187681Sjeff			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
3422187681Sjeff				k = kl->kl_keg;
3423187681Sjeff				uth.uth_maxpages += k->uk_maxpages;
3424187681Sjeff				uth.uth_pages += k->uk_pages;
3425187681Sjeff				uth.uth_keg_free += k->uk_free;
3426187681Sjeff				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
3427187681Sjeff				    * k->uk_ipers;
3428187681Sjeff			}
3429148371Srwatson
3430148371Srwatson			/*
3431148371Srwatson			 * A zone is secondary if it is not the first entry
3432148371Srwatson			 * on the keg's zone list.
3433148371Srwatson			 */
3434187681Sjeff			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
3435148371Srwatson			    (LIST_FIRST(&kz->uk_zones) != z))
3436148371Srwatson				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3437148371Srwatson
3438251894Sjeff			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3439147996Srwatson				uth.uth_zone_free += bucket->ub_cnt;
3440147996Srwatson			uth.uth_allocs = z->uz_allocs;
3441147996Srwatson			uth.uth_frees = z->uz_frees;
3442148070Srwatson			uth.uth_fails = z->uz_fails;
3443209215Ssbruno			uth.uth_sleeps = z->uz_sleeps;
3444212750Smdf			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
3445147996Srwatson			/*
3446148077Srwatson			 * While it is not normally safe to access a cache's
3447148077Srwatson			 * bucket pointers from a CPU that does not own the
3448148077Srwatson			 * cache, the pointers can only be exchanged, never
3449148077Srwatson			 * invalidated, without the zone lock held, so we
3450148077Srwatson			 * accept the possible race associated with bucket
3451148077Srwatson			 * exchange during monitoring.
3452147996Srwatson			 */
3453148079Srwatson			for (i = 0; i < (mp_maxid + 1); i++) {
3454147996Srwatson				bzero(&ups, sizeof(ups));
3455147996Srwatson				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3456147996Srwatson					goto skip;
3457155551Srwatson				if (CPU_ABSENT(i))
3458155551Srwatson					goto skip;
3459147996Srwatson				cache = &z->uz_cpu[i];
3460147996Srwatson				if (cache->uc_allocbucket != NULL)
3461147996Srwatson					ups.ups_cache_free +=
3462147996Srwatson					    cache->uc_allocbucket->ub_cnt;
3463147996Srwatson				if (cache->uc_freebucket != NULL)
3464147996Srwatson					ups.ups_cache_free +=
3465147996Srwatson					    cache->uc_freebucket->ub_cnt;
3466147996Srwatson				ups.ups_allocs = cache->uc_allocs;
3467147996Srwatson				ups.ups_frees = cache->uc_frees;
3468147996Srwatsonskip:
3469212750Smdf				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
3470147996Srwatson			}
3471148077Srwatson			ZONE_UNLOCK(z);
3472147996Srwatson		}
3473147996Srwatson	}
3474147996Srwatson	mtx_unlock(&uma_mtx);
3475212750Smdf	error = sbuf_finish(&sbuf);
3476212750Smdf	sbuf_delete(&sbuf);
3477147996Srwatson	return (error);
3478147996Srwatson}
3479151516Srwatson
3480262737Sglebiusint
3481262737Sglebiussysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
3482262737Sglebius{
3483262737Sglebius	uma_zone_t zone = *(uma_zone_t *)arg1;
3484262737Sglebius	int error, max, old;
3485262737Sglebius
3486262737Sglebius	old = max = uma_zone_get_max(zone);
3487262737Sglebius	error = sysctl_handle_int(oidp, &max, 0, req);
3488262737Sglebius	if (error || !req->newptr)
3489262737Sglebius		return (error);
3490262737Sglebius
3491262737Sglebius	if (max < old)
3492262737Sglebius		return (EINVAL);
3493262737Sglebius
3494262737Sglebius	uma_zone_set_max(zone, max);
3495262737Sglebius
3496262737Sglebius	return (0);
3497262737Sglebius}
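
/*
 * Example hookup (a sketch only; "foo_zone" and the oid name are
 * hypothetical, not part of this file):
 *
 *	static uma_zone_t foo_zone;
 *	SYSCTL_PROC(_vm, OID_AUTO, foo_zone_max, CTLTYPE_INT | CTLFLAG_RW,
 *	    &foo_zone, 0, sysctl_handle_uma_zone_max, "I",
 *	    "Maximum number of cached foo items");
 */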
3498262737Sglebius
3499262737Sglebiusint
3500262737Sglebiussysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
3501262737Sglebius{
3502262737Sglebius	uma_zone_t zone = *(uma_zone_t *)arg1;
3503262737Sglebius	int cur;
3504262737Sglebius
3505262737Sglebius	cur = uma_zone_get_cur(zone);
3506262737Sglebius	return (sysctl_handle_int(oidp, &cur, 0, req));
3507262737Sglebius}
3508262737Sglebius
3509151516Srwatson#ifdef DDB
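/*
 * "show uma" (and "show umacache" below) dump per-zone size, usage and
 * request counts from the kernel debugger.
 */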
3510151516SrwatsonDB_SHOW_COMMAND(uma, db_show_uma)
3511151516Srwatson{
3512249313Sglebius	uint64_t allocs, frees, sleeps;
3513151516Srwatson	uma_bucket_t bucket;
3514151516Srwatson	uma_keg_t kz;
3515151516Srwatson	uma_zone_t z;
3516151516Srwatson	int cachefree;
3517151516Srwatson
3518260306Smav	db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
3519260306Smav	    "Free", "Requests", "Sleeps", "Bucket");
3520151516Srwatson	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3521151516Srwatson		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3522151516Srwatson			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3523151516Srwatson				allocs = z->uz_allocs;
3524151516Srwatson				frees = z->uz_frees;
3525209215Ssbruno				sleeps = z->uz_sleeps;
3526151516Srwatson				cachefree = 0;
3527151516Srwatson			} else
3528151516Srwatson				uma_zone_sumstat(z, &cachefree, &allocs,
3529209215Ssbruno				    &frees, &sleeps);
3530187681Sjeff			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3531151516Srwatson			    (LIST_FIRST(&kz->uk_zones) != z)))
3532151516Srwatson				cachefree += kz->uk_free;
3533251894Sjeff			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3534151516Srwatson				cachefree += bucket->ub_cnt;
3535260306Smav			db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
3536260306Smav			    z->uz_name, (uintmax_t)kz->uk_size,
3537163702Srwatson			    (intmax_t)(allocs - frees), cachefree,
3538260306Smav			    (uintmax_t)allocs, sleeps, z->uz_count);
3539238000Sjhb			if (db_pager_quit)
3540238000Sjhb				return;
3541151516Srwatson		}
3542151516Srwatson	}
3543151516Srwatson}
3544260306Smav
3545260306SmavDB_SHOW_COMMAND(umacache, db_show_umacache)
3546260306Smav{
3547260306Smav	uint64_t allocs, frees;
3548260306Smav	uma_bucket_t bucket;
3549260306Smav	uma_zone_t z;
3550260306Smav	int cachefree;
3551260306Smav
3552260306Smav	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3553260306Smav	    "Requests", "Bucket");
3554260306Smav	LIST_FOREACH(z, &uma_cachezones, uz_link) {
3555260306Smav		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
3556260306Smav		LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3557260306Smav			cachefree += bucket->ub_cnt;
3558260306Smav		db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
3559260306Smav		    z->uz_name, (uintmax_t)z->uz_size,
3560260306Smav		    (intmax_t)(allocs - frees), cachefree,
3561260306Smav		    (uintmax_t)allocs, z->uz_count);
3562260306Smav		if (db_pager_quit)
3563260306Smav			return;
3564260306Smav	}
3565260306Smav}
3566151516Srwatson#endif
3567