uma_core.c revision 262739
1285809Sscottl/*-
2285809Sscottl * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
3285809Sscottl * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
4285809Sscottl * Copyright (c) 2004-2006 Robert N. M. Watson
5285809Sscottl * All rights reserved.
6285809Sscottl *
7285809Sscottl * Redistribution and use in source and binary forms, with or without
8285809Sscottl * modification, are permitted provided that the following conditions
9285809Sscottl * are met:
10285809Sscottl * 1. Redistributions of source code must retain the above copyright
11285809Sscottl *    notice unmodified, this list of conditions, and the following
12285809Sscottl *    disclaimer.
13285809Sscottl * 2. Redistributions in binary form must reproduce the above copyright
14285809Sscottl *    notice, this list of conditions and the following disclaimer in the
15285809Sscottl *    documentation and/or other materials provided with the distribution.
16285809Sscottl *
17285809Sscottl * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18285809Sscottl * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19285809Sscottl * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20285809Sscottl * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21285809Sscottl * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22285809Sscottl * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23285809Sscottl * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24285809Sscottl * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25285809Sscottl * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26285809Sscottl * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27285809Sscottl */
28285809Sscottl
/*
 * uma_core.c  Implementation of the Universal Memory allocator
 *
 * This allocator is intended to replace the multitude of similar object caches
 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
 * efficient.  A primary design goal is to return unused memory to the rest of
 * the system.  This will make the system as a whole more flexible due to the
 * ability to move memory to subsystems which most need it instead of leaving
 * pools of reserved memory unused.
 *
 * The basic ideas stem from similar slab/zone based allocators whose algorithms
 * are well known.
 *
 */
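/*
 * As a rough, illustrative sketch of the consumer-facing API (uma.h has the
 * authoritative prototypes; "struct foo" and "foo_zone" below are made up):
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK);
 *	...
 *	uma_zfree(foo_zone, p);
 *	uma_zdestroy(foo_zone);
 */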
43285809Sscottl
44285809Sscottl/*
45285809Sscottl * TODO:
46285809Sscottl *	- Improve memory usage for large allocations
47285809Sscottl *	- Investigate cache size adjustments
48285809Sscottl */
49285809Sscottl
50285809Sscottl#include <sys/cdefs.h>
51285809Sscottl__FBSDID("$FreeBSD: stable/10/sys/vm/uma_core.c 262739 2014-03-04 14:46:30Z glebius $");
52285809Sscottl
53285809Sscottl/* I should really use ktr.. */
54285809Sscottl/*
55285809Sscottl#define UMA_DEBUG 1
56285809Sscottl#define UMA_DEBUG_ALLOC 1
57285809Sscottl#define UMA_DEBUG_ALLOC_1 1
58285809Sscottl*/
59285809Sscottl
60285809Sscottl#include "opt_ddb.h"
61285809Sscottl#include "opt_param.h"
62285809Sscottl#include "opt_vm.h"
63285809Sscottl
64285809Sscottl#include <sys/param.h>
65285809Sscottl#include <sys/systm.h>
66285809Sscottl#include <sys/bitset.h>
67285809Sscottl#include <sys/kernel.h>
68285809Sscottl#include <sys/types.h>
69285809Sscottl#include <sys/queue.h>
70285809Sscottl#include <sys/malloc.h>
71285809Sscottl#include <sys/ktr.h>
72285809Sscottl#include <sys/lock.h>
73285809Sscottl#include <sys/sysctl.h>
74285809Sscottl#include <sys/mutex.h>
75285809Sscottl#include <sys/proc.h>
76285809Sscottl#include <sys/rwlock.h>
77285809Sscottl#include <sys/sbuf.h>
78285809Sscottl#include <sys/sched.h>
79285809Sscottl#include <sys/smp.h>
80285809Sscottl#include <sys/vmmeter.h>
81285809Sscottl
82285809Sscottl#include <vm/vm.h>
83285809Sscottl#include <vm/vm_object.h>
84285809Sscottl#include <vm/vm_page.h>
85285809Sscottl#include <vm/vm_pageout.h>
86285809Sscottl#include <vm/vm_param.h>
87285809Sscottl#include <vm/vm_map.h>
88285809Sscottl#include <vm/vm_kern.h>
89285809Sscottl#include <vm/vm_extern.h>
90285809Sscottl#include <vm/uma.h>
91285809Sscottl#include <vm/uma_int.h>
92285809Sscottl#include <vm/uma_dbg.h>
93285809Sscottl
94285809Sscottl#include <ddb/ddb.h>
95285809Sscottl
96285809Sscottl#ifdef DEBUG_MEMGUARD
97285809Sscottl#include <vm/memguard.h>
98285809Sscottl#endif
99285809Sscottl
100285809Sscottl/*
101285809Sscottl * This is the zone and keg from which all zones are spawned.  The idea is that
102285809Sscottl * even the zone & keg heads are allocated from the allocator, so we use the
103285809Sscottl * bss section to bootstrap us.
104285809Sscottl */
105285809Sscottlstatic struct uma_keg masterkeg;
106285809Sscottlstatic struct uma_zone masterzone_k;
107285809Sscottlstatic struct uma_zone masterzone_z;
108285809Sscottlstatic uma_zone_t kegs = &masterzone_k;
109285809Sscottlstatic uma_zone_t zones = &masterzone_z;
110285809Sscottl
111285809Sscottl/* This is the zone from which all of uma_slab_t's are allocated. */
112285809Sscottlstatic uma_zone_t slabzone;
113285809Sscottlstatic uma_zone_t slabrefzone;	/* With refcounters (for UMA_ZONE_REFCNT) */
114285809Sscottl
115285809Sscottl/*
116285809Sscottl * The initial hash tables come out of this zone so they can be allocated
117285809Sscottl * prior to malloc coming up.
118285809Sscottl */
119285809Sscottlstatic uma_zone_t hashzone;
120285809Sscottl
121285809Sscottl/* The boot-time adjusted value for cache line alignment. */
122285809Sscottlint uma_align_cache = 64 - 1;
123285809Sscottl
124285809Sscottlstatic MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
125285809Sscottl
126285809Sscottl/*
127285809Sscottl * Are we allowed to allocate buckets?
128285809Sscottl */
129285809Sscottlstatic int bucketdisable = 1;
130285809Sscottl
131285809Sscottl/* Linked list of all kegs in the system */
132285809Sscottlstatic LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
133285809Sscottl
134285809Sscottl/* Linked list of all cache-only zones in the system */
135285809Sscottlstatic LIST_HEAD(,uma_zone) uma_cachezones =
136285809Sscottl    LIST_HEAD_INITIALIZER(uma_cachezones);
137285809Sscottl
138285809Sscottl/* This mutex protects the keg list */
139285809Sscottlstatic struct mtx_padalign uma_mtx;
140285809Sscottl
141285809Sscottl/* Linked list of boot time pages */
142285809Sscottlstatic LIST_HEAD(,uma_slab) uma_boot_pages =
143285809Sscottl    LIST_HEAD_INITIALIZER(uma_boot_pages);
144285809Sscottl
145285809Sscottl/* This mutex protects the boot time pages list */
146285809Sscottlstatic struct mtx_padalign uma_boot_pages_mtx;
147285809Sscottl
148285809Sscottl/* Is the VM done starting up? */
149285809Sscottlstatic int booted = 0;
150285809Sscottl#define	UMA_STARTUP	1
151285809Sscottl#define	UMA_STARTUP2	2
152285809Sscottl
153285809Sscottl/*
154285809Sscottl * Only mbuf clusters use ref zones.  Just provide enough references
155285809Sscottl * to support the one user.  New code should not use the ref facility.
156285809Sscottl */
157285809Sscottlstatic const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES;
158285809Sscottl
159285809Sscottl/*
160285809Sscottl * This is the handle used to schedule events that need to happen
161285809Sscottl * outside of the allocation fast path.
162285809Sscottl */
163285809Sscottlstatic struct callout uma_callout;
164285809Sscottl#define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
165285809Sscottl
166285809Sscottl/*
167285809Sscottl * This structure is passed as the zone ctor arg so that I don't have to create
168285809Sscottl * a special allocation function just for zones.
169285809Sscottl */
170285809Sscottlstruct uma_zctor_args {
171285809Sscottl	const char *name;
172285809Sscottl	size_t size;
173285809Sscottl	uma_ctor ctor;
174285809Sscottl	uma_dtor dtor;
175285809Sscottl	uma_init uminit;
176285809Sscottl	uma_fini fini;
177285809Sscottl	uma_import import;
178285809Sscottl	uma_release release;
179285809Sscottl	void *arg;
180285809Sscottl	uma_keg_t keg;
181285809Sscottl	int align;
182285809Sscottl	uint32_t flags;
183285809Sscottl};
184285809Sscottl
185285809Sscottlstruct uma_kctor_args {
186285809Sscottl	uma_zone_t zone;
187285809Sscottl	size_t size;
188285809Sscottl	uma_init uminit;
189285809Sscottl	uma_fini fini;
190285809Sscottl	int align;
191285809Sscottl	uint32_t flags;
192285809Sscottl};
193285809Sscottl
194285809Sscottlstruct uma_bucket_zone {
195285809Sscottl	uma_zone_t	ubz_zone;
196285809Sscottl	char		*ubz_name;
197285809Sscottl	int		ubz_entries;	/* Number of items it can hold. */
198285809Sscottl	int		ubz_maxsize;	/* Maximum allocation size per-item. */
199285809Sscottl};
200285809Sscottl
/*
 * Compute the actual number of bucket entries so that the bucket
 * allocation packs into a power-of-two size, for more efficient space
 * utilization.
 */
205285809Sscottl#define	BUCKET_SIZE(n)						\
206285809Sscottl    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
207285809Sscottl
208285809Sscottl#define	BUCKET_MAX	BUCKET_SIZE(128)
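/*
 * Illustrative arithmetic (not authoritative; the real sizes depend on the
 * platform): assuming 8-byte pointers and a 24-byte struct uma_bucket
 * header, BUCKET_SIZE(16) = (16 * 8 - 24) / 8 = 13 entries, and the
 * allocation size computed in bucket_init() comes back out to exactly
 * 24 + 13 * 8 = 128 bytes, i.e. 16 pointers worth of space.
 */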
209285809Sscottl
210285809Sscottlstruct uma_bucket_zone bucket_zones[] = {
211285809Sscottl	{ NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
212285809Sscottl	{ NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
213285809Sscottl	{ NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
214285809Sscottl	{ NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
215285809Sscottl	{ NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
216285809Sscottl	{ NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
217285809Sscottl	{ NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
218285809Sscottl	{ NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
219285809Sscottl	{ NULL, NULL, 0}
220285809Sscottl};
221285809Sscottl
222285809Sscottl/*
223285809Sscottl * Flags and enumerations to be passed to internal functions.
224285809Sscottl */
225285809Sscottlenum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
226285809Sscottl
227285809Sscottl/* Prototypes.. */
228285809Sscottl
229285809Sscottlstatic void *noobj_alloc(uma_zone_t, int, uint8_t *, int);
230285809Sscottlstatic void *page_alloc(uma_zone_t, int, uint8_t *, int);
231285809Sscottlstatic void *startup_alloc(uma_zone_t, int, uint8_t *, int);
232285809Sscottlstatic void page_free(void *, int, uint8_t);
233285809Sscottlstatic uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
234285809Sscottlstatic void cache_drain(uma_zone_t);
235285809Sscottlstatic void bucket_drain(uma_zone_t, uma_bucket_t);
236285809Sscottlstatic void bucket_cache_drain(uma_zone_t zone);
237285809Sscottlstatic int keg_ctor(void *, int, void *, int);
238285809Sscottlstatic void keg_dtor(void *, int, void *);
239285809Sscottlstatic int zone_ctor(void *, int, void *, int);
240285809Sscottlstatic void zone_dtor(void *, int, void *);
241285809Sscottlstatic int zero_init(void *, int, int);
242285809Sscottlstatic void keg_small_init(uma_keg_t keg);
243285809Sscottlstatic void keg_large_init(uma_keg_t keg);
244285809Sscottlstatic void zone_foreach(void (*zfunc)(uma_zone_t));
245285809Sscottlstatic void zone_timeout(uma_zone_t zone);
246285809Sscottlstatic int hash_alloc(struct uma_hash *);
247285809Sscottlstatic int hash_expand(struct uma_hash *, struct uma_hash *);
248285809Sscottlstatic void hash_free(struct uma_hash *hash);
249285809Sscottlstatic void uma_timeout(void *);
250285809Sscottlstatic void uma_startup3(void);
251285809Sscottlstatic void *zone_alloc_item(uma_zone_t, void *, int);
252285809Sscottlstatic void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
253285809Sscottlstatic void bucket_enable(void);
254285809Sscottlstatic void bucket_init(void);
255285809Sscottlstatic uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
256285809Sscottlstatic void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
257285809Sscottlstatic void bucket_zone_drain(void);
258285809Sscottlstatic uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
259285809Sscottlstatic uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
260285809Sscottlstatic uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
261285809Sscottlstatic void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
262285809Sscottlstatic void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
263285809Sscottlstatic uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
264285809Sscottl    uma_fini fini, int align, uint32_t flags);
265285809Sscottlstatic int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
266285809Sscottlstatic void zone_release(uma_zone_t zone, void **bucket, int cnt);
267285809Sscottlstatic void uma_zero_item(void *item, uma_zone_t zone);
268285809Sscottl
269285809Sscottlvoid uma_print_zone(uma_zone_t);
270285809Sscottlvoid uma_print_stats(void);
271285809Sscottlstatic int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
272285809Sscottlstatic int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
273285809Sscottl
274285809SscottlSYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
275285809Sscottl
276285809SscottlSYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
277285809Sscottl    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
278285809Sscottl
279285809SscottlSYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
280285809Sscottl    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
281285809Sscottl
static int zone_warnings = 1;
TUNABLE_INT("vm.zone_warnings", &zone_warnings);
SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RW, &zone_warnings, 0,
    "Warn when UMA zones become full");
286285809Sscottl
287285809Sscottl/*
288285809Sscottl * This routine checks to see whether or not it's safe to enable buckets.
289285809Sscottl */
290285809Sscottlstatic void
291285809Sscottlbucket_enable(void)
292285809Sscottl{
293285809Sscottl	bucketdisable = vm_page_count_min();
294285809Sscottl}
295285809Sscottl
296285809Sscottl/*
297285809Sscottl * Initialize bucket_zones, the array of zones of buckets of various sizes.
298285809Sscottl *
299285809Sscottl * For each zone, calculate the memory required for each bucket, consisting
300285809Sscottl * of the header and an array of pointers.
301285809Sscottl */
302285809Sscottlstatic void
303285809Sscottlbucket_init(void)
304285809Sscottl{
305285809Sscottl	struct uma_bucket_zone *ubz;
306285809Sscottl	int size;
307285809Sscottl	int i;
308285809Sscottl
309285809Sscottl	for (i = 0, ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
310285809Sscottl		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
311285809Sscottl		size += sizeof(void *) * ubz->ubz_entries;
312285809Sscottl		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
313285809Sscottl		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
314285809Sscottl		    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
315285809Sscottl	}
316285809Sscottl}
317285809Sscottl
318285809Sscottl/*
319285809Sscottl * Given a desired number of entries for a bucket, return the zone from which
320285809Sscottl * to allocate the bucket.
321285809Sscottl */
322285809Sscottlstatic struct uma_bucket_zone *
323285809Sscottlbucket_zone_lookup(int entries)
324285809Sscottl{
325285809Sscottl	struct uma_bucket_zone *ubz;
326285809Sscottl
327285809Sscottl	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
328285809Sscottl		if (ubz->ubz_entries >= entries)
329285809Sscottl			return (ubz);
330285809Sscottl	ubz--;
331285809Sscottl	return (ubz);
332285809Sscottl}
333285809Sscottl
334285809Sscottlstatic int
335285809Sscottlbucket_select(int size)
336285809Sscottl{
337285809Sscottl	struct uma_bucket_zone *ubz;
338285809Sscottl
339285809Sscottl	ubz = &bucket_zones[0];
340285809Sscottl	if (size > ubz->ubz_maxsize)
341285809Sscottl		return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
342285809Sscottl
343285809Sscottl	for (; ubz->ubz_entries != 0; ubz++)
344285809Sscottl		if (ubz->ubz_maxsize < size)
345285809Sscottl			break;
346285809Sscottl	ubz--;
347285809Sscottl	return (ubz->ubz_entries);
348285809Sscottl}
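/*
 * For example (a sketch based on the table above): for a 256-byte item the
 * loop stops at the "128 Bucket" row (maxsize 128 < 256) and steps back to
 * "64 Bucket", so roughly BUCKET_SIZE(64) entries are used per bucket.  For
 * items larger than 4096 bytes the entry count is scaled down in proportion
 * to the item size, but never below one.
 */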
349285809Sscottl
350285809Sscottlstatic uma_bucket_t
351285809Sscottlbucket_alloc(uma_zone_t zone, void *udata, int flags)
352285809Sscottl{
353285809Sscottl	struct uma_bucket_zone *ubz;
354285809Sscottl	uma_bucket_t bucket;
355285809Sscottl
356285809Sscottl	/*
357285809Sscottl	 * This is to stop us from allocating per cpu buckets while we're
358285809Sscottl	 * running out of vm.boot_pages.  Otherwise, we would exhaust the
359285809Sscottl	 * boot pages.  This also prevents us from allocating buckets in
360285809Sscottl	 * low memory situations.
361285809Sscottl	 */
362285809Sscottl	if (bucketdisable)
363285809Sscottl		return (NULL);
364285809Sscottl	/*
365285809Sscottl	 * To limit bucket recursion we store the original zone flags
366285809Sscottl	 * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
367285809Sscottl	 * NOVM flag to persist even through deep recursions.  We also
368285809Sscottl	 * store ZFLAG_BUCKET once we have recursed attempting to allocate
369285809Sscottl	 * a bucket for a bucket zone so we do not allow infinite bucket
370285809Sscottl	 * recursion.  This cookie will even persist to frees of unused
371285809Sscottl	 * buckets via the allocation path or bucket allocations in the
372285809Sscottl	 * free path.
373285809Sscottl	 */
374285809Sscottl	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
375285809Sscottl		udata = (void *)(uintptr_t)zone->uz_flags;
376285809Sscottl	else {
377285809Sscottl		if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
378285809Sscottl			return (NULL);
379285809Sscottl		udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
380285809Sscottl	}
381285809Sscottl	if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
382285809Sscottl		flags |= M_NOVM;
383285809Sscottl	ubz = bucket_zone_lookup(zone->uz_count);
384285809Sscottl	bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
385285809Sscottl	if (bucket) {
386285809Sscottl#ifdef INVARIANTS
387285809Sscottl		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
388285809Sscottl#endif
389285809Sscottl		bucket->ub_cnt = 0;
390285809Sscottl		bucket->ub_entries = ubz->ubz_entries;
391285809Sscottl	}
392285809Sscottl
393285809Sscottl	return (bucket);
394285809Sscottl}
395285809Sscottl
396285809Sscottlstatic void
397285809Sscottlbucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
398285809Sscottl{
399285809Sscottl	struct uma_bucket_zone *ubz;
400285809Sscottl
	KASSERT(bucket->ub_cnt == 0,
	    ("bucket_free: Freeing a non-empty bucket."));
403285809Sscottl	if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
404285809Sscottl		udata = (void *)(uintptr_t)zone->uz_flags;
405285809Sscottl	ubz = bucket_zone_lookup(bucket->ub_entries);
406285809Sscottl	uma_zfree_arg(ubz->ubz_zone, bucket, udata);
407285809Sscottl}
408285809Sscottl
409285809Sscottlstatic void
410285809Sscottlbucket_zone_drain(void)
411285809Sscottl{
412285809Sscottl	struct uma_bucket_zone *ubz;
413285809Sscottl
414285809Sscottl	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
415285809Sscottl		zone_drain(ubz->ubz_zone);
416285809Sscottl}
417285809Sscottl
418285809Sscottlstatic void
419285809Sscottlzone_log_warning(uma_zone_t zone)
420285809Sscottl{
421285809Sscottl	static const struct timeval warninterval = { 300, 0 };
422285809Sscottl
423285809Sscottl	if (!zone_warnings || zone->uz_warning == NULL)
424285809Sscottl		return;
425285809Sscottl
426285809Sscottl	if (ratecheck(&zone->uz_ratecheck, &warninterval))
427285809Sscottl		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
428285809Sscottl}
429285809Sscottl
430285809Sscottlstatic void
431285809Sscottlzone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
432285809Sscottl{
433285809Sscottl	uma_klink_t klink;
434285809Sscottl
435285809Sscottl	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
436285809Sscottl		kegfn(klink->kl_keg);
437285809Sscottl}
438285809Sscottl
/*
 * Routine called by the callout to fire off time-interval based
 * calculations (stats, hash size, etc.).
 *
 * Arguments:
 *	arg   Unused
 *
 * Returns:
 *	Nothing
 */
449285809Sscottlstatic void
450285809Sscottluma_timeout(void *unused)
451285809Sscottl{
452285809Sscottl	bucket_enable();
453285809Sscottl	zone_foreach(zone_timeout);
454285809Sscottl
455285809Sscottl	/* Reschedule this event */
456285809Sscottl	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
457285809Sscottl}
458285809Sscottl
459285809Sscottl/*
460285809Sscottl * Routine to perform timeout driven calculations.  This expands the
461285809Sscottl * hashes and does per cpu statistics aggregation.
462285809Sscottl *
463285809Sscottl *  Returns nothing.
464285809Sscottl */
465285809Sscottlstatic void
466285809Sscottlkeg_timeout(uma_keg_t keg)
467285809Sscottl{
468285809Sscottl
469285809Sscottl	KEG_LOCK(keg);
	/*
	 * Expand the keg hash table.
	 *
	 * This is done if the number of slabs is larger than the hash size.
	 * What I'm trying to do here is eliminate collisions entirely.  This
	 * may be a little aggressive.  Should I allow for two collisions max?
	 */
477285809Sscottl	if (keg->uk_flags & UMA_ZONE_HASH &&
478285809Sscottl	    keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
479285809Sscottl		struct uma_hash newhash;
480285809Sscottl		struct uma_hash oldhash;
481285809Sscottl		int ret;
482285809Sscottl
483285809Sscottl		/*
484285809Sscottl		 * This is so involved because allocating and freeing
485285809Sscottl		 * while the keg lock is held will lead to deadlock.
486285809Sscottl		 * I have to do everything in stages and check for
487285809Sscottl		 * races.
488285809Sscottl		 */
489285809Sscottl		newhash = keg->uk_hash;
490285809Sscottl		KEG_UNLOCK(keg);
491285809Sscottl		ret = hash_alloc(&newhash);
492285809Sscottl		KEG_LOCK(keg);
493285809Sscottl		if (ret) {
494285809Sscottl			if (hash_expand(&keg->uk_hash, &newhash)) {
495285809Sscottl				oldhash = keg->uk_hash;
496285809Sscottl				keg->uk_hash = newhash;
497285809Sscottl			} else
498285809Sscottl				oldhash = newhash;
499285809Sscottl
500285809Sscottl			KEG_UNLOCK(keg);
501285809Sscottl			hash_free(&oldhash);
502285809Sscottl			return;
503285809Sscottl		}
504285809Sscottl	}
505285809Sscottl	KEG_UNLOCK(keg);
506285809Sscottl}
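/*
 * For example (hypothetical numbers): a HASH keg with uk_ppera == 1 and a
 * 64-entry hash table gets its table doubled by the code above once it has
 * accumulated 64 or more slabs (uk_pages / uk_ppera >= uh_hashsize).
 */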
507285809Sscottl
508285809Sscottlstatic void
509285809Sscottlzone_timeout(uma_zone_t zone)
510285809Sscottl{
511285809Sscottl
512285809Sscottl	zone_foreach_keg(zone, &keg_timeout);
513285809Sscottl}
514285809Sscottl
/*
 * Allocate and zero fill the next sized hash table from the appropriate
 * backing store.
 *
 * Arguments:
 *	hash  A new hash structure with the old hash size in uh_hashsize
 *
 * Returns:
 *	1 on success and 0 on failure.
 */
525285809Sscottlstatic int
526285809Sscottlhash_alloc(struct uma_hash *hash)
527285809Sscottl{
528285809Sscottl	int oldsize;
529285809Sscottl	int alloc;
530285809Sscottl
531285809Sscottl	oldsize = hash->uh_hashsize;
532285809Sscottl
	/* We're just going to go to the next greater power of two. */
534285809Sscottl	if (oldsize)  {
535285809Sscottl		hash->uh_hashsize = oldsize * 2;
536285809Sscottl		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
537285809Sscottl		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
538285809Sscottl		    M_UMAHASH, M_NOWAIT);
539285809Sscottl	} else {
540285809Sscottl		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
541285809Sscottl		hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
542285809Sscottl		    M_WAITOK);
543285809Sscottl		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
544285809Sscottl	}
545285809Sscottl	if (hash->uh_slab_hash) {
546285809Sscottl		bzero(hash->uh_slab_hash, alloc);
547285809Sscottl		hash->uh_hashmask = hash->uh_hashsize - 1;
548285809Sscottl		return (1);
549285809Sscottl	}
550285809Sscottl
551285809Sscottl	return (0);
552285809Sscottl}
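/*
 * Sizing sketch (UMA_HASH_SIZE_INIT is defined in uma_int.h; the value used
 * here is only illustrative): the first table comes from hashzone at
 * UMA_HASH_SIZE_INIT entries, and each later call doubles the table, e.g.
 * 32 -> 64 -> 128 slots, with the malloc()ed size being
 * hashsize * sizeof(struct slabhead).
 */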
553285809Sscottl
/*
 * Expands the hash table for HASH zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path, otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *	oldhash  The hash you want to expand
 *	newhash  The hash structure for the new table
 *
 * Returns:
 *	0 if the table was not expanded, 1 on success.
 */
568285809Sscottlstatic int
569285809Sscottlhash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
570285809Sscottl{
571285809Sscottl	uma_slab_t slab;
572285809Sscottl	int hval;
573285809Sscottl	int i;
574285809Sscottl
575285809Sscottl	if (!newhash->uh_slab_hash)
576285809Sscottl		return (0);
577285809Sscottl
578285809Sscottl	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
579285809Sscottl		return (0);
580285809Sscottl
581285809Sscottl	/*
582285809Sscottl	 * I need to investigate hash algorithms for resizing without a
583285809Sscottl	 * full rehash.
584285809Sscottl	 */
585285809Sscottl
586285809Sscottl	for (i = 0; i < oldhash->uh_hashsize; i++)
587285809Sscottl		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
588285809Sscottl			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
589285809Sscottl			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
590285809Sscottl			hval = UMA_HASH(newhash, slab->us_data);
591285809Sscottl			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
592285809Sscottl			    slab, us_hlink);
593285809Sscottl		}
594285809Sscottl
595285809Sscottl	return (1);
596285809Sscottl}
597285809Sscottl
/*
 * Free the hash table's slab hash array to the appropriate backing store.
 *
 * Arguments:
 *	hash  The hash structure whose array we're freeing
 *
 * Returns:
 *	Nothing
 */
608285809Sscottlstatic void
609285809Sscottlhash_free(struct uma_hash *hash)
610285809Sscottl{
611285809Sscottl	if (hash->uh_slab_hash == NULL)
612285809Sscottl		return;
613285809Sscottl	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
614285809Sscottl		zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
615285809Sscottl	else
616285809Sscottl		free(hash->uh_slab_hash, M_UMAHASH);
617285809Sscottl}
618285809Sscottl
619285809Sscottl/*
620285809Sscottl * Frees all outstanding items in a bucket
621285809Sscottl *
622285809Sscottl * Arguments:
623285809Sscottl *	zone   The zone to free to, must be unlocked.
624285809Sscottl *	bucket The free/alloc bucket with items, cpu queue must be locked.
625285809Sscottl *
626285809Sscottl * Returns:
627285809Sscottl *	Nothing
628285809Sscottl */
629285809Sscottl
630285809Sscottlstatic void
631285809Sscottlbucket_drain(uma_zone_t zone, uma_bucket_t bucket)
632285809Sscottl{
633285809Sscottl	int i;
634285809Sscottl
635285809Sscottl	if (bucket == NULL)
636285809Sscottl		return;
637285809Sscottl
638285809Sscottl	if (zone->uz_fini)
639285809Sscottl		for (i = 0; i < bucket->ub_cnt; i++)
640285809Sscottl			zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
641285809Sscottl	zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
642285809Sscottl	bucket->ub_cnt = 0;
643285809Sscottl}
644285809Sscottl
/*
 * Drains the per cpu caches for a zone.
 *
 * NOTE: This may only be called while the zone is being torn down, and not
 * during normal operation.  This is necessary in order that we do not have
 * to migrate CPUs to drain the per-CPU caches.
 *
 * Arguments:
 *	zone     The zone to drain, must be unlocked.
 *
 * Returns:
 *	Nothing
 */
658285809Sscottlstatic void
659285809Sscottlcache_drain(uma_zone_t zone)
660285809Sscottl{
661285809Sscottl	uma_cache_t cache;
662285809Sscottl	int cpu;
663285809Sscottl
	/*
	 * XXX: It is safe to not lock the per-CPU caches, because we're
	 * tearing down the zone anyway.  I.e., there will be no further use
	 * of the caches at this point.
	 *
	 * XXX: It would be good to be able to assert that the zone is being
	 * torn down to prevent improper use of cache_drain().
	 *
	 * XXX: We lock the zone before passing into bucket_cache_drain() as
	 * it is used elsewhere.  Should the tear-down path be made special
	 * there in some form?
	 */
676285809Sscottl	CPU_FOREACH(cpu) {
677285809Sscottl		cache = &zone->uz_cpu[cpu];
678285809Sscottl		bucket_drain(zone, cache->uc_allocbucket);
679285809Sscottl		bucket_drain(zone, cache->uc_freebucket);
680285809Sscottl		if (cache->uc_allocbucket != NULL)
681285809Sscottl			bucket_free(zone, cache->uc_allocbucket, NULL);
682285809Sscottl		if (cache->uc_freebucket != NULL)
683285809Sscottl			bucket_free(zone, cache->uc_freebucket, NULL);
684285809Sscottl		cache->uc_allocbucket = cache->uc_freebucket = NULL;
685285809Sscottl	}
686285809Sscottl	ZONE_LOCK(zone);
687285809Sscottl	bucket_cache_drain(zone);
688285809Sscottl	ZONE_UNLOCK(zone);
689285809Sscottl}
690285809Sscottl
691285809Sscottlstatic void
692285809Sscottlcache_shrink(uma_zone_t zone)
693285809Sscottl{
694285809Sscottl
695285809Sscottl	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
696285809Sscottl		return;
697285809Sscottl
698285809Sscottl	ZONE_LOCK(zone);
699285809Sscottl	zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
700285809Sscottl	ZONE_UNLOCK(zone);
701285809Sscottl}
702285809Sscottl
703285809Sscottlstatic void
704285809Sscottlcache_drain_safe_cpu(uma_zone_t zone)
705285809Sscottl{
706285809Sscottl	uma_cache_t cache;
707285809Sscottl	uma_bucket_t b1, b2;
708285809Sscottl
709285809Sscottl	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
710285809Sscottl		return;
711285809Sscottl
712285809Sscottl	b1 = b2 = NULL;
713285809Sscottl	ZONE_LOCK(zone);
714285809Sscottl	critical_enter();
715285809Sscottl	cache = &zone->uz_cpu[curcpu];
716285809Sscottl	if (cache->uc_allocbucket) {
717285809Sscottl		if (cache->uc_allocbucket->ub_cnt != 0)
718285809Sscottl			LIST_INSERT_HEAD(&zone->uz_buckets,
719285809Sscottl			    cache->uc_allocbucket, ub_link);
720285809Sscottl		else
721285809Sscottl			b1 = cache->uc_allocbucket;
722285809Sscottl		cache->uc_allocbucket = NULL;
723285809Sscottl	}
724285809Sscottl	if (cache->uc_freebucket) {
725285809Sscottl		if (cache->uc_freebucket->ub_cnt != 0)
726285809Sscottl			LIST_INSERT_HEAD(&zone->uz_buckets,
727285809Sscottl			    cache->uc_freebucket, ub_link);
728285809Sscottl		else
729285809Sscottl			b2 = cache->uc_freebucket;
730285809Sscottl		cache->uc_freebucket = NULL;
731285809Sscottl	}
732285809Sscottl	critical_exit();
733285809Sscottl	ZONE_UNLOCK(zone);
734285809Sscottl	if (b1)
735285809Sscottl		bucket_free(zone, b1, NULL);
736285809Sscottl	if (b2)
737285809Sscottl		bucket_free(zone, b2, NULL);
738285809Sscottl}
739285809Sscottl
/*
 * Safely drain the per-CPU caches of a zone (or of all zones) into the
 * zone bucket lists.  This is an expensive call because it needs to bind
 * to all CPUs one by one and enter a critical section on each of them in
 * order to safely access their cache buckets.
 * The zone lock must not be held when calling this function.
 */
747285809Sscottlstatic void
748285809Sscottlcache_drain_safe(uma_zone_t zone)
749285809Sscottl{
750285809Sscottl	int cpu;
751285809Sscottl
	/*
	 * Polite bucket size shrinking was not enough; shrink aggressively.
	 */
755285809Sscottl	if (zone)
756285809Sscottl		cache_shrink(zone);
757285809Sscottl	else
758285809Sscottl		zone_foreach(cache_shrink);
759285809Sscottl
760285809Sscottl	CPU_FOREACH(cpu) {
761285809Sscottl		thread_lock(curthread);
762285809Sscottl		sched_bind(curthread, cpu);
763285809Sscottl		thread_unlock(curthread);
764285809Sscottl
765285809Sscottl		if (zone)
766285809Sscottl			cache_drain_safe_cpu(zone);
767285809Sscottl		else
768285809Sscottl			zone_foreach(cache_drain_safe_cpu);
769285809Sscottl	}
770285809Sscottl	thread_lock(curthread);
771285809Sscottl	sched_unbind(curthread);
772285809Sscottl	thread_unlock(curthread);
773285809Sscottl}
774285809Sscottl
775285809Sscottl/*
776285809Sscottl * Drain the cached buckets from a zone.  Expects a locked zone on entry.
777285809Sscottl */
778285809Sscottlstatic void
779285809Sscottlbucket_cache_drain(uma_zone_t zone)
780285809Sscottl{
781285809Sscottl	uma_bucket_t bucket;
782285809Sscottl
783285809Sscottl	/*
784285809Sscottl	 * Drain the bucket queues and free the buckets, we just keep two per
785285809Sscottl	 * cpu (alloc/free).
786285809Sscottl	 */
787285809Sscottl	while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
788285809Sscottl		LIST_REMOVE(bucket, ub_link);
789285809Sscottl		ZONE_UNLOCK(zone);
790285809Sscottl		bucket_drain(zone, bucket);
791285809Sscottl		bucket_free(zone, bucket, NULL);
792285809Sscottl		ZONE_LOCK(zone);
793285809Sscottl	}
794285809Sscottl
	/*
	 * Shrink further bucket sizes.  The price of a single zone lock
	 * collision is probably lower than the price of a global cache drain.
	 */
799285809Sscottl	if (zone->uz_count > zone->uz_count_min)
800285809Sscottl		zone->uz_count--;
801285809Sscottl}
802285809Sscottl
803285809Sscottlstatic void
804285809Sscottlkeg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
805285809Sscottl{
806285809Sscottl	uint8_t *mem;
807285809Sscottl	int i;
808285809Sscottl	uint8_t flags;
809285809Sscottl
810285809Sscottl	mem = slab->us_data;
811285809Sscottl	flags = slab->us_flags;
812285809Sscottl	i = start;
813285809Sscottl	if (keg->uk_fini != NULL) {
814285809Sscottl		for (i--; i > -1; i--)
815285809Sscottl			keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
816285809Sscottl			    keg->uk_size);
817285809Sscottl	}
818285809Sscottl	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
819285809Sscottl		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
820285809Sscottl#ifdef UMA_DEBUG
821285809Sscottl	printf("%s: Returning %d bytes.\n", keg->uk_name,
822285809Sscottl	    PAGE_SIZE * keg->uk_ppera);
823285809Sscottl#endif
824285809Sscottl	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
825285809Sscottl}
826285809Sscottl
827285809Sscottl/*
828285809Sscottl * Frees pages from a keg back to the system.  This is done on demand from
829285809Sscottl * the pageout daemon.
830285809Sscottl *
831285809Sscottl * Returns nothing.
832285809Sscottl */
833285809Sscottlstatic void
834285809Sscottlkeg_drain(uma_keg_t keg)
835285809Sscottl{
836285809Sscottl	struct slabhead freeslabs = { 0 };
837285809Sscottl	uma_slab_t slab;
838285809Sscottl	uma_slab_t n;
839285809Sscottl
840285809Sscottl	/*
841285809Sscottl	 * We don't want to take pages from statically allocated kegs at this
842285809Sscottl	 * time
843285809Sscottl	 */
844285809Sscottl	if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
845285809Sscottl		return;
846285809Sscottl
847285809Sscottl#ifdef UMA_DEBUG
848285809Sscottl	printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
849285809Sscottl#endif
850285809Sscottl	KEG_LOCK(keg);
851285809Sscottl	if (keg->uk_free == 0)
852285809Sscottl		goto finished;
853285809Sscottl
854285809Sscottl	slab = LIST_FIRST(&keg->uk_free_slab);
855285809Sscottl	while (slab) {
856285809Sscottl		n = LIST_NEXT(slab, us_link);
857285809Sscottl
		/* We have nowhere to free these to. */
859285809Sscottl		if (slab->us_flags & UMA_SLAB_BOOT) {
860285809Sscottl			slab = n;
861285809Sscottl			continue;
862285809Sscottl		}
863285809Sscottl
864285809Sscottl		LIST_REMOVE(slab, us_link);
865285809Sscottl		keg->uk_pages -= keg->uk_ppera;
866285809Sscottl		keg->uk_free -= keg->uk_ipers;
867285809Sscottl
868285809Sscottl		if (keg->uk_flags & UMA_ZONE_HASH)
869285809Sscottl			UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
870285809Sscottl
871285809Sscottl		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
872285809Sscottl
873285809Sscottl		slab = n;
874285809Sscottl	}
875285809Sscottlfinished:
876285809Sscottl	KEG_UNLOCK(keg);
877285809Sscottl
878285809Sscottl	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
879285809Sscottl		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
880285809Sscottl		keg_free_slab(keg, slab, keg->uk_ipers);
881285809Sscottl	}
882285809Sscottl}
883285809Sscottl
884285809Sscottlstatic void
885285809Sscottlzone_drain_wait(uma_zone_t zone, int waitok)
886285809Sscottl{
887285809Sscottl
888285809Sscottl	/*
889285809Sscottl	 * Set draining to interlock with zone_dtor() so we can release our
890285809Sscottl	 * locks as we go.  Only dtor() should do a WAITOK call since it
891285809Sscottl	 * is the only call that knows the structure will still be available
892285809Sscottl	 * when it wakes up.
893285809Sscottl	 */
894285809Sscottl	ZONE_LOCK(zone);
895285809Sscottl	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
896285809Sscottl		if (waitok == M_NOWAIT)
897285809Sscottl			goto out;
898285809Sscottl		mtx_unlock(&uma_mtx);
899285809Sscottl		msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
900285809Sscottl		mtx_lock(&uma_mtx);
901285809Sscottl	}
902285809Sscottl	zone->uz_flags |= UMA_ZFLAG_DRAINING;
903285809Sscottl	bucket_cache_drain(zone);
904285809Sscottl	ZONE_UNLOCK(zone);
905285809Sscottl	/*
906285809Sscottl	 * The DRAINING flag protects us from being freed while
907285809Sscottl	 * we're running.  Normally the uma_mtx would protect us but we
908285809Sscottl	 * must be able to release and acquire the right lock for each keg.
909285809Sscottl	 */
910285809Sscottl	zone_foreach_keg(zone, &keg_drain);
911285809Sscottl	ZONE_LOCK(zone);
912285809Sscottl	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
913285809Sscottl	wakeup(zone);
914285809Sscottlout:
915285809Sscottl	ZONE_UNLOCK(zone);
916285809Sscottl}
917285809Sscottl
918285809Sscottlvoid
919285809Sscottlzone_drain(uma_zone_t zone)
920285809Sscottl{
921285809Sscottl
922285809Sscottl	zone_drain_wait(zone, M_NOWAIT);
923285809Sscottl}
924285809Sscottl
925285809Sscottl/*
926285809Sscottl * Allocate a new slab for a keg.  This does not insert the slab onto a list.
927285809Sscottl *
928285809Sscottl * Arguments:
929285809Sscottl *	wait  Shall we wait?
930285809Sscottl *
931285809Sscottl * Returns:
932285809Sscottl *	The slab that was allocated or NULL if there is no memory and the
933285809Sscottl *	caller specified M_NOWAIT.
934285809Sscottl */
935285809Sscottlstatic uma_slab_t
936285809Sscottlkeg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
937285809Sscottl{
938285809Sscottl	uma_slabrefcnt_t slabref;
939285809Sscottl	uma_alloc allocf;
940285809Sscottl	uma_slab_t slab;
941285809Sscottl	uint8_t *mem;
942285809Sscottl	uint8_t flags;
943285809Sscottl	int i;
944285809Sscottl
945285809Sscottl	mtx_assert(&keg->uk_lock, MA_OWNED);
946285809Sscottl	slab = NULL;
947285809Sscottl	mem = NULL;
948285809Sscottl
949285809Sscottl#ifdef UMA_DEBUG
950285809Sscottl	printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
951285809Sscottl#endif
952285809Sscottl	allocf = keg->uk_allocf;
953285809Sscottl	KEG_UNLOCK(keg);
954285809Sscottl
955285809Sscottl	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
956285809Sscottl		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
957285809Sscottl		if (slab == NULL)
958285809Sscottl			goto out;
959285809Sscottl	}
960285809Sscottl
961285809Sscottl	/*
962285809Sscottl	 * This reproduces the old vm_zone behavior of zero filling pages the
963285809Sscottl	 * first time they are added to a zone.
964285809Sscottl	 *
965285809Sscottl	 * Malloced items are zeroed in uma_zalloc.
966285809Sscottl	 */
967285809Sscottl
968285809Sscottl	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
969285809Sscottl		wait |= M_ZERO;
970285809Sscottl	else
971285809Sscottl		wait &= ~M_ZERO;
972285809Sscottl
973285809Sscottl	if (keg->uk_flags & UMA_ZONE_NODUMP)
974285809Sscottl		wait |= M_NODUMP;
975285809Sscottl
976285809Sscottl	/* zone is passed for legacy reasons. */
977285809Sscottl	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
978285809Sscottl	if (mem == NULL) {
979285809Sscottl		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
980285809Sscottl			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
981285809Sscottl		slab = NULL;
982285809Sscottl		goto out;
983285809Sscottl	}
984285809Sscottl
985285809Sscottl	/* Point the slab into the allocated memory */
986285809Sscottl	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
987285809Sscottl		slab = (uma_slab_t )(mem + keg->uk_pgoff);
988285809Sscottl
989285809Sscottl	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
990285809Sscottl		for (i = 0; i < keg->uk_ppera; i++)
991285809Sscottl			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
992285809Sscottl
993285809Sscottl	slab->us_keg = keg;
994285809Sscottl	slab->us_data = mem;
995285809Sscottl	slab->us_freecount = keg->uk_ipers;
996285809Sscottl	slab->us_flags = flags;
997285809Sscottl	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
998285809Sscottl#ifdef INVARIANTS
999285809Sscottl	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1000285809Sscottl#endif
1001285809Sscottl	if (keg->uk_flags & UMA_ZONE_REFCNT) {
1002285809Sscottl		slabref = (uma_slabrefcnt_t)slab;
1003285809Sscottl		for (i = 0; i < keg->uk_ipers; i++)
1004285809Sscottl			slabref->us_refcnt[i] = 0;
1005285809Sscottl	}
1006285809Sscottl
1007285809Sscottl	if (keg->uk_init != NULL) {
1008285809Sscottl		for (i = 0; i < keg->uk_ipers; i++)
1009285809Sscottl			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
1010285809Sscottl			    keg->uk_size, wait) != 0)
1011285809Sscottl				break;
1012285809Sscottl		if (i != keg->uk_ipers) {
1013285809Sscottl			keg_free_slab(keg, slab, i);
1014285809Sscottl			slab = NULL;
1015285809Sscottl			goto out;
1016285809Sscottl		}
1017285809Sscottl	}
1018285809Sscottlout:
1019285809Sscottl	KEG_LOCK(keg);
1020285809Sscottl
1021285809Sscottl	if (slab != NULL) {
1022285809Sscottl		if (keg->uk_flags & UMA_ZONE_HASH)
1023285809Sscottl			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
1024285809Sscottl
1025285809Sscottl		keg->uk_pages += keg->uk_ppera;
1026285809Sscottl		keg->uk_free += keg->uk_ipers;
1027285809Sscottl	}
1028285809Sscottl
1029285809Sscottl	return (slab);
1030285809Sscottl}
1031285809Sscottl
1032285809Sscottl/*
1033285809Sscottl * This function is intended to be used early on in place of page_alloc() so
1034285809Sscottl * that we may use the boot time page cache to satisfy allocations before
1035285809Sscottl * the VM is ready.
1036285809Sscottl */
1037285809Sscottlstatic void *
1038285809Sscottlstartup_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
1039285809Sscottl{
1040285809Sscottl	uma_keg_t keg;
1041285809Sscottl	uma_slab_t tmps;
1042285809Sscottl	int pages, check_pages;
1043285809Sscottl
1044285809Sscottl	keg = zone_first_keg(zone);
1045285809Sscottl	pages = howmany(bytes, PAGE_SIZE);
1046285809Sscottl	check_pages = pages - 1;
1047285809Sscottl	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
1048285809Sscottl
1049285809Sscottl	/*
1050285809Sscottl	 * Check our small startup cache to see if it has pages remaining.
1051285809Sscottl	 */
1052285809Sscottl	mtx_lock(&uma_boot_pages_mtx);
1053285809Sscottl
1054285809Sscottl	/* First check if we have enough room. */
1055285809Sscottl	tmps = LIST_FIRST(&uma_boot_pages);
1056285809Sscottl	while (tmps != NULL && check_pages-- > 0)
1057285809Sscottl		tmps = LIST_NEXT(tmps, us_link);
1058285809Sscottl	if (tmps != NULL) {
1059285809Sscottl		/*
1060285809Sscottl		 * It's ok to lose tmps references.  The last one will
1061285809Sscottl		 * have tmps->us_data pointing to the start address of
1062285809Sscottl		 * "pages" contiguous pages of memory.
1063285809Sscottl		 */
1064285809Sscottl		while (pages-- > 0) {
1065285809Sscottl			tmps = LIST_FIRST(&uma_boot_pages);
1066285809Sscottl			LIST_REMOVE(tmps, us_link);
1067285809Sscottl		}
1068285809Sscottl		mtx_unlock(&uma_boot_pages_mtx);
1069285809Sscottl		*pflag = tmps->us_flags;
1070285809Sscottl		return (tmps->us_data);
1071285809Sscottl	}
1072285809Sscottl	mtx_unlock(&uma_boot_pages_mtx);
1073285809Sscottl	if (booted < UMA_STARTUP2)
1074285809Sscottl		panic("UMA: Increase vm.boot_pages");
1075285809Sscottl	/*
1076285809Sscottl	 * Now that we've booted reset these users to their real allocator.
1077285809Sscottl	 */
1078285809Sscottl#ifdef UMA_MD_SMALL_ALLOC
1079285809Sscottl	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1080285809Sscottl#else
1081285809Sscottl	keg->uk_allocf = page_alloc;
1082285809Sscottl#endif
1083285809Sscottl	return keg->uk_allocf(zone, bytes, pflag, wait);
1084285809Sscottl}
1085285809Sscottl
/*
 * Allocates a number of pages from the system
 *
 * Arguments:
 *	bytes  The number of bytes requested
 *	wait  Shall we wait?
 *
 * Returns:
 *	A pointer to the allocated memory or possibly
 *	NULL if M_NOWAIT is set.
 */
1097285809Sscottlstatic void *
1098285809Sscottlpage_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
1099285809Sscottl{
1100285809Sscottl	void *p;	/* Returned page */
1101285809Sscottl
1102285809Sscottl	*pflag = UMA_SLAB_KMEM;
1103285809Sscottl	p = (void *) kmem_malloc(kmem_arena, bytes, wait);
1104285809Sscottl
1105285809Sscottl	return (p);
1106285809Sscottl}
1107285809Sscottl
/*
 * Allocates a number of pages not belonging to a VM object
 *
 * Arguments:
 *	bytes  The number of bytes requested
 *	wait   Shall we wait?
 *
 * Returns:
 *	A pointer to the allocated memory or possibly
 *	NULL if M_NOWAIT is set.
 */
1119285809Sscottlstatic void *
1120285809Sscottlnoobj_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
1121285809Sscottl{
1122285809Sscottl	TAILQ_HEAD(, vm_page) alloctail;
1123285809Sscottl	u_long npages;
1124285809Sscottl	vm_offset_t retkva, zkva;
1125285809Sscottl	vm_page_t p, p_next;
1126285809Sscottl	uma_keg_t keg;
1127285809Sscottl
1128285809Sscottl	TAILQ_INIT(&alloctail);
1129285809Sscottl	keg = zone_first_keg(zone);
1130285809Sscottl
1131285809Sscottl	npages = howmany(bytes, PAGE_SIZE);
1132285809Sscottl	while (npages > 0) {
1133285809Sscottl		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1134285809Sscottl		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1135285809Sscottl		if (p != NULL) {
1136285809Sscottl			/*
1137285809Sscottl			 * Since the page does not belong to an object, its
1138285809Sscottl			 * listq is unused.
1139285809Sscottl			 */
1140285809Sscottl			TAILQ_INSERT_TAIL(&alloctail, p, listq);
1141285809Sscottl			npages--;
1142285809Sscottl			continue;
1143285809Sscottl		}
1144285809Sscottl		if (wait & M_WAITOK) {
1145285809Sscottl			VM_WAIT;
1146285809Sscottl			continue;
1147285809Sscottl		}
1148285809Sscottl
1149285809Sscottl		/*
1150285809Sscottl		 * Page allocation failed, free intermediate pages and
1151285809Sscottl		 * exit.
1152285809Sscottl		 */
1153285809Sscottl		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1154285809Sscottl			vm_page_unwire(p, 0);
1155285809Sscottl			vm_page_free(p);
1156285809Sscottl		}
1157285809Sscottl		return (NULL);
1158285809Sscottl	}
1159285809Sscottl	*flags = UMA_SLAB_PRIV;
1160285809Sscottl	zkva = keg->uk_kva +
1161285809Sscottl	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1162285809Sscottl	retkva = zkva;
1163285809Sscottl	TAILQ_FOREACH(p, &alloctail, listq) {
1164285809Sscottl		pmap_qenter(zkva, &p, 1);
1165285809Sscottl		zkva += PAGE_SIZE;
1166285809Sscottl	}
1167285809Sscottl
1168285809Sscottl	return ((void *)retkva);
1169285809Sscottl}
1170285809Sscottl
1171285809Sscottl/*
1172285809Sscottl * Frees a number of pages to the system
1173285809Sscottl *
1174285809Sscottl * Arguments:
1175285809Sscottl *	mem   A pointer to the memory to be freed
1176285809Sscottl *	size  The size of the memory being freed
1177285809Sscottl *	flags The original p->us_flags field
1178285809Sscottl *
1179285809Sscottl * Returns:
1180285809Sscottl *	Nothing
1181285809Sscottl */
1182285809Sscottlstatic void
1183285809Sscottlpage_free(void *mem, int size, uint8_t flags)
1184285809Sscottl{
1185285809Sscottl	struct vmem *vmem;
1186285809Sscottl
1187285809Sscottl	if (flags & UMA_SLAB_KMEM)
1188285809Sscottl		vmem = kmem_arena;
1189285809Sscottl	else if (flags & UMA_SLAB_KERNEL)
1190285809Sscottl		vmem = kernel_arena;
1191285809Sscottl	else
1192285809Sscottl		panic("UMA: page_free used with invalid flags %d", flags);
1193285809Sscottl
1194285809Sscottl	kmem_free(vmem, (vm_offset_t)mem, size);
1195285809Sscottl}
1196285809Sscottl
1197285809Sscottl/*
1198285809Sscottl * Zero fill initializer
1199285809Sscottl *
1200285809Sscottl * Arguments/Returns follow uma_init specifications
1201285809Sscottl */
1202285809Sscottlstatic int
1203285809Sscottlzero_init(void *mem, int size, int flags)
1204285809Sscottl{
1205285809Sscottl	bzero(mem, size);
1206285809Sscottl	return (0);
1207285809Sscottl}
1208285809Sscottl
/*
 * Finish creating a small uma keg.  This calculates ipers and the keg size.
 *
 * Arguments
 *	keg  The keg we should initialize
 *
 * Returns
 *	Nothing
 */
1218285809Sscottlstatic void
1219285809Sscottlkeg_small_init(uma_keg_t keg)
1220285809Sscottl{
1221285809Sscottl	u_int rsize;
1222285809Sscottl	u_int memused;
1223285809Sscottl	u_int wastedspace;
1224285809Sscottl	u_int shsize;
1225285809Sscottl
1226285809Sscottl	if (keg->uk_flags & UMA_ZONE_PCPU) {
1227285809Sscottl		u_int ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
1228285809Sscottl
1229285809Sscottl		keg->uk_slabsize = sizeof(struct pcpu);
1230285809Sscottl		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
1231285809Sscottl		    PAGE_SIZE);
1232285809Sscottl	} else {
1233285809Sscottl		keg->uk_slabsize = UMA_SLAB_SIZE;
1234285809Sscottl		keg->uk_ppera = 1;
1235285809Sscottl	}
1236285809Sscottl
	/*
	 * Calculate the size of each allocation (rsize) according to
	 * alignment.  If the requested size is so small that a slab would
	 * hold more items than the free-item bitset can track, round it up.
	 */
1242285809Sscottl	rsize = keg->uk_size;
1243285809Sscottl	if (rsize < keg->uk_slabsize / SLAB_SETSIZE)
1244285809Sscottl		rsize = keg->uk_slabsize / SLAB_SETSIZE;
1245285809Sscottl	if (rsize & keg->uk_align)
1246285809Sscottl		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1247285809Sscottl	keg->uk_rsize = rsize;
1248285809Sscottl
1249285809Sscottl	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1250285809Sscottl	    keg->uk_rsize < sizeof(struct pcpu),
1251285809Sscottl	    ("%s: size %u too large", __func__, keg->uk_rsize));
1252285809Sscottl
1253285809Sscottl	if (keg->uk_flags & UMA_ZONE_REFCNT)
1254285809Sscottl		rsize += sizeof(uint32_t);
1255285809Sscottl
1256285809Sscottl	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1257285809Sscottl		shsize = 0;
1258285809Sscottl	else
1259285809Sscottl		shsize = sizeof(struct uma_slab);
1260285809Sscottl
1261285809Sscottl	keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
1262285809Sscottl	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1263285809Sscottl	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1264285809Sscottl
1265285809Sscottl	memused = keg->uk_ipers * rsize + shsize;
1266285809Sscottl	wastedspace = keg->uk_slabsize - memused;
1267285809Sscottl
1268285809Sscottl	/*
1269285809Sscottl	 * We can't do OFFPAGE if we're internal or if we've been
1270285809Sscottl	 * asked to not go to the VM for buckets.  If we do this we
1271285809Sscottl	 * may end up going to the VM  for slabs which we do not
1272285809Sscottl	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
1273285809Sscottl	 * of UMA_ZONE_VM, which clearly forbids it.
1274285809Sscottl	 */
1275285809Sscottl	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1276285809Sscottl	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1277285809Sscottl		return;
1278285809Sscottl
1279285809Sscottl	/*
1280285809Sscottl	 * See if using an OFFPAGE slab will limit our waste.  Only do
1281285809Sscottl	 * this if it permits more items per-slab.
1282285809Sscottl	 *
1283285809Sscottl	 * XXX We could try growing slabsize to limit max waste as well.
1284285809Sscottl	 * Historically this was not done because the VM could not
1285285809Sscottl	 * efficiently handle contiguous allocations.
1286285809Sscottl	 */
1287285809Sscottl	if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
1288285809Sscottl	    (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
1289285809Sscottl		keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
1290285809Sscottl		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1291285809Sscottl		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1292285809Sscottl#ifdef UMA_DEBUG
1293285809Sscottl		printf("UMA decided we need offpage slab headers for "
1294285809Sscottl		    "keg: %s, calculated wastedspace = %d, "
1295285809Sscottl		    "maximum wasted space allowed = %d, "
1296285809Sscottl		    "calculated ipers = %d, "
1297285809Sscottl		    "new wasted space = %d\n", keg->uk_name, wastedspace,
1298285809Sscottl		    keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1299285809Sscottl		    keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
1300285809Sscottl#endif
1301285809Sscottl		keg->uk_flags |= UMA_ZONE_OFFPAGE;
1302285809Sscottl	}
1303285809Sscottl
1304285809Sscottl	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1305285809Sscottl	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1306285809Sscottl		keg->uk_flags |= UMA_ZONE_HASH;
1307285809Sscottl}
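/*
 * Worked example (the page size and slab header size below are assumptions
 * for illustration only): with a 4096-byte slab, a hypothetical 96-byte
 * inline slab header and a 256-byte pointer-aligned item, rsize stays 256
 * and uk_ipers = (4096 - 96) / 256 = 15, leaving
 * 4096 - (15 * 256 + 96) = 160 bytes of waste.  If that is under
 * uk_slabsize / UMA_MAX_WASTE, the slab header stays inline and OFFPAGE is
 * not chosen.
 */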
1308285809Sscottl
/*
 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
 * more complicated.
 *
 * Arguments
 *	keg  The keg we should initialize
 *
 * Returns
 *	Nothing
 */
1320285809Sscottlstatic void
1321285809Sscottlkeg_large_init(uma_keg_t keg)
1322285809Sscottl{
1323285809Sscottl	u_int shsize;
1324285809Sscottl
1325285809Sscottl	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1326285809Sscottl	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1327285809Sscottl	    ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1328285809Sscottl	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1329285809Sscottl	    ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1330285809Sscottl
1331285809Sscottl	keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1332285809Sscottl	keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
1333285809Sscottl	keg->uk_ipers = 1;
1334285809Sscottl	keg->uk_rsize = keg->uk_size;
1335285809Sscottl
1336285809Sscottl	/* We can't do OFFPAGE if we're internal, bail out here. */
1337285809Sscottl	if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1338285809Sscottl		return;
1339285809Sscottl
1340285809Sscottl	/* Check whether we have enough space to not do OFFPAGE. */
1341285809Sscottl	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1342285809Sscottl		shsize = sizeof(struct uma_slab);
1343285809Sscottl		if (keg->uk_flags & UMA_ZONE_REFCNT)
1344285809Sscottl			shsize += keg->uk_ipers * sizeof(uint32_t);
1345285809Sscottl		if (shsize & UMA_ALIGN_PTR)
1346285809Sscottl			shsize = (shsize & ~UMA_ALIGN_PTR) +
1347285809Sscottl			    (UMA_ALIGN_PTR + 1);
1348285809Sscottl
1349285809Sscottl		if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
1350285809Sscottl			keg->uk_flags |= UMA_ZONE_OFFPAGE;
1351285809Sscottl	}
1352285809Sscottl
1353285809Sscottl	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1354285809Sscottl	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1355285809Sscottl		keg->uk_flags |= UMA_ZONE_HASH;
1356285809Sscottl}
1357285809Sscottl
1358285809Sscottlstatic void
1359285809Sscottlkeg_cachespread_init(uma_keg_t keg)
1360285809Sscottl{
1361285809Sscottl	int alignsize;
1362285809Sscottl	int trailer;
1363285809Sscottl	int pages;
1364285809Sscottl	int rsize;
1365285809Sscottl
1366285809Sscottl	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1367285809Sscottl	    ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1368285809Sscottl
1369285809Sscottl	alignsize = keg->uk_align + 1;
1370285809Sscottl	rsize = keg->uk_size;
1371285809Sscottl	/*
1372285809Sscottl	 * We want one item to start on every align boundary in a page.  To
1373285809Sscottl	 * do this we will span pages.  We will also extend the item by the
1374285809Sscottl	 * size of align if it is an even multiple of align.  Otherwise, it
1375285809Sscottl	 * would fall on the same boundary every time.
1376285809Sscottl	 */
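	/*
	 * Worked example (illustrative, assuming 4096-byte pages): a
	 * hypothetical 128-byte item with 64-byte alignment is padded to
	 * rsize = 192, an odd multiple of the alignment.  A slab then
	 * spans (192 * 64) / 4096 = 3 pages and holds
	 * (3 * 4096 + 64) / 192 = 64 items, whose page offsets
	 * (192 * k mod 4096) hit each of the 64 possible 64-byte
	 * boundaries in a page exactly once.
	 */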
1377285809Sscottl	if (rsize & keg->uk_align)
1378285809Sscottl		rsize = (rsize & ~keg->uk_align) + alignsize;
1379285809Sscottl	if ((rsize & alignsize) == 0)
1380285809Sscottl		rsize += alignsize;
1381285809Sscottl	trailer = rsize - keg->uk_size;
1382285809Sscottl	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1383285809Sscottl	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1384285809Sscottl	keg->uk_rsize = rsize;
1385285809Sscottl	keg->uk_ppera = pages;
1386285809Sscottl	keg->uk_slabsize = UMA_SLAB_SIZE;
1387285809Sscottl	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1388285809Sscottl	keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1389285809Sscottl	KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1390285809Sscottl	    ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1391285809Sscottl	    keg->uk_ipers));
1392285809Sscottl}
1393285809Sscottl
1394285809Sscottl/*
1395285809Sscottl * Keg header ctor.  This initializes all fields, locks, etc., and inserts
1396285809Sscottl * the keg onto the global keg list.
1397285809Sscottl *
1398285809Sscottl * Arguments/Returns follow uma_ctor specifications
1399285809Sscottl *	udata  Actually uma_kctor_args
1400285809Sscottl */
1401285809Sscottlstatic int
1402285809Sscottlkeg_ctor(void *mem, int size, void *udata, int flags)
1403285809Sscottl{
1404285809Sscottl	struct uma_kctor_args *arg = udata;
1405285809Sscottl	uma_keg_t keg = mem;
1406285809Sscottl	uma_zone_t zone;
1407285809Sscottl
1408285809Sscottl	bzero(keg, size);
1409285809Sscottl	keg->uk_size = arg->size;
1410285809Sscottl	keg->uk_init = arg->uminit;
1411285809Sscottl	keg->uk_fini = arg->fini;
1412285809Sscottl	keg->uk_align = arg->align;
1413285809Sscottl	keg->uk_free = 0;
1414285809Sscottl	keg->uk_reserve = 0;
1415285809Sscottl	keg->uk_pages = 0;
1416285809Sscottl	keg->uk_flags = arg->flags;
1417285809Sscottl	keg->uk_allocf = page_alloc;
1418285809Sscottl	keg->uk_freef = page_free;
1419285809Sscottl	keg->uk_slabzone = NULL;
1420285809Sscottl
1421285809Sscottl	/*
1422285809Sscottl	 * The master zone is passed to us at keg-creation time.
1423285809Sscottl	 */
1424285809Sscottl	zone = arg->zone;
1425285809Sscottl	keg->uk_name = zone->uz_name;
1426285809Sscottl
1427285809Sscottl	if (arg->flags & UMA_ZONE_VM)
1428285809Sscottl		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1429285809Sscottl
1430285809Sscottl	if (arg->flags & UMA_ZONE_ZINIT)
1431285809Sscottl		keg->uk_init = zero_init;
1432285809Sscottl
1433285809Sscottl	if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1434285809Sscottl		keg->uk_flags |= UMA_ZONE_VTOSLAB;
1435285809Sscottl
1436285809Sscottl	if (arg->flags & UMA_ZONE_PCPU)
1437285809Sscottl#ifdef SMP
1438285809Sscottl		keg->uk_flags |= UMA_ZONE_OFFPAGE;
1439285809Sscottl#else
1440285809Sscottl		keg->uk_flags &= ~UMA_ZONE_PCPU;
1441285809Sscottl#endif
1442285809Sscottl
1443285809Sscottl	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1444285809Sscottl		keg_cachespread_init(keg);
1445285809Sscottl	} else if (keg->uk_flags & UMA_ZONE_REFCNT) {
1446285809Sscottl		if (keg->uk_size >
1447285809Sscottl		    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
1448285809Sscottl		    sizeof(uint32_t)))
1449285809Sscottl			keg_large_init(keg);
1450285809Sscottl		else
1451285809Sscottl			keg_small_init(keg);
1452285809Sscottl	} else {
1453285809Sscottl		if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1454285809Sscottl			keg_large_init(keg);
1455285809Sscottl		else
1456285809Sscottl			keg_small_init(keg);
1457285809Sscottl	}
1458285809Sscottl
1459285809Sscottl	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1460285809Sscottl		if (keg->uk_flags & UMA_ZONE_REFCNT) {
1461285809Sscottl			if (keg->uk_ipers > uma_max_ipers_ref)
1462285809Sscottl				panic("Too many ref items per zone: %d > %d\n",
1463285809Sscottl				    keg->uk_ipers, uma_max_ipers_ref);
1464285809Sscottl			keg->uk_slabzone = slabrefzone;
1465285809Sscottl		} else
1466285809Sscottl			keg->uk_slabzone = slabzone;
1467285809Sscottl	}
1468285809Sscottl
1469285809Sscottl	/*
1470285809Sscottl	 * If we haven't booted yet we need allocations to go through the
1471285809Sscottl	 * startup cache until the vm is ready.
1472285809Sscottl	 */
1473285809Sscottl	if (keg->uk_ppera == 1) {
1474285809Sscottl#ifdef UMA_MD_SMALL_ALLOC
1475285809Sscottl		keg->uk_allocf = uma_small_alloc;
1476285809Sscottl		keg->uk_freef = uma_small_free;
1477285809Sscottl
1478285809Sscottl		if (booted < UMA_STARTUP)
1479285809Sscottl			keg->uk_allocf = startup_alloc;
1480285809Sscottl#else
1481285809Sscottl		if (booted < UMA_STARTUP2)
1482285809Sscottl			keg->uk_allocf = startup_alloc;
1483285809Sscottl#endif
1484285809Sscottl	} else if (booted < UMA_STARTUP2 &&
1485285809Sscottl	    (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1486285809Sscottl		keg->uk_allocf = startup_alloc;
1487285809Sscottl
1488285809Sscottl	/*
1489285809Sscottl	 * Initialize keg's lock
1490285809Sscottl	 */
1491285809Sscottl	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1492285809Sscottl
1493285809Sscottl	/*
1494285809Sscottl	 * If we're putting the slab header in the actual page we need to
1495285809Sscottl	 * figure out where in each page it goes.  This calculates a right
1496285809Sscottl	 * justified offset into the memory on an ALIGN_PTR boundary.
1497285809Sscottl	 */
1498285809Sscottl	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1499285809Sscottl		u_int totsize;
1500285809Sscottl
1501285809Sscottl		/* Size of the slab struct and free list */
1502285809Sscottl		totsize = sizeof(struct uma_slab);
1503285809Sscottl
1504285809Sscottl		/* Size of the reference counts. */
1505285809Sscottl		if (keg->uk_flags & UMA_ZONE_REFCNT)
1506285809Sscottl			totsize += keg->uk_ipers * sizeof(uint32_t);
1507285809Sscottl
1508285809Sscottl		if (totsize & UMA_ALIGN_PTR)
1509285809Sscottl			totsize = (totsize & ~UMA_ALIGN_PTR) +
1510285809Sscottl			    (UMA_ALIGN_PTR + 1);
1511285809Sscottl		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1512285809Sscottl
1513285809Sscottl		/*
1514285809Sscottl		 * The only way the following is possible is if our UMA_ALIGN_PTR
1515285809Sscottl		 * adjustments have pushed the slab header past the end of
1516285809Sscottl		 * the slab.  I haven't checked whether this is
1517285809Sscottl		 * mathematically possible for all cases, so we make
1518285809Sscottl		 * sure here anyway.
1519285809Sscottl		 */
1520285809Sscottl		totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1521285809Sscottl		if (keg->uk_flags & UMA_ZONE_REFCNT)
1522285809Sscottl			totsize += keg->uk_ipers * sizeof(uint32_t);
1523285809Sscottl		if (totsize > PAGE_SIZE * keg->uk_ppera) {
1524285809Sscottl			printf("zone %s ipers %d rsize %d size %d\n",
1525285809Sscottl			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1526285809Sscottl			    keg->uk_size);
1527285809Sscottl			panic("UMA slab won't fit.");
1528285809Sscottl		}
1529285809Sscottl	}
1530285809Sscottl
1531285809Sscottl	if (keg->uk_flags & UMA_ZONE_HASH)
1532285809Sscottl		hash_alloc(&keg->uk_hash);
1533285809Sscottl
1534285809Sscottl#ifdef UMA_DEBUG
1535285809Sscottl	printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1536285809Sscottl	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1537285809Sscottl	    keg->uk_ipers, keg->uk_ppera,
1538285809Sscottl	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1539285809Sscottl#endif
1540285809Sscottl
1541285809Sscottl	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1542285809Sscottl
1543285809Sscottl	mtx_lock(&uma_mtx);
1544285809Sscottl	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1545285809Sscottl	mtx_unlock(&uma_mtx);
1546285809Sscottl	return (0);
1547285809Sscottl}
1548285809Sscottl
1549285809Sscottl/*
1550285809Sscottl * Zone header ctor.  This initializes all fields, locks, etc.
1551285809Sscottl *
1552285809Sscottl * Arguments/Returns follow uma_ctor specifications
1553285809Sscottl *	udata  Actually uma_zctor_args
1554285809Sscottl */
1555285809Sscottlstatic int
1556285809Sscottlzone_ctor(void *mem, int size, void *udata, int flags)
1557285809Sscottl{
1558285809Sscottl	struct uma_zctor_args *arg = udata;
1559285809Sscottl	uma_zone_t zone = mem;
1560285809Sscottl	uma_zone_t z;
1561285809Sscottl	uma_keg_t keg;
1562285809Sscottl
1563285809Sscottl	bzero(zone, size);
1564285809Sscottl	zone->uz_name = arg->name;
1565285809Sscottl	zone->uz_ctor = arg->ctor;
1566285809Sscottl	zone->uz_dtor = arg->dtor;
1567285809Sscottl	zone->uz_slab = zone_fetch_slab;
1568285809Sscottl	zone->uz_init = NULL;
1569285809Sscottl	zone->uz_fini = NULL;
1570285809Sscottl	zone->uz_allocs = 0;
1571285809Sscottl	zone->uz_frees = 0;
1572285809Sscottl	zone->uz_fails = 0;
1573285809Sscottl	zone->uz_sleeps = 0;
1574285809Sscottl	zone->uz_count = 0;
1575285809Sscottl	zone->uz_count_min = 0;
1576285809Sscottl	zone->uz_flags = 0;
1577285809Sscottl	zone->uz_warning = NULL;
1578285809Sscottl	timevalclear(&zone->uz_ratecheck);
1579285809Sscottl	keg = arg->keg;
1580285809Sscottl
1581285809Sscottl	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1582285809Sscottl
1583285809Sscottl	/*
1584285809Sscottl	 * This is a pure cache zone, no kegs.
1585285809Sscottl	 */
1586285809Sscottl	if (arg->import) {
1587285809Sscottl		if (arg->flags & UMA_ZONE_VM)
1588285809Sscottl			arg->flags |= UMA_ZFLAG_CACHEONLY;
1589285809Sscottl		zone->uz_flags = arg->flags;
1590285809Sscottl		zone->uz_size = arg->size;
1591285809Sscottl		zone->uz_import = arg->import;
1592285809Sscottl		zone->uz_release = arg->release;
1593285809Sscottl		zone->uz_arg = arg->arg;
1594285809Sscottl		zone->uz_lockptr = &zone->uz_lock;
1595285809Sscottl		mtx_lock(&uma_mtx);
1596285809Sscottl		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
1597285809Sscottl		mtx_unlock(&uma_mtx);
1598285809Sscottl		goto out;
1599285809Sscottl	}
1600285809Sscottl
1601285809Sscottl	/*
1602285809Sscottl	 * Use the regular zone/keg/slab allocator.
1603285809Sscottl	 */
1604285809Sscottl	zone->uz_import = (uma_import)zone_import;
1605285809Sscottl	zone->uz_release = (uma_release)zone_release;
1606285809Sscottl	zone->uz_arg = zone;
1607285809Sscottl
1608285809Sscottl	if (arg->flags & UMA_ZONE_SECONDARY) {
1609285809Sscottl		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1610285809Sscottl		zone->uz_init = arg->uminit;
1611285809Sscottl		zone->uz_fini = arg->fini;
1612285809Sscottl		zone->uz_lockptr = &keg->uk_lock;
1613285809Sscottl		zone->uz_flags |= UMA_ZONE_SECONDARY;
1614285809Sscottl		mtx_lock(&uma_mtx);
1615285809Sscottl		ZONE_LOCK(zone);
1616285809Sscottl		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1617285809Sscottl			if (LIST_NEXT(z, uz_link) == NULL) {
1618285809Sscottl				LIST_INSERT_AFTER(z, zone, uz_link);
1619285809Sscottl				break;
1620285809Sscottl			}
1621285809Sscottl		}
1622285809Sscottl		ZONE_UNLOCK(zone);
1623285809Sscottl		mtx_unlock(&uma_mtx);
1624285809Sscottl	} else if (keg == NULL) {
1625285809Sscottl		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1626285809Sscottl		    arg->align, arg->flags)) == NULL)
1627285809Sscottl			return (ENOMEM);
1628285809Sscottl	} else {
1629285809Sscottl		struct uma_kctor_args karg;
1630285809Sscottl		int error;
1631285809Sscottl
1632285809Sscottl		/* We should only be here from uma_startup() */
1633285809Sscottl		karg.size = arg->size;
1634285809Sscottl		karg.uminit = arg->uminit;
1635285809Sscottl		karg.fini = arg->fini;
1636285809Sscottl		karg.align = arg->align;
1637285809Sscottl		karg.flags = arg->flags;
1638285809Sscottl		karg.zone = zone;
1639285809Sscottl		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1640285809Sscottl		    flags);
1641285809Sscottl		if (error)
1642285809Sscottl			return (error);
1643285809Sscottl	}
1644285809Sscottl
1645285809Sscottl	/*
1646285809Sscottl	 * Link in the first keg.
1647285809Sscottl	 */
1648285809Sscottl	zone->uz_klink.kl_keg = keg;
1649285809Sscottl	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1650285809Sscottl	zone->uz_lockptr = &keg->uk_lock;
1651285809Sscottl	zone->uz_size = keg->uk_size;
1652285809Sscottl	zone->uz_flags |= (keg->uk_flags &
1653285809Sscottl	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1654285809Sscottl
1655285809Sscottl	/*
1656285809Sscottl	 * Some internal zones don't have room allocated for the per-CPU
1657285809Sscottl	 * caches.  If we're internal, bail out here.
1658285809Sscottl	 */
1659285809Sscottl	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1660285809Sscottl		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1661285809Sscottl		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1662285809Sscottl		return (0);
1663285809Sscottl	}
1664285809Sscottl
1665285809Sscottlout:
1666285809Sscottl	if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
1667285809Sscottl		zone->uz_count = bucket_select(zone->uz_size);
1668285809Sscottl	else
1669285809Sscottl		zone->uz_count = BUCKET_MAX;
1670285809Sscottl	zone->uz_count_min = zone->uz_count;
1671285809Sscottl
1672285809Sscottl	return (0);
1673285809Sscottl}
1674285809Sscottl
1675285809Sscottl/*
1676285809Sscottl * Keg header dtor.  This frees all data, destroys locks, frees the hash
1677285809Sscottl * table and removes the keg from the global list.
1678285809Sscottl *
1679285809Sscottl * Arguments/Returns follow uma_dtor specifications
1680285809Sscottl *	udata  unused
1681285809Sscottl */
1682285809Sscottlstatic void
1683285809Sscottlkeg_dtor(void *arg, int size, void *udata)
1684285809Sscottl{
1685285809Sscottl	uma_keg_t keg;
1686285809Sscottl
1687285809Sscottl	keg = (uma_keg_t)arg;
1688285809Sscottl	KEG_LOCK(keg);
1689285809Sscottl	if (keg->uk_free != 0) {
1690285809Sscottl		printf("Freed UMA keg (%s) was not empty (%d items). "
1691285809Sscottl		    "Lost %d pages of memory.\n",
1692285809Sscottl		    keg->uk_name ? keg->uk_name : "",
1693285809Sscottl		    keg->uk_free, keg->uk_pages);
1694285809Sscottl	}
1695285809Sscottl	KEG_UNLOCK(keg);
1696285809Sscottl
1697285809Sscottl	hash_free(&keg->uk_hash);
1698285809Sscottl
1699285809Sscottl	KEG_LOCK_FINI(keg);
1700285809Sscottl}
1701285809Sscottl
1702285809Sscottl/*
1703285809Sscottl * Zone header dtor.
1704285809Sscottl *
1705285809Sscottl * Arguments/Returns follow uma_dtor specifications
1706285809Sscottl *	udata  unused
1707285809Sscottl */
1708285809Sscottlstatic void
1709285809Sscottlzone_dtor(void *arg, int size, void *udata)
1710285809Sscottl{
1711285809Sscottl	uma_klink_t klink;
1712285809Sscottl	uma_zone_t zone;
1713285809Sscottl	uma_keg_t keg;
1714285809Sscottl
1715285809Sscottl	zone = (uma_zone_t)arg;
1716285809Sscottl	keg = zone_first_keg(zone);
1717285809Sscottl
1718285809Sscottl	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1719285809Sscottl		cache_drain(zone);
1720285809Sscottl
1721285809Sscottl	mtx_lock(&uma_mtx);
1722285809Sscottl	LIST_REMOVE(zone, uz_link);
1723285809Sscottl	mtx_unlock(&uma_mtx);
1724285809Sscottl	/*
1725285809Sscottl	 * XXX there are some races here where
1726285809Sscottl	 * the zone can be drained but zone lock
1727285809Sscottl	 * released and then refilled before we
1728285809Sscottl	 * remove it... we don't care for now.
1729285809Sscottl	 */
1730285809Sscottl	zone_drain_wait(zone, M_WAITOK);
1731285809Sscottl	/*
1732285809Sscottl	 * Unlink all of our kegs.
1733285809Sscottl	 */
1734285809Sscottl	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1735285809Sscottl		klink->kl_keg = NULL;
1736285809Sscottl		LIST_REMOVE(klink, kl_link);
1737285809Sscottl		if (klink == &zone->uz_klink)
1738285809Sscottl			continue;
1739285809Sscottl		free(klink, M_TEMP);
1740285809Sscottl	}
1741285809Sscottl	/*
1742285809Sscottl	 * We only destroy kegs from non secondary zones.
1743285809Sscottl	 */
1744285809Sscottl	if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1745285809Sscottl		mtx_lock(&uma_mtx);
1746285809Sscottl		LIST_REMOVE(keg, uk_link);
1747285809Sscottl		mtx_unlock(&uma_mtx);
1748285809Sscottl		zone_free_item(kegs, keg, NULL, SKIP_NONE);
1749285809Sscottl	}
1750285809Sscottl	ZONE_LOCK_FINI(zone);
1751285809Sscottl}
1752285809Sscottl
1753285809Sscottl/*
1754285809Sscottl * Traverses every zone in the system and calls a callback
1755285809Sscottl *
1756285809Sscottl * Arguments:
1757285809Sscottl *	zfunc  A pointer to a function which accepts a zone
1758285809Sscottl *		as an argument.
1759285809Sscottl *
1760285809Sscottl * Returns:
1761285809Sscottl *	Nothing
1762285809Sscottl */
1763285809Sscottlstatic void
1764285809Sscottlzone_foreach(void (*zfunc)(uma_zone_t))
1765285809Sscottl{
1766285809Sscottl	uma_keg_t keg;
1767285809Sscottl	uma_zone_t zone;
1768285809Sscottl
1769285809Sscottl	mtx_lock(&uma_mtx);
1770285809Sscottl	LIST_FOREACH(keg, &uma_kegs, uk_link) {
1771285809Sscottl		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1772285809Sscottl			zfunc(zone);
1773285809Sscottl	}
1774285809Sscottl	mtx_unlock(&uma_mtx);
1775285809Sscottl}
1776285809Sscottl
1777285809Sscottl/* Public functions */
1778285809Sscottl/* See uma.h */
1779285809Sscottlvoid
1780285809Sscottluma_startup(void *bootmem, int boot_pages)
1781285809Sscottl{
1782285809Sscottl	struct uma_zctor_args args;
1783285809Sscottl	uma_slab_t slab;
1784285809Sscottl	u_int slabsize;
1785285809Sscottl	int i;
1786285809Sscottl
1787285809Sscottl#ifdef UMA_DEBUG
1788285809Sscottl	printf("Creating uma keg headers zone and keg.\n");
1789285809Sscottl#endif
1790285809Sscottl	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1791285809Sscottl
1792285809Sscottl	/* "manually" create the initial zone */
1793285809Sscottl	memset(&args, 0, sizeof(args));
1794285809Sscottl	args.name = "UMA Kegs";
1795285809Sscottl	args.size = sizeof(struct uma_keg);
1796285809Sscottl	args.ctor = keg_ctor;
1797285809Sscottl	args.dtor = keg_dtor;
1798285809Sscottl	args.uminit = zero_init;
1799285809Sscottl	args.fini = NULL;
1800285809Sscottl	args.keg = &masterkeg;
1801285809Sscottl	args.align = 32 - 1;
1802285809Sscottl	args.flags = UMA_ZFLAG_INTERNAL;
1803285809Sscottl	/* The initial zone has no per-CPU queues, so it's smaller. */
1804285809Sscottl	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1805285809Sscottl
1806285809Sscottl#ifdef UMA_DEBUG
1807285809Sscottl	printf("Filling boot free list.\n");
1808285809Sscottl#endif
1809285809Sscottl	for (i = 0; i < boot_pages; i++) {
1810285809Sscottl		slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1811285809Sscottl		slab->us_data = (uint8_t *)slab;
1812285809Sscottl		slab->us_flags = UMA_SLAB_BOOT;
1813285809Sscottl		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1814285809Sscottl	}
1815285809Sscottl	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1816285809Sscottl
1817285809Sscottl#ifdef UMA_DEBUG
1818285809Sscottl	printf("Creating uma zone headers zone and keg.\n");
1819285809Sscottl#endif
1820285809Sscottl	args.name = "UMA Zones";
1821285809Sscottl	args.size = sizeof(struct uma_zone) +
1822285809Sscottl	    (sizeof(struct uma_cache) * (mp_maxid + 1));
1823285809Sscottl	args.ctor = zone_ctor;
1824285809Sscottl	args.dtor = zone_dtor;
1825285809Sscottl	args.uminit = zero_init;
1826285809Sscottl	args.fini = NULL;
1827285809Sscottl	args.keg = NULL;
1828285809Sscottl	args.align = 32 - 1;
1829285809Sscottl	args.flags = UMA_ZFLAG_INTERNAL;
1830285809Sscottl	/* The initial zone has no per-CPU queues, so it's smaller. */
1831285809Sscottl	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1832285809Sscottl
1833285809Sscottl#ifdef UMA_DEBUG
1834285809Sscottl	printf("Initializing pcpu cache locks.\n");
1835285809Sscottl#endif
1836285809Sscottl#ifdef UMA_DEBUG
1837285809Sscottl	printf("Creating slab and hash zones.\n");
1838285809Sscottl#endif
1839285809Sscottl
1840285809Sscottl	/* Now make a zone for slab headers */
1841285809Sscottl	slabzone = uma_zcreate("UMA Slabs",
1842285809Sscottl				sizeof(struct uma_slab),
1843285809Sscottl				NULL, NULL, NULL, NULL,
1844285809Sscottl				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1845285809Sscottl
1846285809Sscottl	/*
1847285809Sscottl	 * We also create a zone for the bigger slabs with reference
1848285809Sscottl	 * counts in them, to accomodate UMA_ZONE_REFCNT zones.
1849285809Sscottl	 * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1850285809Sscottl	slabsize = sizeof(struct uma_slab_refcnt);
1851285809Sscottl	slabsize += uma_max_ipers_ref * sizeof(uint32_t);
1852285809Sscottl	slabrefzone = uma_zcreate("UMA RCntSlabs",
1853285809Sscottl				  slabsize,
1854285809Sscottl				  NULL, NULL, NULL, NULL,
1855285809Sscottl				  UMA_ALIGN_PTR,
1856285809Sscottl				  UMA_ZFLAG_INTERNAL);
1857285809Sscottl
1858285809Sscottl	hashzone = uma_zcreate("UMA Hash",
1859285809Sscottl	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1860285809Sscottl	    NULL, NULL, NULL, NULL,
1861285809Sscottl	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1862285809Sscottl
1863285809Sscottl	bucket_init();
1864285809Sscottl
1865285809Sscottl	booted = UMA_STARTUP;
1866285809Sscottl
1867285809Sscottl#ifdef UMA_DEBUG
1868285809Sscottl	printf("UMA startup complete.\n");
1869285809Sscottl#endif
1870285809Sscottl}
1871285809Sscottl
1872285809Sscottl/* see uma.h */
1873285809Sscottlvoid
1874285809Sscottluma_startup2(void)
1875285809Sscottl{
1876285809Sscottl	booted = UMA_STARTUP2;
1877285809Sscottl	bucket_enable();
1878285809Sscottl#ifdef UMA_DEBUG
1879285809Sscottl	printf("UMA startup2 complete.\n");
1880285809Sscottl#endif
1881285809Sscottl}
1882285809Sscottl
1883285809Sscottl/*
1884285809Sscottl * Initialize our callout handle.
1886285809Sscottl */
1887285809Sscottl
1888285809Sscottlstatic void
1889285809Sscottluma_startup3(void)
1890285809Sscottl{
1891285809Sscottl#ifdef UMA_DEBUG
1892285809Sscottl	printf("Starting callout.\n");
1893285809Sscottl#endif
1894285809Sscottl	callout_init(&uma_callout, CALLOUT_MPSAFE);
1895285809Sscottl	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1896285809Sscottl#ifdef UMA_DEBUG
1897285809Sscottl	printf("UMA startup3 complete.\n");
1898285809Sscottl#endif
1899285809Sscottl}
1900285809Sscottl
1901285809Sscottlstatic uma_keg_t
1902285809Sscottluma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1903285809Sscottl		int align, uint32_t flags)
1904285809Sscottl{
1905285809Sscottl	struct uma_kctor_args args;
1906285809Sscottl
1907285809Sscottl	args.size = size;
1908285809Sscottl	args.uminit = uminit;
1909285809Sscottl	args.fini = fini;
1910285809Sscottl	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1911285809Sscottl	args.flags = flags;
1912285809Sscottl	args.zone = zone;
1913285809Sscottl	return (zone_alloc_item(kegs, &args, M_WAITOK));
1914285809Sscottl}
1915285809Sscottl
1916285809Sscottl/* See uma.h */
1917285809Sscottlvoid
1918285809Sscottluma_set_align(int align)
1919285809Sscottl{
1920285809Sscottl
1921285809Sscottl	if (align != UMA_ALIGN_CACHE)
1922285809Sscottl		uma_align_cache = align;
1923285809Sscottl}
1924285809Sscottl
1925285809Sscottl/* See uma.h */
1926285809Sscottluma_zone_t
1927285809Sscottluma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1928285809Sscottl		uma_init uminit, uma_fini fini, int align, uint32_t flags)
1929285809Sscottl
1930285809Sscottl{
1931285809Sscottl	struct uma_zctor_args args;
1932285809Sscottl
1933285809Sscottl	/* This stuff is essential for the zone ctor */
1934285809Sscottl	memset(&args, 0, sizeof(args));
1935285809Sscottl	args.name = name;
1936285809Sscottl	args.size = size;
1937285809Sscottl	args.ctor = ctor;
1938285809Sscottl	args.dtor = dtor;
1939285809Sscottl	args.uminit = uminit;
1940285809Sscottl	args.fini = fini;
1941285809Sscottl	args.align = align;
1942285809Sscottl	args.flags = flags;
1943285809Sscottl	args.keg = NULL;
1944285809Sscottl
1945285809Sscottl	return (zone_alloc_item(zones, &args, M_WAITOK));
1946285809Sscottl}
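
/*
 * Illustrative usage sketch (guarded out; the "foo" names are
 * hypothetical and not part of this file): a typical consumer creates
 * its zone once at initialization time and then allocates and frees
 * fixed-size items through it.
 */
#if 0
struct foo {
	int	f_field;
};

static uma_zone_t foo_zone;

static void
foo_init(void)
{

	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{

	return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
}

static void
foo_free(struct foo *fp)
{

	uma_zfree(foo_zone, fp);
}
#endif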
1947285809Sscottl
1948285809Sscottl/* See uma.h */
1949285809Sscottluma_zone_t
1950285809Sscottluma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1951285809Sscottl		    uma_init zinit, uma_fini zfini, uma_zone_t master)
1952285809Sscottl{
1953285809Sscottl	struct uma_zctor_args args;
1954285809Sscottl	uma_keg_t keg;
1955285809Sscottl
1956285809Sscottl	keg = zone_first_keg(master);
1957285809Sscottl	memset(&args, 0, sizeof(args));
1958285809Sscottl	args.name = name;
1959285809Sscottl	args.size = keg->uk_size;
1960285809Sscottl	args.ctor = ctor;
1961285809Sscottl	args.dtor = dtor;
1962285809Sscottl	args.uminit = zinit;
1963285809Sscottl	args.fini = zfini;
1964285809Sscottl	args.align = keg->uk_align;
1965285809Sscottl	args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1966285809Sscottl	args.keg = keg;
1967285809Sscottl
1968285809Sscottl	/* XXX Attaches only one keg of potentially many. */
1969285809Sscottl	return (zone_alloc_item(zones, &args, M_WAITOK));
1970285809Sscottl}
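
/*
 * Illustrative sketch (guarded out; names are hypothetical): a
 * secondary zone shares the master's keg, and hence its slabs and
 * item size, but layers its own ctor/dtor and init/fini on top.
 */
#if 0
static uma_zone_t foo_special_zone;

static void
foo_secondary_init(void)
{

	/* foo_zone from the sketch above serves as the master. */
	foo_special_zone = uma_zsecond_create("foo special",
	    foo_special_ctor, NULL, NULL, NULL, foo_zone);
}
#endif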
1971285809Sscottl
1972285809Sscottl/* See uma.h */
1973285809Sscottluma_zone_t
1974285809Sscottluma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
1975285809Sscottl		    uma_init zinit, uma_fini zfini, uma_import zimport,
1976285809Sscottl		    uma_release zrelease, void *arg, int flags)
1977285809Sscottl{
1978285809Sscottl	struct uma_zctor_args args;
1979285809Sscottl
1980285809Sscottl	memset(&args, 0, sizeof(args));
1981285809Sscottl	args.name = name;
1982285809Sscottl	args.size = size;
1983285809Sscottl	args.ctor = ctor;
1984285809Sscottl	args.dtor = dtor;
1985285809Sscottl	args.uminit = zinit;
1986285809Sscottl	args.fini = zfini;
1987285809Sscottl	args.import = zimport;
1988285809Sscottl	args.release = zrelease;
1989285809Sscottl	args.arg = arg;
1990285809Sscottl	args.align = 0;
1991285809Sscottl	args.flags = flags;
1992285809Sscottl
1993285809Sscottl	return (zone_alloc_item(zones, &args, M_WAITOK));
1994285809Sscottl}
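
/*
 * Illustrative sketch (guarded out; the "bar" names and the use of
 * malloc(9) with M_TEMP are hypothetical): a cache-only zone has no
 * keg and instead fills and drains its buckets through the
 * caller-supplied import and release methods.
 */
#if 0
static int
bar_import(void *arg, void **store, int cnt, int flags)
{
	int i;

	/* Produce up to cnt items; report how many were obtained. */
	for (i = 0; i < cnt; i++) {
		store[i] = malloc(BAR_SIZE, M_TEMP, flags);
		if (store[i] == NULL)
			break;
	}
	return (i);
}

static void
bar_release(void *arg, void **store, int cnt)
{
	int i;

	for (i = 0; i < cnt; i++)
		free(store[i], M_TEMP);
}

static uma_zone_t bar_zone;

static void
bar_init(void)
{

	bar_zone = uma_zcache_create("bar", BAR_SIZE, NULL, NULL,
	    NULL, NULL, bar_import, bar_release, NULL, 0);
}
#endif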
1995285809Sscottl
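/*
 * Lock two zones in a consistent order (by ascending zone address) so
 * that two threads locking the same pair cannot deadlock.  MTX_DUPOK
 * tells WITNESS that intentionally holding two locks of the same lock
 * class here is expected.
 */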
1996285809Sscottlstatic void
1997285809Sscottlzone_lock_pair(uma_zone_t a, uma_zone_t b)
1998285809Sscottl{
1999285809Sscottl	if (a < b) {
2000285809Sscottl		ZONE_LOCK(a);
2001285809Sscottl		mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2002285809Sscottl	} else {
2003285809Sscottl		ZONE_LOCK(b);
2004285809Sscottl		mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2005285809Sscottl	}
2006285809Sscottl}
2007285809Sscottl
2008285809Sscottlstatic void
2009285809Sscottlzone_unlock_pair(uma_zone_t a, uma_zone_t b)
2010285809Sscottl{
2011285809Sscottl
2012285809Sscottl	ZONE_UNLOCK(a);
2013285809Sscottl	ZONE_UNLOCK(b);
2014285809Sscottl}
2015285809Sscottl
2016285809Sscottlint
2017285809Sscottluma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2018285809Sscottl{
2019285809Sscottl	uma_klink_t klink;
2020285809Sscottl	uma_klink_t kl;
2021285809Sscottl	int error;
2022285809Sscottl
2023285809Sscottl	error = 0;
2024285809Sscottl	klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2025285809Sscottl
2026285809Sscottl	zone_lock_pair(zone, master);
2027285809Sscottl	/*
2028285809Sscottl	 * zone must use vtoslab() to resolve objects and must already be
2029285809Sscottl	 * a secondary.
2030285809Sscottl	 */
2031285809Sscottl	if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2032285809Sscottl	    != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2033285809Sscottl		error = EINVAL;
2034285809Sscottl		goto out;
2035285809Sscottl	}
2036285809Sscottl	/*
2037285809Sscottl	 * The new master must also use vtoslab().
2038285809Sscottl	 */
2039285809Sscottl	if ((master->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2040285809Sscottl		error = EINVAL;
2041285809Sscottl		goto out;
2042285809Sscottl	}
2043285809Sscottl	/*
2044285809Sscottl	 * Both must either be refcnt, or not be refcnt.
2045285809Sscottl	 */
2046285809Sscottl	if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
2047285809Sscottl	    (master->uz_flags & UMA_ZONE_REFCNT)) {
2048285809Sscottl		error = EINVAL;
2049285809Sscottl		goto out;
2050285809Sscottl	}
2051285809Sscottl	/*
2052285809Sscottl	 * The underlying object must be the same size.  rsize
2053285809Sscottl	 * may be different.
2054285809Sscottl	 */
2055285809Sscottl	if (master->uz_size != zone->uz_size) {
2056285809Sscottl		error = E2BIG;
2057285809Sscottl		goto out;
2058285809Sscottl	}
2059285809Sscottl	/*
2060285809Sscottl	 * Put it at the end of the list.
2061285809Sscottl	 */
2062285809Sscottl	klink->kl_keg = zone_first_keg(master);
2063285809Sscottl	LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2064285809Sscottl		if (LIST_NEXT(kl, kl_link) == NULL) {
2065285809Sscottl			LIST_INSERT_AFTER(kl, klink, kl_link);
2066285809Sscottl			break;
2067285809Sscottl		}
2068285809Sscottl	}
2069285809Sscottl	klink = NULL;
2070285809Sscottl	zone->uz_flags |= UMA_ZFLAG_MULTI;
2071285809Sscottl	zone->uz_slab = zone_fetch_slab_multi;
2072285809Sscottl
2073285809Sscottlout:
2074285809Sscottl	zone_unlock_pair(zone, master);
2075285809Sscottl	if (klink != NULL)
2076285809Sscottl		free(klink, M_TEMP);
2077285809Sscottl
2078285809Sscottl	return (error);
2079285809Sscottl}
2080285809Sscottl
2081285809Sscottl
2082285809Sscottl/* See uma.h */
2083285809Sscottlvoid
2084285809Sscottluma_zdestroy(uma_zone_t zone)
2085285809Sscottl{
2086285809Sscottl
2087285809Sscottl	zone_free_item(zones, zone, NULL, SKIP_NONE);
2088285809Sscottl}
2089285809Sscottl
2090285809Sscottl/* See uma.h */
2091285809Sscottlvoid *
2092285809Sscottluma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2093285809Sscottl{
2094285809Sscottl	void *item;
2095285809Sscottl	uma_cache_t cache;
2096285809Sscottl	uma_bucket_t bucket;
2097285809Sscottl	int lockfail;
2098285809Sscottl	int cpu;
2099285809Sscottl
2100285809Sscottl	/* This is the fast path allocation */
2101285809Sscottl#ifdef UMA_DEBUG_ALLOC_1
2102285809Sscottl	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2103285809Sscottl#endif
2104285809Sscottl	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2105285809Sscottl	    zone->uz_name, flags);
2106285809Sscottl
2107285809Sscottl	if (flags & M_WAITOK) {
2108285809Sscottl		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2109285809Sscottl		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2110285809Sscottl	}
2111285809Sscottl#ifdef DEBUG_MEMGUARD
2112285809Sscottl	if (memguard_cmp_zone(zone)) {
2113285809Sscottl		item = memguard_alloc(zone->uz_size, flags);
2114285809Sscottl		if (item != NULL) {
2115285809Sscottl			/*
2116285809Sscottl			 * Avoid conflict with the use-after-free
2117285809Sscottl			 * protecting infrastructure from INVARIANTS.
2118285809Sscottl			 */
2119285809Sscottl			if (zone->uz_init != NULL &&
2120285809Sscottl			    zone->uz_init != mtrash_init &&
2121285809Sscottl			    zone->uz_init(item, zone->uz_size, flags) != 0)
2122285809Sscottl				return (NULL);
2123285809Sscottl			if (zone->uz_ctor != NULL &&
2124285809Sscottl			    zone->uz_ctor != mtrash_ctor &&
2125285809Sscottl			    zone->uz_ctor(item, zone->uz_size, udata,
2126285809Sscottl			    flags) != 0) {
2127285809Sscottl			    	zone->uz_fini(item, zone->uz_size);
2128285809Sscottl				return (NULL);
2129285809Sscottl			}
2130285809Sscottl			return (item);
2131285809Sscottl		}
2132285809Sscottl		/* This is unfortunate but should not be fatal. */
2133285809Sscottl	}
2134285809Sscottl#endif
2135285809Sscottl	/*
2136285809Sscottl	 * If possible, allocate from the per-CPU cache.  There are two
2137285809Sscottl	 * requirements for safe access to the per-CPU cache: (1) the thread
2138285809Sscottl	 * accessing the cache must not be preempted or yield during access,
2139285809Sscottl	 * and (2) the thread must not migrate CPUs without switching which
2140285809Sscottl	 * cache it accesses.  We rely on a critical section to prevent
2141285809Sscottl	 * preemption and migration.  We release the critical section in
2142285809Sscottl	 * order to acquire the zone mutex if we are unable to allocate from
2143285809Sscottl	 * the current cache; when we re-acquire the critical section, we
2144285809Sscottl	 * must detect and handle migration if it has occurred.
2145285809Sscottl	 */
2146285809Sscottl	critical_enter();
2147285809Sscottl	cpu = curcpu;
2148285809Sscottl	cache = &zone->uz_cpu[cpu];
2149285809Sscottl
2150285809Sscottlzalloc_start:
2151285809Sscottl	bucket = cache->uc_allocbucket;
2152285809Sscottl	if (bucket != NULL && bucket->ub_cnt > 0) {
2153285809Sscottl		bucket->ub_cnt--;
2154285809Sscottl		item = bucket->ub_bucket[bucket->ub_cnt];
2155285809Sscottl#ifdef INVARIANTS
2156285809Sscottl		bucket->ub_bucket[bucket->ub_cnt] = NULL;
2157285809Sscottl#endif
2158285809Sscottl		KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2159285809Sscottl		cache->uc_allocs++;
2160285809Sscottl		critical_exit();
2161285809Sscottl		if (zone->uz_ctor != NULL &&
2162285809Sscottl		    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2163285809Sscottl			atomic_add_long(&zone->uz_fails, 1);
2164285809Sscottl			zone_free_item(zone, item, udata, SKIP_DTOR);
2165285809Sscottl			return (NULL);
2166285809Sscottl		}
2167285809Sscottl#ifdef INVARIANTS
2168285809Sscottl		uma_dbg_alloc(zone, NULL, item);
2169285809Sscottl#endif
2170285809Sscottl		if (flags & M_ZERO)
2171285809Sscottl			uma_zero_item(item, zone);
2172285809Sscottl		return (item);
2173285809Sscottl	}
2174285809Sscottl
2175285809Sscottl	/*
2176285809Sscottl	 * We have run out of items in our alloc bucket.
2177285809Sscottl	 * See if we can switch with our free bucket.
2178285809Sscottl	 */
2179285809Sscottl	bucket = cache->uc_freebucket;
2180285809Sscottl	if (bucket != NULL && bucket->ub_cnt > 0) {
2181285809Sscottl#ifdef UMA_DEBUG_ALLOC
2182285809Sscottl		printf("uma_zalloc: Swapping empty with alloc.\n");
2183285809Sscottl#endif
2184285809Sscottl		cache->uc_freebucket = cache->uc_allocbucket;
2185285809Sscottl		cache->uc_allocbucket = bucket;
2186285809Sscottl		goto zalloc_start;
2187285809Sscottl	}
2188285809Sscottl
2189285809Sscottl	/*
2190285809Sscottl	 * Discard any empty allocation bucket while we hold no locks.
2191285809Sscottl	 */
2192285809Sscottl	bucket = cache->uc_allocbucket;
2193285809Sscottl	cache->uc_allocbucket = NULL;
2194285809Sscottl	critical_exit();
2195285809Sscottl	if (bucket != NULL)
2196285809Sscottl		bucket_free(zone, bucket, udata);
2197285809Sscottl
2198285809Sscottl	/* Short-circuit for zones without buckets and low memory. */
2199285809Sscottl	if (zone->uz_count == 0 || bucketdisable)
2200285809Sscottl		goto zalloc_item;
2201285809Sscottl
2202285809Sscottl	/*
2203285809Sscottl	 * The attempt to retrieve the item from the per-CPU cache failed, so
2204285809Sscottl	 * we must go back to the zone.  This requires the zone lock, so we
2205285809Sscottl	 * must drop the critical section, then re-acquire it when we go back
2206285809Sscottl	 * to the cache.  Since the critical section is released, we may be
2207285809Sscottl	 * preempted or migrate.  As such, make sure not to maintain any
2208285809Sscottl	 * thread-local state specific to the cache from prior to releasing
2209285809Sscottl	 * the critical section.
2210285809Sscottl	 */
2211285809Sscottl	lockfail = 0;
2212285809Sscottl	if (ZONE_TRYLOCK(zone) == 0) {
2213285809Sscottl		/* Record contention to size the buckets. */
2214285809Sscottl		ZONE_LOCK(zone);
2215285809Sscottl		lockfail = 1;
2216285809Sscottl	}
2217285809Sscottl	critical_enter();
2218285809Sscottl	cpu = curcpu;
2219285809Sscottl	cache = &zone->uz_cpu[cpu];
2220285809Sscottl
2221285809Sscottl	/*
2222285809Sscottl	 * Since we have locked the zone we may as well send back our stats.
2223285809Sscottl	 */
2224285809Sscottl	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2225285809Sscottl	atomic_add_long(&zone->uz_frees, cache->uc_frees);
2226285809Sscottl	cache->uc_allocs = 0;
2227285809Sscottl	cache->uc_frees = 0;
2228285809Sscottl
2229285809Sscottl	/* See if we lost the race to fill the cache. */
2230285809Sscottl	if (cache->uc_allocbucket != NULL) {
2231285809Sscottl		ZONE_UNLOCK(zone);
2232285809Sscottl		goto zalloc_start;
2233285809Sscottl	}
2234285809Sscottl
2235285809Sscottl	/*
2236285809Sscottl	 * Check the zone's cache of buckets.
2237285809Sscottl	 */
2238285809Sscottl	if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
2239285809Sscottl		KASSERT(bucket->ub_cnt != 0,
2240285809Sscottl		    ("uma_zalloc_arg: Returning an empty bucket."));
2241285809Sscottl
2242285809Sscottl		LIST_REMOVE(bucket, ub_link);
2243285809Sscottl		cache->uc_allocbucket = bucket;
2244285809Sscottl		ZONE_UNLOCK(zone);
2245285809Sscottl		goto zalloc_start;
2246285809Sscottl	}
2247285809Sscottl	/* We are no longer associated with this CPU. */
2248285809Sscottl	critical_exit();
2249285809Sscottl
2250285809Sscottl	/*
2251285809Sscottl	 * We bump the uz count when the cache size is insufficient to
2252285809Sscottl	 * handle the working set.
2253285809Sscottl	 */
2254285809Sscottl	if (lockfail && zone->uz_count < BUCKET_MAX)
2255285809Sscottl		zone->uz_count++;
2256285809Sscottl	ZONE_UNLOCK(zone);
2257285809Sscottl
2258285809Sscottl	/*
2259285809Sscottl	 * Now lets just fill a bucket and put it on the free list.  If that
2260285809Sscottl	 * Now let's just fill a bucket and put it on the free list.  If that
2261285809Sscottl	 * works we'll restart the allocation from the beginning and it
2262285809Sscottl	 */
2263285809Sscottl	bucket = zone_alloc_bucket(zone, udata, flags);
2264285809Sscottl	if (bucket != NULL) {
2265285809Sscottl		ZONE_LOCK(zone);
2266285809Sscottl		critical_enter();
2267285809Sscottl		cpu = curcpu;
2268285809Sscottl		cache = &zone->uz_cpu[cpu];
2269285809Sscottl		/*
2270285809Sscottl		 * See if we lost the race or were migrated.  Cache the
2271285809Sscottl		 * initialized bucket to make this less likely or claim
2272285809Sscottl		 * the memory directly.
2273285809Sscottl		 */
2274285809Sscottl		if (cache->uc_allocbucket == NULL)
2275285809Sscottl			cache->uc_allocbucket = bucket;
2276285809Sscottl		else
2277285809Sscottl			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2278285809Sscottl		ZONE_UNLOCK(zone);
2279285809Sscottl		goto zalloc_start;
2280285809Sscottl	}
2281285809Sscottl
2282285809Sscottl	/*
2283285809Sscottl	 * We may not be able to get a bucket so return an actual item.
2284285809Sscottl	 */
2285285809Sscottl#ifdef UMA_DEBUG
2286285809Sscottl	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2287285809Sscottl#endif
2288285809Sscottl
2289285809Sscottlzalloc_item:
2290285809Sscottl	item = zone_alloc_item(zone, udata, flags);
2291285809Sscottl
2292285809Sscottl	return (item);
2293285809Sscottl}
2294285809Sscottl
2295285809Sscottlstatic uma_slab_t
2296285809Sscottlkeg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2297285809Sscottl{
2298285809Sscottl	uma_slab_t slab;
2299285809Sscottl	int reserve;
2300285809Sscottl
2301285809Sscottl	mtx_assert(&keg->uk_lock, MA_OWNED);
2302285809Sscottl	slab = NULL;
2303285809Sscottl	reserve = 0;
2304285809Sscottl	if ((flags & M_USE_RESERVE) == 0)
2305285809Sscottl		reserve = keg->uk_reserve;
2306285809Sscottl
2307285809Sscottl	for (;;) {
2308285809Sscottl		/*
2309285809Sscottl		 * Find a slab with some space.  Prefer slabs that are partially
2310285809Sscottl		 * used over those that are totally full.  This helps to reduce
2311285809Sscottl		 * fragmentation.
2312285809Sscottl		 */
2313285809Sscottl		if (keg->uk_free > reserve) {
2314285809Sscottl			if (!LIST_EMPTY(&keg->uk_part_slab)) {
2315285809Sscottl				slab = LIST_FIRST(&keg->uk_part_slab);
2316285809Sscottl			} else {
2317285809Sscottl				slab = LIST_FIRST(&keg->uk_free_slab);
2318285809Sscottl				LIST_REMOVE(slab, us_link);
2319285809Sscottl				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2320285809Sscottl				    us_link);
2321285809Sscottl			}
2322285809Sscottl			MPASS(slab->us_keg == keg);
2323285809Sscottl			return (slab);
2324285809Sscottl		}
2325285809Sscottl
2326285809Sscottl		/*
2327285809Sscottl		 * M_NOVM means don't ask at all!
2328285809Sscottl		 */
2329285809Sscottl		if (flags & M_NOVM)
2330285809Sscottl			break;
2331285809Sscottl
2332285809Sscottl		if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2333285809Sscottl			keg->uk_flags |= UMA_ZFLAG_FULL;
2334285809Sscottl			/*
2335285809Sscottl			 * If this is not a multi-zone, set the FULL bit.
2336285809Sscottl			 * Otherwise slab_multi() takes care of it.
2337285809Sscottl			 */
2338285809Sscottl			if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
2339285809Sscottl				zone->uz_flags |= UMA_ZFLAG_FULL;
2340285809Sscottl				zone_log_warning(zone);
2341285809Sscottl			}
2342285809Sscottl			if (flags & M_NOWAIT)
2343285809Sscottl				break;
2344285809Sscottl			zone->uz_sleeps++;
2345285809Sscottl			msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2346285809Sscottl			continue;
2347285809Sscottl		}
2348285809Sscottl		slab = keg_alloc_slab(keg, zone, flags);
2349285809Sscottl		/*
2350285809Sscottl		 * If we got a slab here it's safe to mark it partially used
2351285809Sscottl		 * and return.  We assume that the caller is going to remove
2352285809Sscottl		 * at least one item.
2353285809Sscottl		 */
2354285809Sscottl		if (slab) {
2355285809Sscottl			MPASS(slab->us_keg == keg);
2356285809Sscottl			LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2357285809Sscottl			return (slab);
2358285809Sscottl		}
2359285809Sscottl		/*
2360285809Sscottl		 * We might not have been able to get a slab but another cpu
2361285809Sscottl		 * could have while we were unlocked.  Check again before we
2362285809Sscottl		 * fail.
2363285809Sscottl		 */
2364285809Sscottl		flags |= M_NOVM;
2365285809Sscottl	}
2366285809Sscottl	return (slab);
2367285809Sscottl}
2368285809Sscottl
2369285809Sscottlstatic uma_slab_t
2370285809Sscottlzone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2371285809Sscottl{
2372285809Sscottl	uma_slab_t slab;
2373285809Sscottl
2374285809Sscottl	if (keg == NULL) {
2375285809Sscottl		keg = zone_first_keg(zone);
2376285809Sscottl		KEG_LOCK(keg);
2377285809Sscottl	}
2378285809Sscottl
2379285809Sscottl	for (;;) {
2380285809Sscottl		slab = keg_fetch_slab(keg, zone, flags);
2381285809Sscottl		if (slab)
2382285809Sscottl			return (slab);
2383285809Sscottl		if (flags & (M_NOWAIT | M_NOVM))
2384285809Sscottl			break;
2385285809Sscottl	}
2386285809Sscottl	KEG_UNLOCK(keg);
2387285809Sscottl	return (NULL);
2388285809Sscottl}
2389285809Sscottl
2390285809Sscottl/*
2391285809Sscottl * uma_zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2392285809Sscottl * with the keg locked.  On NULL no lock is held.
2393285809Sscottl *
2394285809Sscottl * The last pointer is used to seed the search.  It is not required.
2395285809Sscottl */
2396285809Sscottlstatic uma_slab_t
2397285809Sscottlzone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2398285809Sscottl{
2399285809Sscottl	uma_klink_t klink;
2400285809Sscottl	uma_slab_t slab;
2401285809Sscottl	uma_keg_t keg;
2402285809Sscottl	int flags;
2403285809Sscottl	int empty;
2404285809Sscottl	int full;
2405285809Sscottl
2406285809Sscottl	/*
2407285809Sscottl	 * Don't wait on the first pass.  This will skip limit tests
2408285809Sscottl	 * as well.  We don't want to block if we can find a provider
2409285809Sscottl	 * without blocking.
2410285809Sscottl	 */
2411285809Sscottl	flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2412285809Sscottl	/*
2413285809Sscottl	 * Use the last slab allocated as a hint for where to start
2414285809Sscottl	 * the search.
2415285809Sscottl	 */
2416285809Sscottl	if (last != NULL) {
2417285809Sscottl		slab = keg_fetch_slab(last, zone, flags);
2418285809Sscottl		if (slab)
2419285809Sscottl			return (slab);
2420285809Sscottl		KEG_UNLOCK(last);
2421285809Sscottl	}
2422285809Sscottl	/*
2423285809Sscottl	 * Loop until we have a slab in case of transient failures
2424285809Sscottl	 * while M_WAITOK is specified.  I'm not sure this is 100%
2425285809Sscottl	 * required but we've done it for so long now.
2426285809Sscottl	 */
2427285809Sscottl	for (;;) {
2428285809Sscottl		empty = 0;
2429285809Sscottl		full = 0;
2430285809Sscottl		/*
2431285809Sscottl		 * Search the available kegs for slabs.  Be careful to hold the
2432285809Sscottl		 * correct lock while calling into the keg layer.
2433285809Sscottl		 */
2434285809Sscottl		LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2435285809Sscottl			keg = klink->kl_keg;
2436285809Sscottl			KEG_LOCK(keg);
2437285809Sscottl			if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2438285809Sscottl				slab = keg_fetch_slab(keg, zone, flags);
2439285809Sscottl				if (slab)
2440285809Sscottl					return (slab);
2441285809Sscottl			}
2442285809Sscottl			if (keg->uk_flags & UMA_ZFLAG_FULL)
2443285809Sscottl				full++;
2444285809Sscottl			else
2445285809Sscottl				empty++;
2446285809Sscottl			KEG_UNLOCK(keg);
2447285809Sscottl		}
2448285809Sscottl		if (rflags & (M_NOWAIT | M_NOVM))
2449285809Sscottl			break;
2450285809Sscottl		flags = rflags;
2451285809Sscottl		/*
2452285809Sscottl		 * All kegs are full.  XXX We can't atomically check all kegs
2453285809Sscottl		 * and sleep so just sleep for a short period and retry.
2454285809Sscottl		 */
2455285809Sscottl		if (full && !empty) {
2456285809Sscottl			ZONE_LOCK(zone);
2457285809Sscottl			zone->uz_flags |= UMA_ZFLAG_FULL;
2458285809Sscottl			zone->uz_sleeps++;
2459285809Sscottl			zone_log_warning(zone);
2460285809Sscottl			msleep(zone, zone->uz_lockptr, PVM,
2461285809Sscottl			    "zonelimit", hz/100);
2462285809Sscottl			zone->uz_flags &= ~UMA_ZFLAG_FULL;
2463285809Sscottl			ZONE_UNLOCK(zone);
2464285809Sscottl			continue;
2465285809Sscottl		}
2466285809Sscottl	}
2467285809Sscottl	return (NULL);
2468285809Sscottl}
2469285809Sscottl
2470285809Sscottlstatic void *
2471285809Sscottlslab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2472285809Sscottl{
2473285809Sscottl	void *item;
2474285809Sscottl	uint8_t freei;
2475285809Sscottl
2476285809Sscottl	MPASS(keg == slab->us_keg);
2477285809Sscottl	mtx_assert(&keg->uk_lock, MA_OWNED);
2478285809Sscottl
2479285809Sscottl	freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
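	/*
	 * Pick the lowest-numbered free item: BIT_FFS() returns the
	 * 1-based index of the first bit set in the free bitset, hence
	 * the -1.
	 */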
2480285809Sscottl	BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2481285809Sscottl	item = slab->us_data + (keg->uk_rsize * freei);
2482285809Sscottl	slab->us_freecount--;
2483285809Sscottl	keg->uk_free--;
2484285809Sscottl
2485285809Sscottl	/* Move this slab to the full list */
2486285809Sscottl	if (slab->us_freecount == 0) {
2487285809Sscottl		LIST_REMOVE(slab, us_link);
2488285809Sscottl		LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2489285809Sscottl	}
2490285809Sscottl
2491285809Sscottl	return (item);
2492285809Sscottl}
2493285809Sscottl
2494285809Sscottlstatic int
2495285809Sscottlzone_import(uma_zone_t zone, void **bucket, int max, int flags)
2496285809Sscottl{
2497285809Sscottl	uma_slab_t slab;
2498285809Sscottl	uma_keg_t keg;
2499285809Sscottl	int i;
2500285809Sscottl
2501285809Sscottl	slab = NULL;
2502285809Sscottl	keg = NULL;
2503285809Sscottl	/* Try to keep the buckets totally full */
2504285809Sscottl	for (i = 0; i < max; ) {
2505285809Sscottl		if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2506285809Sscottl			break;
2507285809Sscottl		keg = slab->us_keg;
2508285809Sscottl		while (slab->us_freecount && i < max) {
2509285809Sscottl			bucket[i++] = slab_alloc_item(keg, slab);
2510285809Sscottl			if (keg->uk_free <= keg->uk_reserve)
2511285809Sscottl				break;
2512285809Sscottl		}
2513285809Sscottl		/* Don't grab more than one slab at a time. */
2514285809Sscottl		flags &= ~M_WAITOK;
2515285809Sscottl		flags |= M_NOWAIT;
2516285809Sscottl	}
2517285809Sscottl	if (slab != NULL)
2518285809Sscottl		KEG_UNLOCK(keg);
2519285809Sscottl
2520285809Sscottl	return (i);
2521285809Sscottl}
2522285809Sscottl
2523285809Sscottlstatic uma_bucket_t
2524285809Sscottlzone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
2525285809Sscottl{
2526285809Sscottl	uma_bucket_t bucket;
2527285809Sscottl	int max;
2528285809Sscottl
2529285809Sscottl	/* Don't wait for buckets, preserve caller's NOVM setting. */
2530285809Sscottl	bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
2531285809Sscottl	if (bucket == NULL)
2532285809Sscottl		return (NULL);
2533285809Sscottl
2534285809Sscottl	max = MIN(bucket->ub_entries, zone->uz_count);
2535285809Sscottl	bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
2536285809Sscottl	    max, flags);
2537285809Sscottl
2538285809Sscottl	/*
2539285809Sscottl	 * Initialize the memory if necessary.
2540285809Sscottl	 */
2541285809Sscottl	if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2542285809Sscottl		int i;
2543285809Sscottl
2544285809Sscottl		for (i = 0; i < bucket->ub_cnt; i++)
2545285809Sscottl			if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2546285809Sscottl			    flags) != 0)
2547285809Sscottl				break;
2548285809Sscottl		/*
2549285809Sscottl		 * If we couldn't initialize the whole bucket, put the
2550285809Sscottl		 * rest back onto the freelist.
2551285809Sscottl		 */
2552285809Sscottl		if (i != bucket->ub_cnt) {
2553285809Sscottl			zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
2554285809Sscottl			    bucket->ub_cnt - i);
2555285809Sscottl#ifdef INVARIANTS
2556285809Sscottl			bzero(&bucket->ub_bucket[i],
2557285809Sscottl			    sizeof(void *) * (bucket->ub_cnt - i));
2558285809Sscottl#endif
2559285809Sscottl			bucket->ub_cnt = i;
2560285809Sscottl		}
2561285809Sscottl	}
2562285809Sscottl
2563285809Sscottl	if (bucket->ub_cnt == 0) {
2564285809Sscottl		bucket_free(zone, bucket, udata);
2565285809Sscottl		atomic_add_long(&zone->uz_fails, 1);
2566285809Sscottl		return (NULL);
2567285809Sscottl	}
2568285809Sscottl
2569285809Sscottl	return (bucket);
2570285809Sscottl}
2571285809Sscottl
2572285809Sscottl/*
2573285809Sscottl * Allocates a single item from a zone.
2574285809Sscottl *
2575285809Sscottl * Arguments
2576285809Sscottl *	zone   The zone to alloc for.
2577285809Sscottl *	udata  The data to be passed to the constructor.
2578285809Sscottl *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
2579285809Sscottl *
2580285809Sscottl * Returns
2581285809Sscottl *	NULL if there is no memory and M_NOWAIT is set
2582285809Sscottl *	An item if successful
2583285809Sscottl */
2584285809Sscottl
2585285809Sscottlstatic void *
2586285809Sscottlzone_alloc_item(uma_zone_t zone, void *udata, int flags)
2587285809Sscottl{
2588285809Sscottl	void *item;
2589285809Sscottl
2590285809Sscottl	item = NULL;
2591285809Sscottl
2592285809Sscottl#ifdef UMA_DEBUG_ALLOC
2593285809Sscottl	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2594285809Sscottl#endif
2595285809Sscottl	if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
2596285809Sscottl		goto fail;
2597285809Sscottl	atomic_add_long(&zone->uz_allocs, 1);
2598285809Sscottl
2599285809Sscottl	/*
2600285809Sscottl	 * We have to call both the zone's init (not the keg's init)
2601285809Sscottl	 * and the zone's ctor.  This is because the item is going from
2602285809Sscottl	 * a keg slab directly to the user, and the user is expecting it
2603285809Sscottl	 * to be both zone-init'd as well as zone-ctor'd.
2604285809Sscottl	 */
2605285809Sscottl	if (zone->uz_init != NULL) {
2606285809Sscottl		if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2607285809Sscottl			zone_free_item(zone, item, udata, SKIP_FINI);
2608285809Sscottl			goto fail;
2609285809Sscottl		}
2610285809Sscottl	}
2611285809Sscottl	if (zone->uz_ctor != NULL) {
2612285809Sscottl		if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2613285809Sscottl			zone_free_item(zone, item, udata, SKIP_DTOR);
2614285809Sscottl			goto fail;
2615285809Sscottl		}
2616285809Sscottl	}
2617285809Sscottl#ifdef INVARIANTS
2618285809Sscottl	uma_dbg_alloc(zone, NULL, item);
2619285809Sscottl#endif
2620285809Sscottl	if (flags & M_ZERO)
2621285809Sscottl		uma_zero_item(item, zone);
2622285809Sscottl
2623285809Sscottl	return (item);
2624285809Sscottl
2625285809Sscottlfail:
2626285809Sscottl	atomic_add_long(&zone->uz_fails, 1);
2627285809Sscottl	return (NULL);
2628285809Sscottl}
2629285809Sscottl
2630285809Sscottl/* See uma.h */
2631285809Sscottlvoid
2632285809Sscottluma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2633285809Sscottl{
2634285809Sscottl	uma_cache_t cache;
2635285809Sscottl	uma_bucket_t bucket;
2636285809Sscottl	int lockfail;
2637285809Sscottl	int cpu;
2638285809Sscottl
2639285809Sscottl#ifdef UMA_DEBUG_ALLOC_1
2640285809Sscottl	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2641285809Sscottl#endif
2642285809Sscottl	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2643285809Sscottl	    zone->uz_name);
2644285809Sscottl
2645285809Sscottl	/* uma_zfree(..., NULL) does nothing, to match free(9). */
2646285809Sscottl	if (item == NULL)
2647285809Sscottl		return;
2648285809Sscottl#ifdef DEBUG_MEMGUARD
2649285809Sscottl	if (is_memguard_addr(item)) {
2650285809Sscottl		if (zone->uz_dtor != NULL && zone->uz_dtor != mtrash_dtor)
2651285809Sscottl			zone->uz_dtor(item, zone->uz_size, udata);
2652285809Sscottl		if (zone->uz_fini != NULL && zone->uz_fini != mtrash_fini)
2653285809Sscottl			zone->uz_fini(item, zone->uz_size);
2654285809Sscottl		memguard_free(item);
2655285809Sscottl		return;
2656285809Sscottl	}
2657285809Sscottl#endif
2658285809Sscottl#ifdef INVARIANTS
2659285809Sscottl	if (zone->uz_flags & UMA_ZONE_MALLOC)
2660285809Sscottl		uma_dbg_free(zone, udata, item);
2661285809Sscottl	else
2662285809Sscottl		uma_dbg_free(zone, NULL, item);
2663285809Sscottl#endif
2664285809Sscottl	if (zone->uz_dtor != NULL)
2665285809Sscottl		zone->uz_dtor(item, zone->uz_size, udata);
2666285809Sscottl
2667285809Sscottl	/*
2668285809Sscottl	 * The race here is acceptable.  If we miss it we'll just have to wait
2669285809Sscottl	 * a little longer for the limits to be reset.
2670285809Sscottl	 */
2671285809Sscottl	if (zone->uz_flags & UMA_ZFLAG_FULL)
2672285809Sscottl		goto zfree_item;
2673285809Sscottl
2674285809Sscottl	/*
2675285809Sscottl	 * If possible, free to the per-CPU cache.  There are two
2676285809Sscottl	 * requirements for safe access to the per-CPU cache: (1) the thread
2677285809Sscottl	 * accessing the cache must not be preempted or yield during access,
2678285809Sscottl	 * and (2) the thread must not migrate CPUs without switching which
2679285809Sscottl	 * cache it accesses.  We rely on a critical section to prevent
2680285809Sscottl	 * preemption and migration.  We release the critical section in
2681285809Sscottl	 * order to acquire the zone mutex if we are unable to free to the
2682285809Sscottl	 * current cache; when we re-acquire the critical section, we must
2683285809Sscottl	 * detect and handle migration if it has occurred.
2684285809Sscottl	 */
2685285809Sscottlzfree_restart:
2686285809Sscottl	critical_enter();
2687285809Sscottl	cpu = curcpu;
2688285809Sscottl	cache = &zone->uz_cpu[cpu];
2689285809Sscottl
2690285809Sscottlzfree_start:
2691285809Sscottl	/*
2692285809Sscottl	 * Try to free into the allocbucket first to give LIFO ordering
2693285809Sscottl	 * for cache-hot data structures.  Spill over into the freebucket
2694285809Sscottl	 * if necessary.  Alloc will swap them if one runs dry.
2695285809Sscottl	 */
2696285809Sscottl	bucket = cache->uc_allocbucket;
2697285809Sscottl	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
2698285809Sscottl		bucket = cache->uc_freebucket;
2699285809Sscottl	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2700285809Sscottl		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2701285809Sscottl		    ("uma_zfree: Freeing to non free bucket index."));
2702285809Sscottl		bucket->ub_bucket[bucket->ub_cnt] = item;
2703285809Sscottl		bucket->ub_cnt++;
2704285809Sscottl		cache->uc_frees++;
2705285809Sscottl		critical_exit();
2706285809Sscottl		return;
2707285809Sscottl	}
2708285809Sscottl
2709285809Sscottl	/*
2710285809Sscottl	 * We must go back to the zone, which requires acquiring the zone lock,
2711285809Sscottl	 * which in turn means we must release and re-acquire the critical
2712285809Sscottl	 * section.  Since the critical section is released, we may be
2713285809Sscottl	 * preempted or migrate.  As such, make sure not to maintain any
2714285809Sscottl	 * thread-local state specific to the cache from prior to releasing
2715285809Sscottl	 * the critical section.
2716285809Sscottl	 */
2717285809Sscottl	critical_exit();
2718285809Sscottl	if (zone->uz_count == 0 || bucketdisable)
2719285809Sscottl		goto zfree_item;
2720285809Sscottl
2721285809Sscottl	lockfail = 0;
2722285809Sscottl	if (ZONE_TRYLOCK(zone) == 0) {
2723285809Sscottl		/* Record contention to size the buckets. */
2724285809Sscottl		ZONE_LOCK(zone);
2725285809Sscottl		lockfail = 1;
2726285809Sscottl	}
2727285809Sscottl	critical_enter();
2728285809Sscottl	cpu = curcpu;
2729285809Sscottl	cache = &zone->uz_cpu[cpu];
2730285809Sscottl
2731285809Sscottl	/*
2732285809Sscottl	 * Since we have locked the zone we may as well send back our stats.
2733285809Sscottl	 */
2734285809Sscottl	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2735285809Sscottl	atomic_add_long(&zone->uz_frees, cache->uc_frees);
2736285809Sscottl	cache->uc_allocs = 0;
2737285809Sscottl	cache->uc_frees = 0;
2738285809Sscottl
2739285809Sscottl	bucket = cache->uc_freebucket;
2740285809Sscottl	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2741285809Sscottl		ZONE_UNLOCK(zone);
2742285809Sscottl		goto zfree_start;
2743285809Sscottl	}
2744285809Sscottl	cache->uc_freebucket = NULL;
2745285809Sscottl
2746285809Sscottl	/* Can we throw this on the zone full list? */
2747285809Sscottl	if (bucket != NULL) {
2748285809Sscottl#ifdef UMA_DEBUG_ALLOC
2749285809Sscottl		printf("uma_zfree: Putting old bucket on the free list.\n");
2750285809Sscottl#endif
2751285809Sscottl		/* ub_cnt is the number of items cached in this bucket. */
2752285809Sscottl		KASSERT(bucket->ub_cnt != 0,
2753285809Sscottl		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2754285809Sscottl		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2755285809Sscottl	}
2756285809Sscottl
2757285809Sscottl	/* We are no longer associated with this CPU. */
2758285809Sscottl	critical_exit();
2759285809Sscottl
2760285809Sscottl	/*
2761285809Sscottl	 * We bump the uz count when the cache size is insufficient to
2762285809Sscottl	 * handle the working set.
2763285809Sscottl	 */
2764285809Sscottl	if (lockfail && zone->uz_count < BUCKET_MAX)
2765285809Sscottl		zone->uz_count++;
2766285809Sscottl	ZONE_UNLOCK(zone);
2767285809Sscottl
2768285809Sscottl#ifdef UMA_DEBUG_ALLOC
2769285809Sscottl	printf("uma_zfree: Allocating new free bucket.\n");
2770285809Sscottl#endif
2771285809Sscottl	bucket = bucket_alloc(zone, udata, M_NOWAIT);
2772285809Sscottl	if (bucket) {
2773285809Sscottl		critical_enter();
2774285809Sscottl		cpu = curcpu;
2775285809Sscottl		cache = &zone->uz_cpu[cpu];
2776285809Sscottl		if (cache->uc_freebucket == NULL) {
2777285809Sscottl			cache->uc_freebucket = bucket;
2778285809Sscottl			goto zfree_start;
2779285809Sscottl		}
2780285809Sscottl		/*
2781285809Sscottl		 * We lost the race, start over.  We have to drop our
2782285809Sscottl		 * critical section to free the bucket.
2783285809Sscottl		 */
2784285809Sscottl		critical_exit();
2785285809Sscottl		bucket_free(zone, bucket, udata);
2786285809Sscottl		goto zfree_restart;
2787285809Sscottl	}
2788285809Sscottl
2789285809Sscottl	/*
2790285809Sscottl	 * If nothing else caught this, we'll just do an internal free.
2791285809Sscottl	 */
2792285809Sscottlzfree_item:
2793285809Sscottl	zone_free_item(zone, item, udata, SKIP_DTOR);
2794285809Sscottl
2795285809Sscottl	return;
2796285809Sscottl}
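
/*
 * A minimal sketch of the per-CPU cache discipline used by the fast path
 * above (illustrative only; it mirrors the code in uma_zfree_arg()): the
 * cache may only be touched inside a critical section, and the cache
 * pointer must be re-derived from curcpu after every critical_exit(),
 * because the thread may have been preempted and migrated in between.
 *
 *	critical_enter();
 *	cpu = curcpu;
 *	cache = &zone->uz_cpu[cpu];
 *	... use cache->uc_allocbucket / cache->uc_freebucket ...
 *	critical_exit();
 *	... "cache" may now be stale; re-derive it before reuse ...
 */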
2797285809Sscottl
2798285809Sscottlstatic void
2799285809Sscottlslab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2800285809Sscottl{
2801285809Sscottl	uint8_t freei;
2802285809Sscottl
2803285809Sscottl	mtx_assert(&keg->uk_lock, MA_OWNED);
2804285809Sscottl	MPASS(keg == slab->us_keg);
2805285809Sscottl
2806285809Sscottl	/* Do we need to remove from any lists? */
2807285809Sscottl	if (slab->us_freecount+1 == keg->uk_ipers) {
2808285809Sscottl		LIST_REMOVE(slab, us_link);
2809285809Sscottl		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2810285809Sscottl	} else if (slab->us_freecount == 0) {
2811285809Sscottl		LIST_REMOVE(slab, us_link);
2812285809Sscottl		LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2813285809Sscottl	}
2814285809Sscottl
2815285809Sscottl	/* Slab management. */
2816285809Sscottl	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2817285809Sscottl	BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2818285809Sscottl	slab->us_freecount++;
2819285809Sscottl
2820285809Sscottl	/* Keg statistics. */
2821285809Sscottl	keg->uk_free++;
2822285809Sscottl}
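
/*
 * The free index computed above is plain pointer arithmetic.  As a worked
 * example with a hypothetical uk_rsize of 256 bytes: an item that begins
 * 512 bytes past slab->us_data yields freei = 512 / 256 = 2, so bit 2 is
 * set in us_free and us_freecount is bumped.  (256 and 512 are made-up
 * example values, not constants from this file.)
 */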
2823285809Sscottl
2824285809Sscottlstatic void
2825285809Sscottlzone_release(uma_zone_t zone, void **bucket, int cnt)
2826285809Sscottl{
2827285809Sscottl	void *item;
2828285809Sscottl	uma_slab_t slab;
2829285809Sscottl	uma_keg_t keg;
2830285809Sscottl	uint8_t *mem;
2831285809Sscottl	int clearfull;
2832285809Sscottl	int i;
2833285809Sscottl
2834285809Sscottl	clearfull = 0;
2835285809Sscottl	keg = zone_first_keg(zone);
2836285809Sscottl	KEG_LOCK(keg);
2837285809Sscottl	for (i = 0; i < cnt; i++) {
2838285809Sscottl		item = bucket[i];
2839285809Sscottl		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2840285809Sscottl			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
2841285809Sscottl			if (zone->uz_flags & UMA_ZONE_HASH) {
2842285809Sscottl				slab = hash_sfind(&keg->uk_hash, mem);
2843285809Sscottl			} else {
2844285809Sscottl				mem += keg->uk_pgoff;
2845285809Sscottl				slab = (uma_slab_t)mem;
2846285809Sscottl			}
2847285809Sscottl		} else {
2848285809Sscottl			slab = vtoslab((vm_offset_t)item);
2849285809Sscottl			if (slab->us_keg != keg) {
2850285809Sscottl				KEG_UNLOCK(keg);
2851285809Sscottl				keg = slab->us_keg;
2852285809Sscottl				KEG_LOCK(keg);
2853285809Sscottl			}
2854285809Sscottl		}
2855285809Sscottl		slab_free_item(keg, slab, item);
2856285809Sscottl		if (keg->uk_flags & UMA_ZFLAG_FULL) {
2857285809Sscottl			if (keg->uk_pages < keg->uk_maxpages) {
2858285809Sscottl				keg->uk_flags &= ~UMA_ZFLAG_FULL;
2859285809Sscottl				clearfull = 1;
2860285809Sscottl			}
2861285809Sscottl
2862285809Sscottl			/*
2863285809Sscottl			 * We can handle one more allocation. Since we're
2864285809Sscottl			 * clearing ZFLAG_FULL, wake up all procs blocked
2865285809Sscottl			 * on pages. This should be uncommon, so keeping this
2866285809Sscottl			 * simple for now (rather than adding a count of blocked
2867285809Sscottl			 * threads, etc.).
2868285809Sscottl			 */
2869285809Sscottl			wakeup(keg);
2870285809Sscottl		}
2871285809Sscottl	}
2872285809Sscottl	KEG_UNLOCK(keg);
2873285809Sscottl	if (clearfull) {
2874285809Sscottl		ZONE_LOCK(zone);
2875285809Sscottl		zone->uz_flags &= ~UMA_ZFLAG_FULL;
2876285809Sscottl		wakeup(zone);
2877285809Sscottl		ZONE_UNLOCK(zone);
2878285809Sscottl	}
2879285809Sscottl
2880285809Sscottl}
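
/*
 * For kegs without UMA_ZONE_VTOSLAB, zone_release() finds the slab header
 * by masking the item address down to its slab boundary and then either
 * consulting the hash (UMA_ZONE_HASH) or adding uk_pgoff to reach the
 * header embedded in the slab itself.  As a hypothetical example, assuming
 * a 4KB UMA_SLAB_SIZE, an item at 0x...52c0 masks down to the slab base
 * 0x...5000.
 */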
2881285809Sscottl
2882285809Sscottl/*
2883285809Sscottl * Frees a single item to any zone.
2884285809Sscottl *
2885285809Sscottl * Arguments:
2886285809Sscottl *	zone   The zone to free to
2887285809Sscottl *	item   The item we're freeing
2888285809Sscottl *	udata  User supplied data for the dtor
2889285809Sscottl *	skip   Skip dtors and finis
2890285809Sscottl */
2891285809Sscottlstatic void
2892285809Sscottlzone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2893285809Sscottl{
2894285809Sscottl
2895285809Sscottl#ifdef INVARIANTS
2896285809Sscottl	if (skip == SKIP_NONE) {
2897285809Sscottl		if (zone->uz_flags & UMA_ZONE_MALLOC)
2898285809Sscottl			uma_dbg_free(zone, udata, item);
2899285809Sscottl		else
2900285809Sscottl			uma_dbg_free(zone, NULL, item);
2901285809Sscottl	}
2902285809Sscottl#endif
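	/*
	 * The comparisons below rely on the zfreeskip ordering
	 * SKIP_NONE < SKIP_DTOR < SKIP_FINI: SKIP_NONE runs both the dtor
	 * and the fini, SKIP_DTOR (passed by uma_zfree_arg(), which has
	 * already run the dtor) runs only the fini, and SKIP_FINI runs
	 * neither.
	 */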
2903285809Sscottl	if (skip < SKIP_DTOR && zone->uz_dtor)
2904285809Sscottl		zone->uz_dtor(item, zone->uz_size, udata);
2905285809Sscottl
2906285809Sscottl	if (skip < SKIP_FINI && zone->uz_fini)
2907285809Sscottl		zone->uz_fini(item, zone->uz_size);
2908285809Sscottl
2909285809Sscottl	atomic_add_long(&zone->uz_frees, 1);
2910285809Sscottl	zone->uz_release(zone->uz_arg, &item, 1);
2911285809Sscottl}
2912285809Sscottl
2913285809Sscottl/* See uma.h */
2914285809Sscottlint
2915285809Sscottluma_zone_set_max(uma_zone_t zone, int nitems)
2916285809Sscottl{
2917285809Sscottl	uma_keg_t keg;
2918285809Sscottl
2919285809Sscottl	keg = zone_first_keg(zone);
2920285809Sscottl	if (keg == NULL)
2921285809Sscottl		return (0);
2922285809Sscottl	KEG_LOCK(keg);
2923285809Sscottl	keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2924285809Sscottl	if (keg->uk_maxpages * keg->uk_ipers < nitems)
2925285809Sscottl		keg->uk_maxpages += keg->uk_ppera;
2926285809Sscottl	nitems = keg->uk_maxpages * keg->uk_ipers;
2927285809Sscottl	KEG_UNLOCK(keg);
2928285809Sscottl
2929285809Sscottl	return (nitems);
2930285809Sscottl}
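
/*
 * The limit set above is rounded up to a whole number of slabs.  As a
 * worked example with hypothetical keg parameters uk_ipers = 50 and
 * uk_ppera = 1: uma_zone_set_max(zone, 1024) computes
 * uk_maxpages = (1024 / 50) * 1 = 20; since 20 * 50 = 1000 < 1024, one
 * more page is added and the function returns 21 * 50 = 1050 as the
 * effective limit.
 */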
2931285809Sscottl
2932285809Sscottl/* See uma.h */
2933285809Sscottlint
2934285809Sscottluma_zone_get_max(uma_zone_t zone)
2935285809Sscottl{
2936285809Sscottl	int nitems;
2937285809Sscottl	uma_keg_t keg;
2938285809Sscottl
2939285809Sscottl	keg = zone_first_keg(zone);
2940285809Sscottl	if (keg == NULL)
2941285809Sscottl		return (0);
2942285809Sscottl	KEG_LOCK(keg);
2943285809Sscottl	nitems = keg->uk_maxpages * keg->uk_ipers;
2944285809Sscottl	KEG_UNLOCK(keg);
2945285809Sscottl
2946285809Sscottl	return (nitems);
2947285809Sscottl}
2948285809Sscottl
2949285809Sscottl/* See uma.h */
2950285809Sscottlvoid
2951285809Sscottluma_zone_set_warning(uma_zone_t zone, const char *warning)
2952285809Sscottl{
2953285809Sscottl
2954285809Sscottl	ZONE_LOCK(zone);
2955285809Sscottl	zone->uz_warning = warning;
2956285809Sscottl	ZONE_UNLOCK(zone);
2957285809Sscottl}
2958285809Sscottl
2959285809Sscottl/* See uma.h */
2960285809Sscottlint
2961285809Sscottluma_zone_get_cur(uma_zone_t zone)
2962285809Sscottl{
2963285809Sscottl	int64_t nitems;
2964285809Sscottl	u_int i;
2965285809Sscottl
2966285809Sscottl	ZONE_LOCK(zone);
2967285809Sscottl	nitems = zone->uz_allocs - zone->uz_frees;
2968285809Sscottl	CPU_FOREACH(i) {
2969285809Sscottl		/*
2970285809Sscottl		 * See the comment in sysctl_vm_zone_stats() regarding the
2971285809Sscottl		 * safety of accessing the per-cpu caches. With the zone lock
2972285809Sscottl		 * held, it is safe, but can potentially result in stale data.
2973285809Sscottl		 */
2974285809Sscottl		nitems += zone->uz_cpu[i].uc_allocs -
2975285809Sscottl		    zone->uz_cpu[i].uc_frees;
2976285809Sscottl	}
2977285809Sscottl	ZONE_UNLOCK(zone);
2978285809Sscottl
2979285809Sscottl	return (nitems < 0 ? 0 : nitems);
2980285809Sscottl}
2981285809Sscottl
2982285809Sscottl/* See uma.h */
2983285809Sscottlvoid
2984285809Sscottluma_zone_set_init(uma_zone_t zone, uma_init uminit)
2985285809Sscottl{
2986285809Sscottl	uma_keg_t keg;
2987285809Sscottl
2988285809Sscottl	keg = zone_first_keg(zone);
2989285809Sscottl	KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
2990285809Sscottl	KEG_LOCK(keg);
2991285809Sscottl	KASSERT(keg->uk_pages == 0,
2992285809Sscottl	    ("uma_zone_set_init on non-empty keg"));
2993285809Sscottl	keg->uk_init = uminit;
2994285809Sscottl	KEG_UNLOCK(keg);
2995285809Sscottl}
2996285809Sscottl
2997285809Sscottl/* See uma.h */
2998285809Sscottlvoid
2999285809Sscottluma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3000285809Sscottl{
3001285809Sscottl	uma_keg_t keg;
3002285809Sscottl
3003285809Sscottl	keg = zone_first_keg(zone);
3004285809Sscottl	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
3005285809Sscottl	KEG_LOCK(keg);
3006285809Sscottl	KASSERT(keg->uk_pages == 0,
3007285809Sscottl	    ("uma_zone_set_fini on non-empty keg"));
3008285809Sscottl	keg->uk_fini = fini;
3009285809Sscottl	KEG_UNLOCK(keg);
3010285809Sscottl}
3011285809Sscottl
3012285809Sscottl/* See uma.h */
3013285809Sscottlvoid
3014285809Sscottluma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3015285809Sscottl{
3016285809Sscottl
3017285809Sscottl	ZONE_LOCK(zone);
3018285809Sscottl	KASSERT(zone_first_keg(zone)->uk_pages == 0,
3019285809Sscottl	    ("uma_zone_set_zinit on non-empty keg"));
3020285809Sscottl	zone->uz_init = zinit;
3021285809Sscottl	ZONE_UNLOCK(zone);
3022285809Sscottl}
3023285809Sscottl
3024285809Sscottl/* See uma.h */
3025285809Sscottlvoid
3026285809Sscottluma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3027285809Sscottl{
3028285809Sscottl
3029285809Sscottl	ZONE_LOCK(zone);
3030285809Sscottl	KASSERT(zone_first_keg(zone)->uk_pages == 0,
3031285809Sscottl	    ("uma_zone_set_zfini on non-empty keg"));
3032285809Sscottl	zone->uz_fini = zfini;
3033285809Sscottl	ZONE_UNLOCK(zone);
3034285809Sscottl}
3035285809Sscottl
3036285809Sscottl/* See uma.h */
3037285809Sscottl/* XXX uk_freef is not actually used with the zone locked */
3038285809Sscottlvoid
3039285809Sscottluma_zone_set_freef(uma_zone_t zone, uma_free freef)
3040285809Sscottl{
3041285809Sscottl	uma_keg_t keg;
3042285809Sscottl
3043285809Sscottl	keg = zone_first_keg(zone);
3044285809Sscottl	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3045285809Sscottl	KEG_LOCK(keg);
3046285809Sscottl	keg->uk_freef = freef;
3047285809Sscottl	KEG_UNLOCK(keg);
3048285809Sscottl}
3049285809Sscottl
3050285809Sscottl/* See uma.h */
3051285809Sscottl/* XXX uk_allocf is not actually used with the zone locked */
3052285809Sscottlvoid
3053285809Sscottluma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3054285809Sscottl{
3055285809Sscottl	uma_keg_t keg;
3056285809Sscottl
3057285809Sscottl	keg = zone_first_keg(zone);
3058285809Sscottl	KEG_LOCK(keg);
3059285809Sscottl	keg->uk_allocf = allocf;
3060285809Sscottl	KEG_UNLOCK(keg);
3061285809Sscottl}
3062285809Sscottl
3063285809Sscottl/* See uma.h */
3064285809Sscottlvoid
3065285809Sscottluma_zone_reserve(uma_zone_t zone, int items)
3066285809Sscottl{
3067285809Sscottl	uma_keg_t keg;
3068285809Sscottl
3069285809Sscottl	keg = zone_first_keg(zone);
3070285809Sscottl	if (keg == NULL)
3071285809Sscottl		return;
3072285809Sscottl	KEG_LOCK(keg);
3073285809Sscottl	keg->uk_reserve = items;
3074285809Sscottl	KEG_UNLOCK(keg);
3075285809Sscottl
3076285809Sscottl	return;
3077285809Sscottl}
3078285809Sscottl
3079285809Sscottl/* See uma.h */
3080285809Sscottlint
3081285809Sscottluma_zone_reserve_kva(uma_zone_t zone, int count)
3082285809Sscottl{
3083285809Sscottl	uma_keg_t keg;
3084285809Sscottl	vm_offset_t kva;
3085285809Sscottl	int pages;
3086285809Sscottl
3087285809Sscottl	keg = zone_first_keg(zone);
3088285809Sscottl	if (keg == NULL)
3089285809Sscottl		return (0);
3090285809Sscottl	pages = count / keg->uk_ipers;
3091285809Sscottl
3092285809Sscottl	if (pages * keg->uk_ipers < count)
3093285809Sscottl		pages++;
3094285809Sscottl
3095285809Sscottl#ifdef UMA_MD_SMALL_ALLOC
3096285809Sscottl	if (keg->uk_ppera > 1) {
3097285809Sscottl#else
3098285809Sscottl	if (1) {
3099285809Sscottl#endif
3100285809Sscottl		kva = kva_alloc(pages * UMA_SLAB_SIZE);
3101285809Sscottl		if (kva == 0)
3102285809Sscottl			return (0);
3103285809Sscottl	} else
3104285809Sscottl		kva = 0;
3105285809Sscottl	KEG_LOCK(keg);
3106285809Sscottl	keg->uk_kva = kva;
3107285809Sscottl	keg->uk_offset = 0;
3108285809Sscottl	keg->uk_maxpages = pages;
3109285809Sscottl#ifdef UMA_MD_SMALL_ALLOC
3110285809Sscottl	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3111285809Sscottl#else
3112285809Sscottl	keg->uk_allocf = noobj_alloc;
3113285809Sscottl#endif
3114285809Sscottl	keg->uk_flags |= UMA_ZONE_NOFREE;
3115285809Sscottl	KEG_UNLOCK(keg);
3116285809Sscottl
3117285809Sscottl	return (1);
3118285809Sscottl}
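
/*
 * A hypothetical sizing example for the reservation above: with
 * uk_ipers = 100, uma_zone_reserve_kva(zone, 1000) computes pages = 10 and
 * (unless the machine-dependent small-alloc backend covers single-page
 * slabs) reserves 10 * UMA_SLAB_SIZE bytes of kernel virtual address space
 * up front; the keg is then switched to the noobj_alloc/uma_small_alloc
 * backend and marked UMA_ZONE_NOFREE.
 */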
3119285809Sscottl
3120285809Sscottl/* See uma.h */
3121285809Sscottlvoid
3122285809Sscottluma_prealloc(uma_zone_t zone, int items)
3123285809Sscottl{
3124285809Sscottl	int slabs;
3125285809Sscottl	uma_slab_t slab;
3126285809Sscottl	uma_keg_t keg;
3127285809Sscottl
3128285809Sscottl	keg = zone_first_keg(zone);
3129285809Sscottl	if (keg == NULL)
3130285809Sscottl		return;
3131285809Sscottl	KEG_LOCK(keg);
3132285809Sscottl	slabs = items / keg->uk_ipers;
3133285809Sscottl	if (slabs * keg->uk_ipers < items)
3134285809Sscottl		slabs++;
3135285809Sscottl	while (slabs > 0) {
3136285809Sscottl		slab = keg_alloc_slab(keg, zone, M_WAITOK);
3137285809Sscottl		if (slab == NULL)
3138285809Sscottl			break;
3139285809Sscottl		MPASS(slab->us_keg == keg);
3140285809Sscottl		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3141285809Sscottl		slabs--;
3142285809Sscottl	}
3143285809Sscottl	KEG_UNLOCK(keg);
3144285809Sscottl}
3145285809Sscottl
3146285809Sscottl/* See uma.h */
3147285809Sscottluint32_t *
3148285809Sscottluma_find_refcnt(uma_zone_t zone, void *item)
3149285809Sscottl{
3150285809Sscottl	uma_slabrefcnt_t slabref;
3151285809Sscottl	uma_slab_t slab;
3152285809Sscottl	uma_keg_t keg;
3153285809Sscottl	uint32_t *refcnt;
3154285809Sscottl	int idx;
3155285809Sscottl
3156285809Sscottl	slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
3157285809Sscottl	slabref = (uma_slabrefcnt_t)slab;
3158285809Sscottl	keg = slab->us_keg;
3159285809Sscottl	KASSERT(keg->uk_flags & UMA_ZONE_REFCNT,
3160285809Sscottl	    ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
3161285809Sscottl	idx = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3162285809Sscottl	refcnt = &slabref->us_refcnt[idx];
3163285809Sscottl	return (refcnt);
3164285809Sscottl}
3165285809Sscottl
3166285809Sscottl/* See uma.h */
3167285809Sscottlvoid
3168285809Sscottluma_reclaim(void)
3169285809Sscottl{
3170285809Sscottl#ifdef UMA_DEBUG
3171285809Sscottl	printf("UMA: vm asked us to release pages!\n");
3172285809Sscottl#endif
3173285809Sscottl	bucket_enable();
3174285809Sscottl	zone_foreach(zone_drain);
3175285809Sscottl	if (vm_page_count_min()) {
3176285809Sscottl		cache_drain_safe(NULL);
3177285809Sscottl		zone_foreach(zone_drain);
3178285809Sscottl	}
3179285809Sscottl	/*
3180285809Sscottl	 * Some slabs may have been freed, but the slab zones were visited
3181285809Sscottl	 * early in the loop above; drain them again to free pages that became
3182285809Sscottl	 * empty once the other zones were drained.  Do the same for buckets.
3183285809Sscottl	 */
3184285809Sscottl	zone_drain(slabzone);
3185285809Sscottl	zone_drain(slabrefzone);
3186285809Sscottl	bucket_zone_drain();
3187285809Sscottl}
3188285809Sscottl
3189285809Sscottl/* See uma.h */
3190285809Sscottlint
3191285809Sscottluma_zone_exhausted(uma_zone_t zone)
3192285809Sscottl{
3193285809Sscottl	int full;
3194285809Sscottl
3195285809Sscottl	ZONE_LOCK(zone);
3196285809Sscottl	full = (zone->uz_flags & UMA_ZFLAG_FULL);
3197285809Sscottl	ZONE_UNLOCK(zone);
3198285809Sscottl	return (full);
3199285809Sscottl}
3200285809Sscottl
3201285809Sscottlint
3202285809Sscottluma_zone_exhausted_nolock(uma_zone_t zone)
3203285809Sscottl{
3204285809Sscottl	return (zone->uz_flags & UMA_ZFLAG_FULL);
3205285809Sscottl}
3206285809Sscottl
3207285809Sscottlvoid *
3208285809Sscottluma_large_malloc(int size, int wait)
3209285809Sscottl{
3210285809Sscottl	void *mem;
3211285809Sscottl	uma_slab_t slab;
3212285809Sscottl	uint8_t flags;
3213285809Sscottl
3214285809Sscottl	slab = zone_alloc_item(slabzone, NULL, wait);
3215285809Sscottl	if (slab == NULL)
3216285809Sscottl		return (NULL);
3217285809Sscottl	mem = page_alloc(NULL, size, &flags, wait);
3218285809Sscottl	if (mem) {
3219285809Sscottl		vsetslab((vm_offset_t)mem, slab);
3220285809Sscottl		slab->us_data = mem;
3221285809Sscottl		slab->us_flags = flags | UMA_SLAB_MALLOC;
3222285809Sscottl		slab->us_size = size;
3223285809Sscottl	} else {
3224285809Sscottl		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3225285809Sscottl	}
3226285809Sscottl
3227285809Sscottl	return (mem);
3228285809Sscottl}
3229285809Sscottl
3230285809Sscottlvoid
3231285809Sscottluma_large_free(uma_slab_t slab)
3232285809Sscottl{
3233285809Sscottl
3234285809Sscottl	page_free(slab->us_data, slab->us_size, slab->us_flags);
3235285809Sscottl	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3236285809Sscottl}
3237285809Sscottl
3238285809Sscottlstatic void
3239285809Sscottluma_zero_item(void *item, uma_zone_t zone)
3240285809Sscottl{
3241285809Sscottl
3242285809Sscottl	if (zone->uz_flags & UMA_ZONE_PCPU) {
3243285809Sscottl		for (int i = 0; i < mp_ncpus; i++)
3244285809Sscottl			bzero(zpcpu_get_cpu(item, i), zone->uz_size);
3245285809Sscottl	} else
3246285809Sscottl		bzero(item, zone->uz_size);
3247285809Sscottl}
3248285809Sscottl
3249285809Sscottlvoid
3250285809Sscottluma_print_stats(void)
3251285809Sscottl{
3252285809Sscottl	zone_foreach(uma_print_zone);
3253285809Sscottl}
3254285809Sscottl
3255285809Sscottlstatic void
3256285809Sscottlslab_print(uma_slab_t slab)
3257285809Sscottl{
3258285809Sscottl	printf("slab: keg %p, data %p, freecount %d\n",
3259285809Sscottl		slab->us_keg, slab->us_data, slab->us_freecount);
3260285809Sscottl}
3261285809Sscottl
3262285809Sscottlstatic void
3263285809Sscottlcache_print(uma_cache_t cache)
3264285809Sscottl{
3265285809Sscottl	printf("alloc: %p(%d), free: %p(%d)\n",
3266285809Sscottl		cache->uc_allocbucket,
3267285809Sscottl		cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
3268285809Sscottl		cache->uc_freebucket,
3269285809Sscottl		cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
3270285809Sscottl}
3271285809Sscottl
3272285809Sscottlstatic void
3273285809Sscottluma_print_keg(uma_keg_t keg)
3274285809Sscottl{
3275285809Sscottl	uma_slab_t slab;
3276285809Sscottl
3277285809Sscottl	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3278285809Sscottl	    "out %d free %d limit %d\n",
3279285809Sscottl	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3280285809Sscottl	    keg->uk_ipers, keg->uk_ppera,
3281285809Sscottl	    (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
3282285809Sscottl	    (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
3283285809Sscottl	printf("Part slabs:\n");
3284285809Sscottl	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
3285285809Sscottl		slab_print(slab);
3286285809Sscottl	printf("Free slabs:\n");
3287285809Sscottl	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
3288285809Sscottl		slab_print(slab);
3289285809Sscottl	printf("Full slabs:\n");
3290285809Sscottl	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
3291285809Sscottl		slab_print(slab);
3292285809Sscottl}
3293285809Sscottl
3294285809Sscottlvoid
3295285809Sscottluma_print_zone(uma_zone_t zone)
3296285809Sscottl{
3297285809Sscottl	uma_cache_t cache;
3298285809Sscottl	uma_klink_t kl;
3299285809Sscottl	int i;
3300285809Sscottl
3301285809Sscottl	printf("zone: %s(%p) size %d flags %#x\n",
3302285809Sscottl	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
3303285809Sscottl	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
3304285809Sscottl		uma_print_keg(kl->kl_keg);
3305285809Sscottl	CPU_FOREACH(i) {
3306285809Sscottl		cache = &zone->uz_cpu[i];
3307285809Sscottl		printf("CPU %d Cache:\n", i);
3308285809Sscottl		cache_print(cache);
3309285809Sscottl	}
3310285809Sscottl}
3311285809Sscottl
3312285809Sscottl#ifdef DDB
3313285809Sscottl/*
3314285809Sscottl * Generate statistics across both the zone and its per-CPU caches.  Each
3315285809Sscottl * statistic is returned via its pointer argument when that pointer is non-NULL.
3316285809Sscottl *
3317285809Sscottl * Note: does not update the zone statistics, as it can't safely clear the
3318285809Sscottl * per-CPU cache statistic.
3319285809Sscottl *
3320285809Sscottl * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3321285809Sscottl * safe from off-CPU; we should modify the caches to track this information
3322285809Sscottl * directly so that we don't have to.
3323285809Sscottl */
3324285809Sscottlstatic void
3325285809Sscottluma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
3326285809Sscottl    uint64_t *freesp, uint64_t *sleepsp)
3327285809Sscottl{
3328285809Sscottl	uma_cache_t cache;
3329285809Sscottl	uint64_t allocs, frees, sleeps;
3330285809Sscottl	int cachefree, cpu;
3331285809Sscottl
3332285809Sscottl	allocs = frees = sleeps = 0;
3333285809Sscottl	cachefree = 0;
3334285809Sscottl	CPU_FOREACH(cpu) {
3335285809Sscottl		cache = &z->uz_cpu[cpu];
3336285809Sscottl		if (cache->uc_allocbucket != NULL)
3337285809Sscottl			cachefree += cache->uc_allocbucket->ub_cnt;
3338285809Sscottl		if (cache->uc_freebucket != NULL)
3339285809Sscottl			cachefree += cache->uc_freebucket->ub_cnt;
3340285809Sscottl		allocs += cache->uc_allocs;
3341285809Sscottl		frees += cache->uc_frees;
3342285809Sscottl	}
3343285809Sscottl	allocs += z->uz_allocs;
3344285809Sscottl	frees += z->uz_frees;
3345285809Sscottl	sleeps += z->uz_sleeps;
3346285809Sscottl	if (cachefreep != NULL)
3347285809Sscottl		*cachefreep = cachefree;
3348285809Sscottl	if (allocsp != NULL)
3349285809Sscottl		*allocsp = allocs;
3350285809Sscottl	if (freesp != NULL)
3351285809Sscottl		*freesp = frees;
3352285809Sscottl	if (sleepsp != NULL)
3353285809Sscottl		*sleepsp = sleeps;
3354285809Sscottl}
3355285809Sscottl#endif /* DDB */
3356285809Sscottl
3357285809Sscottlstatic int
3358285809Sscottlsysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3359285809Sscottl{
3360285809Sscottl	uma_keg_t kz;
3361285809Sscottl	uma_zone_t z;
3362285809Sscottl	int count;
3363285809Sscottl
3364285809Sscottl	count = 0;
3365285809Sscottl	mtx_lock(&uma_mtx);
3366285809Sscottl	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3367285809Sscottl		LIST_FOREACH(z, &kz->uk_zones, uz_link)
3368285809Sscottl			count++;
3369285809Sscottl	}
3370285809Sscottl	mtx_unlock(&uma_mtx);
3371285809Sscottl	return (sysctl_handle_int(oidp, &count, 0, req));
3372285809Sscottl}
3373285809Sscottl
3374285809Sscottlstatic int
3375285809Sscottlsysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
3376285809Sscottl{
3377285809Sscottl	struct uma_stream_header ush;
3378285809Sscottl	struct uma_type_header uth;
3379285809Sscottl	struct uma_percpu_stat ups;
3380285809Sscottl	uma_bucket_t bucket;
3381285809Sscottl	struct sbuf sbuf;
3382285809Sscottl	uma_cache_t cache;
3383285809Sscottl	uma_klink_t kl;
3384285809Sscottl	uma_keg_t kz;
3385285809Sscottl	uma_zone_t z;
3386285809Sscottl	uma_keg_t k;
3387285809Sscottl	int count, error, i;
3388285809Sscottl
3389285809Sscottl	error = sysctl_wire_old_buffer(req, 0);
3390285809Sscottl	if (error != 0)
3391285809Sscottl		return (error);
3392285809Sscottl	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
3393285809Sscottl
3394285809Sscottl	count = 0;
3395285809Sscottl	mtx_lock(&uma_mtx);
3396285809Sscottl	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3397285809Sscottl		LIST_FOREACH(z, &kz->uk_zones, uz_link)
3398285809Sscottl			count++;
3399285809Sscottl	}
3400285809Sscottl
3401285809Sscottl	/*
3402285809Sscottl	 * Insert stream header.
3403285809Sscottl	 */
3404285809Sscottl	bzero(&ush, sizeof(ush));
3405285809Sscottl	ush.ush_version = UMA_STREAM_VERSION;
3406285809Sscottl	ush.ush_maxcpus = (mp_maxid + 1);
3407285809Sscottl	ush.ush_count = count;
3408285809Sscottl	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
3409285809Sscottl
3410285809Sscottl	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3411285809Sscottl		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3412285809Sscottl			bzero(&uth, sizeof(uth));
3413285809Sscottl			ZONE_LOCK(z);
3414285809Sscottl			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
3415285809Sscottl			uth.uth_align = kz->uk_align;
3416285809Sscottl			uth.uth_size = kz->uk_size;
3417285809Sscottl			uth.uth_rsize = kz->uk_rsize;
3418285809Sscottl			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
3419285809Sscottl				k = kl->kl_keg;
3420285809Sscottl				uth.uth_maxpages += k->uk_maxpages;
3421285809Sscottl				uth.uth_pages += k->uk_pages;
3422285809Sscottl				uth.uth_keg_free += k->uk_free;
3423285809Sscottl				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
3424285809Sscottl				    * k->uk_ipers;
3425285809Sscottl			}
3426285809Sscottl
3427285809Sscottl			/*
3428285809Sscottl			 * A zone is secondary if it is not the first entry
3429285809Sscottl			 * on the keg's zone list.
3430285809Sscottl			 */
3431285809Sscottl			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
3432285809Sscottl			    (LIST_FIRST(&kz->uk_zones) != z))
3433285809Sscottl				uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3434285809Sscottl
3435285809Sscottl			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3436285809Sscottl				uth.uth_zone_free += bucket->ub_cnt;
3437285809Sscottl			uth.uth_allocs = z->uz_allocs;
3438285809Sscottl			uth.uth_frees = z->uz_frees;
3439285809Sscottl			uth.uth_fails = z->uz_fails;
3440285809Sscottl			uth.uth_sleeps = z->uz_sleeps;
3441285809Sscottl			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
3442285809Sscottl			/*
3443285809Sscottl			 * While it is not normally safe to access the cache
3444285809Sscottl			 * bucket pointers while not on the CPU that owns the
3445285809Sscottl			 * cache, we only allow the pointers to be exchanged
3446285809Sscottl			 * without the zone lock held, not invalidated, so
3447285809Sscottl			 * accept the possible race associated with bucket
3448285809Sscottl			 * exchange during monitoring.
3449285809Sscottl			 */
3450285809Sscottl			for (i = 0; i < (mp_maxid + 1); i++) {
3451285809Sscottl				bzero(&ups, sizeof(ups));
3452285809Sscottl				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3453285809Sscottl					goto skip;
3454285809Sscottl				if (CPU_ABSENT(i))
3455285809Sscottl					goto skip;
3456285809Sscottl				cache = &z->uz_cpu[i];
3457285809Sscottl				if (cache->uc_allocbucket != NULL)
3458285809Sscottl					ups.ups_cache_free +=
3459285809Sscottl					    cache->uc_allocbucket->ub_cnt;
3460285809Sscottl				if (cache->uc_freebucket != NULL)
3461285809Sscottl					ups.ups_cache_free +=
3462285809Sscottl					    cache->uc_freebucket->ub_cnt;
3463285809Sscottl				ups.ups_allocs = cache->uc_allocs;
3464285809Sscottl				ups.ups_frees = cache->uc_frees;
3465285809Sscottlskip:
3466285809Sscottl				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
3467285809Sscottl			}
3468285809Sscottl			ZONE_UNLOCK(z);
3469285809Sscottl		}
3470285809Sscottl	}
3471285809Sscottl	mtx_unlock(&uma_mtx);
3472285809Sscottl	error = sbuf_finish(&sbuf);
3473285809Sscottl	sbuf_delete(&sbuf);
3474285809Sscottl	return (error);
3475285809Sscottl}
3476285809Sscottl
3477285809Sscottlint
3478285809Sscottlsysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
3479285809Sscottl{
3480285809Sscottl	uma_zone_t zone = *(uma_zone_t *)arg1;
3481285809Sscottl	int error, max, old;
3482285809Sscottl
3483285809Sscottl	old = max = uma_zone_get_max(zone);
3484285809Sscottl	error = sysctl_handle_int(oidp, &max, 0, req);
3485285809Sscottl	if (error || !req->newptr)
3486285809Sscottl		return (error);
3487285809Sscottl
3488285809Sscottl	if (max < old)
3489285809Sscottl		return (EINVAL);
3490285809Sscottl
3491285809Sscottl	uma_zone_set_max(zone, max);
3492285809Sscottl
3493285809Sscottl	return (0);
3494285809Sscottl}
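
/*
 * Illustrative use (the zone and OID names are hypothetical, not taken
 * from this file): a subsystem can export its zone limit as a read/write
 * sysctl by pointing arg1 at its uma_zone_t variable:
 *
 *	static uma_zone_t foo_zone;
 *	SYSCTL_PROC(_vm, OID_AUTO, foo_zone_max, CTLTYPE_INT | CTLFLAG_RW,
 *	    &foo_zone, 0, sysctl_handle_uma_zone_max, "I",
 *	    "Maximum number of foo items");
 *
 * Raising the limit goes through uma_zone_set_max(); attempts to lower it
 * return EINVAL, as implemented above.
 */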
3495285809Sscottl
3496285809Sscottlint
3497285809Sscottlsysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
3498285809Sscottl{
3499285809Sscottl	uma_zone_t zone = *(uma_zone_t *)arg1;
3500285809Sscottl	int cur;
3501285809Sscottl
3502285809Sscottl	cur = uma_zone_get_cur(zone);
3503285809Sscottl	return (sysctl_handle_int(oidp, &cur, 0, req));
3504285809Sscottl}
3505285809Sscottl
3506285809Sscottl#ifdef DDB
3507285809SscottlDB_SHOW_COMMAND(uma, db_show_uma)
3508285809Sscottl{
3509285809Sscottl	uint64_t allocs, frees, sleeps;
3510285809Sscottl	uma_bucket_t bucket;
3511285809Sscottl	uma_keg_t kz;
3512285809Sscottl	uma_zone_t z;
3513285809Sscottl	int cachefree;
3514285809Sscottl
3515285809Sscottl	db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
3516285809Sscottl	    "Free", "Requests", "Sleeps", "Bucket");
3517285809Sscottl	LIST_FOREACH(kz, &uma_kegs, uk_link) {
3518285809Sscottl		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3519285809Sscottl			if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3520285809Sscottl				allocs = z->uz_allocs;
3521285809Sscottl				frees = z->uz_frees;
3522285809Sscottl				sleeps = z->uz_sleeps;
3523285809Sscottl				cachefree = 0;
3524285809Sscottl			} else
3525285809Sscottl				uma_zone_sumstat(z, &cachefree, &allocs,
3526285809Sscottl				    &frees, &sleeps);
3527285809Sscottl			if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3528285809Sscottl			    (LIST_FIRST(&kz->uk_zones) != z)))
3529285809Sscottl				cachefree += kz->uk_free;
3530285809Sscottl			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3531285809Sscottl				cachefree += bucket->ub_cnt;
3532285809Sscottl			db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
3533285809Sscottl			    z->uz_name, (uintmax_t)kz->uk_size,
3534285809Sscottl			    (intmax_t)(allocs - frees), cachefree,
3535285809Sscottl			    (uintmax_t)allocs, sleeps, z->uz_count);
3536285809Sscottl			if (db_pager_quit)
3537285809Sscottl				return;
3538285809Sscottl		}
3539285809Sscottl	}
3540285809Sscottl}
3541285809Sscottl
3542285809SscottlDB_SHOW_COMMAND(umacache, db_show_umacache)
3543285809Sscottl{
3544285809Sscottl	uint64_t allocs, frees;
3545285809Sscottl	uma_bucket_t bucket;
3546285809Sscottl	uma_zone_t z;
3547285809Sscottl	int cachefree;
3548285809Sscottl
3549285809Sscottl	db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3550285809Sscottl	    "Requests", "Bucket");
3551285809Sscottl	LIST_FOREACH(z, &uma_cachezones, uz_link) {
3552285809Sscottl		uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
3553285809Sscottl		LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3554285809Sscottl			cachefree += bucket->ub_cnt;
3555285809Sscottl		db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
3556285809Sscottl		    z->uz_name, (uintmax_t)z->uz_size,
3557285809Sscottl		    (intmax_t)(allocs - frees), cachefree,
3558285809Sscottl		    (uintmax_t)allocs, z->uz_count);
3559285809Sscottl		if (db_pager_quit)
3560285809Sscottl			return;
3561285809Sscottl	}
3562285809Sscottl}
3563285809Sscottl#endif
3564285809Sscottl