1170477Salc/*-
2170477Salc * Copyright (c) 2002-2006 Rice University
3170477Salc * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
4170477Salc * All rights reserved.
5170477Salc *
6170477Salc * This software was developed for the FreeBSD Project by Alan L. Cox,
7170477Salc * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
8170477Salc *
9170477Salc * Redistribution and use in source and binary forms, with or without
10170477Salc * modification, are permitted provided that the following conditions
11170477Salc * are met:
12170477Salc * 1. Redistributions of source code must retain the above copyright
13170477Salc *    notice, this list of conditions and the following disclaimer.
14170477Salc * 2. Redistributions in binary form must reproduce the above copyright
15170477Salc *    notice, this list of conditions and the following disclaimer in the
16170477Salc *    documentation and/or other materials provided with the distribution.
17170477Salc *
18170477Salc * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19170477Salc * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20170477Salc * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21170477Salc * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
22170477Salc * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23170477Salc * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24170477Salc * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25170477Salc * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26170477Salc * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27170477Salc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
28170477Salc * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29170477Salc * POSSIBILITY OF SUCH DAMAGE.
30170477Salc */
31170477Salc
/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

39170477Salc#include <sys/cdefs.h>
40170477Salc__FBSDID("$FreeBSD$");
41170477Salc
42170477Salc#include "opt_ddb.h"
43170477Salc
44170477Salc#include <sys/param.h>
45170477Salc#include <sys/systm.h>
46170477Salc#include <sys/lock.h>
47170477Salc#include <sys/kernel.h>
48170477Salc#include <sys/malloc.h>
49170477Salc#include <sys/mutex.h>
50170477Salc#include <sys/queue.h>
51170477Salc#include <sys/sbuf.h>
52170477Salc#include <sys/sysctl.h>
53170477Salc#include <sys/vmmeter.h>
54170477Salc
55170477Salc#include <ddb/ddb.h>
56170477Salc
57170477Salc#include <vm/vm.h>
58170477Salc#include <vm/vm_param.h>
59170477Salc#include <vm/vm_kern.h>
60170477Salc#include <vm/vm_object.h>
61170477Salc#include <vm/vm_page.h>
62170477Salc#include <vm/vm_phys.h>
63170477Salc
64210550Sjhb/*
65210550Sjhb * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
66210550Sjhb * domain.  These extra lists are stored at the end of the regular
67210550Sjhb * free lists starting with VM_NFREELIST.
68210550Sjhb */
69210550Sjhb#define VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)
70210550Sjhb
71170477Salcstruct vm_freelist {
72170477Salc	struct pglist pl;
73170477Salc	int lcnt;
74170477Salc};
75170477Salc
76170477Salcstruct vm_phys_seg {
77170477Salc	vm_paddr_t	start;
78170477Salc	vm_paddr_t	end;
79170477Salc	vm_page_t	first_page;
80210550Sjhb	int		domain;
81170477Salc	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
82170477Salc};
83170477Salc
84210550Sjhbstruct mem_affinity *mem_affinity;
85210550Sjhb
86170477Salcstatic struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
87170477Salc
88170477Salcstatic int vm_phys_nsegs;
89170477Salc
90236924Skib#define VM_PHYS_FICTITIOUS_NSEGS	8
91236924Skibstatic struct vm_phys_fictitious_seg {
92236924Skib	vm_paddr_t	start;
93236924Skib	vm_paddr_t	end;
94236924Skib	vm_page_t	first_page;
95236924Skib} vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
96236924Skibstatic struct mtx vm_phys_fictitious_reg_mtx;
97254312SmarkjMALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
98236924Skib
99170477Salcstatic struct vm_freelist
100210550Sjhb    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
101210550Sjhbstatic struct vm_freelist
102210550Sjhb(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];
103170477Salc
104170477Salcstatic int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
105170477Salc
106170477Salcstatic int cnt_prezero;
107170477SalcSYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
108170477Salc    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");
109170477Salc
110170477Salcstatic int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
111170477SalcSYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
112170477Salc    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");
113170477Salc
114170477Salcstatic int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
115170477SalcSYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
116170477Salc    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
117170477Salc
118210550Sjhb#if VM_NDOMAIN > 1
119210550Sjhbstatic int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
120210550SjhbSYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
121210550Sjhb    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
122210550Sjhb#endif
123210550Sjhb
124251179Sjhbstatic vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
125251179Sjhb    int order);
126210550Sjhbstatic void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
127210550Sjhb    int domain);
128170477Salcstatic void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
129170477Salcstatic int vm_phys_paddr_to_segind(vm_paddr_t pa);
130170477Salcstatic void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
131170477Salc    int order);
132170477Salc
133170477Salc/*
134170477Salc * Outputs the state of the physical memory allocator, specifically,
135170477Salc * the amount of physical memory in each free list.
136170477Salc */
137170477Salcstatic int
138170477Salcsysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
139170477Salc{
140170477Salc	struct sbuf sbuf;
141170477Salc	struct vm_freelist *fl;
142170477Salc	int error, flind, oind, pind;
143170477Salc
144217916Smdf	error = sysctl_wire_old_buffer(req, 0);
145217916Smdf	if (error != 0)
146217916Smdf		return (error);
147212750Smdf	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
148170477Salc	for (flind = 0; flind < vm_nfreelists; flind++) {
149170477Salc		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
150170477Salc		    "\n  ORDER (SIZE)  |  NUMBER"
151170477Salc		    "\n              ", flind);
152170477Salc		for (pind = 0; pind < VM_NFREEPOOL; pind++)
153170477Salc			sbuf_printf(&sbuf, "  |  POOL %d", pind);
154170477Salc		sbuf_printf(&sbuf, "\n--            ");
155170477Salc		for (pind = 0; pind < VM_NFREEPOOL; pind++)
156170477Salc			sbuf_printf(&sbuf, "-- --      ");
157170477Salc		sbuf_printf(&sbuf, "--\n");
158170477Salc		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
159214564Salc			sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
160170477Salc			    1 << (PAGE_SHIFT - 10 + oind));
161170477Salc			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
162170477Salc				fl = vm_phys_free_queues[flind][pind];
163214564Salc				sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
164170477Salc			}
165170477Salc			sbuf_printf(&sbuf, "\n");
166170477Salc		}
167170477Salc	}
168212750Smdf	error = sbuf_finish(&sbuf);
169170477Salc	sbuf_delete(&sbuf);
170170477Salc	return (error);
171170477Salc}
172170477Salc
173170477Salc/*
174170477Salc * Outputs the set of physical memory segments.
175170477Salc */
176170477Salcstatic int
177170477Salcsysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
178170477Salc{
179170477Salc	struct sbuf sbuf;
180170477Salc	struct vm_phys_seg *seg;
181170477Salc	int error, segind;
182170477Salc
183217916Smdf	error = sysctl_wire_old_buffer(req, 0);
184217916Smdf	if (error != 0)
185217916Smdf		return (error);
186212750Smdf	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
187170477Salc	for (segind = 0; segind < vm_phys_nsegs; segind++) {
188170477Salc		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
189170477Salc		seg = &vm_phys_segs[segind];
190170477Salc		sbuf_printf(&sbuf, "start:     %#jx\n",
191170477Salc		    (uintmax_t)seg->start);
192170477Salc		sbuf_printf(&sbuf, "end:       %#jx\n",
193170477Salc		    (uintmax_t)seg->end);
194210550Sjhb		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
195170477Salc		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
196170477Salc	}
197212750Smdf	error = sbuf_finish(&sbuf);
198170477Salc	sbuf_delete(&sbuf);
199170477Salc	return (error);
200170477Salc}
201170477Salc
#if VM_NDOMAIN > 1
/*
 * Outputs the per-domain free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sb, NULL, 128, req);
	/* One extra domain list is stored per domain beyond the first. */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sb, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sb, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error);
}
#endif
228210550Sjhb
229210550Sjhb/*
230170477Salc * Create a physical memory segment.
231170477Salc */
232170477Salcstatic void
233210550Sjhb_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
234170477Salc{
235170477Salc	struct vm_phys_seg *seg;
236170477Salc#ifdef VM_PHYSSEG_SPARSE
237170477Salc	long pages;
238170477Salc	int segind;
239170477Salc
240170477Salc	pages = 0;
241170477Salc	for (segind = 0; segind < vm_phys_nsegs; segind++) {
242170477Salc		seg = &vm_phys_segs[segind];
243170477Salc		pages += atop(seg->end - seg->start);
244170477Salc	}
245170477Salc#endif
246170477Salc	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
247170477Salc	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
248170477Salc	seg = &vm_phys_segs[vm_phys_nsegs++];
249170477Salc	seg->start = start;
250170477Salc	seg->end = end;
251210550Sjhb	seg->domain = domain;
252170477Salc#ifdef VM_PHYSSEG_SPARSE
253170477Salc	seg->first_page = &vm_page_array[pages];
254170477Salc#else
255170477Salc	seg->first_page = PHYS_TO_VM_PAGE(start);
256170477Salc#endif
257210550Sjhb#if VM_NDOMAIN > 1
258210550Sjhb	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
259210550Sjhb		flind = VM_NFREELIST + (domain - 1);
260210550Sjhb		if (flind >= vm_nfreelists)
261210550Sjhb			vm_nfreelists = flind + 1;
262210550Sjhb	}
263210550Sjhb#endif
264170477Salc	seg->free_queues = &vm_phys_free_queues[flind];
265170477Salc}
266170477Salc
267210550Sjhbstatic void
268210550Sjhbvm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
269210550Sjhb{
270210550Sjhb	int i;
271210550Sjhb
272210550Sjhb	if (mem_affinity == NULL) {
273210550Sjhb		_vm_phys_create_seg(start, end, flind, 0);
274210550Sjhb		return;
275210550Sjhb	}
276210550Sjhb
277210550Sjhb	for (i = 0;; i++) {
278210550Sjhb		if (mem_affinity[i].end == 0)
279210550Sjhb			panic("Reached end of affinity info");
280210550Sjhb		if (mem_affinity[i].end <= start)
281210550Sjhb			continue;
282210550Sjhb		if (mem_affinity[i].start > start)
283210550Sjhb			panic("No affinity info for start %jx",
284210550Sjhb			    (uintmax_t)start);
285210550Sjhb		if (mem_affinity[i].end >= end) {
286210550Sjhb			_vm_phys_create_seg(start, end, flind,
287210550Sjhb			    mem_affinity[i].domain);
288210550Sjhb			break;
289210550Sjhb		}
290210550Sjhb		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
291210550Sjhb		    mem_affinity[i].domain);
292210550Sjhb		start = mem_affinity[i].end;
293210550Sjhb	}
294210550Sjhb}
295210550Sjhb
296170477Salc/*
297170477Salc * Initialize the physical memory allocator.
298170477Salc */
299170477Salcvoid
300170477Salcvm_phys_init(void)
301170477Salc{
302170477Salc	struct vm_freelist *fl;
303170477Salc	int flind, i, oind, pind;
304210550Sjhb#if VM_NDOMAIN > 1
305210550Sjhb	int ndomains, j;
306210550Sjhb#endif
307170477Salc
308170477Salc	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
309170477Salc#ifdef	VM_FREELIST_ISADMA
310170477Salc		if (phys_avail[i] < 16777216) {
311170477Salc			if (phys_avail[i + 1] > 16777216) {
312170477Salc				vm_phys_create_seg(phys_avail[i], 16777216,
313170477Salc				    VM_FREELIST_ISADMA);
314170477Salc				vm_phys_create_seg(16777216, phys_avail[i + 1],
315170477Salc				    VM_FREELIST_DEFAULT);
316170477Salc			} else {
317170477Salc				vm_phys_create_seg(phys_avail[i],
318170477Salc				    phys_avail[i + 1], VM_FREELIST_ISADMA);
319170477Salc			}
320170477Salc			if (VM_FREELIST_ISADMA >= vm_nfreelists)
321170477Salc				vm_nfreelists = VM_FREELIST_ISADMA + 1;
322170477Salc		} else
323170477Salc#endif
324170477Salc#ifdef	VM_FREELIST_HIGHMEM
325170477Salc		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
326170477Salc			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
327170477Salc				vm_phys_create_seg(phys_avail[i],
328170477Salc				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
329170477Salc				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
330170477Salc				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
331170477Salc			} else {
332170477Salc				vm_phys_create_seg(phys_avail[i],
333170477Salc				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
334170477Salc			}
335170477Salc			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
336170477Salc				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
337170477Salc		} else
338170477Salc#endif
339170477Salc		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
340170477Salc		    VM_FREELIST_DEFAULT);
341170477Salc	}
342170477Salc	for (flind = 0; flind < vm_nfreelists; flind++) {
343170477Salc		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
344170477Salc			fl = vm_phys_free_queues[flind][pind];
345170477Salc			for (oind = 0; oind < VM_NFREEORDER; oind++)
346170477Salc				TAILQ_INIT(&fl[oind].pl);
347170477Salc		}
348170477Salc	}
349210550Sjhb#if VM_NDOMAIN > 1
350210550Sjhb	/*
351210550Sjhb	 * Build a free list lookup list for each domain.  All of the
352210550Sjhb	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
353210550Sjhb	 * index in a round-robin order starting with the current
354210550Sjhb	 * domain.
355210550Sjhb	 */
356210550Sjhb	ndomains = vm_nfreelists - VM_NFREELIST + 1;
357210550Sjhb	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
358210550Sjhb		for (i = 0; i < ndomains; i++)
359210550Sjhb			vm_phys_lookup_lists[i][flind] =
360210550Sjhb			    &vm_phys_free_queues[flind];
361210550Sjhb	for (i = 0; i < ndomains; i++)
362210550Sjhb		for (j = 0; j < ndomains; j++) {
363210550Sjhb			flind = (i + j) % ndomains;
364210550Sjhb			if (flind == 0)
365210550Sjhb				flind = VM_FREELIST_DEFAULT;
366210550Sjhb			else
367210550Sjhb				flind += VM_NFREELIST - 1;
368210550Sjhb			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
369210550Sjhb			    &vm_phys_free_queues[flind];
370210550Sjhb		}
371210550Sjhb	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
372210550Sjhb	     flind++)
373210550Sjhb		for (i = 0; i < ndomains; i++)
374210550Sjhb			vm_phys_lookup_lists[i][flind + ndomains - 1] =
375210550Sjhb			    &vm_phys_free_queues[flind];
376210550Sjhb#else
377210550Sjhb	for (flind = 0; flind < vm_nfreelists; flind++)
378210550Sjhb		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
379210550Sjhb#endif
380236924Skib
381236924Skib	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
382170477Salc}
383170477Salc
384170477Salc/*
385170477Salc * Split a contiguous, power of two-sized set of physical pages.
386170477Salc */
387170477Salcstatic __inline void
388170477Salcvm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
389170477Salc{
390170477Salc	vm_page_t m_buddy;
391170477Salc
392170477Salc	while (oind > order) {
393170477Salc		oind--;
394170477Salc		m_buddy = &m[1 << oind];
395170477Salc		KASSERT(m_buddy->order == VM_NFREEORDER,
396170477Salc		    ("vm_phys_split_pages: page %p has unexpected order %d",
397170477Salc		    m_buddy, m_buddy->order));
398170477Salc		m_buddy->order = oind;
399170477Salc		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
400170477Salc		fl[oind].lcnt++;
401170477Salc        }
402170477Salc}
403170477Salc
404170477Salc/*
405170477Salc * Initialize a physical page and add it to the free lists.
406170477Salc */
407170477Salcvoid
408170477Salcvm_phys_add_page(vm_paddr_t pa)
409170477Salc{
410170477Salc	vm_page_t m;
411170477Salc
412170477Salc	cnt.v_page_count++;
413170477Salc	m = vm_phys_paddr_to_vm_page(pa);
414170477Salc	m->phys_addr = pa;
415217508Salc	m->queue = PQ_NONE;
416170477Salc	m->segind = vm_phys_paddr_to_segind(pa);
417170477Salc	m->flags = PG_FREE;
418170477Salc	KASSERT(m->order == VM_NFREEORDER,
419170477Salc	    ("vm_phys_add_page: page %p has unexpected order %d",
420170477Salc	    m, m->order));
421170477Salc	m->pool = VM_FREEPOOL_DEFAULT;
422170477Salc	pmap_page_init(m);
423171451Salc	mtx_lock(&vm_page_queue_free_mtx);
424172317Salc	cnt.v_free_count++;
425170477Salc	vm_phys_free_pages(m, 0);
426171451Salc	mtx_unlock(&vm_page_queue_free_mtx);
427170477Salc}
428170477Salc
429170477Salc/*
430170477Salc * Allocate a contiguous, power of two-sized set of physical pages
431170477Salc * from the free lists.
432171451Salc *
433171451Salc * The free page queues must be locked.
434170477Salc */
435170477Salcvm_page_t
436170477Salcvm_phys_alloc_pages(int pool, int order)
437170477Salc{
438210327Sjchandra	vm_page_t m;
439251179Sjhb	int domain, flind;
440210327Sjchandra
441251179Sjhb	KASSERT(pool < VM_NFREEPOOL,
442251179Sjhb	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
443251179Sjhb	KASSERT(order < VM_NFREEORDER,
444251179Sjhb	    ("vm_phys_alloc_pages: order %d is out of range", order));
445251179Sjhb
446251179Sjhb#if VM_NDOMAIN > 1
447251179Sjhb	domain = PCPU_GET(domain);
448251179Sjhb#else
449251179Sjhb	domain = 0;
450251179Sjhb#endif
451210327Sjchandra	for (flind = 0; flind < vm_nfreelists; flind++) {
452251179Sjhb		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
453210327Sjchandra		if (m != NULL)
454210327Sjchandra			return (m);
455210327Sjchandra	}
456210327Sjchandra	return (NULL);
457210327Sjchandra}
458210327Sjchandra
459210327Sjchandra/*
460210327Sjchandra * Find and dequeue a free page on the given free list, with the
461210327Sjchandra * specified pool and order
462210327Sjchandra */
463210327Sjchandravm_page_t
464210327Sjchandravm_phys_alloc_freelist_pages(int flind, int pool, int order)
465251179Sjhb{
466251179Sjhb#if VM_NDOMAIN > 1
467170477Salc	vm_page_t m;
468251179Sjhb	int i, ndomains;
469251179Sjhb#endif
470251179Sjhb	int domain;
471170477Salc
472210327Sjchandra	KASSERT(flind < VM_NFREELIST,
473210327Sjchandra	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
474170477Salc	KASSERT(pool < VM_NFREEPOOL,
475210327Sjchandra	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
476170477Salc	KASSERT(order < VM_NFREEORDER,
477210327Sjchandra	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
478210550Sjhb
479210550Sjhb#if VM_NDOMAIN > 1
480251179Sjhb	/*
481251179Sjhb	 * This routine expects to be called with a VM_FREELIST_* constant.
482251179Sjhb	 * On a system with multiple domains we need to adjust the flind
483251179Sjhb	 * appropriately.  If it is for VM_FREELIST_DEFAULT we need to
484251179Sjhb	 * iterate over the per-domain lists.
485251179Sjhb	 */
486210550Sjhb	domain = PCPU_GET(domain);
487251179Sjhb	ndomains = vm_nfreelists - VM_NFREELIST + 1;
488251179Sjhb	if (flind == VM_FREELIST_DEFAULT) {
489251179Sjhb		m = NULL;
490251179Sjhb		for (i = 0; i < ndomains; i++, flind++) {
491251179Sjhb			m = vm_phys_alloc_domain_pages(domain, flind, pool,
492251179Sjhb			    order);
493251179Sjhb			if (m != NULL)
494251179Sjhb				break;
495251179Sjhb		}
496251179Sjhb		return (m);
497251179Sjhb	} else if (flind > VM_FREELIST_DEFAULT)
498251179Sjhb		flind += ndomains - 1;
499210550Sjhb#else
500210550Sjhb	domain = 0;
501210550Sjhb#endif
502251179Sjhb	return (vm_phys_alloc_domain_pages(domain, flind, pool, order));
503251179Sjhb}
504251179Sjhb
505251179Sjhbstatic vm_page_t
506251179Sjhbvm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
507251179Sjhb{
508251179Sjhb	struct vm_freelist *fl;
509251179Sjhb	struct vm_freelist *alt;
510251179Sjhb	int oind, pind;
511251179Sjhb	vm_page_t m;
512251179Sjhb
513170477Salc	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
514210550Sjhb	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
515210327Sjchandra	for (oind = order; oind < VM_NFREEORDER; oind++) {
516210327Sjchandra		m = TAILQ_FIRST(&fl[oind].pl);
517210327Sjchandra		if (m != NULL) {
518210327Sjchandra			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
519210327Sjchandra			fl[oind].lcnt--;
520210327Sjchandra			m->order = VM_NFREEORDER;
521210327Sjchandra			vm_phys_split_pages(m, oind, fl, order);
522210327Sjchandra			return (m);
523210327Sjchandra		}
524210327Sjchandra	}
525210327Sjchandra
526210327Sjchandra	/*
527210327Sjchandra	 * The given pool was empty.  Find the largest
528210327Sjchandra	 * contiguous, power-of-two-sized set of pages in any
529210327Sjchandra	 * pool.  Transfer these pages to the given pool, and
530210327Sjchandra	 * use them to satisfy the allocation.
531210327Sjchandra	 */
532210327Sjchandra	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
533210327Sjchandra		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
534210550Sjhb			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
535210327Sjchandra			m = TAILQ_FIRST(&alt[oind].pl);
536170477Salc			if (m != NULL) {
537210327Sjchandra				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
538210327Sjchandra				alt[oind].lcnt--;
539170477Salc				m->order = VM_NFREEORDER;
540210327Sjchandra				vm_phys_set_pool(pool, m, oind);
541170477Salc				vm_phys_split_pages(m, oind, fl, order);
542170477Salc				return (m);
543170477Salc			}
544170477Salc		}
545170477Salc	}
546170477Salc	return (NULL);
547170477Salc}
548170477Salc
549170477Salc/*
550170477Salc * Find the vm_page corresponding to the given physical address.
551170477Salc */
552170477Salcvm_page_t
553170477Salcvm_phys_paddr_to_vm_page(vm_paddr_t pa)
554170477Salc{
555170477Salc	struct vm_phys_seg *seg;
556170477Salc	int segind;
557170477Salc
558170477Salc	for (segind = 0; segind < vm_phys_nsegs; segind++) {
559170477Salc		seg = &vm_phys_segs[segind];
560170477Salc		if (pa >= seg->start && pa < seg->end)
561170477Salc			return (&seg->first_page[atop(pa - seg->start)]);
562170477Salc	}
563194459Sthompsa	return (NULL);
564170477Salc}
565170477Salc
566236924Skibvm_page_t
567236924Skibvm_phys_fictitious_to_vm_page(vm_paddr_t pa)
568236924Skib{
569236924Skib	struct vm_phys_fictitious_seg *seg;
570236924Skib	vm_page_t m;
571236924Skib	int segind;
572236924Skib
573236924Skib	m = NULL;
574236924Skib	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
575236924Skib		seg = &vm_phys_fictitious_segs[segind];
576236924Skib		if (pa >= seg->start && pa < seg->end) {
577236924Skib			m = &seg->first_page[atop(pa - seg->start)];
578236924Skib			KASSERT((m->flags & PG_FICTITIOUS) != 0,
579236924Skib			    ("%p not fictitious", m));
580236924Skib			break;
581236924Skib		}
582236924Skib	}
583236924Skib	return (m);
584236924Skib}
585236924Skib
586236924Skibint
587236924Skibvm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
588236924Skib    vm_memattr_t memattr)
589236924Skib{
590236924Skib	struct vm_phys_fictitious_seg *seg;
591236924Skib	vm_page_t fp;
592236924Skib	long i, page_count;
593236924Skib	int segind;
594236924Skib#ifdef VM_PHYSSEG_DENSE
595236924Skib	long pi;
596236924Skib	boolean_t malloced;
597236924Skib#endif
598236924Skib
599236924Skib	page_count = (end - start) / PAGE_SIZE;
600236924Skib
601236924Skib#ifdef VM_PHYSSEG_DENSE
602236924Skib	pi = atop(start);
603265438Skib	if (pi >= first_page && pi < vm_page_array_size + first_page) {
604265438Skib		if (atop(end) >= vm_page_array_size + first_page)
605265438Skib			return (EINVAL);
606236924Skib		fp = &vm_page_array[pi - first_page];
607236924Skib		malloced = FALSE;
608236924Skib	} else
609236924Skib#endif
610236924Skib	{
611236924Skib		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
612236924Skib		    M_WAITOK | M_ZERO);
613236924Skib#ifdef VM_PHYSSEG_DENSE
614236924Skib		malloced = TRUE;
615236924Skib#endif
616236924Skib	}
617236924Skib	for (i = 0; i < page_count; i++) {
618236924Skib		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
619236924Skib		pmap_page_init(&fp[i]);
620236924Skib		fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
621236924Skib	}
622236924Skib	mtx_lock(&vm_phys_fictitious_reg_mtx);
623236924Skib	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
624236924Skib		seg = &vm_phys_fictitious_segs[segind];
625236924Skib		if (seg->start == 0 && seg->end == 0) {
626236924Skib			seg->start = start;
627236924Skib			seg->end = end;
628236924Skib			seg->first_page = fp;
629236924Skib			mtx_unlock(&vm_phys_fictitious_reg_mtx);
630236924Skib			return (0);
631236924Skib		}
632236924Skib	}
633236924Skib	mtx_unlock(&vm_phys_fictitious_reg_mtx);
634236924Skib#ifdef VM_PHYSSEG_DENSE
635236924Skib	if (malloced)
636236924Skib#endif
637236924Skib		free(fp, M_FICT_PAGES);
638236924Skib	return (EBUSY);
639236924Skib}
640236924Skib
641236924Skibvoid
642236924Skibvm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
643236924Skib{
644236924Skib	struct vm_phys_fictitious_seg *seg;
645236924Skib	vm_page_t fp;
646236924Skib	int segind;
647236924Skib#ifdef VM_PHYSSEG_DENSE
648236924Skib	long pi;
649236924Skib#endif
650236924Skib
651236924Skib#ifdef VM_PHYSSEG_DENSE
652236924Skib	pi = atop(start);
653236924Skib#endif
654236924Skib
655236924Skib	mtx_lock(&vm_phys_fictitious_reg_mtx);
656236924Skib	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
657236924Skib		seg = &vm_phys_fictitious_segs[segind];
658236924Skib		if (seg->start == start && seg->end == end) {
659236924Skib			seg->start = seg->end = 0;
660236924Skib			fp = seg->first_page;
661236924Skib			seg->first_page = NULL;
662236924Skib			mtx_unlock(&vm_phys_fictitious_reg_mtx);
663236924Skib#ifdef VM_PHYSSEG_DENSE
664236924Skib			if (pi < first_page || atop(end) >= vm_page_array_size)
665236924Skib#endif
666236924Skib				free(fp, M_FICT_PAGES);
667236924Skib			return;
668236924Skib		}
669236924Skib	}
670236924Skib	mtx_unlock(&vm_phys_fictitious_reg_mtx);
671236924Skib	KASSERT(0, ("Unregistering not registered fictitious range"));
672236924Skib}
673236924Skib
674170477Salc/*
675170477Salc * Find the segment containing the given physical address.
676170477Salc */
677170477Salcstatic int
678170477Salcvm_phys_paddr_to_segind(vm_paddr_t pa)
679170477Salc{
680170477Salc	struct vm_phys_seg *seg;
681170477Salc	int segind;
682170477Salc
683170477Salc	for (segind = 0; segind < vm_phys_nsegs; segind++) {
684170477Salc		seg = &vm_phys_segs[segind];
685170477Salc		if (pa >= seg->start && pa < seg->end)
686170477Salc			return (segind);
687170477Salc	}
688170477Salc	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" ,
689170477Salc	    (uintmax_t)pa);
690170477Salc}
691170477Salc
692170477Salc/*
693170477Salc * Free a contiguous, power of two-sized set of physical pages.
694171451Salc *
695171451Salc * The free page queues must be locked.
696170477Salc */
697170477Salcvoid
698170477Salcvm_phys_free_pages(vm_page_t m, int order)
699170477Salc{
700170477Salc	struct vm_freelist *fl;
701170477Salc	struct vm_phys_seg *seg;
702262933Sdumbbell	vm_paddr_t pa;
703170477Salc	vm_page_t m_buddy;
704170477Salc
705170477Salc	KASSERT(m->order == VM_NFREEORDER,
706171451Salc	    ("vm_phys_free_pages: page %p has unexpected order %d",
707170477Salc	    m, m->order));
708170477Salc	KASSERT(m->pool < VM_NFREEPOOL,
709171451Salc	    ("vm_phys_free_pages: page %p has unexpected pool %d",
710170477Salc	    m, m->pool));
711170477Salc	KASSERT(order < VM_NFREEORDER,
712171451Salc	    ("vm_phys_free_pages: order %d is out of range", order));
713170477Salc	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
714170477Salc	seg = &vm_phys_segs[m->segind];
715262933Sdumbbell	if (order < VM_NFREEORDER - 1) {
716262933Sdumbbell		pa = VM_PAGE_TO_PHYS(m);
717262933Sdumbbell		do {
718262933Sdumbbell			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
719262933Sdumbbell			if (pa < seg->start || pa >= seg->end)
720262933Sdumbbell				break;
721262933Sdumbbell			m_buddy = &seg->first_page[atop(pa - seg->start)];
722262933Sdumbbell			if (m_buddy->order != order)
723262933Sdumbbell				break;
724262933Sdumbbell			fl = (*seg->free_queues)[m_buddy->pool];
725262933Sdumbbell			TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
726262933Sdumbbell			fl[order].lcnt--;
727262933Sdumbbell			m_buddy->order = VM_NFREEORDER;
728262933Sdumbbell			if (m_buddy->pool != m->pool)
729262933Sdumbbell				vm_phys_set_pool(m->pool, m_buddy, order);
730262933Sdumbbell			order++;
731262933Sdumbbell			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
732262933Sdumbbell			m = &seg->first_page[atop(pa - seg->start)];
733262933Sdumbbell		} while (order < VM_NFREEORDER - 1);
734170477Salc	}
735170477Salc	m->order = order;
736170477Salc	fl = (*seg->free_queues)[m->pool];
737170477Salc	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
738170477Salc	fl[order].lcnt++;
739170477Salc}
740170477Salc
741170477Salc/*
742262933Sdumbbell * Free a contiguous, arbitrarily sized set of physical pages.
743262933Sdumbbell *
744262933Sdumbbell * The free page queues must be locked.
745262933Sdumbbell */
746262933Sdumbbellvoid
747262933Sdumbbellvm_phys_free_contig(vm_page_t m, u_long npages)
748262933Sdumbbell{
749262933Sdumbbell	u_int n;
750262933Sdumbbell	int order;
751262933Sdumbbell
752262933Sdumbbell	/*
753262933Sdumbbell	 * Avoid unnecessary coalescing by freeing the pages in the largest
754262933Sdumbbell	 * possible power-of-two-sized subsets.
755262933Sdumbbell	 */
756262933Sdumbbell	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
757262933Sdumbbell	for (;; npages -= n) {
758262933Sdumbbell		/*
759262933Sdumbbell		 * Unsigned "min" is used here so that "order" is assigned
760262933Sdumbbell		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
761262933Sdumbbell		 * or the low-order bits of its physical address are zero
762262933Sdumbbell		 * because the size of a physical address exceeds the size of
763262933Sdumbbell		 * a long.
764262933Sdumbbell		 */
765262933Sdumbbell		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
766262933Sdumbbell		    VM_NFREEORDER - 1);
767262933Sdumbbell		n = 1 << order;
768262933Sdumbbell		if (npages < n)
769262933Sdumbbell			break;
770262933Sdumbbell		vm_phys_free_pages(m, order);
771262933Sdumbbell		m += n;
772262933Sdumbbell	}
773262933Sdumbbell	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
774262933Sdumbbell	for (; npages > 0; npages -= n) {
775262933Sdumbbell		order = flsl(npages) - 1;
776262933Sdumbbell		n = 1 << order;
777262933Sdumbbell		vm_phys_free_pages(m, order);
778262933Sdumbbell		m += n;
779262933Sdumbbell	}
780262933Sdumbbell}
781262933Sdumbbell
782262933Sdumbbell/*
783170477Salc * Set the pool for a contiguous, power of two-sized set of physical pages.
784170477Salc */
785172317Salcvoid
786170477Salcvm_phys_set_pool(int pool, vm_page_t m, int order)
787170477Salc{
788170477Salc	vm_page_t m_tmp;
789170477Salc
790170477Salc	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
791170477Salc		m_tmp->pool = pool;
792170477Salc}
793170477Salc
794170477Salc/*
795174825Salc * Search for the given physical page "m" in the free lists.  If the search
796174825Salc * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
797174825Salc * FALSE, indicating that "m" is not in the free lists.
798172317Salc *
799172317Salc * The free page queues must be locked.
800170477Salc */
801174821Salcboolean_t
802172317Salcvm_phys_unfree_page(vm_page_t m)
803172317Salc{
804172317Salc	struct vm_freelist *fl;
805172317Salc	struct vm_phys_seg *seg;
806172317Salc	vm_paddr_t pa, pa_half;
807172317Salc	vm_page_t m_set, m_tmp;
808172317Salc	int order;
809172317Salc
810172317Salc	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
811172317Salc
812172317Salc	/*
813172317Salc	 * First, find the contiguous, power of two-sized set of free
814172317Salc	 * physical pages containing the given physical page "m" and
815172317Salc	 * assign it to "m_set".
816172317Salc	 */
817172317Salc	seg = &vm_phys_segs[m->segind];
818172317Salc	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
819174799Salc	    order < VM_NFREEORDER - 1; ) {
820172317Salc		order++;
821172317Salc		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
822177932Salc		if (pa >= seg->start)
823174821Salc			m_set = &seg->first_page[atop(pa - seg->start)];
824174821Salc		else
825174821Salc			return (FALSE);
826172317Salc	}
827174821Salc	if (m_set->order < order)
828174821Salc		return (FALSE);
829174821Salc	if (m_set->order == VM_NFREEORDER)
830174821Salc		return (FALSE);
831172317Salc	KASSERT(m_set->order < VM_NFREEORDER,
832172317Salc	    ("vm_phys_unfree_page: page %p has unexpected order %d",
833172317Salc	    m_set, m_set->order));
834172317Salc
835172317Salc	/*
836172317Salc	 * Next, remove "m_set" from the free lists.  Finally, extract
837172317Salc	 * "m" from "m_set" using an iterative algorithm: While "m_set"
838172317Salc	 * is larger than a page, shrink "m_set" by returning the half
839172317Salc	 * of "m_set" that does not contain "m" to the free lists.
840172317Salc	 */
841172317Salc	fl = (*seg->free_queues)[m_set->pool];
842172317Salc	order = m_set->order;
843172317Salc	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
844172317Salc	fl[order].lcnt--;
845172317Salc	m_set->order = VM_NFREEORDER;
846172317Salc	while (order > 0) {
847172317Salc		order--;
848172317Salc		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
849172317Salc		if (m->phys_addr < pa_half)
850172317Salc			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
851172317Salc		else {
852172317Salc			m_tmp = m_set;
853172317Salc			m_set = &seg->first_page[atop(pa_half - seg->start)];
854172317Salc		}
855172317Salc		m_tmp->order = order;
856172317Salc		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
857172317Salc		fl[order].lcnt++;
858172317Salc	}
859172317Salc	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
860174821Salc	return (TRUE);
861172317Salc}
862172317Salc
863172317Salc/*
864172317Salc * Try to zero one physical page.  Used by an idle priority thread.
865172317Salc */
866170477Salcboolean_t
867170477Salcvm_phys_zero_pages_idle(void)
868170477Salc{
869172317Salc	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
870172317Salc	static int flind, oind, pind;
871170477Salc	vm_page_t m, m_tmp;
872170477Salc
873170477Salc	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
874172317Salc	for (;;) {
875172317Salc		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
876172317Salc			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
877172317Salc				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
878172317Salc					vm_phys_unfree_page(m_tmp);
879172317Salc					cnt.v_free_count--;
880172317Salc					mtx_unlock(&vm_page_queue_free_mtx);
881172317Salc					pmap_zero_page_idle(m_tmp);
882172317Salc					m_tmp->flags |= PG_ZERO;
883172317Salc					mtx_lock(&vm_page_queue_free_mtx);
884172317Salc					cnt.v_free_count++;
885172317Salc					vm_phys_free_pages(m_tmp, 0);
886172317Salc					vm_page_zero_count++;
887172317Salc					cnt_prezero++;
888172317Salc					return (TRUE);
889170477Salc				}
890170477Salc			}
891170477Salc		}
892172317Salc		oind++;
893172317Salc		if (oind == VM_NFREEORDER) {
894172317Salc			oind = 0;
895172317Salc			pind++;
896172317Salc			if (pind == VM_NFREEPOOL) {
897172317Salc				pind = 0;
898172317Salc				flind++;
899172317Salc				if (flind == vm_nfreelists)
900172317Salc					flind = 0;
901172317Salc			}
902172317Salc			fl = vm_phys_free_queues[flind][pind];
903172317Salc		}
904170477Salc	}
905170477Salc}
906170477Salc
907170477Salc/*
908170818Salc * Allocate a contiguous set of physical pages of the given size
909170818Salc * "npages" from the free lists.  All of the physical pages must be at
910170818Salc * or above the given physical address "low" and below the given
911170818Salc * physical address "high".  The given value "alignment" determines the
912170818Salc * alignment of the first physical page in the set.  If the given value
913170818Salc * "boundary" is non-zero, then the set of physical pages cannot cross
914170818Salc * any physical address boundary that is a multiple of that value.  Both
915170477Salc * "alignment" and "boundary" must be a power of two.
916170477Salc */
917170477Salcvm_page_t
918262933Sdumbbellvm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
919262933Sdumbbell    u_long alignment, u_long boundary)
920170477Salc{
921170477Salc	struct vm_freelist *fl;
922170477Salc	struct vm_phys_seg *seg;
923170477Salc	vm_paddr_t pa, pa_last, size;
924262933Sdumbbell	vm_page_t m, m_ret;
925262933Sdumbbell	u_long npages_end;
926262933Sdumbbell	int domain, flind, oind, order, pind;
927170477Salc
928262933Sdumbbell	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
929210550Sjhb#if VM_NDOMAIN > 1
930210550Sjhb	domain = PCPU_GET(domain);
931210550Sjhb#else
932210550Sjhb	domain = 0;
933210550Sjhb#endif
934170477Salc	size = npages << PAGE_SHIFT;
935170477Salc	KASSERT(size != 0,
936170477Salc	    ("vm_phys_alloc_contig: size must not be 0"));
937170477Salc	KASSERT((alignment & (alignment - 1)) == 0,
938170477Salc	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
939170477Salc	KASSERT((boundary & (boundary - 1)) == 0,
940170477Salc	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
941170477Salc	/* Compute the queue that is the best fit for npages. */
942170477Salc	for (order = 0; (1 << order) < npages; order++);
943170477Salc	for (flind = 0; flind < vm_nfreelists; flind++) {
944170477Salc		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
945170477Salc			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
946210550Sjhb				fl = (*vm_phys_lookup_lists[domain][flind])
947210550Sjhb				    [pind];
948170477Salc				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
949170477Salc					/*
950170477Salc					 * A free list may contain physical pages
951170477Salc					 * from one or more segments.
952170477Salc					 */
953170477Salc					seg = &vm_phys_segs[m_ret->segind];
954170477Salc					if (seg->start > high ||
955170477Salc					    low >= seg->end)
956170477Salc						continue;
957170477Salc
958170477Salc					/*
959170477Salc					 * Is the size of this allocation request
960170477Salc					 * larger than the largest block size?
961170477Salc					 */
962170477Salc					if (order >= VM_NFREEORDER) {
963170477Salc						/*
964170477Salc						 * Determine if a sufficient number
965170477Salc						 * of subsequent blocks to satisfy
966170477Salc						 * the allocation request are free.
967170477Salc						 */
968170477Salc						pa = VM_PAGE_TO_PHYS(m_ret);
969170477Salc						pa_last = pa + size;
970170477Salc						for (;;) {
971170477Salc							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
972170477Salc							if (pa >= pa_last)
973170477Salc								break;
974170477Salc							if (pa < seg->start ||
975170477Salc							    pa >= seg->end)
976170477Salc								break;
977170477Salc							m = &seg->first_page[atop(pa - seg->start)];
978170477Salc							if (m->order != VM_NFREEORDER - 1)
979170477Salc								break;
980170477Salc						}
981170477Salc						/* If not, continue to the next block. */
982170477Salc						if (pa < pa_last)
983170477Salc							continue;
984170477Salc					}
985170477Salc
986170477Salc					/*
987170477Salc					 * Determine if the blocks are within the given range,
988170477Salc					 * satisfy the given alignment, and do not cross the
989170477Salc					 * given boundary.
990170477Salc					 */
991170477Salc					pa = VM_PAGE_TO_PHYS(m_ret);
992170477Salc					if (pa >= low &&
993170477Salc					    pa + size <= high &&
994170477Salc					    (pa & (alignment - 1)) == 0 &&
995170477Salc					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
996170477Salc						goto done;
997170477Salc				}
998170477Salc			}
999170477Salc		}
1000170477Salc	}
1001170477Salc	return (NULL);
1002170477Salcdone:
1003170477Salc	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
1004170477Salc		fl = (*seg->free_queues)[m->pool];
1005170477Salc		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
1006170477Salc		fl[m->order].lcnt--;
1007170477Salc		m->order = VM_NFREEORDER;
1008170477Salc	}
1009170477Salc	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
1010170477Salc		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
1011170477Salc	fl = (*seg->free_queues)[m_ret->pool];
1012170477Salc	vm_phys_split_pages(m_ret, oind, fl, order);
1013262933Sdumbbell	/* Return excess pages to the free lists. */
1014262933Sdumbbell	npages_end = roundup2(npages, 1 << imin(oind, order));
1015262933Sdumbbell	if (npages < npages_end)
1016262933Sdumbbell		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
1017170477Salc	return (m_ret);
1018170477Salc}
1019170477Salc
1020170477Salc#ifdef DDB
1021170477Salc/*
1022170477Salc * Show the number of physical pages in each of the free lists.
1023170477Salc */
1024170477SalcDB_SHOW_COMMAND(freepages, db_show_freepages)
1025170477Salc{
1026170477Salc	struct vm_freelist *fl;
1027170477Salc	int flind, oind, pind;
1028170477Salc
1029170477Salc	for (flind = 0; flind < vm_nfreelists; flind++) {
1030170477Salc		db_printf("FREE LIST %d:\n"
1031170477Salc		    "\n  ORDER (SIZE)  |  NUMBER"
1032170477Salc		    "\n              ", flind);
1033170477Salc		for (pind = 0; pind < VM_NFREEPOOL; pind++)
1034170477Salc			db_printf("  |  POOL %d", pind);
1035170477Salc		db_printf("\n--            ");
1036170477Salc		for (pind = 0; pind < VM_NFREEPOOL; pind++)
1037170477Salc			db_printf("-- --      ");
1038170477Salc		db_printf("--\n");
1039170477Salc		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
1040170477Salc			db_printf("  %2.2d (%6.6dK)", oind,
1041170477Salc			    1 << (PAGE_SHIFT - 10 + oind));
1042170477Salc			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
1043170477Salc				fl = vm_phys_free_queues[flind][pind];
1044170477Salc				db_printf("  |  %6.6d", fl[oind].lcnt);
1045170477Salc			}
1046170477Salc			db_printf("\n");
1047170477Salc		}
1048170477Salc		db_printf("\n");
1049170477Salc	}
1050170477Salc}
1051170477Salc#endif
1052