vm_reserv.c revision 190912
/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007-2008 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Superpage reservation management module
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/vm/vm_reserv.c 190912 2009-04-11 09:09:00Z alc $");

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * The reservation system supports the speculative allocation of large physical
 * pages ("superpages").  Speculative allocation enables the fully-automatic
 * utilization of superpages by the virtual memory system.  In other words, no
 * programmatic directives are required to use superpages.
 */

#if VM_NRESERVLEVEL > 0

/*
 * The number of small pages that are contained in a level 0 reservation
 */
#define	VM_LEVEL_0_NPAGES	(1 << VM_LEVEL_0_ORDER)

/*
 * The number of bits by which a physical address is shifted to obtain the
 * reservation number
 */
#define	VM_LEVEL_0_SHIFT	(VM_LEVEL_0_ORDER + PAGE_SHIFT)

/*
 * The size of a level 0 reservation in bytes
 */
#define	VM_LEVEL_0_SIZE		(1 << VM_LEVEL_0_SHIFT)

/*
 * Computes the index of the small page underlying the given (object, pindex)
 * within the reservation's array of small pages.
 */
#define	VM_RESERV_INDEX(object, pindex)	\
    (((object)->pg_color + (pindex)) & (VM_LEVEL_0_NPAGES - 1))
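
/*
 * Illustrative example, assuming a configuration where VM_LEVEL_0_ORDER is
 * 9: a reservation then holds 512 small pages and VM_RESERV_INDEX() reduces
 * to ((object)->pg_color + (pindex)) % 512.  For instance, an object with
 * pg_color 3 maps pindex 510 to small page index (3 + 510) & 511 == 1.
 */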

/*
 * The reservation structure
 *
 * A reservation structure is constructed whenever a large physical page is
 * speculatively allocated to an object.  The reservation provides the small
 * physical pages for the range [pindex, pindex + VM_LEVEL_0_NPAGES) of offsets
 * within that object.  The reservation's "popcnt" tracks the number of these
 * small physical pages that are in use at any given time.  When and if the
 * reservation is not fully utilized, it appears in the queue of partially-
 * populated reservations.  The reservation always appears on the containing
 * object's list of reservations.
 *
 * A partially-populated reservation can be broken and reclaimed at any time.
 */
struct vm_reserv {
	TAILQ_ENTRY(vm_reserv) partpopq;
	LIST_ENTRY(vm_reserv) objq;
	vm_object_t	object;			/* containing object */
	vm_pindex_t	pindex;			/* offset within object */
	vm_page_t	pages;			/* first page of a superpage */
	int		popcnt;			/* # of pages in use */
	char		inpartpopq;
};
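
/*
 * Illustrative invariant, derived from vm_reserv_alloc_page() below: for a
 * small page "m" allocated to "object" from an active reservation "rv",
 *
 *	m == &rv->pages[m->pindex - rv->pindex]
 *
 * and, equivalently,
 *
 *	VM_PAGE_TO_PHYS(m) ==
 *	    VM_PAGE_TO_PHYS(rv->pages) + ptoa(m->pindex - rv->pindex)
 *
 * vm_reserv_rename() adjusts "pindex" so that this relation is preserved
 * when pages are moved to a new object.
 */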

/*
 * The reservation array
 *
 * This array is analogous in function to vm_page_array.  It differs in the
 * respect that it may contain a greater number of useful reservation
 * structures than there are (physical) superpages.  These "invalid"
 * reservation structures exist to trade off space for time in the
 * implementation of vm_reserv_from_page().  Invalid reservation structures are
 * distinguishable from "valid" reservation structures by inspecting the
 * reservation's "pages" field.  Invalid reservation structures have a NULL
 * "pages" field.
 *
 * vm_reserv_from_page() maps a small (physical) page to an element of this
 * array by computing a physical reservation number from the page's physical
 * address.  The physical reservation number is used as the array index.
 *
 * An "active" reservation is a valid reservation structure that has a non-NULL
 * "object" field and a non-zero "popcnt" field.  In other words, every active
 * reservation belongs to a particular object.  Moreover, every active
 * reservation has an entry in the containing object's list of reservations.
 */
static vm_reserv_t vm_reserv_array;
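
/*
 * Illustrative example, assuming PAGE_SHIFT is 12 and VM_LEVEL_0_ORDER is 9
 * (a 2MB reservation): VM_LEVEL_0_SHIFT is then 21, so the small page at
 * physical address 0x40301000 maps to vm_reserv_array[0x40301000 >> 21],
 * i.e., element 0x201.
 */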

/*
 * The partially-populated reservation queue
 *
 * This queue enables the fast recovery of an unused cached or free small page
 * from a partially-populated reservation.  The reservation at the head of
 * this queue is the least-recently-changed, partially-populated reservation.
 *
 * Access to this queue is synchronized by the free page queue lock.
 */
static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
			    TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);

static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");

static long vm_reserv_broken;
SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
    &vm_reserv_broken, 0, "Cumulative number of broken reservations");

static long vm_reserv_freed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
    &vm_reserv_freed, 0, "Cumulative number of freed reservations");

static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);

SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
    sysctl_vm_reserv_partpopq, "A", "Partially-populated reservation queues");

static long vm_reserv_reclaimed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
    &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");

static void		vm_reserv_depopulate(vm_reserv_t rv);
static vm_reserv_t	vm_reserv_from_page(vm_page_t m);
static boolean_t	vm_reserv_has_pindex(vm_reserv_t rv,
			    vm_pindex_t pindex);
static void		vm_reserv_populate(vm_reserv_t rv);
static void		vm_reserv_reclaim(vm_reserv_t rv);

/*
 * Describes the current state of the partially-populated reservation queue.
 */
static int
sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	vm_reserv_t rv;
	char *cbuf;
	const int cbufsize = (VM_NRESERVLEVEL + 1) * 81;
	int counter, error, level, unused_pages;

	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
	sbuf_printf(&sbuf, "\nLEVEL     SIZE  NUMBER\n\n");
	for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
		counter = 0;
		unused_pages = 0;
		mtx_lock(&vm_page_queue_free_mtx);
		TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) {
			counter++;
			unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
		}
		mtx_unlock(&vm_page_queue_free_mtx);
		sbuf_printf(&sbuf, "%5.5d: %6.6dK, %6.6d\n", level,
		    unused_pages * (PAGE_SIZE / 1024), counter);
	}
	sbuf_finish(&sbuf);
	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
	sbuf_delete(&sbuf);
	free(cbuf, M_TEMP);
	return (error);
}
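
/*
 * Example (from userland):
 *
 *	$ sysctl vm.reserv.partpopq
 *
 * Each line of output reports, for one reservation level, the total amount
 * of unused (cached or free) memory held by partially-populated reservations
 * at that level and the number of such reservations.
 */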

/*
 * Reduces the given reservation's population count.  If the population count
 * becomes zero, the reservation is destroyed.  Additionally, moves the
 * reservation to the tail of the partially-populated reservations queue if the
 * population count is non-zero.
 *
 * The free page queue lock must be held.
 */
static void
vm_reserv_depopulate(vm_reserv_t rv)
{

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	KASSERT(rv->object != NULL,
	    ("vm_reserv_depopulate: reserv %p is free", rv));
	KASSERT(rv->popcnt > 0,
	    ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
	if (rv->inpartpopq) {
		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
		rv->inpartpopq = FALSE;
	}
	rv->popcnt--;
	if (rv->popcnt == 0) {
		LIST_REMOVE(rv, objq);
		rv->object = NULL;
		vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
		vm_reserv_freed++;
	} else {
		rv->inpartpopq = TRUE;
		TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
	}
}

/*
 * Returns the reservation to which the given page might belong.
 */
static __inline vm_reserv_t
vm_reserv_from_page(vm_page_t m)
{

	return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
}

/*
 * Returns TRUE if the given reservation contains the given page index and
 * FALSE otherwise.
 */
static __inline boolean_t
vm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex)
{

	return (((pindex - rv->pindex) & ~(VM_LEVEL_0_NPAGES - 1)) == 0);
}
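
/*
 * Illustrative example, assuming VM_LEVEL_0_NPAGES is 512: for a reservation
 * with pindex 1024, pindex 1300 gives (276 & ~511) == 0, so TRUE is returned,
 * whereas pindex 1600 gives (576 & ~511) == 512, so FALSE is returned.
 */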

/*
 * Increases the given reservation's population count.  Moves the reservation
 * to the tail of the partially-populated reservation queue.
 *
 * The free page queue must be locked.
 */
static void
vm_reserv_populate(vm_reserv_t rv)
{

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	KASSERT(rv->object != NULL,
	    ("vm_reserv_populate: reserv %p is free", rv));
	KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
	    ("vm_reserv_populate: reserv %p is already full", rv));
	if (rv->inpartpopq) {
		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
		rv->inpartpopq = FALSE;
	}
	rv->popcnt++;
	if (rv->popcnt < VM_LEVEL_0_NPAGES) {
		rv->inpartpopq = TRUE;
		TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
	}
}

/*
 * Allocates a page from an existing or newly-created reservation.
 *
 * The object and free page queue must be locked.
 */
vm_page_t
vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m, mpred, msucc;
	vm_pindex_t first, leftcap, rightcap;
	vm_reserv_t rv;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * Is a reservation fundamentally not possible?
	 */
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (pindex < VM_RESERV_INDEX(object, pindex) ||
	    pindex >= object->size)
		return (NULL);

	/*
	 * Look for an existing reservation.
	 */
	msucc = NULL;
	mpred = object->root;
	while (mpred != NULL) {
		KASSERT(mpred->pindex != pindex,
		    ("vm_reserv_alloc_page: pindex already allocated"));
		rv = vm_reserv_from_page(mpred);
		if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) {
			m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
			/* Handle vm_page_rename(m, new_object, ...). */
			if ((m->flags & (PG_CACHED | PG_FREE)) == 0)
				return (NULL);
			vm_reserv_populate(rv);
			return (m);
		} else if (mpred->pindex < pindex) {
			if (msucc != NULL ||
			    (msucc = TAILQ_NEXT(mpred, listq)) == NULL)
				break;
			KASSERT(msucc->pindex != pindex,
			    ("vm_reserv_alloc_page: pindex already allocated"));
			rv = vm_reserv_from_page(msucc);
			if (rv->object == object &&
			    vm_reserv_has_pindex(rv, pindex)) {
				m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
				/* Handle vm_page_rename(m, new_object, ...). */
				if ((m->flags & (PG_CACHED | PG_FREE)) == 0)
					return (NULL);
				vm_reserv_populate(rv);
				return (m);
			} else if (pindex < msucc->pindex)
				break;
		} else if (msucc == NULL) {
			msucc = mpred;
			mpred = TAILQ_PREV(msucc, pglist, listq);
			continue;
		}
		msucc = NULL;
		mpred = object->root = vm_page_splay(pindex, object->root);
	}

	/*
	 * Determine the first index to the left that can be used.
	 */
	if (mpred == NULL)
		leftcap = 0;
	else if ((rv = vm_reserv_from_page(mpred))->object != object)
		leftcap = mpred->pindex + 1;
	else
		leftcap = rv->pindex + VM_LEVEL_0_NPAGES;

	/*
	 * Determine the first index to the right that cannot be used.
	 */
	if (msucc == NULL)
		rightcap = pindex + VM_LEVEL_0_NPAGES;
	else if ((rv = vm_reserv_from_page(msucc))->object != object)
		rightcap = msucc->pindex;
	else
		rightcap = rv->pindex;

	/*
	 * Determine if a reservation fits between the first index to
	 * the left that can be used and the first index to the right
	 * that cannot be used.
	 */
	first = pindex - VM_RESERV_INDEX(object, pindex);
	if (first < leftcap || first + VM_LEVEL_0_NPAGES > rightcap)
		return (NULL);

	/*
	 * Would a new reservation extend past the end of the given object?
	 */
	if (object->size < first + VM_LEVEL_0_NPAGES) {
		/*
		 * Don't allocate a new reservation if the object is a vnode or
		 * backed by another object that is a vnode.
		 */
		if (object->type == OBJT_VNODE ||
		    (object->backing_object != NULL &&
		    object->backing_object->type == OBJT_VNODE))
			return (NULL);
		/* Speculate that the object may grow. */
	}

	/*
	 * Allocate a new reservation.
	 */
	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
	if (m != NULL) {
		rv = vm_reserv_from_page(m);
		KASSERT(rv->pages == m,
		    ("vm_reserv_alloc_page: reserv %p's pages is corrupted",
		    rv));
		KASSERT(rv->object == NULL,
		    ("vm_reserv_alloc_page: reserv %p isn't free", rv));
		LIST_INSERT_HEAD(&object->rvq, rv, objq);
		rv->object = object;
		rv->pindex = first;
		KASSERT(rv->popcnt == 0,
		    ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted",
		    rv));
		KASSERT(!rv->inpartpopq,
		    ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE",
		    rv));
		vm_reserv_populate(rv);
		m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
	}
	return (m);
}
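
/*
 * Illustrative caller sketch (a simplified assumption about vm_page_alloc(),
 * not code from this file): the page allocator tries the reservation system
 * first and falls back to the physical memory allocator, possibly after
 * reclaiming a partially-populated reservation:
 *
 *	m = vm_reserv_alloc_page(object, pindex);
 *	if (m == NULL) {
 *		m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
 *		if (m == NULL && vm_reserv_reclaim_inactive())
 *			m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, 0);
 *	}
 */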

/*
 * Breaks all reservations belonging to the given object.
 */
void
vm_reserv_break_all(vm_object_t object)
{
	vm_reserv_t rv;
	int i;

	mtx_lock(&vm_page_queue_free_mtx);
	while ((rv = LIST_FIRST(&object->rvq)) != NULL) {
		KASSERT(rv->object == object,
		    ("vm_reserv_break_all: reserv %p is corrupted", rv));
		if (rv->inpartpopq) {
			TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
			rv->inpartpopq = FALSE;
		}
		LIST_REMOVE(rv, objq);
		rv->object = NULL;
		for (i = 0; i < VM_LEVEL_0_NPAGES; i++) {
			if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
				vm_phys_free_pages(&rv->pages[i], 0);
			else
				rv->popcnt--;
		}
		KASSERT(rv->popcnt == 0,
		    ("vm_reserv_break_all: reserv %p's popcnt is corrupted",
		    rv));
		vm_reserv_broken++;
	}
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Frees the given page if it belongs to a reservation.  Returns TRUE if the
 * page is freed and FALSE otherwise.
 *
 * The free page queue lock must be held.
 */
boolean_t
vm_reserv_free_page(vm_page_t m)
{
	vm_reserv_t rv;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	rv = vm_reserv_from_page(m);
	if (rv->object != NULL) {
		vm_reserv_depopulate(rv);
		return (TRUE);
	}
	return (FALSE);
}
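
/*
 * Illustrative caller sketch (a simplified assumption about the page-free
 * path, not code from this file): a freed page is returned to its containing
 * reservation when one exists, and to the buddy allocator otherwise:
 *
 *	if (!vm_reserv_free_page(m))
 *		vm_phys_free_pages(m, 0);
 */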

/*
 * Initializes the reservation management system.  Specifically, initializes
 * the reservation array.
 *
 * Requires that vm_page_array and first_page are initialized!
 */
void
vm_reserv_init(void)
{
	vm_paddr_t paddr;
	int i;

	/*
	 * Initialize the reservation array.  Specifically, initialize the
	 * "pages" field for every element that has an underlying superpage.
	 */
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		paddr = roundup2(phys_avail[i], VM_LEVEL_0_SIZE);
		while (paddr + VM_LEVEL_0_SIZE <= phys_avail[i + 1]) {
			vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages =
			    PHYS_TO_VM_PAGE(paddr);
			paddr += VM_LEVEL_0_SIZE;
		}
	}
}

/*
 * Returns a reservation level if the given page belongs to a fully-populated
 * reservation and -1 otherwise.
 */
int
vm_reserv_level_iffullpop(vm_page_t m)
{
	vm_reserv_t rv;

	rv = vm_reserv_from_page(m);
	return (rv->popcnt == VM_LEVEL_0_NPAGES ? 0 : -1);
}

/*
 * Prepare for the reactivation of a cached page.
 *
 * First, suppose that the given page "m" was allocated individually, i.e., not
 * as part of a reservation, and cached.  Then, suppose a reservation
 * containing "m" is allocated by the same object.  Although "m" and the
 * reservation belong to the same object, "m"'s pindex may not match the
 * reservation's.
 *
 * The free page queue must be locked.
 */
boolean_t
vm_reserv_reactivate_page(vm_page_t m)
{
	vm_reserv_t rv;
	int i, m_index;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	rv = vm_reserv_from_page(m);
	if (rv->object == NULL)
		return (FALSE);
	KASSERT((m->flags & PG_CACHED) != 0,
	    ("vm_reserv_uncache_page: page %p is not cached", m));
	if (m->object == rv->object &&
	    m->pindex - rv->pindex == VM_RESERV_INDEX(m->object, m->pindex))
		vm_reserv_populate(rv);
	else {
		KASSERT(rv->inpartpopq,
		    ("vm_reserv_uncache_page: reserv %p's inpartpopq is FALSE",
		    rv));
		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
		rv->inpartpopq = FALSE;
		LIST_REMOVE(rv, objq);
		rv->object = NULL;
		/* Don't vm_phys_free_pages(m, 0). */
		m_index = m - rv->pages;
		for (i = 0; i < m_index; i++) {
			if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
				vm_phys_free_pages(&rv->pages[i], 0);
			else
				rv->popcnt--;
		}
		for (i++; i < VM_LEVEL_0_NPAGES; i++) {
			if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
				vm_phys_free_pages(&rv->pages[i], 0);
			else
				rv->popcnt--;
		}
		KASSERT(rv->popcnt == 0,
		    ("vm_reserv_uncache_page: reserv %p's popcnt is corrupted",
		    rv));
		vm_reserv_broken++;
	}
	return (TRUE);
}

/*
 * Breaks the given partially-populated reservation, releasing its cached and
 * free pages to the physical memory allocator.
 *
 * The free page queue lock must be held.
 */
static void
vm_reserv_reclaim(vm_reserv_t rv)
{
	int i;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	KASSERT(rv->inpartpopq,
	    ("vm_reserv_reclaim: reserv %p's inpartpopq is corrupted", rv));
	TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
	rv->inpartpopq = FALSE;
	KASSERT(rv->object != NULL,
	    ("vm_reserv_reclaim: reserv %p is free", rv));
	LIST_REMOVE(rv, objq);
	rv->object = NULL;
	for (i = 0; i < VM_LEVEL_0_NPAGES; i++) {
		if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0)
			vm_phys_free_pages(&rv->pages[i], 0);
		else
			rv->popcnt--;
	}
	KASSERT(rv->popcnt == 0,
	    ("vm_reserv_reclaim: reserv %p's popcnt is corrupted", rv));
	vm_reserv_reclaimed++;
}

/*
 * Breaks the reservation at the head of the partially-populated reservation
 * queue, releasing its cached and free pages to the physical memory
 * allocator.  Returns TRUE if a reservation is broken and FALSE otherwise.
 *
 * The free page queue lock must be held.
 */
boolean_t
vm_reserv_reclaim_inactive(void)
{
	vm_reserv_t rv;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
		vm_reserv_reclaim(rv);
		return (TRUE);
	}
	return (FALSE);
}

/*
 * Searches the partially-populated reservation queue for the least recently
 * active reservation with unused pages, i.e., cached or free, that satisfy the
 * given request for contiguous physical memory.  If a satisfactory reservation
 * is found, it is broken.  Returns TRUE if a reservation is broken and FALSE
 * otherwise.
 *
 * The free page queue lock must be held.
 */
boolean_t
vm_reserv_reclaim_contig(vm_paddr_t size, vm_paddr_t low, vm_paddr_t high,
    unsigned long alignment, unsigned long boundary)
{
	vm_paddr_t pa, pa_length;
	vm_reserv_t rv;
	int i;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if (size > VM_LEVEL_0_SIZE - PAGE_SIZE)
		return (FALSE);
	TAILQ_FOREACH(rv, &vm_rvq_partpop, partpopq) {
		pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
		if (pa + PAGE_SIZE - size < low) {
			/* this entire reservation is too low; go to next */
			continue;
		}
		pa_length = 0;
		for (i = 0; i < VM_LEVEL_0_NPAGES; i++)
			if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) {
				pa_length += PAGE_SIZE;
				if (pa_length == PAGE_SIZE) {
					pa = VM_PAGE_TO_PHYS(&rv->pages[i]);
					if (pa + size > high) {
						/* skip to next reservation */
						break;
					} else if (pa < low ||
					    (pa & (alignment - 1)) != 0 ||
					    ((pa ^ (pa + size - 1)) &
					    ~(boundary - 1)) != 0)
						pa_length = 0;
				} else if (pa_length >= size) {
					vm_reserv_reclaim(rv);
					return (TRUE);
				}
			} else
				pa_length = 0;
	}
	return (FALSE);
}

/*
 * Transfers the reservation underlying the given page to a new object.
 *
 * The object must be locked.
 */
void
vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
    vm_pindex_t old_object_offset)
{
	vm_reserv_t rv;

	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
	rv = vm_reserv_from_page(m);
	if (rv->object == old_object) {
		mtx_lock(&vm_page_queue_free_mtx);
		if (rv->object == old_object) {
			LIST_REMOVE(rv, objq);
			LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
			rv->object = new_object;
			rv->pindex -= old_object_offset;
		}
		mtx_unlock(&vm_page_queue_free_mtx);
	}
}

/*
 * Allocates the virtual and physical memory required by the reservation
 * management system's data structures, in particular, the reservation array.
 */
vm_paddr_t
vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
{
	vm_paddr_t new_end;
	size_t size;

	/*
	 * Calculate the size (in bytes) of the reservation array.  Round up
	 * from "high_water" because every small page is mapped to an element
	 * in the reservation array based on its physical address.  Thus, the
	 * number of elements in the reservation array can be greater than the
	 * number of superpages.
	 */
	size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv);

	/*
	 * Allocate and map the physical memory for the reservation array.  The
	 * next available virtual address is returned by reference.
	 */
	new_end = end - round_page(size);
	vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	bzero(vm_reserv_array, size);

	/*
	 * Return the next available physical address.
	 */
	return (new_end);
}
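
/*
 * Illustrative caller sketch (a simplified assumption about vm_page_startup(),
 * not code from this file): the reservation array is carved off the top of the
 * physical memory being parceled out at startup, and the return value becomes
 * the new end of available physical memory:
 *
 *	new_end = vm_reserv_startup(&vaddr, new_end, high_water);
 */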

#endif	/* VM_NRESERVLEVEL > 0 */