/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007-2008 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
3067754Smsmith */ 3167754Smsmith 3267754Smsmith/* 3367754Smsmith * Superpage reservation management module 3467754Smsmith * 3567754Smsmith * Any external functions defined by this module are only to be used by the 3667754Smsmith * virtual memory system. 3767754Smsmith */ 3867754Smsmith 3967754Smsmith#include <sys/cdefs.h> 4067754Smsmith__FBSDID("$FreeBSD: stable/10/sys/vm/vm_reserv.c 269072 2014-07-24 16:29:44Z kib $"); 4167754Smsmith 4267754Smsmith#include "opt_vm.h" 4367754Smsmith 4467754Smsmith#include <sys/param.h> 4567754Smsmith#include <sys/kernel.h> 4667754Smsmith#include <sys/lock.h> 4767754Smsmith#include <sys/malloc.h> 4867754Smsmith#include <sys/mutex.h> 4967754Smsmith#include <sys/queue.h> 5067754Smsmith#include <sys/rwlock.h> 5167754Smsmith#include <sys/sbuf.h> 5267754Smsmith#include <sys/sysctl.h> 5367754Smsmith#include <sys/systm.h> 5467754Smsmith 5567754Smsmith#include <vm/vm.h> 5667754Smsmith#include <vm/vm_param.h> 5767754Smsmith#include <vm/vm_object.h> 5867754Smsmith#include <vm/vm_page.h> 5967754Smsmith#include <vm/vm_phys.h> 6067754Smsmith#include <vm/vm_radix.h> 6167754Smsmith#include <vm/vm_reserv.h> 6267754Smsmith 6367754Smsmith/* 6467754Smsmith * The reservation system supports the speculative allocation of large physical 6567754Smsmith * pages ("superpages"). Speculative allocation enables the fully-automatic 6667754Smsmith * utilization of superpages by the virtual memory system. In other words, no 6767754Smsmith * programmatic directives are required to use superpages. 
6867754Smsmith */ 6967754Smsmith 7067754Smsmith#if VM_NRESERVLEVEL > 0 7167754Smsmith 7267754Smsmith/* 7367754Smsmith * The number of small pages that are contained in a level 0 reservation 7467754Smsmith */ 7567754Smsmith#define VM_LEVEL_0_NPAGES (1 << VM_LEVEL_0_ORDER) 7667754Smsmith 7767754Smsmith/* 7867754Smsmith * The number of bits by which a physical address is shifted to obtain the 7967754Smsmith * reservation number 8067754Smsmith */ 8167754Smsmith#define VM_LEVEL_0_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT) 8267754Smsmith 8367754Smsmith/* 8467754Smsmith * The size of a level 0 reservation in bytes 8567754Smsmith */ 8667754Smsmith#define VM_LEVEL_0_SIZE (1 << VM_LEVEL_0_SHIFT) 8767754Smsmith 8867754Smsmith/* 8967754Smsmith * Computes the index of the small page underlying the given (object, pindex) 9067754Smsmith * within the reservation's array of small pages. 9167754Smsmith */ 9267754Smsmith#define VM_RESERV_INDEX(object, pindex) \ 9367754Smsmith (((object)->pg_color + (pindex)) & (VM_LEVEL_0_NPAGES - 1)) 9467754Smsmith 9567754Smsmith/* 9667754Smsmith * The reservation structure 9767754Smsmith * 9867754Smsmith * A reservation structure is constructed whenever a large physical page is 9967754Smsmith * speculatively allocated to an object. The reservation provides the small 10067754Smsmith * physical pages for the range [pindex, pindex + VM_LEVEL_0_NPAGES) of offsets 10167754Smsmith * within that object. The reservation's "popcnt" tracks the number of these 10267754Smsmith * small physical pages that are in use at any given time. When and if the 10367754Smsmith * reservation is not fully utilized, it appears in the queue of partially- 10467754Smsmith * populated reservations. The reservation always appears on the containing 10567754Smsmith * object's list of reservations. 10667754Smsmith * 10767754Smsmith * A partially-populated reservation can be broken and reclaimed at any time. 
10867754Smsmith */ 10967754Smsmithstruct vm_reserv { 11067754Smsmith TAILQ_ENTRY(vm_reserv) partpopq; 11167754Smsmith LIST_ENTRY(vm_reserv) objq; 11267754Smsmith vm_object_t object; /* containing object */ 11367754Smsmith vm_pindex_t pindex; /* offset within object */ 11467754Smsmith vm_page_t pages; /* first page of a superpage */ 11567754Smsmith int popcnt; /* # of pages in use */ 11667754Smsmith char inpartpopq; 11777424Smsmith}; 11867754Smsmith 11967754Smsmith/* 12067754Smsmith * The reservation array 12177424Smsmith * 12291116Smsmith * This array is analoguous in function to vm_page_array. It differs in the 12367754Smsmith * respect that it may contain a greater number of useful reservation 12467754Smsmith * structures than there are (physical) superpages. These "invalid" 12583174Smsmith * reservation structures exist to trade-off space for time in the 12683174Smsmith * implementation of vm_reserv_from_page(). Invalid reservation structures are 12783174Smsmith * distinguishable from "valid" reservation structures by inspecting the 12877424Smsmith * reservation's "pages" field. Invalid reservation structures have a NULL 12983174Smsmith * "pages" field. 13083174Smsmith * 13183174Smsmith * vm_reserv_from_page() maps a small (physical) page to an element of this 13283174Smsmith * array by computing a physical reservation number from the page's physical 13367754Smsmith * address. The physical reservation number is used as the array index. 13467754Smsmith * 13583174Smsmith * An "active" reservation is a valid reservation structure that has a non-NULL 13667754Smsmith * "object" field and a non-zero "popcnt" field. In other words, every active 13783174Smsmith * reservation belongs to a particular object. Moreover, every active 13867754Smsmith * reservation has an entry in the containing object's list of reservations. 
13983174Smsmith */ 14067754Smsmithstatic vm_reserv_t vm_reserv_array; 14183174Smsmith 14283174Smsmith/* 14367754Smsmith * The partially-populated reservation queue 14467754Smsmith * 14583174Smsmith * This queue enables the fast recovery of an unused cached or free small page 14683174Smsmith * from a partially-populated reservation. The reservation at the head of 14783174Smsmith * this queue is the least-recently-changed, partially-populated reservation. 14867754Smsmith * 14983174Smsmith * Access to this queue is synchronized by the free page queue lock. 15067754Smsmith */ 15183174Smsmithstatic TAILQ_HEAD(, vm_reserv) vm_rvq_partpop = 15291116Smsmith TAILQ_HEAD_INITIALIZER(vm_rvq_partpop); 15367754Smsmith 15467754Smsmithstatic SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info"); 15583174Smsmith 15683174Smsmithstatic long vm_reserv_broken; 15783174SmsmithSYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD, 15883174Smsmith &vm_reserv_broken, 0, "Cumulative number of broken reservations"); 15983174Smsmith 16083174Smsmithstatic long vm_reserv_freed; 16183174SmsmithSYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD, 16283174Smsmith &vm_reserv_freed, 0, "Cumulative number of freed reservations"); 16383174Smsmith 16483174Smsmithstatic int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS); 16583174Smsmith 16683174SmsmithSYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, 16783174Smsmith sysctl_vm_reserv_partpopq, "A", "Partially-populated reservation queues"); 16867754Smsmith 16983174Smsmithstatic long vm_reserv_reclaimed; 17083174SmsmithSYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, 17167754Smsmith &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations"); 17291116Smsmith 17367754Smsmithstatic void vm_reserv_depopulate(vm_reserv_t rv); 17483174Smsmithstatic vm_reserv_t vm_reserv_from_page(vm_page_t m); 17591116Smsmithstatic boolean_t vm_reserv_has_pindex(vm_reserv_t rv, 17691116Smsmith vm_pindex_t 
pindex); 17783174Smsmithstatic void vm_reserv_populate(vm_reserv_t rv); 17883174Smsmithstatic void vm_reserv_reclaim(vm_reserv_t rv); 17983174Smsmith 18083174Smsmith/* 18183174Smsmith * Describes the current state of the partially-populated reservation queue. 18283174Smsmith */ 18383174Smsmithstatic int 18483174Smsmithsysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS) 18583174Smsmith{ 18667754Smsmith struct sbuf sbuf; 18767754Smsmith vm_reserv_t rv; 18867754Smsmith int counter, error, level, unused_pages; 18967754Smsmith 19067754Smsmith error = sysctl_wire_old_buffer(req, 0); 19182367Smsmith if (error != 0) 19282367Smsmith return (error); 19382367Smsmith sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 19482367Smsmith sbuf_printf(&sbuf, "\nLEVEL SIZE NUMBER\n\n"); 19582367Smsmith for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { 19682367Smsmith counter = 0; 19782367Smsmith unused_pages = 0; 19883174Smsmith mtx_lock(&vm_page_queue_free_mtx); 19982367Smsmith TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) { 20082367Smsmith counter++; 20182367Smsmith unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; 20282367Smsmith } 20382367Smsmith mtx_unlock(&vm_page_queue_free_mtx); 20482367Smsmith sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level, 20582367Smsmith unused_pages * ((int)PAGE_SIZE / 1024), counter); 20682367Smsmith } 20782367Smsmith error = sbuf_finish(&sbuf); 20882367Smsmith sbuf_delete(&sbuf); 20982367Smsmith return (error); 21082367Smsmith} 21182367Smsmith 21282367Smsmith/* 21383174Smsmith * Reduces the given reservation's population count. If the population count 21482367Smsmith * becomes zero, the reservation is destroyed. Additionally, moves the 21582367Smsmith * reservation to the tail of the partially-populated reservations queue if the 21682367Smsmith * population count is non-zero. 21782367Smsmith * 21882367Smsmith * The free page queue lock must be held. 
21982367Smsmith */ 22082367Smsmithstatic void 22183174Smsmithvm_reserv_depopulate(vm_reserv_t rv) 22283174Smsmith{ 22383174Smsmith 22482367Smsmith mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 22583174Smsmith KASSERT(rv->object != NULL, 22682367Smsmith ("vm_reserv_depopulate: reserv %p is free", rv)); 22782367Smsmith KASSERT(rv->popcnt > 0, 22882367Smsmith ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv)); 22982367Smsmith if (rv->inpartpopq) { 23082367Smsmith TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 23183174Smsmith rv->inpartpopq = FALSE; 23283174Smsmith } else { 23383174Smsmith KASSERT(rv->pages->psind == 1, 23482367Smsmith ("vm_reserv_depopulate: reserv %p is already demoted", 23582367Smsmith rv)); 23683174Smsmith rv->pages->psind = 0; 23782367Smsmith } 23882367Smsmith rv->popcnt--; 23982367Smsmith if (rv->popcnt == 0) { 24082367Smsmith LIST_REMOVE(rv, objq); 24183174Smsmith rv->object = NULL; 24282367Smsmith vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER); 24382367Smsmith vm_reserv_freed++; 24483174Smsmith } else { 24582367Smsmith rv->inpartpopq = TRUE; 24682367Smsmith TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq); 24783174Smsmith } 24883174Smsmith} 24983174Smsmith 25083174Smsmith/* 25183174Smsmith * Returns the reservation to which the given page might belong. 25283174Smsmith */ 25382367Smsmithstatic __inline vm_reserv_t 25482367Smsmithvm_reserv_from_page(vm_page_t m) 25583174Smsmith{ 25682367Smsmith 25782367Smsmith return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]); 25883174Smsmith} 25982367Smsmith 26082367Smsmith/* 26182367Smsmith * Returns TRUE if the given reservation contains the given page index and 26282367Smsmith * FALSE otherwise. 
26382367Smsmith */ 26482367Smsmithstatic __inline boolean_t 26582367Smsmithvm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex) 26682367Smsmith{ 26782367Smsmith 26882367Smsmith return (((pindex - rv->pindex) & ~(VM_LEVEL_0_NPAGES - 1)) == 0); 26982367Smsmith} 27083174Smsmith 27183174Smsmith/* 27283174Smsmith * Increases the given reservation's population count. Moves the reservation 27383174Smsmith * to the tail of the partially-populated reservation queue. 27483174Smsmith * 27583174Smsmith * The free page queue must be locked. 27682367Smsmith */ 27782367Smsmithstatic void 27882367Smsmithvm_reserv_populate(vm_reserv_t rv) 27982367Smsmith{ 28082367Smsmith 28183174Smsmith mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 28282367Smsmith KASSERT(rv->object != NULL, 28382367Smsmith ("vm_reserv_populate: reserv %p is free", rv)); 28482367Smsmith KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES, 28582367Smsmith ("vm_reserv_populate: reserv %p is already full", rv)); 28682367Smsmith KASSERT(rv->pages->psind == 0, 28782367Smsmith ("vm_reserv_populate: reserv %p is already promoted", rv)); 28882367Smsmith if (rv->inpartpopq) { 28982367Smsmith TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 29083174Smsmith rv->inpartpopq = FALSE; 29182367Smsmith } 29282367Smsmith rv->popcnt++; 29382367Smsmith if (rv->popcnt < VM_LEVEL_0_NPAGES) { 29482367Smsmith rv->inpartpopq = TRUE; 29582367Smsmith TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq); 29682367Smsmith } else 29782367Smsmith rv->pages->psind = 1; 29883174Smsmith} 29982367Smsmith 30082367Smsmith/* 30182367Smsmith * Allocates a contiguous set of physical pages of the given size "npages" 30282367Smsmith * from an existing or newly-created reservation. All of the physical pages 30382367Smsmith * must be at or above the given physical address "low" and below the given 30482367Smsmith * physical address "high". The given value "alignment" determines the 30582367Smsmith * alignment of the first physical page in the set. 
If the given value 30682367Smsmith * "boundary" is non-zero, then the set of physical pages cannot cross any 30782367Smsmith * physical address boundary that is a multiple of that value. Both 30882367Smsmith * "alignment" and "boundary" must be a power of two. 30982367Smsmith * 31082367Smsmith * The object and free page queue must be locked. 31183174Smsmith */ 31267754Smsmithvm_page_t 31383174Smsmithvm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages, 31483174Smsmith vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) 31583174Smsmith{ 31683174Smsmith vm_paddr_t pa, size; 31783174Smsmith vm_page_t m, m_ret, mpred, msucc; 31867754Smsmith vm_pindex_t first, leftcap, rightcap; 31967754Smsmith vm_reserv_t rv; 32067754Smsmith u_long allocpages, maxpages, minpages; 32167754Smsmith int i, index, n; 32267754Smsmith 32367754Smsmith mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 32467754Smsmith VM_OBJECT_ASSERT_WLOCKED(object); 32567754Smsmith KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); 32667754Smsmith 32783174Smsmith /* 32867754Smsmith * Is a reservation fundamentally impossible? 32983174Smsmith */ 33067754Smsmith if (pindex < VM_RESERV_INDEX(object, pindex) || 33167754Smsmith pindex + npages > object->size) 33267754Smsmith return (NULL); 33383174Smsmith 33467754Smsmith /* 33583174Smsmith * All reservations of a particular size have the same alignment. 33683174Smsmith * Assuming that the first page is allocated from a reservation, the 33767754Smsmith * least significant bits of its physical address can be determined 33867754Smsmith * from its offset from the beginning of the reservation and the size 33967754Smsmith * of the reservation. 34067754Smsmith * 34167754Smsmith * Could the specified index within a reservation of the smallest 34283174Smsmith * possible size satisfy the alignment and boundary requirements? 
34367754Smsmith */ 34483174Smsmith pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT; 34583174Smsmith if ((pa & (alignment - 1)) != 0) 34683174Smsmith return (NULL); 34783174Smsmith size = npages << PAGE_SHIFT; 34883174Smsmith if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) 34967754Smsmith return (NULL); 35067754Smsmith 35167754Smsmith /* 35267754Smsmith * Look for an existing reservation. 35367754Smsmith */ 35467754Smsmith mpred = vm_radix_lookup_le(&object->rtree, pindex); 35567754Smsmith if (mpred != NULL) { 35667754Smsmith KASSERT(mpred->pindex < pindex, 35767754Smsmith ("vm_reserv_alloc_contig: pindex already allocated")); 35867754Smsmith rv = vm_reserv_from_page(mpred); 35983174Smsmith if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) 36067754Smsmith goto found; 36183174Smsmith msucc = TAILQ_NEXT(mpred, listq); 36267754Smsmith } else 36367754Smsmith msucc = TAILQ_FIRST(&object->memq); 36467754Smsmith if (msucc != NULL) { 36583174Smsmith KASSERT(msucc->pindex > pindex, 36683174Smsmith ("vm_reserv_alloc_page: pindex already allocated")); 36783174Smsmith rv = vm_reserv_from_page(msucc); 36883174Smsmith if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) 36967754Smsmith goto found; 37067754Smsmith } 37167754Smsmith 37267754Smsmith /* 37367754Smsmith * Could at least one reservation fit between the first index to the 37483174Smsmith * left that can be used and the first index to the right that cannot 37567754Smsmith * be used? 
37683174Smsmith */ 37783174Smsmith first = pindex - VM_RESERV_INDEX(object, pindex); 37883174Smsmith if (mpred != NULL) { 37983174Smsmith if ((rv = vm_reserv_from_page(mpred))->object != object) 38083174Smsmith leftcap = mpred->pindex + 1; 38167754Smsmith else 38267754Smsmith leftcap = rv->pindex + VM_LEVEL_0_NPAGES; 38367754Smsmith if (leftcap > first) 38467754Smsmith return (NULL); 38567754Smsmith } 38667754Smsmith minpages = VM_RESERV_INDEX(object, pindex) + npages; 38767754Smsmith maxpages = roundup2(minpages, VM_LEVEL_0_NPAGES); 38867754Smsmith allocpages = maxpages; 38967754Smsmith if (msucc != NULL) { 39067754Smsmith if ((rv = vm_reserv_from_page(msucc))->object != object) 39183174Smsmith rightcap = msucc->pindex; 39267754Smsmith else 39383174Smsmith rightcap = rv->pindex; 39467754Smsmith if (first + maxpages > rightcap) { 39567754Smsmith if (maxpages == VM_LEVEL_0_NPAGES) 39667754Smsmith return (NULL); 39767754Smsmith allocpages = minpages; 39883174Smsmith } 39983174Smsmith } 40083174Smsmith 40183174Smsmith /* 40267754Smsmith * Would the last new reservation extend past the end of the object? 40367754Smsmith */ 40467754Smsmith if (first + maxpages > object->size) { 40567754Smsmith /* 40667754Smsmith * Don't allocate the last new reservation if the object is a 40783174Smsmith * vnode or backed by another object that is a vnode. 40867754Smsmith */ 40983174Smsmith if (object->type == OBJT_VNODE || 41083174Smsmith (object->backing_object != NULL && 41183174Smsmith object->backing_object->type == OBJT_VNODE)) { 41283174Smsmith if (maxpages == VM_LEVEL_0_NPAGES) 41383174Smsmith return (NULL); 41467754Smsmith allocpages = minpages; 41567754Smsmith } 41667754Smsmith /* Speculate that the object may grow. */ 41767754Smsmith } 41867754Smsmith 41967754Smsmith /* 42067754Smsmith * Allocate and populate the new reservations. 
The alignment and 42167754Smsmith * boundary specified for this allocation may be different from the 42267754Smsmith * alignment and boundary specified for the requested pages. For 42367754Smsmith * instance, the specified index may not be the first page within the 42483174Smsmith * first new reservation. 42567754Smsmith */ 42683174Smsmith m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment, 42767754Smsmith VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0); 42867754Smsmith if (m == NULL) 42967754Smsmith return (NULL); 43067754Smsmith m_ret = NULL; 43183174Smsmith index = VM_RESERV_INDEX(object, pindex); 43283174Smsmith do { 43383174Smsmith rv = vm_reserv_from_page(m); 43483174Smsmith KASSERT(rv->pages == m, 43567754Smsmith ("vm_reserv_alloc_contig: reserv %p's pages is corrupted", 43667754Smsmith rv)); 43767754Smsmith KASSERT(rv->object == NULL, 43867754Smsmith ("vm_reserv_alloc_contig: reserv %p isn't free", rv)); 43967754Smsmith LIST_INSERT_HEAD(&object->rvq, rv, objq); 44083174Smsmith rv->object = object; 44167754Smsmith rv->pindex = first; 44283174Smsmith KASSERT(rv->popcnt == 0, 44383174Smsmith ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted", 44483174Smsmith rv)); 44583174Smsmith KASSERT(!rv->inpartpopq, 44683174Smsmith ("vm_reserv_alloc_contig: reserv %p's inpartpopq is TRUE", 44767754Smsmith rv)); 44867754Smsmith n = ulmin(VM_LEVEL_0_NPAGES - index, npages); 44967754Smsmith for (i = 0; i < n; i++) 45067754Smsmith vm_reserv_populate(rv); 45167754Smsmith npages -= n; 45267754Smsmith if (m_ret == NULL) { 45367754Smsmith m_ret = &rv->pages[index]; 45467754Smsmith index = 0; 45567754Smsmith } 45683174Smsmith m += VM_LEVEL_0_NPAGES; 45767754Smsmith first += VM_LEVEL_0_NPAGES; 45883174Smsmith allocpages -= VM_LEVEL_0_NPAGES; 45967754Smsmith } while (allocpages > 0); 46067754Smsmith return (m_ret); 46183174Smsmith 46283174Smsmith /* 46367754Smsmith * Found a matching reservation. 
46467754Smsmith */ 46567754Smsmithfound: 46667754Smsmith index = VM_RESERV_INDEX(object, pindex); 46767754Smsmith /* Does the allocation fit within the reservation? */ 46867754Smsmith if (index + npages > VM_LEVEL_0_NPAGES) 46967754Smsmith return (NULL); 47083174Smsmith m = &rv->pages[index]; 47167754Smsmith pa = VM_PAGE_TO_PHYS(m); 47283174Smsmith if (pa < low || pa + size > high || (pa & (alignment - 1)) != 0 || 47383174Smsmith ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) 47483174Smsmith return (NULL); 47583174Smsmith /* Handle vm_page_rename(m, new_object, ...). */ 47683174Smsmith for (i = 0; i < npages; i++) 47767754Smsmith if ((rv->pages[index + i].flags & (PG_CACHED | PG_FREE)) == 0) 47867754Smsmith return (NULL); 47967754Smsmith for (i = 0; i < npages; i++) 48067754Smsmith vm_reserv_populate(rv); 48167754Smsmith return (m); 48267754Smsmith} 48367754Smsmith 48467754Smsmith/* 48567754Smsmith * Allocates a page from an existing or newly-created reservation. 48667754Smsmith * 48783174Smsmith * The page "mpred" must immediately precede the offset "pindex" within the 48867754Smsmith * specified object. 48983174Smsmith * 49067754Smsmith * The object and free page queue must be locked. 49167754Smsmith */ 49267754Smsmithvm_page_t 49383174Smsmithvm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) 49483174Smsmith{ 49583174Smsmith vm_page_t m, msucc; 49683174Smsmith vm_pindex_t first, leftcap, rightcap; 49783174Smsmith vm_reserv_t rv; 49883174Smsmith 49983174Smsmith mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 50083174Smsmith VM_OBJECT_ASSERT_WLOCKED(object); 50183174Smsmith 50283174Smsmith /* 50383174Smsmith * Is a reservation fundamentally impossible? 50483174Smsmith */ 50567754Smsmith if (pindex < VM_RESERV_INDEX(object, pindex) || 50667754Smsmith pindex >= object->size) 50767754Smsmith return (NULL); 50867754Smsmith 50967754Smsmith /* 51067754Smsmith * Look for an existing reservation. 
51167754Smsmith */ 51283174Smsmith if (mpred != NULL) { 51367754Smsmith KASSERT(mpred->object == object, 51483174Smsmith ("vm_reserv_alloc_page: object doesn't contain mpred")); 51583174Smsmith KASSERT(mpred->pindex < pindex, 51683174Smsmith ("vm_reserv_alloc_page: mpred doesn't precede pindex")); 51783174Smsmith rv = vm_reserv_from_page(mpred); 51883174Smsmith if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) 51967754Smsmith goto found; 52067754Smsmith msucc = TAILQ_NEXT(mpred, listq); 52167754Smsmith } else 52267754Smsmith msucc = TAILQ_FIRST(&object->memq); 52367754Smsmith if (msucc != NULL) { 52467754Smsmith KASSERT(msucc->pindex > pindex, 52567754Smsmith ("vm_reserv_alloc_page: msucc doesn't succeed pindex")); 52667754Smsmith rv = vm_reserv_from_page(msucc); 52767754Smsmith if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) 52867754Smsmith goto found; 52983174Smsmith } 53067754Smsmith 53183174Smsmith /* 53271867Smsmith * Could a reservation fit between the first index to the left that 53367754Smsmith * can be used and the first index to the right that cannot be used? 53467754Smsmith */ 53583174Smsmith first = pindex - VM_RESERV_INDEX(object, pindex); 53691116Smsmith if (mpred != NULL) { 53791116Smsmith if ((rv = vm_reserv_from_page(mpred))->object != object) 53867754Smsmith leftcap = mpred->pindex + 1; 53967754Smsmith else 54067754Smsmith leftcap = rv->pindex + VM_LEVEL_0_NPAGES; 54167754Smsmith if (leftcap > first) 54267754Smsmith return (NULL); 54367754Smsmith } 54467754Smsmith if (msucc != NULL) { 54583174Smsmith if ((rv = vm_reserv_from_page(msucc))->object != object) 54667754Smsmith rightcap = msucc->pindex; 54783174Smsmith else 54883174Smsmith rightcap = rv->pindex; 54983174Smsmith if (first + VM_LEVEL_0_NPAGES > rightcap) 55083174Smsmith return (NULL); 55183174Smsmith } 55267754Smsmith 55367754Smsmith /* 55467754Smsmith * Would a new reservation extend past the end of the object? 
55567754Smsmith */ 55667754Smsmith if (first + VM_LEVEL_0_NPAGES > object->size) { 55767754Smsmith /* 55867754Smsmith * Don't allocate a new reservation if the object is a vnode or 55967754Smsmith * backed by another object that is a vnode. 56067754Smsmith */ 56167754Smsmith if (object->type == OBJT_VNODE || 56283174Smsmith (object->backing_object != NULL && 56367754Smsmith object->backing_object->type == OBJT_VNODE)) 56483174Smsmith return (NULL); 56567754Smsmith /* Speculate that the object may grow. */ 56667754Smsmith } 56767754Smsmith 56883174Smsmith /* 56983174Smsmith * Allocate and populate the new reservation. 57067754Smsmith */ 57167754Smsmith m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER); 57267754Smsmith if (m == NULL) 57367754Smsmith return (NULL); 57483174Smsmith rv = vm_reserv_from_page(m); 57567754Smsmith KASSERT(rv->pages == m, 57683174Smsmith ("vm_reserv_alloc_page: reserv %p's pages is corrupted", rv)); 57767754Smsmith KASSERT(rv->object == NULL, 57867754Smsmith ("vm_reserv_alloc_page: reserv %p isn't free", rv)); 57977424Smsmith LIST_INSERT_HEAD(&object->rvq, rv, objq); 58067754Smsmith rv->object = object; 58167754Smsmith rv->pindex = first; 58267754Smsmith KASSERT(rv->popcnt == 0, 58383174Smsmith ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv)); 58467754Smsmith KASSERT(!rv->inpartpopq, 58567754Smsmith ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE", rv)); 58667754Smsmith vm_reserv_populate(rv); 58767754Smsmith return (&rv->pages[VM_RESERV_INDEX(object, pindex)]); 58867754Smsmith 58967754Smsmith /* 59067754Smsmith * Found a matching reservation. 59167754Smsmith */ 59267754Smsmithfound: 59377424Smsmith m = &rv->pages[VM_RESERV_INDEX(object, pindex)]; 59467754Smsmith /* Handle vm_page_rename(m, new_object, ...). 
*/ 59567754Smsmith if ((m->flags & (PG_CACHED | PG_FREE)) == 0) 59667754Smsmith return (NULL); 59767754Smsmith vm_reserv_populate(rv); 59867754Smsmith return (m); 59967754Smsmith} 60067754Smsmith 60167754Smsmith/* 60267754Smsmith * Breaks all reservations belonging to the given object. 60367754Smsmith */ 60467754Smsmithvoid 60567754Smsmithvm_reserv_break_all(vm_object_t object) 60667754Smsmith{ 60782367Smsmith vm_reserv_t rv; 60867754Smsmith int i; 60967754Smsmith 61067754Smsmith mtx_lock(&vm_page_queue_free_mtx); 61167754Smsmith while ((rv = LIST_FIRST(&object->rvq)) != NULL) { 61267754Smsmith KASSERT(rv->object == object, 61367754Smsmith ("vm_reserv_break_all: reserv %p is corrupted", rv)); 61467754Smsmith if (rv->inpartpopq) { 61567754Smsmith TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 61667754Smsmith rv->inpartpopq = FALSE; 61767754Smsmith } 61867754Smsmith LIST_REMOVE(rv, objq); 61967754Smsmith rv->object = NULL; 62067754Smsmith for (i = 0; i < VM_LEVEL_0_NPAGES; i++) { 62167754Smsmith if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 62267754Smsmith vm_phys_free_pages(&rv->pages[i], 0); 62367754Smsmith else 62467754Smsmith rv->popcnt--; 62567754Smsmith } 62667754Smsmith KASSERT(rv->popcnt == 0, 62767754Smsmith ("vm_reserv_break_all: reserv %p's popcnt is corrupted", 62867754Smsmith rv)); 62967754Smsmith vm_reserv_broken++; 63067754Smsmith } 63167754Smsmith mtx_unlock(&vm_page_queue_free_mtx); 63267754Smsmith} 63367754Smsmith 63467754Smsmith/* 63567754Smsmith * Frees the given page if it belongs to a reservation. Returns TRUE if the 63667754Smsmith * page is freed and FALSE otherwise. 63767754Smsmith * 63867754Smsmith * The free page queue lock must be held. 
63967754Smsmith */ 64067754Smsmithboolean_t 64167754Smsmithvm_reserv_free_page(vm_page_t m) 64267754Smsmith{ 64367754Smsmith vm_reserv_t rv; 64467754Smsmith 64567754Smsmith mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 64667754Smsmith rv = vm_reserv_from_page(m); 64767754Smsmith if (rv->object == NULL) 64867754Smsmith return (FALSE); 64967754Smsmith if ((m->flags & PG_CACHED) != 0 && m->pool != VM_FREEPOOL_CACHE) 65091116Smsmith vm_phys_set_pool(VM_FREEPOOL_CACHE, rv->pages, 65191116Smsmith VM_LEVEL_0_ORDER); 65267754Smsmith vm_reserv_depopulate(rv); 65367754Smsmith return (TRUE); 65467754Smsmith} 65567754Smsmith 65667754Smsmith/* 65767754Smsmith * Initializes the reservation management system. Specifically, initializes 65867754Smsmith * the reservation array. 65991116Smsmith * 66091116Smsmith * Requires that vm_page_array and first_page are initialized! 66167754Smsmith */ 66267754Smsmithvoid 66367754Smsmithvm_reserv_init(void) 66467754Smsmith{ 66567754Smsmith vm_paddr_t paddr; 66667754Smsmith int i; 66767754Smsmith 66891116Smsmith /* 66991116Smsmith * Initialize the reservation array. Specifically, initialize the 67067754Smsmith * "pages" field for every element that has an underlying superpage. 67167754Smsmith */ 67291116Smsmith for (i = 0; phys_avail[i + 1] != 0; i += 2) { 67391116Smsmith paddr = roundup2(phys_avail[i], VM_LEVEL_0_SIZE); 67467754Smsmith while (paddr + VM_LEVEL_0_SIZE <= phys_avail[i + 1]) { 67567754Smsmith vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages = 67667754Smsmith PHYS_TO_VM_PAGE(paddr); 67767754Smsmith paddr += VM_LEVEL_0_SIZE; 67867754Smsmith } 67967754Smsmith } 68067754Smsmith} 68167754Smsmith 68267754Smsmith/* 68367754Smsmith * Returns a reservation level if the given page belongs to a fully-populated 68467754Smsmith * reservation and -1 otherwise. 
68567754Smsmith */ 68667754Smsmithint 68767754Smsmithvm_reserv_level_iffullpop(vm_page_t m) 68867754Smsmith{ 68967754Smsmith vm_reserv_t rv; 69067754Smsmith 69167754Smsmith rv = vm_reserv_from_page(m); 69267754Smsmith return (rv->popcnt == VM_LEVEL_0_NPAGES ? 0 : -1); 69367754Smsmith} 69467754Smsmith 69567754Smsmith/* 69667754Smsmith * Prepare for the reactivation of a cached page. 69767754Smsmith * 69867754Smsmith * First, suppose that the given page "m" was allocated individually, i.e., not 69967754Smsmith * as part of a reservation, and cached. Then, suppose a reservation 70067754Smsmith * containing "m" is allocated by the same object. Although "m" and the 70167754Smsmith * reservation belong to the same object, "m"'s pindex may not match the 70267754Smsmith * reservation's. 70367754Smsmith * 70467754Smsmith * The free page queue must be locked. 70567754Smsmith */ 70667754Smsmithboolean_t 70767754Smsmithvm_reserv_reactivate_page(vm_page_t m) 70867754Smsmith{ 70967754Smsmith vm_reserv_t rv; 71067754Smsmith int i, m_index; 71167754Smsmith 71267754Smsmith mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 71367754Smsmith rv = vm_reserv_from_page(m); 71467754Smsmith if (rv->object == NULL) 71567754Smsmith return (FALSE); 716 KASSERT((m->flags & PG_CACHED) != 0, 717 ("vm_reserv_uncache_page: page %p is not cached", m)); 718 if (m->object == rv->object && 719 m->pindex - rv->pindex == VM_RESERV_INDEX(m->object, m->pindex)) 720 vm_reserv_populate(rv); 721 else { 722 KASSERT(rv->inpartpopq, 723 ("vm_reserv_uncache_page: reserv %p's inpartpopq is FALSE", 724 rv)); 725 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 726 rv->inpartpopq = FALSE; 727 LIST_REMOVE(rv, objq); 728 rv->object = NULL; 729 /* Don't vm_phys_free_pages(m, 0). 
*/ 730 m_index = m - rv->pages; 731 for (i = 0; i < m_index; i++) { 732 if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 733 vm_phys_free_pages(&rv->pages[i], 0); 734 else 735 rv->popcnt--; 736 } 737 for (i++; i < VM_LEVEL_0_NPAGES; i++) { 738 if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 739 vm_phys_free_pages(&rv->pages[i], 0); 740 else 741 rv->popcnt--; 742 } 743 KASSERT(rv->popcnt == 0, 744 ("vm_reserv_uncache_page: reserv %p's popcnt is corrupted", 745 rv)); 746 vm_reserv_broken++; 747 } 748 return (TRUE); 749} 750 751/* 752 * Breaks the given partially-populated reservation, releasing its cached and 753 * free pages to the physical memory allocator. 754 * 755 * The free page queue lock must be held. 756 */ 757static void 758vm_reserv_reclaim(vm_reserv_t rv) 759{ 760 int i; 761 762 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 763 KASSERT(rv->inpartpopq, 764 ("vm_reserv_reclaim: reserv %p's inpartpopq is corrupted", rv)); 765 TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 766 rv->inpartpopq = FALSE; 767 KASSERT(rv->object != NULL, 768 ("vm_reserv_reclaim: reserv %p is free", rv)); 769 LIST_REMOVE(rv, objq); 770 rv->object = NULL; 771 for (i = 0; i < VM_LEVEL_0_NPAGES; i++) { 772 if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 773 vm_phys_free_pages(&rv->pages[i], 0); 774 else 775 rv->popcnt--; 776 } 777 KASSERT(rv->popcnt == 0, 778 ("vm_reserv_reclaim: reserv %p's popcnt is corrupted", rv)); 779 vm_reserv_reclaimed++; 780} 781 782/* 783 * Breaks the reservation at the head of the partially-populated reservation 784 * queue, releasing its cached and free pages to the physical memory 785 * allocator. Returns TRUE if a reservation is broken and FALSE otherwise. 786 * 787 * The free page queue lock must be held. 
 */
boolean_t
vm_reserv_reclaim_inactive(void)
{
	vm_reserv_t rv;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/* The queue head is the least recently active reservation. */
	if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
		vm_reserv_reclaim(rv);
		return (TRUE);
	}
	return (FALSE);
}

/*
 * Searches the partially-populated reservation queue for the least recently
 * active reservation with unused pages, i.e., cached or free, that satisfy the
 * given request for contiguous physical memory.  If a satisfactory reservation
 * is found, it is broken.  Returns TRUE if a reservation is broken and FALSE
 * otherwise.
 *
 * The free page queue lock must be held.
 */
boolean_t
vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	vm_paddr_t pa, pa_length, size;
	vm_reserv_t rv;
	int i;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/* A request for a full reservation cannot be satisfied here. */
	if (npages > VM_LEVEL_0_NPAGES - 1)
		return (FALSE);
	size = npages << PAGE_SHIFT;
	TAILQ_FOREACH(rv, &vm_rvq_partpop, partpopq) {
		pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
		if (pa + PAGE_SIZE - size < low) {
			/* this entire reservation is too low; go to next */
			continue;
		}
		/*
		 * Scan for a run of cached/free pages of at least "size"
		 * bytes.  NOTE(review): low/alignment/boundary are validated
		 * only when a run starts (pa_length == PAGE_SIZE), and the
		 * "pa + size > high" case breaks out of this reservation
		 * entirely — confirm this cannot skip a satisfiable run.
		 */
		pa_length = 0;
		for (i = 0; i < VM_LEVEL_0_NPAGES; i++)
			if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) {
				pa_length += PAGE_SIZE;
				if (pa_length == PAGE_SIZE) {
					pa = VM_PAGE_TO_PHYS(&rv->pages[i]);
					if (pa + size > high) {
						/* skip to next reservation */
						break;
					} else if (pa < low ||
					    (pa & (alignment - 1)) != 0 ||
					    ((pa ^ (pa + size - 1)) &
					    ~(boundary - 1)) != 0)
						pa_length = 0;
				}
				if (pa_length >= size) {
					vm_reserv_reclaim(rv);
					return (TRUE);
				}
			} else
				pa_length = 0;
	}
	return (FALSE);
}

/*
 * Transfers the reservation underlying the given page to a new object.
 *
 * The object must be locked.
 */
void
vm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object,
    vm_pindex_t old_object_offset)
{
	vm_reserv_t rv;

	VM_OBJECT_ASSERT_WLOCKED(new_object);
	rv = vm_reserv_from_page(m);
	if (rv->object == old_object) {
		mtx_lock(&vm_page_queue_free_mtx);
		/*
		 * Re-check under the free queue lock: the reservation may
		 * have been broken or reassigned since the unlocked test.
		 */
		if (rv->object == old_object) {
			LIST_REMOVE(rv, objq);
			LIST_INSERT_HEAD(&new_object->rvq, rv, objq);
			rv->object = new_object;
			rv->pindex -= old_object_offset;
		}
		mtx_unlock(&vm_page_queue_free_mtx);
	}
}

/*
 * Allocates the virtual and physical memory required by the reservation
 * management system's data structures, in particular, the reservation array.
 *
 * Returns the next available physical address below "end"; the next
 * available virtual address is returned through "vaddr".
 */
vm_paddr_t
vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
{
	vm_paddr_t new_end;
	size_t size;

	/*
	 * Calculate the size (in bytes) of the reservation array.  Round up
	 * from "high_water" because every small page is mapped to an element
	 * in the reservation array based on its physical address.  Thus, the
	 * number of elements in the reservation array can be greater than the
	 * number of superpages.
	 */
	size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv);

	/*
	 * Allocate and map the physical memory for the reservation array.  The
	 * next available virtual address is returned by reference.
	 */
	new_end = end - round_page(size);
	vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	/* The array must start zeroed: no reservation is active at boot. */
	bzero(vm_reserv_array, size);

	/*
	 * Return the next available physical address.
	 */
	return (new_end);
}

#endif	/* VM_NRESERVLEVEL > 0 */