1174982Salc/*- 2174982Salc * Copyright (c) 2002-2006 Rice University 3177956Salc * Copyright (c) 2007-2008 Alan L. Cox <alc@cs.rice.edu> 4174982Salc * All rights reserved. 5174982Salc * 6174982Salc * This software was developed for the FreeBSD Project by Alan L. Cox, 7174982Salc * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 8174982Salc * 9174982Salc * Redistribution and use in source and binary forms, with or without 10174982Salc * modification, are permitted provided that the following conditions 11174982Salc * are met: 12174982Salc * 1. Redistributions of source code must retain the above copyright 13174982Salc * notice, this list of conditions and the following disclaimer. 14174982Salc * 2. Redistributions in binary form must reproduce the above copyright 15174982Salc * notice, this list of conditions and the following disclaimer in the 16174982Salc * documentation and/or other materials provided with the distribution. 17174982Salc * 18174982Salc * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19174982Salc * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20174982Salc * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21174982Salc * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22174982Salc * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23174982Salc * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24174982Salc * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 25174982Salc * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26174982Salc * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27174982Salc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 28174982Salc * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29174982Salc * POSSIBILITY OF SUCH DAMAGE. 
30174982Salc */ 31174982Salc 32174982Salc/* 33174982Salc * Superpage reservation management module 34174982Salc */ 35174982Salc 36174982Salc#include <sys/cdefs.h> 37174982Salc__FBSDID("$FreeBSD$"); 38174982Salc 39174982Salc#include "opt_vm.h" 40174982Salc 41174982Salc#include <sys/param.h> 42174982Salc#include <sys/kernel.h> 43174982Salc#include <sys/lock.h> 44174982Salc#include <sys/malloc.h> 45174982Salc#include <sys/mutex.h> 46174982Salc#include <sys/queue.h> 47174982Salc#include <sys/sbuf.h> 48174982Salc#include <sys/sysctl.h> 49174982Salc#include <sys/systm.h> 50174982Salc 51174982Salc#include <vm/vm.h> 52174982Salc#include <vm/vm_param.h> 53174982Salc#include <vm/vm_object.h> 54174982Salc#include <vm/vm_page.h> 55174982Salc#include <vm/vm_phys.h> 56174982Salc#include <vm/vm_reserv.h> 57174982Salc 58174982Salc/* 59174982Salc * The reservation system supports the speculative allocation of large physical 60174982Salc * pages ("superpages"). Speculative allocation enables the fully-automatic 61174982Salc * utilization of superpages by the virtual memory system. In other words, no 62174982Salc * programmatic directives are required to use superpages. 
 */

/*
 * Everything from here to the matching #endif at the end of the file is
 * compiled only when at least one reservation level is configured.
 */
#if VM_NRESERVLEVEL > 0

/*
 * The number of small pages that are contained in a level 0 reservation.
 * Always a power of two, because it is formed by shifting 1.
 */
#define	VM_LEVEL_0_NPAGES	(1 << VM_LEVEL_0_ORDER)

/*
 * The number of bits by which a physical address is shifted to obtain the
 * reservation number
 */
#define	VM_LEVEL_0_SHIFT	(VM_LEVEL_0_ORDER + PAGE_SHIFT)

/*
 * The size of a level 0 reservation in bytes
 */
#define	VM_LEVEL_0_SIZE		(1 << VM_LEVEL_0_SHIFT)

/*
 * Computes the index of the small page underlying the given (object, pindex)
 * within the reservation's array of small pages.
 *
 * The object's "pg_color" is added to the page index and the sum is reduced
 * modulo VM_LEVEL_0_NPAGES by masking, which is valid because
 * VM_LEVEL_0_NPAGES is a power of two.  Each macro argument is evaluated
 * exactly once.
 */
#define	VM_RESERV_INDEX(object, pindex)	\
    (((object)->pg_color + (pindex)) & (VM_LEVEL_0_NPAGES - 1))

/*
 * The reservation structure
 *
 * A reservation structure is constructed whenever a large physical page is
 * speculatively allocated to an object.  The reservation provides the small
 * physical pages for the range [pindex, pindex + VM_LEVEL_0_NPAGES) of offsets
 * within that object.  The reservation's "popcnt" tracks the number of these
 * small physical pages that are in use at any given time.  When and if the
 * reservation is not fully utilized, it appears in the queue of partially-
 * populated reservations.  The reservation always appears on the containing
 * object's list of reservations.
 *
 * A partially-populated reservation can be broken and reclaimed at any time.
 */
struct vm_reserv {
	/* linkage on vm_rvq_partpop; meaningful only while inpartpopq is set */
	TAILQ_ENTRY(vm_reserv) partpopq;
	/* linkage on the containing object's "rvq" list of reservations */
	LIST_ENTRY(vm_reserv) objq;
	vm_object_t	object;			/* containing object */
	vm_pindex_t	pindex;			/* offset within object */
	vm_page_t	pages;			/* first page of a superpage */
	int		popcnt;			/* # of pages in use */
	/* TRUE while this reservation is linked on vm_rvq_partpop */
	char		inpartpopq;
};

/*
 * The reservation array
 *
 * This array is analogous in function to vm_page_array.  It differs in the
 * respect that it may contain a greater number of useful reservation
 * structures than there are (physical) superpages.  These "invalid"
 * reservation structures exist to trade-off space for time in the
 * implementation of vm_reserv_from_page().  Invalid reservation structures are
 * distinguishable from "valid" reservation structures by inspecting the
 * reservation's "pages" field.  Invalid reservation structures have a NULL
 * "pages" field.
 *
 * vm_reserv_from_page() maps a small (physical) page to an element of this
 * array by computing a physical reservation number from the page's physical
 * address.  The physical reservation number is used as the array index.
 *
 * An "active" reservation is a valid reservation structure that has a non-NULL
 * "object" field and a non-zero "popcnt" field.  In other words, every active
 * reservation belongs to a particular object.  Moreover, every active
 * reservation has an entry in the containing object's list of reservations.
 */
static vm_reserv_t vm_reserv_array;

/*
 * The partially-populated reservation queue
 *
 * This queue enables the fast recovery of an unused cached or free small page
 * from a partially-populated reservation.  The reservation at the head of
 * this queue is the least-recently-changed, partially-populated reservation.
 *
 * Access to this queue is synchronized by the free page queue lock.
 */
static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
			    TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);

/* Parent node for the read-only "vm.reserv" sysctl entries declared below. */
static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");

/*
 * Incremented once per reservation broken by vm_reserv_break_all() or by
 * vm_reserv_reactivate_page().
 */
static long vm_reserv_broken;
SYSCTL_LONG(_vm_reserv, OID_AUTO, broken, CTLFLAG_RD,
    &vm_reserv_broken, 0, "Cumulative number of broken reservations");

/*
 * Incremented by vm_reserv_depopulate() each time a reservation's population
 * count drops to zero and its pages are freed as a unit.
 */
static long vm_reserv_freed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, freed, CTLFLAG_RD,
    &vm_reserv_freed, 0, "Cumulative number of freed reservations");

static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);

/* String-valued entry whose text is produced by sysctl_vm_reserv_partpopq(). */
SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
    sysctl_vm_reserv_partpopq, "A", "Partially-populated reservation queues");

/* Incremented once per reservation broken by vm_reserv_reclaim(). */
static long vm_reserv_reclaimed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
    &vm_reserv_reclaimed, 0, "Cumulative number of reclaimed reservations");

/* Forward declarations of the file-local helpers. */
static void		vm_reserv_depopulate(vm_reserv_t rv);
static vm_reserv_t	vm_reserv_from_page(vm_page_t m);
static boolean_t	vm_reserv_has_pindex(vm_reserv_t rv,
			    vm_pindex_t pindex);
static void		vm_reserv_populate(vm_reserv_t rv);
173177956Salcstatic void vm_reserv_reclaim(vm_reserv_t rv); 174174982Salc 175174982Salc/* 176174982Salc * Describes the current state of the partially-populated reservation queue. 177174982Salc */ 178174982Salcstatic int 179174982Salcsysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS) 180174982Salc{ 181174982Salc struct sbuf sbuf; 182174982Salc vm_reserv_t rv; 183174982Salc int counter, error, level, unused_pages; 184174982Salc 185217916Smdf error = sysctl_wire_old_buffer(req, 0); 186217916Smdf if (error != 0) 187217916Smdf return (error); 188212750Smdf sbuf_new_for_sysctl(&sbuf, NULL, 128, req); 189174982Salc sbuf_printf(&sbuf, "\nLEVEL SIZE NUMBER\n\n"); 190174982Salc for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) { 191174982Salc counter = 0; 192174982Salc unused_pages = 0; 193174982Salc mtx_lock(&vm_page_queue_free_mtx); 194174982Salc TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) { 195174982Salc counter++; 196174982Salc unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt; 197174982Salc } 198174982Salc mtx_unlock(&vm_page_queue_free_mtx); 199214564Salc sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level, 200215093Salc unused_pages * ((int)PAGE_SIZE / 1024), counter); 201174982Salc } 202212750Smdf error = sbuf_finish(&sbuf); 203174982Salc sbuf_delete(&sbuf); 204174982Salc return (error); 205174982Salc} 206174982Salc 207174982Salc/* 208174982Salc * Reduces the given reservation's population count. If the population count 209174982Salc * becomes zero, the reservation is destroyed. Additionally, moves the 210190912Salc * reservation to the tail of the partially-populated reservations queue if the 211174982Salc * population count is non-zero. 212174982Salc * 213174982Salc * The free page queue lock must be held. 
214174982Salc */ 215174982Salcstatic void 216174982Salcvm_reserv_depopulate(vm_reserv_t rv) 217174982Salc{ 218174982Salc 219174982Salc mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 220174982Salc KASSERT(rv->object != NULL, 221174982Salc ("vm_reserv_depopulate: reserv %p is free", rv)); 222174982Salc KASSERT(rv->popcnt > 0, 223174982Salc ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv)); 224174982Salc if (rv->inpartpopq) { 225174982Salc TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 226174982Salc rv->inpartpopq = FALSE; 227174982Salc } 228174982Salc rv->popcnt--; 229174982Salc if (rv->popcnt == 0) { 230174982Salc LIST_REMOVE(rv, objq); 231174982Salc rv->object = NULL; 232174982Salc vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER); 233174982Salc vm_reserv_freed++; 234174982Salc } else { 235174982Salc rv->inpartpopq = TRUE; 236190912Salc TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq); 237174982Salc } 238174982Salc} 239174982Salc 240174982Salc/* 241174982Salc * Returns the reservation to which the given page might belong. 242174982Salc */ 243174982Salcstatic __inline vm_reserv_t 244174982Salcvm_reserv_from_page(vm_page_t m) 245174982Salc{ 246174982Salc 247174982Salc return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]); 248174982Salc} 249174982Salc 250174982Salc/* 251174982Salc * Returns TRUE if the given reservation contains the given page index and 252174982Salc * FALSE otherwise. 253174982Salc */ 254174982Salcstatic __inline boolean_t 255174982Salcvm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex) 256174982Salc{ 257174982Salc 258174982Salc return (((pindex - rv->pindex) & ~(VM_LEVEL_0_NPAGES - 1)) == 0); 259174982Salc} 260174982Salc 261174982Salc/* 262174982Salc * Increases the given reservation's population count. Moves the reservation 263174982Salc * to the tail of the partially-populated reservation queue. 264174982Salc * 265174982Salc * The free page queue must be locked. 
 */
static void
vm_reserv_populate(vm_reserv_t rv)
{

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	KASSERT(rv->object != NULL,
	    ("vm_reserv_populate: reserv %p is free", rv));
	KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
	    ("vm_reserv_populate: reserv %p is already full", rv));
	/* Unlink first so that a requeue below lands at the tail. */
	if (rv->inpartpopq) {
		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
		rv->inpartpopq = FALSE;
	}
	rv->popcnt++;
	/* A fully-populated reservation is deliberately left off the queue. */
	if (rv->popcnt < VM_LEVEL_0_NPAGES) {
		rv->inpartpopq = TRUE;
		TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
	}
}

/*
 * Allocates a page from an existing or newly-created reservation.
 *
 * Returns the page backing (object, pindex), or NULL when: the index cannot
 * be covered by a reservation; an existing reservation covers the index but
 * the corresponding page is neither cached nor free; a new reservation would
 * not fit between the neighboring pages/reservations; the object is (or is
 * directly backed by) a vnode object and the reservation would extend past
 * the object's end; or vm_phys_alloc_pages() fails.
 *
 * The object and free page queue must be locked.
 */
vm_page_t
vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m, mpred, msucc;
	vm_pindex_t first, leftcap, rightcap;
	vm_reserv_t rv;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * Is a reservation fundamentally not possible?
	 *
	 * The first test rejects an index for which the reservation's starting
	 * index, pindex - VM_RESERV_INDEX(object, pindex), would underflow.
	 */
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (pindex < VM_RESERV_INDEX(object, pindex) ||
	    pindex >= object->size)
		return (NULL);

	/*
	 * Look for an existing reservation.
	 *
	 * Starting from the root of the object's page tree, inspect the
	 * reservation of the current page and of its neighbor on the object's
	 * page list.  If neither covers "pindex" and the pair does not yet
	 * bracket "pindex", splay the tree on "pindex" and retry.
	 * NOTE(review): presumably, when the loop ends without returning,
	 * "mpred" and "msucc" are the resident pages immediately below and
	 * above "pindex" (or NULL) -- confirm against vm_page_splay().
	 */
	msucc = NULL;
	mpred = object->root;
	while (mpred != NULL) {
		KASSERT(mpred->pindex != pindex,
		    ("vm_reserv_alloc_page: pindex already allocated"));
		rv = vm_reserv_from_page(mpred);
		if (rv->object == object && vm_reserv_has_pindex(rv, pindex)) {
			m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
			/* Handle vm_page_rename(m, new_object, ...). */
			if ((m->flags & (PG_CACHED | PG_FREE)) == 0)
				return (NULL);
			vm_reserv_populate(rv);
			return (m);
		} else if (mpred->pindex < pindex) {
			/*
			 * "mpred" is below "pindex".  Stop if a successor is
			 * already known or if there is no next page.
			 */
			if (msucc != NULL ||
			    (msucc = TAILQ_NEXT(mpred, listq)) == NULL)
				break;
			KASSERT(msucc->pindex != pindex,
			    ("vm_reserv_alloc_page: pindex already allocated"));
			rv = vm_reserv_from_page(msucc);
			if (rv->object == object &&
			    vm_reserv_has_pindex(rv, pindex)) {
				m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
				/* Handle vm_page_rename(m, new_object, ...). */
				if ((m->flags & (PG_CACHED | PG_FREE)) == 0)
					return (NULL);
				vm_reserv_populate(rv);
				return (m);
			} else if (pindex < msucc->pindex)
				break;
		} else if (msucc == NULL) {
			/*
			 * The current page is above "pindex": treat it as the
			 * successor and examine the page before it.
			 */
			msucc = mpred;
			mpred = TAILQ_PREV(msucc, pglist, listq);
			continue;
		}
		/* The pair does not bracket "pindex"; re-root and retry. */
		msucc = NULL;
		mpred = object->root = vm_page_splay(pindex, object->root);
	}

	/*
	 * Determine the first index to the left that can be used.
	 */
	if (mpred == NULL)
		leftcap = 0;
	else if ((rv = vm_reserv_from_page(mpred))->object != object)
		leftcap = mpred->pindex + 1;
	else
		leftcap = rv->pindex + VM_LEVEL_0_NPAGES;

	/*
	 * Determine the first index to the right that cannot be used.
	 */
	if (msucc == NULL)
		rightcap = pindex + VM_LEVEL_0_NPAGES;
	else if ((rv = vm_reserv_from_page(msucc))->object != object)
		rightcap = msucc->pindex;
	else
		rightcap = rv->pindex;

	/*
	 * Determine if a reservation fits between the first index to
	 * the left that can be used and the first index to the right
	 * that cannot be used.
	 */
	first = pindex - VM_RESERV_INDEX(object, pindex);
	if (first < leftcap || first + VM_LEVEL_0_NPAGES > rightcap)
		return (NULL);

	/*
	 * Would a new reservation extend past the end of the given object?
	 */
	if (object->size < first + VM_LEVEL_0_NPAGES) {
		/*
		 * Don't allocate a new reservation if the object is a vnode or
		 * backed by another object that is a vnode.
		 */
		if (object->type == OBJT_VNODE ||
		    (object->backing_object != NULL &&
		    object->backing_object->type == OBJT_VNODE))
			return (NULL);
		/* Speculate that the object may grow. */
	}

	/*
	 * Allocate a new reservation.
	 *
	 * On success, the reservation array element for the new superpage is
	 * attached to the object, populated with one page, and the small page
	 * corresponding to "pindex" is returned.  On failure "m" stays NULL.
	 */
	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
	if (m != NULL) {
		rv = vm_reserv_from_page(m);
		KASSERT(rv->pages == m,
		    ("vm_reserv_alloc_page: reserv %p's pages is corrupted",
		    rv));
		KASSERT(rv->object == NULL,
		    ("vm_reserv_alloc_page: reserv %p isn't free", rv));
		LIST_INSERT_HEAD(&object->rvq, rv, objq);
		rv->object = object;
		rv->pindex = first;
		KASSERT(rv->popcnt == 0,
		    ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted",
		    rv));
		KASSERT(!rv->inpartpopq,
		    ("vm_reserv_alloc_page: reserv %p's inpartpopq is TRUE",
		    rv));
		vm_reserv_populate(rv);
		m = &rv->pages[VM_RESERV_INDEX(object, pindex)];
	}
	return (m);
}

/*
 * Breaks all reservations belonging to the given object.
423174982Salc */ 424174982Salcvoid 425174982Salcvm_reserv_break_all(vm_object_t object) 426174982Salc{ 427174982Salc vm_reserv_t rv; 428174982Salc int i; 429174982Salc 430174982Salc mtx_lock(&vm_page_queue_free_mtx); 431174982Salc while ((rv = LIST_FIRST(&object->rvq)) != NULL) { 432174982Salc KASSERT(rv->object == object, 433174982Salc ("vm_reserv_break_all: reserv %p is corrupted", rv)); 434174982Salc if (rv->inpartpopq) { 435174982Salc TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 436174982Salc rv->inpartpopq = FALSE; 437174982Salc } 438174982Salc LIST_REMOVE(rv, objq); 439174982Salc rv->object = NULL; 440174982Salc for (i = 0; i < VM_LEVEL_0_NPAGES; i++) { 441174982Salc if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 442174982Salc vm_phys_free_pages(&rv->pages[i], 0); 443174982Salc else 444174982Salc rv->popcnt--; 445174982Salc } 446174982Salc KASSERT(rv->popcnt == 0, 447174982Salc ("vm_reserv_break_all: reserv %p's popcnt is corrupted", 448174982Salc rv)); 449174982Salc vm_reserv_broken++; 450174982Salc } 451174982Salc mtx_unlock(&vm_page_queue_free_mtx); 452174982Salc} 453174982Salc 454174982Salc/* 455174982Salc * Frees the given page if it belongs to a reservation. Returns TRUE if the 456174982Salc * page is freed and FALSE otherwise. 457174982Salc * 458174982Salc * The free page queue lock must be held. 
459174982Salc */ 460174982Salcboolean_t 461174982Salcvm_reserv_free_page(vm_page_t m) 462174982Salc{ 463174982Salc vm_reserv_t rv; 464174982Salc 465174982Salc mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 466174982Salc rv = vm_reserv_from_page(m); 467234771Salc if (rv->object == NULL) 468234771Salc return (FALSE); 469234771Salc if ((m->flags & PG_CACHED) != 0 && m->pool != VM_FREEPOOL_CACHE) 470234771Salc vm_phys_set_pool(VM_FREEPOOL_CACHE, rv->pages, 471234771Salc VM_LEVEL_0_ORDER); 472234771Salc vm_reserv_depopulate(rv); 473234771Salc return (TRUE); 474174982Salc} 475174982Salc 476174982Salc/* 477174982Salc * Initializes the reservation management system. Specifically, initializes 478174982Salc * the reservation array. 479174982Salc * 480174982Salc * Requires that vm_page_array and first_page are initialized! 481174982Salc */ 482174982Salcvoid 483174982Salcvm_reserv_init(void) 484174982Salc{ 485174982Salc vm_paddr_t paddr; 486174982Salc int i; 487174982Salc 488174982Salc /* 489174982Salc * Initialize the reservation array. Specifically, initialize the 490174982Salc * "pages" field for every element that has an underlying superpage. 491174982Salc */ 492174982Salc for (i = 0; phys_avail[i + 1] != 0; i += 2) { 493174982Salc paddr = roundup2(phys_avail[i], VM_LEVEL_0_SIZE); 494174982Salc while (paddr + VM_LEVEL_0_SIZE <= phys_avail[i + 1]) { 495174982Salc vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages = 496174982Salc PHYS_TO_VM_PAGE(paddr); 497174982Salc paddr += VM_LEVEL_0_SIZE; 498174982Salc } 499174982Salc } 500174982Salc} 501174982Salc 502174982Salc/* 503174982Salc * Returns a reservation level if the given page belongs to a fully-populated 504174982Salc * reservation and -1 otherwise. 505174982Salc */ 506174982Salcint 507174982Salcvm_reserv_level_iffullpop(vm_page_t m) 508174982Salc{ 509174982Salc vm_reserv_t rv; 510174982Salc 511174982Salc rv = vm_reserv_from_page(m); 512174982Salc return (rv->popcnt == VM_LEVEL_0_NPAGES ? 
0 : -1); 513174982Salc} 514174982Salc 515174982Salc/* 516174982Salc * Prepare for the reactivation of a cached page. 517174982Salc * 518174982Salc * First, suppose that the given page "m" was allocated individually, i.e., not 519174982Salc * as part of a reservation, and cached. Then, suppose a reservation 520174982Salc * containing "m" is allocated by the same object. Although "m" and the 521174982Salc * reservation belong to the same object, "m"'s pindex may not match the 522174982Salc * reservation's. 523174982Salc * 524174982Salc * The free page queue must be locked. 525174982Salc */ 526174982Salcboolean_t 527174982Salcvm_reserv_reactivate_page(vm_page_t m) 528174982Salc{ 529174982Salc vm_reserv_t rv; 530174982Salc int i, m_index; 531174982Salc 532174982Salc mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 533174982Salc rv = vm_reserv_from_page(m); 534174982Salc if (rv->object == NULL) 535174982Salc return (FALSE); 536174982Salc KASSERT((m->flags & PG_CACHED) != 0, 537174982Salc ("vm_reserv_uncache_page: page %p is not cached", m)); 538174982Salc if (m->object == rv->object && 539174982Salc m->pindex - rv->pindex == VM_RESERV_INDEX(m->object, m->pindex)) 540174982Salc vm_reserv_populate(rv); 541174982Salc else { 542174982Salc KASSERT(rv->inpartpopq, 543174982Salc ("vm_reserv_uncache_page: reserv %p's inpartpopq is FALSE", 544174982Salc rv)); 545174982Salc TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 546174982Salc rv->inpartpopq = FALSE; 547174982Salc LIST_REMOVE(rv, objq); 548174982Salc rv->object = NULL; 549174982Salc /* Don't vm_phys_free_pages(m, 0). 
*/ 550174982Salc m_index = m - rv->pages; 551174982Salc for (i = 0; i < m_index; i++) { 552174982Salc if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 553174982Salc vm_phys_free_pages(&rv->pages[i], 0); 554174982Salc else 555174982Salc rv->popcnt--; 556174982Salc } 557174982Salc for (i++; i < VM_LEVEL_0_NPAGES; i++) { 558174982Salc if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 559174982Salc vm_phys_free_pages(&rv->pages[i], 0); 560174982Salc else 561174982Salc rv->popcnt--; 562174982Salc } 563174982Salc KASSERT(rv->popcnt == 0, 564174982Salc ("vm_reserv_uncache_page: reserv %p's popcnt is corrupted", 565174982Salc rv)); 566174982Salc vm_reserv_broken++; 567174982Salc } 568174982Salc return (TRUE); 569174982Salc} 570174982Salc 571174982Salc/* 572177956Salc * Breaks the given partially-populated reservation, releasing its cached and 573177956Salc * free pages to the physical memory allocator. 574177956Salc * 575177956Salc * The free page queue lock must be held. 576177956Salc */ 577177956Salcstatic void 578177956Salcvm_reserv_reclaim(vm_reserv_t rv) 579177956Salc{ 580177956Salc int i; 581177956Salc 582177956Salc mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 583177956Salc KASSERT(rv->inpartpopq, 584177956Salc ("vm_reserv_reclaim: reserv %p's inpartpopq is corrupted", rv)); 585177956Salc TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq); 586177956Salc rv->inpartpopq = FALSE; 587177956Salc KASSERT(rv->object != NULL, 588177956Salc ("vm_reserv_reclaim: reserv %p is free", rv)); 589177956Salc LIST_REMOVE(rv, objq); 590177956Salc rv->object = NULL; 591177956Salc for (i = 0; i < VM_LEVEL_0_NPAGES; i++) { 592177956Salc if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) 593177956Salc vm_phys_free_pages(&rv->pages[i], 0); 594177956Salc else 595177956Salc rv->popcnt--; 596177956Salc } 597177956Salc KASSERT(rv->popcnt == 0, 598177956Salc ("vm_reserv_reclaim: reserv %p's popcnt is corrupted", rv)); 599177956Salc vm_reserv_reclaimed++; 600177956Salc} 601177956Salc 
602177956Salc/* 603174982Salc * Breaks the reservation at the head of the partially-populated reservation 604174982Salc * queue, releasing its cached and free pages to the physical memory 605174982Salc * allocator. Returns TRUE if a reservation is broken and FALSE otherwise. 606174982Salc * 607174982Salc * The free page queue lock must be held. 608174982Salc */ 609174982Salcboolean_t 610177956Salcvm_reserv_reclaim_inactive(void) 611174982Salc{ 612174982Salc vm_reserv_t rv; 613174982Salc 614174982Salc mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 615174982Salc if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) { 616177956Salc vm_reserv_reclaim(rv); 617174982Salc return (TRUE); 618174982Salc } 619174982Salc return (FALSE); 620174982Salc} 621174982Salc 622174982Salc/* 623177956Salc * Searches the partially-populated reservation queue for the least recently 624177956Salc * active reservation with unused pages, i.e., cached or free, that satisfy the 625177956Salc * given request for contiguous physical memory. If a satisfactory reservation 626177956Salc * is found, it is broken. Returns TRUE if a reservation is broken and FALSE 627177956Salc * otherwise. 628177956Salc * 629177956Salc * The free page queue lock must be held. 
 */
boolean_t
vm_reserv_reclaim_contig(vm_paddr_t size, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, u_long boundary)
{
	vm_paddr_t pa, pa_length;
	vm_reserv_t rv;
	int i;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/*
	 * Requests larger than a reservation minus one page are rejected.
	 * NOTE(review): presumably because a queued reservation always has at
	 * least one page in use -- confirm.
	 */
	if (size > VM_LEVEL_0_SIZE - PAGE_SIZE)
		return (FALSE);
	TAILQ_FOREACH(rv, &vm_rvq_partpop, partpopq) {
		/* Physical address of the reservation's last small page. */
		pa = VM_PAGE_TO_PHYS(&rv->pages[VM_LEVEL_0_NPAGES - 1]);
		if (pa + PAGE_SIZE - size < low) {
			/* this entire reservation is too low; go to next */
			continue;
		}
		/*
		 * Scan for a run of consecutive cached or free pages.
		 * "pa_length" is the byte length of the current run and "pa"
		 * is the physical address at which that run starts.
		 */
		pa_length = 0;
		for (i = 0; i < VM_LEVEL_0_NPAGES; i++)
			if ((rv->pages[i].flags & (PG_CACHED | PG_FREE)) != 0) {
				pa_length += PAGE_SIZE;
				/* First page of a new run: validate its start. */
				if (pa_length == PAGE_SIZE) {
					pa = VM_PAGE_TO_PHYS(&rv->pages[i]);
					if (pa + size > high) {
						/* skip to next reservation */
						break;
					} else if (pa < low ||
					    (pa & (alignment - 1)) != 0 ||
					    ((pa ^ (pa + size - 1)) &
					    ~(boundary - 1)) != 0)
						/*
						 * Too low, misaligned, or the
						 * range would cross a boundary:
						 * this page cannot start a run.
						 */
						pa_length = 0;
				}
				if (pa_length >= size) {
					vm_reserv_reclaim(rv);
					return (TRUE);
				}
			} else
				/* An in-use page ends the current run. */
				pa_length = 0;
	}
	return (FALSE);
}

/*
 * Transfers the reservation underlying the given page to a new object.
 *
 * The object must be locked.
677174982Salc */ 678174982Salcvoid 679174982Salcvm_reserv_rename(vm_page_t m, vm_object_t new_object, vm_object_t old_object, 680174982Salc vm_pindex_t old_object_offset) 681174982Salc{ 682174982Salc vm_reserv_t rv; 683174982Salc 684174982Salc VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED); 685174982Salc rv = vm_reserv_from_page(m); 686174982Salc if (rv->object == old_object) { 687174982Salc mtx_lock(&vm_page_queue_free_mtx); 688174982Salc if (rv->object == old_object) { 689174982Salc LIST_REMOVE(rv, objq); 690174982Salc LIST_INSERT_HEAD(&new_object->rvq, rv, objq); 691174982Salc rv->object = new_object; 692174982Salc rv->pindex -= old_object_offset; 693174982Salc } 694174982Salc mtx_unlock(&vm_page_queue_free_mtx); 695174982Salc } 696174982Salc} 697174982Salc 698174982Salc/* 699174982Salc * Allocates the virtual and physical memory required by the reservation 700174982Salc * management system's data structures, in particular, the reservation array. 701174982Salc */ 702174982Salcvm_paddr_t 703174982Salcvm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water) 704174982Salc{ 705174982Salc vm_paddr_t new_end; 706174982Salc size_t size; 707174982Salc 708174982Salc /* 709174982Salc * Calculate the size (in bytes) of the reservation array. Round up 710174982Salc * from "high_water" because every small page is mapped to an element 711174982Salc * in the reservation array based on its physical address. Thus, the 712174982Salc * number of elements in the reservation array can be greater than the 713174982Salc * number of superpages. 714174982Salc */ 715174982Salc size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv); 716174982Salc 717174982Salc /* 718174982Salc * Allocate and map the physical memory for the reservation array. The 719174982Salc * next available virtual address is returned by reference. 
720174982Salc */ 721174982Salc new_end = end - round_page(size); 722174982Salc vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end, 723174982Salc VM_PROT_READ | VM_PROT_WRITE); 724174982Salc bzero(vm_reserv_array, size); 725174982Salc 726174982Salc /* 727174982Salc * Return the next available physical address. 728174982Salc */ 729174982Salc return (new_end); 730174982Salc} 731174982Salc 732174982Salc#endif /* VM_NRESERVLEVEL > 0 */ 733