vm_fault.c revision 308364
/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_fault.c	8.4 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Page fault handling module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/vm/vm_fault.c 308364 2016-11-06 13:35:20Z kib $");

#include "opt_ktrace.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_reserv.h>

#define PFBAK 4
#define PFFOR 4

static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int *);

#define	VM_FAULT_READ_BEHIND	8
#define	VM_FAULT_READ_MAX	(1 + VM_FAULT_READ_AHEAD_MAX)
#define	VM_FAULT_NINCR		(VM_FAULT_READ_MAX / VM_FAULT_READ_BEHIND)
#define	VM_FAULT_SUM		(VM_FAULT_NINCR * (VM_FAULT_NINCR + 1) / 2)
#define	VM_FAULT_CACHE_BEHIND	(VM_FAULT_READ_BEHIND * VM_FAULT_SUM)

struct faultstate {
	vm_page_t m;
	vm_object_t object;
	vm_pindex_t pindex;
	vm_page_t first_m;
	vm_object_t first_object;
	vm_pindex_t first_pindex;
	vm_map_t map;
	vm_map_entry_t entry;
	int lookup_still_valid;
	struct vnode *vp;
};

static void vm_fault_cache_behind(const struct faultstate *fs, int distance);
static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
	    int faultcount, int reqpage);

static inline void
release_page(struct faultstate *fs)
{

	vm_page_xunbusy(fs->m);
	vm_page_lock(fs->m);
	vm_page_deactivate(fs->m);
	vm_page_unlock(fs->m);
	fs->m = NULL;
}

static inline void
unlock_map(struct faultstate *fs)
{

	if (fs->lookup_still_valid) {
		vm_map_lookup_done(fs->map, fs->entry);
		fs->lookup_still_valid = FALSE;
	}
}

static void
unlock_vp(struct faultstate *fs)
{

	if (fs->vp != NULL) {
		vput(fs->vp);
		fs->vp = NULL;
	}
}

static void
unlock_and_deallocate(struct faultstate *fs)
{

	vm_object_pip_wakeup(fs->object);
	VM_OBJECT_WUNLOCK(fs->object);
	if (fs->object != fs->first_object) {
		VM_OBJECT_WLOCK(fs->first_object);
		vm_page_lock(fs->first_m);
		vm_page_free(fs->first_m);
		vm_page_unlock(fs->first_m);
		vm_object_pip_wakeup(fs->first_object);
		VM_OBJECT_WUNLOCK(fs->first_object);
		fs->first_m = NULL;
	}
	vm_object_deallocate(fs->first_object);
	unlock_map(fs);
	unlock_vp(fs);
}

static void
vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
    vm_prot_t fault_type, int fault_flags, boolean_t set_wd)
{
	boolean_t need_dirty;

	if (((prot & VM_PROT_WRITE) == 0 &&
	    (fault_flags & VM_FAULT_DIRTY) == 0) ||
	    (m->oflags & VPO_UNMANAGED) != 0)
		return;

	VM_OBJECT_ASSERT_LOCKED(m->object);

	need_dirty = ((fault_type & VM_PROT_WRITE) != 0 &&
	    (fault_flags & VM_FAULT_WIRE) == 0) ||
	    (fault_flags & VM_FAULT_DIRTY) != 0;

	if (set_wd)
		vm_object_set_writeable_dirty(m->object);
	else
		/*
		 * If two callers of vm_fault_dirty() with set_wd ==
		 * FALSE, one for the map entry with MAP_ENTRY_NOSYNC
		 * flag set, other with flag clear, race, it is
		 * possible for the no-NOSYNC thread to see m->dirty
		 * != 0 and not clear VPO_NOSYNC.  Take vm_page lock
		 * around manipulation of VPO_NOSYNC and
		 * vm_page_dirty() call, to avoid the race and keep
		 * m->oflags consistent.
		 */
		vm_page_lock(m);

	/*
	 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
	 * if the page is already dirty to prevent data written with
	 * the expectation of being synced from not being synced.
	 * Likewise if this entry does not request NOSYNC then make
	 * sure the page isn't marked NOSYNC.  Applications sharing
	 * data should use the same flags to avoid ping ponging.
	 */
	if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0) {
		if (m->dirty == 0) {
			m->oflags |= VPO_NOSYNC;
		}
	} else {
		m->oflags &= ~VPO_NOSYNC;
	}

	/*
	 * If the fault is a write, we know that this page is being
	 * written NOW so dirty it explicitly to save on
	 * pmap_is_modified() calls later.
	 *
	 * Also tell the backing pager, if any, that it should remove
	 * any swap backing since the page is now dirty.
	 */
	if (need_dirty)
		vm_page_dirty(m);
	if (!set_wd)
		vm_page_unlock(m);
	if (need_dirty)
		vm_pager_page_unswapped(m);
}

/*
 *	vm_fault:
 *
 *	Handle a page fault occurring at the given address,
 *	requiring the given permissions, in the map specified.
 *	If successful, the page is inserted into the
 *	associated physical map.
 *
 *	NOTE: the given address should be truncated to the
 *	proper page address.
 *
 *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
 *	a standard error specifying why the fault is fatal is returned.
 *
 *	The map in question must be referenced, and remains so.
 *	Caller may hold no locks.
 */
int
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags)
{
	struct thread *td;
	int result;

	td = curthread;
	if ((td->td_pflags & TDP_NOFAULTING) != 0)
		return (KERN_PROTECTION_FAILURE);
#ifdef KTRACE
	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
		ktrfault(vaddr, fault_type);
#endif
	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
	    NULL);
#ifdef KTRACE
	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
		ktrfaultend(result);
#endif
	return (result);
}

int
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags, vm_page_t *m_hold)
{
	vm_prot_t prot;
	long ahead, behind;
	int alloc_req, era, faultcount, nera, reqpage, result;
	boolean_t dead, growstack, is_first_object_locked, wired;
	int map_generation;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ_MAX];
	int hardfault;
	struct faultstate fs;
	struct vnode *vp;
	vm_page_t m;
	int locked, error;

	hardfault = 0;
	growstack = TRUE;
	PCPU_INC(cnt.v_vm_faults);
	fs.vp = NULL;
	faultcount = reqpage = 0;

RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
317177509Smarcel */ 318177509Smarcel fs.map = map; 319177509Smarcel result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry, 320177509Smarcel &fs.first_object, &fs.first_pindex, &prot, &wired); 321177509Smarcel if (result != KERN_SUCCESS) { 322177509Smarcel if (growstack && result == KERN_INVALID_ADDRESS && 323177509Smarcel map != kernel_map) { 324177509Smarcel result = vm_map_growstack(curproc, vaddr); 325177509Smarcel if (result != KERN_SUCCESS) 326177509Smarcel return (KERN_FAILURE); 327177509Smarcel growstack = FALSE; 328177509Smarcel goto RetryFault; 329177509Smarcel } 330177509Smarcel unlock_vp(&fs); 331177509Smarcel return (result); 33292108Sphk } 333107953Sphk 33492108Sphk map_generation = fs.map->timestamp; 33592108Sphk 33692108Sphk if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { 33792108Sphk panic("vm_fault: fault on nofault entry, addr: %lx", 33892108Sphk (u_long)vaddr); 33992108Sphk } 340126798Sphk 341181463Sdes if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION && 34292108Sphk fs.entry->wiring_thread != curthread) { 34392108Sphk vm_map_unlock_read(fs.map); 344115949Sphk vm_map_lock(fs.map); 34592108Sphk if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) && 346111119Simp (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) { 347111119Simp unlock_vp(&fs); 34893248Sphk fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 34992108Sphk vm_map_unlock_and_wait(fs.map, 0); 35092108Sphk } else 35192108Sphk vm_map_unlock(fs.map); 35292108Sphk goto RetryFault; 35392108Sphk } 35492108Sphk 35592108Sphk if (wired) 356133312Sphk fault_type = prot | (fault_type & VM_PROT_COPY); 357133312Sphk else 358133312Sphk KASSERT((fault_flags & VM_FAULT_WIRE) == 0, 359223089Sgibbs ("!wired && VM_FAULT_WIRE")); 360237518Sken 361133312Sphk /* 362133312Sphk * Try to avoid lock contention on the top-level object through 363133312Sphk * special-case handling of some types of page faults, specifically, 364133312Sphk * those that are both (1) mapping an existing page from the top- 365238213Strasz * level object and (2) not having to mark that object as containing 36692108Sphk * dirty pages. Under these conditions, a read lock on the top-level 36792108Sphk * object suffices, allowing multiple page faults of a similar type to 36892108Sphk * run in parallel on the same top-level object. 36992108Sphk */ 37092108Sphk if (fs.vp == NULL /* avoid locked vnode leak */ && 37192108Sphk (fault_flags & (VM_FAULT_WIRE | VM_FAULT_DIRTY)) == 0 && 37292108Sphk /* avoid calling vm_object_set_writeable_dirty() */ 373126798Sphk ((prot & VM_PROT_WRITE) == 0 || 374126798Sphk (fs.first_object->type != OBJT_VNODE && 37592108Sphk (fs.first_object->flags & OBJ_TMPFS_NODE) == 0) || 37692108Sphk (fs.first_object->flags & OBJ_MIGHTBEDIRTY) != 0)) { 37792108Sphk VM_OBJECT_RLOCK(fs.first_object); 37892108Sphk if ((prot & VM_PROT_WRITE) != 0 && 37992108Sphk (fs.first_object->type == OBJT_VNODE || 38092108Sphk (fs.first_object->flags & OBJ_TMPFS_NODE) != 0) && 381124371Sphk (fs.first_object->flags & OBJ_MIGHTBEDIRTY) == 0) 382112988Sphk goto fast_failed; 38392108Sphk m = vm_page_lookup(fs.first_object, fs.first_pindex); 38492108Sphk /* A busy page can be mapped for read|execute access. */ 38595310Sphk if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && 38692108Sphk vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) 38792108Sphk goto fast_failed; 38892108Sphk result = pmap_enter(fs.map->pmap, vaddr, m, prot, 389114495Sphk fault_type | PMAP_ENTER_NOSLEEP | (wired ? 
PMAP_ENTER_WIRED : 390185768Slulf 0), 0); 391114495Sphk if (result != KERN_SUCCESS) 392114495Sphk goto fast_failed; 393114495Sphk if (m_hold != NULL) { 394114495Sphk *m_hold = m; 395131820Sphk vm_page_lock(m); 396114495Sphk vm_page_hold(m); 397126798Sphk vm_page_unlock(m); 398126798Sphk } 399114495Sphk vm_fault_dirty(fs.entry, m, prot, fault_type, fault_flags, 400114495Sphk FALSE); 401114495Sphk VM_OBJECT_RUNLOCK(fs.first_object); 402114495Sphk if (!wired) 403126726Sphk vm_fault_prefault(&fs, vaddr, 0, 0); 404126726Sphk vm_map_lookup_done(fs.map, fs.entry); 405114495Sphk curthread->td_ru.ru_minflt++; 406131820Sphk return (KERN_SUCCESS); 407131820Sphkfast_failed: 408131820Sphk if (!VM_OBJECT_TRYUPGRADE(fs.first_object)) { 409131820Sphk VM_OBJECT_RUNLOCK(fs.first_object); 410157619Smarcel VM_OBJECT_WLOCK(fs.first_object); 411157619Smarcel } 412157619Smarcel } else { 413157619Smarcel VM_OBJECT_WLOCK(fs.first_object); 414157619Smarcel } 415157619Smarcel 416157619Smarcel /* 417157619Smarcel * Make a reference to this object to prevent its disposal while we 418157619Smarcel * are messing with it. Once we have the reference, the map is free 419157619Smarcel * to be diddled. Since objects reference their shadows (and copies), 420157619Smarcel * they will stay around as well. 421157619Smarcel * 422137032Sphk * Bump the paging-in-progress count to prevent size changes (e.g. 423137032Sphk * truncation operations) during I/O. This must be done after 424137032Sphk * obtaining the vnode lock in order to avoid possible deadlocks. 425137032Sphk */ 426137032Sphk vm_object_reference_locked(fs.first_object); 427137032Sphk vm_object_pip_add(fs.first_object, 1); 428137032Sphk 429137032Sphk fs.lookup_still_valid = TRUE; 430137032Sphk 431137032Sphk fs.first_m = NULL; 432137032Sphk 433137032Sphk /* 434137032Sphk * Search for the page at object/offset. 435137032Sphk */ 436137032Sphk fs.object = fs.first_object; 437137032Sphk fs.pindex = fs.first_pindex; 438137032Sphk while (TRUE) { 439131820Sphk /* 440248674Smav * If the object is marked for imminent termination, 441131820Sphk * we retry here, since the collapse pass has raced 442248674Smav * with us. Otherwise, if we see terminally dead 443131820Sphk * object, return fail. 444131820Sphk */ 445131820Sphk if ((fs.object->flags & OBJ_DEAD) != 0) { 446131820Sphk dead = fs.object->type == OBJT_DEAD; 447131820Sphk unlock_and_deallocate(&fs); 448131820Sphk if (dead) 449131820Sphk return (KERN_PROTECTION_FAILURE); 450131820Sphk pause("vmf_de", 1); 451131820Sphk goto RetryFault; 452131820Sphk } 453131820Sphk 454131820Sphk /* 455131820Sphk * See if page is resident 456131820Sphk */ 457131820Sphk fs.m = vm_page_lookup(fs.object, fs.pindex); 458131820Sphk if (fs.m != NULL) { 459131820Sphk /* 460131820Sphk * Wait/Retry if the page is busy. We have to do this 461131820Sphk * if the page is either exclusive or shared busy 462131820Sphk * because the vm_pager may be using read busy for 463131820Sphk * pageouts (and even pageins if it is the vnode 464131820Sphk * pager), and we could end up trying to pagein and 465131820Sphk * pageout the same page simultaneously. 466248674Smav * 467131820Sphk * We can theoretically allow the busy case on a read 468131877Sphk * fault if the page is marked valid, but since such 469131877Sphk * pages are typically already pmap'd, putting that 470131820Sphk * special case in might be more effort then it is 471131820Sphk * worth. 
We cannot under any circumstances mess 472131820Sphk * around with a shared busied page except, perhaps, 473131820Sphk * to pmap it. 474131820Sphk */ 475131820Sphk if (vm_page_busied(fs.m)) { 476114495Sphk /* 477114495Sphk * Reference the page before unlocking and 478114495Sphk * sleeping so that the page daemon is less 47992108Sphk * likely to reclaim it. 48092108Sphk */ 48192108Sphk vm_page_aflag_set(fs.m, PGA_REFERENCED); 48292108Sphk if (fs.object != fs.first_object) { 48392108Sphk if (!VM_OBJECT_TRYWLOCK( 48492108Sphk fs.first_object)) { 485126798Sphk VM_OBJECT_WUNLOCK(fs.object); 486124883Sphk VM_OBJECT_WLOCK(fs.first_object); 487124883Sphk VM_OBJECT_WLOCK(fs.object); 488124883Sphk } 48993776Sphk vm_page_lock(fs.first_m); 49093776Sphk vm_page_free(fs.first_m); 49193776Sphk vm_page_unlock(fs.first_m); 49292108Sphk vm_object_pip_wakeup(fs.first_object); 493111119Simp VM_OBJECT_WUNLOCK(fs.first_object); 49492108Sphk fs.first_m = NULL; 495112370Sphk } 496112370Sphk unlock_map(&fs); 49792108Sphk if (fs.m == vm_page_lookup(fs.object, 49892108Sphk fs.pindex)) { 49992108Sphk vm_page_sleep_if_busy(fs.m, "vmpfw"); 50092108Sphk } 50192108Sphk vm_object_pip_wakeup(fs.object); 50292108Sphk VM_OBJECT_WUNLOCK(fs.object); 50392108Sphk PCPU_INC(cnt.v_intrans); 504114495Sphk vm_object_deallocate(fs.first_object); 50592108Sphk goto RetryFault; 506126798Sphk } 507126798Sphk vm_page_lock(fs.m); 50892108Sphk vm_page_remque(fs.m); 50992108Sphk vm_page_unlock(fs.m); 51092108Sphk 51192108Sphk /* 51292108Sphk * Mark page busy for other processes, and the 513112988Sphk * pagedaemon. If it still isn't completely valid 514114495Sphk * (readable), jump to readrest, else break-out ( we 51592108Sphk * found the page ). 516112370Sphk */ 51792108Sphk vm_page_xbusy(fs.m); 518114495Sphk if (fs.m->valid != VM_PAGE_BITS_ALL) 519131820Sphk goto readrest; 52092108Sphk break; 52192108Sphk } 522113930Sphk 523113930Sphk /* 524113930Sphk * Page is not resident. If this is the search termination 525113930Sphk * or the pager might contain the page, allocate a new page. 526113930Sphk * Default objects are zero-fill, there is no real pager. 527238886Smav */ 528113930Sphk if (fs.object->type != OBJT_DEFAULT || 529113930Sphk fs.object == fs.first_object) { 530113930Sphk if (fs.pindex >= fs.object->size) { 531113930Sphk unlock_and_deallocate(&fs); 532113930Sphk return (KERN_PROTECTION_FAILURE); 533113930Sphk } 534113930Sphk 535126798Sphk /* 536179097Spjd * Allocate a new page for this object/offset pair. 537179097Spjd * 538238886Smav * Unlocked read of the p_flag is harmless. At 539238886Smav * worst, the P_KILLED might be not observed 540238886Smav * there, and allocation can fail, causing 541238886Smav * restart and new reading of the p_flag. 542238886Smav */ 543255860Sdes fs.m = NULL; 544255860Sdes if (!vm_page_count_severe() || P_KILLED(curproc)) { 545113930Sphk#if VM_NRESERVLEVEL > 0 546113930Sphk if ((fs.object->flags & OBJ_COLORED) == 0) { 547113930Sphk fs.object->flags |= OBJ_COLORED; 548113930Sphk fs.object->pg_color = atop(vaddr) - 549238886Smav fs.pindex; 550238886Smav } 551193547Spjd#endif 552193547Spjd alloc_req = P_KILLED(curproc) ? 
553113930Sphk VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; 554114511Sphk if (fs.object->type != OBJT_VNODE && 555113930Sphk fs.object->backing_object == NULL) 556113930Sphk alloc_req |= VM_ALLOC_ZERO; 557113930Sphk fs.m = vm_page_alloc(fs.object, fs.pindex, 558113930Sphk alloc_req); 559113930Sphk } 56092108Sphk if (fs.m == NULL) { 561107953Sphk unlock_and_deallocate(&fs); 56292108Sphk VM_WAITPFAULT; 56392108Sphk goto RetryFault; 56492108Sphk } else if (fs.m->valid == VM_PAGE_BITS_ALL) 56592108Sphk break; 56692108Sphk } 56792108Sphk 568126798Sphkreadrest: 569126832Sphk /* 570126832Sphk * We have found a valid page or we have allocated a new page. 571126832Sphk * The page thus may not be valid or may not be entirely 572124883Sphk * valid. 573125332Spjd * 574125332Spjd * Attempt to fault-in the page if there is a chance that the 575124883Sphk * pager has it, and potentially fault in additional pages 576125332Spjd * at the same time. For default objects simply provide 577125332Spjd * zero-filled pages. 578181463Sdes */ 57992108Sphk if (fs.object->type != OBJT_DEFAULT) { 58092108Sphk int rv; 581115949Sphk u_char behavior = vm_map_entry_behavior(fs.entry); 58292108Sphk 583111119Simp if (behavior == MAP_ENTRY_BEHAV_RANDOM || 58492108Sphk P_KILLED(curproc)) { 58592108Sphk behind = 0; 58692108Sphk ahead = 0; 58792108Sphk } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { 58892108Sphk behind = 0; 58992108Sphk ahead = atop(fs.entry->end - vaddr) - 1; 590112370Sphk if (ahead > VM_FAULT_READ_AHEAD_MAX) 591112370Sphk ahead = VM_FAULT_READ_AHEAD_MAX; 59292108Sphk if (fs.pindex == fs.entry->next_read) 593126832Sphk vm_fault_cache_behind(&fs, 59492108Sphk VM_FAULT_READ_MAX); 59592108Sphk } else { 59692108Sphk /* 59792108Sphk * If this is a sequential page fault, then 59892108Sphk * arithmetically increase the number of pages 59992108Sphk * in the read-ahead window. Otherwise, reset 60092108Sphk * the read-ahead window to its smallest size. 601126798Sphk */ 60292108Sphk behind = atop(vaddr - fs.entry->start); 60392108Sphk if (behind > VM_FAULT_READ_BEHIND) 60492108Sphk behind = VM_FAULT_READ_BEHIND; 605238213Strasz ahead = atop(fs.entry->end - vaddr) - 1; 606238213Strasz era = fs.entry->read_ahead; 607238213Strasz if (fs.pindex == fs.entry->next_read) { 608238213Strasz nera = era + behind; 609238213Strasz if (nera > VM_FAULT_READ_AHEAD_MAX) 610238213Strasz nera = VM_FAULT_READ_AHEAD_MAX; 611238213Strasz behind = 0; 612238213Strasz if (ahead > nera) 613238213Strasz ahead = nera; 614238213Strasz if (era == VM_FAULT_READ_AHEAD_MAX) 615238213Strasz vm_fault_cache_behind(&fs, 616238213Strasz VM_FAULT_CACHE_BEHIND); 617238213Strasz } else if (ahead > VM_FAULT_READ_AHEAD_MIN) 618238213Strasz ahead = VM_FAULT_READ_AHEAD_MIN; 619238213Strasz if (era != ahead) 620238213Strasz fs.entry->read_ahead = ahead; 621238213Strasz } 622238565Strasz 623238213Strasz /* 624238213Strasz * Call the pager to retrieve the data, if any, after 625238213Strasz * releasing the lock on the map. We hold a ref on 626238213Strasz * fs.object and the pages are exclusive busied. 
627238213Strasz */ 628238213Strasz unlock_map(&fs); 629238886Smav 630238886Smav if (fs.object->type == OBJT_VNODE && 631238213Strasz (vp = fs.object->handle) != fs.vp) { 632238886Smav unlock_vp(&fs); 633238213Strasz locked = VOP_ISLOCKED(vp); 634238213Strasz 635238213Strasz if (locked != LK_EXCLUSIVE) 636238213Strasz locked = LK_SHARED; 637238213Strasz /* Do not sleep for vnode lock while fs.m is busy */ 638238213Strasz error = vget(vp, locked | LK_CANRECURSE | 639238213Strasz LK_NOWAIT, curthread); 640238213Strasz if (error != 0) { 641238213Strasz vhold(vp); 642238213Strasz release_page(&fs); 643238213Strasz unlock_and_deallocate(&fs); 644238213Strasz error = vget(vp, locked | LK_RETRY | 645238213Strasz LK_CANRECURSE, curthread); 646238213Strasz vdrop(vp); 647238213Strasz fs.vp = vp; 648238213Strasz KASSERT(error == 0, 649238213Strasz ("vm_fault: vget failed")); 650238213Strasz goto RetryFault; 651238886Smav } 652238886Smav fs.vp = vp; 653238213Strasz } 654238213Strasz KASSERT(fs.vp == NULL || !fs.map->system_map, 655238213Strasz ("vm_fault: vnode-backed object mapped by system map")); 656238213Strasz 657238213Strasz /* 658238213Strasz * now we find out if any other pages should be paged 659238213Strasz * in at this time this routine checks to see if the 660238213Strasz * pages surrounding this fault reside in the same 661238213Strasz * object as the page for this fault. If they do, 662238213Strasz * then they are faulted in also into the object. The 663238213Strasz * array "marray" returned contains an array of 664238213Strasz * vm_page_t structs where one of them is the 665238213Strasz * vm_page_t passed to the routine. The reqpage 666238213Strasz * return value is the index into the marray for the 667238213Strasz * vm_page_t passed to the routine. 668238213Strasz * 669238213Strasz * fs.m plus the additional pages are exclusive busied. 670238213Strasz */ 671238213Strasz faultcount = vm_fault_additional_pages( 672238213Strasz fs.m, behind, ahead, marray, &reqpage); 673238213Strasz 674238213Strasz rv = faultcount ? 675238213Strasz vm_pager_get_pages(fs.object, marray, faultcount, 676238213Strasz reqpage) : VM_PAGER_FAIL; 677239987Spjd 678239987Spjd if (rv == VM_PAGER_OK) { 679239987Spjd /* 680239987Spjd * Found the page. Leave it busy while we play 681115850Sphk * with it. 682115850Sphk */ 683115850Sphk 684115850Sphk /* 685115850Sphk * Relookup in case pager changed page. Pager 686281298Smav * is responsible for disposition of old page 68792108Sphk * if moved. 688239987Spjd */ 689239987Spjd fs.m = vm_page_lookup(fs.object, fs.pindex); 690239987Spjd if (!fs.m) { 691281298Smav unlock_and_deallocate(&fs); 692115850Sphk goto RetryFault; 693115850Sphk } 694115850Sphk 695281298Smav hardfault++; 696281298Smav break; /* break to PAGE HAS BEEN FOUND */ 697281298Smav } 698281298Smav /* 699115850Sphk * Remove the bogus page (which does not exist at this 700281298Smav * object/offset); before doing so, we must get back 701281298Smav * our object lock to preserve our invariant. 702115850Sphk * 703115850Sphk * Also wake up any other process that may want to bring 704115850Sphk * in this page. 705239987Spjd * 706281298Smav * If this is the top-level object, we must leave the 707115850Sphk * busy page to prevent another process from rushing 708115850Sphk * past us, and inserting the page in that object at 70992108Sphk * the same time that we are. 
71092108Sphk */ 71192108Sphk if (rv == VM_PAGER_ERROR) 71292108Sphk printf("vm_fault: pager read error, pid %d (%s)\n", 71392108Sphk curproc->p_pid, curproc->p_comm); 71492108Sphk /* 715126798Sphk * Data outside the range of the pager or an I/O error 71692108Sphk */ 71792108Sphk /* 71892108Sphk * XXX - the check for kernel_map is a kludge to work 71992108Sphk * around having the machine panic on a kernel space 720179097Spjd * fault w/ I/O error. 721112988Sphk */ 72292108Sphk if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || 72392108Sphk (rv == VM_PAGER_BAD)) { 724112370Sphk vm_page_lock(fs.m); 725237518Sken vm_page_free(fs.m); 726237518Sken vm_page_unlock(fs.m); 727237518Sken fs.m = NULL; 728237518Sken unlock_and_deallocate(&fs); 729237518Sken return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); 730237518Sken } 731237518Sken if (fs.object != fs.first_object) { 73292108Sphk vm_page_lock(fs.m); 733114495Sphk vm_page_free(fs.m); 734131820Sphk vm_page_unlock(fs.m); 73592108Sphk fs.m = NULL; 73692108Sphk /* 73792108Sphk * XXX - we cannot just fall out at this 73892108Sphk * point, m has been freed and is invalid! 73992108Sphk */ 74092108Sphk } 74192108Sphk } 74292108Sphk 74392108Sphk /* 74492108Sphk * We get here if the object has default pager (or unwiring) 74592108Sphk * or the pager doesn't have the page. 74692108Sphk */ 74792108Sphk if (fs.object == fs.first_object) 74898066Sphk fs.first_m = fs.m; 74992108Sphk 75092108Sphk /* 75192108Sphk * Move on to the next object. Lock the next object before 75292108Sphk * unlocking the current one. 75392108Sphk */ 75492108Sphk fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); 75592108Sphk next_object = fs.object->backing_object; 75692108Sphk if (next_object == NULL) { 75792108Sphk /* 75892108Sphk * If there's no object left, fill the page in the top 75992108Sphk * object with zeros. 76092108Sphk */ 76192108Sphk if (fs.object != fs.first_object) { 762126798Sphk vm_object_pip_wakeup(fs.object); 76392108Sphk VM_OBJECT_WUNLOCK(fs.object); 76492108Sphk 76592108Sphk fs.object = fs.first_object; 76692108Sphk fs.pindex = fs.first_pindex; 76792108Sphk fs.m = fs.first_m; 76892108Sphk VM_OBJECT_WLOCK(fs.object); 76992108Sphk } 77092108Sphk fs.first_m = NULL; 77192108Sphk 77292108Sphk /* 77392108Sphk * Zero the page if necessary and mark it valid. 77492108Sphk */ 77592108Sphk if ((fs.m->flags & PG_ZERO) == 0) { 77692108Sphk pmap_zero_page(fs.m); 77792108Sphk } else { 77892108Sphk PCPU_INC(cnt.v_ozfod); 77992108Sphk } 78092108Sphk PCPU_INC(cnt.v_zfod); 78192108Sphk fs.m->valid = VM_PAGE_BITS_ALL; 78292108Sphk /* Don't try to prefault neighboring pages. */ 78392108Sphk faultcount = 1; 78492108Sphk break; /* break to PAGE HAS BEEN FOUND */ 78592108Sphk } else { 78692108Sphk KASSERT(fs.object != next_object, 78792108Sphk ("object loop %p", next_object)); 78892108Sphk VM_OBJECT_WLOCK(next_object); 78992108Sphk vm_object_pip_add(next_object, 1); 79092108Sphk if (fs.object != fs.first_object) 79192108Sphk vm_object_pip_wakeup(fs.object); 79292108Sphk VM_OBJECT_WUNLOCK(fs.object); 79392108Sphk fs.object = next_object; 79492108Sphk } 79592108Sphk } 79695550Sphk 79792108Sphk vm_page_assert_xbusied(fs.m); 79892108Sphk 79992108Sphk /* 80092108Sphk * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock 80192108Sphk * is held.] 
80292108Sphk */ 80392108Sphk 80492108Sphk /* 80592108Sphk * If the page is being written, but isn't already owned by the 80692108Sphk * top-level object, we have to copy it into a new page owned by the 80792108Sphk * top-level object. 80892108Sphk */ 80992108Sphk if (fs.object != fs.first_object) { 81092108Sphk /* 81192108Sphk * We only really need to copy if we want to write it. 812126798Sphk */ 813126798Sphk if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { 814204069Spjd /* 81592108Sphk * This allows pages to be virtually copied from a 81692108Sphk * backing_object into the first_object, where the 81792108Sphk * backing object has no other refs to it, and cannot 81892108Sphk * gain any more refs. Instead of a bcopy, we just 81992108Sphk * move the page from the backing object to the 82092108Sphk * first object. Note that we must mark the page 82192108Sphk * dirty in the first object so that it will go out 82292108Sphk * to swap when needed. 82392108Sphk */ 82492108Sphk is_first_object_locked = FALSE; 82592108Sphk if ( 82692108Sphk /* 82792108Sphk * Only one shadow object 82898066Sphk */ 82992108Sphk (fs.object->shadow_count == 1) && 83092108Sphk /* 83192108Sphk * No COW refs, except us 832126798Sphk */ 833126798Sphk (fs.object->ref_count == 1) && 83498066Sphk /* 83598066Sphk * No one else can look this object up 83698066Sphk */ 83798066Sphk (fs.object->handle == NULL) && 83898066Sphk /* 839112028Sphk * No other ways to look the object up 840110541Sphk */ 84192108Sphk ((fs.object->type == OBJT_DEFAULT) || 84292108Sphk (fs.object->type == OBJT_SWAP)) && 84392108Sphk (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && 844248674Smav /* 845248674Smav * We don't chase down the shadow chain 846248674Smav */ 847131820Sphk fs.object == fs.first_object->backing_object) { 84892108Sphk /* 84992108Sphk * get rid of the unnecessary page 85092108Sphk */ 85192108Sphk vm_page_lock(fs.first_m); 852125755Sphk vm_page_free(fs.first_m); 85392108Sphk vm_page_unlock(fs.first_m); 85492108Sphk /* 85592108Sphk * grab the page and put it into the 85692108Sphk * process'es object. The page is 85792108Sphk * automatically made dirty. 85892108Sphk */ 859125755Sphk if (vm_page_rename(fs.m, fs.first_object, 86092108Sphk fs.first_pindex)) { 86192108Sphk unlock_and_deallocate(&fs); 862332096Savg goto RetryFault; 863332096Savg } 864332096Savg#if VM_NRESERVLEVEL > 0 865332096Savg /* 866332096Savg * Rename the reservation. 86792108Sphk */ 86892108Sphk vm_reserv_rename(fs.m, fs.first_object, 869126798Sphk fs.object, OFF_TO_IDX( 870126798Sphk fs.first_object->backing_object_offset)); 87192108Sphk#endif 872127162Spjd vm_page_xbusy(fs.m); 873126798Sphk fs.first_m = fs.m; 874332096Savg fs.m = NULL; 87592108Sphk PCPU_INC(cnt.v_cow_optim); 876125755Sphk } else { 87792108Sphk /* 87892108Sphk * Oh, well, lets copy it. 87992108Sphk */ 88092108Sphk pmap_copy_page(fs.m, fs.first_m); 88192108Sphk fs.first_m->valid = VM_PAGE_BITS_ALL; 882124294Sphk if (wired && (fault_flags & 883332096Savg VM_FAULT_WIRE) == 0) { 88492108Sphk vm_page_lock(fs.first_m); 88592108Sphk vm_page_wire(fs.first_m); 88693248Sphk vm_page_unlock(fs.first_m); 88792108Sphk 88892108Sphk vm_page_lock(fs.m); 88992108Sphk vm_page_unwire(fs.m, FALSE); 890238886Smav vm_page_unlock(fs.m); 891131798Sphk } 892131798Sphk /* 89392108Sphk * We no longer need the old page or object. 
89492108Sphk */ 895332096Savg release_page(&fs); 896332096Savg } 897332096Savg /* 898332096Savg * fs.object != fs.first_object due to above 899332096Savg * conditional 900332096Savg */ 901332096Savg vm_object_pip_wakeup(fs.object); 902332096Savg VM_OBJECT_WUNLOCK(fs.object); 903332096Savg /* 904332096Savg * Only use the new page below... 905332096Savg */ 906332096Savg fs.object = fs.first_object; 907332096Savg fs.pindex = fs.first_pindex; 908332096Savg fs.m = fs.first_m; 909332096Savg if (!is_first_object_locked) 910332096Savg VM_OBJECT_WLOCK(fs.object); 911332096Savg PCPU_INC(cnt.v_cow_faults); 912332096Savg curthread->td_cow++; 913332096Savg } else { 914332096Savg prot &= ~VM_PROT_WRITE; 915332096Savg } 91692108Sphk } 91792108Sphk 91892108Sphk /* 91992108Sphk * We must verify that the maps have not changed since our last 92092108Sphk * lookup. 92192108Sphk */ 92292108Sphk if (!fs.lookup_still_valid) { 92392108Sphk vm_object_t retry_object; 92492108Sphk vm_pindex_t retry_pindex; 92592108Sphk vm_prot_t retry_prot; 92692108Sphk 92792108Sphk if (!vm_map_trylock_read(fs.map)) { 92892108Sphk release_page(&fs); 929110759Sphk unlock_and_deallocate(&fs); 930332096Savg goto RetryFault; 931110759Sphk } 93292108Sphk fs.lookup_still_valid = TRUE; 933110759Sphk if (fs.map->timestamp != map_generation) { 93492108Sphk result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, 93592108Sphk &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); 936110759Sphk 93792108Sphk /* 93892108Sphk * If we don't need the page any longer, put it on the inactive 939110759Sphk * list (the easiest thing to do here). If no one needs it, 94092108Sphk * pageout will grab it eventually. 94192108Sphk */ 94292108Sphk if (result != KERN_SUCCESS) { 94392108Sphk release_page(&fs); 944332096Savg unlock_and_deallocate(&fs); 945332096Savg 946332096Savg /* 947332096Savg * If retry of map lookup would have blocked then 948332096Savg * retry fault from start. 949332096Savg */ 950332096Savg if (result == KERN_FAILURE) 951125802Sphk goto RetryFault; 952266031Sbdrewery return (result); 953332096Savg } 954266031Sbdrewery if ((retry_object != fs.first_object) || 955332096Savg (retry_pindex != fs.first_pindex)) { 956332096Savg release_page(&fs); 957332096Savg unlock_and_deallocate(&fs); 958332096Savg goto RetryFault; 959332096Savg } 960332096Savg 961332096Savg /* 962332096Savg * Check whether the protection has changed or the object has 963332096Savg * been copied while we left the map unlocked. Changing from 964332096Savg * read to write permission is OK - we leave the page 96592108Sphk * write-protected, and catch the write fault. Changing from 966112979Sphk * write to read permission means that we can't mark the page 967112979Sphk * write-enabled after all. 968152565Sjdp */ 969152565Sjdp prot &= retry_prot; 970112979Sphk } 971112979Sphk } 972112979Sphk /* 973152565Sjdp * If the page was filled by a pager, update the map entry's 974332096Savg * last read offset. Since the pager does not return the 975113937Sphk * actual set of pages that it read, this update is based on 976113937Sphk * the requested set. Typically, the requested and actual 977112979Sphk * sets are the same. 97892108Sphk * 97992108Sphk * XXX The following assignment modifies the map 98092108Sphk * without holding a write lock on it. 
98192108Sphk */ 98292108Sphk if (hardfault) 98392108Sphk fs.entry->next_read = fs.pindex + faultcount - reqpage; 984134824Sphk 985134824Sphk vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE); 986134824Sphk vm_page_assert_xbusied(fs.m); 987248674Smav 988248674Smav /* 989248674Smav * Page must be completely valid or it is not fit to 99092108Sphk * map into user space. vm_pager_get_pages() ensures this. 99192108Sphk */ 99292108Sphk KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, 99392108Sphk ("vm_fault: page %p partially invalid", fs.m)); 99492108Sphk VM_OBJECT_WUNLOCK(fs.object); 995107953Sphk 99692108Sphk /* 99792108Sphk * Put this page into the physical map. We had to do the unlock above 99898066Sphk * because pmap_enter() may sleep. We don't put the page 99992108Sphk * back on the active queue until later so that the pageout daemon 100092108Sphk * won't find it (yet). 100192108Sphk */ 1002271238Ssmh pmap_enter(fs.map->pmap, vaddr, fs.m, prot, 1003271238Ssmh fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0); 1004271238Ssmh if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 && 1005271238Ssmh wired == 0) 1006271238Ssmh vm_fault_prefault(&fs, vaddr, faultcount, reqpage); 1007271238Ssmh VM_OBJECT_WLOCK(fs.object); 1008271238Ssmh vm_page_lock(fs.m); 1009107953Sphk 101092108Sphk /* 101192108Sphk * If the page is not wired down, then put it where the pageout daemon 101298066Sphk * can find it. 101392108Sphk */ 101492108Sphk if ((fault_flags & VM_FAULT_WIRE) != 0) { 101592108Sphk KASSERT(wired, ("VM_FAULT_WIRE && !wired")); 1016187973Smarcel vm_page_wire(fs.m); 1017169282Spjd } else 1018169282Spjd vm_page_activate(fs.m); 1019169282Spjd if (m_hold != NULL) { 1020169282Spjd *m_hold = fs.m; 1021169282Spjd vm_page_hold(fs.m); 1022169282Spjd } 1023187973Smarcel vm_page_unlock(fs.m); 102492108Sphk vm_page_xunbusy(fs.m); 1025169282Spjd 102692108Sphk /* 102792108Sphk * Unlock everything, and return 102892108Sphk */ 1029169282Spjd unlock_and_deallocate(&fs); 1030169282Spjd if (hardfault) { 1031169282Spjd PCPU_INC(cnt.v_io_faults); 1032169282Spjd curthread->td_ru.ru_majflt++; 1033169282Spjd } else 1034169282Spjd curthread->td_ru.ru_minflt++; 1035169282Spjd 1036169282Spjd return (KERN_SUCCESS); 1037169282Spjd} 1038169282Spjd 103992108Sphk/* 1040169282Spjd * Speed up the reclamation of up to "distance" pages that precede the 1041169282Spjd * faulting pindex within the first object of the shadow chain. 1042169282Spjd */ 1043169282Spjdstatic void 104492108Sphkvm_fault_cache_behind(const struct faultstate *fs, int distance) 1045188054Smarcel{ 1046188054Smarcel vm_object_t first_object, object; 1047104195Sphk vm_page_t m, m_prev; 104892108Sphk vm_pindex_t pindex; 104992108Sphk 105092108Sphk object = fs->object; 105192108Sphk VM_OBJECT_ASSERT_WLOCKED(object); 1052126798Sphk first_object = fs->first_object; 105392108Sphk if (first_object != object) { 105492108Sphk if (!VM_OBJECT_TRYWLOCK(first_object)) { 105592108Sphk VM_OBJECT_WUNLOCK(object); 1056126798Sphk VM_OBJECT_WLOCK(first_object); 1057126798Sphk VM_OBJECT_WLOCK(object); 105892108Sphk } 105992108Sphk } 106092108Sphk /* Neither fictitious nor unmanaged pages can be cached. 
*/ 106192108Sphk if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) { 106292108Sphk if (fs->first_pindex < distance) 106392108Sphk pindex = 0; 106492108Sphk else 106592108Sphk pindex = fs->first_pindex - distance; 1066110517Sphk if (pindex < OFF_TO_IDX(fs->entry->offset)) 1067104701Sphk pindex = OFF_TO_IDX(fs->entry->offset); 1068104701Sphk m = first_object != object ? fs->first_m : fs->m; 1069104701Sphk vm_page_assert_xbusied(m); 107092108Sphk m_prev = vm_page_prev(m); 1071110523Sphk while ((m = m_prev) != NULL && m->pindex >= pindex && 1072110523Sphk m->valid == VM_PAGE_BITS_ALL) { 1073104701Sphk m_prev = vm_page_prev(m); 107492108Sphk if (vm_page_busied(m)) 107592108Sphk continue; 107692108Sphk vm_page_lock(m); 107792108Sphk if (m->hold_count == 0 && m->wire_count == 0) { 107892108Sphk pmap_remove_all(m); 107992108Sphk vm_page_aflag_clear(m, PGA_REFERENCED); 108092108Sphk if (m->dirty != 0) 1081114511Sphk vm_page_deactivate(m); 1082114511Sphk else 108392108Sphk vm_page_cache(m); 1084126798Sphk } 1085126798Sphk vm_page_unlock(m); 108692108Sphk } 1087238886Smav } 1088114511Sphk if (first_object != object) 1089114511Sphk VM_OBJECT_WUNLOCK(first_object); 1090114511Sphk} 1091114511Sphk 1092114511Sphk/* 1093114511Sphk * vm_fault_prefault provides a quick way of clustering 1094114511Sphk * pagefaults into a processes address space. It is a "cousin" 1095114511Sphk * of vm_map_pmap_enter, except it runs at page fault time instead 1096114511Sphk * of mmap time. 109792108Sphk */ 109892108Sphkstatic void 109992108Sphkvm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, 110092108Sphk int faultcount, int reqpage) 110192108Sphk{ 110292108Sphk pmap_t pmap; 110392108Sphk vm_map_entry_t entry; 110492108Sphk vm_object_t backing_object, lobject; 110592108Sphk vm_offset_t addr, starta; 110692108Sphk vm_pindex_t pindex; 110792108Sphk vm_page_t m; 110892108Sphk int backward, forward, i; 110992108Sphk 1110113929Sphk pmap = fs->map->pmap; 1111113929Sphk if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) 1112113929Sphk return; 1113113929Sphk 1114113929Sphk if (faultcount > 0) { 1115113929Sphk backward = reqpage; 1116113929Sphk forward = faultcount - reqpage - 1; 1117113929Sphk } else { 1118113929Sphk backward = PFBAK; 1119113929Sphk forward = PFFOR; 1120126798Sphk } 1121266679Sae entry = fs->entry; 1122266679Sae 1123113929Sphk starta = addra - backward * PAGE_SIZE; 1124113929Sphk if (starta < entry->start) { 1125238886Smav starta = entry->start; 1126113929Sphk } else if (starta > addra) { 1127238886Smav starta = 0; 1128113929Sphk } 1129113929Sphk 1130113929Sphk /* 1131113929Sphk * Generate the sequence of virtual addresses that are candidates for 1132113929Sphk * prefaulting in an outward spiral from the faulting virtual address, 1133113929Sphk * "addra". Specifically, the sequence is "addra - PAGE_SIZE", "addra 1134113929Sphk * + PAGE_SIZE", "addra - 2 * PAGE_SIZE", "addra + 2 * PAGE_SIZE", ... 113592108Sphk * If the candidate address doesn't have a backing physical page, then 113692108Sphk * the loop immediately terminates. 113792108Sphk */ 113892108Sphk for (i = 0; i < 2 * imax(backward, forward); i++) { 113992108Sphk addr = addra + ((i >> 1) + 1) * ((i & 1) == 0 ? 
-PAGE_SIZE : 114092108Sphk PAGE_SIZE); 1141126798Sphk if (addr > addra + forward * PAGE_SIZE) 1142126798Sphk addr = 0; 114392108Sphk 114492108Sphk if (addr < starta || addr >= entry->end) 114592108Sphk continue; 114692108Sphk 114792108Sphk if (!pmap_is_prefaultable(pmap, addr)) 114892108Sphk continue; 114992108Sphk 115092108Sphk pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; 115192108Sphk lobject = entry->object.vm_object; 1152238886Smav VM_OBJECT_RLOCK(lobject); 115392108Sphk while ((m = vm_page_lookup(lobject, pindex)) == NULL && 1154113937Sphk lobject->type == OBJT_DEFAULT && 115592108Sphk (backing_object = lobject->backing_object) != NULL) { 115692108Sphk KASSERT((lobject->backing_object_offset & PAGE_MASK) == 1157238886Smav 0, ("vm_fault_prefault: unaligned object offset")); 1158238886Smav pindex += lobject->backing_object_offset >> PAGE_SHIFT; 1159238886Smav VM_OBJECT_RLOCK(backing_object); 1160238886Smav VM_OBJECT_RUNLOCK(lobject); 1161238886Smav lobject = backing_object; 1162238886Smav } 1163238886Smav if (m == NULL) { 1164238886Smav VM_OBJECT_RUNLOCK(lobject); 1165238886Smav break; 1166238886Smav } 1167238886Smav if (m->valid == VM_PAGE_BITS_ALL && 1168238886Smav (m->flags & PG_FICTITIOUS) == 0) 1169238886Smav pmap_enter_quick(pmap, addr, m, entry->protection); 1170238886Smav VM_OBJECT_RUNLOCK(lobject); 1171238886Smav } 1172238886Smav} 1173238886Smav 1174238886Smav/* 1175238886Smav * Hold each of the physical pages that are mapped by the specified range of 1176238886Smav * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid 1177238886Smav * and allow the specified types of access, "prot". If all of the implied 1178238886Smav * pages are successfully held, then the number of held pages is returned 1179238886Smav * together with pointers to those pages in the array "ma". However, if any 118094284Sphk * of the pages cannot be held, -1 is returned. 1181238886Smav */ 1182238886Smavint 1183238886Smavvm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, 1184238886Smav vm_prot_t prot, vm_page_t *ma, int max_count) 1185238886Smav{ 1186238886Smav vm_offset_t end, va; 1187238886Smav vm_page_t *mp; 1188238886Smav int count; 1189238886Smav boolean_t pmap_failed; 1190238886Smav 1191238886Smav if (len == 0) 1192238886Smav return (0); 1193238886Smav end = round_page(addr + len); 1194238886Smav addr = trunc_page(addr); 1195238886Smav 1196238886Smav /* 1197238886Smav * Check for illegal addresses. 1198238886Smav */ 1199238886Smav if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map)) 1200238886Smav return (-1); 120194284Sphk 120294284Sphk if (atop(end - addr) > max_count) 120394284Sphk panic("vm_fault_quick_hold_pages: count > max_count"); 120494284Sphk count = atop(end - addr); 120594284Sphk 120694284Sphk /* 120794284Sphk * Most likely, the physical pages are resident in the pmap, so it is 120894284Sphk * faster to try pmap_extract_and_hold() first. 120994284Sphk */ 121094284Sphk pmap_failed = FALSE; 121194284Sphk for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) { 121294284Sphk *mp = pmap_extract_and_hold(map->pmap, va, prot); 121395310Sphk if (*mp == NULL) 1214221101Smav pmap_failed = TRUE; 1215221101Smav else if ((prot & VM_PROT_WRITE) != 0 && 1216221101Smav (*mp)->dirty != VM_PAGE_BITS_ALL) { 1217221101Smav /* 1218221101Smav * Explicitly dirty the physical page. 
Otherwise, the 1219221101Smav * caller's changes may go unnoticed because they are 1220221101Smav * performed through an unmanaged mapping or by a DMA 1221221101Smav * operation. 1222221101Smav * 1223221101Smav * The object lock is not held here. 1224221101Smav * See vm_page_clear_dirty_mask(). 1225221101Smav */ 1226221101Smav vm_page_dirty(*mp); 1227221101Smav } 1228221101Smav } 1229221101Smav if (pmap_failed) { 1230221101Smav /* 1231221101Smav * One or more pages could not be held by the pmap. Either no 1232221101Smav * page was mapped at the specified virtual address or that 1233221101Smav * mapping had insufficient permissions. Attempt to fault in 1234221101Smav * and hold these pages. 1235221101Smav */ 1236221101Smav for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) 1237221101Smav if (*mp == NULL && vm_fault_hold(map, va, prot, 1238221101Smav VM_FAULT_NORMAL, mp) != KERN_SUCCESS) 1239221101Smav goto error; 1240221101Smav } 1241221101Smav return (count); 1242221101Smaverror: 1243221101Smav for (mp = ma; mp < ma + count; mp++) 1244221101Smav if (*mp != NULL) { 1245221101Smav vm_page_lock(*mp); 1246221101Smav vm_page_unhold(*mp); 1247221101Smav vm_page_unlock(*mp); 1248221101Smav } 1249221101Smav return (-1); 1250221101Smav} 1251162326Spjd 125295310Sphk/* 1253206859Sjh * Routine: 1254206859Sjh * vm_fault_copy_entry 1255206859Sjh * Function: 1256206859Sjh * Create new shadow object backing dst_entry with private copy of 1257206859Sjh * all underlying pages. When src_entry is equal to dst_entry, 1258122762Sphk * function implements COW for wired-down map entry. Otherwise, 1259126798Sphk * it forks wired entry into dst_map. 1260122762Sphk * 1261122762Sphk * In/out conditions: 1262122762Sphk * The source and destination maps must be locked for write. 1263122762Sphk * The source map entry must be wired down (or be a sharing map 1264122762Sphk * entry corresponding to a main map entry that is wired down). 1265122762Sphk */ 1266122762Sphkvoid 1267206859Sjhvm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, 1268206859Sjh vm_map_entry_t dst_entry, vm_map_entry_t src_entry, 1269206859Sjh vm_ooffset_t *fork_charge) 1270206859Sjh{ 1271195257Strasz vm_object_t backing_object, dst_object, object, src_object; 1272122762Sphk vm_pindex_t dst_pindex, pindex, src_pindex; 1273122762Sphk vm_prot_t access, prot; 1274122762Sphk vm_offset_t vaddr; 1275122762Sphk vm_page_t dst_m; 1276122762Sphk vm_page_t src_m; 1277126798Sphk boolean_t upgrade; 1278122762Sphk 1279122762Sphk#ifdef lint 1280126798Sphk src_map++; 1281122762Sphk#endif /* lint */ 1282122762Sphk 1283126798Sphk upgrade = src_entry == dst_entry; 1284122762Sphk access = prot = dst_entry->protection; 1285122762Sphk 1286122762Sphk src_object = src_entry->object.vm_object; 1287122762Sphk src_pindex = OFF_TO_IDX(src_entry->offset); 1288126798Sphk 1289162326Spjd if (upgrade && (dst_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { 1290162326Spjd dst_object = src_object; 1291162326Spjd vm_object_reference(dst_object); 1292162326Spjd } else { 1293192803Slulf /* 1294192803Slulf * Create the top-level object for the destination entry. (Doesn't 1295162326Spjd * actually shadow anything - we copy the pages directly.) 
1296162326Spjd */ 1297162326Spjd dst_object = vm_object_allocate(OBJT_DEFAULT, 1298192803Slulf OFF_TO_IDX(dst_entry->end - dst_entry->start)); 1299162326Spjd#if VM_NRESERVLEVEL > 0 1300162326Spjd dst_object->flags |= OBJ_COLORED; 1301162326Spjd dst_object->pg_color = atop(dst_entry->start); 1302162326Spjd#endif 1303162326Spjd } 1304162326Spjd 1305162326Spjd VM_OBJECT_WLOCK(dst_object); 1306162326Spjd KASSERT(upgrade || dst_entry->object.vm_object == NULL, 1307162326Spjd ("vm_fault_copy_entry: vm_object not NULL")); 1308162326Spjd if (src_object != dst_object) { 1309162326Spjd dst_entry->object.vm_object = dst_object; 1310162326Spjd dst_entry->offset = 0; 1311162326Spjd dst_object->charge = dst_entry->end - dst_entry->start; 1312162326Spjd } 1313162326Spjd if (fork_charge != NULL) { 1314162326Spjd KASSERT(dst_entry->cred == NULL, 1315162326Spjd ("vm_fault_copy_entry: leaked swp charge")); 1316162326Spjd dst_object->cred = curthread->td_ucred; 1317162326Spjd crhold(dst_object->cred); 1318162326Spjd *fork_charge += dst_object->charge; 1319162326Spjd } else if (dst_object->cred == NULL) { 1320162326Spjd KASSERT(dst_entry->cred != NULL, ("no cred for entry %p", 1321162326Spjd dst_entry)); 1322162326Spjd dst_object->cred = dst_entry->cred; 1323162326Spjd dst_entry->cred = NULL; 1324162326Spjd } 1325162326Spjd 1326162326Spjd /* 1327162326Spjd * If not an upgrade, then enter the mappings in the pmap as 1328162326Spjd * read and/or execute accesses. Otherwise, enter them as 1329162326Spjd * write accesses. 1330162326Spjd * 1331162326Spjd * A writeable large page mapping is only created if all of 1332162326Spjd * the constituent small page mappings are modified. Marking 1333162326Spjd * PTEs as modified on inception allows promotion to happen 1334162326Spjd * without taking potentially large number of soft faults. 1335162326Spjd */ 1336162326Spjd if (!upgrade) 1337162326Spjd access &= ~VM_PROT_WRITE; 1338162326Spjd 1339162326Spjd /* 1340162326Spjd * Loop through all of the virtual pages within the entry's 1341162326Spjd * range, copying each page from the source object to the 1342162326Spjd * destination object. Since the source is wired, those pages 1343162326Spjd * must exist. In contrast, the destination is pageable. 1344162326Spjd * Since the destination object does share any backing storage 1345162326Spjd * with the source object, all of its pages must be dirtied, 1346162326Spjd * regardless of whether they can be written. 1347162326Spjd */ 1348162326Spjd for (vaddr = dst_entry->start, dst_pindex = 0; 1349162326Spjd vaddr < dst_entry->end; 1350162326Spjd vaddr += PAGE_SIZE, dst_pindex++) { 1351162326Spjdagain: 1352162326Spjd /* 1353162326Spjd * Find the page in the source object, and copy it in. 1354238886Smav * Because the source is wired down, the page will be 1355162326Spjd * in memory. 1356162326Spjd */ 1357162326Spjd if (src_object != dst_object) 1358162326Spjd VM_OBJECT_RLOCK(src_object); 1359162326Spjd object = src_object; 1360162326Spjd pindex = src_pindex + dst_pindex; 1361238886Smav while ((src_m = vm_page_lookup(object, pindex)) == NULL && 1362238886Smav (backing_object = object->backing_object) != NULL) { 1363192803Slulf /* 1364162326Spjd * Unless the source mapping is read-only or 1365162326Spjd * it is presently being upgraded from 1366162326Spjd * read-only, the first object in the shadow 1367162326Spjd * chain should provide all of the pages. 
In 1368162326Spjd * other words, this loop body should never be 1369162326Spjd * executed when the source mapping is already 1370162326Spjd * read/write. 1371162326Spjd */ 1372162326Spjd KASSERT((src_entry->protection & VM_PROT_WRITE) == 0 || 1373162326Spjd upgrade, 1374162326Spjd ("vm_fault_copy_entry: main object missing page")); 1375162326Spjd 1376162326Spjd VM_OBJECT_RLOCK(backing_object); 1377162326Spjd pindex += OFF_TO_IDX(object->backing_object_offset); 1378162326Spjd if (object != dst_object) 1379162326Spjd VM_OBJECT_RUNLOCK(object); 1380162326Spjd object = backing_object; 1381162326Spjd } 1382162326Spjd KASSERT(src_m != NULL, ("vm_fault_copy_entry: page missing")); 1383162326Spjd 1384162326Spjd if (object != dst_object) { 1385162326Spjd /* 1386162326Spjd * Allocate a page in the destination object. 1387162326Spjd */ 1388162326Spjd dst_m = vm_page_alloc(dst_object, (src_object == 1389162326Spjd dst_object ? src_pindex : 0) + dst_pindex, 1390162326Spjd VM_ALLOC_NORMAL); 1391162326Spjd if (dst_m == NULL) { 1392162326Spjd VM_OBJECT_WUNLOCK(dst_object); 1393162326Spjd VM_OBJECT_RUNLOCK(object); 1394162326Spjd VM_WAIT; 1395162326Spjd VM_OBJECT_WLOCK(dst_object); 1396192803Slulf goto again; 1397162326Spjd } 1398162326Spjd pmap_copy_page(src_m, dst_m); 1399162326Spjd VM_OBJECT_RUNLOCK(object); 1400192803Slulf dst_m->valid = VM_PAGE_BITS_ALL; 1401162326Spjd dst_m->dirty = VM_PAGE_BITS_ALL; 1402162326Spjd } else { 1403179094Spjd dst_m = src_m; 1404162326Spjd if (vm_page_sleep_if_busy(dst_m, "fltupg")) 1405179094Spjd goto again; 1406179094Spjd vm_page_xbusy(dst_m); 1407179094Spjd KASSERT(dst_m->valid == VM_PAGE_BITS_ALL, 1408162326Spjd ("invalid dst page %p", dst_m)); 1409162326Spjd } 1410162326Spjd VM_OBJECT_WUNLOCK(dst_object); 1411162326Spjd 1412162326Spjd /* 1413162326Spjd * Enter it in the pmap. If a wired, copy-on-write 1414162326Spjd * mapping is being replaced by a write-enabled 1415162326Spjd * mapping, then wire that new mapping. 1416162326Spjd */ 1417162326Spjd pmap_enter(dst_map->pmap, vaddr, dst_m, prot, 1418162326Spjd access | (upgrade ? PMAP_ENTER_WIRED : 0), 0); 1419162326Spjd 1420162326Spjd /* 1421162326Spjd * Mark it no longer busy, and put it on the active list. 1422162326Spjd */ 1423162326Spjd VM_OBJECT_WLOCK(dst_object); 1424162326Spjd 1425162326Spjd if (upgrade) { 1426162326Spjd if (src_m != dst_m) { 1427162326Spjd vm_page_lock(src_m); 1428162326Spjd vm_page_unwire(src_m, 0); 1429162326Spjd vm_page_unlock(src_m); 1430162326Spjd vm_page_lock(dst_m); 1431192803Slulf vm_page_wire(dst_m); 1432162326Spjd vm_page_unlock(dst_m); 1433162326Spjd } else { 1434162326Spjd KASSERT(dst_m->wire_count > 0, 1435192803Slulf ("dst_m %p is not wired", dst_m)); 1436162326Spjd } 1437162326Spjd } else { 1438179094Spjd vm_page_lock(dst_m); 1439162326Spjd vm_page_activate(dst_m); 1440179094Spjd vm_page_unlock(dst_m); 1441179094Spjd } 1442179094Spjd vm_page_xunbusy(dst_m); 1443162326Spjd } 1444162326Spjd VM_OBJECT_WUNLOCK(dst_object); 1445179094Spjd if (upgrade) { 1446162326Spjd dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); 1447179094Spjd vm_object_deallocate(src_object); 1448179094Spjd } 1449179094Spjd} 1450162326Spjd 1451162326Spjd 1452162326Spjd/* 1453162326Spjd * This routine checks around the requested page for other pages that 1454162326Spjd * might be able to be faulted in. This routine brackets the viable 1455162326Spjd * pages for the pages to be paged in. 

/*
 * This routine checks around the requested page for other pages that
 * might be faulted in, and brackets the range of viable pages to be
 * paged in.
 *
 * Inputs:
 *	m, rbehind, rahead
 *
 * Outputs:
 *	marray (array of vm_page_t), reqpage (index of requested page)
 *
 * Return value:
 *	number of pages in marray
 */
static int
vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
	vm_page_t m;
	int rbehind;
	int rahead;
	vm_page_t *marray;
	int *reqpage;
{
	int i,j;
	vm_object_t object;
	vm_pindex_t pindex, startpindex, endpindex, tpindex;
	vm_page_t rtm;
	int cbehind, cahead;

	VM_OBJECT_ASSERT_WLOCKED(m->object);

	object = m->object;
	pindex = m->pindex;
	cbehind = cahead = 0;

	/*
	 * if the requested page is not available, then give up now
	 */
	if (!vm_pager_has_page(object, pindex, &cbehind, &cahead)) {
		return 0;
	}

	if ((cbehind == 0) && (cahead == 0)) {
		*reqpage = 0;
		marray[0] = m;
		return 1;
	}

	if (rahead > cahead) {
		rahead = cahead;
	}

	if (rbehind > cbehind) {
		rbehind = cbehind;
	}
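
	/*
	 * Worked example (illustrative, not in the original source): if the
	 * requested page has pindex 3 and rbehind is 8, the backward scan
	 * below clamps rbehind to 3 and sets startpindex to 0, so at most
	 * pages 0-2 are read behind the requested page.  If page 1 is
	 * already resident (found via TAILQ_PREV()), startpindex is bumped
	 * to 2 and only page 2 is allocated.  The requested page is then
	 * placed at marray[i] and its index is returned via *reqpage.
	 */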
	/*
	 * scan backward for the read behind pages -- in memory
	 */
	if (pindex > 0) {
		if (rbehind > pindex) {
			rbehind = pindex;
			startpindex = 0;
		} else {
			startpindex = pindex - rbehind;
		}

		if ((rtm = TAILQ_PREV(m, pglist, listq)) != NULL &&
		    rtm->pindex >= startpindex)
			startpindex = rtm->pindex + 1;

		/* tpindex is unsigned; beware of numeric underflow. */
		for (i = 0, tpindex = pindex - 1; tpindex >= startpindex &&
		    tpindex < pindex; i++, tpindex--) {

			rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
			    VM_ALLOC_IFNOTCACHED);
			if (rtm == NULL) {
				/*
				 * Shift the allocated pages to the
				 * beginning of the array.
				 */
				for (j = 0; j < i; j++) {
					marray[j] = marray[j + tpindex + 1 -
					    startpindex];
				}
				break;
			}

			marray[tpindex - startpindex] = rtm;
		}
	} else {
		startpindex = 0;
		i = 0;
	}

	marray[i] = m;
	/* page offset of the required page */
	*reqpage = i;

	tpindex = pindex + 1;
	i++;

	/*
	 * scan forward for the read ahead pages
	 */
	endpindex = tpindex + rahead;
	if ((rtm = TAILQ_NEXT(m, listq)) != NULL && rtm->pindex < endpindex)
		endpindex = rtm->pindex;
	if (endpindex > object->size)
		endpindex = object->size;

	for (; tpindex < endpindex; i++, tpindex++) {

		rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
		    VM_ALLOC_IFNOTCACHED);
		if (rtm == NULL) {
			break;
		}

		marray[i] = rtm;
	}

	/* return number of pages */
	return i;
}

/*
 * Block entry into the machine-independent layer's page fault handler by
 * the calling thread.  Subsequent calls to vm_fault() by that thread will
 * return KERN_PROTECTION_FAILURE.  Enable machine-dependent handling of
 * spurious page faults.
 */
int
vm_fault_disable_pagefaults(void)
{

	return (curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR));
}

void
vm_fault_enable_pagefaults(int save)
{

	curthread_pflags_restore(save);
}
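
/*
 * Usage sketch (illustrative; the callee below is hypothetical, not part of
 * this file): a thread that must not recurse into the page fault handler
 * brackets the sensitive region with the pair above, saving and then
 * restoring the previous thread flags:
 *
 *	int save, error;
 *
 *	save = vm_fault_disable_pagefaults();
 *	error = copy_that_must_not_fault();	 (hypothetical callee)
 *	vm_fault_enable_pagefaults(save);
 */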