pmap-v6.c revision 269072
1/* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */ 2/*- 3 * Copyright 2011 Semihalf 4 * Copyright 2004 Olivier Houchard. 5 * Copyright 2003 Wasabi Systems, Inc. 6 * All rights reserved. 7 * 8 * Written by Steve C. Woodford for Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project by 21 * Wasabi Systems, Inc. 22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 23 * or promote products derived from this software without specific prior 24 * written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 * 38 * From: FreeBSD: src/sys/arm/arm/pmap.c,v 1.113 2009/07/24 13:50:29 39 */ 40 41/*- 42 * Copyright (c) 2002-2003 Wasabi Systems, Inc. 43 * Copyright (c) 2001 Richard Earnshaw 44 * Copyright (c) 2001-2002 Christopher Gilbert 45 * All rights reserved. 46 * 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. The name of the company nor the name of the author may be used to 53 * endorse or promote products derived from this software without specific 54 * prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 57 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 58 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 59 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 67 */ 68/*- 69 * Copyright (c) 1999 The NetBSD Foundation, Inc. 70 * All rights reserved. 71 * 72 * This code is derived from software contributed to The NetBSD Foundation 73 * by Charles M. Hannum. 74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 84 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 85 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 86 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 87 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 88 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 89 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 90 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 91 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 92 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 93 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 94 * POSSIBILITY OF SUCH DAMAGE. 95 */ 96 97/*- 98 * Copyright (c) 1994-1998 Mark Brinicombe. 99 * Copyright (c) 1994 Brini. 100 * All rights reserved. 101 * 102 * This code is derived from software written for Brini by Mark Brinicombe 103 * 104 * Redistribution and use in source and binary forms, with or without 105 * modification, are permitted provided that the following conditions 106 * are met: 107 * 1. Redistributions of source code must retain the above copyright 108 * notice, this list of conditions and the following disclaimer. 109 * 2. Redistributions in binary form must reproduce the above copyright 110 * notice, this list of conditions and the following disclaimer in the 111 * documentation and/or other materials provided with the distribution. 112 * 3. All advertising materials mentioning features or use of this software 113 * must display the following acknowledgement: 114 * This product includes software developed by Mark Brinicombe. 115 * 4. The name of the author may not be used to endorse or promote products 116 * derived from this software without specific prior written permission. 117 * 118 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 119 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 120 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 121 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 122 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 123 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 124 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 125 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 126 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 127 * 128 * RiscBSD kernel project 129 * 130 * pmap.c 131 * 132 * Machine dependant vm stuff 133 * 134 * Created : 20/09/94 135 */ 136 137/* 138 * Special compilation symbols 139 * PMAP_DEBUG - Build in pmap_debug_level code 140 * 141 * Note that pmap_mapdev() and pmap_unmapdev() are implemented in arm/devmap.c 142*/ 143/* Include header files */ 144 145#include "opt_vm.h" 146#include "opt_pmap.h" 147 148#include <sys/cdefs.h> 149__FBSDID("$FreeBSD: stable/10/sys/arm/arm/pmap-v6.c 269072 2014-07-24 16:29:44Z kib $"); 150#include <sys/param.h> 151#include <sys/systm.h> 152#include <sys/kernel.h> 153#include <sys/ktr.h> 154#include <sys/lock.h> 155#include <sys/proc.h> 156#include <sys/malloc.h> 157#include <sys/msgbuf.h> 158#include <sys/mutex.h> 159#include <sys/vmmeter.h> 160#include <sys/mman.h> 161#include <sys/rwlock.h> 162#include <sys/smp.h> 163#include <sys/sched.h> 164#include <sys/sysctl.h> 165 166#include <vm/vm.h> 167#include <vm/vm_param.h> 168#include <vm/uma.h> 169#include <vm/pmap.h> 170#include <vm/vm_kern.h> 171#include <vm/vm_object.h> 172#include <vm/vm_map.h> 173#include <vm/vm_page.h> 174#include <vm/vm_pageout.h> 175#include <vm/vm_extern.h> 176#include <vm/vm_reserv.h> 177 178#include <machine/md_var.h> 179#include <machine/cpu.h> 180#include <machine/cpufunc.h> 181#include <machine/pcb.h> 182 183#ifdef DEBUG 184extern int last_fault_code; 185#endif 186 187#ifdef PMAP_DEBUG 188#define PDEBUG(_lev_,_stat_) \ 189 if (pmap_debug_level >= (_lev_)) \ 190 ((_stat_)) 191#define dprintf printf 192 193int pmap_debug_level = 0; 194#define PMAP_INLINE 195#else /* PMAP_DEBUG */ 196#define PDEBUG(_lev_,_stat_) /* Nothing */ 197#define dprintf(x, arg...) 198#define PMAP_INLINE __inline 199#endif /* PMAP_DEBUG */ 200 201#ifdef PV_STATS 202#define PV_STAT(x) do { x ; } while (0) 203#else 204#define PV_STAT(x) do { } while (0) 205#endif 206 207#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) 208 209#ifdef ARM_L2_PIPT 210#define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((pa), (size)) 211#define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((pa), (size)) 212#else 213#define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((va), (size)) 214#define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((va), (size)) 215#endif 216 217extern struct pv_addr systempage; 218 219/* 220 * Internal function prototypes 221 */ 222 223static PMAP_INLINE 224struct pv_entry *pmap_find_pv(struct md_page *, pmap_t, vm_offset_t); 225static void pmap_free_pv_chunk(struct pv_chunk *pc); 226static void pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv); 227static pv_entry_t pmap_get_pv_entry(pmap_t pmap, boolean_t try); 228static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); 229static boolean_t pmap_pv_insert_section(pmap_t, vm_offset_t, 230 vm_paddr_t); 231static struct pv_entry *pmap_remove_pv(struct vm_page *, pmap_t, vm_offset_t); 232static int pmap_pvh_wired_mappings(struct md_page *, int); 233 234static void pmap_enter_locked(pmap_t, vm_offset_t, vm_prot_t, 235 vm_page_t, vm_prot_t, boolean_t, int); 236static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va); 237static void pmap_alloc_l1(pmap_t); 238static void pmap_free_l1(pmap_t); 239 240static void pmap_map_section(pmap_t, vm_offset_t, vm_offset_t, 241 vm_prot_t, boolean_t); 242static void pmap_promote_section(pmap_t, vm_offset_t); 243static boolean_t pmap_demote_section(pmap_t, vm_offset_t); 244static boolean_t pmap_enter_section(pmap_t, vm_offset_t, vm_page_t, 245 vm_prot_t); 246static void pmap_remove_section(pmap_t, vm_offset_t); 247 248static int pmap_clearbit(struct vm_page *, u_int); 249 250static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t); 251static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t); 252static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int); 253static vm_offset_t kernel_pt_lookup(vm_paddr_t); 254 255static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); 256 257vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 258vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 259vm_offset_t pmap_curmaxkvaddr; 260vm_paddr_t kernel_l1pa; 261 262vm_offset_t kernel_vm_end = 0; 263 264vm_offset_t vm_max_kernel_address; 265 266struct pmap kernel_pmap_store; 267 268/* 269 * Resources for quickly copying and zeroing pages using virtual address space 270 * and page table entries that are pre-allocated per-CPU by pmap_init(). 271 */ 272struct czpages { 273 struct mtx lock; 274 pt_entry_t *srcptep; 275 pt_entry_t *dstptep; 276 vm_offset_t srcva; 277 vm_offset_t dstva; 278}; 279static struct czpages cpu_czpages[MAXCPU]; 280 281static void pmap_init_l1(struct l1_ttable *, pd_entry_t *); 282/* 283 * These routines are called when the CPU type is identified to set up 284 * the PTE prototypes, cache modes, etc. 285 * 286 * The variables are always here, just in case LKMs need to reference 287 * them (though, they shouldn't). 288 */ 289static void pmap_set_prot(pt_entry_t *pte, vm_prot_t prot, uint8_t user); 290pt_entry_t pte_l1_s_cache_mode; 291pt_entry_t pte_l1_s_cache_mode_pt; 292 293pt_entry_t pte_l2_l_cache_mode; 294pt_entry_t pte_l2_l_cache_mode_pt; 295 296pt_entry_t pte_l2_s_cache_mode; 297pt_entry_t pte_l2_s_cache_mode_pt; 298 299struct msgbuf *msgbufp = 0; 300 301/* 302 * Crashdump maps. 303 */ 304static caddr_t crashdumpmap; 305 306extern void bcopy_page(vm_offset_t, vm_offset_t); 307extern void bzero_page(vm_offset_t); 308 309char *_tmppt; 310 311/* 312 * Metadata for L1 translation tables. 313 */ 314struct l1_ttable { 315 /* Entry on the L1 Table list */ 316 SLIST_ENTRY(l1_ttable) l1_link; 317 318 /* Entry on the L1 Least Recently Used list */ 319 TAILQ_ENTRY(l1_ttable) l1_lru; 320 321 /* Track how many domains are allocated from this L1 */ 322 volatile u_int l1_domain_use_count; 323 324 /* 325 * A free-list of domain numbers for this L1. 326 * We avoid using ffs() and a bitmap to track domains since ffs() 327 * is slow on ARM. 328 */ 329 u_int8_t l1_domain_first; 330 u_int8_t l1_domain_free[PMAP_DOMAINS]; 331 332 /* Physical address of this L1 page table */ 333 vm_paddr_t l1_physaddr; 334 335 /* KVA of this L1 page table */ 336 pd_entry_t *l1_kva; 337}; 338 339/* 340 * Convert a virtual address into its L1 table index. That is, the 341 * index used to locate the L2 descriptor table pointer in an L1 table. 342 * This is basically used to index l1->l1_kva[]. 343 * 344 * Each L2 descriptor table represents 1MB of VA space. 345 */ 346#define L1_IDX(va) (((vm_offset_t)(va)) >> L1_S_SHIFT) 347 348/* 349 * L1 Page Tables are tracked using a Least Recently Used list. 350 * - New L1s are allocated from the HEAD. 351 * - Freed L1s are added to the TAIl. 352 * - Recently accessed L1s (where an 'access' is some change to one of 353 * the userland pmaps which owns this L1) are moved to the TAIL. 354 */ 355static TAILQ_HEAD(, l1_ttable) l1_lru_list; 356/* 357 * A list of all L1 tables 358 */ 359static SLIST_HEAD(, l1_ttable) l1_list; 360static struct mtx l1_lru_lock; 361 362/* 363 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots. 364 * 365 * This is normally 16MB worth L2 page descriptors for any given pmap. 366 * Reference counts are maintained for L2 descriptors so they can be 367 * freed when empty. 368 */ 369struct l2_dtable { 370 /* The number of L2 page descriptors allocated to this l2_dtable */ 371 u_int l2_occupancy; 372 373 /* List of L2 page descriptors */ 374 struct l2_bucket { 375 pt_entry_t *l2b_kva; /* KVA of L2 Descriptor Table */ 376 vm_paddr_t l2b_phys; /* Physical address of same */ 377 u_short l2b_l1idx; /* This L2 table's L1 index */ 378 u_short l2b_occupancy; /* How many active descriptors */ 379 } l2_bucket[L2_BUCKET_SIZE]; 380}; 381 382/* pmap_kenter_internal flags */ 383#define KENTER_CACHE 0x1 384#define KENTER_USER 0x2 385 386/* 387 * Given an L1 table index, calculate the corresponding l2_dtable index 388 * and bucket index within the l2_dtable. 389 */ 390#define L2_IDX(l1idx) (((l1idx) >> L2_BUCKET_LOG2) & \ 391 (L2_SIZE - 1)) 392#define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1)) 393 394/* 395 * Given a virtual address, this macro returns the 396 * virtual address required to drop into the next L2 bucket. 397 */ 398#define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE) 399 400/* 401 * We try to map the page tables write-through, if possible. However, not 402 * all CPUs have a write-through cache mode, so on those we have to sync 403 * the cache when we frob page tables. 404 * 405 * We try to evaluate this at compile time, if possible. However, it's 406 * not always possible to do that, hence this run-time var. 407 */ 408int pmap_needs_pte_sync; 409 410/* 411 * Macro to determine if a mapping might be resident in the 412 * instruction cache and/or TLB 413 */ 414#define PTE_BEEN_EXECD(pte) (L2_S_EXECUTABLE(pte) && L2_S_REFERENCED(pte)) 415 416/* 417 * Macro to determine if a mapping might be resident in the 418 * data cache and/or TLB 419 */ 420#define PTE_BEEN_REFD(pte) (L2_S_REFERENCED(pte)) 421 422#ifndef PMAP_SHPGPERPROC 423#define PMAP_SHPGPERPROC 200 424#endif 425 426#define pmap_is_current(pm) ((pm) == pmap_kernel() || \ 427 curproc->p_vmspace->vm_map.pmap == (pm)) 428 429/* 430 * Data for the pv entry allocation mechanism 431 */ 432static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 433static int pv_entry_count, pv_entry_max, pv_entry_high_water; 434static struct md_page *pv_table; 435static int shpgperproc = PMAP_SHPGPERPROC; 436 437struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ 438int pv_maxchunks; /* How many chunks we have KVA for */ 439vm_offset_t pv_vafree; /* Freelist stored in the PTE */ 440 441static __inline struct pv_chunk * 442pv_to_chunk(pv_entry_t pv) 443{ 444 445 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 446} 447 448#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 449 450CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 451CTASSERT(_NPCM == 8); 452CTASSERT(_NPCPV == 252); 453 454#define PC_FREE0_6 0xfffffffful /* Free values for index 0 through 6 */ 455#define PC_FREE7 0x0ffffffful /* Free values for index 7 */ 456 457static const uint32_t pc_freemask[_NPCM] = { 458 PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, 459 PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, 460 PC_FREE0_6, PC_FREE7 461}; 462 463static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 464 465/* Superpages utilization enabled = 1 / disabled = 0 */ 466static int sp_enabled = 0; 467SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN, &sp_enabled, 0, 468 "Are large page mappings enabled?"); 469 470SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 471 "Current number of pv entries"); 472 473#ifdef PV_STATS 474static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 475 476SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 477 "Current number of pv entry chunks"); 478SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 479 "Current number of pv entry chunks allocated"); 480SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 481 "Current number of pv entry chunks frees"); 482SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 483 "Number of times tried to get a chunk page but failed."); 484 485static long pv_entry_frees, pv_entry_allocs; 486static int pv_entry_spare; 487 488SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 489 "Current number of pv entry frees"); 490SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 491 "Current number of pv entry allocs"); 492SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 493 "Current number of spare pv entries"); 494#endif 495 496uma_zone_t l2zone; 497static uma_zone_t l2table_zone; 498static vm_offset_t pmap_kernel_l2dtable_kva; 499static vm_offset_t pmap_kernel_l2ptp_kva; 500static vm_paddr_t pmap_kernel_l2ptp_phys; 501static struct rwlock pvh_global_lock; 502 503int l1_mem_types[] = { 504 ARM_L1S_STRONG_ORD, 505 ARM_L1S_DEVICE_NOSHARE, 506 ARM_L1S_DEVICE_SHARE, 507 ARM_L1S_NRML_NOCACHE, 508 ARM_L1S_NRML_IWT_OWT, 509 ARM_L1S_NRML_IWB_OWB, 510 ARM_L1S_NRML_IWBA_OWBA 511}; 512 513int l2l_mem_types[] = { 514 ARM_L2L_STRONG_ORD, 515 ARM_L2L_DEVICE_NOSHARE, 516 ARM_L2L_DEVICE_SHARE, 517 ARM_L2L_NRML_NOCACHE, 518 ARM_L2L_NRML_IWT_OWT, 519 ARM_L2L_NRML_IWB_OWB, 520 ARM_L2L_NRML_IWBA_OWBA 521}; 522 523int l2s_mem_types[] = { 524 ARM_L2S_STRONG_ORD, 525 ARM_L2S_DEVICE_NOSHARE, 526 ARM_L2S_DEVICE_SHARE, 527 ARM_L2S_NRML_NOCACHE, 528 ARM_L2S_NRML_IWT_OWT, 529 ARM_L2S_NRML_IWB_OWB, 530 ARM_L2S_NRML_IWBA_OWBA 531}; 532 533/* 534 * This list exists for the benefit of pmap_map_chunk(). It keeps track 535 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can 536 * find them as necessary. 537 * 538 * Note that the data on this list MUST remain valid after initarm() returns, 539 * as pmap_bootstrap() uses it to contruct L2 table metadata. 540 */ 541SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list); 542 543static void 544pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt) 545{ 546 int i; 547 548 l1->l1_kva = l1pt; 549 l1->l1_domain_use_count = 0; 550 l1->l1_domain_first = 0; 551 552 for (i = 0; i < PMAP_DOMAINS; i++) 553 l1->l1_domain_free[i] = i + 1; 554 555 /* 556 * Copy the kernel's L1 entries to each new L1. 557 */ 558 if (l1pt != pmap_kernel()->pm_l1->l1_kva) 559 memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE); 560 561 if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0) 562 panic("pmap_init_l1: can't get PA of L1 at %p", l1pt); 563 SLIST_INSERT_HEAD(&l1_list, l1, l1_link); 564 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 565} 566 567static vm_offset_t 568kernel_pt_lookup(vm_paddr_t pa) 569{ 570 struct pv_addr *pv; 571 572 SLIST_FOREACH(pv, &kernel_pt_list, pv_list) { 573 if (pv->pv_pa == pa) 574 return (pv->pv_va); 575 } 576 return (0); 577} 578 579void 580pmap_pte_init_mmu_v6(void) 581{ 582 583 if (PTE_PAGETABLE >= 3) 584 pmap_needs_pte_sync = 1; 585 pte_l1_s_cache_mode = l1_mem_types[PTE_CACHE]; 586 pte_l2_l_cache_mode = l2l_mem_types[PTE_CACHE]; 587 pte_l2_s_cache_mode = l2s_mem_types[PTE_CACHE]; 588 589 pte_l1_s_cache_mode_pt = l1_mem_types[PTE_PAGETABLE]; 590 pte_l2_l_cache_mode_pt = l2l_mem_types[PTE_PAGETABLE]; 591 pte_l2_s_cache_mode_pt = l2s_mem_types[PTE_PAGETABLE]; 592 593} 594 595/* 596 * Allocate an L1 translation table for the specified pmap. 597 * This is called at pmap creation time. 598 */ 599static void 600pmap_alloc_l1(pmap_t pmap) 601{ 602 struct l1_ttable *l1; 603 u_int8_t domain; 604 605 /* 606 * Remove the L1 at the head of the LRU list 607 */ 608 mtx_lock(&l1_lru_lock); 609 l1 = TAILQ_FIRST(&l1_lru_list); 610 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); 611 612 /* 613 * Pick the first available domain number, and update 614 * the link to the next number. 615 */ 616 domain = l1->l1_domain_first; 617 l1->l1_domain_first = l1->l1_domain_free[domain]; 618 619 /* 620 * If there are still free domain numbers in this L1, 621 * put it back on the TAIL of the LRU list. 622 */ 623 if (++l1->l1_domain_use_count < PMAP_DOMAINS) 624 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 625 626 mtx_unlock(&l1_lru_lock); 627 628 /* 629 * Fix up the relevant bits in the pmap structure 630 */ 631 pmap->pm_l1 = l1; 632 pmap->pm_domain = domain + 1; 633} 634 635/* 636 * Free an L1 translation table. 637 * This is called at pmap destruction time. 638 */ 639static void 640pmap_free_l1(pmap_t pmap) 641{ 642 struct l1_ttable *l1 = pmap->pm_l1; 643 644 mtx_lock(&l1_lru_lock); 645 646 /* 647 * If this L1 is currently on the LRU list, remove it. 648 */ 649 if (l1->l1_domain_use_count < PMAP_DOMAINS) 650 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); 651 652 /* 653 * Free up the domain number which was allocated to the pmap 654 */ 655 l1->l1_domain_free[pmap->pm_domain - 1] = l1->l1_domain_first; 656 l1->l1_domain_first = pmap->pm_domain - 1; 657 l1->l1_domain_use_count--; 658 659 /* 660 * The L1 now must have at least 1 free domain, so add 661 * it back to the LRU list. If the use count is zero, 662 * put it at the head of the list, otherwise it goes 663 * to the tail. 664 */ 665 if (l1->l1_domain_use_count == 0) { 666 TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru); 667 } else 668 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 669 670 mtx_unlock(&l1_lru_lock); 671} 672 673/* 674 * Returns a pointer to the L2 bucket associated with the specified pmap 675 * and VA, or NULL if no L2 bucket exists for the address. 676 */ 677static PMAP_INLINE struct l2_bucket * 678pmap_get_l2_bucket(pmap_t pmap, vm_offset_t va) 679{ 680 struct l2_dtable *l2; 681 struct l2_bucket *l2b; 682 u_short l1idx; 683 684 l1idx = L1_IDX(va); 685 686 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL || 687 (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL) 688 return (NULL); 689 690 return (l2b); 691} 692 693/* 694 * Returns a pointer to the L2 bucket associated with the specified pmap 695 * and VA. 696 * 697 * If no L2 bucket exists, perform the necessary allocations to put an L2 698 * bucket/page table in place. 699 * 700 * Note that if a new L2 bucket/page was allocated, the caller *must* 701 * increment the bucket occupancy counter appropriately *before* 702 * releasing the pmap's lock to ensure no other thread or cpu deallocates 703 * the bucket/page in the meantime. 704 */ 705static struct l2_bucket * 706pmap_alloc_l2_bucket(pmap_t pmap, vm_offset_t va) 707{ 708 struct l2_dtable *l2; 709 struct l2_bucket *l2b; 710 u_short l1idx; 711 712 l1idx = L1_IDX(va); 713 714 PMAP_ASSERT_LOCKED(pmap); 715 rw_assert(&pvh_global_lock, RA_WLOCKED); 716 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 717 /* 718 * No mapping at this address, as there is 719 * no entry in the L1 table. 720 * Need to allocate a new l2_dtable. 721 */ 722 PMAP_UNLOCK(pmap); 723 rw_wunlock(&pvh_global_lock); 724 if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) { 725 rw_wlock(&pvh_global_lock); 726 PMAP_LOCK(pmap); 727 return (NULL); 728 } 729 rw_wlock(&pvh_global_lock); 730 PMAP_LOCK(pmap); 731 if (pmap->pm_l2[L2_IDX(l1idx)] != NULL) { 732 /* 733 * Someone already allocated the l2_dtable while 734 * we were doing the same. 735 */ 736 uma_zfree(l2table_zone, l2); 737 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 738 } else { 739 bzero(l2, sizeof(*l2)); 740 /* 741 * Link it into the parent pmap 742 */ 743 pmap->pm_l2[L2_IDX(l1idx)] = l2; 744 } 745 } 746 747 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 748 749 /* 750 * Fetch pointer to the L2 page table associated with the address. 751 */ 752 if (l2b->l2b_kva == NULL) { 753 pt_entry_t *ptep; 754 755 /* 756 * No L2 page table has been allocated. Chances are, this 757 * is because we just allocated the l2_dtable, above. 758 */ 759 PMAP_UNLOCK(pmap); 760 rw_wunlock(&pvh_global_lock); 761 ptep = uma_zalloc(l2zone, M_NOWAIT); 762 rw_wlock(&pvh_global_lock); 763 PMAP_LOCK(pmap); 764 if (l2b->l2b_kva != 0) { 765 /* We lost the race. */ 766 uma_zfree(l2zone, ptep); 767 return (l2b); 768 } 769 l2b->l2b_phys = vtophys(ptep); 770 if (ptep == NULL) { 771 /* 772 * Oops, no more L2 page tables available at this 773 * time. We may need to deallocate the l2_dtable 774 * if we allocated a new one above. 775 */ 776 if (l2->l2_occupancy == 0) { 777 pmap->pm_l2[L2_IDX(l1idx)] = NULL; 778 uma_zfree(l2table_zone, l2); 779 } 780 return (NULL); 781 } 782 783 l2->l2_occupancy++; 784 l2b->l2b_kva = ptep; 785 l2b->l2b_l1idx = l1idx; 786 } 787 788 return (l2b); 789} 790 791static PMAP_INLINE void 792pmap_free_l2_ptp(pt_entry_t *l2) 793{ 794 uma_zfree(l2zone, l2); 795} 796/* 797 * One or more mappings in the specified L2 descriptor table have just been 798 * invalidated. 799 * 800 * Garbage collect the metadata and descriptor table itself if necessary. 801 * 802 * The pmap lock must be acquired when this is called (not necessary 803 * for the kernel pmap). 804 */ 805static void 806pmap_free_l2_bucket(pmap_t pmap, struct l2_bucket *l2b, u_int count) 807{ 808 struct l2_dtable *l2; 809 pd_entry_t *pl1pd, l1pd; 810 pt_entry_t *ptep; 811 u_short l1idx; 812 813 814 /* 815 * Update the bucket's reference count according to how many 816 * PTEs the caller has just invalidated. 817 */ 818 l2b->l2b_occupancy -= count; 819 820 /* 821 * Note: 822 * 823 * Level 2 page tables allocated to the kernel pmap are never freed 824 * as that would require checking all Level 1 page tables and 825 * removing any references to the Level 2 page table. See also the 826 * comment elsewhere about never freeing bootstrap L2 descriptors. 827 * 828 * We make do with just invalidating the mapping in the L2 table. 829 * 830 * This isn't really a big deal in practice and, in fact, leads 831 * to a performance win over time as we don't need to continually 832 * alloc/free. 833 */ 834 if (l2b->l2b_occupancy > 0 || pmap == pmap_kernel()) 835 return; 836 837 /* 838 * There are no more valid mappings in this level 2 page table. 839 * Go ahead and NULL-out the pointer in the bucket, then 840 * free the page table. 841 */ 842 l1idx = l2b->l2b_l1idx; 843 ptep = l2b->l2b_kva; 844 l2b->l2b_kva = NULL; 845 846 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 847 848 /* 849 * If the L1 slot matches the pmap's domain 850 * number, then invalidate it. 851 */ 852 l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK); 853 if (l1pd == (L1_C_DOM(pmap->pm_domain) | L1_TYPE_C)) { 854 *pl1pd = 0; 855 PTE_SYNC(pl1pd); 856 cpu_tlb_flushD_SE((vm_offset_t)ptep); 857 cpu_cpwait(); 858 } 859 860 /* 861 * Release the L2 descriptor table back to the pool cache. 862 */ 863 pmap_free_l2_ptp(ptep); 864 865 /* 866 * Update the reference count in the associated l2_dtable 867 */ 868 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 869 if (--l2->l2_occupancy > 0) 870 return; 871 872 /* 873 * There are no more valid mappings in any of the Level 1 874 * slots managed by this l2_dtable. Go ahead and NULL-out 875 * the pointer in the parent pmap and free the l2_dtable. 876 */ 877 pmap->pm_l2[L2_IDX(l1idx)] = NULL; 878 uma_zfree(l2table_zone, l2); 879} 880 881/* 882 * Pool cache constructors for L2 descriptor tables, metadata and pmap 883 * structures. 884 */ 885static int 886pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags) 887{ 888 struct l2_bucket *l2b; 889 pt_entry_t *ptep, pte; 890 vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK; 891 892 /* 893 * The mappings for these page tables were initially made using 894 * pmap_kenter() by the pool subsystem. Therefore, the cache- 895 * mode will not be right for page table mappings. To avoid 896 * polluting the pmap_kenter() code with a special case for 897 * page tables, we simply fix up the cache-mode here if it's not 898 * correct. 899 */ 900 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 901 ptep = &l2b->l2b_kva[l2pte_index(va)]; 902 pte = *ptep; 903 904 cpu_idcache_wbinv_range(va, PAGE_SIZE); 905 pmap_l2cache_wbinv_range(va, pte & L2_S_FRAME, PAGE_SIZE); 906 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { 907 /* 908 * Page tables must have the cache-mode set to 909 * Write-Thru. 910 */ 911 *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; 912 PTE_SYNC(ptep); 913 cpu_tlb_flushD_SE(va); 914 cpu_cpwait(); 915 } 916 917 memset(mem, 0, L2_TABLE_SIZE_REAL); 918 return (0); 919} 920 921/* 922 * Modify pte bits for all ptes corresponding to the given physical address. 923 * We use `maskbits' rather than `clearbits' because we're always passing 924 * constants and the latter would require an extra inversion at run-time. 925 */ 926static int 927pmap_clearbit(struct vm_page *m, u_int maskbits) 928{ 929 struct l2_bucket *l2b; 930 struct pv_entry *pv, *pve, *next_pv; 931 struct md_page *pvh; 932 pd_entry_t *pl1pd; 933 pt_entry_t *ptep, npte, opte; 934 pmap_t pmap; 935 vm_offset_t va; 936 u_int oflags; 937 int count = 0; 938 939 rw_wlock(&pvh_global_lock); 940 if ((m->flags & PG_FICTITIOUS) != 0) 941 goto small_mappings; 942 943 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 944 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 945 va = pv->pv_va; 946 pmap = PV_PMAP(pv); 947 PMAP_LOCK(pmap); 948 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 949 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, 950 ("pmap_clearbit: valid section mapping expected")); 951 if ((maskbits & PVF_WRITE) && (pv->pv_flags & PVF_WRITE)) 952 (void)pmap_demote_section(pmap, va); 953 else if ((maskbits & PVF_REF) && L1_S_REFERENCED(*pl1pd)) { 954 if (pmap_demote_section(pmap, va)) { 955 if ((pv->pv_flags & PVF_WIRED) == 0) { 956 /* 957 * Remove the mapping to a single page 958 * so that a subsequent access may 959 * repromote. Since the underlying 960 * l2_bucket is fully populated, this 961 * removal never frees an entire 962 * l2_bucket. 963 */ 964 va += (VM_PAGE_TO_PHYS(m) & 965 L1_S_OFFSET); 966 l2b = pmap_get_l2_bucket(pmap, va); 967 KASSERT(l2b != NULL, 968 ("pmap_clearbit: no l2 bucket for " 969 "va 0x%#x, pmap 0x%p", va, pmap)); 970 ptep = &l2b->l2b_kva[l2pte_index(va)]; 971 *ptep = 0; 972 PTE_SYNC(ptep); 973 pmap_free_l2_bucket(pmap, l2b, 1); 974 pve = pmap_remove_pv(m, pmap, va); 975 KASSERT(pve != NULL, ("pmap_clearbit: " 976 "no PV entry for managed mapping")); 977 pmap_free_pv_entry(pmap, pve); 978 979 } 980 } 981 } else if ((maskbits & PVF_MOD) && L1_S_WRITABLE(*pl1pd)) { 982 if (pmap_demote_section(pmap, va)) { 983 if ((pv->pv_flags & PVF_WIRED) == 0) { 984 /* 985 * Write protect the mapping to a 986 * single page so that a subsequent 987 * write access may repromote. 988 */ 989 va += (VM_PAGE_TO_PHYS(m) & 990 L1_S_OFFSET); 991 l2b = pmap_get_l2_bucket(pmap, va); 992 KASSERT(l2b != NULL, 993 ("pmap_clearbit: no l2 bucket for " 994 "va 0x%#x, pmap 0x%p", va, pmap)); 995 ptep = &l2b->l2b_kva[l2pte_index(va)]; 996 if ((*ptep & L2_S_PROTO) != 0) { 997 pve = pmap_find_pv(&m->md, 998 pmap, va); 999 KASSERT(pve != NULL, 1000 ("pmap_clearbit: no PV " 1001 "entry for managed mapping")); 1002 pve->pv_flags &= ~PVF_WRITE; 1003 *ptep |= L2_APX; 1004 PTE_SYNC(ptep); 1005 } 1006 } 1007 } 1008 } 1009 PMAP_UNLOCK(pmap); 1010 } 1011 1012small_mappings: 1013 if (TAILQ_EMPTY(&m->md.pv_list)) { 1014 rw_wunlock(&pvh_global_lock); 1015 return (0); 1016 } 1017 1018 /* 1019 * Loop over all current mappings setting/clearing as appropos 1020 */ 1021 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1022 va = pv->pv_va; 1023 pmap = PV_PMAP(pv); 1024 oflags = pv->pv_flags; 1025 pv->pv_flags &= ~maskbits; 1026 1027 PMAP_LOCK(pmap); 1028 1029 l2b = pmap_get_l2_bucket(pmap, va); 1030 KASSERT(l2b != NULL, ("pmap_clearbit: no l2 bucket for " 1031 "va 0x%#x, pmap 0x%p", va, pmap)); 1032 1033 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1034 npte = opte = *ptep; 1035 1036 if (maskbits & (PVF_WRITE | PVF_MOD)) { 1037 /* make the pte read only */ 1038 npte |= L2_APX; 1039 } 1040 1041 if (maskbits & PVF_REF) { 1042 /* 1043 * Clear referenced flag in PTE so that we 1044 * will take a flag fault the next time the mapping 1045 * is referenced. 1046 */ 1047 npte &= ~L2_S_REF; 1048 } 1049 1050 CTR4(KTR_PMAP,"clearbit: pmap:%p bits:%x pte:%x->%x", 1051 pmap, maskbits, opte, npte); 1052 if (npte != opte) { 1053 count++; 1054 *ptep = npte; 1055 PTE_SYNC(ptep); 1056 /* Flush the TLB entry if a current pmap. */ 1057 if (PTE_BEEN_EXECD(opte)) 1058 cpu_tlb_flushID_SE(pv->pv_va); 1059 else if (PTE_BEEN_REFD(opte)) 1060 cpu_tlb_flushD_SE(pv->pv_va); 1061 cpu_cpwait(); 1062 } 1063 1064 PMAP_UNLOCK(pmap); 1065 1066 } 1067 1068 if (maskbits & PVF_WRITE) 1069 vm_page_aflag_clear(m, PGA_WRITEABLE); 1070 rw_wunlock(&pvh_global_lock); 1071 return (count); 1072} 1073 1074/* 1075 * main pv_entry manipulation functions: 1076 * pmap_enter_pv: enter a mapping onto a vm_page list 1077 * pmap_remove_pv: remove a mappiing from a vm_page list 1078 * 1079 * NOTE: pmap_enter_pv expects to lock the pvh itself 1080 * pmap_remove_pv expects the caller to lock the pvh before calling 1081 */ 1082 1083/* 1084 * pmap_enter_pv: enter a mapping onto a vm_page's PV list 1085 * 1086 * => caller should hold the proper lock on pvh_global_lock 1087 * => caller should have pmap locked 1088 * => we will (someday) gain the lock on the vm_page's PV list 1089 * => caller should adjust ptp's wire_count before calling 1090 * => caller should not adjust pmap's wire_count 1091 */ 1092static void 1093pmap_enter_pv(struct vm_page *m, struct pv_entry *pve, pmap_t pmap, 1094 vm_offset_t va, u_int flags) 1095{ 1096 1097 rw_assert(&pvh_global_lock, RA_WLOCKED); 1098 1099 PMAP_ASSERT_LOCKED(pmap); 1100 pve->pv_va = va; 1101 pve->pv_flags = flags; 1102 1103 TAILQ_INSERT_HEAD(&m->md.pv_list, pve, pv_list); 1104 if (pve->pv_flags & PVF_WIRED) 1105 ++pmap->pm_stats.wired_count; 1106} 1107 1108/* 1109 * 1110 * pmap_find_pv: Find a pv entry 1111 * 1112 * => caller should hold lock on vm_page 1113 */ 1114static PMAP_INLINE struct pv_entry * 1115pmap_find_pv(struct md_page *md, pmap_t pmap, vm_offset_t va) 1116{ 1117 struct pv_entry *pv; 1118 1119 rw_assert(&pvh_global_lock, RA_WLOCKED); 1120 TAILQ_FOREACH(pv, &md->pv_list, pv_list) 1121 if (pmap == PV_PMAP(pv) && va == pv->pv_va) 1122 break; 1123 1124 return (pv); 1125} 1126 1127/* 1128 * vector_page_setprot: 1129 * 1130 * Manipulate the protection of the vector page. 1131 */ 1132void 1133vector_page_setprot(int prot) 1134{ 1135 struct l2_bucket *l2b; 1136 pt_entry_t *ptep; 1137 1138 l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page); 1139 1140 ptep = &l2b->l2b_kva[l2pte_index(vector_page)]; 1141 /* 1142 * Set referenced flag. 1143 * Vectors' page is always desired 1144 * to be allowed to reside in TLB. 1145 */ 1146 *ptep |= L2_S_REF; 1147 1148 pmap_set_prot(ptep, prot|VM_PROT_EXECUTE, 0); 1149 PTE_SYNC(ptep); 1150 cpu_tlb_flushID_SE(vector_page); 1151 cpu_cpwait(); 1152} 1153 1154static void 1155pmap_set_prot(pt_entry_t *ptep, vm_prot_t prot, uint8_t user) 1156{ 1157 1158 *ptep &= ~(L2_S_PROT_MASK | L2_XN); 1159 1160 if (!(prot & VM_PROT_EXECUTE)) 1161 *ptep |= L2_XN; 1162 1163 /* Set defaults first - kernel read access */ 1164 *ptep |= L2_APX; 1165 *ptep |= L2_S_PROT_R; 1166 /* Now tune APs as desired */ 1167 if (user) 1168 *ptep |= L2_S_PROT_U; 1169 1170 if (prot & VM_PROT_WRITE) 1171 *ptep &= ~(L2_APX); 1172} 1173 1174/* 1175 * pmap_remove_pv: try to remove a mapping from a pv_list 1176 * 1177 * => caller should hold proper lock on pmap_main_lock 1178 * => pmap should be locked 1179 * => caller should hold lock on vm_page [so that attrs can be adjusted] 1180 * => caller should adjust ptp's wire_count and free PTP if needed 1181 * => caller should NOT adjust pmap's wire_count 1182 * => we return the removed pve 1183 */ 1184static struct pv_entry * 1185pmap_remove_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va) 1186{ 1187 struct pv_entry *pve; 1188 1189 rw_assert(&pvh_global_lock, RA_WLOCKED); 1190 PMAP_ASSERT_LOCKED(pmap); 1191 1192 pve = pmap_find_pv(&m->md, pmap, va); /* find corresponding pve */ 1193 if (pve != NULL) { 1194 TAILQ_REMOVE(&m->md.pv_list, pve, pv_list); 1195 if (pve->pv_flags & PVF_WIRED) 1196 --pmap->pm_stats.wired_count; 1197 } 1198 if (TAILQ_EMPTY(&m->md.pv_list)) 1199 vm_page_aflag_clear(m, PGA_WRITEABLE); 1200 1201 return(pve); /* return removed pve */ 1202} 1203 1204/* 1205 * 1206 * pmap_modify_pv: Update pv flags 1207 * 1208 * => caller should hold lock on vm_page [so that attrs can be adjusted] 1209 * => caller should NOT adjust pmap's wire_count 1210 * => we return the old flags 1211 * 1212 * Modify a physical-virtual mapping in the pv table 1213 */ 1214static u_int 1215pmap_modify_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va, 1216 u_int clr_mask, u_int set_mask) 1217{ 1218 struct pv_entry *npv; 1219 u_int flags, oflags; 1220 1221 PMAP_ASSERT_LOCKED(pmap); 1222 rw_assert(&pvh_global_lock, RA_WLOCKED); 1223 if ((npv = pmap_find_pv(&m->md, pmap, va)) == NULL) 1224 return (0); 1225 1226 /* 1227 * There is at least one VA mapping this page. 1228 */ 1229 oflags = npv->pv_flags; 1230 npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask; 1231 1232 if ((flags ^ oflags) & PVF_WIRED) { 1233 if (flags & PVF_WIRED) 1234 ++pmap->pm_stats.wired_count; 1235 else 1236 --pmap->pm_stats.wired_count; 1237 } 1238 1239 return (oflags); 1240} 1241 1242/* Function to set the debug level of the pmap code */ 1243#ifdef PMAP_DEBUG 1244void 1245pmap_debug(int level) 1246{ 1247 pmap_debug_level = level; 1248 dprintf("pmap_debug: level=%d\n", pmap_debug_level); 1249} 1250#endif /* PMAP_DEBUG */ 1251 1252void 1253pmap_pinit0(struct pmap *pmap) 1254{ 1255 PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap)); 1256 1257 bcopy(kernel_pmap, pmap, sizeof(*pmap)); 1258 bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx)); 1259 PMAP_LOCK_INIT(pmap); 1260 TAILQ_INIT(&pmap->pm_pvchunk); 1261} 1262 1263/* 1264 * Initialize a vm_page's machine-dependent fields. 1265 */ 1266void 1267pmap_page_init(vm_page_t m) 1268{ 1269 1270 TAILQ_INIT(&m->md.pv_list); 1271 m->md.pv_memattr = VM_MEMATTR_DEFAULT; 1272} 1273 1274static vm_offset_t 1275pmap_ptelist_alloc(vm_offset_t *head) 1276{ 1277 pt_entry_t *pte; 1278 vm_offset_t va; 1279 1280 va = *head; 1281 if (va == 0) 1282 return (va); /* Out of memory */ 1283 pte = vtopte(va); 1284 *head = *pte; 1285 if ((*head & L2_TYPE_MASK) != L2_TYPE_INV) 1286 panic("%s: va is not L2_TYPE_INV!", __func__); 1287 *pte = 0; 1288 return (va); 1289} 1290 1291static void 1292pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) 1293{ 1294 pt_entry_t *pte; 1295 1296 if ((va & L2_TYPE_MASK) != L2_TYPE_INV) 1297 panic("%s: freeing va that is not L2_TYPE INV!", __func__); 1298 pte = vtopte(va); 1299 *pte = *head; /* virtual! L2_TYPE is L2_TYPE_INV though */ 1300 *head = va; 1301} 1302 1303static void 1304pmap_ptelist_init(vm_offset_t *head, void *base, int npages) 1305{ 1306 int i; 1307 vm_offset_t va; 1308 1309 *head = 0; 1310 for (i = npages - 1; i >= 0; i--) { 1311 va = (vm_offset_t)base + i * PAGE_SIZE; 1312 pmap_ptelist_free(head, va); 1313 } 1314} 1315 1316/* 1317 * Initialize the pmap module. 1318 * Called by vm_init, to initialize any structures that the pmap 1319 * system needs to map virtual memory. 1320 */ 1321void 1322pmap_init(void) 1323{ 1324 vm_size_t s; 1325 int i, pv_npg; 1326 1327 l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor, 1328 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1329 l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL, 1330 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1331 1332 /* 1333 * Are large page mappings supported and enabled? 1334 */ 1335 TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled); 1336 if (sp_enabled) { 1337 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, 1338 ("pmap_init: can't assign to pagesizes[1]")); 1339 pagesizes[1] = NBPDR; 1340 } 1341 1342 /* 1343 * Calculate the size of the pv head table for superpages. 1344 */ 1345 for (i = 0; phys_avail[i + 1]; i += 2); 1346 pv_npg = round_1mpage(phys_avail[(i - 2) + 1]) / NBPDR; 1347 1348 /* 1349 * Allocate memory for the pv head table for superpages. 1350 */ 1351 s = (vm_size_t)(pv_npg * sizeof(struct md_page)); 1352 s = round_page(s); 1353 pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, 1354 M_WAITOK | M_ZERO); 1355 for (i = 0; i < pv_npg; i++) 1356 TAILQ_INIT(&pv_table[i].pv_list); 1357 1358 /* 1359 * Initialize the address space for the pv chunks. 1360 */ 1361 1362 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 1363 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 1364 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 1365 pv_entry_max = roundup(pv_entry_max, _NPCPV); 1366 pv_entry_high_water = 9 * (pv_entry_max / 10); 1367 1368 pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); 1369 pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); 1370 1371 if (pv_chunkbase == NULL) 1372 panic("pmap_init: not enough kvm for pv chunks"); 1373 1374 pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); 1375 1376 /* 1377 * Now it is safe to enable pv_table recording. 1378 */ 1379 PDEBUG(1, printf("pmap_init: done!\n")); 1380} 1381 1382SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, 1383 "Max number of PV entries"); 1384SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, 1385 "Page share factor per proc"); 1386 1387static SYSCTL_NODE(_vm_pmap, OID_AUTO, section, CTLFLAG_RD, 0, 1388 "1MB page mapping counters"); 1389 1390static u_long pmap_section_demotions; 1391SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, demotions, CTLFLAG_RD, 1392 &pmap_section_demotions, 0, "1MB page demotions"); 1393 1394static u_long pmap_section_mappings; 1395SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, mappings, CTLFLAG_RD, 1396 &pmap_section_mappings, 0, "1MB page mappings"); 1397 1398static u_long pmap_section_p_failures; 1399SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, p_failures, CTLFLAG_RD, 1400 &pmap_section_p_failures, 0, "1MB page promotion failures"); 1401 1402static u_long pmap_section_promotions; 1403SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, promotions, CTLFLAG_RD, 1404 &pmap_section_promotions, 0, "1MB page promotions"); 1405 1406int 1407pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype, int user) 1408{ 1409 struct l2_dtable *l2; 1410 struct l2_bucket *l2b; 1411 pd_entry_t *pl1pd, l1pd; 1412 pt_entry_t *ptep, pte; 1413 vm_paddr_t pa; 1414 u_int l1idx; 1415 int rv = 0; 1416 1417 l1idx = L1_IDX(va); 1418 rw_wlock(&pvh_global_lock); 1419 PMAP_LOCK(pmap); 1420 /* 1421 * Check and possibly fix-up L1 section mapping 1422 * only when superpage mappings are enabled to speed up. 1423 */ 1424 if (sp_enabled) { 1425 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 1426 l1pd = *pl1pd; 1427 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 1428 /* Catch an access to the vectors section */ 1429 if (l1idx == L1_IDX(vector_page)) 1430 goto out; 1431 /* 1432 * Stay away from the kernel mappings. 1433 * None of them should fault from L1 entry. 1434 */ 1435 if (pmap == pmap_kernel()) 1436 goto out; 1437 /* 1438 * Catch a forbidden userland access 1439 */ 1440 if (user && !(l1pd & L1_S_PROT_U)) 1441 goto out; 1442 /* 1443 * Superpage is always either mapped read only 1444 * or it is modified and permitted to be written 1445 * by default. Therefore, process only reference 1446 * flag fault and demote page in case of write fault. 1447 */ 1448 if ((ftype & VM_PROT_WRITE) && !L1_S_WRITABLE(l1pd) && 1449 L1_S_REFERENCED(l1pd)) { 1450 (void)pmap_demote_section(pmap, va); 1451 goto out; 1452 } else if (!L1_S_REFERENCED(l1pd)) { 1453 /* Mark the page "referenced" */ 1454 *pl1pd = l1pd | L1_S_REF; 1455 PTE_SYNC(pl1pd); 1456 goto l1_section_out; 1457 } else 1458 goto out; 1459 } 1460 } 1461 /* 1462 * If there is no l2_dtable for this address, then the process 1463 * has no business accessing it. 1464 * 1465 * Note: This will catch userland processes trying to access 1466 * kernel addresses. 1467 */ 1468 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 1469 if (l2 == NULL) 1470 goto out; 1471 1472 /* 1473 * Likewise if there is no L2 descriptor table 1474 */ 1475 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 1476 if (l2b->l2b_kva == NULL) 1477 goto out; 1478 1479 /* 1480 * Check the PTE itself. 1481 */ 1482 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1483 pte = *ptep; 1484 if (pte == 0) 1485 goto out; 1486 1487 /* 1488 * Catch a userland access to the vector page mapped at 0x0 1489 */ 1490 if (user && !(pte & L2_S_PROT_U)) 1491 goto out; 1492 if (va == vector_page) 1493 goto out; 1494 1495 pa = l2pte_pa(pte); 1496 CTR5(KTR_PMAP, "pmap_fault_fix: pmap:%p va:%x pte:0x%x ftype:%x user:%x", 1497 pmap, va, pte, ftype, user); 1498 if ((ftype & VM_PROT_WRITE) && !(L2_S_WRITABLE(pte)) && 1499 L2_S_REFERENCED(pte)) { 1500 /* 1501 * This looks like a good candidate for "page modified" 1502 * emulation... 1503 */ 1504 struct pv_entry *pv; 1505 struct vm_page *m; 1506 1507 /* Extract the physical address of the page */ 1508 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) { 1509 goto out; 1510 } 1511 /* Get the current flags for this page. */ 1512 1513 pv = pmap_find_pv(&m->md, pmap, va); 1514 if (pv == NULL) { 1515 goto out; 1516 } 1517 1518 /* 1519 * Do the flags say this page is writable? If not then it 1520 * is a genuine write fault. If yes then the write fault is 1521 * our fault as we did not reflect the write access in the 1522 * PTE. Now we know a write has occurred we can correct this 1523 * and also set the modified bit 1524 */ 1525 if ((pv->pv_flags & PVF_WRITE) == 0) { 1526 goto out; 1527 } 1528 1529 vm_page_dirty(m); 1530 1531 /* Re-enable write permissions for the page */ 1532 *ptep = (pte & ~L2_APX); 1533 PTE_SYNC(ptep); 1534 rv = 1; 1535 CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", *ptep); 1536 } else if (!L2_S_REFERENCED(pte)) { 1537 /* 1538 * This looks like a good candidate for "page referenced" 1539 * emulation. 1540 */ 1541 struct pv_entry *pv; 1542 struct vm_page *m; 1543 1544 /* Extract the physical address of the page */ 1545 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) 1546 goto out; 1547 /* Get the current flags for this page. */ 1548 pv = pmap_find_pv(&m->md, pmap, va); 1549 if (pv == NULL) 1550 goto out; 1551 1552 vm_page_aflag_set(m, PGA_REFERENCED); 1553 1554 /* Mark the page "referenced" */ 1555 *ptep = pte | L2_S_REF; 1556 PTE_SYNC(ptep); 1557 rv = 1; 1558 CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", *ptep); 1559 } 1560 1561 /* 1562 * We know there is a valid mapping here, so simply 1563 * fix up the L1 if necessary. 1564 */ 1565 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 1566 l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 1567 if (*pl1pd != l1pd) { 1568 *pl1pd = l1pd; 1569 PTE_SYNC(pl1pd); 1570 rv = 1; 1571 } 1572 1573#ifdef DEBUG 1574 /* 1575 * If 'rv == 0' at this point, it generally indicates that there is a 1576 * stale TLB entry for the faulting address. This happens when two or 1577 * more processes are sharing an L1. Since we don't flush the TLB on 1578 * a context switch between such processes, we can take domain faults 1579 * for mappings which exist at the same VA in both processes. EVEN IF 1580 * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for 1581 * example. 1582 * 1583 * This is extremely likely to happen if pmap_enter() updated the L1 1584 * entry for a recently entered mapping. In this case, the TLB is 1585 * flushed for the new mapping, but there may still be TLB entries for 1586 * other mappings belonging to other processes in the 1MB range 1587 * covered by the L1 entry. 1588 * 1589 * Since 'rv == 0', we know that the L1 already contains the correct 1590 * value, so the fault must be due to a stale TLB entry. 1591 * 1592 * Since we always need to flush the TLB anyway in the case where we 1593 * fixed up the L1, or frobbed the L2 PTE, we effectively deal with 1594 * stale TLB entries dynamically. 1595 * 1596 * However, the above condition can ONLY happen if the current L1 is 1597 * being shared. If it happens when the L1 is unshared, it indicates 1598 * that other parts of the pmap are not doing their job WRT managing 1599 * the TLB. 1600 */ 1601 if (rv == 0 && pmap->pm_l1->l1_domain_use_count == 1) { 1602 printf("fixup: pmap %p, va 0x%08x, ftype %d - nothing to do!\n", 1603 pmap, va, ftype); 1604 printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n", 1605 l2, l2b, ptep, pl1pd); 1606 printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n", 1607 pte, l1pd, last_fault_code); 1608#ifdef DDB 1609 Debugger(); 1610#endif 1611 } 1612#endif 1613 1614l1_section_out: 1615 cpu_tlb_flushID_SE(va); 1616 cpu_cpwait(); 1617 1618 rv = 1; 1619 1620out: 1621 rw_wunlock(&pvh_global_lock); 1622 PMAP_UNLOCK(pmap); 1623 return (rv); 1624} 1625 1626void 1627pmap_postinit(void) 1628{ 1629 struct l2_bucket *l2b; 1630 struct l1_ttable *l1; 1631 pd_entry_t *pl1pt; 1632 pt_entry_t *ptep, pte; 1633 vm_offset_t va, eva; 1634 u_int loop, needed; 1635 1636 needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0); 1637 needed -= 1; 1638 l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK); 1639 1640 for (loop = 0; loop < needed; loop++, l1++) { 1641 /* Allocate a L1 page table */ 1642 va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0, 1643 0xffffffff, L1_TABLE_SIZE, 0); 1644 1645 if (va == 0) 1646 panic("Cannot allocate L1 KVM"); 1647 1648 eva = va + L1_TABLE_SIZE; 1649 pl1pt = (pd_entry_t *)va; 1650 1651 while (va < eva) { 1652 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 1653 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1654 pte = *ptep; 1655 pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; 1656 *ptep = pte; 1657 PTE_SYNC(ptep); 1658 cpu_tlb_flushID_SE(va); 1659 cpu_cpwait(); 1660 va += PAGE_SIZE; 1661 } 1662 pmap_init_l1(l1, pl1pt); 1663 } 1664#ifdef DEBUG 1665 printf("pmap_postinit: Allocated %d static L1 descriptor tables\n", 1666 needed); 1667#endif 1668} 1669 1670/* 1671 * This is used to stuff certain critical values into the PCB where they 1672 * can be accessed quickly from cpu_switch() et al. 1673 */ 1674void 1675pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb) 1676{ 1677 struct l2_bucket *l2b; 1678 1679 pcb->pcb_pagedir = pmap->pm_l1->l1_physaddr; 1680 pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | 1681 (DOMAIN_CLIENT << (pmap->pm_domain * 2)); 1682 1683 if (vector_page < KERNBASE) { 1684 pcb->pcb_pl1vec = &pmap->pm_l1->l1_kva[L1_IDX(vector_page)]; 1685 l2b = pmap_get_l2_bucket(pmap, vector_page); 1686 pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO | 1687 L1_C_DOM(pmap->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL); 1688 } else 1689 pcb->pcb_pl1vec = NULL; 1690} 1691 1692void 1693pmap_activate(struct thread *td) 1694{ 1695 pmap_t pmap; 1696 struct pcb *pcb; 1697 1698 pmap = vmspace_pmap(td->td_proc->p_vmspace); 1699 pcb = td->td_pcb; 1700 1701 critical_enter(); 1702 pmap_set_pcb_pagedir(pmap, pcb); 1703 1704 if (td == curthread) { 1705 u_int cur_dacr, cur_ttb; 1706 1707 __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb)); 1708 __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr)); 1709 1710 cur_ttb &= ~(L1_TABLE_SIZE - 1); 1711 1712 if (cur_ttb == (u_int)pcb->pcb_pagedir && 1713 cur_dacr == pcb->pcb_dacr) { 1714 /* 1715 * No need to switch address spaces. 1716 */ 1717 critical_exit(); 1718 return; 1719 } 1720 1721 1722 /* 1723 * We MUST, I repeat, MUST fix up the L1 entry corresponding 1724 * to 'vector_page' in the incoming L1 table before switching 1725 * to it otherwise subsequent interrupts/exceptions (including 1726 * domain faults!) will jump into hyperspace. 1727 */ 1728 if (pcb->pcb_pl1vec) { 1729 *pcb->pcb_pl1vec = pcb->pcb_l1vec; 1730 } 1731 1732 cpu_domains(pcb->pcb_dacr); 1733 cpu_setttb(pcb->pcb_pagedir); 1734 } 1735 critical_exit(); 1736} 1737 1738static int 1739pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va) 1740{ 1741 pd_entry_t *pdep, pde; 1742 pt_entry_t *ptep, pte; 1743 vm_offset_t pa; 1744 int rv = 0; 1745 1746 /* 1747 * Make sure the descriptor itself has the correct cache mode 1748 */ 1749 pdep = &kl1[L1_IDX(va)]; 1750 pde = *pdep; 1751 1752 if (l1pte_section_p(pde)) { 1753 if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) { 1754 *pdep = (pde & ~L1_S_CACHE_MASK) | 1755 pte_l1_s_cache_mode_pt; 1756 PTE_SYNC(pdep); 1757 rv = 1; 1758 } 1759 } else { 1760 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); 1761 ptep = (pt_entry_t *)kernel_pt_lookup(pa); 1762 if (ptep == NULL) 1763 panic("pmap_bootstrap: No L2 for L2 @ va %p\n", ptep); 1764 1765 ptep = &ptep[l2pte_index(va)]; 1766 pte = *ptep; 1767 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { 1768 *ptep = (pte & ~L2_S_CACHE_MASK) | 1769 pte_l2_s_cache_mode_pt; 1770 PTE_SYNC(ptep); 1771 rv = 1; 1772 } 1773 } 1774 1775 return (rv); 1776} 1777 1778static void 1779pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap, 1780 pt_entry_t **ptep) 1781{ 1782 vm_offset_t va = *availp; 1783 struct l2_bucket *l2b; 1784 1785 if (ptep) { 1786 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 1787 if (l2b == NULL) 1788 panic("pmap_alloc_specials: no l2b for 0x%x", va); 1789 1790 *ptep = &l2b->l2b_kva[l2pte_index(va)]; 1791 } 1792 1793 *vap = va; 1794 *availp = va + (PAGE_SIZE * pages); 1795} 1796 1797/* 1798 * Bootstrap the system enough to run with virtual memory. 1799 * 1800 * On the arm this is called after mapping has already been enabled 1801 * and just syncs the pmap module with what has already been done. 1802 * [We can't call it easily with mapping off since the kernel is not 1803 * mapped with PA == VA, hence we would have to relocate every address 1804 * from the linked base (virtual) address "KERNBASE" to the actual 1805 * (physical) address starting relative to 0] 1806 */ 1807#define PMAP_STATIC_L2_SIZE 16 1808 1809void 1810pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt) 1811{ 1812 static struct l1_ttable static_l1; 1813 static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE]; 1814 struct l1_ttable *l1 = &static_l1; 1815 struct l2_dtable *l2; 1816 struct l2_bucket *l2b; 1817 struct czpages *czp; 1818 pd_entry_t pde; 1819 pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va; 1820 pt_entry_t *ptep; 1821 vm_paddr_t pa; 1822 vm_offset_t va; 1823 vm_size_t size; 1824 int i, l1idx, l2idx, l2next = 0; 1825 1826 PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n", 1827 firstaddr, vm_max_kernel_address)); 1828 1829 virtual_avail = firstaddr; 1830 kernel_pmap->pm_l1 = l1; 1831 kernel_l1pa = l1pt->pv_pa; 1832 1833 /* 1834 * Scan the L1 translation table created by initarm() and create 1835 * the required metadata for all valid mappings found in it. 1836 */ 1837 for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) { 1838 pde = kernel_l1pt[l1idx]; 1839 1840 /* 1841 * We're only interested in Coarse mappings. 1842 * pmap_extract() can deal with section mappings without 1843 * recourse to checking L2 metadata. 1844 */ 1845 if ((pde & L1_TYPE_MASK) != L1_TYPE_C) 1846 continue; 1847 1848 /* 1849 * Lookup the KVA of this L2 descriptor table 1850 */ 1851 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); 1852 ptep = (pt_entry_t *)kernel_pt_lookup(pa); 1853 1854 if (ptep == NULL) { 1855 panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx", 1856 (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa); 1857 } 1858 1859 /* 1860 * Fetch the associated L2 metadata structure. 1861 * Allocate a new one if necessary. 1862 */ 1863 if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 1864 if (l2next == PMAP_STATIC_L2_SIZE) 1865 panic("pmap_bootstrap: out of static L2s"); 1866 kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 = 1867 &static_l2[l2next++]; 1868 } 1869 1870 /* 1871 * One more L1 slot tracked... 1872 */ 1873 l2->l2_occupancy++; 1874 1875 /* 1876 * Fill in the details of the L2 descriptor in the 1877 * appropriate bucket. 1878 */ 1879 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 1880 l2b->l2b_kva = ptep; 1881 l2b->l2b_phys = pa; 1882 l2b->l2b_l1idx = l1idx; 1883 1884 /* 1885 * Establish an initial occupancy count for this descriptor 1886 */ 1887 for (l2idx = 0; 1888 l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); 1889 l2idx++) { 1890 if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) { 1891 l2b->l2b_occupancy++; 1892 } 1893 } 1894 1895 /* 1896 * Make sure the descriptor itself has the correct cache mode. 1897 * If not, fix it, but whine about the problem. Port-meisters 1898 * should consider this a clue to fix up their initarm() 1899 * function. :) 1900 */ 1901 if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) { 1902 printf("pmap_bootstrap: WARNING! wrong cache mode for " 1903 "L2 pte @ %p\n", ptep); 1904 } 1905 } 1906 1907 1908 /* 1909 * Ensure the primary (kernel) L1 has the correct cache mode for 1910 * a page table. Bitch if it is not correctly set. 1911 */ 1912 for (va = (vm_offset_t)kernel_l1pt; 1913 va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) { 1914 if (pmap_set_pt_cache_mode(kernel_l1pt, va)) 1915 printf("pmap_bootstrap: WARNING! wrong cache mode for " 1916 "primary L1 @ 0x%x\n", va); 1917 } 1918 1919 cpu_dcache_wbinv_all(); 1920 cpu_l2cache_wbinv_all(); 1921 cpu_tlb_flushID(); 1922 cpu_cpwait(); 1923 1924 PMAP_LOCK_INIT(kernel_pmap); 1925 CPU_FILL(&kernel_pmap->pm_active); 1926 kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; 1927 TAILQ_INIT(&kernel_pmap->pm_pvchunk); 1928 1929 /* 1930 * Initialize the global pv list lock. 1931 */ 1932 rw_init(&pvh_global_lock, "pmap pv global"); 1933 1934 /* 1935 * Reserve some special page table entries/VA space for temporary 1936 * mapping of pages that are being copied or zeroed. 1937 */ 1938 for (czp = cpu_czpages, i = 0; i < MAXCPU; ++i, ++czp) { 1939 mtx_init(&czp->lock, "czpages", NULL, MTX_DEF); 1940 pmap_alloc_specials(&virtual_avail, 1, &czp->srcva, &czp->srcptep); 1941 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->srcptep); 1942 pmap_alloc_specials(&virtual_avail, 1, &czp->dstva, &czp->dstptep); 1943 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->dstptep); 1944 } 1945 1946 size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) / 1947 L1_S_SIZE; 1948 pmap_alloc_specials(&virtual_avail, 1949 round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE, 1950 &pmap_kernel_l2ptp_kva, NULL); 1951 1952 size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE; 1953 pmap_alloc_specials(&virtual_avail, 1954 round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE, 1955 &pmap_kernel_l2dtable_kva, NULL); 1956 1957 pmap_alloc_specials(&virtual_avail, 1958 1, (vm_offset_t*)&_tmppt, NULL); 1959 pmap_alloc_specials(&virtual_avail, 1960 MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL); 1961 SLIST_INIT(&l1_list); 1962 TAILQ_INIT(&l1_lru_list); 1963 mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF); 1964 pmap_init_l1(l1, kernel_l1pt); 1965 cpu_dcache_wbinv_all(); 1966 cpu_l2cache_wbinv_all(); 1967 cpu_tlb_flushID(); 1968 cpu_cpwait(); 1969 1970 virtual_avail = round_page(virtual_avail); 1971 virtual_end = vm_max_kernel_address; 1972 kernel_vm_end = pmap_curmaxkvaddr; 1973 1974 pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb); 1975} 1976 1977/*************************************************** 1978 * Pmap allocation/deallocation routines. 1979 ***************************************************/ 1980 1981/* 1982 * Release any resources held by the given physical map. 1983 * Called when a pmap initialized by pmap_pinit is being released. 1984 * Should only be called if the map contains no valid mappings. 1985 */ 1986void 1987pmap_release(pmap_t pmap) 1988{ 1989 struct pcb *pcb; 1990 1991 cpu_tlb_flushID(); 1992 cpu_cpwait(); 1993 if (vector_page < KERNBASE) { 1994 struct pcb *curpcb = PCPU_GET(curpcb); 1995 pcb = thread0.td_pcb; 1996 if (pmap_is_current(pmap)) { 1997 /* 1998 * Frob the L1 entry corresponding to the vector 1999 * page so that it contains the kernel pmap's domain 2000 * number. This will ensure pmap_remove() does not 2001 * pull the current vector page out from under us. 2002 */ 2003 critical_enter(); 2004 *pcb->pcb_pl1vec = pcb->pcb_l1vec; 2005 cpu_domains(pcb->pcb_dacr); 2006 cpu_setttb(pcb->pcb_pagedir); 2007 critical_exit(); 2008 } 2009 pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE); 2010 /* 2011 * Make sure cpu_switch(), et al, DTRT. This is safe to do 2012 * since this process has no remaining mappings of its own. 2013 */ 2014 curpcb->pcb_pl1vec = pcb->pcb_pl1vec; 2015 curpcb->pcb_l1vec = pcb->pcb_l1vec; 2016 curpcb->pcb_dacr = pcb->pcb_dacr; 2017 curpcb->pcb_pagedir = pcb->pcb_pagedir; 2018 2019 } 2020 pmap_free_l1(pmap); 2021 2022 dprintf("pmap_release()\n"); 2023} 2024 2025 2026 2027/* 2028 * Helper function for pmap_grow_l2_bucket() 2029 */ 2030static __inline int 2031pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap) 2032{ 2033 struct l2_bucket *l2b; 2034 pt_entry_t *ptep; 2035 vm_paddr_t pa; 2036 struct vm_page *m; 2037 2038 m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); 2039 if (m == NULL) 2040 return (1); 2041 pa = VM_PAGE_TO_PHYS(m); 2042 2043 if (pap) 2044 *pap = pa; 2045 2046 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2047 2048 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2049 *ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF; 2050 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0); 2051 PTE_SYNC(ptep); 2052 cpu_tlb_flushD_SE(va); 2053 cpu_cpwait(); 2054 2055 return (0); 2056} 2057 2058/* 2059 * This is the same as pmap_alloc_l2_bucket(), except that it is only 2060 * used by pmap_growkernel(). 2061 */ 2062static __inline struct l2_bucket * 2063pmap_grow_l2_bucket(pmap_t pmap, vm_offset_t va) 2064{ 2065 struct l2_dtable *l2; 2066 struct l2_bucket *l2b; 2067 struct l1_ttable *l1; 2068 pd_entry_t *pl1pd; 2069 u_short l1idx; 2070 vm_offset_t nva; 2071 2072 l1idx = L1_IDX(va); 2073 2074 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 2075 /* 2076 * No mapping at this address, as there is 2077 * no entry in the L1 table. 2078 * Need to allocate a new l2_dtable. 2079 */ 2080 nva = pmap_kernel_l2dtable_kva; 2081 if ((nva & PAGE_MASK) == 0) { 2082 /* 2083 * Need to allocate a backing page 2084 */ 2085 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) 2086 return (NULL); 2087 } 2088 2089 l2 = (struct l2_dtable *)nva; 2090 nva += sizeof(struct l2_dtable); 2091 2092 if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva & 2093 PAGE_MASK)) { 2094 /* 2095 * The new l2_dtable straddles a page boundary. 2096 * Map in another page to cover it. 2097 */ 2098 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) 2099 return (NULL); 2100 } 2101 2102 pmap_kernel_l2dtable_kva = nva; 2103 2104 /* 2105 * Link it into the parent pmap 2106 */ 2107 pmap->pm_l2[L2_IDX(l1idx)] = l2; 2108 memset(l2, 0, sizeof(*l2)); 2109 } 2110 2111 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 2112 2113 /* 2114 * Fetch pointer to the L2 page table associated with the address. 2115 */ 2116 if (l2b->l2b_kva == NULL) { 2117 pt_entry_t *ptep; 2118 2119 /* 2120 * No L2 page table has been allocated. Chances are, this 2121 * is because we just allocated the l2_dtable, above. 2122 */ 2123 nva = pmap_kernel_l2ptp_kva; 2124 ptep = (pt_entry_t *)nva; 2125 if ((nva & PAGE_MASK) == 0) { 2126 /* 2127 * Need to allocate a backing page 2128 */ 2129 if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt, 2130 &pmap_kernel_l2ptp_phys)) 2131 return (NULL); 2132 } 2133 memset(ptep, 0, L2_TABLE_SIZE_REAL); 2134 l2->l2_occupancy++; 2135 l2b->l2b_kva = ptep; 2136 l2b->l2b_l1idx = l1idx; 2137 l2b->l2b_phys = pmap_kernel_l2ptp_phys; 2138 2139 pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL; 2140 pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL; 2141 } 2142 2143 /* Distribute new L1 entry to all other L1s */ 2144 SLIST_FOREACH(l1, &l1_list, l1_link) { 2145 pl1pd = &l1->l1_kva[L1_IDX(va)]; 2146 *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) | 2147 L1_C_PROTO; 2148 PTE_SYNC(pl1pd); 2149 } 2150 cpu_tlb_flushID_SE(va); 2151 cpu_cpwait(); 2152 2153 return (l2b); 2154} 2155 2156 2157/* 2158 * grow the number of kernel page table entries, if needed 2159 */ 2160void 2161pmap_growkernel(vm_offset_t addr) 2162{ 2163 pmap_t kpmap = pmap_kernel(); 2164 2165 if (addr <= pmap_curmaxkvaddr) 2166 return; /* we are OK */ 2167 2168 /* 2169 * whoops! we need to add kernel PTPs 2170 */ 2171 2172 /* Map 1MB at a time */ 2173 for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE) 2174 pmap_grow_l2_bucket(kpmap, pmap_curmaxkvaddr); 2175 2176 kernel_vm_end = pmap_curmaxkvaddr; 2177} 2178 2179/* 2180 * Returns TRUE if the given page is mapped individually or as part of 2181 * a 1MB section. Otherwise, returns FALSE. 2182 */ 2183boolean_t 2184pmap_page_is_mapped(vm_page_t m) 2185{ 2186 boolean_t rv; 2187 2188 if ((m->oflags & VPO_UNMANAGED) != 0) 2189 return (FALSE); 2190 rw_wlock(&pvh_global_lock); 2191 rv = !TAILQ_EMPTY(&m->md.pv_list) || 2192 ((m->flags & PG_FICTITIOUS) == 0 && 2193 !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); 2194 rw_wunlock(&pvh_global_lock); 2195 return (rv); 2196} 2197 2198/* 2199 * Remove all pages from specified address space 2200 * this aids process exit speeds. Also, this code 2201 * is special cased for current process only, but 2202 * can have the more generic (and slightly slower) 2203 * mode enabled. This is much faster than pmap_remove 2204 * in the case of running down an entire address space. 2205 */ 2206void 2207pmap_remove_pages(pmap_t pmap) 2208{ 2209 struct pv_entry *pv; 2210 struct l2_bucket *l2b = NULL; 2211 struct pv_chunk *pc, *npc; 2212 struct md_page *pvh; 2213 pd_entry_t *pl1pd, l1pd; 2214 pt_entry_t *ptep; 2215 vm_page_t m, mt; 2216 vm_offset_t va; 2217 uint32_t inuse, bitmask; 2218 int allfree, bit, field, idx; 2219 2220 rw_wlock(&pvh_global_lock); 2221 PMAP_LOCK(pmap); 2222 2223 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2224 allfree = 1; 2225 for (field = 0; field < _NPCM; field++) { 2226 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2227 while (inuse != 0) { 2228 bit = ffs(inuse) - 1; 2229 bitmask = 1ul << bit; 2230 idx = field * sizeof(inuse) * NBBY + bit; 2231 pv = &pc->pc_pventry[idx]; 2232 va = pv->pv_va; 2233 inuse &= ~bitmask; 2234 if (pv->pv_flags & PVF_WIRED) { 2235 /* Cannot remove wired pages now. */ 2236 allfree = 0; 2237 continue; 2238 } 2239 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 2240 l1pd = *pl1pd; 2241 l2b = pmap_get_l2_bucket(pmap, va); 2242 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 2243 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 2244 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2245 if (TAILQ_EMPTY(&pvh->pv_list)) { 2246 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 2247 KASSERT((vm_offset_t)m >= KERNBASE, 2248 ("Trying to access non-existent page " 2249 "va %x l1pd %x", trunc_1mpage(va), l1pd)); 2250 for (mt = m; mt < &m[L2_PTE_NUM_TOTAL]; mt++) { 2251 if (TAILQ_EMPTY(&mt->md.pv_list)) 2252 vm_page_aflag_clear(mt, PGA_WRITEABLE); 2253 } 2254 } 2255 if (l2b != NULL) { 2256 KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL, 2257 ("pmap_remove_pages: l2_bucket occupancy error")); 2258 pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL); 2259 } 2260 pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL; 2261 *pl1pd = 0; 2262 PTE_SYNC(pl1pd); 2263 } else { 2264 KASSERT(l2b != NULL, 2265 ("No L2 bucket in pmap_remove_pages")); 2266 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2267 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 2268 KASSERT((vm_offset_t)m >= KERNBASE, 2269 ("Trying to access non-existent page " 2270 "va %x pte %x", va, *ptep)); 2271 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2272 if (TAILQ_EMPTY(&m->md.pv_list) && 2273 (m->flags & PG_FICTITIOUS) == 0) { 2274 pvh = pa_to_pvh(l2pte_pa(*ptep)); 2275 if (TAILQ_EMPTY(&pvh->pv_list)) 2276 vm_page_aflag_clear(m, PGA_WRITEABLE); 2277 } 2278 *ptep = 0; 2279 PTE_SYNC(ptep); 2280 pmap_free_l2_bucket(pmap, l2b, 1); 2281 pmap->pm_stats.resident_count--; 2282 } 2283 2284 /* Mark free */ 2285 PV_STAT(pv_entry_frees++); 2286 PV_STAT(pv_entry_spare++); 2287 pv_entry_count--; 2288 pc->pc_map[field] |= bitmask; 2289 } 2290 } 2291 if (allfree) { 2292 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2293 pmap_free_pv_chunk(pc); 2294 } 2295 2296 } 2297 2298 rw_wunlock(&pvh_global_lock); 2299 cpu_tlb_flushID(); 2300 cpu_cpwait(); 2301 PMAP_UNLOCK(pmap); 2302} 2303 2304 2305/*************************************************** 2306 * Low level mapping routines..... 2307 ***************************************************/ 2308 2309#ifdef ARM_HAVE_SUPERSECTIONS 2310/* Map a super section into the KVA. */ 2311 2312void 2313pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags) 2314{ 2315 pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) | 2316 (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL, 2317 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | 2318 L1_S_DOM(PMAP_DOMAIN_KERNEL); 2319 struct l1_ttable *l1; 2320 vm_offset_t va0, va_end; 2321 2322 KASSERT(((va | pa) & L1_SUP_OFFSET) == 0, 2323 ("Not a valid super section mapping")); 2324 if (flags & SECTION_CACHE) 2325 pd |= pte_l1_s_cache_mode; 2326 else if (flags & SECTION_PT) 2327 pd |= pte_l1_s_cache_mode_pt; 2328 2329 va0 = va & L1_SUP_FRAME; 2330 va_end = va + L1_SUP_SIZE; 2331 SLIST_FOREACH(l1, &l1_list, l1_link) { 2332 va = va0; 2333 for (; va < va_end; va += L1_S_SIZE) { 2334 l1->l1_kva[L1_IDX(va)] = pd; 2335 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); 2336 } 2337 } 2338} 2339#endif 2340 2341/* Map a section into the KVA. */ 2342 2343void 2344pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags) 2345{ 2346 pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, 2347 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | L1_S_REF | 2348 L1_S_DOM(PMAP_DOMAIN_KERNEL); 2349 struct l1_ttable *l1; 2350 2351 KASSERT(((va | pa) & L1_S_OFFSET) == 0, 2352 ("Not a valid section mapping")); 2353 if (flags & SECTION_CACHE) 2354 pd |= pte_l1_s_cache_mode; 2355 else if (flags & SECTION_PT) 2356 pd |= pte_l1_s_cache_mode_pt; 2357 2358 SLIST_FOREACH(l1, &l1_list, l1_link) { 2359 l1->l1_kva[L1_IDX(va)] = pd; 2360 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); 2361 } 2362 cpu_tlb_flushID_SE(va); 2363 cpu_cpwait(); 2364} 2365 2366/* 2367 * Make a temporary mapping for a physical address. This is only intended 2368 * to be used for panic dumps. 2369 */ 2370void * 2371pmap_kenter_temp(vm_paddr_t pa, int i) 2372{ 2373 vm_offset_t va; 2374 2375 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2376 pmap_kenter(va, pa); 2377 return ((void *)crashdumpmap); 2378} 2379 2380/* 2381 * add a wired page to the kva 2382 * note that in order for the mapping to take effect -- you 2383 * should do a invltlb after doing the pmap_kenter... 2384 */ 2385static PMAP_INLINE void 2386pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags) 2387{ 2388 struct l2_bucket *l2b; 2389 pt_entry_t *ptep; 2390 pt_entry_t opte; 2391 2392 PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n", 2393 (uint32_t) va, (uint32_t) pa)); 2394 2395 2396 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2397 if (l2b == NULL) 2398 l2b = pmap_grow_l2_bucket(pmap_kernel(), va); 2399 KASSERT(l2b != NULL, ("No L2 Bucket")); 2400 2401 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2402 opte = *ptep; 2403 2404 if (flags & KENTER_CACHE) { 2405 *ptep = L2_S_PROTO | pa | pte_l2_s_cache_mode | L2_S_REF; 2406 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 2407 flags & KENTER_USER); 2408 } else { 2409 *ptep = L2_S_PROTO | pa | L2_S_REF; 2410 pmap_set_prot(ptep, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 2411 0); 2412 } 2413 2414 PTE_SYNC(ptep); 2415 if (l2pte_valid(opte)) { 2416 if (L2_S_EXECUTABLE(opte) || L2_S_EXECUTABLE(*ptep)) 2417 cpu_tlb_flushID_SE(va); 2418 else 2419 cpu_tlb_flushD_SE(va); 2420 } else { 2421 if (opte == 0) 2422 l2b->l2b_occupancy++; 2423 } 2424 cpu_cpwait(); 2425 2426 PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n", 2427 (uint32_t) ptep, opte, *ptep)); 2428} 2429 2430void 2431pmap_kenter(vm_offset_t va, vm_paddr_t pa) 2432{ 2433 pmap_kenter_internal(va, pa, KENTER_CACHE); 2434} 2435 2436void 2437pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa) 2438{ 2439 2440 pmap_kenter_internal(va, pa, 0); 2441} 2442 2443void 2444pmap_kenter_device(vm_offset_t va, vm_paddr_t pa) 2445{ 2446 2447 /* 2448 * XXX - Need a way for kenter_internal to handle PTE_DEVICE mapping as 2449 * a potentially different thing than PTE_NOCACHE. 2450 */ 2451 pmap_kenter_internal(va, pa, 0); 2452} 2453 2454void 2455pmap_kenter_user(vm_offset_t va, vm_paddr_t pa) 2456{ 2457 2458 pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER); 2459 /* 2460 * Call pmap_fault_fixup now, to make sure we'll have no exception 2461 * at the first use of the new address, or bad things will happen, 2462 * as we use one of these addresses in the exception handlers. 2463 */ 2464 pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1); 2465} 2466 2467vm_paddr_t 2468pmap_kextract(vm_offset_t va) 2469{ 2470 2471 if (kernel_vm_end == 0) 2472 return (0); 2473 return (pmap_extract_locked(kernel_pmap, va)); 2474} 2475 2476/* 2477 * remove a page from the kernel pagetables 2478 */ 2479void 2480pmap_kremove(vm_offset_t va) 2481{ 2482 struct l2_bucket *l2b; 2483 pt_entry_t *ptep, opte; 2484 2485 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2486 if (!l2b) 2487 return; 2488 KASSERT(l2b != NULL, ("No L2 Bucket")); 2489 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2490 opte = *ptep; 2491 if (l2pte_valid(opte)) { 2492 va = va & ~PAGE_MASK; 2493 *ptep = 0; 2494 PTE_SYNC(ptep); 2495 if (L2_S_EXECUTABLE(opte)) 2496 cpu_tlb_flushID_SE(va); 2497 else 2498 cpu_tlb_flushD_SE(va); 2499 cpu_cpwait(); 2500 } 2501} 2502 2503 2504/* 2505 * Used to map a range of physical addresses into kernel 2506 * virtual address space. 2507 * 2508 * The value passed in '*virt' is a suggested virtual address for 2509 * the mapping. Architectures which can support a direct-mapped 2510 * physical to virtual region can return the appropriate address 2511 * within that region, leaving '*virt' unchanged. Other 2512 * architectures should map the pages starting at '*virt' and 2513 * update '*virt' with the first usable address after the mapped 2514 * region. 2515 */ 2516vm_offset_t 2517pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 2518{ 2519 vm_offset_t sva = *virt; 2520 vm_offset_t va = sva; 2521 2522 PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, " 2523 "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end, 2524 prot)); 2525 2526 while (start < end) { 2527 pmap_kenter(va, start); 2528 va += PAGE_SIZE; 2529 start += PAGE_SIZE; 2530 } 2531 *virt = va; 2532 return (sva); 2533} 2534 2535/* 2536 * Add a list of wired pages to the kva 2537 * this routine is only used for temporary 2538 * kernel mappings that do not need to have 2539 * page modification or references recorded. 2540 * Note that old mappings are simply written 2541 * over. The page *must* be wired. 2542 */ 2543void 2544pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 2545{ 2546 int i; 2547 2548 for (i = 0; i < count; i++) { 2549 pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]), 2550 KENTER_CACHE); 2551 va += PAGE_SIZE; 2552 } 2553} 2554 2555 2556/* 2557 * this routine jerks page mappings from the 2558 * kernel -- it is meant only for temporary mappings. 2559 */ 2560void 2561pmap_qremove(vm_offset_t va, int count) 2562{ 2563 int i; 2564 2565 for (i = 0; i < count; i++) { 2566 if (vtophys(va)) 2567 pmap_kremove(va); 2568 2569 va += PAGE_SIZE; 2570 } 2571} 2572 2573 2574/* 2575 * pmap_object_init_pt preloads the ptes for a given object 2576 * into the specified pmap. This eliminates the blast of soft 2577 * faults on process startup and immediately after an mmap. 2578 */ 2579void 2580pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2581 vm_pindex_t pindex, vm_size_t size) 2582{ 2583 2584 VM_OBJECT_ASSERT_WLOCKED(object); 2585 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2586 ("pmap_object_init_pt: non-device object")); 2587} 2588 2589 2590/* 2591 * pmap_is_prefaultable: 2592 * 2593 * Return whether or not the specified virtual address is elgible 2594 * for prefault. 2595 */ 2596boolean_t 2597pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2598{ 2599 pd_entry_t *pdep; 2600 pt_entry_t *ptep; 2601 2602 if (!pmap_get_pde_pte(pmap, addr, &pdep, &ptep)) 2603 return (FALSE); 2604 KASSERT((pdep != NULL && (l1pte_section_p(*pdep) || ptep != NULL)), 2605 ("Valid mapping but no pte ?")); 2606 if (*pdep != 0 && !l1pte_section_p(*pdep)) 2607 if (*ptep == 0) 2608 return (TRUE); 2609 return (FALSE); 2610} 2611 2612/* 2613 * Fetch pointers to the PDE/PTE for the given pmap/VA pair. 2614 * Returns TRUE if the mapping exists, else FALSE. 2615 * 2616 * NOTE: This function is only used by a couple of arm-specific modules. 2617 * It is not safe to take any pmap locks here, since we could be right 2618 * in the middle of debugging the pmap anyway... 2619 * 2620 * It is possible for this routine to return FALSE even though a valid 2621 * mapping does exist. This is because we don't lock, so the metadata 2622 * state may be inconsistent. 2623 * 2624 * NOTE: We can return a NULL *ptp in the case where the L1 pde is 2625 * a "section" mapping. 2626 */ 2627boolean_t 2628pmap_get_pde_pte(pmap_t pmap, vm_offset_t va, pd_entry_t **pdp, 2629 pt_entry_t **ptp) 2630{ 2631 struct l2_dtable *l2; 2632 pd_entry_t *pl1pd, l1pd; 2633 pt_entry_t *ptep; 2634 u_short l1idx; 2635 2636 if (pmap->pm_l1 == NULL) 2637 return (FALSE); 2638 2639 l1idx = L1_IDX(va); 2640 *pdp = pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 2641 l1pd = *pl1pd; 2642 2643 if (l1pte_section_p(l1pd)) { 2644 *ptp = NULL; 2645 return (TRUE); 2646 } 2647 2648 if (pmap->pm_l2 == NULL) 2649 return (FALSE); 2650 2651 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 2652 2653 if (l2 == NULL || 2654 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { 2655 return (FALSE); 2656 } 2657 2658 *ptp = &ptep[l2pte_index(va)]; 2659 return (TRUE); 2660} 2661 2662/* 2663 * Routine: pmap_remove_all 2664 * Function: 2665 * Removes this physical page from 2666 * all physical maps in which it resides. 2667 * Reflects back modify bits to the pager. 2668 * 2669 * Notes: 2670 * Original versions of this routine were very 2671 * inefficient because they iteratively called 2672 * pmap_remove (slow...) 2673 */ 2674void 2675pmap_remove_all(vm_page_t m) 2676{ 2677 struct md_page *pvh; 2678 pv_entry_t pv; 2679 pmap_t pmap; 2680 pt_entry_t *ptep; 2681 struct l2_bucket *l2b; 2682 boolean_t flush = FALSE; 2683 pmap_t curpmap; 2684 u_int is_exec = 0; 2685 2686 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2687 ("pmap_remove_all: page %p is not managed", m)); 2688 rw_wlock(&pvh_global_lock); 2689 if ((m->flags & PG_FICTITIOUS) != 0) 2690 goto small_mappings; 2691 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2692 while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2693 pmap = PV_PMAP(pv); 2694 PMAP_LOCK(pmap); 2695 pd_entry_t *pl1pd; 2696 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 2697 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, 2698 ("pmap_remove_all: valid section mapping expected")); 2699 (void)pmap_demote_section(pmap, pv->pv_va); 2700 PMAP_UNLOCK(pmap); 2701 } 2702small_mappings: 2703 curpmap = vmspace_pmap(curproc->p_vmspace); 2704 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2705 pmap = PV_PMAP(pv); 2706 if (flush == FALSE && (pmap == curpmap || 2707 pmap == pmap_kernel())) 2708 flush = TRUE; 2709 2710 PMAP_LOCK(pmap); 2711 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 2712 KASSERT(l2b != NULL, ("No l2 bucket")); 2713 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 2714 is_exec |= PTE_BEEN_EXECD(*ptep); 2715 *ptep = 0; 2716 if (pmap_is_current(pmap)) 2717 PTE_SYNC(ptep); 2718 pmap_free_l2_bucket(pmap, l2b, 1); 2719 pmap->pm_stats.resident_count--; 2720 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2721 if (pv->pv_flags & PVF_WIRED) 2722 pmap->pm_stats.wired_count--; 2723 pmap_free_pv_entry(pmap, pv); 2724 PMAP_UNLOCK(pmap); 2725 } 2726 2727 if (flush) { 2728 if (is_exec) 2729 cpu_tlb_flushID(); 2730 else 2731 cpu_tlb_flushD(); 2732 cpu_cpwait(); 2733 } 2734 vm_page_aflag_clear(m, PGA_WRITEABLE); 2735 rw_wunlock(&pvh_global_lock); 2736} 2737 2738int 2739pmap_change_attr(vm_offset_t sva, vm_size_t len, int mode) 2740{ 2741 vm_offset_t base, offset, tmpva; 2742 vm_size_t size; 2743 struct l2_bucket *l2b; 2744 pt_entry_t *ptep, pte; 2745 vm_offset_t next_bucket; 2746 2747 PMAP_LOCK(kernel_pmap); 2748 2749 base = trunc_page(sva); 2750 offset = sva & PAGE_MASK; 2751 size = roundup(offset + len, PAGE_SIZE); 2752 2753#ifdef checkit 2754 /* 2755 * Only supported on kernel virtual addresses, including the direct 2756 * map but excluding the recursive map. 2757 */ 2758 if (base < DMAP_MIN_ADDRESS) { 2759 PMAP_UNLOCK(kernel_pmap); 2760 return (EINVAL); 2761 } 2762#endif 2763 for (tmpva = base; tmpva < base + size; ) { 2764 next_bucket = L2_NEXT_BUCKET(tmpva); 2765 if (next_bucket > base + size) 2766 next_bucket = base + size; 2767 2768 l2b = pmap_get_l2_bucket(kernel_pmap, tmpva); 2769 if (l2b == NULL) { 2770 tmpva = next_bucket; 2771 continue; 2772 } 2773 2774 ptep = &l2b->l2b_kva[l2pte_index(tmpva)]; 2775 2776 if (*ptep == 0) { 2777 PMAP_UNLOCK(kernel_pmap); 2778 return(EINVAL); 2779 } 2780 2781 pte = *ptep &~ L2_S_CACHE_MASK; 2782 cpu_idcache_wbinv_range(tmpva, PAGE_SIZE); 2783 pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE); 2784 *ptep = pte; 2785 cpu_tlb_flushID_SE(tmpva); 2786 cpu_cpwait(); 2787 2788 dprintf("%s: for va:%x ptep:%x pte:%x\n", 2789 __func__, tmpva, (uint32_t)ptep, pte); 2790 tmpva += PAGE_SIZE; 2791 } 2792 2793 PMAP_UNLOCK(kernel_pmap); 2794 2795 return (0); 2796} 2797 2798/* 2799 * Set the physical protection on the 2800 * specified range of this map as requested. 2801 */ 2802void 2803pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2804{ 2805 struct l2_bucket *l2b; 2806 struct md_page *pvh; 2807 struct pv_entry *pve; 2808 pd_entry_t *pl1pd, l1pd; 2809 pt_entry_t *ptep, pte; 2810 vm_offset_t next_bucket; 2811 u_int is_exec, is_refd; 2812 int flush; 2813 2814 if ((prot & VM_PROT_READ) == 0) { 2815 pmap_remove(pmap, sva, eva); 2816 return; 2817 } 2818 2819 if (prot & VM_PROT_WRITE) { 2820 /* 2821 * If this is a read->write transition, just ignore it and let 2822 * vm_fault() take care of it later. 2823 */ 2824 return; 2825 } 2826 2827 rw_wlock(&pvh_global_lock); 2828 PMAP_LOCK(pmap); 2829 2830 /* 2831 * OK, at this point, we know we're doing write-protect operation. 2832 * If the pmap is active, write-back the range. 2833 */ 2834 2835 flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1; 2836 is_exec = is_refd = 0; 2837 2838 while (sva < eva) { 2839 next_bucket = L2_NEXT_BUCKET(sva); 2840 /* 2841 * Check for large page. 2842 */ 2843 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 2844 l1pd = *pl1pd; 2845 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 2846 KASSERT(pmap != pmap_kernel(), 2847 ("pmap_protect: trying to modify " 2848 "kernel section protections")); 2849 /* 2850 * Are we protecting the entire large page? If not, 2851 * demote the mapping and fall through. 2852 */ 2853 if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) && 2854 eva >= L2_NEXT_BUCKET(sva)) { 2855 l1pd &= ~(L1_S_PROT_MASK | L1_S_XN); 2856 if (!(prot & VM_PROT_EXECUTE)) 2857 *pl1pd |= L1_S_XN; 2858 /* 2859 * At this point we are always setting 2860 * write-protect bit. 2861 */ 2862 l1pd |= L1_S_APX; 2863 /* All managed superpages are user pages. */ 2864 l1pd |= L1_S_PROT_U; 2865 *pl1pd = l1pd; 2866 PTE_SYNC(pl1pd); 2867 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 2868 pve = pmap_find_pv(pvh, pmap, 2869 trunc_1mpage(sva)); 2870 pve->pv_flags &= ~PVF_WRITE; 2871 sva = next_bucket; 2872 continue; 2873 } else if (!pmap_demote_section(pmap, sva)) { 2874 /* The large page mapping was destroyed. */ 2875 sva = next_bucket; 2876 continue; 2877 } 2878 } 2879 if (next_bucket > eva) 2880 next_bucket = eva; 2881 l2b = pmap_get_l2_bucket(pmap, sva); 2882 if (l2b == NULL) { 2883 sva = next_bucket; 2884 continue; 2885 } 2886 2887 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 2888 2889 while (sva < next_bucket) { 2890 if ((pte = *ptep) != 0 && L2_S_WRITABLE(pte)) { 2891 struct vm_page *m; 2892 2893 m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); 2894 pmap_set_prot(ptep, prot, 2895 !(pmap == pmap_kernel())); 2896 PTE_SYNC(ptep); 2897 2898 pmap_modify_pv(m, pmap, sva, PVF_WRITE, 0); 2899 2900 if (flush >= 0) { 2901 flush++; 2902 is_exec |= PTE_BEEN_EXECD(pte); 2903 is_refd |= PTE_BEEN_REFD(pte); 2904 } else { 2905 if (PTE_BEEN_EXECD(pte)) 2906 cpu_tlb_flushID_SE(sva); 2907 else if (PTE_BEEN_REFD(pte)) 2908 cpu_tlb_flushD_SE(sva); 2909 } 2910 } 2911 2912 sva += PAGE_SIZE; 2913 ptep++; 2914 } 2915 } 2916 2917 2918 if (flush) { 2919 if (is_exec) 2920 cpu_tlb_flushID(); 2921 else 2922 if (is_refd) 2923 cpu_tlb_flushD(); 2924 cpu_cpwait(); 2925 } 2926 rw_wunlock(&pvh_global_lock); 2927 2928 PMAP_UNLOCK(pmap); 2929} 2930 2931 2932/* 2933 * Insert the given physical page (p) at 2934 * the specified virtual address (v) in the 2935 * target physical map with the protection requested. 2936 * 2937 * If specified, the page will be wired down, meaning 2938 * that the related pte can not be reclaimed. 2939 * 2940 * NB: This is the only routine which MAY NOT lazy-evaluate 2941 * or lose information. That is, this routine must actually 2942 * insert this page into the given map NOW. 2943 */ 2944 2945void 2946pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 2947 vm_prot_t prot, boolean_t wired) 2948{ 2949 struct l2_bucket *l2b; 2950 2951 rw_wlock(&pvh_global_lock); 2952 PMAP_LOCK(pmap); 2953 pmap_enter_locked(pmap, va, access, m, prot, wired, M_WAITOK); 2954 /* 2955 * If both the l2b_occupancy and the reservation are fully 2956 * populated, then attempt promotion. 2957 */ 2958 l2b = pmap_get_l2_bucket(pmap, va); 2959 if ((l2b != NULL) && (l2b->l2b_occupancy == L2_PTE_NUM_TOTAL) && 2960 sp_enabled && (m->flags & PG_FICTITIOUS) == 0 && 2961 vm_reserv_level_iffullpop(m) == 0) 2962 pmap_promote_section(pmap, va); 2963 2964 PMAP_UNLOCK(pmap); 2965 rw_wunlock(&pvh_global_lock); 2966} 2967 2968/* 2969 * The pvh global and pmap locks must be held. 2970 */ 2971static void 2972pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 2973 vm_prot_t prot, boolean_t wired, int flags) 2974{ 2975 struct l2_bucket *l2b = NULL; 2976 struct vm_page *om; 2977 struct pv_entry *pve = NULL; 2978 pd_entry_t *pl1pd, l1pd; 2979 pt_entry_t *ptep, npte, opte; 2980 u_int nflags; 2981 u_int is_exec, is_refd; 2982 vm_paddr_t pa; 2983 u_char user; 2984 2985 PMAP_ASSERT_LOCKED(pmap); 2986 rw_assert(&pvh_global_lock, RA_WLOCKED); 2987 if (va == vector_page) { 2988 pa = systempage.pv_pa; 2989 m = NULL; 2990 } else { 2991 KASSERT((m->oflags & VPO_UNMANAGED) != 0 || 2992 vm_page_xbusied(m) || (flags & M_NOWAIT) != 0, 2993 ("pmap_enter_locked: page %p is not busy", m)); 2994 pa = VM_PAGE_TO_PHYS(m); 2995 } 2996 2997 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 2998 if ((va < VM_MAXUSER_ADDRESS) && 2999 (*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) { 3000 (void)pmap_demote_section(pmap, va); 3001 } 3002 3003 user = 0; 3004 /* 3005 * Make sure userland mappings get the right permissions 3006 */ 3007 if (pmap != pmap_kernel() && va != vector_page) 3008 user = 1; 3009 3010 nflags = 0; 3011 3012 if (prot & VM_PROT_WRITE) 3013 nflags |= PVF_WRITE; 3014 if (wired) 3015 nflags |= PVF_WIRED; 3016 3017 PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, " 3018 "prot = %x, wired = %x\n", (uint32_t) pmap, va, (uint32_t) m, 3019 prot, wired)); 3020 3021 if (pmap == pmap_kernel()) { 3022 l2b = pmap_get_l2_bucket(pmap, va); 3023 if (l2b == NULL) 3024 l2b = pmap_grow_l2_bucket(pmap, va); 3025 } else { 3026do_l2b_alloc: 3027 l2b = pmap_alloc_l2_bucket(pmap, va); 3028 if (l2b == NULL) { 3029 if (flags & M_WAITOK) { 3030 PMAP_UNLOCK(pmap); 3031 rw_wunlock(&pvh_global_lock); 3032 VM_WAIT; 3033 rw_wlock(&pvh_global_lock); 3034 PMAP_LOCK(pmap); 3035 goto do_l2b_alloc; 3036 } 3037 return; 3038 } 3039 } 3040 3041 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3042 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 3043 panic("pmap_enter: attempt to enter on 1MB page, va: %#x", va); 3044 3045 ptep = &l2b->l2b_kva[l2pte_index(va)]; 3046 3047 opte = *ptep; 3048 npte = pa; 3049 is_exec = is_refd = 0; 3050 3051 if (opte) { 3052 if (l2pte_pa(opte) == pa) { 3053 /* 3054 * We're changing the attrs of an existing mapping. 3055 */ 3056 if (m != NULL) 3057 pmap_modify_pv(m, pmap, va, 3058 PVF_WRITE | PVF_WIRED, nflags); 3059 is_exec |= PTE_BEEN_EXECD(opte); 3060 is_refd |= PTE_BEEN_REFD(opte); 3061 goto validate; 3062 } 3063 if ((om = PHYS_TO_VM_PAGE(l2pte_pa(opte)))) { 3064 /* 3065 * Replacing an existing mapping with a new one. 3066 * It is part of our managed memory so we 3067 * must remove it from the PV list 3068 */ 3069 if ((pve = pmap_remove_pv(om, pmap, va))) { 3070 is_exec |= PTE_BEEN_EXECD(opte); 3071 is_refd |= PTE_BEEN_REFD(opte); 3072 3073 if (m && ((m->oflags & VPO_UNMANAGED))) 3074 pmap_free_pv_entry(pmap, pve); 3075 } 3076 } 3077 3078 } else { 3079 /* 3080 * Keep the stats up to date 3081 */ 3082 l2b->l2b_occupancy++; 3083 pmap->pm_stats.resident_count++; 3084 } 3085 3086 /* 3087 * Enter on the PV list if part of our managed memory. 3088 */ 3089 if ((m && !(m->oflags & VPO_UNMANAGED))) { 3090 if ((!pve) && (pve = pmap_get_pv_entry(pmap, FALSE)) == NULL) 3091 panic("pmap_enter: no pv entries"); 3092 3093 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 3094 ("pmap_enter: managed mapping within the clean submap")); 3095 KASSERT(pve != NULL, ("No pv")); 3096 pmap_enter_pv(m, pve, pmap, va, nflags); 3097 } 3098 3099validate: 3100 /* Make the new PTE valid */ 3101 npte |= L2_S_PROTO; 3102#ifdef SMP 3103 npte |= L2_SHARED; 3104#endif 3105 /* Set defaults first - kernel read access */ 3106 npte |= L2_APX; 3107 npte |= L2_S_PROT_R; 3108 /* Set "referenced" flag */ 3109 npte |= L2_S_REF; 3110 3111 /* Now tune APs as desired */ 3112 if (user) 3113 npte |= L2_S_PROT_U; 3114 /* 3115 * If this is not a vector_page 3116 * then continue setting mapping parameters 3117 */ 3118 if (m != NULL) { 3119 if ((m->oflags & VPO_UNMANAGED) == 0) { 3120 if (prot & (VM_PROT_ALL)) { 3121 vm_page_aflag_set(m, PGA_REFERENCED); 3122 } else { 3123 /* 3124 * Need to do page referenced emulation. 3125 */ 3126 npte &= ~L2_S_REF; 3127 } 3128 } 3129 3130 if (prot & VM_PROT_WRITE) { 3131 if ((m->oflags & VPO_UNMANAGED) == 0) { 3132 vm_page_aflag_set(m, PGA_WRITEABLE); 3133 /* 3134 * XXX: Skip modified bit emulation for now. 3135 * The emulation reveals problems 3136 * that result in random failures 3137 * during memory allocation on some 3138 * platforms. 3139 * Therefore, the page is marked RW 3140 * immediately. 3141 */ 3142 npte &= ~(L2_APX); 3143 vm_page_dirty(m); 3144 } else 3145 npte &= ~(L2_APX); 3146 } 3147 if (!(prot & VM_PROT_EXECUTE)) 3148 npte |= L2_XN; 3149 3150 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) 3151 npte |= pte_l2_s_cache_mode; 3152 } 3153 3154 CTR5(KTR_PMAP,"enter: pmap:%p va:%x prot:%x pte:%x->%x", 3155 pmap, va, prot, opte, npte); 3156 /* 3157 * If this is just a wiring change, the two PTEs will be 3158 * identical, so there's no need to update the page table. 3159 */ 3160 if (npte != opte) { 3161 boolean_t is_cached = pmap_is_current(pmap); 3162 3163 *ptep = npte; 3164 PTE_SYNC(ptep); 3165 if (is_cached) { 3166 /* 3167 * We only need to frob the cache/tlb if this pmap 3168 * is current 3169 */ 3170 if (L1_IDX(va) != L1_IDX(vector_page) && 3171 l2pte_valid(npte)) { 3172 /* 3173 * This mapping is likely to be accessed as 3174 * soon as we return to userland. Fix up the 3175 * L1 entry to avoid taking another 3176 * page/domain fault. 3177 */ 3178 l1pd = l2b->l2b_phys | 3179 L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 3180 if (*pl1pd != l1pd) { 3181 *pl1pd = l1pd; 3182 PTE_SYNC(pl1pd); 3183 } 3184 } 3185 } 3186 3187 if (is_exec) 3188 cpu_tlb_flushID_SE(va); 3189 else if (is_refd) 3190 cpu_tlb_flushD_SE(va); 3191 cpu_cpwait(); 3192 } 3193 3194 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 3195 cpu_icache_sync_range(va, PAGE_SIZE); 3196} 3197 3198/* 3199 * Maps a sequence of resident pages belonging to the same object. 3200 * The sequence begins with the given page m_start. This page is 3201 * mapped at the given virtual address start. Each subsequent page is 3202 * mapped at a virtual address that is offset from start by the same 3203 * amount as the page is offset from m_start within the object. The 3204 * last page in the sequence is the page with the largest offset from 3205 * m_start that can be mapped at a virtual address less than the given 3206 * virtual address end. Not every virtual page between start and end 3207 * is mapped; only those for which a resident page exists with the 3208 * corresponding offset from m_start are mapped. 3209 */ 3210void 3211pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3212 vm_page_t m_start, vm_prot_t prot) 3213{ 3214 vm_offset_t va; 3215 vm_page_t m; 3216 vm_pindex_t diff, psize; 3217 vm_prot_t access; 3218 3219 VM_OBJECT_ASSERT_LOCKED(m_start->object); 3220 3221 psize = atop(end - start); 3222 m = m_start; 3223 access = prot = prot & (VM_PROT_READ | VM_PROT_EXECUTE); 3224 rw_wlock(&pvh_global_lock); 3225 PMAP_LOCK(pmap); 3226 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3227 va = start + ptoa(diff); 3228 if ((va & L1_S_OFFSET) == 0 && L2_NEXT_BUCKET(va) <= end && 3229 m->psind == 1 && sp_enabled && 3230 pmap_enter_section(pmap, va, m, prot)) 3231 m = &m[L1_S_SIZE / PAGE_SIZE - 1]; 3232 else 3233 pmap_enter_locked(pmap, va, access, m, prot, 3234 FALSE, M_NOWAIT); 3235 m = TAILQ_NEXT(m, listq); 3236 } 3237 PMAP_UNLOCK(pmap); 3238 rw_wunlock(&pvh_global_lock); 3239} 3240 3241/* 3242 * this code makes some *MAJOR* assumptions: 3243 * 1. Current pmap & pmap exists. 3244 * 2. Not wired. 3245 * 3. Read access. 3246 * 4. No page table pages. 3247 * but is *MUCH* faster than pmap_enter... 3248 */ 3249 3250void 3251pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3252{ 3253 vm_prot_t access; 3254 3255 access = prot = prot & (VM_PROT_READ | VM_PROT_EXECUTE); 3256 rw_wlock(&pvh_global_lock); 3257 PMAP_LOCK(pmap); 3258 pmap_enter_locked(pmap, va, access, m, prot, FALSE, M_NOWAIT); 3259 PMAP_UNLOCK(pmap); 3260 rw_wunlock(&pvh_global_lock); 3261} 3262 3263/* 3264 * Routine: pmap_change_wiring 3265 * Function: Change the wiring attribute for a map/virtual-address 3266 * pair. 3267 * In/out conditions: 3268 * The mapping must already exist in the pmap. 3269 */ 3270void 3271pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3272{ 3273 struct l2_bucket *l2b; 3274 struct md_page *pvh; 3275 struct pv_entry *pve; 3276 pd_entry_t *pl1pd, l1pd; 3277 pt_entry_t *ptep, pte; 3278 vm_page_t m; 3279 3280 rw_wlock(&pvh_global_lock); 3281 PMAP_LOCK(pmap); 3282 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3283 l1pd = *pl1pd; 3284 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 3285 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 3286 KASSERT((m != NULL) && ((m->oflags & VPO_UNMANAGED) == 0), 3287 ("pmap_change_wiring: unmanaged superpage should not " 3288 "be changed")); 3289 KASSERT(pmap != pmap_kernel(), 3290 ("pmap_change_wiring: managed kernel superpage " 3291 "should not exist")); 3292 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 3293 pve = pmap_find_pv(pvh, pmap, trunc_1mpage(va)); 3294 if (!wired != ((pve->pv_flags & PVF_WIRED) == 0)) { 3295 if (!pmap_demote_section(pmap, va)) 3296 panic("pmap_change_wiring: demotion failed"); 3297 } else 3298 goto out; 3299 } 3300 l2b = pmap_get_l2_bucket(pmap, va); 3301 KASSERT(l2b, ("No l2b bucket in pmap_change_wiring")); 3302 ptep = &l2b->l2b_kva[l2pte_index(va)]; 3303 pte = *ptep; 3304 m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); 3305 if (m != NULL) 3306 pmap_modify_pv(m, pmap, va, PVF_WIRED, 3307 wired == TRUE ? PVF_WIRED : 0); 3308out: 3309 rw_wunlock(&pvh_global_lock); 3310 PMAP_UNLOCK(pmap); 3311} 3312 3313 3314/* 3315 * Copy the range specified by src_addr/len 3316 * from the source map to the range dst_addr/len 3317 * in the destination map. 3318 * 3319 * This routine is only advisory and need not do anything. 3320 */ 3321void 3322pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 3323 vm_size_t len, vm_offset_t src_addr) 3324{ 3325} 3326 3327 3328/* 3329 * Routine: pmap_extract 3330 * Function: 3331 * Extract the physical page address associated 3332 * with the given map/virtual_address pair. 3333 */ 3334vm_paddr_t 3335pmap_extract(pmap_t pmap, vm_offset_t va) 3336{ 3337 vm_paddr_t pa; 3338 3339 if (kernel_vm_end != 0) 3340 PMAP_LOCK(pmap); 3341 pa = pmap_extract_locked(pmap, va); 3342 if (kernel_vm_end != 0) 3343 PMAP_UNLOCK(pmap); 3344 return (pa); 3345} 3346 3347static vm_paddr_t 3348pmap_extract_locked(pmap_t pmap, vm_offset_t va) 3349{ 3350 struct l2_dtable *l2; 3351 pd_entry_t l1pd; 3352 pt_entry_t *ptep, pte; 3353 vm_paddr_t pa; 3354 u_int l1idx; 3355 3356 if (kernel_vm_end != 0 && pmap != kernel_pmap) 3357 PMAP_ASSERT_LOCKED(pmap); 3358 l1idx = L1_IDX(va); 3359 l1pd = pmap->pm_l1->l1_kva[l1idx]; 3360 if (l1pte_section_p(l1pd)) { 3361 /* XXX: what to do about the bits > 32 ? */ 3362 if (l1pd & L1_S_SUPERSEC) 3363 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); 3364 else 3365 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); 3366 } else { 3367 /* 3368 * Note that we can't rely on the validity of the L1 3369 * descriptor as an indication that a mapping exists. 3370 * We have to look it up in the L2 dtable. 3371 */ 3372 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 3373 if (l2 == NULL || 3374 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) 3375 return (0); 3376 pte = ptep[l2pte_index(va)]; 3377 if (pte == 0) 3378 return (0); 3379 switch (pte & L2_TYPE_MASK) { 3380 case L2_TYPE_L: 3381 pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); 3382 break; 3383 default: 3384 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); 3385 break; 3386 } 3387 } 3388 return (pa); 3389} 3390 3391/* 3392 * Atomically extract and hold the physical page with the given 3393 * pmap and virtual address pair if that mapping permits the given 3394 * protection. 3395 * 3396 */ 3397vm_page_t 3398pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 3399{ 3400 struct l2_dtable *l2; 3401 pd_entry_t l1pd; 3402 pt_entry_t *ptep, pte; 3403 vm_paddr_t pa, paddr; 3404 vm_page_t m = NULL; 3405 u_int l1idx; 3406 l1idx = L1_IDX(va); 3407 paddr = 0; 3408 3409 PMAP_LOCK(pmap); 3410retry: 3411 l1pd = pmap->pm_l1->l1_kva[l1idx]; 3412 if (l1pte_section_p(l1pd)) { 3413 /* XXX: what to do about the bits > 32 ? */ 3414 if (l1pd & L1_S_SUPERSEC) 3415 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); 3416 else 3417 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); 3418 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) 3419 goto retry; 3420 if (L1_S_WRITABLE(l1pd) || (prot & VM_PROT_WRITE) == 0) { 3421 m = PHYS_TO_VM_PAGE(pa); 3422 vm_page_hold(m); 3423 } 3424 } else { 3425 /* 3426 * Note that we can't rely on the validity of the L1 3427 * descriptor as an indication that a mapping exists. 3428 * We have to look it up in the L2 dtable. 3429 */ 3430 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 3431 3432 if (l2 == NULL || 3433 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { 3434 PMAP_UNLOCK(pmap); 3435 return (NULL); 3436 } 3437 3438 ptep = &ptep[l2pte_index(va)]; 3439 pte = *ptep; 3440 3441 if (pte == 0) { 3442 PMAP_UNLOCK(pmap); 3443 return (NULL); 3444 } else if ((prot & VM_PROT_WRITE) && (pte & L2_APX)) { 3445 PMAP_UNLOCK(pmap); 3446 return (NULL); 3447 } else { 3448 switch (pte & L2_TYPE_MASK) { 3449 case L2_TYPE_L: 3450 panic("extract and hold section mapping"); 3451 break; 3452 default: 3453 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); 3454 break; 3455 } 3456 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) 3457 goto retry; 3458 m = PHYS_TO_VM_PAGE(pa); 3459 vm_page_hold(m); 3460 } 3461 3462 } 3463 3464 PMAP_UNLOCK(pmap); 3465 PA_UNLOCK_COND(paddr); 3466 return (m); 3467} 3468 3469/* 3470 * Initialize a preallocated and zeroed pmap structure, 3471 * such as one in a vmspace structure. 3472 */ 3473 3474int 3475pmap_pinit(pmap_t pmap) 3476{ 3477 PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap)); 3478 3479 pmap_alloc_l1(pmap); 3480 bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); 3481 3482 CPU_ZERO(&pmap->pm_active); 3483 3484 TAILQ_INIT(&pmap->pm_pvchunk); 3485 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 3486 pmap->pm_stats.resident_count = 1; 3487 if (vector_page < KERNBASE) { 3488 pmap_enter(pmap, vector_page, 3489 VM_PROT_READ, PHYS_TO_VM_PAGE(systempage.pv_pa), 3490 VM_PROT_READ, 1); 3491 } 3492 return (1); 3493} 3494 3495 3496/*************************************************** 3497 * Superpage management routines. 3498 ***************************************************/ 3499 3500static PMAP_INLINE struct pv_entry * 3501pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 3502{ 3503 pv_entry_t pv; 3504 3505 rw_assert(&pvh_global_lock, RA_WLOCKED); 3506 3507 pv = pmap_find_pv(pvh, pmap, va); 3508 if (pv != NULL) 3509 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 3510 3511 return (pv); 3512} 3513 3514static void 3515pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 3516{ 3517 pv_entry_t pv; 3518 3519 pv = pmap_pvh_remove(pvh, pmap, va); 3520 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 3521 pmap_free_pv_entry(pmap, pv); 3522} 3523 3524static boolean_t 3525pmap_pv_insert_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3526{ 3527 struct md_page *pvh; 3528 pv_entry_t pv; 3529 3530 rw_assert(&pvh_global_lock, RA_WLOCKED); 3531 if (pv_entry_count < pv_entry_high_water && 3532 (pv = pmap_get_pv_entry(pmap, TRUE)) != NULL) { 3533 pv->pv_va = va; 3534 pvh = pa_to_pvh(pa); 3535 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 3536 return (TRUE); 3537 } else 3538 return (FALSE); 3539} 3540 3541/* 3542 * Create the pv entries for each of the pages within a superpage. 3543 */ 3544static void 3545pmap_pv_demote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3546{ 3547 struct md_page *pvh; 3548 pv_entry_t pve, pv; 3549 vm_offset_t va_last; 3550 vm_page_t m; 3551 3552 rw_assert(&pvh_global_lock, RA_WLOCKED); 3553 KASSERT((pa & L1_S_OFFSET) == 0, 3554 ("pmap_pv_demote_section: pa is not 1mpage aligned")); 3555 3556 /* 3557 * Transfer the 1mpage's pv entry for this mapping to the first 3558 * page's pv list. 3559 */ 3560 pvh = pa_to_pvh(pa); 3561 va = trunc_1mpage(va); 3562 pv = pmap_pvh_remove(pvh, pmap, va); 3563 KASSERT(pv != NULL, ("pmap_pv_demote_section: pv not found")); 3564 m = PHYS_TO_VM_PAGE(pa); 3565 TAILQ_INSERT_HEAD(&m->md.pv_list, pv, pv_list); 3566 /* Instantiate the remaining pv entries. */ 3567 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; 3568 do { 3569 m++; 3570 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3571 ("pmap_pv_demote_section: page %p is not managed", m)); 3572 va += PAGE_SIZE; 3573 pve = pmap_get_pv_entry(pmap, FALSE); 3574 pmap_enter_pv(m, pve, pmap, va, pv->pv_flags); 3575 } while (va < va_last); 3576} 3577 3578static void 3579pmap_pv_promote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3580{ 3581 struct md_page *pvh; 3582 pv_entry_t pv; 3583 vm_offset_t va_last; 3584 vm_page_t m; 3585 3586 rw_assert(&pvh_global_lock, RA_WLOCKED); 3587 KASSERT((pa & L1_S_OFFSET) == 0, 3588 ("pmap_pv_promote_section: pa is not 1mpage aligned")); 3589 3590 /* 3591 * Transfer the first page's pv entry for this mapping to the 3592 * 1mpage's pv list. Aside from avoiding the cost of a call 3593 * to get_pv_entry(), a transfer avoids the possibility that 3594 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() 3595 * removes one of the mappings that is being promoted. 3596 */ 3597 m = PHYS_TO_VM_PAGE(pa); 3598 va = trunc_1mpage(va); 3599 pv = pmap_pvh_remove(&m->md, pmap, va); 3600 KASSERT(pv != NULL, ("pmap_pv_promote_section: pv not found")); 3601 pvh = pa_to_pvh(pa); 3602 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 3603 /* Free the remaining pv entries in the newly mapped section pages */ 3604 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; 3605 do { 3606 m++; 3607 va += PAGE_SIZE; 3608 /* 3609 * Don't care the flags, first pv contains sufficient 3610 * information for all of the pages so nothing is really lost. 3611 */ 3612 pmap_pvh_free(&m->md, pmap, va); 3613 } while (va < va_last); 3614} 3615 3616/* 3617 * Tries to create a 1MB page mapping. Returns TRUE if successful and 3618 * FALSE otherwise. Fails if (1) page is unmanageg, kernel pmap or vectors 3619 * page, (2) a mapping already exists at the specified virtual address, or 3620 * (3) a pv entry cannot be allocated without reclaiming another pv entry. 3621 */ 3622static boolean_t 3623pmap_enter_section(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3624{ 3625 pd_entry_t *pl1pd; 3626 vm_offset_t pa; 3627 struct l2_bucket *l2b; 3628 3629 rw_assert(&pvh_global_lock, RA_WLOCKED); 3630 PMAP_ASSERT_LOCKED(pmap); 3631 3632 /* Skip kernel, vectors page and unmanaged mappings */ 3633 if ((pmap == pmap_kernel()) || (L1_IDX(va) == L1_IDX(vector_page)) || 3634 ((m->oflags & VPO_UNMANAGED) != 0)) { 3635 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" 3636 " in pmap %p", va, pmap); 3637 return (FALSE); 3638 } 3639 /* 3640 * Check whether this is a valid section superpage entry or 3641 * there is a l2_bucket associated with that L1 page directory. 3642 */ 3643 va = trunc_1mpage(va); 3644 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3645 l2b = pmap_get_l2_bucket(pmap, va); 3646 if ((*pl1pd & L1_S_PROTO) || (l2b != NULL)) { 3647 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" 3648 " in pmap %p", va, pmap); 3649 return (FALSE); 3650 } 3651 pa = VM_PAGE_TO_PHYS(m); 3652 /* 3653 * Abort this mapping if its PV entry could not be created. 3654 */ 3655 if (!pmap_pv_insert_section(pmap, va, VM_PAGE_TO_PHYS(m))) { 3656 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" 3657 " in pmap %p", va, pmap); 3658 return (FALSE); 3659 } 3660 /* 3661 * Increment counters. 3662 */ 3663 pmap->pm_stats.resident_count += L2_PTE_NUM_TOTAL; 3664 /* 3665 * Despite permissions, mark the superpage read-only. 3666 */ 3667 prot &= ~VM_PROT_WRITE; 3668 /* 3669 * Map the superpage. 3670 */ 3671 pmap_map_section(pmap, va, pa, prot, FALSE); 3672 3673 pmap_section_mappings++; 3674 CTR2(KTR_PMAP, "pmap_enter_section: success for va %#lx" 3675 " in pmap %p", va, pmap); 3676 return (TRUE); 3677} 3678 3679/* 3680 * pmap_remove_section: do the things to unmap a superpage in a process 3681 */ 3682static void 3683pmap_remove_section(pmap_t pmap, vm_offset_t sva) 3684{ 3685 struct md_page *pvh; 3686 struct l2_bucket *l2b; 3687 pd_entry_t *pl1pd, l1pd; 3688 vm_offset_t eva, va; 3689 vm_page_t m; 3690 3691 PMAP_ASSERT_LOCKED(pmap); 3692 if ((pmap == pmap_kernel()) || (L1_IDX(sva) == L1_IDX(vector_page))) 3693 return; 3694 3695 KASSERT((sva & L1_S_OFFSET) == 0, 3696 ("pmap_remove_section: sva is not 1mpage aligned")); 3697 3698 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 3699 l1pd = *pl1pd; 3700 3701 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 3702 KASSERT((m != NULL && ((m->oflags & VPO_UNMANAGED) == 0)), 3703 ("pmap_remove_section: no corresponding vm_page or " 3704 "page unmanaged")); 3705 3706 pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL; 3707 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 3708 pmap_pvh_free(pvh, pmap, sva); 3709 eva = L2_NEXT_BUCKET(sva); 3710 for (va = sva, m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 3711 va < eva; va += PAGE_SIZE, m++) { 3712 /* 3713 * Mark base pages referenced but skip marking them dirty. 3714 * If the superpage is writeable, hence all base pages were 3715 * already marked as dirty in pmap_fault_fixup() before 3716 * promotion. Reference bit however, might not have been set 3717 * for each base page when the superpage was created at once, 3718 * not as a result of promotion. 3719 */ 3720 if (L1_S_REFERENCED(l1pd)) 3721 vm_page_aflag_set(m, PGA_REFERENCED); 3722 if (TAILQ_EMPTY(&m->md.pv_list) && 3723 TAILQ_EMPTY(&pvh->pv_list)) 3724 vm_page_aflag_clear(m, PGA_WRITEABLE); 3725 } 3726 3727 l2b = pmap_get_l2_bucket(pmap, sva); 3728 if (l2b != NULL) { 3729 KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL, 3730 ("pmap_remove_section: l2_bucket occupancy error")); 3731 pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL); 3732 } 3733 /* Now invalidate L1 slot */ 3734 *pl1pd = 0; 3735 PTE_SYNC(pl1pd); 3736 if (L1_S_EXECUTABLE(l1pd)) 3737 cpu_tlb_flushID_SE(sva); 3738 else 3739 cpu_tlb_flushD_SE(sva); 3740 cpu_cpwait(); 3741} 3742 3743/* 3744 * Tries to promote the 256, contiguous 4KB page mappings that are 3745 * within a single l2_bucket to a single 1MB section mapping. 3746 * For promotion to occur, two conditions must be met: (1) the 4KB page 3747 * mappings must map aligned, contiguous physical memory and (2) the 4KB page 3748 * mappings must have identical characteristics. 3749 */ 3750static void 3751pmap_promote_section(pmap_t pmap, vm_offset_t va) 3752{ 3753 pt_entry_t *firstptep, firstpte, oldpte, pa, *pte; 3754 vm_page_t m, oldm; 3755 vm_offset_t first_va, old_va; 3756 struct l2_bucket *l2b = NULL; 3757 vm_prot_t prot; 3758 struct pv_entry *pve, *first_pve; 3759 3760 PMAP_ASSERT_LOCKED(pmap); 3761 3762 prot = VM_PROT_ALL; 3763 /* 3764 * Skip promoting kernel pages. This is justified by following: 3765 * 1. Kernel is already mapped using section mappings in each pmap 3766 * 2. Managed mappings within the kernel are not to be promoted anyway 3767 */ 3768 if (pmap == pmap_kernel()) { 3769 pmap_section_p_failures++; 3770 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3771 " in pmap %p", va, pmap); 3772 return; 3773 } 3774 /* Do not attemp to promote vectors pages */ 3775 if (L1_IDX(va) == L1_IDX(vector_page)) { 3776 pmap_section_p_failures++; 3777 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3778 " in pmap %p", va, pmap); 3779 return; 3780 } 3781 /* 3782 * Examine the first PTE in the specified l2_bucket. Abort if this PTE 3783 * is either invalid, unused, or does not map the first 4KB physical 3784 * page within 1MB page. 3785 */ 3786 first_va = trunc_1mpage(va); 3787 l2b = pmap_get_l2_bucket(pmap, first_va); 3788 KASSERT(l2b != NULL, ("pmap_promote_section: trying to promote " 3789 "not existing l2 bucket")); 3790 firstptep = &l2b->l2b_kva[0]; 3791 3792 firstpte = *firstptep; 3793 if ((l2pte_pa(firstpte) & L1_S_OFFSET) != 0) { 3794 pmap_section_p_failures++; 3795 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3796 " in pmap %p", va, pmap); 3797 return; 3798 } 3799 3800 if ((firstpte & (L2_S_PROTO | L2_S_REF)) != (L2_S_PROTO | L2_S_REF)) { 3801 pmap_section_p_failures++; 3802 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3803 " in pmap %p", va, pmap); 3804 return; 3805 } 3806 /* 3807 * ARM uses pv_entry to mark particular mapping WIRED so don't promote 3808 * unmanaged pages since it is impossible to determine, whether the 3809 * page is wired or not if there is no corresponding pv_entry. 3810 */ 3811 m = PHYS_TO_VM_PAGE(l2pte_pa(firstpte)); 3812 if (m && ((m->oflags & VPO_UNMANAGED) != 0)) { 3813 pmap_section_p_failures++; 3814 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3815 " in pmap %p", va, pmap); 3816 return; 3817 } 3818 first_pve = pmap_find_pv(&m->md, pmap, first_va); 3819 /* 3820 * PTE is modified only on write due to modified bit 3821 * emulation. If the entry is referenced and writable 3822 * then it is modified and we don't clear write enable. 3823 * Otherwise, writing is disabled in PTE anyway and 3824 * we just configure protections for the section mapping 3825 * that is going to be created. 3826 */ 3827 if ((first_pve->pv_flags & PVF_WRITE) != 0) { 3828 if (!L2_S_WRITABLE(firstpte)) { 3829 first_pve->pv_flags &= ~PVF_WRITE; 3830 prot &= ~VM_PROT_WRITE; 3831 } 3832 } else 3833 prot &= ~VM_PROT_WRITE; 3834 3835 if (!L2_S_EXECUTABLE(firstpte)) 3836 prot &= ~VM_PROT_EXECUTE; 3837 3838 /* 3839 * Examine each of the other PTEs in the specified l2_bucket. 3840 * Abort if this PTE maps an unexpected 4KB physical page or 3841 * does not have identical characteristics to the first PTE. 3842 */ 3843 pa = l2pte_pa(firstpte) + ((L2_PTE_NUM_TOTAL - 1) * PAGE_SIZE); 3844 old_va = L2_NEXT_BUCKET(first_va) - PAGE_SIZE; 3845 3846 for (pte = (firstptep + L2_PTE_NUM_TOTAL - 1); pte > firstptep; pte--) { 3847 oldpte = *pte; 3848 if (l2pte_pa(oldpte) != pa) { 3849 pmap_section_p_failures++; 3850 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3851 "va %#x in pmap %p", va, pmap); 3852 return; 3853 } 3854 if ((oldpte & L2_S_PROMOTE) != (firstpte & L2_S_PROMOTE)) { 3855 pmap_section_p_failures++; 3856 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3857 "va %#x in pmap %p", va, pmap); 3858 return; 3859 } 3860 oldm = PHYS_TO_VM_PAGE(l2pte_pa(oldpte)); 3861 if (oldm && ((oldm->oflags & VPO_UNMANAGED) != 0)) { 3862 pmap_section_p_failures++; 3863 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3864 "va %#x in pmap %p", va, pmap); 3865 return; 3866 } 3867 3868 pve = pmap_find_pv(&oldm->md, pmap, old_va); 3869 if (pve == NULL) { 3870 pmap_section_p_failures++; 3871 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3872 "va %#x old_va %x - no pve", va, old_va); 3873 return; 3874 } 3875 3876 if (!L2_S_WRITABLE(oldpte) && (pve->pv_flags & PVF_WRITE)) 3877 pve->pv_flags &= ~PVF_WRITE; 3878 if (pve->pv_flags != first_pve->pv_flags) { 3879 pmap_section_p_failures++; 3880 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3881 "va %#x in pmap %p", va, pmap); 3882 return; 3883 } 3884 3885 old_va -= PAGE_SIZE; 3886 pa -= PAGE_SIZE; 3887 } 3888 /* 3889 * Promote the pv entries. 3890 */ 3891 pmap_pv_promote_section(pmap, first_va, l2pte_pa(firstpte)); 3892 /* 3893 * Map the superpage. 3894 */ 3895 pmap_map_section(pmap, first_va, l2pte_pa(firstpte), prot, TRUE); 3896 /* 3897 * Invalidate all possible TLB mappings for small 3898 * pages within the newly created superpage. 3899 * Rely on the first PTE's attributes since they 3900 * have to be consistent across all of the base pages 3901 * within the superpage. If page is not executable it 3902 * is at least referenced. 3903 * The fastest way to do that is to invalidate whole 3904 * TLB at once instead of executing 256 CP15 TLB 3905 * invalidations by single entry. TLBs usually maintain 3906 * several dozen entries so loss of unrelated entries is 3907 * still a less agresive approach. 3908 */ 3909 if (L2_S_EXECUTABLE(firstpte)) 3910 cpu_tlb_flushID(); 3911 else 3912 cpu_tlb_flushD(); 3913 cpu_cpwait(); 3914 3915 pmap_section_promotions++; 3916 CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x" 3917 " in pmap %p", first_va, pmap); 3918} 3919 3920/* 3921 * Fills a l2_bucket with mappings to consecutive physical pages. 3922 */ 3923static void 3924pmap_fill_l2b(struct l2_bucket *l2b, pt_entry_t newpte) 3925{ 3926 pt_entry_t *ptep; 3927 int i; 3928 3929 for (i = 0; i < L2_PTE_NUM_TOTAL; i++) { 3930 ptep = &l2b->l2b_kva[i]; 3931 *ptep = newpte; 3932 PTE_SYNC(ptep); 3933 3934 newpte += PAGE_SIZE; 3935 } 3936 3937 l2b->l2b_occupancy = L2_PTE_NUM_TOTAL; 3938} 3939 3940/* 3941 * Tries to demote a 1MB section mapping. If demotion fails, the 3942 * 1MB section mapping is invalidated. 3943 */ 3944static boolean_t 3945pmap_demote_section(pmap_t pmap, vm_offset_t va) 3946{ 3947 struct l2_bucket *l2b; 3948 struct pv_entry *l1pdpve; 3949 struct md_page *pvh; 3950 pd_entry_t *pl1pd, l1pd, newl1pd; 3951 pt_entry_t *firstptep, newpte; 3952 vm_offset_t pa; 3953 vm_page_t m; 3954 3955 PMAP_ASSERT_LOCKED(pmap); 3956 /* 3957 * According to assumptions described in pmap_promote_section, 3958 * kernel is and always should be mapped using 1MB section mappings. 3959 * What more, managed kernel pages were not to be promoted. 3960 */ 3961 KASSERT(pmap != pmap_kernel() && L1_IDX(va) != L1_IDX(vector_page), 3962 ("pmap_demote_section: forbidden section mapping")); 3963 3964 va = trunc_1mpage(va); 3965 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3966 l1pd = *pl1pd; 3967 KASSERT((l1pd & L1_TYPE_MASK) == L1_S_PROTO, 3968 ("pmap_demote_section: not section or invalid section")); 3969 3970 pa = l1pd & L1_S_FRAME; 3971 m = PHYS_TO_VM_PAGE(pa); 3972 KASSERT((m != NULL && (m->oflags & VPO_UNMANAGED) == 0), 3973 ("pmap_demote_section: no vm_page for selected superpage or" 3974 "unmanaged")); 3975 3976 pvh = pa_to_pvh(pa); 3977 l1pdpve = pmap_find_pv(pvh, pmap, va); 3978 KASSERT(l1pdpve != NULL, ("pmap_demote_section: no pv entry for " 3979 "managed page")); 3980 3981 l2b = pmap_get_l2_bucket(pmap, va); 3982 if (l2b == NULL) { 3983 KASSERT((l1pdpve->pv_flags & PVF_WIRED) == 0, 3984 ("pmap_demote_section: No l2_bucket for wired mapping")); 3985 /* 3986 * Invalidate the 1MB section mapping and return 3987 * "failure" if the mapping was never accessed or the 3988 * allocation of the new l2_bucket fails. 3989 */ 3990 if (!L1_S_REFERENCED(l1pd) || 3991 (l2b = pmap_alloc_l2_bucket(pmap, va)) == NULL) { 3992 /* Unmap and invalidate superpage. */ 3993 pmap_remove_section(pmap, trunc_1mpage(va)); 3994 CTR2(KTR_PMAP, "pmap_demote_section: failure for " 3995 "va %#x in pmap %p", va, pmap); 3996 return (FALSE); 3997 } 3998 } 3999 4000 /* 4001 * Now we should have corresponding l2_bucket available. 4002 * Let's process it to recreate 256 PTEs for each base page 4003 * within superpage. 4004 */ 4005 newpte = pa | L1_S_DEMOTE(l1pd); 4006 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) 4007 newpte |= pte_l2_s_cache_mode; 4008 4009 /* 4010 * If the l2_bucket is new, initialize it. 4011 */ 4012 if (l2b->l2b_occupancy == 0) 4013 pmap_fill_l2b(l2b, newpte); 4014 else { 4015 firstptep = &l2b->l2b_kva[0]; 4016 KASSERT(l2pte_pa(*firstptep) == (pa), 4017 ("pmap_demote_section: firstpte and newpte map different " 4018 "physical addresses")); 4019 /* 4020 * If the mapping has changed attributes, update the page table 4021 * entries. 4022 */ 4023 if ((*firstptep & L2_S_PROMOTE) != (L1_S_DEMOTE(l1pd))) 4024 pmap_fill_l2b(l2b, newpte); 4025 } 4026 /* Demote PV entry */ 4027 pmap_pv_demote_section(pmap, va, pa); 4028 4029 /* Now fix-up L1 */ 4030 newl1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 4031 *pl1pd = newl1pd; 4032 PTE_SYNC(pl1pd); 4033 /* Invalidate old TLB mapping */ 4034 if (L1_S_EXECUTABLE(l1pd)) 4035 cpu_tlb_flushID_SE(va); 4036 else if (L1_S_REFERENCED(l1pd)) 4037 cpu_tlb_flushD_SE(va); 4038 cpu_cpwait(); 4039 4040 pmap_section_demotions++; 4041 CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x" 4042 " in pmap %p", va, pmap); 4043 return (TRUE); 4044} 4045 4046/*************************************************** 4047 * page management routines. 4048 ***************************************************/ 4049 4050/* 4051 * We are in a serious low memory condition. Resort to 4052 * drastic measures to free some pages so we can allocate 4053 * another pv entry chunk. 4054 */ 4055static vm_page_t 4056pmap_pv_reclaim(pmap_t locked_pmap) 4057{ 4058 struct pch newtail; 4059 struct pv_chunk *pc; 4060 struct l2_bucket *l2b = NULL; 4061 pmap_t pmap; 4062 pd_entry_t *pl1pd; 4063 pt_entry_t *ptep; 4064 pv_entry_t pv; 4065 vm_offset_t va; 4066 vm_page_t free, m, m_pc; 4067 uint32_t inuse; 4068 int bit, field, freed, idx; 4069 4070 PMAP_ASSERT_LOCKED(locked_pmap); 4071 pmap = NULL; 4072 free = m_pc = NULL; 4073 TAILQ_INIT(&newtail); 4074 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || 4075 free == NULL)) { 4076 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 4077 if (pmap != pc->pc_pmap) { 4078 if (pmap != NULL) { 4079 cpu_tlb_flushID(); 4080 cpu_cpwait(); 4081 if (pmap != locked_pmap) 4082 PMAP_UNLOCK(pmap); 4083 } 4084 pmap = pc->pc_pmap; 4085 /* Avoid deadlock and lock recursion. */ 4086 if (pmap > locked_pmap) 4087 PMAP_LOCK(pmap); 4088 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 4089 pmap = NULL; 4090 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4091 continue; 4092 } 4093 } 4094 4095 /* 4096 * Destroy every non-wired, 4 KB page mapping in the chunk. 4097 */ 4098 freed = 0; 4099 for (field = 0; field < _NPCM; field++) { 4100 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 4101 inuse != 0; inuse &= ~(1UL << bit)) { 4102 bit = ffs(inuse) - 1; 4103 idx = field * sizeof(inuse) * NBBY + bit; 4104 pv = &pc->pc_pventry[idx]; 4105 va = pv->pv_va; 4106 4107 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 4108 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4109 continue; 4110 if (pv->pv_flags & PVF_WIRED) 4111 continue; 4112 4113 l2b = pmap_get_l2_bucket(pmap, va); 4114 KASSERT(l2b != NULL, ("No l2 bucket")); 4115 ptep = &l2b->l2b_kva[l2pte_index(va)]; 4116 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 4117 KASSERT((vm_offset_t)m >= KERNBASE, 4118 ("Trying to access non-existent page " 4119 "va %x pte %x", va, *ptep)); 4120 *ptep = 0; 4121 PTE_SYNC(ptep); 4122 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 4123 if (TAILQ_EMPTY(&m->md.pv_list)) 4124 vm_page_aflag_clear(m, PGA_WRITEABLE); 4125 pc->pc_map[field] |= 1UL << bit; 4126 freed++; 4127 } 4128 } 4129 4130 if (freed == 0) { 4131 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4132 continue; 4133 } 4134 /* Every freed mapping is for a 4 KB page. */ 4135 pmap->pm_stats.resident_count -= freed; 4136 PV_STAT(pv_entry_frees += freed); 4137 PV_STAT(pv_entry_spare += freed); 4138 pv_entry_count -= freed; 4139 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4140 for (field = 0; field < _NPCM; field++) 4141 if (pc->pc_map[field] != pc_freemask[field]) { 4142 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 4143 pc_list); 4144 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4145 4146 /* 4147 * One freed pv entry in locked_pmap is 4148 * sufficient. 4149 */ 4150 if (pmap == locked_pmap) 4151 goto out; 4152 break; 4153 } 4154 if (field == _NPCM) { 4155 PV_STAT(pv_entry_spare -= _NPCPV); 4156 PV_STAT(pc_chunk_count--); 4157 PV_STAT(pc_chunk_frees++); 4158 /* Entire chunk is free; return it. */ 4159 m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4160 pmap_qremove((vm_offset_t)pc, 1); 4161 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4162 break; 4163 } 4164 } 4165out: 4166 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 4167 if (pmap != NULL) { 4168 cpu_tlb_flushID(); 4169 cpu_cpwait(); 4170 if (pmap != locked_pmap) 4171 PMAP_UNLOCK(pmap); 4172 } 4173 return (m_pc); 4174} 4175 4176/* 4177 * free the pv_entry back to the free list 4178 */ 4179static void 4180pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv) 4181{ 4182 struct pv_chunk *pc; 4183 int bit, field, idx; 4184 4185 rw_assert(&pvh_global_lock, RA_WLOCKED); 4186 PMAP_ASSERT_LOCKED(pmap); 4187 PV_STAT(pv_entry_frees++); 4188 PV_STAT(pv_entry_spare++); 4189 pv_entry_count--; 4190 pc = pv_to_chunk(pv); 4191 idx = pv - &pc->pc_pventry[0]; 4192 field = idx / (sizeof(u_long) * NBBY); 4193 bit = idx % (sizeof(u_long) * NBBY); 4194 pc->pc_map[field] |= 1ul << bit; 4195 for (idx = 0; idx < _NPCM; idx++) 4196 if (pc->pc_map[idx] != pc_freemask[idx]) { 4197 /* 4198 * 98% of the time, pc is already at the head of the 4199 * list. If it isn't already, move it to the head. 4200 */ 4201 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != 4202 pc)) { 4203 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4204 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 4205 pc_list); 4206 } 4207 return; 4208 } 4209 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4210 pmap_free_pv_chunk(pc); 4211} 4212 4213static void 4214pmap_free_pv_chunk(struct pv_chunk *pc) 4215{ 4216 vm_page_t m; 4217 4218 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 4219 PV_STAT(pv_entry_spare -= _NPCPV); 4220 PV_STAT(pc_chunk_count--); 4221 PV_STAT(pc_chunk_frees++); 4222 /* entire chunk is free, return it */ 4223 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4224 pmap_qremove((vm_offset_t)pc, 1); 4225 vm_page_unwire(m, 0); 4226 vm_page_free(m); 4227 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4228 4229} 4230 4231static pv_entry_t 4232pmap_get_pv_entry(pmap_t pmap, boolean_t try) 4233{ 4234 static const struct timeval printinterval = { 60, 0 }; 4235 static struct timeval lastprint; 4236 struct pv_chunk *pc; 4237 pv_entry_t pv; 4238 vm_page_t m; 4239 int bit, field, idx; 4240 4241 rw_assert(&pvh_global_lock, RA_WLOCKED); 4242 PMAP_ASSERT_LOCKED(pmap); 4243 PV_STAT(pv_entry_allocs++); 4244 pv_entry_count++; 4245 4246 if (pv_entry_count > pv_entry_high_water) 4247 if (ratecheck(&lastprint, &printinterval)) 4248 printf("%s: Approaching the limit on PV entries.\n", 4249 __func__); 4250retry: 4251 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 4252 if (pc != NULL) { 4253 for (field = 0; field < _NPCM; field++) { 4254 if (pc->pc_map[field]) { 4255 bit = ffs(pc->pc_map[field]) - 1; 4256 break; 4257 } 4258 } 4259 if (field < _NPCM) { 4260 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; 4261 pv = &pc->pc_pventry[idx]; 4262 pc->pc_map[field] &= ~(1ul << bit); 4263 /* If this was the last item, move it to tail */ 4264 for (field = 0; field < _NPCM; field++) 4265 if (pc->pc_map[field] != 0) { 4266 PV_STAT(pv_entry_spare--); 4267 return (pv); /* not full, return */ 4268 } 4269 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4270 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 4271 PV_STAT(pv_entry_spare--); 4272 return (pv); 4273 } 4274 } 4275 /* 4276 * Access to the ptelist "pv_vafree" is synchronized by the pvh 4277 * global lock. If "pv_vafree" is currently non-empty, it will 4278 * remain non-empty until pmap_ptelist_alloc() completes. 4279 */ 4280 if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 4281 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 4282 if (try) { 4283 pv_entry_count--; 4284 PV_STAT(pc_chunk_tryfail++); 4285 return (NULL); 4286 } 4287 m = pmap_pv_reclaim(pmap); 4288 if (m == NULL) 4289 goto retry; 4290 } 4291 PV_STAT(pc_chunk_count++); 4292 PV_STAT(pc_chunk_allocs++); 4293 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 4294 pmap_qenter((vm_offset_t)pc, &m, 1); 4295 pc->pc_pmap = pmap; 4296 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 4297 for (field = 1; field < _NPCM; field++) 4298 pc->pc_map[field] = pc_freemask[field]; 4299 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 4300 pv = &pc->pc_pventry[0]; 4301 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 4302 PV_STAT(pv_entry_spare += _NPCPV - 1); 4303 return (pv); 4304} 4305 4306/* 4307 * Remove the given range of addresses from the specified map. 4308 * 4309 * It is assumed that the start and end are properly 4310 * rounded to the page size. 4311 */ 4312#define PMAP_REMOVE_CLEAN_LIST_SIZE 3 4313void 4314pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 4315{ 4316 struct l2_bucket *l2b; 4317 vm_offset_t next_bucket; 4318 pd_entry_t *pl1pd, l1pd; 4319 pt_entry_t *ptep; 4320 u_int total; 4321 u_int mappings, is_exec, is_refd; 4322 int flushall = 0; 4323 4324 4325 /* 4326 * we lock in the pmap => pv_head direction 4327 */ 4328 4329 rw_wlock(&pvh_global_lock); 4330 PMAP_LOCK(pmap); 4331 total = 0; 4332 while (sva < eva) { 4333 /* 4334 * Check for large page. 4335 */ 4336 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 4337 l1pd = *pl1pd; 4338 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 4339 KASSERT((l1pd & L1_S_DOM_MASK) != 4340 L1_S_DOM(PMAP_DOMAIN_KERNEL), ("pmap_remove: " 4341 "Trying to remove kernel section mapping")); 4342 /* 4343 * Are we removing the entire large page? If not, 4344 * demote the mapping and fall through. 4345 */ 4346 if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) && 4347 eva >= L2_NEXT_BUCKET(sva)) { 4348 pmap_remove_section(pmap, sva); 4349 sva = L2_NEXT_BUCKET(sva); 4350 continue; 4351 } else if (!pmap_demote_section(pmap, sva)) { 4352 /* The large page mapping was destroyed. */ 4353 sva = L2_NEXT_BUCKET(sva); 4354 continue; 4355 } 4356 } 4357 /* 4358 * Do one L2 bucket's worth at a time. 4359 */ 4360 next_bucket = L2_NEXT_BUCKET(sva); 4361 if (next_bucket > eva) 4362 next_bucket = eva; 4363 4364 l2b = pmap_get_l2_bucket(pmap, sva); 4365 if (l2b == NULL) { 4366 sva = next_bucket; 4367 continue; 4368 } 4369 4370 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4371 mappings = 0; 4372 4373 while (sva < next_bucket) { 4374 struct vm_page *m; 4375 pt_entry_t pte; 4376 vm_paddr_t pa; 4377 4378 pte = *ptep; 4379 4380 if (pte == 0) { 4381 /* 4382 * Nothing here, move along 4383 */ 4384 sva += PAGE_SIZE; 4385 ptep++; 4386 continue; 4387 } 4388 4389 pmap->pm_stats.resident_count--; 4390 pa = l2pte_pa(pte); 4391 is_exec = 0; 4392 is_refd = 1; 4393 4394 /* 4395 * Update flags. In a number of circumstances, 4396 * we could cluster a lot of these and do a 4397 * number of sequential pages in one go. 4398 */ 4399 if ((m = PHYS_TO_VM_PAGE(pa)) != NULL) { 4400 struct pv_entry *pve; 4401 4402 pve = pmap_remove_pv(m, pmap, sva); 4403 if (pve) { 4404 is_exec = PTE_BEEN_EXECD(pte); 4405 is_refd = PTE_BEEN_REFD(pte); 4406 pmap_free_pv_entry(pmap, pve); 4407 } 4408 } 4409 4410 *ptep = 0; 4411 PTE_SYNC(ptep); 4412 if (pmap_is_current(pmap)) { 4413 total++; 4414 if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) { 4415 if (is_exec) 4416 cpu_tlb_flushID_SE(sva); 4417 else if (is_refd) 4418 cpu_tlb_flushD_SE(sva); 4419 } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) 4420 flushall = 1; 4421 } 4422 4423 sva += PAGE_SIZE; 4424 ptep++; 4425 mappings++; 4426 } 4427 4428 pmap_free_l2_bucket(pmap, l2b, mappings); 4429 } 4430 4431 rw_wunlock(&pvh_global_lock); 4432 if (flushall) 4433 cpu_tlb_flushID(); 4434 cpu_cpwait(); 4435 4436 PMAP_UNLOCK(pmap); 4437} 4438 4439/* 4440 * pmap_zero_page() 4441 * 4442 * Zero a given physical page by mapping it at a page hook point. 4443 * In doing the zero page op, the page we zero is mapped cachable, as with 4444 * StrongARM accesses to non-cached pages are non-burst making writing 4445 * _any_ bulk data very slow. 4446 */ 4447static void 4448pmap_zero_page_gen(vm_page_t m, int off, int size) 4449{ 4450 struct czpages *czp; 4451 4452 KASSERT(TAILQ_EMPTY(&m->md.pv_list), 4453 ("pmap_zero_page_gen: page has mappings")); 4454 4455 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 4456 4457 sched_pin(); 4458 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4459 mtx_lock(&czp->lock); 4460 4461 /* 4462 * Hook in the page, zero it. 4463 */ 4464 *czp->dstptep = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF; 4465 pmap_set_prot(czp->dstptep, VM_PROT_WRITE, 0); 4466 PTE_SYNC(czp->dstptep); 4467 cpu_tlb_flushD_SE(czp->dstva); 4468 cpu_cpwait(); 4469 4470 if (off || size != PAGE_SIZE) 4471 bzero((void *)(czp->dstva + off), size); 4472 else 4473 bzero_page(czp->dstva); 4474 4475 /* 4476 * Although aliasing is not possible, if we use temporary mappings with 4477 * memory that will be mapped later as non-cached or with write-through 4478 * caches, we might end up overwriting it when calling wbinv_all. So 4479 * make sure caches are clean after the operation. 4480 */ 4481 cpu_idcache_wbinv_range(czp->dstva, size); 4482 pmap_l2cache_wbinv_range(czp->dstva, phys, size); 4483 4484 mtx_unlock(&czp->lock); 4485 sched_unpin(); 4486} 4487 4488/* 4489 * pmap_zero_page zeros the specified hardware page by mapping 4490 * the page into KVM and using bzero to clear its contents. 4491 */ 4492void 4493pmap_zero_page(vm_page_t m) 4494{ 4495 pmap_zero_page_gen(m, 0, PAGE_SIZE); 4496} 4497 4498 4499/* 4500 * pmap_zero_page_area zeros the specified hardware page by mapping 4501 * the page into KVM and using bzero to clear its contents. 4502 * 4503 * off and size may not cover an area beyond a single hardware page. 4504 */ 4505void 4506pmap_zero_page_area(vm_page_t m, int off, int size) 4507{ 4508 4509 pmap_zero_page_gen(m, off, size); 4510} 4511 4512 4513/* 4514 * pmap_zero_page_idle zeros the specified hardware page by mapping 4515 * the page into KVM and using bzero to clear its contents. This 4516 * is intended to be called from the vm_pagezero process only and 4517 * outside of Giant. 4518 */ 4519void 4520pmap_zero_page_idle(vm_page_t m) 4521{ 4522 4523 pmap_zero_page(m); 4524} 4525 4526/* 4527 * pmap_copy_page copies the specified (machine independent) 4528 * page by mapping the page into virtual memory and using 4529 * bcopy to copy the page, one machine dependent page at a 4530 * time. 4531 */ 4532 4533/* 4534 * pmap_copy_page() 4535 * 4536 * Copy one physical page into another, by mapping the pages into 4537 * hook points. The same comment regarding cachability as in 4538 * pmap_zero_page also applies here. 4539 */ 4540void 4541pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) 4542{ 4543 struct czpages *czp; 4544 4545 sched_pin(); 4546 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4547 mtx_lock(&czp->lock); 4548 4549 /* 4550 * Map the pages into the page hook points, copy them, and purge the 4551 * cache for the appropriate page. 4552 */ 4553 *czp->srcptep = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF; 4554 pmap_set_prot(czp->srcptep, VM_PROT_READ, 0); 4555 PTE_SYNC(czp->srcptep); 4556 cpu_tlb_flushD_SE(czp->srcva); 4557 *czp->dstptep = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF; 4558 pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0); 4559 PTE_SYNC(czp->dstptep); 4560 cpu_tlb_flushD_SE(czp->dstva); 4561 cpu_cpwait(); 4562 4563 bcopy_page(czp->srcva, czp->dstva); 4564 4565 /* 4566 * Although aliasing is not possible, if we use temporary mappings with 4567 * memory that will be mapped later as non-cached or with write-through 4568 * caches, we might end up overwriting it when calling wbinv_all. So 4569 * make sure caches are clean after the operation. 4570 */ 4571 cpu_idcache_wbinv_range(czp->dstva, PAGE_SIZE); 4572 pmap_l2cache_wbinv_range(czp->dstva, dst, PAGE_SIZE); 4573 4574 mtx_unlock(&czp->lock); 4575 sched_unpin(); 4576} 4577 4578int unmapped_buf_allowed = 1; 4579 4580void 4581pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 4582 vm_offset_t b_offset, int xfersize) 4583{ 4584 vm_page_t a_pg, b_pg; 4585 vm_offset_t a_pg_offset, b_pg_offset; 4586 int cnt; 4587 struct czpages *czp; 4588 4589 sched_pin(); 4590 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4591 mtx_lock(&czp->lock); 4592 4593 while (xfersize > 0) { 4594 a_pg = ma[a_offset >> PAGE_SHIFT]; 4595 a_pg_offset = a_offset & PAGE_MASK; 4596 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 4597 b_pg = mb[b_offset >> PAGE_SHIFT]; 4598 b_pg_offset = b_offset & PAGE_MASK; 4599 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 4600 *czp->srcptep = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) | 4601 pte_l2_s_cache_mode | L2_S_REF; 4602 pmap_set_prot(czp->srcptep, VM_PROT_READ, 0); 4603 PTE_SYNC(czp->srcptep); 4604 cpu_tlb_flushD_SE(czp->srcva); 4605 *czp->dstptep = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) | 4606 pte_l2_s_cache_mode | L2_S_REF; 4607 pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0); 4608 PTE_SYNC(czp->dstptep); 4609 cpu_tlb_flushD_SE(czp->dstva); 4610 cpu_cpwait(); 4611 bcopy((char *)czp->srcva + a_pg_offset, (char *)czp->dstva + b_pg_offset, 4612 cnt); 4613 cpu_idcache_wbinv_range(czp->dstva + b_pg_offset, cnt); 4614 pmap_l2cache_wbinv_range(czp->dstva + b_pg_offset, 4615 VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt); 4616 xfersize -= cnt; 4617 a_offset += cnt; 4618 b_offset += cnt; 4619 } 4620 4621 mtx_unlock(&czp->lock); 4622 sched_unpin(); 4623} 4624 4625void 4626pmap_copy_page(vm_page_t src, vm_page_t dst) 4627{ 4628 4629 if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size && 4630 _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst), 4631 (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0) 4632 return; 4633 4634 pmap_copy_page_generic(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 4635} 4636 4637/* 4638 * this routine returns true if a physical page resides 4639 * in the given pmap. 4640 */ 4641boolean_t 4642pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 4643{ 4644 struct md_page *pvh; 4645 pv_entry_t pv; 4646 int loops = 0; 4647 boolean_t rv; 4648 4649 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4650 ("pmap_page_exists_quick: page %p is not managed", m)); 4651 rv = FALSE; 4652 rw_wlock(&pvh_global_lock); 4653 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4654 if (PV_PMAP(pv) == pmap) { 4655 rv = TRUE; 4656 break; 4657 } 4658 loops++; 4659 if (loops >= 16) 4660 break; 4661 } 4662 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 4663 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4664 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4665 if (PV_PMAP(pv) == pmap) { 4666 rv = TRUE; 4667 break; 4668 } 4669 loops++; 4670 if (loops >= 16) 4671 break; 4672 } 4673 } 4674 rw_wunlock(&pvh_global_lock); 4675 return (rv); 4676} 4677 4678/* 4679 * pmap_page_wired_mappings: 4680 * 4681 * Return the number of managed mappings to the given physical page 4682 * that are wired. 4683 */ 4684int 4685pmap_page_wired_mappings(vm_page_t m) 4686{ 4687 int count; 4688 4689 count = 0; 4690 if ((m->oflags & VPO_UNMANAGED) != 0) 4691 return (count); 4692 rw_wlock(&pvh_global_lock); 4693 count = pmap_pvh_wired_mappings(&m->md, count); 4694 if ((m->flags & PG_FICTITIOUS) == 0) { 4695 count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), 4696 count); 4697 } 4698 rw_wunlock(&pvh_global_lock); 4699 return (count); 4700} 4701 4702/* 4703 * pmap_pvh_wired_mappings: 4704 * 4705 * Return the updated number "count" of managed mappings that are wired. 4706 */ 4707static int 4708pmap_pvh_wired_mappings(struct md_page *pvh, int count) 4709{ 4710 pv_entry_t pv; 4711 4712 rw_assert(&pvh_global_lock, RA_WLOCKED); 4713 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4714 if ((pv->pv_flags & PVF_WIRED) != 0) 4715 count++; 4716 } 4717 return (count); 4718} 4719 4720/* 4721 * Returns TRUE if any of the given mappings were referenced and FALSE 4722 * otherwise. Both page and section mappings are supported. 4723 */ 4724static boolean_t 4725pmap_is_referenced_pvh(struct md_page *pvh) 4726{ 4727 struct l2_bucket *l2b; 4728 pv_entry_t pv; 4729 pd_entry_t *pl1pd; 4730 pt_entry_t *ptep; 4731 pmap_t pmap; 4732 boolean_t rv; 4733 4734 rw_assert(&pvh_global_lock, RA_WLOCKED); 4735 rv = FALSE; 4736 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4737 pmap = PV_PMAP(pv); 4738 PMAP_LOCK(pmap); 4739 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 4740 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4741 rv = L1_S_REFERENCED(*pl1pd); 4742 else { 4743 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 4744 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 4745 rv = L2_S_REFERENCED(*ptep); 4746 } 4747 PMAP_UNLOCK(pmap); 4748 if (rv) 4749 break; 4750 } 4751 return (rv); 4752} 4753 4754/* 4755 * pmap_is_referenced: 4756 * 4757 * Return whether or not the specified physical page was referenced 4758 * in any physical maps. 4759 */ 4760boolean_t 4761pmap_is_referenced(vm_page_t m) 4762{ 4763 boolean_t rv; 4764 4765 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4766 ("pmap_is_referenced: page %p is not managed", m)); 4767 rw_wlock(&pvh_global_lock); 4768 rv = pmap_is_referenced_pvh(&m->md) || 4769 ((m->flags & PG_FICTITIOUS) == 0 && 4770 pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4771 rw_wunlock(&pvh_global_lock); 4772 return (rv); 4773} 4774 4775/* 4776 * pmap_ts_referenced: 4777 * 4778 * Return the count of reference bits for a page, clearing all of them. 4779 */ 4780int 4781pmap_ts_referenced(vm_page_t m) 4782{ 4783 4784 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4785 ("pmap_ts_referenced: page %p is not managed", m)); 4786 return (pmap_clearbit(m, PVF_REF)); 4787} 4788 4789/* 4790 * Returns TRUE if any of the given mappings were used to modify 4791 * physical memory. Otherwise, returns FALSE. Both page and 1MB section 4792 * mappings are supported. 4793 */ 4794static boolean_t 4795pmap_is_modified_pvh(struct md_page *pvh) 4796{ 4797 pd_entry_t *pl1pd; 4798 struct l2_bucket *l2b; 4799 pv_entry_t pv; 4800 pt_entry_t *ptep; 4801 pmap_t pmap; 4802 boolean_t rv; 4803 4804 rw_assert(&pvh_global_lock, RA_WLOCKED); 4805 rv = FALSE; 4806 4807 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4808 pmap = PV_PMAP(pv); 4809 PMAP_LOCK(pmap); 4810 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 4811 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4812 rv = L1_S_WRITABLE(*pl1pd); 4813 else { 4814 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 4815 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 4816 rv = L2_S_WRITABLE(*ptep); 4817 } 4818 PMAP_UNLOCK(pmap); 4819 if (rv) 4820 break; 4821 } 4822 4823 return (rv); 4824} 4825 4826boolean_t 4827pmap_is_modified(vm_page_t m) 4828{ 4829 boolean_t rv; 4830 4831 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4832 ("pmap_is_modified: page %p is not managed", m)); 4833 /* 4834 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 4835 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 4836 * is clear, no PTEs can have APX cleared. 4837 */ 4838 VM_OBJECT_ASSERT_WLOCKED(m->object); 4839 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 4840 return (FALSE); 4841 rw_wlock(&pvh_global_lock); 4842 rv = pmap_is_modified_pvh(&m->md) || 4843 ((m->flags & PG_FICTITIOUS) == 0 && 4844 pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4845 rw_wunlock(&pvh_global_lock); 4846 return (rv); 4847} 4848 4849/* 4850 * Apply the given advice to the specified range of addresses within the 4851 * given pmap. Depending on the advice, clear the referenced and/or 4852 * modified flags in each mapping. 4853 */ 4854void 4855pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 4856{ 4857 struct l2_bucket *l2b; 4858 struct pv_entry *pve; 4859 pd_entry_t *pl1pd, l1pd; 4860 pt_entry_t *ptep, opte, pte; 4861 vm_offset_t next_bucket; 4862 vm_page_t m; 4863 4864 if (advice != MADV_DONTNEED && advice != MADV_FREE) 4865 return; 4866 rw_wlock(&pvh_global_lock); 4867 PMAP_LOCK(pmap); 4868 for (; sva < eva; sva = next_bucket) { 4869 next_bucket = L2_NEXT_BUCKET(sva); 4870 if (next_bucket < sva) 4871 next_bucket = eva; 4872 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 4873 l1pd = *pl1pd; 4874 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 4875 if (pmap == pmap_kernel()) 4876 continue; 4877 if (!pmap_demote_section(pmap, sva)) { 4878 /* 4879 * The large page mapping was destroyed. 4880 */ 4881 continue; 4882 } 4883 /* 4884 * Unless the page mappings are wired, remove the 4885 * mapping to a single page so that a subsequent 4886 * access may repromote. Since the underlying 4887 * l2_bucket is fully populated, this removal 4888 * never frees an entire l2_bucket. 4889 */ 4890 l2b = pmap_get_l2_bucket(pmap, sva); 4891 KASSERT(l2b != NULL, 4892 ("pmap_advise: no l2 bucket for " 4893 "va 0x%#x, pmap 0x%p", sva, pmap)); 4894 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4895 opte = *ptep; 4896 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 4897 KASSERT(m != NULL, 4898 ("pmap_advise: no vm_page for demoted superpage")); 4899 pve = pmap_find_pv(&m->md, pmap, sva); 4900 KASSERT(pve != NULL, 4901 ("pmap_advise: no PV entry for managed mapping")); 4902 if ((pve->pv_flags & PVF_WIRED) == 0) { 4903 pmap_free_l2_bucket(pmap, l2b, 1); 4904 pve = pmap_remove_pv(m, pmap, sva); 4905 pmap_free_pv_entry(pmap, pve); 4906 *ptep = 0; 4907 PTE_SYNC(ptep); 4908 if (pmap_is_current(pmap)) { 4909 if (PTE_BEEN_EXECD(opte)) 4910 cpu_tlb_flushID_SE(sva); 4911 else if (PTE_BEEN_REFD(opte)) 4912 cpu_tlb_flushD_SE(sva); 4913 } 4914 } 4915 } 4916 if (next_bucket > eva) 4917 next_bucket = eva; 4918 l2b = pmap_get_l2_bucket(pmap, sva); 4919 if (l2b == NULL) 4920 continue; 4921 for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4922 sva != next_bucket; ptep++, sva += PAGE_SIZE) { 4923 opte = pte = *ptep; 4924 if ((opte & L2_S_PROTO) == 0) 4925 continue; 4926 m = PHYS_TO_VM_PAGE(l2pte_pa(opte)); 4927 if (m == NULL || (m->oflags & VPO_UNMANAGED) != 0) 4928 continue; 4929 else if (L2_S_WRITABLE(opte)) { 4930 if (advice == MADV_DONTNEED) { 4931 /* 4932 * Don't need to mark the page 4933 * dirty as it was already marked as 4934 * such in pmap_fault_fixup() or 4935 * pmap_enter_locked(). 4936 * Just clear the state. 4937 */ 4938 } else 4939 pte |= L2_APX; 4940 4941 pte &= ~L2_S_REF; 4942 *ptep = pte; 4943 PTE_SYNC(ptep); 4944 } else if (L2_S_REFERENCED(opte)) { 4945 pte &= ~L2_S_REF; 4946 *ptep = pte; 4947 PTE_SYNC(ptep); 4948 } else 4949 continue; 4950 if (pmap_is_current(pmap)) { 4951 if (PTE_BEEN_EXECD(opte)) 4952 cpu_tlb_flushID_SE(sva); 4953 else if (PTE_BEEN_REFD(opte)) 4954 cpu_tlb_flushD_SE(sva); 4955 } 4956 } 4957 } 4958 cpu_cpwait(); 4959 rw_wunlock(&pvh_global_lock); 4960 PMAP_UNLOCK(pmap); 4961} 4962 4963/* 4964 * Clear the modify bits on the specified physical page. 4965 */ 4966void 4967pmap_clear_modify(vm_page_t m) 4968{ 4969 4970 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4971 ("pmap_clear_modify: page %p is not managed", m)); 4972 VM_OBJECT_ASSERT_WLOCKED(m->object); 4973 KASSERT(!vm_page_xbusied(m), 4974 ("pmap_clear_modify: page %p is exclusive busied", m)); 4975 4976 /* 4977 * If the page is not PGA_WRITEABLE, then no mappings can be modified. 4978 * If the object containing the page is locked and the page is not 4979 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 4980 */ 4981 if ((m->aflags & PGA_WRITEABLE) == 0) 4982 return; 4983 if (pmap_is_modified(m)) 4984 pmap_clearbit(m, PVF_MOD); 4985} 4986 4987 4988/* 4989 * Clear the write and modified bits in each of the given page's mappings. 4990 */ 4991void 4992pmap_remove_write(vm_page_t m) 4993{ 4994 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4995 ("pmap_remove_write: page %p is not managed", m)); 4996 4997 /* 4998 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 4999 * set by another thread while the object is locked. Thus, 5000 * if PGA_WRITEABLE is clear, no page table entries need updating. 5001 */ 5002 VM_OBJECT_ASSERT_WLOCKED(m->object); 5003 if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) 5004 pmap_clearbit(m, PVF_WRITE); 5005} 5006 5007 5008/* 5009 * perform the pmap work for mincore 5010 */ 5011int 5012pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 5013{ 5014 struct l2_bucket *l2b; 5015 pd_entry_t *pl1pd, l1pd; 5016 pt_entry_t *ptep, pte; 5017 vm_paddr_t pa; 5018 vm_page_t m; 5019 int val; 5020 boolean_t managed; 5021 5022 PMAP_LOCK(pmap); 5023retry: 5024 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(addr)]; 5025 l1pd = *pl1pd; 5026 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 5027 pa = (l1pd & L1_S_FRAME); 5028 val = MINCORE_SUPER | MINCORE_INCORE; 5029 if (L1_S_WRITABLE(l1pd)) 5030 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 5031 managed = FALSE; 5032 m = PHYS_TO_VM_PAGE(pa); 5033 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) 5034 managed = TRUE; 5035 if (managed) { 5036 if (L1_S_REFERENCED(l1pd)) 5037 val |= MINCORE_REFERENCED | 5038 MINCORE_REFERENCED_OTHER; 5039 } 5040 } else { 5041 l2b = pmap_get_l2_bucket(pmap, addr); 5042 if (l2b == NULL) { 5043 val = 0; 5044 goto out; 5045 } 5046 ptep = &l2b->l2b_kva[l2pte_index(addr)]; 5047 pte = *ptep; 5048 if (!l2pte_valid(pte)) { 5049 val = 0; 5050 goto out; 5051 } 5052 val = MINCORE_INCORE; 5053 if (L2_S_WRITABLE(pte)) 5054 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 5055 managed = FALSE; 5056 pa = l2pte_pa(pte); 5057 m = PHYS_TO_VM_PAGE(pa); 5058 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) 5059 managed = TRUE; 5060 if (managed) { 5061 if (L2_S_REFERENCED(pte)) 5062 val |= MINCORE_REFERENCED | 5063 MINCORE_REFERENCED_OTHER; 5064 } 5065 } 5066 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 5067 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 5068 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 5069 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 5070 goto retry; 5071 } else 5072out: 5073 PA_UNLOCK_COND(*locked_pa); 5074 PMAP_UNLOCK(pmap); 5075 return (val); 5076} 5077 5078void 5079pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 5080{ 5081} 5082 5083/* 5084 * Increase the starting virtual address of the given mapping if a 5085 * different alignment might result in more superpage mappings. 5086 */ 5087void 5088pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 5089 vm_offset_t *addr, vm_size_t size) 5090{ 5091 vm_offset_t superpage_offset; 5092 5093 if (size < NBPDR) 5094 return; 5095 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 5096 offset += ptoa(object->pg_color); 5097 superpage_offset = offset & PDRMASK; 5098 if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || 5099 (*addr & PDRMASK) == superpage_offset) 5100 return; 5101 if ((*addr & PDRMASK) < superpage_offset) 5102 *addr = (*addr & ~PDRMASK) + superpage_offset; 5103 else 5104 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; 5105} 5106 5107/* 5108 * pmap_map_section: 5109 * 5110 * Create a single section mapping. 5111 */ 5112void 5113pmap_map_section(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot, 5114 boolean_t ref) 5115{ 5116 pd_entry_t *pl1pd, l1pd; 5117 pd_entry_t fl; 5118 5119 KASSERT(((va | pa) & L1_S_OFFSET) == 0, 5120 ("Not a valid section mapping")); 5121 5122 fl = pte_l1_s_cache_mode; 5123 5124 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 5125 l1pd = L1_S_PROTO | pa | L1_S_PROT(PTE_USER, prot) | fl | 5126 L1_S_DOM(pmap->pm_domain); 5127 5128 /* Mark page referenced if this section is a result of a promotion. */ 5129 if (ref == TRUE) 5130 l1pd |= L1_S_REF; 5131#ifdef SMP 5132 l1pd |= L1_SHARED; 5133#endif 5134 *pl1pd = l1pd; 5135 PTE_SYNC(pl1pd); 5136} 5137 5138/* 5139 * pmap_link_l2pt: 5140 * 5141 * Link the L2 page table specified by l2pv.pv_pa into the L1 5142 * page table at the slot for "va". 5143 */ 5144void 5145pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv) 5146{ 5147 pd_entry_t *pde = (pd_entry_t *) l1pt, proto; 5148 u_int slot = va >> L1_S_SHIFT; 5149 5150 proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; 5151 5152#ifdef VERBOSE_INIT_ARM 5153 printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va); 5154#endif 5155 5156 pde[slot + 0] = proto | (l2pv->pv_pa + 0x000); 5157 PTE_SYNC(&pde[slot]); 5158 5159 SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list); 5160 5161} 5162 5163/* 5164 * pmap_map_entry 5165 * 5166 * Create a single page mapping. 5167 */ 5168void 5169pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, 5170 int cache) 5171{ 5172 pd_entry_t *pde = (pd_entry_t *) l1pt; 5173 pt_entry_t fl; 5174 pt_entry_t *ptep; 5175 5176 KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin")); 5177 5178 fl = l2s_mem_types[cache]; 5179 5180 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) 5181 panic("pmap_map_entry: no L2 table for VA 0x%08x", va); 5182 5183 ptep = (pt_entry_t *)kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK); 5184 5185 if (ptep == NULL) 5186 panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va); 5187 5188 ptep[l2pte_index(va)] = L2_S_PROTO | pa | fl | L2_S_REF; 5189 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); 5190 PTE_SYNC(&ptep[l2pte_index(va)]); 5191} 5192 5193/* 5194 * pmap_map_chunk: 5195 * 5196 * Map a chunk of memory using the most efficient mappings 5197 * possible (section. large page, small page) into the 5198 * provided L1 and L2 tables at the specified virtual address. 5199 */ 5200vm_size_t 5201pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, 5202 vm_size_t size, int prot, int type) 5203{ 5204 pd_entry_t *pde = (pd_entry_t *) l1pt; 5205 pt_entry_t *ptep, f1, f2s, f2l; 5206 vm_size_t resid; 5207 int i; 5208 5209 resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); 5210 5211 if (l1pt == 0) 5212 panic("pmap_map_chunk: no L1 table provided"); 5213 5214#ifdef VERBOSE_INIT_ARM 5215 printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x " 5216 "prot=0x%x type=%d\n", pa, va, size, resid, prot, type); 5217#endif 5218 5219 f1 = l1_mem_types[type]; 5220 f2l = l2l_mem_types[type]; 5221 f2s = l2s_mem_types[type]; 5222 5223 size = resid; 5224 5225 while (resid > 0) { 5226 /* See if we can use a section mapping. */ 5227 if (L1_S_MAPPABLE_P(va, pa, resid)) { 5228#ifdef VERBOSE_INIT_ARM 5229 printf("S"); 5230#endif 5231 pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | 5232 L1_S_PROT(PTE_KERNEL, prot | VM_PROT_EXECUTE) | 5233 f1 | L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_S_REF; 5234 PTE_SYNC(&pde[va >> L1_S_SHIFT]); 5235 va += L1_S_SIZE; 5236 pa += L1_S_SIZE; 5237 resid -= L1_S_SIZE; 5238 continue; 5239 } 5240 5241 /* 5242 * Ok, we're going to use an L2 table. Make sure 5243 * one is actually in the corresponding L1 slot 5244 * for the current VA. 5245 */ 5246 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) 5247 panic("pmap_map_chunk: no L2 table for VA 0x%08x", va); 5248 5249 ptep = (pt_entry_t *) kernel_pt_lookup( 5250 pde[L1_IDX(va)] & L1_C_ADDR_MASK); 5251 if (ptep == NULL) 5252 panic("pmap_map_chunk: can't find L2 table for VA" 5253 "0x%08x", va); 5254 /* See if we can use a L2 large page mapping. */ 5255 if (L2_L_MAPPABLE_P(va, pa, resid)) { 5256#ifdef VERBOSE_INIT_ARM 5257 printf("L"); 5258#endif 5259 for (i = 0; i < 16; i++) { 5260 ptep[l2pte_index(va) + i] = 5261 L2_L_PROTO | pa | 5262 L2_L_PROT(PTE_KERNEL, prot) | f2l; 5263 PTE_SYNC(&ptep[l2pte_index(va) + i]); 5264 } 5265 va += L2_L_SIZE; 5266 pa += L2_L_SIZE; 5267 resid -= L2_L_SIZE; 5268 continue; 5269 } 5270 5271 /* Use a small page mapping. */ 5272#ifdef VERBOSE_INIT_ARM 5273 printf("P"); 5274#endif 5275 ptep[l2pte_index(va)] = L2_S_PROTO | pa | f2s | L2_S_REF; 5276 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); 5277 PTE_SYNC(&ptep[l2pte_index(va)]); 5278 va += PAGE_SIZE; 5279 pa += PAGE_SIZE; 5280 resid -= PAGE_SIZE; 5281 } 5282#ifdef VERBOSE_INIT_ARM 5283 printf("\n"); 5284#endif 5285 return (size); 5286 5287} 5288 5289int 5290pmap_dmap_iscurrent(pmap_t pmap) 5291{ 5292 return(pmap_is_current(pmap)); 5293} 5294 5295void 5296pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 5297{ 5298 /* 5299 * Remember the memattr in a field that gets used to set the appropriate 5300 * bits in the PTEs as mappings are established. 5301 */ 5302 m->md.pv_memattr = ma; 5303 5304 /* 5305 * It appears that this function can only be called before any mappings 5306 * for the page are established on ARM. If this ever changes, this code 5307 * will need to walk the pv_list and make each of the existing mappings 5308 * uncacheable, being careful to sync caches and PTEs (and maybe 5309 * invalidate TLB?) for any current mapping it modifies. 5310 */ 5311 if (TAILQ_FIRST(&m->md.pv_list) != NULL) 5312 panic("Can't change memattr on page with existing mappings"); 5313} 5314