pmap-v6.c revision 270439
1/* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */ 2/*- 3 * Copyright 2011 Semihalf 4 * Copyright 2004 Olivier Houchard. 5 * Copyright 2003 Wasabi Systems, Inc. 6 * All rights reserved. 7 * 8 * Written by Steve C. Woodford for Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project by 21 * Wasabi Systems, Inc. 22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 23 * or promote products derived from this software without specific prior 24 * written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 * 38 * From: FreeBSD: src/sys/arm/arm/pmap.c,v 1.113 2009/07/24 13:50:29 39 */ 40 41/*- 42 * Copyright (c) 2002-2003 Wasabi Systems, Inc. 43 * Copyright (c) 2001 Richard Earnshaw 44 * Copyright (c) 2001-2002 Christopher Gilbert 45 * All rights reserved. 46 * 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. The name of the company nor the name of the author may be used to 53 * endorse or promote products derived from this software without specific 54 * prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 57 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 58 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 59 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 
67 */ 68/*- 69 * Copyright (c) 1999 The NetBSD Foundation, Inc. 70 * All rights reserved. 71 * 72 * This code is derived from software contributed to The NetBSD Foundation 73 * by Charles M. Hannum. 74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 84 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 85 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 86 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 87 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 88 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 89 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 90 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 91 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 92 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 93 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 94 * POSSIBILITY OF SUCH DAMAGE. 95 */ 96 97/*- 98 * Copyright (c) 1994-1998 Mark Brinicombe. 99 * Copyright (c) 1994 Brini. 100 * All rights reserved. 101 * 102 * This code is derived from software written for Brini by Mark Brinicombe 103 * 104 * Redistribution and use in source and binary forms, with or without 105 * modification, are permitted provided that the following conditions 106 * are met: 107 * 1. Redistributions of source code must retain the above copyright 108 * notice, this list of conditions and the following disclaimer. 109 * 2. Redistributions in binary form must reproduce the above copyright 110 * notice, this list of conditions and the following disclaimer in the 111 * documentation and/or other materials provided with the distribution. 112 * 3. All advertising materials mentioning features or use of this software 113 * must display the following acknowledgement: 114 * This product includes software developed by Mark Brinicombe. 115 * 4. The name of the author may not be used to endorse or promote products 116 * derived from this software without specific prior written permission. 117 * 118 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 119 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 120 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *
 * RiscBSD kernel project
 *
 * pmap.c
 *
 * Machine dependent vm stuff
 *
 * Created      : 20/09/94
 */

/*
 * Special compilation symbols
 * PMAP_DEBUG           - Build in pmap_debug_level code
 *
 * Note that pmap_mapdev() and pmap_unmapdev() are implemented in arm/devmap.c
 */
/* Include header files */

#include "opt_vm.h"
#include "opt_pmap.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/arm/arm/pmap-v6.c 270439 2014-08-24 07:53:15Z kib $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/rwlock.h>
#include <sys/smp.h>
#include <sys/sched.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/uma.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_reserv.h>

#include <machine/md_var.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>

#ifdef DEBUG
extern int last_fault_code;
#endif

#ifdef PMAP_DEBUG
#define PDEBUG(_lev_,_stat_) \
	if (pmap_debug_level >= (_lev_)) \
		((_stat_))
#define dprintf printf

int pmap_debug_level = 0;
#define PMAP_INLINE
#else   /* PMAP_DEBUG */
#define PDEBUG(_lev_,_stat_) /* Nothing */
#define dprintf(x, arg...)
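/*
 * Usage sketch (illustrative only): callers wrap diagnostic statements in
 * PDEBUG() so they compile away entirely when PMAP_DEBUG is not defined,
 * e.g.
 *	PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t)pmap));
 */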
#define PMAP_INLINE __inline
#endif  /* PMAP_DEBUG */

#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define pa_to_pvh(pa)	(&pv_table[pa_index(pa)])

#ifdef ARM_L2_PIPT
#define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((pa), (size))
#define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((pa), (size))
#else
#define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((va), (size))
#define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((va), (size))
#endif

extern struct pv_addr systempage;

/*
 * Internal function prototypes
 */

static PMAP_INLINE
struct pv_entry *pmap_find_pv(struct md_page *, pmap_t, vm_offset_t);
static void pmap_free_pv_chunk(struct pv_chunk *pc);
static void pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t pmap_get_pv_entry(pmap_t pmap, boolean_t try);
static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
static boolean_t pmap_pv_insert_section(pmap_t, vm_offset_t,
    vm_paddr_t);
static struct pv_entry *pmap_remove_pv(struct vm_page *, pmap_t, vm_offset_t);
static int pmap_pvh_wired_mappings(struct md_page *, int);

static int pmap_enter_locked(pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t, u_int);
static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va);
static void pmap_alloc_l1(pmap_t);
static void pmap_free_l1(pmap_t);

static void pmap_map_section(pmap_t, vm_offset_t, vm_offset_t,
    vm_prot_t, boolean_t);
static void pmap_promote_section(pmap_t, vm_offset_t);
static boolean_t pmap_demote_section(pmap_t, vm_offset_t);
static boolean_t pmap_enter_section(pmap_t, vm_offset_t, vm_page_t,
    vm_prot_t);
static void pmap_remove_section(pmap_t, vm_offset_t);

static int pmap_clearbit(struct vm_page *, u_int);

static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t);
static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t);
static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
static vm_offset_t kernel_pt_lookup(vm_paddr_t);

static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t pmap_curmaxkvaddr;
vm_paddr_t kernel_l1pa;

vm_offset_t kernel_vm_end = 0;

vm_offset_t vm_max_kernel_address;

struct pmap kernel_pmap_store;

/*
 * Resources for quickly copying and zeroing pages using virtual address space
 * and page table entries that are pre-allocated per-CPU by pmap_init().
 */
struct czpages {
	struct mtx	lock;
	pt_entry_t	*srcptep;
	pt_entry_t	*dstptep;
	vm_offset_t	srcva;
	vm_offset_t	dstva;
};
static struct czpages cpu_czpages[MAXCPU];

static void	pmap_init_l1(struct l1_ttable *, pd_entry_t *);
/*
 * These routines are called when the CPU type is identified to set up
 * the PTE prototypes, cache modes, etc.
 *
 * The variables are always here, just in case LKMs need to reference
 * them (though, they shouldn't).
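 *
 * They are filled in by pmap_pte_init_mmu_v6() below, which indexes the
 * l1_mem_types[]/l2l_mem_types[]/l2s_mem_types[] tables with PTE_CACHE and
 * PTE_PAGETABLE to select the cache attributes, e.g.
 *	pte_l2_s_cache_mode = l2s_mem_types[PTE_CACHE];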
 */
static void pmap_set_prot(pt_entry_t *pte, vm_prot_t prot, uint8_t user);
pt_entry_t	pte_l1_s_cache_mode;
pt_entry_t	pte_l1_s_cache_mode_pt;

pt_entry_t	pte_l2_l_cache_mode;
pt_entry_t	pte_l2_l_cache_mode_pt;

pt_entry_t	pte_l2_s_cache_mode;
pt_entry_t	pte_l2_s_cache_mode_pt;

struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

extern void bcopy_page(vm_offset_t, vm_offset_t);
extern void bzero_page(vm_offset_t);

char *_tmppt;

/*
 * Metadata for L1 translation tables.
 */
struct l1_ttable {
	/* Entry on the L1 Table list */
	SLIST_ENTRY(l1_ttable) l1_link;

	/* Entry on the L1 Least Recently Used list */
	TAILQ_ENTRY(l1_ttable) l1_lru;

	/* Track how many domains are allocated from this L1 */
	volatile u_int l1_domain_use_count;

	/*
	 * A free-list of domain numbers for this L1.
	 * We avoid using ffs() and a bitmap to track domains since ffs()
	 * is slow on ARM.
	 */
	u_int8_t l1_domain_first;
	u_int8_t l1_domain_free[PMAP_DOMAINS];

	/* Physical address of this L1 page table */
	vm_paddr_t l1_physaddr;

	/* KVA of this L1 page table */
	pd_entry_t *l1_kva;
};

/*
 * Convert a virtual address into its L1 table index. That is, the
 * index used to locate the L2 descriptor table pointer in an L1 table.
 * This is basically used to index l1->l1_kva[].
 *
 * Each L2 descriptor table represents 1MB of VA space.
 */
#define	L1_IDX(va)		(((vm_offset_t)(va)) >> L1_S_SHIFT)

/*
 * L1 Page Tables are tracked using a Least Recently Used list.
 *  - New L1s are allocated from the HEAD.
 *  - Freed L1s are added to the TAIL.
 *  - Recently accessed L1s (where an 'access' is some change to one of
 *    the userland pmaps which own this L1) are moved to the TAIL.
 */
static TAILQ_HEAD(, l1_ttable) l1_lru_list;
/*
 * A list of all L1 tables
 */
static SLIST_HEAD(, l1_ttable) l1_list;
static struct mtx l1_lru_lock;

/*
 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots.
 *
 * This is normally 16MB worth of L2 page descriptors for any given pmap.
 * Reference counts are maintained for L2 descriptors so they can be
 * freed when empty.
 */
struct l2_dtable {
	/* The number of L2 page descriptors allocated to this l2_dtable */
	u_int l2_occupancy;

	/* List of L2 page descriptors */
	struct l2_bucket {
		pt_entry_t *l2b_kva;	/* KVA of L2 Descriptor Table */
		vm_paddr_t l2b_phys;	/* Physical address of same */
		u_short l2b_l1idx;	/* This L2 table's L1 index */
		u_short l2b_occupancy;	/* How many active descriptors */
	} l2_bucket[L2_BUCKET_SIZE];
};

/* pmap_kenter_internal flags */
#define KENTER_CACHE	0x1
#define KENTER_DEVICE	0x2
#define KENTER_USER	0x4

/*
 * Given an L1 table index, calculate the corresponding l2_dtable index
 * and bucket index within the l2_dtable.
 */
#define	L2_IDX(l1idx)		(((l1idx) >> L2_BUCKET_LOG2) & \
				 (L2_SIZE - 1))
#define	L2_BUCKET(l1idx)	((l1idx) & (L2_BUCKET_SIZE - 1))

/*
 * Given a virtual address, this macro returns the
 * virtual address required to drop into the next L2 bucket.
 */
#define	L2_NEXT_BUCKET(va)	(((va) & L1_S_FRAME) + L1_S_SIZE)

/*
 * We try to map the page tables write-through, if possible.
 * However, not all CPUs have a write-through cache mode, so on those
 * we have to sync the cache when we frob page tables.
 *
 * We try to evaluate this at compile time, if possible. However, it's
 * not always possible to do that, hence this run-time var.
 */
int	pmap_needs_pte_sync;

/*
 * Macro to determine if a mapping might be resident in the
 * instruction cache and/or TLB
 */
#define	PTE_BEEN_EXECD(pte)  (L2_S_EXECUTABLE(pte) && L2_S_REFERENCED(pte))

/*
 * Macro to determine if a mapping might be resident in the
 * data cache and/or TLB
 */
#define	PTE_BEEN_REFD(pte)   (L2_S_REFERENCED(pte))

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#define pmap_is_current(pm)	((pm) == pmap_kernel() || \
    curproc->p_vmspace->vm_map.pmap == (pm))

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count, pv_entry_max, pv_entry_high_water;
static struct md_page *pv_table;
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;	/* KVA block for pv_chunks */
int pv_maxchunks;		/* How many chunks we have KVA for */
vm_offset_t pv_vafree;		/* Freelist stored in the PTE */

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 8);
CTASSERT(_NPCPV == 252);

#define	PC_FREE0_6	0xfffffffful	/* Free values for index 0 through 6 */
#define	PC_FREE7	0x0ffffffful	/* Free values for index 7 */

static const uint32_t pc_freemask[_NPCM] = {
	PC_FREE0_6, PC_FREE0_6, PC_FREE0_6,
	PC_FREE0_6, PC_FREE0_6, PC_FREE0_6,
	PC_FREE0_6, PC_FREE7
};

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

/* Superpages utilization enabled = 1 / disabled = 0 */
static int sp_enabled = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN, &sp_enabled, 0,
    "Are large page mappings enabled?");

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
    "Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
    "Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");
#endif

uma_zone_t	l2zone;
static uma_zone_t	l2table_zone;
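/*
 * Illustrative sketch (deliberately not compiled): how a virtual address is
 * resolved to its l2_bucket through the L1_IDX/L2_IDX/L2_BUCKET macros and
 * the l2_dtable metadata defined above.  This mirrors what
 * pmap_get_l2_bucket() does and exists purely as a worked example; the
 * function name is made up for the sketch.
 */
#if 0
static struct l2_bucket *
example_va_to_l2_bucket(pmap_t pmap, vm_offset_t va)
{
	struct l2_dtable *l2;
	u_short l1idx;

	l1idx = L1_IDX(va);		  /* which 1MB L1 slot covers va */
	l2 = pmap->pm_l2[L2_IDX(l1idx)];  /* l2_dtable tracking that slot */
	if (l2 == NULL)
		return (NULL);
	/* one of the L2_BUCKET_SIZE buckets within that l2_dtable */
	return (&l2->l2_bucket[L2_BUCKET(l1idx)]);
}
#endif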
499static vm_offset_t pmap_kernel_l2dtable_kva; 500static vm_offset_t pmap_kernel_l2ptp_kva; 501static vm_paddr_t pmap_kernel_l2ptp_phys; 502static struct rwlock pvh_global_lock; 503 504int l1_mem_types[] = { 505 ARM_L1S_STRONG_ORD, 506 ARM_L1S_DEVICE_NOSHARE, 507 ARM_L1S_DEVICE_SHARE, 508 ARM_L1S_NRML_NOCACHE, 509 ARM_L1S_NRML_IWT_OWT, 510 ARM_L1S_NRML_IWB_OWB, 511 ARM_L1S_NRML_IWBA_OWBA 512}; 513 514int l2l_mem_types[] = { 515 ARM_L2L_STRONG_ORD, 516 ARM_L2L_DEVICE_NOSHARE, 517 ARM_L2L_DEVICE_SHARE, 518 ARM_L2L_NRML_NOCACHE, 519 ARM_L2L_NRML_IWT_OWT, 520 ARM_L2L_NRML_IWB_OWB, 521 ARM_L2L_NRML_IWBA_OWBA 522}; 523 524int l2s_mem_types[] = { 525 ARM_L2S_STRONG_ORD, 526 ARM_L2S_DEVICE_NOSHARE, 527 ARM_L2S_DEVICE_SHARE, 528 ARM_L2S_NRML_NOCACHE, 529 ARM_L2S_NRML_IWT_OWT, 530 ARM_L2S_NRML_IWB_OWB, 531 ARM_L2S_NRML_IWBA_OWBA 532}; 533 534/* 535 * This list exists for the benefit of pmap_map_chunk(). It keeps track 536 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can 537 * find them as necessary. 538 * 539 * Note that the data on this list MUST remain valid after initarm() returns, 540 * as pmap_bootstrap() uses it to contruct L2 table metadata. 541 */ 542SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list); 543 544static void 545pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt) 546{ 547 int i; 548 549 l1->l1_kva = l1pt; 550 l1->l1_domain_use_count = 0; 551 l1->l1_domain_first = 0; 552 553 for (i = 0; i < PMAP_DOMAINS; i++) 554 l1->l1_domain_free[i] = i + 1; 555 556 /* 557 * Copy the kernel's L1 entries to each new L1. 558 */ 559 if (l1pt != pmap_kernel()->pm_l1->l1_kva) 560 memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE); 561 562 if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0) 563 panic("pmap_init_l1: can't get PA of L1 at %p", l1pt); 564 SLIST_INSERT_HEAD(&l1_list, l1, l1_link); 565 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 566} 567 568static vm_offset_t 569kernel_pt_lookup(vm_paddr_t pa) 570{ 571 struct pv_addr *pv; 572 573 SLIST_FOREACH(pv, &kernel_pt_list, pv_list) { 574 if (pv->pv_pa == pa) 575 return (pv->pv_va); 576 } 577 return (0); 578} 579 580void 581pmap_pte_init_mmu_v6(void) 582{ 583 584 if (PTE_PAGETABLE >= 3) 585 pmap_needs_pte_sync = 1; 586 pte_l1_s_cache_mode = l1_mem_types[PTE_CACHE]; 587 pte_l2_l_cache_mode = l2l_mem_types[PTE_CACHE]; 588 pte_l2_s_cache_mode = l2s_mem_types[PTE_CACHE]; 589 590 pte_l1_s_cache_mode_pt = l1_mem_types[PTE_PAGETABLE]; 591 pte_l2_l_cache_mode_pt = l2l_mem_types[PTE_PAGETABLE]; 592 pte_l2_s_cache_mode_pt = l2s_mem_types[PTE_PAGETABLE]; 593 594} 595 596/* 597 * Allocate an L1 translation table for the specified pmap. 598 * This is called at pmap creation time. 599 */ 600static void 601pmap_alloc_l1(pmap_t pmap) 602{ 603 struct l1_ttable *l1; 604 u_int8_t domain; 605 606 /* 607 * Remove the L1 at the head of the LRU list 608 */ 609 mtx_lock(&l1_lru_lock); 610 l1 = TAILQ_FIRST(&l1_lru_list); 611 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); 612 613 /* 614 * Pick the first available domain number, and update 615 * the link to the next number. 616 */ 617 domain = l1->l1_domain_first; 618 l1->l1_domain_first = l1->l1_domain_free[domain]; 619 620 /* 621 * If there are still free domain numbers in this L1, 622 * put it back on the TAIL of the LRU list. 
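	 * Otherwise every domain this L1 provides is now in use (one per
	 * pmap sharing it) and the L1 stays off the list until
	 * pmap_free_l1() returns a domain number to it.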
623 */ 624 if (++l1->l1_domain_use_count < PMAP_DOMAINS) 625 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 626 627 mtx_unlock(&l1_lru_lock); 628 629 /* 630 * Fix up the relevant bits in the pmap structure 631 */ 632 pmap->pm_l1 = l1; 633 pmap->pm_domain = domain + 1; 634} 635 636/* 637 * Free an L1 translation table. 638 * This is called at pmap destruction time. 639 */ 640static void 641pmap_free_l1(pmap_t pmap) 642{ 643 struct l1_ttable *l1 = pmap->pm_l1; 644 645 mtx_lock(&l1_lru_lock); 646 647 /* 648 * If this L1 is currently on the LRU list, remove it. 649 */ 650 if (l1->l1_domain_use_count < PMAP_DOMAINS) 651 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); 652 653 /* 654 * Free up the domain number which was allocated to the pmap 655 */ 656 l1->l1_domain_free[pmap->pm_domain - 1] = l1->l1_domain_first; 657 l1->l1_domain_first = pmap->pm_domain - 1; 658 l1->l1_domain_use_count--; 659 660 /* 661 * The L1 now must have at least 1 free domain, so add 662 * it back to the LRU list. If the use count is zero, 663 * put it at the head of the list, otherwise it goes 664 * to the tail. 665 */ 666 if (l1->l1_domain_use_count == 0) { 667 TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru); 668 } else 669 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 670 671 mtx_unlock(&l1_lru_lock); 672} 673 674/* 675 * Returns a pointer to the L2 bucket associated with the specified pmap 676 * and VA, or NULL if no L2 bucket exists for the address. 677 */ 678static PMAP_INLINE struct l2_bucket * 679pmap_get_l2_bucket(pmap_t pmap, vm_offset_t va) 680{ 681 struct l2_dtable *l2; 682 struct l2_bucket *l2b; 683 u_short l1idx; 684 685 l1idx = L1_IDX(va); 686 687 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL || 688 (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL) 689 return (NULL); 690 691 return (l2b); 692} 693 694/* 695 * Returns a pointer to the L2 bucket associated with the specified pmap 696 * and VA. 697 * 698 * If no L2 bucket exists, perform the necessary allocations to put an L2 699 * bucket/page table in place. 700 * 701 * Note that if a new L2 bucket/page was allocated, the caller *must* 702 * increment the bucket occupancy counter appropriately *before* 703 * releasing the pmap's lock to ensure no other thread or cpu deallocates 704 * the bucket/page in the meantime. 705 */ 706static struct l2_bucket * 707pmap_alloc_l2_bucket(pmap_t pmap, vm_offset_t va) 708{ 709 struct l2_dtable *l2; 710 struct l2_bucket *l2b; 711 u_short l1idx; 712 713 l1idx = L1_IDX(va); 714 715 PMAP_ASSERT_LOCKED(pmap); 716 rw_assert(&pvh_global_lock, RA_WLOCKED); 717 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 718 /* 719 * No mapping at this address, as there is 720 * no entry in the L1 table. 721 * Need to allocate a new l2_dtable. 722 */ 723 PMAP_UNLOCK(pmap); 724 rw_wunlock(&pvh_global_lock); 725 if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) { 726 rw_wlock(&pvh_global_lock); 727 PMAP_LOCK(pmap); 728 return (NULL); 729 } 730 rw_wlock(&pvh_global_lock); 731 PMAP_LOCK(pmap); 732 if (pmap->pm_l2[L2_IDX(l1idx)] != NULL) { 733 /* 734 * Someone already allocated the l2_dtable while 735 * we were doing the same. 736 */ 737 uma_zfree(l2table_zone, l2); 738 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 739 } else { 740 bzero(l2, sizeof(*l2)); 741 /* 742 * Link it into the parent pmap 743 */ 744 pmap->pm_l2[L2_IDX(l1idx)] = l2; 745 } 746 } 747 748 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 749 750 /* 751 * Fetch pointer to the L2 page table associated with the address. 
752 */ 753 if (l2b->l2b_kva == NULL) { 754 pt_entry_t *ptep; 755 756 /* 757 * No L2 page table has been allocated. Chances are, this 758 * is because we just allocated the l2_dtable, above. 759 */ 760 PMAP_UNLOCK(pmap); 761 rw_wunlock(&pvh_global_lock); 762 ptep = uma_zalloc(l2zone, M_NOWAIT); 763 rw_wlock(&pvh_global_lock); 764 PMAP_LOCK(pmap); 765 if (l2b->l2b_kva != 0) { 766 /* We lost the race. */ 767 uma_zfree(l2zone, ptep); 768 return (l2b); 769 } 770 l2b->l2b_phys = vtophys(ptep); 771 if (ptep == NULL) { 772 /* 773 * Oops, no more L2 page tables available at this 774 * time. We may need to deallocate the l2_dtable 775 * if we allocated a new one above. 776 */ 777 if (l2->l2_occupancy == 0) { 778 pmap->pm_l2[L2_IDX(l1idx)] = NULL; 779 uma_zfree(l2table_zone, l2); 780 } 781 return (NULL); 782 } 783 784 l2->l2_occupancy++; 785 l2b->l2b_kva = ptep; 786 l2b->l2b_l1idx = l1idx; 787 } 788 789 return (l2b); 790} 791 792static PMAP_INLINE void 793pmap_free_l2_ptp(pt_entry_t *l2) 794{ 795 uma_zfree(l2zone, l2); 796} 797/* 798 * One or more mappings in the specified L2 descriptor table have just been 799 * invalidated. 800 * 801 * Garbage collect the metadata and descriptor table itself if necessary. 802 * 803 * The pmap lock must be acquired when this is called (not necessary 804 * for the kernel pmap). 805 */ 806static void 807pmap_free_l2_bucket(pmap_t pmap, struct l2_bucket *l2b, u_int count) 808{ 809 struct l2_dtable *l2; 810 pd_entry_t *pl1pd, l1pd; 811 pt_entry_t *ptep; 812 u_short l1idx; 813 814 815 /* 816 * Update the bucket's reference count according to how many 817 * PTEs the caller has just invalidated. 818 */ 819 l2b->l2b_occupancy -= count; 820 821 /* 822 * Note: 823 * 824 * Level 2 page tables allocated to the kernel pmap are never freed 825 * as that would require checking all Level 1 page tables and 826 * removing any references to the Level 2 page table. See also the 827 * comment elsewhere about never freeing bootstrap L2 descriptors. 828 * 829 * We make do with just invalidating the mapping in the L2 table. 830 * 831 * This isn't really a big deal in practice and, in fact, leads 832 * to a performance win over time as we don't need to continually 833 * alloc/free. 834 */ 835 if (l2b->l2b_occupancy > 0 || pmap == pmap_kernel()) 836 return; 837 838 /* 839 * There are no more valid mappings in this level 2 page table. 840 * Go ahead and NULL-out the pointer in the bucket, then 841 * free the page table. 842 */ 843 l1idx = l2b->l2b_l1idx; 844 ptep = l2b->l2b_kva; 845 l2b->l2b_kva = NULL; 846 847 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 848 849 /* 850 * If the L1 slot matches the pmap's domain 851 * number, then invalidate it. 852 */ 853 l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK); 854 if (l1pd == (L1_C_DOM(pmap->pm_domain) | L1_TYPE_C)) { 855 *pl1pd = 0; 856 PTE_SYNC(pl1pd); 857 cpu_tlb_flushD_SE((vm_offset_t)ptep); 858 cpu_cpwait(); 859 } 860 861 /* 862 * Release the L2 descriptor table back to the pool cache. 863 */ 864 pmap_free_l2_ptp(ptep); 865 866 /* 867 * Update the reference count in the associated l2_dtable 868 */ 869 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 870 if (--l2->l2_occupancy > 0) 871 return; 872 873 /* 874 * There are no more valid mappings in any of the Level 1 875 * slots managed by this l2_dtable. Go ahead and NULL-out 876 * the pointer in the parent pmap and free the l2_dtable. 877 */ 878 pmap->pm_l2[L2_IDX(l1idx)] = NULL; 879 uma_zfree(l2table_zone, l2); 880} 881 882/* 883 * Pool cache constructors for L2 descriptor tables, metadata and pmap 884 * structures. 
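 *
 * pmap_l2ptp_ctor() below is installed as the constructor of l2zone in
 * pmap_init():
 *	l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
 *	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
 * so every page handed out for an L2 page table already has the page-table
 * cache mode applied to its mapping and has been zeroed.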
885 */ 886static int 887pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags) 888{ 889 struct l2_bucket *l2b; 890 pt_entry_t *ptep, pte; 891 vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK; 892 893 /* 894 * The mappings for these page tables were initially made using 895 * pmap_kenter() by the pool subsystem. Therefore, the cache- 896 * mode will not be right for page table mappings. To avoid 897 * polluting the pmap_kenter() code with a special case for 898 * page tables, we simply fix up the cache-mode here if it's not 899 * correct. 900 */ 901 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 902 ptep = &l2b->l2b_kva[l2pte_index(va)]; 903 pte = *ptep; 904 905 cpu_idcache_wbinv_range(va, PAGE_SIZE); 906 pmap_l2cache_wbinv_range(va, pte & L2_S_FRAME, PAGE_SIZE); 907 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { 908 /* 909 * Page tables must have the cache-mode set to 910 * Write-Thru. 911 */ 912 *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; 913 PTE_SYNC(ptep); 914 cpu_tlb_flushD_SE(va); 915 cpu_cpwait(); 916 } 917 918 memset(mem, 0, L2_TABLE_SIZE_REAL); 919 return (0); 920} 921 922/* 923 * Modify pte bits for all ptes corresponding to the given physical address. 924 * We use `maskbits' rather than `clearbits' because we're always passing 925 * constants and the latter would require an extra inversion at run-time. 926 */ 927static int 928pmap_clearbit(struct vm_page *m, u_int maskbits) 929{ 930 struct l2_bucket *l2b; 931 struct pv_entry *pv, *pve, *next_pv; 932 struct md_page *pvh; 933 pd_entry_t *pl1pd; 934 pt_entry_t *ptep, npte, opte; 935 pmap_t pmap; 936 vm_offset_t va; 937 u_int oflags; 938 int count = 0; 939 940 rw_wlock(&pvh_global_lock); 941 if ((m->flags & PG_FICTITIOUS) != 0) 942 goto small_mappings; 943 944 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 945 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 946 va = pv->pv_va; 947 pmap = PV_PMAP(pv); 948 PMAP_LOCK(pmap); 949 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 950 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, 951 ("pmap_clearbit: valid section mapping expected")); 952 if ((maskbits & PVF_WRITE) && (pv->pv_flags & PVF_WRITE)) 953 (void)pmap_demote_section(pmap, va); 954 else if ((maskbits & PVF_REF) && L1_S_REFERENCED(*pl1pd)) { 955 if (pmap_demote_section(pmap, va)) { 956 if ((pv->pv_flags & PVF_WIRED) == 0) { 957 /* 958 * Remove the mapping to a single page 959 * so that a subsequent access may 960 * repromote. Since the underlying 961 * l2_bucket is fully populated, this 962 * removal never frees an entire 963 * l2_bucket. 964 */ 965 va += (VM_PAGE_TO_PHYS(m) & 966 L1_S_OFFSET); 967 l2b = pmap_get_l2_bucket(pmap, va); 968 KASSERT(l2b != NULL, 969 ("pmap_clearbit: no l2 bucket for " 970 "va 0x%#x, pmap 0x%p", va, pmap)); 971 ptep = &l2b->l2b_kva[l2pte_index(va)]; 972 *ptep = 0; 973 PTE_SYNC(ptep); 974 pmap_free_l2_bucket(pmap, l2b, 1); 975 pve = pmap_remove_pv(m, pmap, va); 976 KASSERT(pve != NULL, ("pmap_clearbit: " 977 "no PV entry for managed mapping")); 978 pmap_free_pv_entry(pmap, pve); 979 980 } 981 } 982 } else if ((maskbits & PVF_MOD) && L1_S_WRITABLE(*pl1pd)) { 983 if (pmap_demote_section(pmap, va)) { 984 if ((pv->pv_flags & PVF_WIRED) == 0) { 985 /* 986 * Write protect the mapping to a 987 * single page so that a subsequent 988 * write access may repromote. 
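					 * Setting L2_APX in the PTE makes
					 * the mapping read-only; cf.
					 * pmap_set_prot() below, which clears
					 * L2_APX to grant write access.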
989 */ 990 va += (VM_PAGE_TO_PHYS(m) & 991 L1_S_OFFSET); 992 l2b = pmap_get_l2_bucket(pmap, va); 993 KASSERT(l2b != NULL, 994 ("pmap_clearbit: no l2 bucket for " 995 "va 0x%#x, pmap 0x%p", va, pmap)); 996 ptep = &l2b->l2b_kva[l2pte_index(va)]; 997 if ((*ptep & L2_S_PROTO) != 0) { 998 pve = pmap_find_pv(&m->md, 999 pmap, va); 1000 KASSERT(pve != NULL, 1001 ("pmap_clearbit: no PV " 1002 "entry for managed mapping")); 1003 pve->pv_flags &= ~PVF_WRITE; 1004 *ptep |= L2_APX; 1005 PTE_SYNC(ptep); 1006 } 1007 } 1008 } 1009 } 1010 PMAP_UNLOCK(pmap); 1011 } 1012 1013small_mappings: 1014 if (TAILQ_EMPTY(&m->md.pv_list)) { 1015 rw_wunlock(&pvh_global_lock); 1016 return (0); 1017 } 1018 1019 /* 1020 * Loop over all current mappings setting/clearing as appropos 1021 */ 1022 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1023 va = pv->pv_va; 1024 pmap = PV_PMAP(pv); 1025 oflags = pv->pv_flags; 1026 pv->pv_flags &= ~maskbits; 1027 1028 PMAP_LOCK(pmap); 1029 1030 l2b = pmap_get_l2_bucket(pmap, va); 1031 KASSERT(l2b != NULL, ("pmap_clearbit: no l2 bucket for " 1032 "va 0x%#x, pmap 0x%p", va, pmap)); 1033 1034 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1035 npte = opte = *ptep; 1036 1037 if (maskbits & (PVF_WRITE | PVF_MOD)) { 1038 /* make the pte read only */ 1039 npte |= L2_APX; 1040 } 1041 1042 if (maskbits & PVF_REF) { 1043 /* 1044 * Clear referenced flag in PTE so that we 1045 * will take a flag fault the next time the mapping 1046 * is referenced. 1047 */ 1048 npte &= ~L2_S_REF; 1049 } 1050 1051 CTR4(KTR_PMAP,"clearbit: pmap:%p bits:%x pte:%x->%x", 1052 pmap, maskbits, opte, npte); 1053 if (npte != opte) { 1054 count++; 1055 *ptep = npte; 1056 PTE_SYNC(ptep); 1057 /* Flush the TLB entry if a current pmap. */ 1058 if (PTE_BEEN_EXECD(opte)) 1059 cpu_tlb_flushID_SE(pv->pv_va); 1060 else if (PTE_BEEN_REFD(opte)) 1061 cpu_tlb_flushD_SE(pv->pv_va); 1062 cpu_cpwait(); 1063 } 1064 1065 PMAP_UNLOCK(pmap); 1066 1067 } 1068 1069 if (maskbits & PVF_WRITE) 1070 vm_page_aflag_clear(m, PGA_WRITEABLE); 1071 rw_wunlock(&pvh_global_lock); 1072 return (count); 1073} 1074 1075/* 1076 * main pv_entry manipulation functions: 1077 * pmap_enter_pv: enter a mapping onto a vm_page list 1078 * pmap_remove_pv: remove a mappiing from a vm_page list 1079 * 1080 * NOTE: pmap_enter_pv expects to lock the pvh itself 1081 * pmap_remove_pv expects the caller to lock the pvh before calling 1082 */ 1083 1084/* 1085 * pmap_enter_pv: enter a mapping onto a vm_page's PV list 1086 * 1087 * => caller should hold the proper lock on pvh_global_lock 1088 * => caller should have pmap locked 1089 * => we will (someday) gain the lock on the vm_page's PV list 1090 * => caller should adjust ptp's wire_count before calling 1091 * => caller should not adjust pmap's wire_count 1092 */ 1093static void 1094pmap_enter_pv(struct vm_page *m, struct pv_entry *pve, pmap_t pmap, 1095 vm_offset_t va, u_int flags) 1096{ 1097 1098 rw_assert(&pvh_global_lock, RA_WLOCKED); 1099 1100 PMAP_ASSERT_LOCKED(pmap); 1101 pve->pv_va = va; 1102 pve->pv_flags = flags; 1103 1104 TAILQ_INSERT_HEAD(&m->md.pv_list, pve, pv_list); 1105 if (pve->pv_flags & PVF_WIRED) 1106 ++pmap->pm_stats.wired_count; 1107} 1108 1109/* 1110 * 1111 * pmap_find_pv: Find a pv entry 1112 * 1113 * => caller should hold lock on vm_page 1114 */ 1115static PMAP_INLINE struct pv_entry * 1116pmap_find_pv(struct md_page *md, pmap_t pmap, vm_offset_t va) 1117{ 1118 struct pv_entry *pv; 1119 1120 rw_assert(&pvh_global_lock, RA_WLOCKED); 1121 TAILQ_FOREACH(pv, &md->pv_list, pv_list) 1122 if (pmap == PV_PMAP(pv) && va 
== pv->pv_va) 1123 break; 1124 1125 return (pv); 1126} 1127 1128/* 1129 * vector_page_setprot: 1130 * 1131 * Manipulate the protection of the vector page. 1132 */ 1133void 1134vector_page_setprot(int prot) 1135{ 1136 struct l2_bucket *l2b; 1137 pt_entry_t *ptep; 1138 1139 l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page); 1140 1141 ptep = &l2b->l2b_kva[l2pte_index(vector_page)]; 1142 /* 1143 * Set referenced flag. 1144 * Vectors' page is always desired 1145 * to be allowed to reside in TLB. 1146 */ 1147 *ptep |= L2_S_REF; 1148 1149 pmap_set_prot(ptep, prot|VM_PROT_EXECUTE, 0); 1150 PTE_SYNC(ptep); 1151 cpu_tlb_flushID_SE(vector_page); 1152 cpu_cpwait(); 1153} 1154 1155static void 1156pmap_set_prot(pt_entry_t *ptep, vm_prot_t prot, uint8_t user) 1157{ 1158 1159 *ptep &= ~(L2_S_PROT_MASK | L2_XN); 1160 1161 if (!(prot & VM_PROT_EXECUTE)) 1162 *ptep |= L2_XN; 1163 1164 /* Set defaults first - kernel read access */ 1165 *ptep |= L2_APX; 1166 *ptep |= L2_S_PROT_R; 1167 /* Now tune APs as desired */ 1168 if (user) 1169 *ptep |= L2_S_PROT_U; 1170 1171 if (prot & VM_PROT_WRITE) 1172 *ptep &= ~(L2_APX); 1173} 1174 1175/* 1176 * pmap_remove_pv: try to remove a mapping from a pv_list 1177 * 1178 * => caller should hold proper lock on pmap_main_lock 1179 * => pmap should be locked 1180 * => caller should hold lock on vm_page [so that attrs can be adjusted] 1181 * => caller should adjust ptp's wire_count and free PTP if needed 1182 * => caller should NOT adjust pmap's wire_count 1183 * => we return the removed pve 1184 */ 1185static struct pv_entry * 1186pmap_remove_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va) 1187{ 1188 struct pv_entry *pve; 1189 1190 rw_assert(&pvh_global_lock, RA_WLOCKED); 1191 PMAP_ASSERT_LOCKED(pmap); 1192 1193 pve = pmap_find_pv(&m->md, pmap, va); /* find corresponding pve */ 1194 if (pve != NULL) { 1195 TAILQ_REMOVE(&m->md.pv_list, pve, pv_list); 1196 if (pve->pv_flags & PVF_WIRED) 1197 --pmap->pm_stats.wired_count; 1198 } 1199 if (TAILQ_EMPTY(&m->md.pv_list)) 1200 vm_page_aflag_clear(m, PGA_WRITEABLE); 1201 1202 return(pve); /* return removed pve */ 1203} 1204 1205/* 1206 * 1207 * pmap_modify_pv: Update pv flags 1208 * 1209 * => caller should hold lock on vm_page [so that attrs can be adjusted] 1210 * => caller should NOT adjust pmap's wire_count 1211 * => we return the old flags 1212 * 1213 * Modify a physical-virtual mapping in the pv table 1214 */ 1215static u_int 1216pmap_modify_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va, 1217 u_int clr_mask, u_int set_mask) 1218{ 1219 struct pv_entry *npv; 1220 u_int flags, oflags; 1221 1222 PMAP_ASSERT_LOCKED(pmap); 1223 rw_assert(&pvh_global_lock, RA_WLOCKED); 1224 if ((npv = pmap_find_pv(&m->md, pmap, va)) == NULL) 1225 return (0); 1226 1227 /* 1228 * There is at least one VA mapping this page. 
1229 */ 1230 oflags = npv->pv_flags; 1231 npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask; 1232 1233 if ((flags ^ oflags) & PVF_WIRED) { 1234 if (flags & PVF_WIRED) 1235 ++pmap->pm_stats.wired_count; 1236 else 1237 --pmap->pm_stats.wired_count; 1238 } 1239 1240 return (oflags); 1241} 1242 1243/* Function to set the debug level of the pmap code */ 1244#ifdef PMAP_DEBUG 1245void 1246pmap_debug(int level) 1247{ 1248 pmap_debug_level = level; 1249 dprintf("pmap_debug: level=%d\n", pmap_debug_level); 1250} 1251#endif /* PMAP_DEBUG */ 1252 1253void 1254pmap_pinit0(struct pmap *pmap) 1255{ 1256 PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap)); 1257 1258 bcopy(kernel_pmap, pmap, sizeof(*pmap)); 1259 bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx)); 1260 PMAP_LOCK_INIT(pmap); 1261 TAILQ_INIT(&pmap->pm_pvchunk); 1262} 1263 1264/* 1265 * Initialize a vm_page's machine-dependent fields. 1266 */ 1267void 1268pmap_page_init(vm_page_t m) 1269{ 1270 1271 TAILQ_INIT(&m->md.pv_list); 1272 m->md.pv_memattr = VM_MEMATTR_DEFAULT; 1273} 1274 1275static vm_offset_t 1276pmap_ptelist_alloc(vm_offset_t *head) 1277{ 1278 pt_entry_t *pte; 1279 vm_offset_t va; 1280 1281 va = *head; 1282 if (va == 0) 1283 return (va); /* Out of memory */ 1284 pte = vtopte(va); 1285 *head = *pte; 1286 if ((*head & L2_TYPE_MASK) != L2_TYPE_INV) 1287 panic("%s: va is not L2_TYPE_INV!", __func__); 1288 *pte = 0; 1289 return (va); 1290} 1291 1292static void 1293pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) 1294{ 1295 pt_entry_t *pte; 1296 1297 if ((va & L2_TYPE_MASK) != L2_TYPE_INV) 1298 panic("%s: freeing va that is not L2_TYPE INV!", __func__); 1299 pte = vtopte(va); 1300 *pte = *head; /* virtual! L2_TYPE is L2_TYPE_INV though */ 1301 *head = va; 1302} 1303 1304static void 1305pmap_ptelist_init(vm_offset_t *head, void *base, int npages) 1306{ 1307 int i; 1308 vm_offset_t va; 1309 1310 *head = 0; 1311 for (i = npages - 1; i >= 0; i--) { 1312 va = (vm_offset_t)base + i * PAGE_SIZE; 1313 pmap_ptelist_free(head, va); 1314 } 1315} 1316 1317/* 1318 * Initialize the pmap module. 1319 * Called by vm_init, to initialize any structures that the pmap 1320 * system needs to map virtual memory. 1321 */ 1322void 1323pmap_init(void) 1324{ 1325 vm_size_t s; 1326 int i, pv_npg; 1327 1328 l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor, 1329 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1330 l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL, 1331 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1332 1333 /* 1334 * Are large page mappings supported and enabled? 1335 */ 1336 TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled); 1337 if (sp_enabled) { 1338 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, 1339 ("pmap_init: can't assign to pagesizes[1]")); 1340 pagesizes[1] = NBPDR; 1341 } 1342 1343 /* 1344 * Calculate the size of the pv head table for superpages. 1345 */ 1346 for (i = 0; phys_avail[i + 1]; i += 2); 1347 pv_npg = round_1mpage(phys_avail[(i - 2) + 1]) / NBPDR; 1348 1349 /* 1350 * Allocate memory for the pv head table for superpages. 1351 */ 1352 s = (vm_size_t)(pv_npg * sizeof(struct md_page)); 1353 s = round_page(s); 1354 pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, 1355 M_WAITOK | M_ZERO); 1356 for (i = 0; i < pv_npg; i++) 1357 TAILQ_INIT(&pv_table[i].pv_list); 1358 1359 /* 1360 * Initialize the address space for the pv chunks. 
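	 *
	 * pv_entry_max defaults to shpgperproc * maxproc + v_page_count and
	 * is rounded up to a multiple of _NPCPV (252 pv entries per
	 * page-sized chunk); pv_entry_high_water is 90% of that.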
1361 */ 1362 1363 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 1364 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 1365 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 1366 pv_entry_max = roundup(pv_entry_max, _NPCPV); 1367 pv_entry_high_water = 9 * (pv_entry_max / 10); 1368 1369 pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); 1370 pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); 1371 1372 if (pv_chunkbase == NULL) 1373 panic("pmap_init: not enough kvm for pv chunks"); 1374 1375 pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); 1376 1377 /* 1378 * Now it is safe to enable pv_table recording. 1379 */ 1380 PDEBUG(1, printf("pmap_init: done!\n")); 1381} 1382 1383SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, 1384 "Max number of PV entries"); 1385SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, 1386 "Page share factor per proc"); 1387 1388static SYSCTL_NODE(_vm_pmap, OID_AUTO, section, CTLFLAG_RD, 0, 1389 "1MB page mapping counters"); 1390 1391static u_long pmap_section_demotions; 1392SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, demotions, CTLFLAG_RD, 1393 &pmap_section_demotions, 0, "1MB page demotions"); 1394 1395static u_long pmap_section_mappings; 1396SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, mappings, CTLFLAG_RD, 1397 &pmap_section_mappings, 0, "1MB page mappings"); 1398 1399static u_long pmap_section_p_failures; 1400SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, p_failures, CTLFLAG_RD, 1401 &pmap_section_p_failures, 0, "1MB page promotion failures"); 1402 1403static u_long pmap_section_promotions; 1404SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, promotions, CTLFLAG_RD, 1405 &pmap_section_promotions, 0, "1MB page promotions"); 1406 1407int 1408pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype, int user) 1409{ 1410 struct l2_dtable *l2; 1411 struct l2_bucket *l2b; 1412 pd_entry_t *pl1pd, l1pd; 1413 pt_entry_t *ptep, pte; 1414 vm_paddr_t pa; 1415 u_int l1idx; 1416 int rv = 0; 1417 1418 l1idx = L1_IDX(va); 1419 rw_wlock(&pvh_global_lock); 1420 PMAP_LOCK(pmap); 1421 /* 1422 * Check and possibly fix-up L1 section mapping 1423 * only when superpage mappings are enabled to speed up. 1424 */ 1425 if (sp_enabled) { 1426 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 1427 l1pd = *pl1pd; 1428 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 1429 /* Catch an access to the vectors section */ 1430 if (l1idx == L1_IDX(vector_page)) 1431 goto out; 1432 /* 1433 * Stay away from the kernel mappings. 1434 * None of them should fault from L1 entry. 1435 */ 1436 if (pmap == pmap_kernel()) 1437 goto out; 1438 /* 1439 * Catch a forbidden userland access 1440 */ 1441 if (user && !(l1pd & L1_S_PROT_U)) 1442 goto out; 1443 /* 1444 * Superpage is always either mapped read only 1445 * or it is modified and permitted to be written 1446 * by default. Therefore, process only reference 1447 * flag fault and demote page in case of write fault. 1448 */ 1449 if ((ftype & VM_PROT_WRITE) && !L1_S_WRITABLE(l1pd) && 1450 L1_S_REFERENCED(l1pd)) { 1451 (void)pmap_demote_section(pmap, va); 1452 goto out; 1453 } else if (!L1_S_REFERENCED(l1pd)) { 1454 /* Mark the page "referenced" */ 1455 *pl1pd = l1pd | L1_S_REF; 1456 PTE_SYNC(pl1pd); 1457 goto l1_section_out; 1458 } else 1459 goto out; 1460 } 1461 } 1462 /* 1463 * If there is no l2_dtable for this address, then the process 1464 * has no business accessing it. 1465 * 1466 * Note: This will catch userland processes trying to access 1467 * kernel addresses. 
1468 */ 1469 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 1470 if (l2 == NULL) 1471 goto out; 1472 1473 /* 1474 * Likewise if there is no L2 descriptor table 1475 */ 1476 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 1477 if (l2b->l2b_kva == NULL) 1478 goto out; 1479 1480 /* 1481 * Check the PTE itself. 1482 */ 1483 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1484 pte = *ptep; 1485 if (pte == 0) 1486 goto out; 1487 1488 /* 1489 * Catch a userland access to the vector page mapped at 0x0 1490 */ 1491 if (user && !(pte & L2_S_PROT_U)) 1492 goto out; 1493 if (va == vector_page) 1494 goto out; 1495 1496 pa = l2pte_pa(pte); 1497 CTR5(KTR_PMAP, "pmap_fault_fix: pmap:%p va:%x pte:0x%x ftype:%x user:%x", 1498 pmap, va, pte, ftype, user); 1499 if ((ftype & VM_PROT_WRITE) && !(L2_S_WRITABLE(pte)) && 1500 L2_S_REFERENCED(pte)) { 1501 /* 1502 * This looks like a good candidate for "page modified" 1503 * emulation... 1504 */ 1505 struct pv_entry *pv; 1506 struct vm_page *m; 1507 1508 /* Extract the physical address of the page */ 1509 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) { 1510 goto out; 1511 } 1512 /* Get the current flags for this page. */ 1513 1514 pv = pmap_find_pv(&m->md, pmap, va); 1515 if (pv == NULL) { 1516 goto out; 1517 } 1518 1519 /* 1520 * Do the flags say this page is writable? If not then it 1521 * is a genuine write fault. If yes then the write fault is 1522 * our fault as we did not reflect the write access in the 1523 * PTE. Now we know a write has occurred we can correct this 1524 * and also set the modified bit 1525 */ 1526 if ((pv->pv_flags & PVF_WRITE) == 0) { 1527 goto out; 1528 } 1529 1530 vm_page_dirty(m); 1531 1532 /* Re-enable write permissions for the page */ 1533 *ptep = (pte & ~L2_APX); 1534 PTE_SYNC(ptep); 1535 rv = 1; 1536 CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", *ptep); 1537 } else if (!L2_S_REFERENCED(pte)) { 1538 /* 1539 * This looks like a good candidate for "page referenced" 1540 * emulation. 1541 */ 1542 struct pv_entry *pv; 1543 struct vm_page *m; 1544 1545 /* Extract the physical address of the page */ 1546 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) 1547 goto out; 1548 /* Get the current flags for this page. */ 1549 pv = pmap_find_pv(&m->md, pmap, va); 1550 if (pv == NULL) 1551 goto out; 1552 1553 vm_page_aflag_set(m, PGA_REFERENCED); 1554 1555 /* Mark the page "referenced" */ 1556 *ptep = pte | L2_S_REF; 1557 PTE_SYNC(ptep); 1558 rv = 1; 1559 CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", *ptep); 1560 } 1561 1562 /* 1563 * We know there is a valid mapping here, so simply 1564 * fix up the L1 if necessary. 1565 */ 1566 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 1567 l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 1568 if (*pl1pd != l1pd) { 1569 *pl1pd = l1pd; 1570 PTE_SYNC(pl1pd); 1571 rv = 1; 1572 } 1573 1574#ifdef DEBUG 1575 /* 1576 * If 'rv == 0' at this point, it generally indicates that there is a 1577 * stale TLB entry for the faulting address. This happens when two or 1578 * more processes are sharing an L1. Since we don't flush the TLB on 1579 * a context switch between such processes, we can take domain faults 1580 * for mappings which exist at the same VA in both processes. EVEN IF 1581 * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for 1582 * example. 1583 * 1584 * This is extremely likely to happen if pmap_enter() updated the L1 1585 * entry for a recently entered mapping. 
In this case, the TLB is 1586 * flushed for the new mapping, but there may still be TLB entries for 1587 * other mappings belonging to other processes in the 1MB range 1588 * covered by the L1 entry. 1589 * 1590 * Since 'rv == 0', we know that the L1 already contains the correct 1591 * value, so the fault must be due to a stale TLB entry. 1592 * 1593 * Since we always need to flush the TLB anyway in the case where we 1594 * fixed up the L1, or frobbed the L2 PTE, we effectively deal with 1595 * stale TLB entries dynamically. 1596 * 1597 * However, the above condition can ONLY happen if the current L1 is 1598 * being shared. If it happens when the L1 is unshared, it indicates 1599 * that other parts of the pmap are not doing their job WRT managing 1600 * the TLB. 1601 */ 1602 if (rv == 0 && pmap->pm_l1->l1_domain_use_count == 1) { 1603 printf("fixup: pmap %p, va 0x%08x, ftype %d - nothing to do!\n", 1604 pmap, va, ftype); 1605 printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n", 1606 l2, l2b, ptep, pl1pd); 1607 printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n", 1608 pte, l1pd, last_fault_code); 1609#ifdef DDB 1610 Debugger(); 1611#endif 1612 } 1613#endif 1614 1615l1_section_out: 1616 cpu_tlb_flushID_SE(va); 1617 cpu_cpwait(); 1618 1619 rv = 1; 1620 1621out: 1622 rw_wunlock(&pvh_global_lock); 1623 PMAP_UNLOCK(pmap); 1624 return (rv); 1625} 1626 1627void 1628pmap_postinit(void) 1629{ 1630 struct l2_bucket *l2b; 1631 struct l1_ttable *l1; 1632 pd_entry_t *pl1pt; 1633 pt_entry_t *ptep, pte; 1634 vm_offset_t va, eva; 1635 u_int loop, needed; 1636 1637 needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0); 1638 needed -= 1; 1639 l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK); 1640 1641 for (loop = 0; loop < needed; loop++, l1++) { 1642 /* Allocate a L1 page table */ 1643 va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0, 1644 0xffffffff, L1_TABLE_SIZE, 0); 1645 1646 if (va == 0) 1647 panic("Cannot allocate L1 KVM"); 1648 1649 eva = va + L1_TABLE_SIZE; 1650 pl1pt = (pd_entry_t *)va; 1651 1652 while (va < eva) { 1653 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 1654 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1655 pte = *ptep; 1656 pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; 1657 *ptep = pte; 1658 PTE_SYNC(ptep); 1659 cpu_tlb_flushID_SE(va); 1660 cpu_cpwait(); 1661 va += PAGE_SIZE; 1662 } 1663 pmap_init_l1(l1, pl1pt); 1664 } 1665#ifdef DEBUG 1666 printf("pmap_postinit: Allocated %d static L1 descriptor tables\n", 1667 needed); 1668#endif 1669} 1670 1671/* 1672 * This is used to stuff certain critical values into the PCB where they 1673 * can be accessed quickly from cpu_switch() et al. 
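 *
 * The values cached are the physical address of the pmap's L1 table
 * (pcb_pagedir, loaded into the translation table base register by
 * cpu_setttb()), the domain access control value (pcb_dacr) and, when the
 * vector page is mapped below KERNBASE, the L1 entry used to map it
 * (pcb_pl1vec/pcb_l1vec).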
1674 */ 1675void 1676pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb) 1677{ 1678 struct l2_bucket *l2b; 1679 1680 pcb->pcb_pagedir = pmap->pm_l1->l1_physaddr; 1681 pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | 1682 (DOMAIN_CLIENT << (pmap->pm_domain * 2)); 1683 1684 if (vector_page < KERNBASE) { 1685 pcb->pcb_pl1vec = &pmap->pm_l1->l1_kva[L1_IDX(vector_page)]; 1686 l2b = pmap_get_l2_bucket(pmap, vector_page); 1687 pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO | 1688 L1_C_DOM(pmap->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL); 1689 } else 1690 pcb->pcb_pl1vec = NULL; 1691} 1692 1693void 1694pmap_activate(struct thread *td) 1695{ 1696 pmap_t pmap; 1697 struct pcb *pcb; 1698 1699 pmap = vmspace_pmap(td->td_proc->p_vmspace); 1700 pcb = td->td_pcb; 1701 1702 critical_enter(); 1703 pmap_set_pcb_pagedir(pmap, pcb); 1704 1705 if (td == curthread) { 1706 u_int cur_dacr, cur_ttb; 1707 1708 __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb)); 1709 __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr)); 1710 1711 cur_ttb &= ~(L1_TABLE_SIZE - 1); 1712 1713 if (cur_ttb == (u_int)pcb->pcb_pagedir && 1714 cur_dacr == pcb->pcb_dacr) { 1715 /* 1716 * No need to switch address spaces. 1717 */ 1718 critical_exit(); 1719 return; 1720 } 1721 1722 1723 /* 1724 * We MUST, I repeat, MUST fix up the L1 entry corresponding 1725 * to 'vector_page' in the incoming L1 table before switching 1726 * to it otherwise subsequent interrupts/exceptions (including 1727 * domain faults!) will jump into hyperspace. 1728 */ 1729 if (pcb->pcb_pl1vec) { 1730 *pcb->pcb_pl1vec = pcb->pcb_l1vec; 1731 } 1732 1733 cpu_domains(pcb->pcb_dacr); 1734 cpu_setttb(pcb->pcb_pagedir); 1735 } 1736 critical_exit(); 1737} 1738 1739static int 1740pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va) 1741{ 1742 pd_entry_t *pdep, pde; 1743 pt_entry_t *ptep, pte; 1744 vm_offset_t pa; 1745 int rv = 0; 1746 1747 /* 1748 * Make sure the descriptor itself has the correct cache mode 1749 */ 1750 pdep = &kl1[L1_IDX(va)]; 1751 pde = *pdep; 1752 1753 if (l1pte_section_p(pde)) { 1754 if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) { 1755 *pdep = (pde & ~L1_S_CACHE_MASK) | 1756 pte_l1_s_cache_mode_pt; 1757 PTE_SYNC(pdep); 1758 rv = 1; 1759 } 1760 } else { 1761 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); 1762 ptep = (pt_entry_t *)kernel_pt_lookup(pa); 1763 if (ptep == NULL) 1764 panic("pmap_bootstrap: No L2 for L2 @ va %p\n", ptep); 1765 1766 ptep = &ptep[l2pte_index(va)]; 1767 pte = *ptep; 1768 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { 1769 *ptep = (pte & ~L2_S_CACHE_MASK) | 1770 pte_l2_s_cache_mode_pt; 1771 PTE_SYNC(ptep); 1772 rv = 1; 1773 } 1774 } 1775 1776 return (rv); 1777} 1778 1779static void 1780pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap, 1781 pt_entry_t **ptep) 1782{ 1783 vm_offset_t va = *availp; 1784 struct l2_bucket *l2b; 1785 1786 if (ptep) { 1787 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 1788 if (l2b == NULL) 1789 panic("pmap_alloc_specials: no l2b for 0x%x", va); 1790 1791 *ptep = &l2b->l2b_kva[l2pte_index(va)]; 1792 } 1793 1794 *vap = va; 1795 *availp = va + (PAGE_SIZE * pages); 1796} 1797 1798/* 1799 * Bootstrap the system enough to run with virtual memory. 1800 * 1801 * On the arm this is called after mapping has already been enabled 1802 * and just syncs the pmap module with what has already been done. 
1803 * [We can't call it easily with mapping off since the kernel is not 1804 * mapped with PA == VA, hence we would have to relocate every address 1805 * from the linked base (virtual) address "KERNBASE" to the actual 1806 * (physical) address starting relative to 0] 1807 */ 1808#define PMAP_STATIC_L2_SIZE 16 1809 1810void 1811pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt) 1812{ 1813 static struct l1_ttable static_l1; 1814 static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE]; 1815 struct l1_ttable *l1 = &static_l1; 1816 struct l2_dtable *l2; 1817 struct l2_bucket *l2b; 1818 struct czpages *czp; 1819 pd_entry_t pde; 1820 pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va; 1821 pt_entry_t *ptep; 1822 vm_paddr_t pa; 1823 vm_offset_t va; 1824 vm_size_t size; 1825 int i, l1idx, l2idx, l2next = 0; 1826 1827 PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n", 1828 firstaddr, vm_max_kernel_address)); 1829 1830 virtual_avail = firstaddr; 1831 kernel_pmap->pm_l1 = l1; 1832 kernel_l1pa = l1pt->pv_pa; 1833 1834 /* 1835 * Scan the L1 translation table created by initarm() and create 1836 * the required metadata for all valid mappings found in it. 1837 */ 1838 for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) { 1839 pde = kernel_l1pt[l1idx]; 1840 1841 /* 1842 * We're only interested in Coarse mappings. 1843 * pmap_extract() can deal with section mappings without 1844 * recourse to checking L2 metadata. 1845 */ 1846 if ((pde & L1_TYPE_MASK) != L1_TYPE_C) 1847 continue; 1848 1849 /* 1850 * Lookup the KVA of this L2 descriptor table 1851 */ 1852 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); 1853 ptep = (pt_entry_t *)kernel_pt_lookup(pa); 1854 1855 if (ptep == NULL) { 1856 panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx", 1857 (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa); 1858 } 1859 1860 /* 1861 * Fetch the associated L2 metadata structure. 1862 * Allocate a new one if necessary. 1863 */ 1864 if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 1865 if (l2next == PMAP_STATIC_L2_SIZE) 1866 panic("pmap_bootstrap: out of static L2s"); 1867 kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 = 1868 &static_l2[l2next++]; 1869 } 1870 1871 /* 1872 * One more L1 slot tracked... 1873 */ 1874 l2->l2_occupancy++; 1875 1876 /* 1877 * Fill in the details of the L2 descriptor in the 1878 * appropriate bucket. 1879 */ 1880 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 1881 l2b->l2b_kva = ptep; 1882 l2b->l2b_phys = pa; 1883 l2b->l2b_l1idx = l1idx; 1884 1885 /* 1886 * Establish an initial occupancy count for this descriptor 1887 */ 1888 for (l2idx = 0; 1889 l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); 1890 l2idx++) { 1891 if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) { 1892 l2b->l2b_occupancy++; 1893 } 1894 } 1895 1896 /* 1897 * Make sure the descriptor itself has the correct cache mode. 1898 * If not, fix it, but whine about the problem. Port-meisters 1899 * should consider this a clue to fix up their initarm() 1900 * function. :) 1901 */ 1902 if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) { 1903 printf("pmap_bootstrap: WARNING! wrong cache mode for " 1904 "L2 pte @ %p\n", ptep); 1905 } 1906 } 1907 1908 1909 /* 1910 * Ensure the primary (kernel) L1 has the correct cache mode for 1911 * a page table. Bitch if it is not correctly set. 1912 */ 1913 for (va = (vm_offset_t)kernel_l1pt; 1914 va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) { 1915 if (pmap_set_pt_cache_mode(kernel_l1pt, va)) 1916 printf("pmap_bootstrap: WARNING! 
wrong cache mode for " 1917 "primary L1 @ 0x%x\n", va); 1918 } 1919 1920 cpu_dcache_wbinv_all(); 1921 cpu_l2cache_wbinv_all(); 1922 cpu_tlb_flushID(); 1923 cpu_cpwait(); 1924 1925 PMAP_LOCK_INIT(kernel_pmap); 1926 CPU_FILL(&kernel_pmap->pm_active); 1927 kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; 1928 TAILQ_INIT(&kernel_pmap->pm_pvchunk); 1929 1930 /* 1931 * Initialize the global pv list lock. 1932 */ 1933 rw_init(&pvh_global_lock, "pmap pv global"); 1934 1935 /* 1936 * Reserve some special page table entries/VA space for temporary 1937 * mapping of pages that are being copied or zeroed. 1938 */ 1939 for (czp = cpu_czpages, i = 0; i < MAXCPU; ++i, ++czp) { 1940 mtx_init(&czp->lock, "czpages", NULL, MTX_DEF); 1941 pmap_alloc_specials(&virtual_avail, 1, &czp->srcva, &czp->srcptep); 1942 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->srcptep); 1943 pmap_alloc_specials(&virtual_avail, 1, &czp->dstva, &czp->dstptep); 1944 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->dstptep); 1945 } 1946 1947 size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) / 1948 L1_S_SIZE; 1949 pmap_alloc_specials(&virtual_avail, 1950 round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE, 1951 &pmap_kernel_l2ptp_kva, NULL); 1952 1953 size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE; 1954 pmap_alloc_specials(&virtual_avail, 1955 round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE, 1956 &pmap_kernel_l2dtable_kva, NULL); 1957 1958 pmap_alloc_specials(&virtual_avail, 1959 1, (vm_offset_t*)&_tmppt, NULL); 1960 pmap_alloc_specials(&virtual_avail, 1961 MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL); 1962 SLIST_INIT(&l1_list); 1963 TAILQ_INIT(&l1_lru_list); 1964 mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF); 1965 pmap_init_l1(l1, kernel_l1pt); 1966 cpu_dcache_wbinv_all(); 1967 cpu_l2cache_wbinv_all(); 1968 cpu_tlb_flushID(); 1969 cpu_cpwait(); 1970 1971 virtual_avail = round_page(virtual_avail); 1972 virtual_end = vm_max_kernel_address; 1973 kernel_vm_end = pmap_curmaxkvaddr; 1974 1975 pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb); 1976} 1977 1978/*************************************************** 1979 * Pmap allocation/deallocation routines. 1980 ***************************************************/ 1981 1982/* 1983 * Release any resources held by the given physical map. 1984 * Called when a pmap initialized by pmap_pinit is being released. 1985 * Should only be called if the map contains no valid mappings. 1986 */ 1987void 1988pmap_release(pmap_t pmap) 1989{ 1990 struct pcb *pcb; 1991 1992 cpu_tlb_flushID(); 1993 cpu_cpwait(); 1994 if (vector_page < KERNBASE) { 1995 struct pcb *curpcb = PCPU_GET(curpcb); 1996 pcb = thread0.td_pcb; 1997 if (pmap_is_current(pmap)) { 1998 /* 1999 * Frob the L1 entry corresponding to the vector 2000 * page so that it contains the kernel pmap's domain 2001 * number. This will ensure pmap_remove() does not 2002 * pull the current vector page out from under us. 2003 */ 2004 critical_enter(); 2005 *pcb->pcb_pl1vec = pcb->pcb_l1vec; 2006 cpu_domains(pcb->pcb_dacr); 2007 cpu_setttb(pcb->pcb_pagedir); 2008 critical_exit(); 2009 } 2010 pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE); 2011 /* 2012 * Make sure cpu_switch(), et al, DTRT. This is safe to do 2013 * since this process has no remaining mappings of its own. 
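 * (Copying thread0's pcb values hands the outgoing thread the kernel
 * pmap's translation table and DACR, so whatever cpu_switch() loads
 * next remains valid once this pmap's L1 is freed below.)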
2014 */ 2015 curpcb->pcb_pl1vec = pcb->pcb_pl1vec; 2016 curpcb->pcb_l1vec = pcb->pcb_l1vec; 2017 curpcb->pcb_dacr = pcb->pcb_dacr; 2018 curpcb->pcb_pagedir = pcb->pcb_pagedir; 2019 2020 } 2021 pmap_free_l1(pmap); 2022 2023 dprintf("pmap_release()\n"); 2024} 2025 2026 2027 2028/* 2029 * Helper function for pmap_grow_l2_bucket() 2030 */ 2031static __inline int 2032pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap) 2033{ 2034 struct l2_bucket *l2b; 2035 pt_entry_t *ptep; 2036 vm_paddr_t pa; 2037 struct vm_page *m; 2038 2039 m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); 2040 if (m == NULL) 2041 return (1); 2042 pa = VM_PAGE_TO_PHYS(m); 2043 2044 if (pap) 2045 *pap = pa; 2046 2047 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2048 2049 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2050 *ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF; 2051 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0); 2052 PTE_SYNC(ptep); 2053 cpu_tlb_flushD_SE(va); 2054 cpu_cpwait(); 2055 2056 return (0); 2057} 2058 2059/* 2060 * This is the same as pmap_alloc_l2_bucket(), except that it is only 2061 * used by pmap_growkernel(). 2062 */ 2063static __inline struct l2_bucket * 2064pmap_grow_l2_bucket(pmap_t pmap, vm_offset_t va) 2065{ 2066 struct l2_dtable *l2; 2067 struct l2_bucket *l2b; 2068 struct l1_ttable *l1; 2069 pd_entry_t *pl1pd; 2070 u_short l1idx; 2071 vm_offset_t nva; 2072 2073 l1idx = L1_IDX(va); 2074 2075 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 2076 /* 2077 * No mapping at this address, as there is 2078 * no entry in the L1 table. 2079 * Need to allocate a new l2_dtable. 2080 */ 2081 nva = pmap_kernel_l2dtable_kva; 2082 if ((nva & PAGE_MASK) == 0) { 2083 /* 2084 * Need to allocate a backing page 2085 */ 2086 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) 2087 return (NULL); 2088 } 2089 2090 l2 = (struct l2_dtable *)nva; 2091 nva += sizeof(struct l2_dtable); 2092 2093 if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva & 2094 PAGE_MASK)) { 2095 /* 2096 * The new l2_dtable straddles a page boundary. 2097 * Map in another page to cover it. 2098 */ 2099 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) 2100 return (NULL); 2101 } 2102 2103 pmap_kernel_l2dtable_kva = nva; 2104 2105 /* 2106 * Link it into the parent pmap 2107 */ 2108 pmap->pm_l2[L2_IDX(l1idx)] = l2; 2109 memset(l2, 0, sizeof(*l2)); 2110 } 2111 2112 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 2113 2114 /* 2115 * Fetch pointer to the L2 page table associated with the address. 2116 */ 2117 if (l2b->l2b_kva == NULL) { 2118 pt_entry_t *ptep; 2119 2120 /* 2121 * No L2 page table has been allocated. Chances are, this 2122 * is because we just allocated the l2_dtable, above. 
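 * Kernel L2 page tables are carved out of the pmap_kernel_l2ptp_kva
 * window in L2_TABLE_SIZE_REAL-sized chunks; pmap_grow_map() supplies
 * a fresh backing page each time the window advances onto a new page.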
2123 */ 2124 nva = pmap_kernel_l2ptp_kva; 2125 ptep = (pt_entry_t *)nva; 2126 if ((nva & PAGE_MASK) == 0) { 2127 /* 2128 * Need to allocate a backing page 2129 */ 2130 if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt, 2131 &pmap_kernel_l2ptp_phys)) 2132 return (NULL); 2133 } 2134 memset(ptep, 0, L2_TABLE_SIZE_REAL); 2135 l2->l2_occupancy++; 2136 l2b->l2b_kva = ptep; 2137 l2b->l2b_l1idx = l1idx; 2138 l2b->l2b_phys = pmap_kernel_l2ptp_phys; 2139 2140 pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL; 2141 pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL; 2142 } 2143 2144 /* Distribute new L1 entry to all other L1s */ 2145 SLIST_FOREACH(l1, &l1_list, l1_link) { 2146 pl1pd = &l1->l1_kva[L1_IDX(va)]; 2147 *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) | 2148 L1_C_PROTO; 2149 PTE_SYNC(pl1pd); 2150 } 2151 cpu_tlb_flushID_SE(va); 2152 cpu_cpwait(); 2153 2154 return (l2b); 2155} 2156 2157 2158/* 2159 * grow the number of kernel page table entries, if needed 2160 */ 2161void 2162pmap_growkernel(vm_offset_t addr) 2163{ 2164 pmap_t kpmap = pmap_kernel(); 2165 2166 if (addr <= pmap_curmaxkvaddr) 2167 return; /* we are OK */ 2168 2169 /* 2170 * whoops! we need to add kernel PTPs 2171 */ 2172 2173 /* Map 1MB at a time */ 2174 for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE) 2175 pmap_grow_l2_bucket(kpmap, pmap_curmaxkvaddr); 2176 2177 kernel_vm_end = pmap_curmaxkvaddr; 2178} 2179 2180/* 2181 * Returns TRUE if the given page is mapped individually or as part of 2182 * a 1MB section. Otherwise, returns FALSE. 2183 */ 2184boolean_t 2185pmap_page_is_mapped(vm_page_t m) 2186{ 2187 boolean_t rv; 2188 2189 if ((m->oflags & VPO_UNMANAGED) != 0) 2190 return (FALSE); 2191 rw_wlock(&pvh_global_lock); 2192 rv = !TAILQ_EMPTY(&m->md.pv_list) || 2193 ((m->flags & PG_FICTITIOUS) == 0 && 2194 !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); 2195 rw_wunlock(&pvh_global_lock); 2196 return (rv); 2197} 2198 2199/* 2200 * Remove all pages from specified address space 2201 * this aids process exit speeds. Also, this code 2202 * is special cased for current process only, but 2203 * can have the more generic (and slightly slower) 2204 * mode enabled. This is much faster than pmap_remove 2205 * in the case of running down an entire address space. 2206 */ 2207void 2208pmap_remove_pages(pmap_t pmap) 2209{ 2210 struct pv_entry *pv; 2211 struct l2_bucket *l2b = NULL; 2212 struct pv_chunk *pc, *npc; 2213 struct md_page *pvh; 2214 pd_entry_t *pl1pd, l1pd; 2215 pt_entry_t *ptep; 2216 vm_page_t m, mt; 2217 vm_offset_t va; 2218 uint32_t inuse, bitmask; 2219 int allfree, bit, field, idx; 2220 2221 rw_wlock(&pvh_global_lock); 2222 PMAP_LOCK(pmap); 2223 2224 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2225 allfree = 1; 2226 for (field = 0; field < _NPCM; field++) { 2227 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2228 while (inuse != 0) { 2229 bit = ffs(inuse) - 1; 2230 bitmask = 1ul << bit; 2231 idx = field * sizeof(inuse) * NBBY + bit; 2232 pv = &pc->pc_pventry[idx]; 2233 va = pv->pv_va; 2234 inuse &= ~bitmask; 2235 if (pv->pv_flags & PVF_WIRED) { 2236 /* Cannot remove wired pages now. 
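 * Wired mappings are left in place; clearing allfree below keeps the
 * pv_chunk that still describes them from being freed.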
*/ 2237 allfree = 0; 2238 continue; 2239 } 2240 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 2241 l1pd = *pl1pd; 2242 l2b = pmap_get_l2_bucket(pmap, va); 2243 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 2244 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 2245 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2246 if (TAILQ_EMPTY(&pvh->pv_list)) { 2247 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 2248 KASSERT((vm_offset_t)m >= KERNBASE, 2249 ("Trying to access non-existent page " 2250 "va %x l1pd %x", trunc_1mpage(va), l1pd)); 2251 for (mt = m; mt < &m[L2_PTE_NUM_TOTAL]; mt++) { 2252 if (TAILQ_EMPTY(&mt->md.pv_list)) 2253 vm_page_aflag_clear(mt, PGA_WRITEABLE); 2254 } 2255 } 2256 if (l2b != NULL) { 2257 KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL, 2258 ("pmap_remove_pages: l2_bucket occupancy error")); 2259 pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL); 2260 } 2261 pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL; 2262 *pl1pd = 0; 2263 PTE_SYNC(pl1pd); 2264 } else { 2265 KASSERT(l2b != NULL, 2266 ("No L2 bucket in pmap_remove_pages")); 2267 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2268 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 2269 KASSERT((vm_offset_t)m >= KERNBASE, 2270 ("Trying to access non-existent page " 2271 "va %x pte %x", va, *ptep)); 2272 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2273 if (TAILQ_EMPTY(&m->md.pv_list) && 2274 (m->flags & PG_FICTITIOUS) == 0) { 2275 pvh = pa_to_pvh(l2pte_pa(*ptep)); 2276 if (TAILQ_EMPTY(&pvh->pv_list)) 2277 vm_page_aflag_clear(m, PGA_WRITEABLE); 2278 } 2279 *ptep = 0; 2280 PTE_SYNC(ptep); 2281 pmap_free_l2_bucket(pmap, l2b, 1); 2282 pmap->pm_stats.resident_count--; 2283 } 2284 2285 /* Mark free */ 2286 PV_STAT(pv_entry_frees++); 2287 PV_STAT(pv_entry_spare++); 2288 pv_entry_count--; 2289 pc->pc_map[field] |= bitmask; 2290 } 2291 } 2292 if (allfree) { 2293 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2294 pmap_free_pv_chunk(pc); 2295 } 2296 2297 } 2298 2299 rw_wunlock(&pvh_global_lock); 2300 cpu_tlb_flushID(); 2301 cpu_cpwait(); 2302 PMAP_UNLOCK(pmap); 2303} 2304 2305 2306/*************************************************** 2307 * Low level mapping routines..... 2308 ***************************************************/ 2309 2310#ifdef ARM_HAVE_SUPERSECTIONS 2311/* Map a super section into the KVA. */ 2312 2313void 2314pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags) 2315{ 2316 pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) | 2317 (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL, 2318 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | 2319 L1_S_DOM(PMAP_DOMAIN_KERNEL); 2320 struct l1_ttable *l1; 2321 vm_offset_t va0, va_end; 2322 2323 KASSERT(((va | pa) & L1_SUP_OFFSET) == 0, 2324 ("Not a valid super section mapping")); 2325 if (flags & SECTION_CACHE) 2326 pd |= pte_l1_s_cache_mode; 2327 else if (flags & SECTION_PT) 2328 pd |= pte_l1_s_cache_mode_pt; 2329 2330 va0 = va & L1_SUP_FRAME; 2331 va_end = va + L1_SUP_SIZE; 2332 SLIST_FOREACH(l1, &l1_list, l1_link) { 2333 va = va0; 2334 for (; va < va_end; va += L1_S_SIZE) { 2335 l1->l1_kva[L1_IDX(va)] = pd; 2336 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); 2337 } 2338 } 2339} 2340#endif 2341 2342/* Map a section into the KVA. 
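 * The section descriptor is written into every L1 table on l1_list, so
 * the mapping becomes visible in all address spaces at once; both va and
 * pa must be 1MB aligned.  Illustrative use (not taken from this file):
 * pmap_kenter_section(va, pa, SECTION_CACHE) for ordinary cacheable
 * memory, or SECTION_PT for memory that will hold page tables.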
*/ 2343 2344void 2345pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags) 2346{ 2347 pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, 2348 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | L1_S_REF | 2349 L1_S_DOM(PMAP_DOMAIN_KERNEL); 2350 struct l1_ttable *l1; 2351 2352 KASSERT(((va | pa) & L1_S_OFFSET) == 0, 2353 ("Not a valid section mapping")); 2354 if (flags & SECTION_CACHE) 2355 pd |= pte_l1_s_cache_mode; 2356 else if (flags & SECTION_PT) 2357 pd |= pte_l1_s_cache_mode_pt; 2358 2359 SLIST_FOREACH(l1, &l1_list, l1_link) { 2360 l1->l1_kva[L1_IDX(va)] = pd; 2361 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); 2362 } 2363 cpu_tlb_flushID_SE(va); 2364 cpu_cpwait(); 2365} 2366 2367/* 2368 * Make a temporary mapping for a physical address. This is only intended 2369 * to be used for panic dumps. 2370 */ 2371void * 2372pmap_kenter_temp(vm_paddr_t pa, int i) 2373{ 2374 vm_offset_t va; 2375 2376 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2377 pmap_kenter(va, pa); 2378 return ((void *)crashdumpmap); 2379} 2380 2381/* 2382 * add a wired page to the kva 2383 * note that in order for the mapping to take effect -- you 2384 * should do a invltlb after doing the pmap_kenter... 2385 */ 2386static PMAP_INLINE void 2387pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags) 2388{ 2389 struct l2_bucket *l2b; 2390 pt_entry_t *ptep; 2391 pt_entry_t opte; 2392 2393 PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n", 2394 (uint32_t) va, (uint32_t) pa)); 2395 2396 2397 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2398 if (l2b == NULL) 2399 l2b = pmap_grow_l2_bucket(pmap_kernel(), va); 2400 KASSERT(l2b != NULL, ("No L2 Bucket")); 2401 2402 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2403 opte = *ptep; 2404 2405 if (flags & KENTER_CACHE) 2406 *ptep = L2_S_PROTO | l2s_mem_types[PTE_CACHE] | pa | L2_S_REF; 2407 else if (flags & KENTER_DEVICE) 2408 *ptep = L2_S_PROTO | l2s_mem_types[PTE_DEVICE] | pa | L2_S_REF; 2409 else 2410 *ptep = L2_S_PROTO | l2s_mem_types[PTE_NOCACHE] | pa | L2_S_REF; 2411 2412 if (flags & KENTER_CACHE) { 2413 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 2414 flags & KENTER_USER); 2415 } else { 2416 pmap_set_prot(ptep, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 2417 0); 2418 } 2419 2420 PTE_SYNC(ptep); 2421 if (l2pte_valid(opte)) { 2422 if (L2_S_EXECUTABLE(opte) || L2_S_EXECUTABLE(*ptep)) 2423 cpu_tlb_flushID_SE(va); 2424 else 2425 cpu_tlb_flushD_SE(va); 2426 } else { 2427 if (opte == 0) 2428 l2b->l2b_occupancy++; 2429 } 2430 cpu_cpwait(); 2431 2432 PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n", 2433 (uint32_t) ptep, opte, *ptep)); 2434} 2435 2436void 2437pmap_kenter(vm_offset_t va, vm_paddr_t pa) 2438{ 2439 pmap_kenter_internal(va, pa, KENTER_CACHE); 2440} 2441 2442void 2443pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa) 2444{ 2445 2446 pmap_kenter_internal(va, pa, 0); 2447} 2448 2449void 2450pmap_kenter_device(vm_offset_t va, vm_paddr_t pa) 2451{ 2452 2453 pmap_kenter_internal(va, pa, KENTER_DEVICE); 2454} 2455 2456void 2457pmap_kenter_user(vm_offset_t va, vm_paddr_t pa) 2458{ 2459 2460 pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER); 2461 /* 2462 * Call pmap_fault_fixup now, to make sure we'll have no exception 2463 * at the first use of the new address, or bad things will happen, 2464 * as we use one of these addresses in the exception handlers. 
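 * (That is, the referenced/modified emulation is performed up front
 * here rather than from the abort handler, which must not itself fault
 * on this page.)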
2465 */ 2466 pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1); 2467} 2468 2469vm_paddr_t 2470pmap_kextract(vm_offset_t va) 2471{ 2472 2473 if (kernel_vm_end == 0) 2474 return (0); 2475 return (pmap_extract_locked(kernel_pmap, va)); 2476} 2477 2478/* 2479 * remove a page from the kernel pagetables 2480 */ 2481void 2482pmap_kremove(vm_offset_t va) 2483{ 2484 struct l2_bucket *l2b; 2485 pt_entry_t *ptep, opte; 2486 2487 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2488 if (!l2b) 2489 return; 2490 KASSERT(l2b != NULL, ("No L2 Bucket")); 2491 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2492 opte = *ptep; 2493 if (l2pte_valid(opte)) { 2494 va = va & ~PAGE_MASK; 2495 *ptep = 0; 2496 PTE_SYNC(ptep); 2497 if (L2_S_EXECUTABLE(opte)) 2498 cpu_tlb_flushID_SE(va); 2499 else 2500 cpu_tlb_flushD_SE(va); 2501 cpu_cpwait(); 2502 } 2503} 2504 2505 2506/* 2507 * Used to map a range of physical addresses into kernel 2508 * virtual address space. 2509 * 2510 * The value passed in '*virt' is a suggested virtual address for 2511 * the mapping. Architectures which can support a direct-mapped 2512 * physical to virtual region can return the appropriate address 2513 * within that region, leaving '*virt' unchanged. Other 2514 * architectures should map the pages starting at '*virt' and 2515 * update '*virt' with the first usable address after the mapped 2516 * region. 2517 */ 2518vm_offset_t 2519pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 2520{ 2521 vm_offset_t sva = *virt; 2522 vm_offset_t va = sva; 2523 2524 PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, " 2525 "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end, 2526 prot)); 2527 2528 while (start < end) { 2529 pmap_kenter(va, start); 2530 va += PAGE_SIZE; 2531 start += PAGE_SIZE; 2532 } 2533 *virt = va; 2534 return (sva); 2535} 2536 2537/* 2538 * Add a list of wired pages to the kva 2539 * this routine is only used for temporary 2540 * kernel mappings that do not need to have 2541 * page modification or references recorded. 2542 * Note that old mappings are simply written 2543 * over. The page *must* be wired. 2544 */ 2545void 2546pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 2547{ 2548 int i; 2549 2550 for (i = 0; i < count; i++) { 2551 pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]), 2552 KENTER_CACHE); 2553 va += PAGE_SIZE; 2554 } 2555} 2556 2557 2558/* 2559 * this routine jerks page mappings from the 2560 * kernel -- it is meant only for temporary mappings. 2561 */ 2562void 2563pmap_qremove(vm_offset_t va, int count) 2564{ 2565 int i; 2566 2567 for (i = 0; i < count; i++) { 2568 if (vtophys(va)) 2569 pmap_kremove(va); 2570 2571 va += PAGE_SIZE; 2572 } 2573} 2574 2575 2576/* 2577 * pmap_object_init_pt preloads the ptes for a given object 2578 * into the specified pmap. This eliminates the blast of soft 2579 * faults on process startup and immediately after an mmap. 2580 */ 2581void 2582pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2583 vm_pindex_t pindex, vm_size_t size) 2584{ 2585 2586 VM_OBJECT_ASSERT_WLOCKED(object); 2587 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2588 ("pmap_object_init_pt: non-device object")); 2589} 2590 2591 2592/* 2593 * pmap_is_prefaultable: 2594 * 2595 * Return whether or not the specified virtual address is elgible 2596 * for prefault. 
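 * An address is considered prefaultable when its L1 slot is valid, does
 * not describe a 1MB section, and the corresponding L2 PTE is still empty.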
2597 */ 2598boolean_t 2599pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2600{ 2601 pd_entry_t *pdep; 2602 pt_entry_t *ptep; 2603 2604 if (!pmap_get_pde_pte(pmap, addr, &pdep, &ptep)) 2605 return (FALSE); 2606 KASSERT((pdep != NULL && (l1pte_section_p(*pdep) || ptep != NULL)), 2607 ("Valid mapping but no pte ?")); 2608 if (*pdep != 0 && !l1pte_section_p(*pdep)) 2609 if (*ptep == 0) 2610 return (TRUE); 2611 return (FALSE); 2612} 2613 2614/* 2615 * Fetch pointers to the PDE/PTE for the given pmap/VA pair. 2616 * Returns TRUE if the mapping exists, else FALSE. 2617 * 2618 * NOTE: This function is only used by a couple of arm-specific modules. 2619 * It is not safe to take any pmap locks here, since we could be right 2620 * in the middle of debugging the pmap anyway... 2621 * 2622 * It is possible for this routine to return FALSE even though a valid 2623 * mapping does exist. This is because we don't lock, so the metadata 2624 * state may be inconsistent. 2625 * 2626 * NOTE: We can return a NULL *ptp in the case where the L1 pde is 2627 * a "section" mapping. 2628 */ 2629boolean_t 2630pmap_get_pde_pte(pmap_t pmap, vm_offset_t va, pd_entry_t **pdp, 2631 pt_entry_t **ptp) 2632{ 2633 struct l2_dtable *l2; 2634 pd_entry_t *pl1pd, l1pd; 2635 pt_entry_t *ptep; 2636 u_short l1idx; 2637 2638 if (pmap->pm_l1 == NULL) 2639 return (FALSE); 2640 2641 l1idx = L1_IDX(va); 2642 *pdp = pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 2643 l1pd = *pl1pd; 2644 2645 if (l1pte_section_p(l1pd)) { 2646 *ptp = NULL; 2647 return (TRUE); 2648 } 2649 2650 if (pmap->pm_l2 == NULL) 2651 return (FALSE); 2652 2653 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 2654 2655 if (l2 == NULL || 2656 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { 2657 return (FALSE); 2658 } 2659 2660 *ptp = &ptep[l2pte_index(va)]; 2661 return (TRUE); 2662} 2663 2664/* 2665 * Routine: pmap_remove_all 2666 * Function: 2667 * Removes this physical page from 2668 * all physical maps in which it resides. 2669 * Reflects back modify bits to the pager. 2670 * 2671 * Notes: 2672 * Original versions of this routine were very 2673 * inefficient because they iteratively called 2674 * pmap_remove (slow...) 
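 * Any 1MB section mappings covering the page are demoted first; each
 * remaining 4KB mapping is then unmapped, its pv entry freed, and a
 * single TLB flush is issued if the current or kernel pmap was touched.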
2675 */ 2676void 2677pmap_remove_all(vm_page_t m) 2678{ 2679 struct md_page *pvh; 2680 pv_entry_t pv; 2681 pmap_t pmap; 2682 pt_entry_t *ptep; 2683 struct l2_bucket *l2b; 2684 boolean_t flush = FALSE; 2685 pmap_t curpmap; 2686 u_int is_exec = 0; 2687 2688 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2689 ("pmap_remove_all: page %p is not managed", m)); 2690 rw_wlock(&pvh_global_lock); 2691 if ((m->flags & PG_FICTITIOUS) != 0) 2692 goto small_mappings; 2693 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2694 while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2695 pmap = PV_PMAP(pv); 2696 PMAP_LOCK(pmap); 2697 pd_entry_t *pl1pd; 2698 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 2699 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, 2700 ("pmap_remove_all: valid section mapping expected")); 2701 (void)pmap_demote_section(pmap, pv->pv_va); 2702 PMAP_UNLOCK(pmap); 2703 } 2704small_mappings: 2705 curpmap = vmspace_pmap(curproc->p_vmspace); 2706 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2707 pmap = PV_PMAP(pv); 2708 if (flush == FALSE && (pmap == curpmap || 2709 pmap == pmap_kernel())) 2710 flush = TRUE; 2711 2712 PMAP_LOCK(pmap); 2713 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 2714 KASSERT(l2b != NULL, ("No l2 bucket")); 2715 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 2716 is_exec |= PTE_BEEN_EXECD(*ptep); 2717 *ptep = 0; 2718 if (pmap_is_current(pmap)) 2719 PTE_SYNC(ptep); 2720 pmap_free_l2_bucket(pmap, l2b, 1); 2721 pmap->pm_stats.resident_count--; 2722 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2723 if (pv->pv_flags & PVF_WIRED) 2724 pmap->pm_stats.wired_count--; 2725 pmap_free_pv_entry(pmap, pv); 2726 PMAP_UNLOCK(pmap); 2727 } 2728 2729 if (flush) { 2730 if (is_exec) 2731 cpu_tlb_flushID(); 2732 else 2733 cpu_tlb_flushD(); 2734 cpu_cpwait(); 2735 } 2736 vm_page_aflag_clear(m, PGA_WRITEABLE); 2737 rw_wunlock(&pvh_global_lock); 2738} 2739 2740int 2741pmap_change_attr(vm_offset_t sva, vm_size_t len, int mode) 2742{ 2743 vm_offset_t base, offset, tmpva; 2744 vm_size_t size; 2745 struct l2_bucket *l2b; 2746 pt_entry_t *ptep, pte; 2747 vm_offset_t next_bucket; 2748 2749 PMAP_LOCK(kernel_pmap); 2750 2751 base = trunc_page(sva); 2752 offset = sva & PAGE_MASK; 2753 size = roundup(offset + len, PAGE_SIZE); 2754 2755#ifdef checkit 2756 /* 2757 * Only supported on kernel virtual addresses, including the direct 2758 * map but excluding the recursive map. 2759 */ 2760 if (base < DMAP_MIN_ADDRESS) { 2761 PMAP_UNLOCK(kernel_pmap); 2762 return (EINVAL); 2763 } 2764#endif 2765 for (tmpva = base; tmpva < base + size; ) { 2766 next_bucket = L2_NEXT_BUCKET(tmpva); 2767 if (next_bucket > base + size) 2768 next_bucket = base + size; 2769 2770 l2b = pmap_get_l2_bucket(kernel_pmap, tmpva); 2771 if (l2b == NULL) { 2772 tmpva = next_bucket; 2773 continue; 2774 } 2775 2776 ptep = &l2b->l2b_kva[l2pte_index(tmpva)]; 2777 2778 if (*ptep == 0) { 2779 PMAP_UNLOCK(kernel_pmap); 2780 return(EINVAL); 2781 } 2782 2783 pte = *ptep &~ L2_S_CACHE_MASK; 2784 cpu_idcache_wbinv_range(tmpva, PAGE_SIZE); 2785 pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE); 2786 *ptep = pte; 2787 cpu_tlb_flushID_SE(tmpva); 2788 cpu_cpwait(); 2789 2790 dprintf("%s: for va:%x ptep:%x pte:%x\n", 2791 __func__, tmpva, (uint32_t)ptep, pte); 2792 tmpva += PAGE_SIZE; 2793 } 2794 2795 PMAP_UNLOCK(kernel_pmap); 2796 2797 return (0); 2798} 2799 2800/* 2801 * Set the physical protection on the 2802 * specified range of this map as requested. 
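 * Removing read access removes the mappings entirely (pmap_remove);
 * adding write access is a no-op that is left for vm_fault() to handle;
 * anything else write-protects the range, demoting 1MB sections unless
 * the whole section falls inside [sva, eva).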
2803 */ 2804void 2805pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2806{ 2807 struct l2_bucket *l2b; 2808 struct md_page *pvh; 2809 struct pv_entry *pve; 2810 pd_entry_t *pl1pd, l1pd; 2811 pt_entry_t *ptep, pte; 2812 vm_offset_t next_bucket; 2813 u_int is_exec, is_refd; 2814 int flush; 2815 2816 if ((prot & VM_PROT_READ) == 0) { 2817 pmap_remove(pmap, sva, eva); 2818 return; 2819 } 2820 2821 if (prot & VM_PROT_WRITE) { 2822 /* 2823 * If this is a read->write transition, just ignore it and let 2824 * vm_fault() take care of it later. 2825 */ 2826 return; 2827 } 2828 2829 rw_wlock(&pvh_global_lock); 2830 PMAP_LOCK(pmap); 2831 2832 /* 2833 * OK, at this point, we know we're doing write-protect operation. 2834 * If the pmap is active, write-back the range. 2835 */ 2836 2837 flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1; 2838 is_exec = is_refd = 0; 2839 2840 while (sva < eva) { 2841 next_bucket = L2_NEXT_BUCKET(sva); 2842 /* 2843 * Check for large page. 2844 */ 2845 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 2846 l1pd = *pl1pd; 2847 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 2848 KASSERT(pmap != pmap_kernel(), 2849 ("pmap_protect: trying to modify " 2850 "kernel section protections")); 2851 /* 2852 * Are we protecting the entire large page? If not, 2853 * demote the mapping and fall through. 2854 */ 2855 if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) && 2856 eva >= L2_NEXT_BUCKET(sva)) { 2857 l1pd &= ~(L1_S_PROT_MASK | L1_S_XN); 2858 if (!(prot & VM_PROT_EXECUTE)) 2859 *pl1pd |= L1_S_XN; 2860 /* 2861 * At this point we are always setting 2862 * write-protect bit. 2863 */ 2864 l1pd |= L1_S_APX; 2865 /* All managed superpages are user pages. */ 2866 l1pd |= L1_S_PROT_U; 2867 *pl1pd = l1pd; 2868 PTE_SYNC(pl1pd); 2869 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 2870 pve = pmap_find_pv(pvh, pmap, 2871 trunc_1mpage(sva)); 2872 pve->pv_flags &= ~PVF_WRITE; 2873 sva = next_bucket; 2874 continue; 2875 } else if (!pmap_demote_section(pmap, sva)) { 2876 /* The large page mapping was destroyed. */ 2877 sva = next_bucket; 2878 continue; 2879 } 2880 } 2881 if (next_bucket > eva) 2882 next_bucket = eva; 2883 l2b = pmap_get_l2_bucket(pmap, sva); 2884 if (l2b == NULL) { 2885 sva = next_bucket; 2886 continue; 2887 } 2888 2889 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 2890 2891 while (sva < next_bucket) { 2892 if ((pte = *ptep) != 0 && L2_S_WRITABLE(pte)) { 2893 struct vm_page *m; 2894 2895 m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); 2896 pmap_set_prot(ptep, prot, 2897 !(pmap == pmap_kernel())); 2898 PTE_SYNC(ptep); 2899 2900 pmap_modify_pv(m, pmap, sva, PVF_WRITE, 0); 2901 2902 if (flush >= 0) { 2903 flush++; 2904 is_exec |= PTE_BEEN_EXECD(pte); 2905 is_refd |= PTE_BEEN_REFD(pte); 2906 } else { 2907 if (PTE_BEEN_EXECD(pte)) 2908 cpu_tlb_flushID_SE(sva); 2909 else if (PTE_BEEN_REFD(pte)) 2910 cpu_tlb_flushD_SE(sva); 2911 } 2912 } 2913 2914 sva += PAGE_SIZE; 2915 ptep++; 2916 } 2917 } 2918 2919 2920 if (flush) { 2921 if (is_exec) 2922 cpu_tlb_flushID(); 2923 else 2924 if (is_refd) 2925 cpu_tlb_flushD(); 2926 cpu_cpwait(); 2927 } 2928 rw_wunlock(&pvh_global_lock); 2929 2930 PMAP_UNLOCK(pmap); 2931} 2932 2933 2934/* 2935 * Insert the given physical page (p) at 2936 * the specified virtual address (v) in the 2937 * target physical map with the protection requested. 2938 * 2939 * If specified, the page will be wired down, meaning 2940 * that the related pte can not be reclaimed. 2941 * 2942 * NB: This is the only routine which MAY NOT lazy-evaluate 2943 * or lose information. 
That is, this routine must actually 2944 * insert this page into the given map NOW. 2945 */ 2946 2947int 2948pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2949 u_int flags, int8_t psind __unused) 2950{ 2951 struct l2_bucket *l2b; 2952 int rv; 2953 2954 rw_wlock(&pvh_global_lock); 2955 PMAP_LOCK(pmap); 2956 rv = pmap_enter_locked(pmap, va, m, prot, flags); 2957 if (rv == KERN_SUCCESS) { 2958 /* 2959 * If both the l2b_occupancy and the reservation are fully 2960 * populated, then attempt promotion. 2961 */ 2962 l2b = pmap_get_l2_bucket(pmap, va); 2963 if (l2b != NULL && l2b->l2b_occupancy == L2_PTE_NUM_TOTAL && 2964 sp_enabled && (m->flags & PG_FICTITIOUS) == 0 && 2965 vm_reserv_level_iffullpop(m) == 0) 2966 pmap_promote_section(pmap, va); 2967 } 2968 PMAP_UNLOCK(pmap); 2969 rw_wunlock(&pvh_global_lock); 2970 return (rv); 2971} 2972 2973/* 2974 * The pvh global and pmap locks must be held. 2975 */ 2976static int 2977pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2978 u_int flags) 2979{ 2980 struct l2_bucket *l2b = NULL; 2981 struct vm_page *om; 2982 struct pv_entry *pve = NULL; 2983 pd_entry_t *pl1pd, l1pd; 2984 pt_entry_t *ptep, npte, opte; 2985 u_int nflags; 2986 u_int is_exec, is_refd; 2987 vm_paddr_t pa; 2988 u_char user; 2989 2990 PMAP_ASSERT_LOCKED(pmap); 2991 rw_assert(&pvh_global_lock, RA_WLOCKED); 2992 if (va == vector_page) { 2993 pa = systempage.pv_pa; 2994 m = NULL; 2995 } else { 2996 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2997 VM_OBJECT_ASSERT_LOCKED(m->object); 2998 pa = VM_PAGE_TO_PHYS(m); 2999 } 3000 3001 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3002 if ((va < VM_MAXUSER_ADDRESS) && 3003 (*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) { 3004 (void)pmap_demote_section(pmap, va); 3005 } 3006 3007 user = 0; 3008 /* 3009 * Make sure userland mappings get the right permissions 3010 */ 3011 if (pmap != pmap_kernel() && va != vector_page) 3012 user = 1; 3013 3014 nflags = 0; 3015 3016 if (prot & VM_PROT_WRITE) 3017 nflags |= PVF_WRITE; 3018 if ((flags & PMAP_ENTER_WIRED) != 0) 3019 nflags |= PVF_WIRED; 3020 3021 PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, " 3022 "prot = %x, flags = %x\n", (uint32_t) pmap, va, (uint32_t) m, 3023 prot, flags)); 3024 3025 if (pmap == pmap_kernel()) { 3026 l2b = pmap_get_l2_bucket(pmap, va); 3027 if (l2b == NULL) 3028 l2b = pmap_grow_l2_bucket(pmap, va); 3029 } else { 3030do_l2b_alloc: 3031 l2b = pmap_alloc_l2_bucket(pmap, va); 3032 if (l2b == NULL) { 3033 if ((flags & PMAP_ENTER_NOSLEEP) == 0) { 3034 PMAP_UNLOCK(pmap); 3035 rw_wunlock(&pvh_global_lock); 3036 VM_WAIT; 3037 rw_wlock(&pvh_global_lock); 3038 PMAP_LOCK(pmap); 3039 goto do_l2b_alloc; 3040 } 3041 return (KERN_RESOURCE_SHORTAGE); 3042 } 3043 } 3044 3045 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3046 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 3047 panic("pmap_enter: attempt to enter on 1MB page, va: %#x", va); 3048 3049 ptep = &l2b->l2b_kva[l2pte_index(va)]; 3050 3051 opte = *ptep; 3052 npte = pa; 3053 is_exec = is_refd = 0; 3054 3055 if (opte) { 3056 if (l2pte_pa(opte) == pa) { 3057 /* 3058 * We're changing the attrs of an existing mapping. 3059 */ 3060 if (m != NULL) 3061 pmap_modify_pv(m, pmap, va, 3062 PVF_WRITE | PVF_WIRED, nflags); 3063 is_exec |= PTE_BEEN_EXECD(opte); 3064 is_refd |= PTE_BEEN_REFD(opte); 3065 goto validate; 3066 } 3067 if ((om = PHYS_TO_VM_PAGE(l2pte_pa(opte)))) { 3068 /* 3069 * Replacing an existing mapping with a new one. 
3070 * It is part of our managed memory so we 3071 * must remove it from the PV list 3072 */ 3073 if ((pve = pmap_remove_pv(om, pmap, va))) { 3074 is_exec |= PTE_BEEN_EXECD(opte); 3075 is_refd |= PTE_BEEN_REFD(opte); 3076 3077 if (m && ((m->oflags & VPO_UNMANAGED))) 3078 pmap_free_pv_entry(pmap, pve); 3079 } 3080 } 3081 3082 } else { 3083 /* 3084 * Keep the stats up to date 3085 */ 3086 l2b->l2b_occupancy++; 3087 pmap->pm_stats.resident_count++; 3088 } 3089 3090 /* 3091 * Enter on the PV list if part of our managed memory. 3092 */ 3093 if ((m && !(m->oflags & VPO_UNMANAGED))) { 3094 if ((!pve) && (pve = pmap_get_pv_entry(pmap, FALSE)) == NULL) 3095 panic("pmap_enter: no pv entries"); 3096 3097 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 3098 ("pmap_enter: managed mapping within the clean submap")); 3099 KASSERT(pve != NULL, ("No pv")); 3100 pmap_enter_pv(m, pve, pmap, va, nflags); 3101 } 3102 3103validate: 3104 /* Make the new PTE valid */ 3105 npte |= L2_S_PROTO; 3106#ifdef SMP 3107 npte |= L2_SHARED; 3108#endif 3109 /* Set defaults first - kernel read access */ 3110 npte |= L2_APX; 3111 npte |= L2_S_PROT_R; 3112 /* Set "referenced" flag */ 3113 npte |= L2_S_REF; 3114 3115 /* Now tune APs as desired */ 3116 if (user) 3117 npte |= L2_S_PROT_U; 3118 /* 3119 * If this is not a vector_page 3120 * then continue setting mapping parameters 3121 */ 3122 if (m != NULL) { 3123 if ((m->oflags & VPO_UNMANAGED) == 0) { 3124 if (prot & (VM_PROT_ALL)) { 3125 vm_page_aflag_set(m, PGA_REFERENCED); 3126 } else { 3127 /* 3128 * Need to do page referenced emulation. 3129 */ 3130 npte &= ~L2_S_REF; 3131 } 3132 } 3133 3134 if (prot & VM_PROT_WRITE) { 3135 if ((m->oflags & VPO_UNMANAGED) == 0) { 3136 vm_page_aflag_set(m, PGA_WRITEABLE); 3137 /* 3138 * XXX: Skip modified bit emulation for now. 3139 * The emulation reveals problems 3140 * that result in random failures 3141 * during memory allocation on some 3142 * platforms. 3143 * Therefore, the page is marked RW 3144 * immediately. 3145 */ 3146 npte &= ~(L2_APX); 3147 vm_page_dirty(m); 3148 } else 3149 npte &= ~(L2_APX); 3150 } 3151 if (!(prot & VM_PROT_EXECUTE)) 3152 npte |= L2_XN; 3153 3154 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) 3155 npte |= pte_l2_s_cache_mode; 3156 } 3157 3158 CTR5(KTR_PMAP,"enter: pmap:%p va:%x prot:%x pte:%x->%x", 3159 pmap, va, prot, opte, npte); 3160 /* 3161 * If this is just a wiring change, the two PTEs will be 3162 * identical, so there's no need to update the page table. 3163 */ 3164 if (npte != opte) { 3165 boolean_t is_cached = pmap_is_current(pmap); 3166 3167 *ptep = npte; 3168 PTE_SYNC(ptep); 3169 if (is_cached) { 3170 /* 3171 * We only need to frob the cache/tlb if this pmap 3172 * is current 3173 */ 3174 if (L1_IDX(va) != L1_IDX(vector_page) && 3175 l2pte_valid(npte)) { 3176 /* 3177 * This mapping is likely to be accessed as 3178 * soon as we return to userland. Fix up the 3179 * L1 entry to avoid taking another 3180 * page/domain fault. 3181 */ 3182 l1pd = l2b->l2b_phys | 3183 L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 3184 if (*pl1pd != l1pd) { 3185 *pl1pd = l1pd; 3186 PTE_SYNC(pl1pd); 3187 } 3188 } 3189 } 3190 3191 if (is_exec) 3192 cpu_tlb_flushID_SE(va); 3193 else if (is_refd) 3194 cpu_tlb_flushD_SE(va); 3195 cpu_cpwait(); 3196 } 3197 3198 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 3199 cpu_icache_sync_range(va, PAGE_SIZE); 3200 return (KERN_SUCCESS); 3201} 3202 3203/* 3204 * Maps a sequence of resident pages belonging to the same object. 
3205 * The sequence begins with the given page m_start. This page is 3206 * mapped at the given virtual address start. Each subsequent page is 3207 * mapped at a virtual address that is offset from start by the same 3208 * amount as the page is offset from m_start within the object. The 3209 * last page in the sequence is the page with the largest offset from 3210 * m_start that can be mapped at a virtual address less than the given 3211 * virtual address end. Not every virtual page between start and end 3212 * is mapped; only those for which a resident page exists with the 3213 * corresponding offset from m_start are mapped. 3214 */ 3215void 3216pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3217 vm_page_t m_start, vm_prot_t prot) 3218{ 3219 vm_offset_t va; 3220 vm_page_t m; 3221 vm_pindex_t diff, psize; 3222 3223 VM_OBJECT_ASSERT_LOCKED(m_start->object); 3224 3225 psize = atop(end - start); 3226 m = m_start; 3227 prot &= VM_PROT_READ | VM_PROT_EXECUTE; 3228 rw_wlock(&pvh_global_lock); 3229 PMAP_LOCK(pmap); 3230 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3231 va = start + ptoa(diff); 3232 if ((va & L1_S_OFFSET) == 0 && L2_NEXT_BUCKET(va) <= end && 3233 m->psind == 1 && sp_enabled && 3234 pmap_enter_section(pmap, va, m, prot)) 3235 m = &m[L1_S_SIZE / PAGE_SIZE - 1]; 3236 else 3237 pmap_enter_locked(pmap, va, m, prot, 3238 PMAP_ENTER_NOSLEEP); 3239 m = TAILQ_NEXT(m, listq); 3240 } 3241 PMAP_UNLOCK(pmap); 3242 rw_wunlock(&pvh_global_lock); 3243} 3244 3245/* 3246 * this code makes some *MAJOR* assumptions: 3247 * 1. Current pmap & pmap exists. 3248 * 2. Not wired. 3249 * 3. Read access. 3250 * 4. No page table pages. 3251 * but is *MUCH* faster than pmap_enter... 3252 */ 3253 3254void 3255pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3256{ 3257 3258 prot &= VM_PROT_READ | VM_PROT_EXECUTE; 3259 rw_wlock(&pvh_global_lock); 3260 PMAP_LOCK(pmap); 3261 pmap_enter_locked(pmap, va, m, prot, PMAP_ENTER_NOSLEEP); 3262 PMAP_UNLOCK(pmap); 3263 rw_wunlock(&pvh_global_lock); 3264} 3265 3266/* 3267 * Routine: pmap_change_wiring 3268 * Function: Change the wiring attribute for a map/virtual-address 3269 * pair. 3270 * In/out conditions: 3271 * The mapping must already exist in the pmap. 
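 * A 1MB superpage whose wiring would change is demoted to 4KB pages
 * first, since the wired attribute is tracked in the per-mapping pv
 * entry rather than in the section descriptor itself.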
3272 */ 3273void 3274pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3275{ 3276 struct l2_bucket *l2b; 3277 struct md_page *pvh; 3278 struct pv_entry *pve; 3279 pd_entry_t *pl1pd, l1pd; 3280 pt_entry_t *ptep, pte; 3281 vm_page_t m; 3282 3283 rw_wlock(&pvh_global_lock); 3284 PMAP_LOCK(pmap); 3285 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3286 l1pd = *pl1pd; 3287 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 3288 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 3289 KASSERT((m != NULL) && ((m->oflags & VPO_UNMANAGED) == 0), 3290 ("pmap_change_wiring: unmanaged superpage should not " 3291 "be changed")); 3292 KASSERT(pmap != pmap_kernel(), 3293 ("pmap_change_wiring: managed kernel superpage " 3294 "should not exist")); 3295 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 3296 pve = pmap_find_pv(pvh, pmap, trunc_1mpage(va)); 3297 if (!wired != ((pve->pv_flags & PVF_WIRED) == 0)) { 3298 if (!pmap_demote_section(pmap, va)) 3299 panic("pmap_change_wiring: demotion failed"); 3300 } else 3301 goto out; 3302 } 3303 l2b = pmap_get_l2_bucket(pmap, va); 3304 KASSERT(l2b, ("No l2b bucket in pmap_change_wiring")); 3305 ptep = &l2b->l2b_kva[l2pte_index(va)]; 3306 pte = *ptep; 3307 m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); 3308 if (m != NULL) 3309 pmap_modify_pv(m, pmap, va, PVF_WIRED, 3310 wired == TRUE ? PVF_WIRED : 0); 3311out: 3312 rw_wunlock(&pvh_global_lock); 3313 PMAP_UNLOCK(pmap); 3314} 3315 3316 3317/* 3318 * Copy the range specified by src_addr/len 3319 * from the source map to the range dst_addr/len 3320 * in the destination map. 3321 * 3322 * This routine is only advisory and need not do anything. 3323 */ 3324void 3325pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 3326 vm_size_t len, vm_offset_t src_addr) 3327{ 3328} 3329 3330 3331/* 3332 * Routine: pmap_extract 3333 * Function: 3334 * Extract the physical page address associated 3335 * with the given map/virtual_address pair. 3336 */ 3337vm_paddr_t 3338pmap_extract(pmap_t pmap, vm_offset_t va) 3339{ 3340 vm_paddr_t pa; 3341 3342 if (kernel_vm_end != 0) 3343 PMAP_LOCK(pmap); 3344 pa = pmap_extract_locked(pmap, va); 3345 if (kernel_vm_end != 0) 3346 PMAP_UNLOCK(pmap); 3347 return (pa); 3348} 3349 3350static vm_paddr_t 3351pmap_extract_locked(pmap_t pmap, vm_offset_t va) 3352{ 3353 struct l2_dtable *l2; 3354 pd_entry_t l1pd; 3355 pt_entry_t *ptep, pte; 3356 vm_paddr_t pa; 3357 u_int l1idx; 3358 3359 if (kernel_vm_end != 0 && pmap != kernel_pmap) 3360 PMAP_ASSERT_LOCKED(pmap); 3361 l1idx = L1_IDX(va); 3362 l1pd = pmap->pm_l1->l1_kva[l1idx]; 3363 if (l1pte_section_p(l1pd)) { 3364 /* XXX: what to do about the bits > 32 ? */ 3365 if (l1pd & L1_S_SUPERSEC) 3366 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); 3367 else 3368 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); 3369 } else { 3370 /* 3371 * Note that we can't rely on the validity of the L1 3372 * descriptor as an indication that a mapping exists. 3373 * We have to look it up in the L2 dtable. 
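 * The lookup below is, in outline (sketch only, mirroring the code):
 *
 *	l2   = pm_l2[L2_IDX(l1idx)];
 *	ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva;
 *	pa   = (ptep[l2pte_index(va)] & frame) | (va & offset);
 *
 * with 0 returned as soon as any link in that chain is missing.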
3374 */ 3375 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 3376 if (l2 == NULL || 3377 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) 3378 return (0); 3379 pte = ptep[l2pte_index(va)]; 3380 if (pte == 0) 3381 return (0); 3382 switch (pte & L2_TYPE_MASK) { 3383 case L2_TYPE_L: 3384 pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); 3385 break; 3386 default: 3387 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); 3388 break; 3389 } 3390 } 3391 return (pa); 3392} 3393 3394/* 3395 * Atomically extract and hold the physical page with the given 3396 * pmap and virtual address pair if that mapping permits the given 3397 * protection. 3398 * 3399 */ 3400vm_page_t 3401pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 3402{ 3403 struct l2_dtable *l2; 3404 pd_entry_t l1pd; 3405 pt_entry_t *ptep, pte; 3406 vm_paddr_t pa, paddr; 3407 vm_page_t m = NULL; 3408 u_int l1idx; 3409 l1idx = L1_IDX(va); 3410 paddr = 0; 3411 3412 PMAP_LOCK(pmap); 3413retry: 3414 l1pd = pmap->pm_l1->l1_kva[l1idx]; 3415 if (l1pte_section_p(l1pd)) { 3416 /* XXX: what to do about the bits > 32 ? */ 3417 if (l1pd & L1_S_SUPERSEC) 3418 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); 3419 else 3420 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); 3421 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) 3422 goto retry; 3423 if (L1_S_WRITABLE(l1pd) || (prot & VM_PROT_WRITE) == 0) { 3424 m = PHYS_TO_VM_PAGE(pa); 3425 vm_page_hold(m); 3426 } 3427 } else { 3428 /* 3429 * Note that we can't rely on the validity of the L1 3430 * descriptor as an indication that a mapping exists. 3431 * We have to look it up in the L2 dtable. 3432 */ 3433 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 3434 3435 if (l2 == NULL || 3436 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { 3437 PMAP_UNLOCK(pmap); 3438 return (NULL); 3439 } 3440 3441 ptep = &ptep[l2pte_index(va)]; 3442 pte = *ptep; 3443 3444 if (pte == 0) { 3445 PMAP_UNLOCK(pmap); 3446 return (NULL); 3447 } else if ((prot & VM_PROT_WRITE) && (pte & L2_APX)) { 3448 PMAP_UNLOCK(pmap); 3449 return (NULL); 3450 } else { 3451 switch (pte & L2_TYPE_MASK) { 3452 case L2_TYPE_L: 3453 panic("extract and hold section mapping"); 3454 break; 3455 default: 3456 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); 3457 break; 3458 } 3459 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) 3460 goto retry; 3461 m = PHYS_TO_VM_PAGE(pa); 3462 vm_page_hold(m); 3463 } 3464 3465 } 3466 3467 PMAP_UNLOCK(pmap); 3468 PA_UNLOCK_COND(paddr); 3469 return (m); 3470} 3471 3472/* 3473 * Initialize a preallocated and zeroed pmap structure, 3474 * such as one in a vmspace structure. 3475 */ 3476 3477int 3478pmap_pinit(pmap_t pmap) 3479{ 3480 PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap)); 3481 3482 pmap_alloc_l1(pmap); 3483 bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); 3484 3485 CPU_ZERO(&pmap->pm_active); 3486 3487 TAILQ_INIT(&pmap->pm_pvchunk); 3488 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 3489 pmap->pm_stats.resident_count = 1; 3490 if (vector_page < KERNBASE) { 3491 pmap_enter(pmap, vector_page, 3492 PHYS_TO_VM_PAGE(systempage.pv_pa), VM_PROT_READ, 3493 PMAP_ENTER_WIRED, 0); 3494 } 3495 return (1); 3496} 3497 3498 3499/*************************************************** 3500 * Superpage management routines. 
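 * These routines maintain 1MB section ("superpage") mappings: promotion
 * of 256 contiguous, identically protected 4KB mappings into a single
 * section, demotion back to 4KB pages, and the pv entry bookkeeping
 * that goes with both operations.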
3501 ***************************************************/ 3502 3503static PMAP_INLINE struct pv_entry * 3504pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 3505{ 3506 pv_entry_t pv; 3507 3508 rw_assert(&pvh_global_lock, RA_WLOCKED); 3509 3510 pv = pmap_find_pv(pvh, pmap, va); 3511 if (pv != NULL) 3512 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 3513 3514 return (pv); 3515} 3516 3517static void 3518pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 3519{ 3520 pv_entry_t pv; 3521 3522 pv = pmap_pvh_remove(pvh, pmap, va); 3523 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 3524 pmap_free_pv_entry(pmap, pv); 3525} 3526 3527static boolean_t 3528pmap_pv_insert_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3529{ 3530 struct md_page *pvh; 3531 pv_entry_t pv; 3532 3533 rw_assert(&pvh_global_lock, RA_WLOCKED); 3534 if (pv_entry_count < pv_entry_high_water && 3535 (pv = pmap_get_pv_entry(pmap, TRUE)) != NULL) { 3536 pv->pv_va = va; 3537 pvh = pa_to_pvh(pa); 3538 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 3539 return (TRUE); 3540 } else 3541 return (FALSE); 3542} 3543 3544/* 3545 * Create the pv entries for each of the pages within a superpage. 3546 */ 3547static void 3548pmap_pv_demote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3549{ 3550 struct md_page *pvh; 3551 pv_entry_t pve, pv; 3552 vm_offset_t va_last; 3553 vm_page_t m; 3554 3555 rw_assert(&pvh_global_lock, RA_WLOCKED); 3556 KASSERT((pa & L1_S_OFFSET) == 0, 3557 ("pmap_pv_demote_section: pa is not 1mpage aligned")); 3558 3559 /* 3560 * Transfer the 1mpage's pv entry for this mapping to the first 3561 * page's pv list. 3562 */ 3563 pvh = pa_to_pvh(pa); 3564 va = trunc_1mpage(va); 3565 pv = pmap_pvh_remove(pvh, pmap, va); 3566 KASSERT(pv != NULL, ("pmap_pv_demote_section: pv not found")); 3567 m = PHYS_TO_VM_PAGE(pa); 3568 TAILQ_INSERT_HEAD(&m->md.pv_list, pv, pv_list); 3569 /* Instantiate the remaining pv entries. */ 3570 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; 3571 do { 3572 m++; 3573 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3574 ("pmap_pv_demote_section: page %p is not managed", m)); 3575 va += PAGE_SIZE; 3576 pve = pmap_get_pv_entry(pmap, FALSE); 3577 pmap_enter_pv(m, pve, pmap, va, pv->pv_flags); 3578 } while (va < va_last); 3579} 3580 3581static void 3582pmap_pv_promote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3583{ 3584 struct md_page *pvh; 3585 pv_entry_t pv; 3586 vm_offset_t va_last; 3587 vm_page_t m; 3588 3589 rw_assert(&pvh_global_lock, RA_WLOCKED); 3590 KASSERT((pa & L1_S_OFFSET) == 0, 3591 ("pmap_pv_promote_section: pa is not 1mpage aligned")); 3592 3593 /* 3594 * Transfer the first page's pv entry for this mapping to the 3595 * 1mpage's pv list. Aside from avoiding the cost of a call 3596 * to get_pv_entry(), a transfer avoids the possibility that 3597 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() 3598 * removes one of the mappings that is being promoted. 3599 */ 3600 m = PHYS_TO_VM_PAGE(pa); 3601 va = trunc_1mpage(va); 3602 pv = pmap_pvh_remove(&m->md, pmap, va); 3603 KASSERT(pv != NULL, ("pmap_pv_promote_section: pv not found")); 3604 pvh = pa_to_pvh(pa); 3605 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 3606 /* Free the remaining pv entries in the newly mapped section pages */ 3607 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; 3608 do { 3609 m++; 3610 va += PAGE_SIZE; 3611 /* 3612 * Don't care the flags, first pv contains sufficient 3613 * information for all of the pages so nothing is really lost. 
3614 */ 3615 pmap_pvh_free(&m->md, pmap, va); 3616 } while (va < va_last); 3617} 3618 3619/* 3620 * Tries to create a 1MB page mapping. Returns TRUE if successful and 3621 * FALSE otherwise. Fails if (1) page is unmanageg, kernel pmap or vectors 3622 * page, (2) a mapping already exists at the specified virtual address, or 3623 * (3) a pv entry cannot be allocated without reclaiming another pv entry. 3624 */ 3625static boolean_t 3626pmap_enter_section(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3627{ 3628 pd_entry_t *pl1pd; 3629 vm_offset_t pa; 3630 struct l2_bucket *l2b; 3631 3632 rw_assert(&pvh_global_lock, RA_WLOCKED); 3633 PMAP_ASSERT_LOCKED(pmap); 3634 3635 /* Skip kernel, vectors page and unmanaged mappings */ 3636 if ((pmap == pmap_kernel()) || (L1_IDX(va) == L1_IDX(vector_page)) || 3637 ((m->oflags & VPO_UNMANAGED) != 0)) { 3638 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" 3639 " in pmap %p", va, pmap); 3640 return (FALSE); 3641 } 3642 /* 3643 * Check whether this is a valid section superpage entry or 3644 * there is a l2_bucket associated with that L1 page directory. 3645 */ 3646 va = trunc_1mpage(va); 3647 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3648 l2b = pmap_get_l2_bucket(pmap, va); 3649 if ((*pl1pd & L1_S_PROTO) || (l2b != NULL)) { 3650 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" 3651 " in pmap %p", va, pmap); 3652 return (FALSE); 3653 } 3654 pa = VM_PAGE_TO_PHYS(m); 3655 /* 3656 * Abort this mapping if its PV entry could not be created. 3657 */ 3658 if (!pmap_pv_insert_section(pmap, va, VM_PAGE_TO_PHYS(m))) { 3659 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" 3660 " in pmap %p", va, pmap); 3661 return (FALSE); 3662 } 3663 /* 3664 * Increment counters. 3665 */ 3666 pmap->pm_stats.resident_count += L2_PTE_NUM_TOTAL; 3667 /* 3668 * Despite permissions, mark the superpage read-only. 3669 */ 3670 prot &= ~VM_PROT_WRITE; 3671 /* 3672 * Map the superpage. 3673 */ 3674 pmap_map_section(pmap, va, pa, prot, FALSE); 3675 3676 pmap_section_mappings++; 3677 CTR2(KTR_PMAP, "pmap_enter_section: success for va %#lx" 3678 " in pmap %p", va, pmap); 3679 return (TRUE); 3680} 3681 3682/* 3683 * pmap_remove_section: do the things to unmap a superpage in a process 3684 */ 3685static void 3686pmap_remove_section(pmap_t pmap, vm_offset_t sva) 3687{ 3688 struct md_page *pvh; 3689 struct l2_bucket *l2b; 3690 pd_entry_t *pl1pd, l1pd; 3691 vm_offset_t eva, va; 3692 vm_page_t m; 3693 3694 PMAP_ASSERT_LOCKED(pmap); 3695 if ((pmap == pmap_kernel()) || (L1_IDX(sva) == L1_IDX(vector_page))) 3696 return; 3697 3698 KASSERT((sva & L1_S_OFFSET) == 0, 3699 ("pmap_remove_section: sva is not 1mpage aligned")); 3700 3701 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 3702 l1pd = *pl1pd; 3703 3704 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 3705 KASSERT((m != NULL && ((m->oflags & VPO_UNMANAGED) == 0)), 3706 ("pmap_remove_section: no corresponding vm_page or " 3707 "page unmanaged")); 3708 3709 pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL; 3710 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 3711 pmap_pvh_free(pvh, pmap, sva); 3712 eva = L2_NEXT_BUCKET(sva); 3713 for (va = sva, m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 3714 va < eva; va += PAGE_SIZE, m++) { 3715 /* 3716 * Mark base pages referenced but skip marking them dirty. 3717 * If the superpage is writeable, hence all base pages were 3718 * already marked as dirty in pmap_fault_fixup() before 3719 * promotion. 
Reference bit however, might not have been set 3720 * for each base page when the superpage was created at once, 3721 * not as a result of promotion. 3722 */ 3723 if (L1_S_REFERENCED(l1pd)) 3724 vm_page_aflag_set(m, PGA_REFERENCED); 3725 if (TAILQ_EMPTY(&m->md.pv_list) && 3726 TAILQ_EMPTY(&pvh->pv_list)) 3727 vm_page_aflag_clear(m, PGA_WRITEABLE); 3728 } 3729 3730 l2b = pmap_get_l2_bucket(pmap, sva); 3731 if (l2b != NULL) { 3732 KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL, 3733 ("pmap_remove_section: l2_bucket occupancy error")); 3734 pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL); 3735 } 3736 /* Now invalidate L1 slot */ 3737 *pl1pd = 0; 3738 PTE_SYNC(pl1pd); 3739 if (L1_S_EXECUTABLE(l1pd)) 3740 cpu_tlb_flushID_SE(sva); 3741 else 3742 cpu_tlb_flushD_SE(sva); 3743 cpu_cpwait(); 3744} 3745 3746/* 3747 * Tries to promote the 256, contiguous 4KB page mappings that are 3748 * within a single l2_bucket to a single 1MB section mapping. 3749 * For promotion to occur, two conditions must be met: (1) the 4KB page 3750 * mappings must map aligned, contiguous physical memory and (2) the 4KB page 3751 * mappings must have identical characteristics. 3752 */ 3753static void 3754pmap_promote_section(pmap_t pmap, vm_offset_t va) 3755{ 3756 pt_entry_t *firstptep, firstpte, oldpte, pa, *pte; 3757 vm_page_t m, oldm; 3758 vm_offset_t first_va, old_va; 3759 struct l2_bucket *l2b = NULL; 3760 vm_prot_t prot; 3761 struct pv_entry *pve, *first_pve; 3762 3763 PMAP_ASSERT_LOCKED(pmap); 3764 3765 prot = VM_PROT_ALL; 3766 /* 3767 * Skip promoting kernel pages. This is justified by following: 3768 * 1. Kernel is already mapped using section mappings in each pmap 3769 * 2. Managed mappings within the kernel are not to be promoted anyway 3770 */ 3771 if (pmap == pmap_kernel()) { 3772 pmap_section_p_failures++; 3773 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3774 " in pmap %p", va, pmap); 3775 return; 3776 } 3777 /* Do not attemp to promote vectors pages */ 3778 if (L1_IDX(va) == L1_IDX(vector_page)) { 3779 pmap_section_p_failures++; 3780 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3781 " in pmap %p", va, pmap); 3782 return; 3783 } 3784 /* 3785 * Examine the first PTE in the specified l2_bucket. Abort if this PTE 3786 * is either invalid, unused, or does not map the first 4KB physical 3787 * page within 1MB page. 3788 */ 3789 first_va = trunc_1mpage(va); 3790 l2b = pmap_get_l2_bucket(pmap, first_va); 3791 KASSERT(l2b != NULL, ("pmap_promote_section: trying to promote " 3792 "not existing l2 bucket")); 3793 firstptep = &l2b->l2b_kva[0]; 3794 3795 firstpte = *firstptep; 3796 if ((l2pte_pa(firstpte) & L1_S_OFFSET) != 0) { 3797 pmap_section_p_failures++; 3798 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3799 " in pmap %p", va, pmap); 3800 return; 3801 } 3802 3803 if ((firstpte & (L2_S_PROTO | L2_S_REF)) != (L2_S_PROTO | L2_S_REF)) { 3804 pmap_section_p_failures++; 3805 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3806 " in pmap %p", va, pmap); 3807 return; 3808 } 3809 /* 3810 * ARM uses pv_entry to mark particular mapping WIRED so don't promote 3811 * unmanaged pages since it is impossible to determine, whether the 3812 * page is wired or not if there is no corresponding pv_entry. 
3813 */ 3814 m = PHYS_TO_VM_PAGE(l2pte_pa(firstpte)); 3815 if (m && ((m->oflags & VPO_UNMANAGED) != 0)) { 3816 pmap_section_p_failures++; 3817 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3818 " in pmap %p", va, pmap); 3819 return; 3820 } 3821 first_pve = pmap_find_pv(&m->md, pmap, first_va); 3822 /* 3823 * PTE is modified only on write due to modified bit 3824 * emulation. If the entry is referenced and writable 3825 * then it is modified and we don't clear write enable. 3826 * Otherwise, writing is disabled in PTE anyway and 3827 * we just configure protections for the section mapping 3828 * that is going to be created. 3829 */ 3830 if ((first_pve->pv_flags & PVF_WRITE) != 0) { 3831 if (!L2_S_WRITABLE(firstpte)) { 3832 first_pve->pv_flags &= ~PVF_WRITE; 3833 prot &= ~VM_PROT_WRITE; 3834 } 3835 } else 3836 prot &= ~VM_PROT_WRITE; 3837 3838 if (!L2_S_EXECUTABLE(firstpte)) 3839 prot &= ~VM_PROT_EXECUTE; 3840 3841 /* 3842 * Examine each of the other PTEs in the specified l2_bucket. 3843 * Abort if this PTE maps an unexpected 4KB physical page or 3844 * does not have identical characteristics to the first PTE. 3845 */ 3846 pa = l2pte_pa(firstpte) + ((L2_PTE_NUM_TOTAL - 1) * PAGE_SIZE); 3847 old_va = L2_NEXT_BUCKET(first_va) - PAGE_SIZE; 3848 3849 for (pte = (firstptep + L2_PTE_NUM_TOTAL - 1); pte > firstptep; pte--) { 3850 oldpte = *pte; 3851 if (l2pte_pa(oldpte) != pa) { 3852 pmap_section_p_failures++; 3853 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3854 "va %#x in pmap %p", va, pmap); 3855 return; 3856 } 3857 if ((oldpte & L2_S_PROMOTE) != (firstpte & L2_S_PROMOTE)) { 3858 pmap_section_p_failures++; 3859 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3860 "va %#x in pmap %p", va, pmap); 3861 return; 3862 } 3863 oldm = PHYS_TO_VM_PAGE(l2pte_pa(oldpte)); 3864 if (oldm && ((oldm->oflags & VPO_UNMANAGED) != 0)) { 3865 pmap_section_p_failures++; 3866 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3867 "va %#x in pmap %p", va, pmap); 3868 return; 3869 } 3870 3871 pve = pmap_find_pv(&oldm->md, pmap, old_va); 3872 if (pve == NULL) { 3873 pmap_section_p_failures++; 3874 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3875 "va %#x old_va %x - no pve", va, old_va); 3876 return; 3877 } 3878 3879 if (!L2_S_WRITABLE(oldpte) && (pve->pv_flags & PVF_WRITE)) 3880 pve->pv_flags &= ~PVF_WRITE; 3881 if (pve->pv_flags != first_pve->pv_flags) { 3882 pmap_section_p_failures++; 3883 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3884 "va %#x in pmap %p", va, pmap); 3885 return; 3886 } 3887 3888 old_va -= PAGE_SIZE; 3889 pa -= PAGE_SIZE; 3890 } 3891 /* 3892 * Promote the pv entries. 3893 */ 3894 pmap_pv_promote_section(pmap, first_va, l2pte_pa(firstpte)); 3895 /* 3896 * Map the superpage. 3897 */ 3898 pmap_map_section(pmap, first_va, l2pte_pa(firstpte), prot, TRUE); 3899 /* 3900 * Invalidate all possible TLB mappings for small 3901 * pages within the newly created superpage. 3902 * Rely on the first PTE's attributes since they 3903 * have to be consistent across all of the base pages 3904 * within the superpage. If page is not executable it 3905 * is at least referenced. 3906 * The fastest way to do that is to invalidate whole 3907 * TLB at once instead of executing 256 CP15 TLB 3908 * invalidations by single entry. TLBs usually maintain 3909 * several dozen entries so loss of unrelated entries is 3910 * still a less agresive approach. 
3911 */ 3912 if (L2_S_EXECUTABLE(firstpte)) 3913 cpu_tlb_flushID(); 3914 else 3915 cpu_tlb_flushD(); 3916 cpu_cpwait(); 3917 3918 pmap_section_promotions++; 3919 CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x" 3920 " in pmap %p", first_va, pmap); 3921} 3922 3923/* 3924 * Fills a l2_bucket with mappings to consecutive physical pages. 3925 */ 3926static void 3927pmap_fill_l2b(struct l2_bucket *l2b, pt_entry_t newpte) 3928{ 3929 pt_entry_t *ptep; 3930 int i; 3931 3932 for (i = 0; i < L2_PTE_NUM_TOTAL; i++) { 3933 ptep = &l2b->l2b_kva[i]; 3934 *ptep = newpte; 3935 PTE_SYNC(ptep); 3936 3937 newpte += PAGE_SIZE; 3938 } 3939 3940 l2b->l2b_occupancy = L2_PTE_NUM_TOTAL; 3941} 3942 3943/* 3944 * Tries to demote a 1MB section mapping. If demotion fails, the 3945 * 1MB section mapping is invalidated. 3946 */ 3947static boolean_t 3948pmap_demote_section(pmap_t pmap, vm_offset_t va) 3949{ 3950 struct l2_bucket *l2b; 3951 struct pv_entry *l1pdpve; 3952 struct md_page *pvh; 3953 pd_entry_t *pl1pd, l1pd, newl1pd; 3954 pt_entry_t *firstptep, newpte; 3955 vm_offset_t pa; 3956 vm_page_t m; 3957 3958 PMAP_ASSERT_LOCKED(pmap); 3959 /* 3960 * According to assumptions described in pmap_promote_section, 3961 * kernel is and always should be mapped using 1MB section mappings. 3962 * What more, managed kernel pages were not to be promoted. 3963 */ 3964 KASSERT(pmap != pmap_kernel() && L1_IDX(va) != L1_IDX(vector_page), 3965 ("pmap_demote_section: forbidden section mapping")); 3966 3967 va = trunc_1mpage(va); 3968 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3969 l1pd = *pl1pd; 3970 KASSERT((l1pd & L1_TYPE_MASK) == L1_S_PROTO, 3971 ("pmap_demote_section: not section or invalid section")); 3972 3973 pa = l1pd & L1_S_FRAME; 3974 m = PHYS_TO_VM_PAGE(pa); 3975 KASSERT((m != NULL && (m->oflags & VPO_UNMANAGED) == 0), 3976 ("pmap_demote_section: no vm_page for selected superpage or" 3977 "unmanaged")); 3978 3979 pvh = pa_to_pvh(pa); 3980 l1pdpve = pmap_find_pv(pvh, pmap, va); 3981 KASSERT(l1pdpve != NULL, ("pmap_demote_section: no pv entry for " 3982 "managed page")); 3983 3984 l2b = pmap_get_l2_bucket(pmap, va); 3985 if (l2b == NULL) { 3986 KASSERT((l1pdpve->pv_flags & PVF_WIRED) == 0, 3987 ("pmap_demote_section: No l2_bucket for wired mapping")); 3988 /* 3989 * Invalidate the 1MB section mapping and return 3990 * "failure" if the mapping was never accessed or the 3991 * allocation of the new l2_bucket fails. 3992 */ 3993 if (!L1_S_REFERENCED(l1pd) || 3994 (l2b = pmap_alloc_l2_bucket(pmap, va)) == NULL) { 3995 /* Unmap and invalidate superpage. */ 3996 pmap_remove_section(pmap, trunc_1mpage(va)); 3997 CTR2(KTR_PMAP, "pmap_demote_section: failure for " 3998 "va %#x in pmap %p", va, pmap); 3999 return (FALSE); 4000 } 4001 } 4002 4003 /* 4004 * Now we should have corresponding l2_bucket available. 4005 * Let's process it to recreate 256 PTEs for each base page 4006 * within superpage. 4007 */ 4008 newpte = pa | L1_S_DEMOTE(l1pd); 4009 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) 4010 newpte |= pte_l2_s_cache_mode; 4011 4012 /* 4013 * If the l2_bucket is new, initialize it. 4014 */ 4015 if (l2b->l2b_occupancy == 0) 4016 pmap_fill_l2b(l2b, newpte); 4017 else { 4018 firstptep = &l2b->l2b_kva[0]; 4019 KASSERT(l2pte_pa(*firstptep) == (pa), 4020 ("pmap_demote_section: firstpte and newpte map different " 4021 "physical addresses")); 4022 /* 4023 * If the mapping has changed attributes, update the page table 4024 * entries. 
4025 */ 4026 if ((*firstptep & L2_S_PROMOTE) != (L1_S_DEMOTE(l1pd))) 4027 pmap_fill_l2b(l2b, newpte); 4028 } 4029 /* Demote PV entry */ 4030 pmap_pv_demote_section(pmap, va, pa); 4031 4032 /* Now fix-up L1 */ 4033 newl1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 4034 *pl1pd = newl1pd; 4035 PTE_SYNC(pl1pd); 4036 /* Invalidate old TLB mapping */ 4037 if (L1_S_EXECUTABLE(l1pd)) 4038 cpu_tlb_flushID_SE(va); 4039 else if (L1_S_REFERENCED(l1pd)) 4040 cpu_tlb_flushD_SE(va); 4041 cpu_cpwait(); 4042 4043 pmap_section_demotions++; 4044 CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x" 4045 " in pmap %p", va, pmap); 4046 return (TRUE); 4047} 4048 4049/*************************************************** 4050 * page management routines. 4051 ***************************************************/ 4052 4053/* 4054 * We are in a serious low memory condition. Resort to 4055 * drastic measures to free some pages so we can allocate 4056 * another pv entry chunk. 4057 */ 4058static vm_page_t 4059pmap_pv_reclaim(pmap_t locked_pmap) 4060{ 4061 struct pch newtail; 4062 struct pv_chunk *pc; 4063 struct l2_bucket *l2b = NULL; 4064 pmap_t pmap; 4065 pd_entry_t *pl1pd; 4066 pt_entry_t *ptep; 4067 pv_entry_t pv; 4068 vm_offset_t va; 4069 vm_page_t free, m, m_pc; 4070 uint32_t inuse; 4071 int bit, field, freed, idx; 4072 4073 PMAP_ASSERT_LOCKED(locked_pmap); 4074 pmap = NULL; 4075 free = m_pc = NULL; 4076 TAILQ_INIT(&newtail); 4077 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || 4078 free == NULL)) { 4079 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 4080 if (pmap != pc->pc_pmap) { 4081 if (pmap != NULL) { 4082 cpu_tlb_flushID(); 4083 cpu_cpwait(); 4084 if (pmap != locked_pmap) 4085 PMAP_UNLOCK(pmap); 4086 } 4087 pmap = pc->pc_pmap; 4088 /* Avoid deadlock and lock recursion. */ 4089 if (pmap > locked_pmap) 4090 PMAP_LOCK(pmap); 4091 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 4092 pmap = NULL; 4093 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4094 continue; 4095 } 4096 } 4097 4098 /* 4099 * Destroy every non-wired, 4 KB page mapping in the chunk. 4100 */ 4101 freed = 0; 4102 for (field = 0; field < _NPCM; field++) { 4103 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 4104 inuse != 0; inuse &= ~(1UL << bit)) { 4105 bit = ffs(inuse) - 1; 4106 idx = field * sizeof(inuse) * NBBY + bit; 4107 pv = &pc->pc_pventry[idx]; 4108 va = pv->pv_va; 4109 4110 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 4111 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4112 continue; 4113 if (pv->pv_flags & PVF_WIRED) 4114 continue; 4115 4116 l2b = pmap_get_l2_bucket(pmap, va); 4117 KASSERT(l2b != NULL, ("No l2 bucket")); 4118 ptep = &l2b->l2b_kva[l2pte_index(va)]; 4119 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 4120 KASSERT((vm_offset_t)m >= KERNBASE, 4121 ("Trying to access non-existent page " 4122 "va %x pte %x", va, *ptep)); 4123 *ptep = 0; 4124 PTE_SYNC(ptep); 4125 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 4126 if (TAILQ_EMPTY(&m->md.pv_list)) 4127 vm_page_aflag_clear(m, PGA_WRITEABLE); 4128 pc->pc_map[field] |= 1UL << bit; 4129 freed++; 4130 } 4131 } 4132 4133 if (freed == 0) { 4134 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4135 continue; 4136 } 4137 /* Every freed mapping is for a 4 KB page. 
*/ 4138 pmap->pm_stats.resident_count -= freed; 4139 PV_STAT(pv_entry_frees += freed); 4140 PV_STAT(pv_entry_spare += freed); 4141 pv_entry_count -= freed; 4142 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4143 for (field = 0; field < _NPCM; field++) 4144 if (pc->pc_map[field] != pc_freemask[field]) { 4145 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 4146 pc_list); 4147 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4148 4149 /* 4150 * One freed pv entry in locked_pmap is 4151 * sufficient. 4152 */ 4153 if (pmap == locked_pmap) 4154 goto out; 4155 break; 4156 } 4157 if (field == _NPCM) { 4158 PV_STAT(pv_entry_spare -= _NPCPV); 4159 PV_STAT(pc_chunk_count--); 4160 PV_STAT(pc_chunk_frees++); 4161 /* Entire chunk is free; return it. */ 4162 m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4163 pmap_qremove((vm_offset_t)pc, 1); 4164 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4165 break; 4166 } 4167 } 4168out: 4169 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 4170 if (pmap != NULL) { 4171 cpu_tlb_flushID(); 4172 cpu_cpwait(); 4173 if (pmap != locked_pmap) 4174 PMAP_UNLOCK(pmap); 4175 } 4176 return (m_pc); 4177} 4178 4179/* 4180 * free the pv_entry back to the free list 4181 */ 4182static void 4183pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv) 4184{ 4185 struct pv_chunk *pc; 4186 int bit, field, idx; 4187 4188 rw_assert(&pvh_global_lock, RA_WLOCKED); 4189 PMAP_ASSERT_LOCKED(pmap); 4190 PV_STAT(pv_entry_frees++); 4191 PV_STAT(pv_entry_spare++); 4192 pv_entry_count--; 4193 pc = pv_to_chunk(pv); 4194 idx = pv - &pc->pc_pventry[0]; 4195 field = idx / (sizeof(u_long) * NBBY); 4196 bit = idx % (sizeof(u_long) * NBBY); 4197 pc->pc_map[field] |= 1ul << bit; 4198 for (idx = 0; idx < _NPCM; idx++) 4199 if (pc->pc_map[idx] != pc_freemask[idx]) { 4200 /* 4201 * 98% of the time, pc is already at the head of the 4202 * list. If it isn't already, move it to the head. 
4203 */ 4204 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != 4205 pc)) { 4206 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4207 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 4208 pc_list); 4209 } 4210 return; 4211 } 4212 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4213 pmap_free_pv_chunk(pc); 4214} 4215 4216static void 4217pmap_free_pv_chunk(struct pv_chunk *pc) 4218{ 4219 vm_page_t m; 4220 4221 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 4222 PV_STAT(pv_entry_spare -= _NPCPV); 4223 PV_STAT(pc_chunk_count--); 4224 PV_STAT(pc_chunk_frees++); 4225 /* entire chunk is free, return it */ 4226 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4227 pmap_qremove((vm_offset_t)pc, 1); 4228 vm_page_unwire(m, 0); 4229 vm_page_free(m); 4230 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4231 4232} 4233 4234static pv_entry_t 4235pmap_get_pv_entry(pmap_t pmap, boolean_t try) 4236{ 4237 static const struct timeval printinterval = { 60, 0 }; 4238 static struct timeval lastprint; 4239 struct pv_chunk *pc; 4240 pv_entry_t pv; 4241 vm_page_t m; 4242 int bit, field, idx; 4243 4244 rw_assert(&pvh_global_lock, RA_WLOCKED); 4245 PMAP_ASSERT_LOCKED(pmap); 4246 PV_STAT(pv_entry_allocs++); 4247 pv_entry_count++; 4248 4249 if (pv_entry_count > pv_entry_high_water) 4250 if (ratecheck(&lastprint, &printinterval)) 4251 printf("%s: Approaching the limit on PV entries.\n", 4252 __func__); 4253retry: 4254 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 4255 if (pc != NULL) { 4256 for (field = 0; field < _NPCM; field++) { 4257 if (pc->pc_map[field]) { 4258 bit = ffs(pc->pc_map[field]) - 1; 4259 break; 4260 } 4261 } 4262 if (field < _NPCM) { 4263 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; 4264 pv = &pc->pc_pventry[idx]; 4265 pc->pc_map[field] &= ~(1ul << bit); 4266 /* If this was the last item, move it to tail */ 4267 for (field = 0; field < _NPCM; field++) 4268 if (pc->pc_map[field] != 0) { 4269 PV_STAT(pv_entry_spare--); 4270 return (pv); /* not full, return */ 4271 } 4272 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4273 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 4274 PV_STAT(pv_entry_spare--); 4275 return (pv); 4276 } 4277 } 4278 /* 4279 * Access to the ptelist "pv_vafree" is synchronized by the pvh 4280 * global lock. If "pv_vafree" is currently non-empty, it will 4281 * remain non-empty until pmap_ptelist_alloc() completes. 4282 */ 4283 if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 4284 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 4285 if (try) { 4286 pv_entry_count--; 4287 PV_STAT(pc_chunk_tryfail++); 4288 return (NULL); 4289 } 4290 m = pmap_pv_reclaim(pmap); 4291 if (m == NULL) 4292 goto retry; 4293 } 4294 PV_STAT(pc_chunk_count++); 4295 PV_STAT(pc_chunk_allocs++); 4296 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 4297 pmap_qenter((vm_offset_t)pc, &m, 1); 4298 pc->pc_pmap = pmap; 4299 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 4300 for (field = 1; field < _NPCM; field++) 4301 pc->pc_map[field] = pc_freemask[field]; 4302 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 4303 pv = &pc->pc_pventry[0]; 4304 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 4305 PV_STAT(pv_entry_spare += _NPCPV - 1); 4306 return (pv); 4307} 4308 4309/* 4310 * Remove the given range of addresses from the specified map. 4311 * 4312 * It is assumed that the start and end are properly 4313 * rounded to the page size. 
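 *
 * A minimal usage sketch (illustrative only): a caller unmapping a single
 * page-aligned page would invoke
 *	pmap_remove(pmap, va, va + PAGE_SIZE);
 * while a range covering a whole 1MB section lets the section fast path
 * below remove the superpage in one step via pmap_remove_section().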
4314 */ 4315#define PMAP_REMOVE_CLEAN_LIST_SIZE 3 4316void 4317pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 4318{ 4319 struct l2_bucket *l2b; 4320 vm_offset_t next_bucket; 4321 pd_entry_t *pl1pd, l1pd; 4322 pt_entry_t *ptep; 4323 u_int total; 4324 u_int mappings, is_exec, is_refd; 4325 int flushall = 0; 4326 4327 4328 /* 4329 * we lock in the pmap => pv_head direction 4330 */ 4331 4332 rw_wlock(&pvh_global_lock); 4333 PMAP_LOCK(pmap); 4334 total = 0; 4335 while (sva < eva) { 4336 /* 4337 * Check for large page. 4338 */ 4339 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 4340 l1pd = *pl1pd; 4341 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 4342 KASSERT((l1pd & L1_S_DOM_MASK) != 4343 L1_S_DOM(PMAP_DOMAIN_KERNEL), ("pmap_remove: " 4344 "Trying to remove kernel section mapping")); 4345 /* 4346 * Are we removing the entire large page? If not, 4347 * demote the mapping and fall through. 4348 */ 4349 if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) && 4350 eva >= L2_NEXT_BUCKET(sva)) { 4351 pmap_remove_section(pmap, sva); 4352 sva = L2_NEXT_BUCKET(sva); 4353 continue; 4354 } else if (!pmap_demote_section(pmap, sva)) { 4355 /* The large page mapping was destroyed. */ 4356 sva = L2_NEXT_BUCKET(sva); 4357 continue; 4358 } 4359 } 4360 /* 4361 * Do one L2 bucket's worth at a time. 4362 */ 4363 next_bucket = L2_NEXT_BUCKET(sva); 4364 if (next_bucket > eva) 4365 next_bucket = eva; 4366 4367 l2b = pmap_get_l2_bucket(pmap, sva); 4368 if (l2b == NULL) { 4369 sva = next_bucket; 4370 continue; 4371 } 4372 4373 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4374 mappings = 0; 4375 4376 while (sva < next_bucket) { 4377 struct vm_page *m; 4378 pt_entry_t pte; 4379 vm_paddr_t pa; 4380 4381 pte = *ptep; 4382 4383 if (pte == 0) { 4384 /* 4385 * Nothing here, move along 4386 */ 4387 sva += PAGE_SIZE; 4388 ptep++; 4389 continue; 4390 } 4391 4392 pmap->pm_stats.resident_count--; 4393 pa = l2pte_pa(pte); 4394 is_exec = 0; 4395 is_refd = 1; 4396 4397 /* 4398 * Update flags. In a number of circumstances, 4399 * we could cluster a lot of these and do a 4400 * number of sequential pages in one go. 4401 */ 4402 if ((m = PHYS_TO_VM_PAGE(pa)) != NULL) { 4403 struct pv_entry *pve; 4404 4405 pve = pmap_remove_pv(m, pmap, sva); 4406 if (pve) { 4407 is_exec = PTE_BEEN_EXECD(pte); 4408 is_refd = PTE_BEEN_REFD(pte); 4409 pmap_free_pv_entry(pmap, pve); 4410 } 4411 } 4412 4413 *ptep = 0; 4414 PTE_SYNC(ptep); 4415 if (pmap_is_current(pmap)) { 4416 total++; 4417 if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) { 4418 if (is_exec) 4419 cpu_tlb_flushID_SE(sva); 4420 else if (is_refd) 4421 cpu_tlb_flushD_SE(sva); 4422 } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) 4423 flushall = 1; 4424 } 4425 4426 sva += PAGE_SIZE; 4427 ptep++; 4428 mappings++; 4429 } 4430 4431 pmap_free_l2_bucket(pmap, l2b, mappings); 4432 } 4433 4434 rw_wunlock(&pvh_global_lock); 4435 if (flushall) 4436 cpu_tlb_flushID(); 4437 cpu_cpwait(); 4438 4439 PMAP_UNLOCK(pmap); 4440} 4441 4442/* 4443 * pmap_zero_page() 4444 * 4445 * Zero a given physical page by mapping it at a page hook point. 4446 * In doing the zero page op, the page we zero is mapped cachable, as with 4447 * StrongARM accesses to non-cached pages are non-burst making writing 4448 * _any_ bulk data very slow. 
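 *
 * The zeroing goes through a per-CPU hook mapping: cpu_czpages[cpuid]
 * supplies a reserved KVA page (dstva) whose PTE (dstptep) is temporarily
 * pointed at the target physical page, and the caches are cleaned
 * afterwards so that stale dirty lines cannot later overwrite memory that
 * gets remapped as non-cached or write-through.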
4449 */ 4450static void 4451pmap_zero_page_gen(vm_page_t m, int off, int size) 4452{ 4453 struct czpages *czp; 4454 4455 KASSERT(TAILQ_EMPTY(&m->md.pv_list), 4456 ("pmap_zero_page_gen: page has mappings")); 4457 4458 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 4459 4460 sched_pin(); 4461 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4462 mtx_lock(&czp->lock); 4463 4464 /* 4465 * Hook in the page, zero it. 4466 */ 4467 *czp->dstptep = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF; 4468 pmap_set_prot(czp->dstptep, VM_PROT_WRITE, 0); 4469 PTE_SYNC(czp->dstptep); 4470 cpu_tlb_flushD_SE(czp->dstva); 4471 cpu_cpwait(); 4472 4473 if (off || size != PAGE_SIZE) 4474 bzero((void *)(czp->dstva + off), size); 4475 else 4476 bzero_page(czp->dstva); 4477 4478 /* 4479 * Although aliasing is not possible, if we use temporary mappings with 4480 * memory that will be mapped later as non-cached or with write-through 4481 * caches, we might end up overwriting it when calling wbinv_all. So 4482 * make sure caches are clean after the operation. 4483 */ 4484 cpu_idcache_wbinv_range(czp->dstva, size); 4485 pmap_l2cache_wbinv_range(czp->dstva, phys, size); 4486 4487 mtx_unlock(&czp->lock); 4488 sched_unpin(); 4489} 4490 4491/* 4492 * pmap_zero_page zeros the specified hardware page by mapping 4493 * the page into KVM and using bzero to clear its contents. 4494 */ 4495void 4496pmap_zero_page(vm_page_t m) 4497{ 4498 pmap_zero_page_gen(m, 0, PAGE_SIZE); 4499} 4500 4501 4502/* 4503 * pmap_zero_page_area zeros the specified hardware page by mapping 4504 * the page into KVM and using bzero to clear its contents. 4505 * 4506 * off and size may not cover an area beyond a single hardware page. 4507 */ 4508void 4509pmap_zero_page_area(vm_page_t m, int off, int size) 4510{ 4511 4512 pmap_zero_page_gen(m, off, size); 4513} 4514 4515 4516/* 4517 * pmap_zero_page_idle zeros the specified hardware page by mapping 4518 * the page into KVM and using bzero to clear its contents. This 4519 * is intended to be called from the vm_pagezero process only and 4520 * outside of Giant. 4521 */ 4522void 4523pmap_zero_page_idle(vm_page_t m) 4524{ 4525 4526 pmap_zero_page(m); 4527} 4528 4529/* 4530 * pmap_copy_page copies the specified (machine independent) 4531 * page by mapping the page into virtual memory and using 4532 * bcopy to copy the page, one machine dependent page at a 4533 * time. 4534 */ 4535 4536/* 4537 * pmap_copy_page() 4538 * 4539 * Copy one physical page into another, by mapping the pages into 4540 * hook points. The same comment regarding cachability as in 4541 * pmap_zero_page also applies here. 4542 */ 4543void 4544pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) 4545{ 4546 struct czpages *czp; 4547 4548 sched_pin(); 4549 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4550 mtx_lock(&czp->lock); 4551 4552 /* 4553 * Map the pages into the page hook points, copy them, and purge the 4554 * cache for the appropriate page. 
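 * For each hook point the sequence is: store the new PTE, PTE_SYNC() it,
 * invalidate the stale TLB entry for the hook VA, cpu_cpwait(), and only
 * then touch the data; the source hook is mapped read-only and the
 * destination read/write.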
4555 */ 4556 *czp->srcptep = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF; 4557 pmap_set_prot(czp->srcptep, VM_PROT_READ, 0); 4558 PTE_SYNC(czp->srcptep); 4559 cpu_tlb_flushD_SE(czp->srcva); 4560 *czp->dstptep = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF; 4561 pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0); 4562 PTE_SYNC(czp->dstptep); 4563 cpu_tlb_flushD_SE(czp->dstva); 4564 cpu_cpwait(); 4565 4566 bcopy_page(czp->srcva, czp->dstva); 4567 4568 /* 4569 * Although aliasing is not possible, if we use temporary mappings with 4570 * memory that will be mapped later as non-cached or with write-through 4571 * caches, we might end up overwriting it when calling wbinv_all. So 4572 * make sure caches are clean after the operation. 4573 */ 4574 cpu_idcache_wbinv_range(czp->dstva, PAGE_SIZE); 4575 pmap_l2cache_wbinv_range(czp->dstva, dst, PAGE_SIZE); 4576 4577 mtx_unlock(&czp->lock); 4578 sched_unpin(); 4579} 4580 4581int unmapped_buf_allowed = 1; 4582 4583void 4584pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 4585 vm_offset_t b_offset, int xfersize) 4586{ 4587 vm_page_t a_pg, b_pg; 4588 vm_offset_t a_pg_offset, b_pg_offset; 4589 int cnt; 4590 struct czpages *czp; 4591 4592 sched_pin(); 4593 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4594 mtx_lock(&czp->lock); 4595 4596 while (xfersize > 0) { 4597 a_pg = ma[a_offset >> PAGE_SHIFT]; 4598 a_pg_offset = a_offset & PAGE_MASK; 4599 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 4600 b_pg = mb[b_offset >> PAGE_SHIFT]; 4601 b_pg_offset = b_offset & PAGE_MASK; 4602 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 4603 *czp->srcptep = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) | 4604 pte_l2_s_cache_mode | L2_S_REF; 4605 pmap_set_prot(czp->srcptep, VM_PROT_READ, 0); 4606 PTE_SYNC(czp->srcptep); 4607 cpu_tlb_flushD_SE(czp->srcva); 4608 *czp->dstptep = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) | 4609 pte_l2_s_cache_mode | L2_S_REF; 4610 pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0); 4611 PTE_SYNC(czp->dstptep); 4612 cpu_tlb_flushD_SE(czp->dstva); 4613 cpu_cpwait(); 4614 bcopy((char *)czp->srcva + a_pg_offset, (char *)czp->dstva + b_pg_offset, 4615 cnt); 4616 cpu_idcache_wbinv_range(czp->dstva + b_pg_offset, cnt); 4617 pmap_l2cache_wbinv_range(czp->dstva + b_pg_offset, 4618 VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt); 4619 xfersize -= cnt; 4620 a_offset += cnt; 4621 b_offset += cnt; 4622 } 4623 4624 mtx_unlock(&czp->lock); 4625 sched_unpin(); 4626} 4627 4628void 4629pmap_copy_page(vm_page_t src, vm_page_t dst) 4630{ 4631 4632 if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size && 4633 _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst), 4634 (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0) 4635 return; 4636 4637 pmap_copy_page_generic(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 4638} 4639 4640/* 4641 * this routine returns true if a physical page resides 4642 * in the given pmap. 
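 * Note that this is the "quick" variant: it examines at most 16 pv entries
 * in total, first on the page's 4KB pv list and then on the corresponding
 * 1MB pv list, so a FALSE result is a hint rather than a guarantee.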
4643 */ 4644boolean_t 4645pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 4646{ 4647 struct md_page *pvh; 4648 pv_entry_t pv; 4649 int loops = 0; 4650 boolean_t rv; 4651 4652 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4653 ("pmap_page_exists_quick: page %p is not managed", m)); 4654 rv = FALSE; 4655 rw_wlock(&pvh_global_lock); 4656 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4657 if (PV_PMAP(pv) == pmap) { 4658 rv = TRUE; 4659 break; 4660 } 4661 loops++; 4662 if (loops >= 16) 4663 break; 4664 } 4665 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 4666 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4667 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4668 if (PV_PMAP(pv) == pmap) { 4669 rv = TRUE; 4670 break; 4671 } 4672 loops++; 4673 if (loops >= 16) 4674 break; 4675 } 4676 } 4677 rw_wunlock(&pvh_global_lock); 4678 return (rv); 4679} 4680 4681/* 4682 * pmap_page_wired_mappings: 4683 * 4684 * Return the number of managed mappings to the given physical page 4685 * that are wired. 4686 */ 4687int 4688pmap_page_wired_mappings(vm_page_t m) 4689{ 4690 int count; 4691 4692 count = 0; 4693 if ((m->oflags & VPO_UNMANAGED) != 0) 4694 return (count); 4695 rw_wlock(&pvh_global_lock); 4696 count = pmap_pvh_wired_mappings(&m->md, count); 4697 if ((m->flags & PG_FICTITIOUS) == 0) { 4698 count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), 4699 count); 4700 } 4701 rw_wunlock(&pvh_global_lock); 4702 return (count); 4703} 4704 4705/* 4706 * pmap_pvh_wired_mappings: 4707 * 4708 * Return the updated number "count" of managed mappings that are wired. 4709 */ 4710static int 4711pmap_pvh_wired_mappings(struct md_page *pvh, int count) 4712{ 4713 pv_entry_t pv; 4714 4715 rw_assert(&pvh_global_lock, RA_WLOCKED); 4716 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4717 if ((pv->pv_flags & PVF_WIRED) != 0) 4718 count++; 4719 } 4720 return (count); 4721} 4722 4723/* 4724 * Returns TRUE if any of the given mappings were referenced and FALSE 4725 * otherwise. Both page and section mappings are supported. 4726 */ 4727static boolean_t 4728pmap_is_referenced_pvh(struct md_page *pvh) 4729{ 4730 struct l2_bucket *l2b; 4731 pv_entry_t pv; 4732 pd_entry_t *pl1pd; 4733 pt_entry_t *ptep; 4734 pmap_t pmap; 4735 boolean_t rv; 4736 4737 rw_assert(&pvh_global_lock, RA_WLOCKED); 4738 rv = FALSE; 4739 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4740 pmap = PV_PMAP(pv); 4741 PMAP_LOCK(pmap); 4742 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 4743 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4744 rv = L1_S_REFERENCED(*pl1pd); 4745 else { 4746 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 4747 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 4748 rv = L2_S_REFERENCED(*ptep); 4749 } 4750 PMAP_UNLOCK(pmap); 4751 if (rv) 4752 break; 4753 } 4754 return (rv); 4755} 4756 4757/* 4758 * pmap_is_referenced: 4759 * 4760 * Return whether or not the specified physical page was referenced 4761 * in any physical maps. 4762 */ 4763boolean_t 4764pmap_is_referenced(vm_page_t m) 4765{ 4766 boolean_t rv; 4767 4768 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4769 ("pmap_is_referenced: page %p is not managed", m)); 4770 rw_wlock(&pvh_global_lock); 4771 rv = pmap_is_referenced_pvh(&m->md) || 4772 ((m->flags & PG_FICTITIOUS) == 0 && 4773 pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4774 rw_wunlock(&pvh_global_lock); 4775 return (rv); 4776} 4777 4778/* 4779 * pmap_ts_referenced: 4780 * 4781 * Return the count of reference bits for a page, clearing all of them. 
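 *	The walking and clearing is delegated to pmap_clearbit(m, PVF_REF);
 *	this wrapper only asserts that the page is managed.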
4782 */ 4783int 4784pmap_ts_referenced(vm_page_t m) 4785{ 4786 4787 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4788 ("pmap_ts_referenced: page %p is not managed", m)); 4789 return (pmap_clearbit(m, PVF_REF)); 4790} 4791 4792/* 4793 * Returns TRUE if any of the given mappings were used to modify 4794 * physical memory. Otherwise, returns FALSE. Both page and 1MB section 4795 * mappings are supported. 4796 */ 4797static boolean_t 4798pmap_is_modified_pvh(struct md_page *pvh) 4799{ 4800 pd_entry_t *pl1pd; 4801 struct l2_bucket *l2b; 4802 pv_entry_t pv; 4803 pt_entry_t *ptep; 4804 pmap_t pmap; 4805 boolean_t rv; 4806 4807 rw_assert(&pvh_global_lock, RA_WLOCKED); 4808 rv = FALSE; 4809 4810 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4811 pmap = PV_PMAP(pv); 4812 PMAP_LOCK(pmap); 4813 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 4814 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4815 rv = L1_S_WRITABLE(*pl1pd); 4816 else { 4817 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 4818 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 4819 rv = L2_S_WRITABLE(*ptep); 4820 } 4821 PMAP_UNLOCK(pmap); 4822 if (rv) 4823 break; 4824 } 4825 4826 return (rv); 4827} 4828 4829boolean_t 4830pmap_is_modified(vm_page_t m) 4831{ 4832 boolean_t rv; 4833 4834 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4835 ("pmap_is_modified: page %p is not managed", m)); 4836 /* 4837 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 4838 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 4839 * is clear, no PTEs can have APX cleared. 4840 */ 4841 VM_OBJECT_ASSERT_WLOCKED(m->object); 4842 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 4843 return (FALSE); 4844 rw_wlock(&pvh_global_lock); 4845 rv = pmap_is_modified_pvh(&m->md) || 4846 ((m->flags & PG_FICTITIOUS) == 0 && 4847 pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4848 rw_wunlock(&pvh_global_lock); 4849 return (rv); 4850} 4851 4852/* 4853 * Apply the given advice to the specified range of addresses within the 4854 * given pmap. Depending on the advice, clear the referenced and/or 4855 * modified flags in each mapping. 4856 */ 4857void 4858pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 4859{ 4860 struct l2_bucket *l2b; 4861 struct pv_entry *pve; 4862 pd_entry_t *pl1pd, l1pd; 4863 pt_entry_t *ptep, opte, pte; 4864 vm_offset_t next_bucket; 4865 vm_page_t m; 4866 4867 if (advice != MADV_DONTNEED && advice != MADV_FREE) 4868 return; 4869 rw_wlock(&pvh_global_lock); 4870 PMAP_LOCK(pmap); 4871 for (; sva < eva; sva = next_bucket) { 4872 next_bucket = L2_NEXT_BUCKET(sva); 4873 if (next_bucket < sva) 4874 next_bucket = eva; 4875 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 4876 l1pd = *pl1pd; 4877 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 4878 if (pmap == pmap_kernel()) 4879 continue; 4880 if (!pmap_demote_section(pmap, sva)) { 4881 /* 4882 * The large page mapping was destroyed. 4883 */ 4884 continue; 4885 } 4886 /* 4887 * Unless the page mappings are wired, remove the 4888 * mapping to a single page so that a subsequent 4889 * access may repromote. Since the underlying 4890 * l2_bucket is fully populated, this removal 4891 * never frees an entire l2_bucket. 
4892 */ 4893 l2b = pmap_get_l2_bucket(pmap, sva); 4894 KASSERT(l2b != NULL, 4895 ("pmap_advise: no l2 bucket for " 4896 "va 0x%#x, pmap 0x%p", sva, pmap)); 4897 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4898 opte = *ptep; 4899 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 4900 KASSERT(m != NULL, 4901 ("pmap_advise: no vm_page for demoted superpage")); 4902 pve = pmap_find_pv(&m->md, pmap, sva); 4903 KASSERT(pve != NULL, 4904 ("pmap_advise: no PV entry for managed mapping")); 4905 if ((pve->pv_flags & PVF_WIRED) == 0) { 4906 pmap_free_l2_bucket(pmap, l2b, 1); 4907 pve = pmap_remove_pv(m, pmap, sva); 4908 pmap_free_pv_entry(pmap, pve); 4909 *ptep = 0; 4910 PTE_SYNC(ptep); 4911 if (pmap_is_current(pmap)) { 4912 if (PTE_BEEN_EXECD(opte)) 4913 cpu_tlb_flushID_SE(sva); 4914 else if (PTE_BEEN_REFD(opte)) 4915 cpu_tlb_flushD_SE(sva); 4916 } 4917 } 4918 } 4919 if (next_bucket > eva) 4920 next_bucket = eva; 4921 l2b = pmap_get_l2_bucket(pmap, sva); 4922 if (l2b == NULL) 4923 continue; 4924 for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4925 sva != next_bucket; ptep++, sva += PAGE_SIZE) { 4926 opte = pte = *ptep; 4927 if ((opte & L2_S_PROTO) == 0) 4928 continue; 4929 m = PHYS_TO_VM_PAGE(l2pte_pa(opte)); 4930 if (m == NULL || (m->oflags & VPO_UNMANAGED) != 0) 4931 continue; 4932 else if (L2_S_WRITABLE(opte)) { 4933 if (advice == MADV_DONTNEED) { 4934 /* 4935 * Don't need to mark the page 4936 * dirty as it was already marked as 4937 * such in pmap_fault_fixup() or 4938 * pmap_enter_locked(). 4939 * Just clear the state. 4940 */ 4941 } else 4942 pte |= L2_APX; 4943 4944 pte &= ~L2_S_REF; 4945 *ptep = pte; 4946 PTE_SYNC(ptep); 4947 } else if (L2_S_REFERENCED(opte)) { 4948 pte &= ~L2_S_REF; 4949 *ptep = pte; 4950 PTE_SYNC(ptep); 4951 } else 4952 continue; 4953 if (pmap_is_current(pmap)) { 4954 if (PTE_BEEN_EXECD(opte)) 4955 cpu_tlb_flushID_SE(sva); 4956 else if (PTE_BEEN_REFD(opte)) 4957 cpu_tlb_flushD_SE(sva); 4958 } 4959 } 4960 } 4961 cpu_cpwait(); 4962 rw_wunlock(&pvh_global_lock); 4963 PMAP_UNLOCK(pmap); 4964} 4965 4966/* 4967 * Clear the modify bits on the specified physical page. 4968 */ 4969void 4970pmap_clear_modify(vm_page_t m) 4971{ 4972 4973 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4974 ("pmap_clear_modify: page %p is not managed", m)); 4975 VM_OBJECT_ASSERT_WLOCKED(m->object); 4976 KASSERT(!vm_page_xbusied(m), 4977 ("pmap_clear_modify: page %p is exclusive busied", m)); 4978 4979 /* 4980 * If the page is not PGA_WRITEABLE, then no mappings can be modified. 4981 * If the object containing the page is locked and the page is not 4982 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 4983 */ 4984 if ((m->aflags & PGA_WRITEABLE) == 0) 4985 return; 4986 if (pmap_is_modified(m)) 4987 pmap_clearbit(m, PVF_MOD); 4988} 4989 4990 4991/* 4992 * Clear the write and modified bits in each of the given page's mappings. 4993 */ 4994void 4995pmap_remove_write(vm_page_t m) 4996{ 4997 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4998 ("pmap_remove_write: page %p is not managed", m)); 4999 5000 /* 5001 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 5002 * set by another thread while the object is locked. Thus, 5003 * if PGA_WRITEABLE is clear, no page table entries need updating. 
5004 */ 5005 VM_OBJECT_ASSERT_WLOCKED(m->object); 5006 if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) 5007 pmap_clearbit(m, PVF_WRITE); 5008} 5009 5010 5011/* 5012 * perform the pmap work for mincore 5013 */ 5014int 5015pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 5016{ 5017 struct l2_bucket *l2b; 5018 pd_entry_t *pl1pd, l1pd; 5019 pt_entry_t *ptep, pte; 5020 vm_paddr_t pa; 5021 vm_page_t m; 5022 int val; 5023 boolean_t managed; 5024 5025 PMAP_LOCK(pmap); 5026retry: 5027 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(addr)]; 5028 l1pd = *pl1pd; 5029 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 5030 pa = (l1pd & L1_S_FRAME); 5031 val = MINCORE_SUPER | MINCORE_INCORE; 5032 if (L1_S_WRITABLE(l1pd)) 5033 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 5034 managed = FALSE; 5035 m = PHYS_TO_VM_PAGE(pa); 5036 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) 5037 managed = TRUE; 5038 if (managed) { 5039 if (L1_S_REFERENCED(l1pd)) 5040 val |= MINCORE_REFERENCED | 5041 MINCORE_REFERENCED_OTHER; 5042 } 5043 } else { 5044 l2b = pmap_get_l2_bucket(pmap, addr); 5045 if (l2b == NULL) { 5046 val = 0; 5047 goto out; 5048 } 5049 ptep = &l2b->l2b_kva[l2pte_index(addr)]; 5050 pte = *ptep; 5051 if (!l2pte_valid(pte)) { 5052 val = 0; 5053 goto out; 5054 } 5055 val = MINCORE_INCORE; 5056 if (L2_S_WRITABLE(pte)) 5057 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 5058 managed = FALSE; 5059 pa = l2pte_pa(pte); 5060 m = PHYS_TO_VM_PAGE(pa); 5061 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) 5062 managed = TRUE; 5063 if (managed) { 5064 if (L2_S_REFERENCED(pte)) 5065 val |= MINCORE_REFERENCED | 5066 MINCORE_REFERENCED_OTHER; 5067 } 5068 } 5069 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 5070 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 5071 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 5072 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 5073 goto retry; 5074 } else 5075out: 5076 PA_UNLOCK_COND(*locked_pa); 5077 PMAP_UNLOCK(pmap); 5078 return (val); 5079} 5080 5081void 5082pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 5083{ 5084} 5085 5086/* 5087 * Increase the starting virtual address of the given mapping if a 5088 * different alignment might result in more superpage mappings. 5089 */ 5090void 5091pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 5092 vm_offset_t *addr, vm_size_t size) 5093{ 5094 vm_offset_t superpage_offset; 5095 5096 if (size < NBPDR) 5097 return; 5098 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 5099 offset += ptoa(object->pg_color); 5100 superpage_offset = offset & PDRMASK; 5101 if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || 5102 (*addr & PDRMASK) == superpage_offset) 5103 return; 5104 if ((*addr & PDRMASK) < superpage_offset) 5105 *addr = (*addr & ~PDRMASK) + superpage_offset; 5106 else 5107 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; 5108} 5109 5110/* 5111 * pmap_map_section: 5112 * 5113 * Create a single section mapping. 
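 *	Both va and pa must be 1MB-aligned (the KASSERT below enforces
 *	this); the resulting descriptor packs the physical base, protection,
 *	domain and cache-mode bits into the single L1 slot selected by
 *	L1_IDX(va), so no L2 table is consumed by the mapping.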
5114 */ 5115void 5116pmap_map_section(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot, 5117 boolean_t ref) 5118{ 5119 pd_entry_t *pl1pd, l1pd; 5120 pd_entry_t fl; 5121 5122 KASSERT(((va | pa) & L1_S_OFFSET) == 0, 5123 ("Not a valid section mapping")); 5124 5125 fl = pte_l1_s_cache_mode; 5126 5127 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 5128 l1pd = L1_S_PROTO | pa | L1_S_PROT(PTE_USER, prot) | fl | 5129 L1_S_DOM(pmap->pm_domain); 5130 5131 /* Mark page referenced if this section is a result of a promotion. */ 5132 if (ref == TRUE) 5133 l1pd |= L1_S_REF; 5134#ifdef SMP 5135 l1pd |= L1_SHARED; 5136#endif 5137 *pl1pd = l1pd; 5138 PTE_SYNC(pl1pd); 5139} 5140 5141/* 5142 * pmap_link_l2pt: 5143 * 5144 * Link the L2 page table specified by l2pv.pv_pa into the L1 5145 * page table at the slot for "va". 5146 */ 5147void 5148pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv) 5149{ 5150 pd_entry_t *pde = (pd_entry_t *) l1pt, proto; 5151 u_int slot = va >> L1_S_SHIFT; 5152 5153 proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; 5154 5155#ifdef VERBOSE_INIT_ARM 5156 printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va); 5157#endif 5158 5159 pde[slot + 0] = proto | (l2pv->pv_pa + 0x000); 5160 PTE_SYNC(&pde[slot]); 5161 5162 SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list); 5163 5164} 5165 5166/* 5167 * pmap_map_entry 5168 * 5169 * Create a single page mapping. 5170 */ 5171void 5172pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, 5173 int cache) 5174{ 5175 pd_entry_t *pde = (pd_entry_t *) l1pt; 5176 pt_entry_t fl; 5177 pt_entry_t *ptep; 5178 5179 KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin")); 5180 5181 fl = l2s_mem_types[cache]; 5182 5183 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) 5184 panic("pmap_map_entry: no L2 table for VA 0x%08x", va); 5185 5186 ptep = (pt_entry_t *)kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK); 5187 5188 if (ptep == NULL) 5189 panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va); 5190 5191 ptep[l2pte_index(va)] = L2_S_PROTO | pa | fl | L2_S_REF; 5192 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); 5193 PTE_SYNC(&ptep[l2pte_index(va)]); 5194} 5195 5196/* 5197 * pmap_map_chunk: 5198 * 5199 * Map a chunk of memory using the most efficient mappings 5200 * possible (section. large page, small page) into the 5201 * provided L1 and L2 tables at the specified virtual address. 5202 */ 5203vm_size_t 5204pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, 5205 vm_size_t size, int prot, int type) 5206{ 5207 pd_entry_t *pde = (pd_entry_t *) l1pt; 5208 pt_entry_t *ptep, f1, f2s, f2l; 5209 vm_size_t resid; 5210 int i; 5211 5212 resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); 5213 5214 if (l1pt == 0) 5215 panic("pmap_map_chunk: no L1 table provided"); 5216 5217#ifdef VERBOSE_INIT_ARM 5218 printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x " 5219 "prot=0x%x type=%d\n", pa, va, size, resid, prot, type); 5220#endif 5221 5222 f1 = l1_mem_types[type]; 5223 f2l = l2l_mem_types[type]; 5224 f2s = l2s_mem_types[type]; 5225 5226 size = resid; 5227 5228 while (resid > 0) { 5229 /* See if we can use a section mapping. 
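 * (the L1_S_MAPPABLE_P() check); failing that, the loop below falls back
 * to 64KB large pages, written as 16 identical L2 entries, and finally to
 * 4KB small pages.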
*/ 5230 if (L1_S_MAPPABLE_P(va, pa, resid)) { 5231#ifdef VERBOSE_INIT_ARM 5232 printf("S"); 5233#endif 5234 pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | 5235 L1_S_PROT(PTE_KERNEL, prot | VM_PROT_EXECUTE) | 5236 f1 | L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_S_REF; 5237 PTE_SYNC(&pde[va >> L1_S_SHIFT]); 5238 va += L1_S_SIZE; 5239 pa += L1_S_SIZE; 5240 resid -= L1_S_SIZE; 5241 continue; 5242 } 5243 5244 /* 5245 * Ok, we're going to use an L2 table. Make sure 5246 * one is actually in the corresponding L1 slot 5247 * for the current VA. 5248 */ 5249 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) 5250 panic("pmap_map_chunk: no L2 table for VA 0x%08x", va); 5251 5252 ptep = (pt_entry_t *) kernel_pt_lookup( 5253 pde[L1_IDX(va)] & L1_C_ADDR_MASK); 5254 if (ptep == NULL) 5255 panic("pmap_map_chunk: can't find L2 table for VA" 5256 "0x%08x", va); 5257 /* See if we can use a L2 large page mapping. */ 5258 if (L2_L_MAPPABLE_P(va, pa, resid)) { 5259#ifdef VERBOSE_INIT_ARM 5260 printf("L"); 5261#endif 5262 for (i = 0; i < 16; i++) { 5263 ptep[l2pte_index(va) + i] = 5264 L2_L_PROTO | pa | 5265 L2_L_PROT(PTE_KERNEL, prot) | f2l; 5266 PTE_SYNC(&ptep[l2pte_index(va) + i]); 5267 } 5268 va += L2_L_SIZE; 5269 pa += L2_L_SIZE; 5270 resid -= L2_L_SIZE; 5271 continue; 5272 } 5273 5274 /* Use a small page mapping. */ 5275#ifdef VERBOSE_INIT_ARM 5276 printf("P"); 5277#endif 5278 ptep[l2pte_index(va)] = L2_S_PROTO | pa | f2s | L2_S_REF; 5279 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); 5280 PTE_SYNC(&ptep[l2pte_index(va)]); 5281 va += PAGE_SIZE; 5282 pa += PAGE_SIZE; 5283 resid -= PAGE_SIZE; 5284 } 5285#ifdef VERBOSE_INIT_ARM 5286 printf("\n"); 5287#endif 5288 return (size); 5289 5290} 5291 5292int 5293pmap_dmap_iscurrent(pmap_t pmap) 5294{ 5295 return(pmap_is_current(pmap)); 5296} 5297 5298void 5299pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 5300{ 5301 /* 5302 * Remember the memattr in a field that gets used to set the appropriate 5303 * bits in the PTEs as mappings are established. 5304 */ 5305 m->md.pv_memattr = ma; 5306 5307 /* 5308 * It appears that this function can only be called before any mappings 5309 * for the page are established on ARM. If this ever changes, this code 5310 * will need to walk the pv_list and make each of the existing mappings 5311 * uncacheable, being careful to sync caches and PTEs (and maybe 5312 * invalidate TLB?) for any current mapping it modifies. 5313 */ 5314 if (TAILQ_FIRST(&m->md.pv_list) != NULL) 5315 panic("Can't change memattr on page with existing mappings"); 5316} 5317
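/*
 * The pv_chunk code above locates free pv_entry slots with
 * "idx = field * sizeof(map word) * NBBY + bit" and frees them by setting
 * the bit again.  The stand-alone sketch below is illustrative only: it is
 * kept out of the build with #if 0, assumes a hosted C environment, and
 * EX_NPCM with its value of 3 is made up for the example.  It exercises the
 * same arithmetic used by pmap_get_pv_entry() and pmap_free_pv_entry().
 */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <strings.h>		/* ffs() */

#define	EX_NBBY		8	/* bits per byte */
#define	EX_NPCM		3	/* map words per chunk; example value only */

int
main(void)
{
	/* Bit 0 of word 0 starts cleared, like "pc_map[0] & ~1ul" above. */
	uint32_t pc_map[EX_NPCM] = { 0xfffffffeu, 0xffffffffu, 0xffffffffu };
	int bit, field, idx;

	/* Allocate: find the first map word with a free (set) bit. */
	for (field = 0; field < EX_NPCM; field++)
		if (pc_map[field] != 0)
			break;
	bit = ffs(pc_map[field]) - 1;
	idx = field * sizeof(pc_map[field]) * EX_NBBY + bit;
	pc_map[field] &= ~(1u << bit);
	printf("allocated pv_entry slot %d\n", idx);	/* prints 1 */

	/* Free: recover field/bit from idx and mark the slot free again. */
	field = idx / (sizeof(uint32_t) * EX_NBBY);
	bit = idx % (sizeof(uint32_t) * EX_NBBY);
	pc_map[field] |= 1u << bit;

	return (0);
}
#endif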