pmap-v6.c revision 324400
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * Copyright (c) 1994 John S. Dyson 4 * Copyright (c) 1994 David Greenman 5 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 6 * Copyright (c) 2014-2016 Svatopluk Kraus <skra@FreeBSD.org> 7 * Copyright (c) 2014-2016 Michal Meloun <mmel@FreeBSD.org> 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * the Systems Programming Group of the University of Utah Computer 12 * Science Department and William Jolitz of UUNET Technologies Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 39 */ 40/*- 41 * Copyright (c) 2003 Networks Associates Technology, Inc. 42 * All rights reserved. 43 * 44 * This software was developed for the FreeBSD Project by Jake Burkholder, 45 * Safeport Network Services, and Network Associates Laboratories, the 46 * Security Research Division of Network Associates, Inc. under 47 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 48 * CHATS research program. 49 * 50 * Redistribution and use in source and binary forms, with or without 51 * modification, are permitted provided that the following conditions 52 * are met: 53 * 1. Redistributions of source code must retain the above copyright 54 * notice, this list of conditions and the following disclaimer. 55 * 2. Redistributions in binary form must reproduce the above copyright 56 * notice, this list of conditions and the following disclaimer in the 57 * documentation and/or other materials provided with the distribution. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 */ 71 72#include <sys/cdefs.h> 73__FBSDID("$FreeBSD: stable/11/sys/arm/arm/pmap-v6.c 324400 2017-10-07 21:13:54Z alc $"); 74 75/* 76 * Manages physical address maps. 77 * 78 * Since the information managed by this module is 79 * also stored by the logical address mapping module, 80 * this module may throw away valid virtual-to-physical 81 * mappings at almost any time. However, invalidations 82 * of virtual-to-physical mappings must be done as 83 * requested. 84 * 85 * In order to cope with hardware architectures which 86 * make virtual-to-physical map invalidates expensive, 87 * this module may delay invalidate or reduced protection 88 * operations until such time as they are actually 89 * necessary. This module is given full information as 90 * to which processors are currently using which maps, 91 * and to when physical maps must be made correct. 
92 */ 93 94#include "opt_vm.h" 95#include "opt_pmap.h" 96#include "opt_ddb.h" 97 98#include <sys/param.h> 99#include <sys/systm.h> 100#include <sys/kernel.h> 101#include <sys/ktr.h> 102#include <sys/lock.h> 103#include <sys/proc.h> 104#include <sys/rwlock.h> 105#include <sys/malloc.h> 106#include <sys/vmmeter.h> 107#include <sys/malloc.h> 108#include <sys/mman.h> 109#include <sys/sf_buf.h> 110#include <sys/smp.h> 111#include <sys/sched.h> 112#include <sys/sysctl.h> 113 114#ifdef DDB 115#include <ddb/ddb.h> 116#endif 117 118#include <machine/physmem.h> 119 120#include <vm/vm.h> 121#include <vm/uma.h> 122#include <vm/pmap.h> 123#include <vm/vm_param.h> 124#include <vm/vm_kern.h> 125#include <vm/vm_object.h> 126#include <vm/vm_map.h> 127#include <vm/vm_page.h> 128#include <vm/vm_pageout.h> 129#include <vm/vm_phys.h> 130#include <vm/vm_extern.h> 131#include <vm/vm_reserv.h> 132#include <sys/lock.h> 133#include <sys/mutex.h> 134 135#include <machine/md_var.h> 136#include <machine/pmap_var.h> 137#include <machine/cpu.h> 138#include <machine/pcb.h> 139#include <machine/sf_buf.h> 140#ifdef SMP 141#include <machine/smp.h> 142#endif 143 144#ifndef PMAP_SHPGPERPROC 145#define PMAP_SHPGPERPROC 200 146#endif 147 148#ifndef DIAGNOSTIC 149#define PMAP_INLINE __inline 150#else 151#define PMAP_INLINE 152#endif 153 154#ifdef PMAP_DEBUG 155static void pmap_zero_page_check(vm_page_t m); 156void pmap_debug(int level); 157int pmap_pid_dump(int pid); 158 159#define PDEBUG(_lev_,_stat_) \ 160 if (pmap_debug_level >= (_lev_)) \ 161 ((_stat_)) 162#define dprintf printf 163int pmap_debug_level = 1; 164#else /* PMAP_DEBUG */ 165#define PDEBUG(_lev_,_stat_) /* Nothing */ 166#define dprintf(x, arg...) 167#endif /* PMAP_DEBUG */ 168 169/* 170 * Level 2 page tables map definion ('max' is excluded). 
 */

/* Range of kernel virtual addresses covered by the PT2MAP window. */
#define PT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
#define PT2V_MAX_ADDRESS	((vm_offset_t)PT2MAP + PT2MAP_SIZE)

/* Sub-range of PT2MAP that maps user (below KERNBASE) page tables. */
#define UPT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
#define UPT2V_MAX_ADDRESS	\
    ((vm_offset_t)(PT2MAP + (KERNBASE >> PT2MAP_SHIFT)))

/*
 * Promotion to a 1MB (PTE1) page mapping requires that the corresponding
 * 4KB (PTE2) page mappings have identical settings for the following fields:
 */
#define PTE2_PROMOTE	(PTE2_V | PTE2_A | PTE2_NM | PTE2_S | PTE2_NG |	\
			 PTE2_NX | PTE2_RO | PTE2_U | PTE2_W |		\
			 PTE2_ATTR_MASK)

#define PTE1_PROMOTE	(PTE1_V | PTE1_A | PTE1_NM | PTE1_S | PTE1_NG |	\
			 PTE1_NX | PTE1_RO | PTE1_U | PTE1_W |		\
			 PTE1_ATTR_MASK)

/*
 * Translate L2 (small page) attribute/permission bits to their L1 (section)
 * counterparts, bit by bit, and vice versa.
 */
#define ATTR_TO_L1(l2_attr)	((((l2_attr) & L2_TEX0) ? L1_S_TEX0 : 0) | \
				(((l2_attr) & L2_C) ? L1_S_C : 0) | \
				(((l2_attr) & L2_B) ? L1_S_B : 0) | \
				(((l2_attr) & PTE2_A) ? PTE1_A : 0) | \
				(((l2_attr) & PTE2_NM) ? PTE1_NM : 0) | \
				(((l2_attr) & PTE2_S) ? PTE1_S : 0) | \
				(((l2_attr) & PTE2_NG) ? PTE1_NG : 0) | \
				(((l2_attr) & PTE2_NX) ? PTE1_NX : 0) | \
				(((l2_attr) & PTE2_RO) ? PTE1_RO : 0) | \
				(((l2_attr) & PTE2_U) ? PTE1_U : 0) | \
				(((l2_attr) & PTE2_W) ? PTE1_W : 0))

#define ATTR_TO_L2(l1_attr)	((((l1_attr) & L1_S_TEX0) ? L2_TEX0 : 0) | \
				(((l1_attr) & L1_S_C) ? L2_C : 0) | \
				(((l1_attr) & L1_S_B) ? L2_B : 0) | \
				(((l1_attr) & PTE1_A) ? PTE2_A : 0) | \
				(((l1_attr) & PTE1_NM) ? PTE2_NM : 0) | \
				(((l1_attr) & PTE1_S) ? PTE2_S : 0) | \
				(((l1_attr) & PTE1_NG) ? PTE2_NG : 0) | \
				(((l1_attr) & PTE1_NX) ? PTE2_NX : 0) | \
				(((l1_attr) & PTE1_RO) ? PTE2_RO : 0) | \
				(((l1_attr) & PTE1_U) ? PTE2_U : 0) | \
				(((l1_attr) & PTE1_W) ? PTE2_W : 0))

/*
 * PTE2 descriptors creation macros.
 */
#define PTE2_ATTR_DEFAULT	vm_memattr_to_pte2(VM_MEMATTR_DEFAULT)
#define PTE2_ATTR_PT		vm_memattr_to_pte2(pt_memattr)

/* Kernel page-table page mappings (PTE2_KPT_NG is the not-global variant). */
#define PTE2_KPT(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_PT)
#define PTE2_KPT_NG(pa)	PTE2_KERN_NG(pa, PTE2_AP_KRW, PTE2_ATTR_PT)

/* Kernel read-write and read-only mappings with default memory attributes. */
#define PTE2_KRW(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT)
#define PTE2_KRO(pa)	PTE2_KERN(pa, PTE2_AP_KR, PTE2_ATTR_DEFAULT)

#define PV_STATS
#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

/*
 * The boot_pt1 is used temporarily in very early boot stage as L1 page table.
 * We can init many things with no memory allocation thanks to its static
 * allocation and this brings two main advantages:
 * (1) other cores can be started very simply,
 * (2) various boot loaders can be supported as its arguments can be processed
 *     in virtual address space and can be moved to safe location before
 *     first allocation happened.
 * Only disadvantage is that boot_pt1 is used only in very early boot stage.
 * However, the table is uninitialized and so lays in bss. Therefore kernel
 * image size is not influenced.
 *
 * QQQ: In the future, maybe, boot_pt1 can be used for soft reset and
 *      CPU suspend/resume game.
 */
extern pt1_entry_t boot_pt1[];

vm_paddr_t base_pt1;			/* physical address of kern_pt1 */
pt1_entry_t *kern_pt1;			/* kernel L1 page table */
pt2_entry_t *kern_pt2tab;		/* kernel PT2TAB */
pt2_entry_t *PT2MAP;			/* base of the PT2 mapping window */

static uint32_t ttb_flags;		/* TTB attribute bits (see encode_ttb_flags()) */
static vm_memattr_t pt_memattr;		/* memory attribute used for page tables */
ttb_entry_t pmap_kern_ttb;		/* kernel TTB value (base_pt1 | ttb_flags) */

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */

static vm_offset_t kernel_vm_end_new;
vm_offset_t kernel_vm_end = KERNBASE + NKPT2PG * NPT2_IN_PG * PTE1_SIZE;
vm_offset_t vm_max_kernel_address;
vm_paddr_t kernel_l1pa;

static struct rwlock __aligned(CACHE_LINE_SIZE) pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static struct md_page *pv_table; /* XXX: Is it used only the list in md_page? */
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
int pv_maxchunks;			/* How many chunks we have KVA for */
vm_offset_t pv_vafree;			/* freelist stored in the PTE */

vm_paddr_t first_managed_pa;
#define	pa_to_pvh(pa)	(&pv_table[pte1_index(pa - first_managed_pa)])

/*
 * All those kernel PT submaps that BSD is so fond of
 */
static pt2_entry_t *CMAP3;
static caddr_t CADDR3;
caddr_t _tmppt = 0;

struct msgbuf *msgbufp = NULL; /* XXX move it to machdep.c */

/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

/* Per-CPU quick-mapping slots used by pmap_pte2_quick() and friends. */
static pt2_entry_t *PMAP1 = NULL, *PMAP2;
static pt2_entry_t *PADDR1 = NULL, *PADDR2;
#ifdef DDB
static pt2_entry_t *PMAP3;
static pt2_entry_t *PADDR3;
static int PMAP3cpu __unused; /* for SMP only */
#endif
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
    &PMAP1changedcpu, 0,
    "Number of times pmap_pte2_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
    &PMAP1changed, 0,
    "Number of times pmap_pte2_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
    &PMAP1unchanged, 0,
    "Number of times pmap_pte2_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

static __inline void pt2_wirecount_init(vm_page_t m);
static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p,
    vm_offset_t va);
void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size);

/*
 * Function to set the debug level of the pmap code.
 */
#ifdef PMAP_DEBUG
void
pmap_debug(int level)
{

	pmap_debug_level = level;
	dprintf("pmap_debug: level=%d\n", pmap_debug_level);
}
#endif /* PMAP_DEBUG */

/*
 * This table must correspond with memory attribute configuration in vm.h.
 * First entry is used for normal system mapping.
 *
 * Device memory is always marked as shared.
 * Normal memory is shared only in SMP.
 * The NOS ("not outer shareable") bits are not used yet.
 * Class 6 cannot be used on ARM11.
355 */ 356#define TEXDEF_TYPE_SHIFT 0 357#define TEXDEF_TYPE_MASK 0x3 358#define TEXDEF_INNER_SHIFT 2 359#define TEXDEF_INNER_MASK 0x3 360#define TEXDEF_OUTER_SHIFT 4 361#define TEXDEF_OUTER_MASK 0x3 362#define TEXDEF_NOS_SHIFT 6 363#define TEXDEF_NOS_MASK 0x1 364 365#define TEX(t, i, o, s) \ 366 ((t) << TEXDEF_TYPE_SHIFT) | \ 367 ((i) << TEXDEF_INNER_SHIFT) | \ 368 ((o) << TEXDEF_OUTER_SHIFT | \ 369 ((s) << TEXDEF_NOS_SHIFT)) 370 371static uint32_t tex_class[8] = { 372/* type inner cache outer cache */ 373 TEX(PRRR_MEM, NMRR_WB_WA, NMRR_WB_WA, 0), /* 0 - ATTR_WB_WA */ 374 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 1 - ATTR_NOCACHE */ 375 TEX(PRRR_DEV, NMRR_NC, NMRR_NC, 0), /* 2 - ATTR_DEVICE */ 376 TEX(PRRR_SO, NMRR_NC, NMRR_NC, 0), /* 3 - ATTR_SO */ 377 TEX(PRRR_MEM, NMRR_WT, NMRR_WT, 0), /* 4 - ATTR_WT */ 378 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 5 - NOT USED YET */ 379 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 6 - NOT USED YET */ 380 TEX(PRRR_MEM, NMRR_NC, NMRR_NC, 0), /* 7 - NOT USED YET */ 381}; 382#undef TEX 383 384static uint32_t pte2_attr_tab[8] = { 385 PTE2_ATTR_WB_WA, /* 0 - VM_MEMATTR_WB_WA */ 386 PTE2_ATTR_NOCACHE, /* 1 - VM_MEMATTR_NOCACHE */ 387 PTE2_ATTR_DEVICE, /* 2 - VM_MEMATTR_DEVICE */ 388 PTE2_ATTR_SO, /* 3 - VM_MEMATTR_SO */ 389 PTE2_ATTR_WT, /* 4 - VM_MEMATTR_WRITE_THROUGH */ 390 0, /* 5 - NOT USED YET */ 391 0, /* 6 - NOT USED YET */ 392 0 /* 7 - NOT USED YET */ 393}; 394CTASSERT(VM_MEMATTR_WB_WA == 0); 395CTASSERT(VM_MEMATTR_NOCACHE == 1); 396CTASSERT(VM_MEMATTR_DEVICE == 2); 397CTASSERT(VM_MEMATTR_SO == 3); 398CTASSERT(VM_MEMATTR_WRITE_THROUGH == 4); 399 400static inline uint32_t 401vm_memattr_to_pte2(vm_memattr_t ma) 402{ 403 404 KASSERT((u_int)ma < 5, ("%s: bad vm_memattr_t %d", __func__, ma)); 405 return (pte2_attr_tab[(u_int)ma]); 406} 407 408static inline uint32_t 409vm_page_pte2_attr(vm_page_t m) 410{ 411 412 return (vm_memattr_to_pte2(m->md.pat_mode)); 413} 414 415/* 416 * Convert TEX definition entry to TTB flags. 
417 */ 418static uint32_t 419encode_ttb_flags(int idx) 420{ 421 uint32_t inner, outer, nos, reg; 422 423 inner = (tex_class[idx] >> TEXDEF_INNER_SHIFT) & 424 TEXDEF_INNER_MASK; 425 outer = (tex_class[idx] >> TEXDEF_OUTER_SHIFT) & 426 TEXDEF_OUTER_MASK; 427 nos = (tex_class[idx] >> TEXDEF_NOS_SHIFT) & 428 TEXDEF_NOS_MASK; 429 430 reg = nos << 5; 431 reg |= outer << 3; 432 if (cpuinfo.coherent_walk) 433 reg |= (inner & 0x1) << 6; 434 reg |= (inner & 0x2) >> 1; 435#ifdef SMP 436 reg |= 1 << 1; 437#endif 438 return reg; 439} 440 441/* 442 * Set TEX remapping registers in current CPU. 443 */ 444void 445pmap_set_tex(void) 446{ 447 uint32_t prrr, nmrr; 448 uint32_t type, inner, outer, nos; 449 int i; 450 451#ifdef PMAP_PTE_NOCACHE 452 /* XXX fixme */ 453 if (cpuinfo.coherent_walk) { 454 pt_memattr = VM_MEMATTR_WB_WA; 455 ttb_flags = encode_ttb_flags(0); 456 } 457 else { 458 pt_memattr = VM_MEMATTR_NOCACHE; 459 ttb_flags = encode_ttb_flags(1); 460 } 461#else 462 pt_memattr = VM_MEMATTR_WB_WA; 463 ttb_flags = encode_ttb_flags(0); 464#endif 465 466 prrr = 0; 467 nmrr = 0; 468 469 /* Build remapping register from TEX classes. */ 470 for (i = 0; i < 8; i++) { 471 type = (tex_class[i] >> TEXDEF_TYPE_SHIFT) & 472 TEXDEF_TYPE_MASK; 473 inner = (tex_class[i] >> TEXDEF_INNER_SHIFT) & 474 TEXDEF_INNER_MASK; 475 outer = (tex_class[i] >> TEXDEF_OUTER_SHIFT) & 476 TEXDEF_OUTER_MASK; 477 nos = (tex_class[i] >> TEXDEF_NOS_SHIFT) & 478 TEXDEF_NOS_MASK; 479 480 prrr |= type << (i * 2); 481 prrr |= nos << (i + 24); 482 nmrr |= inner << (i * 2); 483 nmrr |= outer << (i * 2 + 16); 484 } 485 /* Add shareable bits for device memory. */ 486 prrr |= PRRR_DS0 | PRRR_DS1; 487 488 /* Add shareable bits for normal memory in SMP case. */ 489#ifdef SMP 490 prrr |= PRRR_NS1; 491#endif 492 cp15_prrr_set(prrr); 493 cp15_nmrr_set(nmrr); 494 495 /* Caches are disabled, so full TLB flush should be enough. */ 496 tlb_flush_all_local(); 497} 498 499/* 500 * Remap one vm_meattr class to another one. 
 * This can be useful as
 * workaround for SOC errata, e.g. if devices must be accessed using
 * SO memory class.
 *
 * !!! Please note that this function is absolutely last resort thing.
 * It should not be used under normal circumstances. !!!
 *
 * Usage rules:
 * - it shall be called after pmap_bootstrap_prepare() and before
 *   cpu_mp_start() (thus only on boot CPU). In practice, it's expected
 *   to be called from platform_attach() or platform_late_init().
 *
 * - if remapping doesn't change caching mode, or until uncached class
 *   is remapped to any kind of cached one, then no other restriction exists.
 *
 * - if pmap_remap_vm_attr() changes caching mode, but both (original and
 *   remapped) remain cached, then caller is resposible for calling
 *   of dcache_wbinv_poc_all().
 *
 * - remapping of any kind of cached class to uncached is not permitted.
 */
void
pmap_remap_vm_attr(vm_memattr_t old_attr, vm_memattr_t new_attr)
{
	int old_idx, new_idx;

	/*
	 * Map VM memattrs to indexes to tex_class table.
	 *
	 * NOTE(review): pte2_attr_tab[] holds PTE2 attribute bit encodings
	 * (PTE2_ATTR_*), not 0..7 tex_class[] indices. Unless those
	 * encodings happen to be small integers, these lookups index
	 * tex_class[] outside its intended 0..7 range. Later FreeBSD
	 * revisions derive the TEX class index from the attribute encoding
	 * here (see PTE2_ATTR2IDX() in newer pte-v6.h) -- confirm against
	 * this branch's headers before changing.
	 */
	old_idx = pte2_attr_tab[(int)old_attr];
	new_idx = pte2_attr_tab[(int)new_attr];

	/* Replace TEX attribute and apply it. */
	tex_class[old_idx] = tex_class[new_idx];
	pmap_set_tex();
}

/*
 * KERNBASE must be multiple of NPT2_IN_PG * PTE1_SIZE. In other words,
 * KERNBASE is mapped by first L2 page table in L2 page table page. It
 * meets same constrain due to PT2MAP being placed just under KERNBASE.
 */
CTASSERT((KERNBASE & (NPT2_IN_PG * PTE1_SIZE - 1)) == 0);
CTASSERT((KERNBASE - VM_MAXUSER_ADDRESS) >= PT2MAP_SIZE);

/*
 * In crazy dreams, PAGE_SIZE could be a multiple of PTE2_SIZE in general.
 * For now, anyhow, the following check must be fulfilled.
 */
CTASSERT(PAGE_SIZE == PTE2_SIZE);
/*
 * We don't want to mess up MI code with all MMU and PMAP definitions,
 * so some things, which depend on other ones, are defined independently.
 * Now, it is time to check that we don't screw up something.
 */
CTASSERT(PDRSHIFT == PTE1_SHIFT);
/*
 * Check L1 and L2 page table entries definitions consistency.
 */
CTASSERT(NB_IN_PT1 == (sizeof(pt1_entry_t) * NPTE1_IN_PT1));
CTASSERT(NB_IN_PT2 == (sizeof(pt2_entry_t) * NPTE2_IN_PT2));
/*
 * Check L2 page tables page consistency.
 */
CTASSERT(PAGE_SIZE == (NPT2_IN_PG * NB_IN_PT2));
CTASSERT((1 << PT2PG_SHIFT) == NPT2_IN_PG);
/*
 * Check PT2TAB consistency.
 * PT2TAB_ENTRIES is defined as a division of NPTE1_IN_PT1 by NPT2_IN_PG.
 * This should be done without remainder.
 */
CTASSERT(NPTE1_IN_PT1 == (PT2TAB_ENTRIES * NPT2_IN_PG));

/*
 * A PT2MAP magic.
 *
 * All level 2 page tables (PT2s) are mapped continuously and accordingly
 * into PT2MAP address space. As PT2 size is less than PAGE_SIZE, this can
 * be done only if PAGE_SIZE is a multiple of PT2 size. All PT2s in one page
 * must be used together, but not necessary at once. The first PT2 in a page
 * must map things on correctly aligned address and the others must follow
 * in right order.
 */
#define NB_IN_PT2TAB	(PT2TAB_ENTRIES * sizeof(pt2_entry_t))
#define NPT2_IN_PT2TAB	(NB_IN_PT2TAB / NB_IN_PT2)
#define NPG_IN_PT2TAB	(NB_IN_PT2TAB / PAGE_SIZE)

/*
 * Check PT2TAB consistency.
 * NPT2_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by NB_IN_PT2.
 * NPG_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by PAGE_SIZE.
 * The both should be done without remainder.
 */
CTASSERT(NB_IN_PT2TAB == (NPT2_IN_PT2TAB * NB_IN_PT2));
CTASSERT(NB_IN_PT2TAB == (NPG_IN_PT2TAB * PAGE_SIZE));
/*
 * The implementation was made general, however, with the assumption
 * below in mind. In case of another value of NPG_IN_PT2TAB,
 * the code should be once more rechecked.
 */
CTASSERT(NPG_IN_PT2TAB == 1);

/*
 * Get offset of PT2 in a page
 * associated with given PT1 index.
 */
static __inline u_int
page_pt2off(u_int pt1_idx)
{

	return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2);
}

/*
 * Get physical address of PT2
 * associated with given PT2s page and PT1 index.
 */
static __inline vm_paddr_t
page_pt2pa(vm_paddr_t pgpa, u_int pt1_idx)
{

	return (pgpa + page_pt2off(pt1_idx));
}

/*
 * Get first entry of PT2
 * associated with given PT2s page and PT1 index.
 */
static __inline pt2_entry_t *
page_pt2(vm_offset_t pgva, u_int pt1_idx)
{

	return ((pt2_entry_t *)(pgva + page_pt2off(pt1_idx)));
}

/*
 * Get virtual address of PT2s page (mapped in PT2MAP)
 * which holds PT2 which holds entry which maps given virtual address.
 */
static __inline vm_offset_t
pt2map_pt2pg(vm_offset_t va)
{

	/* Round va down to the start of the region one PT2s page covers. */
	va &= ~(NPT2_IN_PG * PTE1_SIZE - 1);
	return ((vm_offset_t)pt2map_entry(va));
}

/*****************************************************************************
 *
 *     THREE pmap initialization milestones exist:
 *
 *  locore.S
 *    -> fundamental init (including MMU) in ASM
 *
 *  initarm()
 *    -> fundamental init continues in C
 *    -> first available physical address is known
 *
 *  pmap_bootstrap_prepare() -> FIRST PMAP MILESTONE (first epoch begins)
 *    -> basic (safe) interface for physical address allocation is made
 *    -> basic (safe) interface for virtual mapping is made
 *    -> limited not SMP coherent work is possible
 *
 *  -> more fundamental init continues in C
 *  -> locks and some more things are available
 *  -> all fundamental allocations and mappings are done
 *
 *  pmap_bootstrap() -> SECOND PMAP MILESTONE (second epoch begins)
 *    -> phys_avail[] and
 *       virtual_avail is set
 *    -> control is passed to vm subsystem
 *    -> physical and virtual address allocation are off limit
 *    -> low level mapping functions, some SMP coherent,
 *       are available, which cannot be used before vm subsystem
 *       is being inited
 *
 *  mi_startup()
 *    -> vm subsystem is being inited
 *
 *  pmap_init() -> THIRD PMAP MILESTONE (third epoch begins)
 *    -> pmap is fully inited
 *
 *****************************************************************************/

/*****************************************************************************
 *
 *	PMAP first stage initialization and utility functions
 *	for pre-bootstrap epoch.
 *
 *  After pmap_bootstrap_prepare() is called, the following functions
 *  can be used:
 *
 *  (1) strictly only for this stage functions for physical page allocations,
 *      virtual space allocations, and mappings:
 *
 *  vm_paddr_t pmap_preboot_get_pages(u_int num);
 *  void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num);
 *  vm_offset_t pmap_preboot_reserve_pages(u_int num);
 *  vm_offset_t pmap_preboot_get_vpages(u_int num);
 *  void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
 *      vm_prot_t prot, vm_memattr_t attr);
 *
 *  (2) for all stages:
 *
 *  vm_paddr_t pmap_kextract(vm_offset_t va);
 *
 *  NOTE: This is not SMP coherent stage.
 *
 *****************************************************************************/

/*
 * Convert between kernel physical and virtual addresses; valid only for
 * the directly (section-)mapped kernel image region.
 */
#define KERNEL_P2V(pa) \
    ((vm_offset_t)((pa) - arm_physmem_kernaddr + KERNVIRTADDR))
#define KERNEL_V2P(va) \
    ((vm_paddr_t)((va) - KERNVIRTADDR + arm_physmem_kernaddr))

/* Next free physical address handed out by pmap_preboot_get_pages(). */
static vm_paddr_t last_paddr;

/*
 * Pre-bootstrap epoch page allocator.
 */
vm_paddr_t
pmap_preboot_get_pages(u_int num)
{
	vm_paddr_t ret;

	/* Simple bump allocator: hand out 'num' pages, never freed. */
	ret = last_paddr;
	last_paddr += num * PAGE_SIZE;

	return (ret);
}

/*
 *	The fundamental initialization of PMAP stuff.
 *
 *  Some things already happened in locore.S and some things could happen
 *  before pmap_bootstrap_prepare() is called, so let's recall what is done:
 *  1. Caches are disabled.
 *  2. We are running on virtual addresses already with 'boot_pt1'
 *     as L1 page table.
 *  3. So far, all virtual addresses can be converted to physical ones and
 *     vice versa by the following macros:
 *       KERNEL_P2V(pa) .... physical to virtual ones,
 *       KERNEL_V2P(va) .... virtual to physical ones.
 *
 *  What is done herein:
 *  1. The 'boot_pt1' is replaced by real kernel L1 page table 'kern_pt1'.
 *  2. PT2MAP magic is brought to live.
 *  3. Basic preboot functions for page allocations and mappings can be used.
 *  4. Everything is prepared for L1 cache enabling.
 *
 *  Variations:
 *  1. To use second TTB register, so kernel and users page tables will be
 *     separated. This way process forking - pmap_pinit() - could be faster,
 *     it saves physical pages and KVA per a process, and it's simple change.
 *     However, it will lead, due to hardware matter, to the following:
 *     (a) 2G space for kernel and 2G space for users.
 *     (b) 1G space for kernel in low addresses and 3G for users above it.
 *     A question is: Is the case (b) really an option? Note that case (b)
 *     does save neither physical memory and KVA.
 */
void
pmap_bootstrap_prepare(vm_paddr_t last)
{
	vm_paddr_t pt2pg_pa, pt2tab_pa, pa, size;
	vm_offset_t pt2pg_va;
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;
	u_int i;
	uint32_t actlr_mask, actlr_set, l1_attr;

	/*
	 * Now, we are going to make real kernel mapping. Note that we are
	 * already running on some mapping made in locore.S and we expect
	 * that it's large enough to ensure nofault access to physical memory
	 * allocated herein before switch.
	 *
	 * As kernel image and everything needed before are and will be mapped
	 * by section mappings, we align last physical address to PTE1_SIZE.
	 */
	last_paddr = pte1_roundup(last);

	/*
	 * Allocate and zero page(s) for kernel L1 page table.
	 *
	 * Note that it's first allocation on space which was PTE1_SIZE
	 * aligned and as such base_pt1 is aligned to NB_IN_PT1 too.
	 */
	base_pt1 = pmap_preboot_get_pages(NPG_IN_PT1);
	kern_pt1 = (pt1_entry_t *)KERNEL_P2V(base_pt1);
	bzero((void*)kern_pt1, NB_IN_PT1);
	pte1_sync_range(kern_pt1, NB_IN_PT1);

	/* Allocate and zero page(s) for kernel PT2TAB. */
	pt2tab_pa = pmap_preboot_get_pages(NPG_IN_PT2TAB);
	kern_pt2tab = (pt2_entry_t *)KERNEL_P2V(pt2tab_pa);
	bzero(kern_pt2tab, NB_IN_PT2TAB);
	pte2_sync_range(kern_pt2tab, NB_IN_PT2TAB);

	/* Allocate and zero page(s) for kernel L2 page tables. */
	pt2pg_pa = pmap_preboot_get_pages(NKPT2PG);
	pt2pg_va = KERNEL_P2V(pt2pg_pa);
	size = NKPT2PG * PAGE_SIZE;
	bzero((void*)pt2pg_va, size);
	pte2_sync_range((pt2_entry_t *)pt2pg_va, size);

	/*
	 * Add a physical memory segment (vm_phys_seg) corresponding to the
	 * preallocated pages for kernel L2 page tables so that vm_page
	 * structures representing these pages will be created. The vm_page
	 * structures are required for promotion of the corresponding kernel
	 * virtual addresses to section mappings.
	 *
	 * (pmap_preboot_get_pages(0) returns the current allocation
	 * watermark, i.e. the segment's end address.)
	 */
	vm_phys_add_seg(pt2tab_pa, pmap_preboot_get_pages(0));

	/*
	 * Insert allocated L2 page table pages to PT2TAB and make
	 * link to all PT2s in L1 page table. See how kernel_vm_end
	 * is initialized.
	 *
	 * We play simple and safe. So every KVA will have underlaying
	 * L2 page table, even kernel image mapped by sections.
	 */
	pte2p = kern_pt2tab_entry(KERNBASE);
	for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += PTE2_SIZE)
		pt2tab_store(pte2p++, PTE2_KPT(pa));

	pte1p = kern_pte1(KERNBASE);
	for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += NB_IN_PT2)
		pte1_store(pte1p++, PTE1_LINK(pa));

	/* Make section mappings for kernel. */
	l1_attr = ATTR_TO_L1(PTE2_ATTR_DEFAULT);
	pte1p = kern_pte1(KERNBASE);
	for (pa = KERNEL_V2P(KERNBASE); pa < last; pa += PTE1_SIZE)
		pte1_store(pte1p++, PTE1_KERN(pa, PTE1_AP_KRW, l1_attr));

	/*
	 * Get free and aligned space for PT2MAP and make L1 page table links
	 * to L2 page tables held in PT2TAB.
	 *
	 * Note that pages holding PT2s are stored in PT2TAB as pt2_entry_t
	 * descriptors and PT2TAB page(s) itself is(are) used as PT2s. Thus
	 * each entry in PT2TAB maps all PT2s in a page. This implies that
	 * virtual address of PT2MAP must be aligned to NPT2_IN_PG * PTE1_SIZE.
	 */
	PT2MAP = (pt2_entry_t *)(KERNBASE - PT2MAP_SIZE);
	pte1p = kern_pte1((vm_offset_t)PT2MAP);
	for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
		pte1_store(pte1p++, PTE1_LINK(pa));
	}

	/*
	 * Store PT2TAB in PT2TAB itself, i.e. self reference mapping.
	 * Each pmap will hold own PT2TAB, so the mapping should be not global.
	 */
	pte2p = kern_pt2tab_entry((vm_offset_t)PT2MAP);
	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
		pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
	}

	/*
	 * Choose correct L2 page table and make mappings for allocations
	 * made herein which replaces temporary locore.S mappings after a while.
	 * Note that PT2MAP cannot be used until we switch to kern_pt1.
	 *
	 * Note, that these allocations started aligned on 1M section and
	 * kernel PT1 was allocated first. Making of mappings must follow
	 * order of physical allocations as we've used KERNEL_P2V() macro
	 * for virtual addresses resolution.
	 */
	pte2p = kern_pt2tab_entry((vm_offset_t)kern_pt1);
	pt2pg_va = KERNEL_P2V(pte2_pa(pte2_load(pte2p)));

	pte2p = page_pt2(pt2pg_va, pte1_index((vm_offset_t)kern_pt1));

	/* Make mapping for kernel L1 page table. */
	for (pa = base_pt1, i = 0; i < NPG_IN_PT1; i++, pa += PTE2_SIZE)
		pte2_store(pte2p++, PTE2_KPT(pa));

	/* Make mapping for kernel PT2TAB. */
	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE)
		pte2_store(pte2p++, PTE2_KPT(pa));

	/* Finally, switch from 'boot_pt1' to 'kern_pt1'. */
	pmap_kern_ttb = base_pt1 | ttb_flags;
	cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set);
	reinit_mmu(pmap_kern_ttb, actlr_mask, actlr_set);
	/*
	 * Initialize the first available KVA. As kernel image is mapped by
	 * sections, we are leaving some gap behind.
	 */
	virtual_avail = (vm_offset_t)kern_pt2tab + NPG_IN_PT2TAB * PAGE_SIZE;
}

/*
 * Setup L2 page table page for given KVA.
 * Used in pre-bootstrap epoch.
 *
 * Note that we have allocated NKPT2PG pages for L2 page tables in advance
 * and used them for mapping KVA starting from KERNBASE. However, this is not
 * enough. Vectors and devices need L2 page tables too. Note that they are
 * even above VM_MAX_KERNEL_ADDRESS.
 */
static __inline vm_paddr_t
pmap_preboot_pt2pg_setup(vm_offset_t va)
{
	pt2_entry_t *pte2p, pte2;
	vm_paddr_t pt2pg_pa;

	/* Get associated entry in PT2TAB. */
	pte2p = kern_pt2tab_entry(va);

	/* Just return, if PT2s page exists already. */
	pte2 = pt2tab_load(pte2p);
	if (pte2_is_valid(pte2))
		return (pte2_pa(pte2));

	KASSERT(va >= VM_MAX_KERNEL_ADDRESS,
	    ("%s: NKPT2PG too small", __func__));

	/*
	 * Allocate page for PT2s and insert it to PT2TAB.
	 * In other words, map it into PT2MAP space.
	 */
	pt2pg_pa = pmap_preboot_get_pages(1);
	pt2tab_store(pte2p, PTE2_KPT(pt2pg_pa));

	/* Zero all PT2s in allocated page. */
	bzero((void*)pt2map_pt2pg(va), PAGE_SIZE);
	pte2_sync_range((pt2_entry_t *)pt2map_pt2pg(va), PAGE_SIZE);

	return (pt2pg_pa);
}

/*
 * Setup L2 page table for given KVA.
 * Used in pre-bootstrap epoch.
 */
static void
pmap_preboot_pt2_setup(vm_offset_t va)
{
	pt1_entry_t *pte1p;
	vm_paddr_t pt2pg_pa, pt2_pa;

	/* Setup PT2's page. */
	pt2pg_pa = pmap_preboot_pt2pg_setup(va);
	pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(va));

	/* Insert PT2 to PT1. */
	pte1p = kern_pte1(va);
	pte1_store(pte1p, PTE1_LINK(pt2_pa));
}

/*
 * Get L2 page entry associated with given KVA.
 * Used in pre-bootstrap epoch.
 */
static __inline pt2_entry_t*
pmap_preboot_vtopte2(vm_offset_t va)
{
	pt1_entry_t *pte1p;

	/* Setup PT2 if needed. */
	pte1p = kern_pte1(va);
	if (!pte1_is_valid(pte1_load(pte1p))) /* XXX - sections ?! */
		pmap_preboot_pt2_setup(va);

	return (pt2map_entry(va));
}

/*
 * Pre-bootstrap epoch page(s) mapping(s).
 */
void
pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num)
{
	u_int i;
	pt2_entry_t *pte2p;

	/* Map all the pages as kernel read-write, default attributes. */
	for (i = 0; i < num; i++) {
		pte2p = pmap_preboot_vtopte2(va);
		pte2_store(pte2p, PTE2_KRW(pa));
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
}

/*
 * Pre-bootstrap epoch virtual space allocator.
 */
vm_offset_t
pmap_preboot_reserve_pages(u_int num)
{
	u_int i;
	vm_offset_t start, va;
	pt2_entry_t *pte2p;

	/* Allocate virtual space. */
	start = va = virtual_avail;
	virtual_avail += num * PAGE_SIZE;

	/* Zero the mapping. */
	for (i = 0; i < num; i++) {
		pte2p = pmap_preboot_vtopte2(va);
		pte2_store(pte2p, 0);
		va += PAGE_SIZE;
	}

	return (start);
}

/*
 * Pre-bootstrap epoch page(s) allocation and mapping(s).
 */
vm_offset_t
pmap_preboot_get_vpages(u_int num)
{
	vm_paddr_t pa;
	vm_offset_t va;

	/* Allocate physical page(s). */
	pa = pmap_preboot_get_pages(num);

	/* Allocate virtual space. */
	va = virtual_avail;
	virtual_avail += num * PAGE_SIZE;

	/* Map and zero all. */
	pmap_preboot_map_pages(pa, va, num);
	bzero((void *)va, num * PAGE_SIZE);

	return (va);
}

/*
 * Pre-bootstrap epoch page mapping(s) with attributes.
 */
void
pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
    vm_prot_t prot, vm_memattr_t attr)
{
	u_int num;
	u_int l1_attr, l1_prot, l2_prot, l2_attr;
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;

	/* Derive L2 permission/attribute bits, then their L1 equivalents. */
	l2_prot = prot & VM_PROT_WRITE ? PTE2_AP_KRW : PTE2_AP_KR;
	l2_prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
	l2_attr = vm_memattr_to_pte2(attr);
	l1_prot = ATTR_TO_L1(l2_prot);
	l1_attr = ATTR_TO_L1(l2_attr);

	/*
	 * Map all the pages. Use a 1MB section mapping whenever both
	 * addresses are section-aligned and at least 1MB remains;
	 * otherwise fall back to 4KB page mappings.
	 * ('num' counts remaining bytes, not pages.)
	 */
	num = round_page(size);
	while (num > 0) {
		if ((((va | pa) & PTE1_OFFSET) == 0) && (num >= PTE1_SIZE)) {
			pte1p = kern_pte1(va);
			pte1_store(pte1p, PTE1_KERN(pa, l1_prot, l1_attr));
			va += PTE1_SIZE;
			pa += PTE1_SIZE;
			num -= PTE1_SIZE;
		} else {
			pte2p = pmap_preboot_vtopte2(va);
			pte2_store(pte2p, PTE2_KERN(pa, l2_prot, l2_attr));
			va += PAGE_SIZE;
			pa += PAGE_SIZE;
			num -= PAGE_SIZE;
		}
	}
}

/*
 * Extract from the kernel page table the physical address
 * that is mapped by the given virtual address "va".
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	vm_paddr_t pa;
	pt1_entry_t pte1;
	pt2_entry_t pte2;

	/*
	 * Walk the kernel L1 table directly; 'va' must be a kernel VA
	 * mapped either by a 1MB section or via a linked L2 table.
	 */
	pte1 = pte1_load(kern_pte1(va));
	if (pte1_is_section(pte1)) {
		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
	} else if (pte1_is_link(pte1)) {
		/*
		 * We should beware of concurrent promotion that changes
		 * pte1 at this point. However, it's not a problem as PT2
		 * page is preserved by promotion in PT2TAB. So even if
		 * it happens, using of PT2MAP is still safe.
		 *
		 * QQQ: However, concurrent removing is a problem which
		 *      ends in abort on PT2MAP space. Locking must be used
		 *      to deal with this.
		 */
		pte2 = pte2_load(pt2map_entry(va));
		pa = pte2_pa(pte2) | (va & PTE2_OFFSET);
	}
	else {
		/* Unmapped kernel VA is a caller bug at this level. */
		panic("%s: va %#x pte1 %#x", __func__, va, pte1);
	}
	return (pa);
}

/*
 * Extract from the kernel page table the physical address
 * that is mapped by the given virtual address "va". Also
 * return L2 page table entry which maps the address.
 *
 * This is only intended to be used for panic dumps.
 */
vm_paddr_t
pmap_dump_kextract(vm_offset_t va, pt2_entry_t *pte2p)
{
	vm_paddr_t pa;
	pt1_entry_t pte1;
	pt2_entry_t pte2;

	pte1 = pte1_load(kern_pte1(va));
	if (pte1_is_section(pte1)) {
		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
		/*
		 * Section mapping: synthesize an equivalent L2 entry for
		 * the dump consumer from the L1 attributes.
		 */
		pte2 = pa | ATTR_TO_L2(pte1) | PTE2_V;
	} else if (pte1_is_link(pte1)) {
		pte2 = pte2_load(pt2map_entry(va));
		pa = pte2_pa(pte2);
	} else {
		/* Unlike pmap_kextract(), do not panic; report no mapping. */
		pte2 = 0;
		pa = 0;
	}
	if (pte2p != NULL)
		*pte2p = pte2;
	return (pa);
}

/*****************************************************************************
 *
 *	PMAP second stage initialization and utility functions
 *	for bootstrap epoch.
 *
 *  After pmap_bootstrap() is called, the following functions for
 *  mappings can be used:
 *
 *  void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
 *  void pmap_kremove(vm_offset_t va);
 *  vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end,
 *      int prot);
 *
 *  NOTE: This is not SMP coherent stage. And physical page allocation is not
 *        allowed during this stage.
 *
 *****************************************************************************/

/*
 * Initialize kernel PMAP locks and lists, kernel_pmap itself, and
 * reserve various virtual spaces for temporary mappings.
 */
void
pmap_bootstrap(vm_offset_t firstaddr)
{
	pt2_entry_t *unused __unused;
	struct pcpu *pc;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_l1pa = (vm_paddr_t)kern_pt1;	/* for libkvm */
	kernel_pmap->pm_pt1 = kern_pt1;
	kernel_pmap->pm_pt2tab = kern_pt2tab;
	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");

	LIST_INIT(&allpmaps);

	/*
	 * Request a spin mutex so that changes to allpmaps cannot be
	 * preempted by smp_rendezvous_cpus().
	 */
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.  The macro reserves 'n' pages of KVA and
	 * resolves both the PT2MAP pte pointer and the VA for the caller.
	 */
#define	SYSMAP(c, p, v, n)  do {		\
	v = (c)pmap_preboot_reserve_pages(n);	\
	p = pt2map_entry((vm_offset_t)v);	\
	} while (0)

	/*
	 * Local CMAP1/CMAP2 are used for zeroing and copying pages.
	 * Local CMAP2 is also used for data cache cleaning.
	 * Global CMAP3 is used for the idle process page zeroing.
	 */
	pc = get_pcpu();
	mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
	SYSMAP(caddr_t, pc->pc_cmap1_pte2p, pc->pc_cmap1_addr, 1);
	SYSMAP(caddr_t, pc->pc_cmap2_pte2p, pc->pc_cmap2_addr, 1);
	SYSMAP(vm_offset_t, pc->pc_qmap_pte2p, pc->pc_qmap_addr, 1);
	SYSMAP(caddr_t, CMAP3, CADDR3, 1);

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS);

	/*
	 * _tmppt is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, unused, _tmppt, 1);

	/*
	 * PADDR1 and PADDR2 are used by pmap_pte2_quick() and pmap_pte2(),
	 * respectively. PADDR3 is used by pmap_pte2_ddb().
	 */
	SYSMAP(pt2_entry_t *, PMAP1, PADDR1, 1);
	SYSMAP(pt2_entry_t *, PMAP2, PADDR2, 1);
#ifdef DDB
	SYSMAP(pt2_entry_t *, PMAP3, PADDR3, 1);
#endif
	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);

	/*
	 * Note that in very short time in initarm(), we are going to
	 * initialize phys_avail[] array and no further page allocation
	 * can happen after that until vm subsystem will be initialized.
	 */
	kernel_vm_end_new = kernel_vm_end;
	virtual_end = vm_max_kernel_address;
}

/*
 * Allocate and wire up the per-CPU CMAP1/CMAP2/QMAP reserved mappings
 * for application processors.  The BSP was already set up (with
 * pre-bootstrap KVA) in pmap_bootstrap() and is skipped here.
 */
static void
pmap_init_reserved_pages(void)
{
	struct pcpu *pc;
	vm_offset_t pages;
	int i;

	CPU_FOREACH(i) {
		pc = pcpu_find(i);
		/*
		 * Skip if the mapping has already been initialized,
		 * i.e. this is the BSP.
		 */
		if (pc->pc_cmap1_addr != 0)
			continue;
		mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
		/* Three consecutive pages: CMAP1, CMAP2, QMAP. */
		pages = kva_alloc(PAGE_SIZE * 3);
		if (pages == 0)
			panic("%s: unable to allocate KVA", __func__);
		pc->pc_cmap1_pte2p = pt2map_entry(pages);
		pc->pc_cmap2_pte2p = pt2map_entry(pages + PAGE_SIZE);
		pc->pc_qmap_pte2p = pt2map_entry(pages + (PAGE_SIZE * 2));
		pc->pc_cmap1_addr = (caddr_t)pages;
		pc->pc_cmap2_addr = (caddr_t)(pages + PAGE_SIZE);
		pc->pc_qmap_addr = pages + (PAGE_SIZE * 2);
	}
}
SYSINIT(rpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_reserved_pages, NULL);

/*
 * The function can already be use in second initialization stage.
 * As such, the function DOES NOT call pmap_growkernel() where PT2
 * allocation can happen. So if used, be sure that PT2 for given
 * virtual address is allocated already!
 *
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 */
static __inline void
pmap_kenter_prot_attr(vm_offset_t va, vm_paddr_t pa, uint32_t prot,
    uint32_t attr)
{
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;

	pte1p = kern_pte1(va);
	if (!pte1_is_valid(pte1_load(pte1p))) { /* XXX - sections ?! */
		/*
		 * This is a very low level function, so PT2 and particularly
		 * PT2PG associated with given virtual address must be already
		 * allocated. It's a pain mainly during pmap initialization
		 * stage. However, called after pmap initialization with
		 * virtual address not under kernel_vm_end will lead to
		 * the same misery.
		 */
		if (!pte2_is_valid(pte2_load(kern_pt2tab_entry(va))))
			panic("%s: kernel PT2 not allocated!", __func__);
	}

	pte2p = pt2map_entry(va);
	pte2_store(pte2p, PTE2_KERN(pa, prot, attr));
}

/*
 * Map a wired page into kernel KVA with default (read/write, cacheable)
 * protection and attributes.  Note: not SMP coherent; no TLB flush here.
 */
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT);
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent; the caller is responsible for any TLB flush.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt2_entry_t *pte2p;

	pte2p = pt2map_entry(va);
	pte2_clear(pte2p);
}

/*
 * Share new kernel PT2PG with all pmaps.
 * The caller is responsible for maintaining TLB consistency.
 */
static void
pmap_kenter_pt2tab(vm_offset_t va, pt2_entry_t npte2)
{
	pmap_t pmap;
	pt2_entry_t *pte2p;

	/* Spin lock: allpmaps walkers must not be preempted (see init). */
	mtx_lock_spin(&allpmaps_lock);
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		pte2p = pmap_pt2tab_entry(pmap, va);
		pt2tab_store(pte2p, npte2);
	}
	mtx_unlock_spin(&allpmaps_lock);
}

/*
 * Share new kernel PTE1 with all pmaps.
 * The caller is responsible for maintaining TLB consistency.
 */
static void
pmap_kenter_pte1(vm_offset_t va, pt1_entry_t npte1)
{
	pmap_t pmap;
	pt1_entry_t *pte1p;

	mtx_lock_spin(&allpmaps_lock);
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		pte1p = pmap_pte1(pmap, va);
		pte1_store(pte1p, npte1);
	}
	mtx_unlock_spin(&allpmaps_lock);
}

/*
 *  Used to map a range of physical addresses into kernel
 *  virtual address space.
 *
 *  The value passed in '*virt' is a suggested virtual address for
 *  the mapping. Architectures which can support a direct-mapped
 *  physical to virtual region can return the appropriate address
 *  within that region, leaving '*virt' unchanged.
Other
 *  architectures should map the pages starting at '*virt' and
 *  update '*virt' with the first usable address after the mapped
 *  region.
 *
 *  NOTE: Read the comments above pmap_kenter_prot_attr() as
 *        the function is used herein!
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;
	vm_paddr_t pte1_offset;
	pt1_entry_t npte1;
	uint32_t l1prot, l2prot;
	uint32_t l1attr, l2attr;

	PDEBUG(1, printf("%s: virt = %#x, start = %#x, end = %#x (size = %#x),"
	    " prot = %d\n", __func__, *virt, start, end, end - start,  prot));

	/* Derive L2 permissions from 'prot'; L1 forms mirror them. */
	l2prot = (prot & VM_PROT_WRITE) ? PTE2_AP_KRW : PTE2_AP_KR;
	l2prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
	l1prot = ATTR_TO_L1(l2prot);

	l2attr = PTE2_ATTR_DEFAULT;
	l1attr = ATTR_TO_L1(l2attr);

	va = *virt;
	/*
	 * Does the physical address range's size and alignment permit at
	 * least one section mapping to be created?
	 */
	pte1_offset = start & PTE1_OFFSET;
	if ((end - start) - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) >=
	    PTE1_SIZE) {
		/*
		 * Increase the starting virtual address so that its alignment
		 * does not preclude the use of section mappings.
		 */
		if ((va & PTE1_OFFSET) < pte1_offset)
			va = pte1_trunc(va) + pte1_offset;
		else if ((va & PTE1_OFFSET) > pte1_offset)
			va = pte1_roundup(va) + pte1_offset;
	}
	sva = va;
	while (start < end) {
		/* Prefer 1MB sections whenever both sides are aligned. */
		if ((start & PTE1_OFFSET) == 0 && end - start >= PTE1_SIZE) {
			KASSERT((va & PTE1_OFFSET) == 0,
			    ("%s: misaligned va %#x", __func__, va));
			npte1 = PTE1_KERN(start, l1prot, l1attr);
			pmap_kenter_pte1(va, npte1);
			va += PTE1_SIZE;
			start += PTE1_SIZE;
		} else {
			pmap_kenter_prot_attr(va, start, l2prot, l2attr);
			va += PAGE_SIZE;
			start += PAGE_SIZE;
		}
	}
	/* Flush once for the whole new range, after all stores. */
	tlb_flush_range(sva, va - sva);
	*virt = va;
	return (sva);
}

/*
 * Make a temporary mapping for a physical address.
 * This is only intended to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;

	/* QQQ: 'i' should be less or equal to MAXDUMPPGS. */

	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	pmap_kenter(va, pa);
	/* Local flush only: dump runs single-CPU. */
	tlb_flush_local(va);
	return ((void *)crashdumpmap);
}


/*************************************
 *
 *  TLB & cache maintenance routines.
 *
 *************************************/

/*
 * We inline these within pmap.c for speed.
 * The flush is skipped when the pmap is not active on any CPU
 * (its mappings cannot be cached in any TLB then).
 */
PMAP_INLINE void
pmap_tlb_flush(pmap_t pmap, vm_offset_t va)
{

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		tlb_flush(va);
}

PMAP_INLINE void
pmap_tlb_flush_range(pmap_t pmap, vm_offset_t sva, vm_size_t size)
{

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		tlb_flush_range(sva, size);
}

/*
 *  Abuse the pte2 nodes for unmapped kva to thread a kva freelist through.
 *  Requirements:
 *   - Must deal with pages in order to ensure that none of the PTE2_* bits
 *     are ever set, PTE2_V in particular.
 *   - Assumes we can write to pte2s without pte2_store() atomic ops.
 *   - Assumes nothing will ever test these addresses for 0 to indicate
 *     no mapping instead of correctly checking PTE2_V.
 *   - Assumes a vm_offset_t will fit in a pte2 (true for arm).
 *  Because PTE2_V is never set, there can be no mappings to invalidate.
 */

/*
 * Pop one KVA page off the freelist whose head is '*head'; the next
 * element is threaded through the page's (invalid) pte2 slot.
 */
static vm_offset_t
pmap_pte2list_alloc(vm_offset_t *head)
{
	pt2_entry_t *pte2p;
	vm_offset_t va;

	va = *head;
	if (va == 0)
		panic("pmap_ptelist_alloc: exhausted ptelist KVA");
	pte2p = pt2map_entry(va);
	*head = *pte2p;
	if (*head & PTE2_V)
		panic("%s: va with PTE2_V set!", __func__);
	/* Clear the slot so the allocated page has no stale link. */
	*pte2p = 0;
	return (va);
}

/*
 * Push KVA page 'va' back onto the freelist; the old head is stored
 * into the page's pte2 slot as the link.
 */
static void
pmap_pte2list_free(vm_offset_t *head, vm_offset_t va)
{
	pt2_entry_t *pte2p;

	if (va & PTE2_V)
		panic("%s: freeing va with PTE2_V set!", __func__);
	pte2p = pt2map_entry(va);
	*pte2p = *head;		/* virtual! PTE2_V is 0 though */
	*head = va;
}

/*
 * Build the freelist over 'npages' pages starting at 'base'.  Pages are
 * pushed in reverse so the final head is the lowest address.
 */
static void
pmap_pte2list_init(vm_offset_t *head, void *base, int npages)
{
	int i;
	vm_offset_t va;

	*head = 0;
	for (i = npages - 1; i >= 0; i--) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		pmap_pte2list_free(head, va);
	}
}


/*****************************************************************************
 *
 *	PMAP third and final stage initialization.
 *
 *	After pmap_init() is called, PMAP subsystem is fully initialized.
 *
 *****************************************************************************/

SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
    "Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
    "Page share factor per proc");

static u_long nkpt2pg = NKPT2PG;
SYSCTL_ULONG(_vm_pmap, OID_AUTO, nkpt2pg, CTLFLAG_RD,
    &nkpt2pg, 0, "Pre-allocated pages for kernel PT2s");

static int sp_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &sp_enabled, 0, "Are large page mappings enabled?");

static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte1, CTLFLAG_RD, 0,
    "1MB page mapping counters");

static u_long pmap_pte1_demotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_pte1_demotions, 0, "1MB page demotions");

static u_long pmap_pte1_mappings;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, mappings, CTLFLAG_RD,
    &pmap_pte1_mappings, 0, "1MB page mappings");

static u_long pmap_pte1_p_failures;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, p_failures, CTLFLAG_RD,
    &pmap_pte1_p_failures, 0, "1MB page promotion failures");

static u_long pmap_pte1_promotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, promotions, CTLFLAG_RD,
    &pmap_pte1_promotions, 0, "1MB page promotions");

static u_long pmap_pte1_kern_demotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_demotions, CTLFLAG_RD,
    &pmap_pte1_kern_demotions, 0, "1MB page kernel demotions");

static u_long pmap_pte1_kern_promotions;
SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_promotions, CTLFLAG_RD,
    &pmap_pte1_kern_promotions, 0, "1MB page kernel promotions");

/*
 * Compose the TTB register value for 'pmap': physical address of its
 * L1 table combined with the boot-determined TTB flags.
 */
static __inline ttb_entry_t
pmap_ttb_get(pmap_t pmap)
{

	return (vtophys(pmap->pm_pt1) | ttb_flags);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 *
 * Variations:
 * 1. Pages for L2 page tables are always not managed. So, pv_list and
 *    pt2_wirecount can share same physical space. However, proper
 *    initialization on a page alloc for page tables and reinitialization
 *    on the page free must be ensured.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	pt2_wirecount_init(m);
	m->md.pat_mode = VM_MEMATTR_DEFAULT;
}

/*
 * Virtualization for faster way how to zero whole page.
 */
static __inline void
pagezero(void *page)
{

	bzero(page, PAGE_SIZE);
}

/*
 * Zero L2 page table page.
 * Use same KVA as in pmap_zero_page().
 */
static __inline vm_paddr_t
pmap_pt2pg_zero(vm_page_t m)
{
	pt2_entry_t *cmap2_pte2p;
	vm_paddr_t pa;
	struct pcpu *pc;

	pa = VM_PAGE_TO_PHYS(m);

	/*
	 * XXX: For now, we map whole page even if it's already zero,
	 *      to sync it even if the sync is only DSB.
	 */
	/* Pin first: the CMAP2 slot is per-CPU; migration would break it. */
	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	/*  Even VM_ALLOC_ZERO request is only advisory. */
	if ((m->flags & PG_ZERO) == 0)
		pagezero(pc->pc_cmap2_addr);
	pte2_sync_range((pt2_entry_t *)pc->pc_cmap2_addr, PAGE_SIZE);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);

	/*
	 * Unpin the thread before releasing the lock. Otherwise the thread
	 * could be rescheduled while still bound to the current CPU, only
	 * to unpin itself immediately upon resuming execution.
	 */
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);

	return (pa);
}

/*
 *  Init just allocated page as L2 page table(s) holder
 *  and return its physical address.
 */
static __inline vm_paddr_t
pmap_pt2pg_init(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	vm_paddr_t pa;
	pt2_entry_t *pte2p;

	/* Check page attributes. */
	if (m->md.pat_mode != pt_memattr)
		pmap_page_set_memattr(m, pt_memattr);

	/* Zero page and init wire counts. */
	pa = pmap_pt2pg_zero(m);
	pt2_wirecount_init(m);

	/*
	 * Map page to PT2MAP address space for given pmap.
	 * Note that PT2MAP space is shared with all pmaps.
	 */
	if (pmap == kernel_pmap)
		pmap_kenter_pt2tab(va, PTE2_KPT(pa));
	else {
		/* User pmaps get a non-global entry, kept private. */
		pte2p = pmap_pt2tab_entry(pmap, va);
		pt2tab_store(pte2p, PTE2_KPT_NG(pa));
	}

	return (pa);
}

/*
 *  Initialize the pmap module.
 *  Called by vm_init, to initialize any structures that the pmap
 *  system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_size_t s;
	pt2_entry_t *pte2p, pte2;
	u_int i, pte1_idx, pv_npg;

	PDEBUG(1, printf("%s: phys_start = %#x\n", __func__, PHYSADDR));

	/*
	 * Initialize the vm page array entries for kernel pmap's
	 * L2 page table pages allocated in advance.
	 */
	pte1_idx = pte1_index(KERNBASE - PT2MAP_SIZE);
	pte2p = kern_pt2tab_entry(KERNBASE - PT2MAP_SIZE);
	for (i = 0; i < nkpt2pg + NPG_IN_PT2TAB; i++, pte2p++) {
		vm_paddr_t pa;
		vm_page_t m;

		pte2 = pte2_load(pte2p);
		KASSERT(pte2_is_valid(pte2), ("%s: no valid entry", __func__));

		pa = pte2_pa(pte2);
		m = PHYS_TO_VM_PAGE(pa);
		KASSERT(m >= vm_page_array &&
		    m < &vm_page_array[vm_page_array_size],
		    ("%s: L2 page table page is out of range", __func__));

		m->pindex = pte1_idx;
		m->phys_addr = pa;
		pte1_idx += NPT2_IN_PG;
	}

	/*
	 * Initialize the address space (zone) for the pv entries. Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_max = roundup(pv_entry_max, _NPCPV);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled);
	if (sp_enabled) {
		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
		    ("%s: can't assign to pagesizes[1]", __func__));
		pagesizes[1] = PTE1_SIZE;
	}

	/*
	 * Calculate the size of the pv head table for sections.
	 * Handle the possibility that "vm_phys_segs[...].end" is zero.
	 * Note that the table is only for sections which could be promoted.
	 */
	first_managed_pa = pte1_trunc(vm_phys_segs[0].start);
	pv_npg = (pte1_trunc(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE)
	    - first_managed_pa) / PTE1_SIZE + 1;

	/*
	 * Allocate memory for the pv head table for sections.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);

	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
	pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
	if (pv_chunkbase == NULL)
		panic("%s: not enough kvm for pv chunks", __func__);
	pmap_pte2list_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
}

/*
 *  Add a list of wired pages to the kva
 *  this routine is only used for temporary
 *  kernel mappings that do not need to have
 *  page modification or references recorded.
 *  Note that old mappings are simply written
 *  over. The page *must* be wired.
 *  Note: SMP coherent. Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	u_int anychanged;
	pt2_entry_t *epte2p, *pte2p, pte2;
	vm_page_t m;
	vm_paddr_t pa;

	anychanged = 0;
	pte2p = pt2map_entry(sva);
	epte2p = pte2p + count;
	while (pte2p < epte2p) {
		m = *ma++;
		pa = VM_PAGE_TO_PHYS(m);
		pte2 = pte2_load(pte2p);
		/* Rewrite only entries that actually change; count them. */
		if ((pte2_pa(pte2) != pa) ||
		    (pte2_attr(pte2) != vm_page_pte2_attr(m))) {
			anychanged++;
			pte2_store(pte2p, PTE2_KERN(pa, PTE2_AP_KRW,
			    vm_page_pte2_attr(m)));
		}
		pte2p++;
	}
	/* Flush only if some entry was modified above. */
	if (__predict_false(anychanged))
		tlb_flush_range(sva, count * PAGE_SIZE);
}

/*
 *  This routine tears out page mappings from the
 *  kernel -- it is meant only for temporary mappings.
 *  Note: SMP coherent. Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	/* Single ranged flush after all entries are cleared. */
	tlb_flush_range(sva, va - sva);
}

/*
 *  Are we current address space or kernel?
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace)));
}

/*
 *  If the given pmap is not the current or kernel pmap, the returned
 *  pte2 must be released by passing it to pmap_pte2_release().
 */
static pt2_entry_t *
pmap_pte2(pmap_t pmap, vm_offset_t va)
{
	pt1_entry_t pte1;
	vm_paddr_t pt2pg_pa;

	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1))
		panic("%s: attempt to map PTE1", __func__);
	if (pte1_is_link(pte1)) {
		/* Are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (pt2map_entry(va));
		/* Note that L2 page table size is not equal to PAGE_SIZE. */
		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
		/*
		 * Foreign pmap: borrow the PMAP2/PADDR2 window; the mutex
		 * is held until pmap_pte2_release() and guards both the
		 * window pte and its TLB entry.
		 */
		mtx_lock(&PMAP2mutex);
		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
			tlb_flush((vm_offset_t)PADDR2);
		}
		return (PADDR2 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
	}
	return (NULL);
}

/*
 *  Releases a pte2 that was obtained from pmap_pte2().
 *  Be prepared for the pte2p being NULL.
 */
static __inline void
pmap_pte2_release(pt2_entry_t *pte2p)
{

	/* Only pointers into the PADDR2 window took the PMAP2mutex. */
	if ((pt2_entry_t *)(trunc_page((vm_offset_t)pte2p)) == PADDR2) {
		mtx_unlock(&PMAP2mutex);
	}
}

/*
 *  Super fast pmap_pte2 routine best used when scanning
 *  the pv lists. This eliminates many coarse-grained
 *  invltlb calls. Note that many of the pv list
 *  scans are across different pmaps.
It is very wasteful
 *  to do an entire tlb flush for checking a single mapping.
 *
 *  If the given pmap is not the current pmap, pvh_global_lock
 *  must be held and curthread pinned to a CPU.
 */
static pt2_entry_t *
pmap_pte2_quick(pmap_t pmap, vm_offset_t va)
{
	pt1_entry_t pte1;
	vm_paddr_t pt2pg_pa;

	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1))
		panic("%s: attempt to map PTE1", __func__);
	if (pte1_is_link(pte1)) {
		/* Are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (pt2map_entry(va));
		rw_assert(&pvh_global_lock, RA_WLOCKED);
		KASSERT(curthread->td_pinned > 0,
		    ("%s: curthread not pinned", __func__));
		/* Note that L2 page table size is not equal to PAGE_SIZE. */
		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
		/*
		 * The PMAP1/PADDR1 window needs no mutex: pvh_global_lock
		 * plus CPU pinning (asserted above) serialize its users.
		 * Counters track how often the cached mapping helps.
		 */
		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			/* Same pte, but this CPU's TLB may hold a stale copy. */
			PMAP1cpu = PCPU_GET(cpuid);
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
	}
	return (NULL);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa;
	pt1_entry_t pte1;
	pt2_entry_t *pte2p;

	PMAP_LOCK(pmap);
	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1))
		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
	else if (pte1_is_link(pte1)) {
		pte2p = pmap_pte2(pmap, va);
		pa = pte2_pa(pte2_load(pte2p)) | (va & PTE2_OFFSET);
		pmap_pte2_release(pte2p);
	} else
		/* No mapping: 0 is the conventional "not mapped" result. */
		pa = 0;
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	vm_paddr_t pa, lockpa;
	pt1_entry_t pte1;
	pt2_entry_t pte2, *pte2p;
	vm_page_t m;

	lockpa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (pte1_is_section(pte1)) {
		/* Writable (or read access requested) section mapping. */
		if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) {
			pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
			/* Retry after a pa-lock switch; pte1 may have changed. */
			if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
				goto retry;
			m = PHYS_TO_VM_PAGE(pa);
			vm_page_hold(m);
		}
	} else if (pte1_is_link(pte1)) {
		pte2p = pmap_pte2(pmap, va);
		pte2 = pte2_load(pte2p);
		pmap_pte2_release(pte2p);
		if (pte2_is_valid(pte2) &&
		    (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) {
			pa = pte2_pa(pte2);
			if (vm_page_pa_tryrelock(pmap, pa, &lockpa))
				goto retry;
			m = PHYS_TO_VM_PAGE(pa);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(lockpa);
	PMAP_UNLOCK(pmap);
	return (m);
}

/*
 *  Grow the number of kernel L2 page table entries, if needed.
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_page_t m;
	vm_paddr_t pt2pg_pa, pt2_pa;
	pt1_entry_t pte1;
	pt2_entry_t pte2;

	PDEBUG(1, printf("%s: addr = %#x\n", __func__, addr));
	/*
	 * All the time kernel_vm_end is first KVA for which underlying
	 * L2 page table is either not allocated or linked from L1 page table
	 * (not considering sections). Except for two possible cases:
	 *
	 *   (1) in the very beginning as long as pmap_growkernel() was
	 *       not called, it could be first unused KVA (which is not
	 *       rounded up to PTE1_SIZE),
	 *
	 *   (2) when all KVA space is mapped and kernel_map->max_offset
	 *       address is not rounded up to PTE1_SIZE. (For example,
	 *       it could be 0xFFFFFFFF.)
	 */
	kernel_vm_end = pte1_roundup(kernel_vm_end);
	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	addr = roundup2(addr, PTE1_SIZE);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		pte1 = pte1_load(kern_pte1(kernel_vm_end));
		if (pte1_is_valid(pte1)) {
			/* This 1MB slot is covered already; advance. */
			kernel_vm_end += PTE1_SIZE;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		/*
		 * kernel_vm_end_new is used in pmap_pinit() when kernel
		 * mappings are entered to new pmap all at once to avoid race
		 * between pmap_kenter_pte1() and kernel_vm_end increase.
		 * The same aplies to pmap_kenter_pt2tab().
		 */
		kernel_vm_end_new = kernel_vm_end + PTE1_SIZE;

		pte2 = pt2tab_load(kern_pt2tab_entry(kernel_vm_end));
		if (!pte2_is_valid(pte2)) {
			/*
			 * Install new PT2s page into kernel PT2TAB.
			 */
			m = vm_page_alloc(NULL,
			    pte1_index(kernel_vm_end) & ~PT2PG_MASK,
			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (m == NULL)
				panic("%s: no memory to grow kernel", __func__);
			/*
			 * QQQ: To link all new L2 page tables from L1 page
			 *      table now and so pmap_kenter_pte1() them
			 *      at once together with pmap_kenter_pt2tab()
			 *      could be nice speed up. However,
			 *      pmap_growkernel() does not happen so often...
			 * QQQ: The other TTBR is another option.
			 */
			pt2pg_pa = pmap_pt2pg_init(kernel_pmap, kernel_vm_end,
			    m);
		} else
			pt2pg_pa = pte2_pa(pte2);

		pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(kernel_vm_end));
		pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa));

		/* Publish the new end only after the link is installed. */
		kernel_vm_end = kernel_vm_end_new;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}

/* sysctl vm.kvm_size: total kernel virtual address space. */
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = vm_max_kernel_address - KERNBASE;

	return (sysctl_handle_long(oidp, &ksize, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

/* sysctl vm.kvm_free: KVA not yet backed by grown page tables. */
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = vm_max_kernel_address - kernel_vm_end;

	return (sysctl_handle_long(oidp, &kfree, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");

/***********************************************
 *
 *	Pmap allocation/deallocation routines.
 *
 ***********************************************/

/*
 *  Initialize the pmap for the swapper process.
 */
void
pmap_pinit0(pmap_t pmap)
{
	PDEBUG(1, printf("%s: pmap = %p\n", __func__, pmap));

	PMAP_LOCK_INIT(pmap);

	/*
	 * Kernel page table directory and pmap stuff around is already
	 * initialized, we are using it right now and here. So, finish
	 * only PMAP structures initialization for process0 ...
	 *
	 * Since the L1 page table and PT2TAB is shared with the kernel pmap,
	 * which is already included in the list "allpmaps", this pmap does
	 * not need to be inserted into that list.
	 */
	pmap->pm_pt1 = kern_pt1;
	pmap->pm_pt2tab = kern_pt2tab;
	CPU_ZERO(&pmap->pm_active);
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	CPU_SET(0, &pmap->pm_active);
}

/* Copy the L1 entries covering [sva, eva] without cache synchronization. */
static __inline void
pte1_copy_nosync(pt1_entry_t *spte1p, pt1_entry_t *dpte1p, vm_offset_t sva,
    vm_offset_t eva)
{
	u_int idx, count;

	idx = pte1_index(sva);
	count = (pte1_index(eva) - idx + 1) * sizeof(pt1_entry_t);
	bcopy(spte1p + idx, dpte1p + idx, count);
}

/* Copy the PT2TAB entries covering [sva, eva] without cache synchronization. */
static __inline void
pt2tab_copy_nosync(pt2_entry_t *spte2p, pt2_entry_t *dpte2p, vm_offset_t sva,
    vm_offset_t eva)
{
	u_int idx, count;

	idx = pt2tab_index(sva);
	count = (pt2tab_index(eva) - idx + 1) * sizeof(pt2_entry_t);
	bcopy(spte2p + idx, dpte2p + idx, count);
}

/*
 *  Initialize a preallocated and zeroed pmap structure,
 *  such as one in a vmspace structure.
 *
 *  Returns 1 on success and 0 if the L1 page table or PT2TAB
 *  could not be allocated.
 */
int
pmap_pinit(pmap_t pmap)
{
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;
	vm_paddr_t pa, pt2tab_pa;
	u_int i;

	PDEBUG(6, printf("%s: pmap = %p, pm_pt1 = %p\n", __func__, pmap,
	    pmap->pm_pt1));

	/*
	 * No need to allocate L2 page table space yet but we do need
	 * a valid L1 page table and PT2TAB table.
	 *
	 * Install shared kernel mappings to these tables. It's a little
	 * tricky as some parts of KVA are reserved for vectors, devices,
	 * and whatever else. These parts are supposed to be above
	 * vm_max_kernel_address. Thus two regions should be installed:
	 *
	 *   (1) <KERNBASE, kernel_vm_end),
	 *   (2) <vm_max_kernel_address, 0xFFFFFFFF>.
	 *
	 * QQQ: The second region should be stable enough to be installed
	 *      only once in time when the tables are allocated.
	 * QQQ: Maybe copy of both regions at once could be faster ...
	 * QQQ: Maybe the other TTBR is an option.
	 *
	 * Finally, install own PT2TAB table to these tables.
	 */

	if (pmap->pm_pt1 == NULL) {
		pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(kernel_arena,
		    NB_IN_PT1, M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0,
		    pt_memattr);
		if (pmap->pm_pt1 == NULL)
			return (0);
	}
	if (pmap->pm_pt2tab == NULL) {
		/*
		 * QQQ: (1) PT2TAB must be contiguous. If PT2TAB is one page
		 *      only, what should be the only size for 32 bit systems,
		 *      then we could allocate it with vm_page_alloc() and all
		 *      the stuff needed as other L2 page table pages.
		 *      (2) Note that a process PT2TAB is special L2 page table
		 *      page. Its mapping in kernel_arena is permanent and can
		 *      be used no matter which process is current. Its mapping
		 *      in PT2MAP can be used only for current process.
		 */
		pmap->pm_pt2tab = (pt2_entry_t *)kmem_alloc_attr(kernel_arena,
		    NB_IN_PT2TAB, M_NOWAIT | M_ZERO, 0, -1UL, pt_memattr);
		if (pmap->pm_pt2tab == NULL) {
			/*
			 * QQQ: As struct pmap is allocated from UMA with
			 *      UMA_ZONE_NOFREE flag, it's important to leave
			 *      no allocation in pmap if initialization failed.
			 */
			kmem_free(kernel_arena, (vm_offset_t)pmap->pm_pt1,
			    NB_IN_PT1);
			pmap->pm_pt1 = NULL;
			return (0);
		}
		/*
		 * QQQ: Each L2 page table page vm_page_t has pindex set to
		 *      pte1 index of virtual address mapped by this page.
		 *      It's not valid for non kernel PT2TABs themselves.
		 *      The pindex of these pages can not be altered because
		 *      of the way how they are allocated now. However, it
		 *      should not be a problem.
		 */
	}

	mtx_lock_spin(&allpmaps_lock);
	/*
	 * To avoid race with pmap_kenter_pte1() and pmap_kenter_pt2tab(),
	 * kernel_vm_end_new is used here instead of kernel_vm_end.
	 */
	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, KERNBASE,
	    kernel_vm_end_new - 1);
	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, vm_max_kernel_address,
	    0xFFFFFFFF);
	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, KERNBASE,
	    kernel_vm_end_new - 1);
	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, vm_max_kernel_address,
	    0xFFFFFFFF);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	/*
	 * Store PT2MAP PT2 pages (a.k.a. PT2TAB) in PT2TAB itself.
	 * I.e. self reference mapping. The PT2TAB is private, however mapped
	 * into shared PT2MAP space, so the mapping should be not global.
	 */
	pt2tab_pa = vtophys(pmap->pm_pt2tab);
	pte2p = pmap_pt2tab_entry(pmap, (vm_offset_t)PT2MAP);
	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
		pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
	}

	/* Insert PT2MAP PT2s into pmap PT1. */
	pte1p = pmap_pte1(pmap, (vm_offset_t)PT2MAP);
	for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
		pte1_store(pte1p++, PTE1_LINK(pa));
	}

	/*
	 * Now synchronize new mapping which was made above.
	 */
	pte1_sync_range(pmap->pm_pt1, NB_IN_PT1);
	pte2_sync_range(pmap->pm_pt2tab, NB_IN_PT2TAB);

	CPU_ZERO(&pmap->pm_active);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

	return (1);
}

#ifdef INVARIANTS
/* Check that no user-space PT2TAB entry is populated (diagnostic only). */
static boolean_t
pt2tab_user_is_empty(pt2_entry_t *tab)
{
	u_int i, end;

	end = pt2tab_index(VM_MAXUSER_ADDRESS);
	for (i = 0; i < end; i++)
		if (tab[i] != 0) return (FALSE);
	return (TRUE);
}
#endif
/*
 *  Release any resources held by the given physical map.
 *  Called when a pmap initialized by pmap_pinit is being released.
 *  Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
#ifdef INVARIANTS
	vm_offset_t start, end;
#endif
	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("%s: pmap resident count %ld != 0", __func__,
	    pmap->pm_stats.resident_count));
	KASSERT(pt2tab_user_is_empty(pmap->pm_pt2tab),
	    ("%s: has allocated user PT2(s)", __func__));
	KASSERT(CPU_EMPTY(&pmap->pm_active),
	    ("%s: pmap %p is active on some CPU(s)", __func__, pmap));

	mtx_lock_spin(&allpmaps_lock);
	LIST_REMOVE(pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

#ifdef INVARIANTS
	/* Scrub the kernel portion of the tables to catch stale reuse. */
	start = pte1_index(KERNBASE) * sizeof(pt1_entry_t);
	end = (pte1_index(0xFFFFFFFF) + 1) * sizeof(pt1_entry_t);
	bzero((char *)pmap->pm_pt1 + start, end - start);

	start = pt2tab_index(KERNBASE) * sizeof(pt2_entry_t);
	end = (pt2tab_index(0xFFFFFFFF) + 1) * sizeof(pt2_entry_t);
	bzero((char *)pmap->pm_pt2tab + start, end - start);
#endif
	/*
	 * We are leaving PT1 and PT2TAB allocated on released pmap,
	 * so hopefully UMA vmspace_zone will always be inited with
	 * UMA_ZONE_NOFREE flag.
 */
}

/*********************************************************
 *
 *  L2 table pages and their pages management routines.
 *
 *********************************************************/

/*
 *  Virtual interface for L2 page table wire counting.
 *
 *  Each L2 page table in a page has own counter which counts a number of
 *  valid mappings in a table. Global page counter counts mappings in all
 *  tables in a page plus a single itself mapping in PT2TAB.
 *
 *  During a promotion we leave the associated L2 page table counter
 *  untouched, so the table (strictly speaking a page which holds it)
 *  is never freed if promoted.
 *
 *  If a page m->wire_count == 1 then no valid mappings exist in any L2 page
 *  table in the page and the page itself is only mapped in PT2TAB.
 */

static __inline void
pt2_wirecount_init(vm_page_t m)
{
	u_int i;

	/*
	 * Note: A page m is allocated with VM_ALLOC_WIRED flag and
	 *       m->wire_count should be already set correctly.
	 *       So, there is no need to set it again herein.
	 */
	for (i = 0; i < NPT2_IN_PG; i++)
		m->md.pt2_wirecount[i] = 0;
}

static __inline void
pt2_wirecount_inc(vm_page_t m, uint32_t pte1_idx)
{

	/*
	 * Note: A just modified pte2 (i.e. already allocated)
	 *       is acquiring one extra reference which must be
	 *       explicitly cleared. It influences the KASSERTs herein.
	 *       All L2 page tables in a page always belong to the same
	 *       pmap, so we allow only one extra reference for the page.
	 */
	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] < (NPTE2_IN_PT2 + 1),
	    ("%s: PT2 is overflowing ...", __func__));
	KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
	    ("%s: PT2PG is overflowing ...", __func__));

	m->wire_count++;
	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]++;
}

static __inline void
pt2_wirecount_dec(vm_page_t m, uint32_t pte1_idx)
{

	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] != 0,
	    ("%s: PT2 is underflowing ...", __func__));
	KASSERT(m->wire_count > 1,
	    ("%s: PT2PG is underflowing ...", __func__));

	m->wire_count--;
	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]--;
}

/* Set the per-table wire count, adjusting the page total accordingly. */
static __inline void
pt2_wirecount_set(vm_page_t m, uint32_t pte1_idx, uint16_t count)
{

	KASSERT(count <= NPTE2_IN_PT2,
	    ("%s: invalid count %u", __func__, count));
	KASSERT(m->wire_count > m->md.pt2_wirecount[pte1_idx & PT2PG_MASK],
	    ("%s: PT2PG corrupting (%u, %u) ...", __func__, m->wire_count,
	    m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]));

	m->wire_count -= m->md.pt2_wirecount[pte1_idx & PT2PG_MASK];
	m->wire_count += count;
	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] = count;

	KASSERT(m->wire_count <= (NPTE2_IN_PG + 1),
	    ("%s: PT2PG is overflowed (%u) ...", __func__, m->wire_count));
}

static __inline uint32_t
pt2_wirecount_get(vm_page_t m, uint32_t pte1_idx)
{

	return (m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]);
}

static __inline boolean_t
pt2_is_empty(vm_page_t m, vm_offset_t va)
{

	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == 0);
}

static __inline boolean_t
pt2_is_full(vm_page_t m, vm_offset_t va)
{

	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] ==
	    NPTE2_IN_PT2);
}

static __inline boolean_t
pt2pg_is_empty(vm_page_t m)
{

	return (m->wire_count == 1);
}

/*
 *  This routine is called if the L2 page table
 *  is not mapped correctly.
 */
static vm_page_t
_pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
{
	uint32_t pte1_idx;
	pt1_entry_t *pte1p;
	pt2_entry_t pte2;
	vm_page_t m;
	vm_paddr_t pt2pg_pa, pt2_pa;

	pte1_idx = pte1_index(va);
	pte1p = pmap->pm_pt1 + pte1_idx;

	KASSERT(pte1_load(pte1p) == 0,
	    ("%s: pm_pt1[%#x] is not zero: %#x", __func__, pte1_idx,
	    pte1_load(pte1p)));

	pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, va));
	if (!pte2_is_valid(pte2)) {
		/*
		 * Install new PT2s page into pmap PT2TAB.
		 */
		m = vm_page_alloc(NULL, pte1_idx & ~PT2PG_MASK,
		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
		if (m == NULL) {
			if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
				/* Drop locks in lock order before sleeping. */
				PMAP_UNLOCK(pmap);
				rw_wunlock(&pvh_global_lock);
				VM_WAIT;
				rw_wlock(&pvh_global_lock);
				PMAP_LOCK(pmap);
			}

			/*
			 * Indicate the need to retry. While waiting,
			 * the L2 page table page may have been allocated.
			 */
			return (NULL);
		}
		pmap->pm_stats.resident_count++;
		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
	} else {
		pt2pg_pa = pte2_pa(pte2);
		m = PHYS_TO_VM_PAGE(pt2pg_pa);
	}

	pt2_wirecount_inc(m, pte1_idx);
	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
	pte1_store(pte1p, PTE1_LINK(pt2_pa));

	return (m);
}

static vm_page_t
pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
{
	u_int pte1_idx;
	pt1_entry_t *pte1p, pte1;
	vm_page_t m;

	pte1_idx = pte1_index(va);
retry:
	pte1p = pmap->pm_pt1 + pte1_idx;
	pte1 = pte1_load(pte1p);

	/*
	 * This supports switching from a 1MB page to a
	 * normal 4K page.
	 */
	if (pte1_is_section(pte1)) {
		(void)pmap_demote_pte1(pmap, pte1p, va);
		/*
		 * Reload pte1 after demotion.
		 *
		 * Note: Demotion can even fail as either PT2 cannot be found
		 *       for the virtual address or PT2PG can not be allocated.
		 */
		pte1 = pte1_load(pte1p);
	}

	/*
	 * If the L2 page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (pte1_is_link(pte1)) {
		m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
		pt2_wirecount_inc(m, pte1_idx);
	} else {
		/*
		 * Here if the PT2 isn't mapped, or if it has
		 * been deallocated.
		 */
		m = _pmap_allocpte2(pmap, va, flags);
		if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
			goto retry;
	}

	return (m);
}

/* Free all pages on the delayed-free list back to the page allocator. */
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 *  Schedule the specified unused L2 page table page to be freed. Specifically,
 *  add the page to the specified list of pages that will be released to the
 *  physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free)
{

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
#ifdef PMAP_DEBUG
	pmap_zero_page_check(m);
#endif
	m->flags |= PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 *  Unwire L2 page tables page.
 */
static void
pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pt1_entry_t *pte1p, opte1 __unused;
	pt2_entry_t *pte2p;
	uint32_t i;

	KASSERT(pt2pg_is_empty(m),
	    ("%s: pmap %p PT2PG %p wired", __func__, pmap, m));

	/*
	 * Unmap all L2 page tables in the page from L1 page table.
	 *
	 * QQQ: Individual L2 page tables (except the last one) can be unmapped
	 * earlier. However, we are doing that this way.
	 */
	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
	    ("%s: pmap %p va %#x PT2PG %p bad index", __func__, pmap, va, m));
	pte1p = pmap->pm_pt1 + m->pindex;
	for (i = 0; i < NPT2_IN_PG; i++, pte1p++) {
		KASSERT(m->md.pt2_wirecount[i] == 0,
		    ("%s: pmap %p PT2 %u (PG %p) wired", __func__, pmap, i, m));
		opte1 = pte1_load(pte1p);
		if (pte1_is_link(opte1)) {
			pte1_clear(pte1p);
			/*
			 * Flush intermediate TLB cache.
			 */
			pmap_tlb_flush(pmap, (m->pindex + i) << PTE1_SHIFT);
		}
#ifdef INVARIANTS
		else
			KASSERT((opte1 == 0) || pte1_is_section(opte1),
			    ("%s: pmap %p va %#x bad pte1 %x at %u", __func__,
			    pmap, va, opte1, i));
#endif
	}

	/*
	 * Unmap the page from PT2TAB.
	 */
	pte2p = pmap_pt2tab_entry(pmap, va);
	(void)pt2tab_load_clear(pte2p);
	pmap_tlb_flush(pmap, pt2map_pt2pg(va));

	m->wire_count = 0;
	pmap->pm_stats.resident_count--;

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the L2 page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
}

/*
 *  Decrements a L2 page table page's wire count, which is used to record the
 *  number of valid page table entries within the page. If the wire count
 *  drops to zero, then the page table page is unmapped. Returns TRUE if the
 *  page table page was unmapped and FALSE otherwise.
 */
static __inline boolean_t
pmap_unwire_pt2(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
	pt2_wirecount_dec(m, pte1_index(va));
	if (pt2pg_is_empty(m)) {
		/*
		 * QQQ: Wire count is zero, so whole page should be zero and
		 *      we can set PG_ZERO flag to it.
		 *      Note that when promotion is enabled, it takes some
		 *      more efforts. See pmap_unwire_pt2_all() below.
		 */
		pmap_unwire_pt2pg(pmap, va, m);
		pmap_add_delayed_free_list(m, free);
		return (TRUE);
	} else
		return (FALSE);
}

/*
 *  Drop a L2 page table page's wire count at once, which is used to record
 *  the number of valid L2 page table entries within the page. If the wire
 *  count drops to zero, then the L2 page table page is unmapped.
 */
static __inline void
pmap_unwire_pt2_all(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free)
{
	u_int pte1_idx = pte1_index(va);

	KASSERT(m->pindex == (pte1_idx & ~PT2PG_MASK),
	    ("%s: PT2 page's pindex is wrong", __func__));
	KASSERT(m->wire_count > pt2_wirecount_get(m, pte1_idx),
	    ("%s: bad pt2 wire count %u > %u", __func__, m->wire_count,
	    pt2_wirecount_get(m, pte1_idx)));

	/*
	 * It's possible that the L2 page table was never used.
	 * It happened in case that a section was created without promotion.
	 */
	if (pt2_is_full(m, va)) {
		pt2_wirecount_set(m, pte1_idx, 0);

		/*
		 * QQQ: We clear L2 page table now, so when L2 page table page
		 *      is going to be freed, we can set it PG_ZERO flag ...
		 *      This function is called only on section mappings, so
		 *      hopefully it's not too big an overload.
		 *
		 * XXX: If pmap is current, existing PT2MAP mapping could be
		 *      used for zeroing.
		 */
		pmap_zero_page_area(m, page_pt2off(pte1_idx), NB_IN_PT2);
	}
#ifdef INVARIANTS
	else
		KASSERT(pt2_is_empty(m, va), ("%s: PT2 is not empty (%u)",
		    __func__, pt2_wirecount_get(m, pte1_idx)));
#endif
	if (pt2pg_is_empty(m)) {
		pmap_unwire_pt2pg(pmap, va, m);
		pmap_add_delayed_free_list(m, free);
	}
}

/*
 *  After removing a L2 page table entry, this routine is used to
 *  conditionally free the page, and manage the hold/wire counts.
 */
static boolean_t
pmap_unuse_pt2(pmap_t pmap, vm_offset_t va, struct spglist *free)
{
	pt1_entry_t pte1;
	vm_page_t mpte;

	/* Kernel L2 page tables are never freed here. */
	if (va >= VM_MAXUSER_ADDRESS)
		return (FALSE);
	pte1 = pte1_load(pmap_pte1(pmap, va));
	mpte = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
	return (pmap_unwire_pt2(pmap, va, mpte, free));
}

/*************************************
 *
 *  Page management routines.
 *
 *************************************/

/* Compile-time checks of the pv chunk layout. */
CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 11);
CTASSERT(_NPCPV == 336);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */

static const uint32_t pc_freemask[_NPCM] = {
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE10
};

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail,
    0, "Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs,
    0, "Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");
#endif

/*
 *  Is given page managed?
 */
static __inline bool
is_managed(vm_paddr_t pa)
{
	vm_page_t m;

	m = PHYS_TO_VM_PAGE(pa);
	if (m == NULL)
		return (false);
	return ((m->oflags & VPO_UNMANAGED) == 0);
}

static __inline bool
pte1_is_managed(pt1_entry_t pte1)
{

	return (is_managed(pte1_pa(pte1)));
}

static __inline bool
pte2_is_managed(pt2_entry_t pte2)
{

	return (is_managed(pte2_pa(pte2)));
}

/*
 *  We are in a serious low memory condition. Resort to
 *  drastic measures to free some pages so we can allocate
 *  another pv entry chunk.
 */
static vm_page_t
pmap_pv_reclaim(pmap_t locked_pmap)
{
	struct pch newtail;
	struct pv_chunk *pc;
	struct md_page *pvh;
	pt1_entry_t *pte1p;
	pmap_t pmap;
	pt2_entry_t *pte2p, tpte2;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t m, m_pc;
	struct spglist free;
	uint32_t inuse;
	int bit, field, freed;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	pmap = NULL;
	m_pc = NULL;
	SLIST_INIT(&free);
	TAILQ_INIT(&newtail);
	/*
	 * Scan chunks in LRU order until either a pv entry is freed in
	 * locked_pmap, a whole chunk page is reclaimed, or a page table
	 * page has been freed.
	 */
	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
	    SLIST_EMPTY(&free))) {
		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
		if (pmap != pc->pc_pmap) {
			if (pmap != NULL) {
				if (pmap != locked_pmap)
					PMAP_UNLOCK(pmap);
			}
			pmap = pc->pc_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
				pmap = NULL;
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
				continue;
			}
		}

		/*
		 * Destroy every non-wired, 4 KB page mapping in the chunk.
		 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = ffs(inuse) - 1;
				pv = &pc->pc_pventry[field * 32 + bit];
				va = pv->pv_va;
				pte1p = pmap_pte1(pmap, va);
				if (pte1_is_section(pte1_load(pte1p)))
					continue;
				pte2p = pmap_pte2(pmap, va);
				tpte2 = pte2_load(pte2p);
				if ((tpte2 & PTE2_W) == 0)
					tpte2 = pte2_load_clear(pte2p);
				pmap_pte2_release(pte2p);
				if ((tpte2 & PTE2_W) != 0)
					continue;
				KASSERT(tpte2 != 0,
				    ("pmap_pv_reclaim: pmap %p va %#x zero pte",
				    pmap, va));
				pmap_tlb_flush(pmap, va);
				m = PHYS_TO_VM_PAGE(pte2_pa(tpte2));
				if (pte2_is_dirty(tpte2))
					vm_page_dirty(m);
				if ((tpte2 & PTE2_A) != 0)
					vm_page_aflag_set(m, PGA_REFERENCED);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				if (TAILQ_EMPTY(&m->md.pv_list) &&
				    (m->flags & PG_FICTITIOUS) == 0) {
					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
					if (TAILQ_EMPTY(&pvh->pv_list)) {
						vm_page_aflag_clear(m,
						    PGA_WRITEABLE);
					}
				}
				pc->pc_map[field] |= 1UL << bit;
				pmap_unuse_pt2(pmap, va, &free);
				freed++;
			}
		}
		if (freed == 0) {
			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
			continue;
		}
		/* Every freed mapping is for a 4 KB page. */
		pmap->pm_stats.resident_count -= freed;
		PV_STAT(pv_entry_frees += freed);
		PV_STAT(pv_entry_spare += freed);
		pv_entry_count -= freed;
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		for (field = 0; field < _NPCM; field++)
			if (pc->pc_map[field] != pc_freemask[field]) {
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);

				/*
				 * One freed pv entry in locked_pmap is
				 * sufficient.
				 */
				if (pmap == locked_pmap)
					goto out;
				break;
			}
		if (field == _NPCM) {
			PV_STAT(pv_entry_spare -= _NPCPV);
			PV_STAT(pc_chunk_count--);
			PV_STAT(pc_chunk_frees++);
			/* Entire chunk is free; return it. */
			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
			pmap_qremove((vm_offset_t)pc, 1);
			pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc);
			break;
		}
	}
out:
	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
	if (pmap != NULL) {
		if (pmap != locked_pmap)
			PMAP_UNLOCK(pmap);
	}
	if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) {
		m_pc = SLIST_FIRST(&free);
		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
		/* Recycle a freed page table page. */
		m_pc->wire_count = 1;
		atomic_add_int(&vm_cnt.v_wire_count, 1);
	}
	pmap_free_zero_pages(&free);
	return (m_pc);
}

/* Release a completely free pv chunk and its backing page. */
static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	PV_STAT(pv_entry_spare -= _NPCPV);
	PV_STAT(pc_chunk_count--);
	PV_STAT(pc_chunk_frees++);
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
	pmap_qremove((vm_offset_t)pc, 1);
	vm_page_unwire(m, PQ_NONE);
	vm_page_free(m);
	pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc);
}

/*
 *  Free the pv_entry back to the free list.
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 32;
	bit = idx % 32;
	pc->pc_map[field] |= 1ul << bit;
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx]) {
			/*
			 * 98% of the time, pc is already at the head of the
			 * list. If it isn't already, move it to the head.
			 */
			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
			    pc)) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			return;
		}
	/* Chunk is now entirely free; release it. */
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

/*
 *  Get a new pv_entry, allocating a block from the system
 *  when needed.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, boolean_t try)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_allocs++);
	pv_entry_count++;
	if (pv_entry_count > pv_entry_high_water)
		if (ratecheck(&lastprint, &printinterval))
			printf("Approaching the limit on PV entries, consider "
			    "increasing either the vm.pmap.shpgperproc or the "
			    "vm.pmap.pv_entry_max tunable.\n");
retry:
	/* Try the first chunk on the per-pmap list for a free slot. */
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = ffs(pc->pc_map[field]) - 1;
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 32 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			for (field = 0; field < _NPCM; field++)
				if (pc->pc_map[field] != 0) {
					PV_STAT(pv_entry_spare--);
					return (pv);	/* not full, return */
				}
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
			PV_STAT(pv_entry_spare--);
			return (pv);
		}
	}
	/*
	 * Access to the pte2list "pv_vafree" is synchronized by the pvh
	 * global lock. If "pv_vafree" is currently non-empty, it will
	 * remain non-empty until pmap_pte2list_alloc() completes.
	 */
	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
		if (try) {
			pv_entry_count--;
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		m = pmap_pv_reclaim(pmap);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(pc_chunk_count++);
	PV_STAT(pc_chunk_allocs++);
	pc = (struct pv_chunk *)pmap_pte2list_alloc(&pv_vafree);
	pmap_qenter((vm_offset_t)pc, &m, 1);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
	for (field = 1; field < _NPCM; field++)
		pc->pc_map[field] = pc_freemask[field];
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(pv_entry_spare += _NPCPV - 1);
	return (pv);
}

/*
 *  Create a pv entry for page at pa for
 *  (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pv = get_pv_entry(pmap, FALSE);
	pv->pv_va = va;
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
}

/* Unlink and return the pv entry for (pmap, va), or NULL if not found. */
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			break;
		}
	}
	return (pv);
}

/* Unlink and free the pv entry for (pmap, va); it must exist. */
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
	free_pv_entry(pmap, pv);
}

static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
	struct
md_page *pvh; 3127 3128 rw_assert(&pvh_global_lock, RA_WLOCKED); 3129 pmap_pvh_free(&m->md, pmap, va); 3130 if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { 3131 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3132 if (TAILQ_EMPTY(&pvh->pv_list)) 3133 vm_page_aflag_clear(m, PGA_WRITEABLE); 3134 } 3135} 3136 3137static void 3138pmap_pv_demote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3139{ 3140 struct md_page *pvh; 3141 pv_entry_t pv; 3142 vm_offset_t va_last; 3143 vm_page_t m; 3144 3145 rw_assert(&pvh_global_lock, RA_WLOCKED); 3146 KASSERT((pa & PTE1_OFFSET) == 0, 3147 ("pmap_pv_demote_pte1: pa is not 1mpage aligned")); 3148 3149 /* 3150 * Transfer the 1mpage's pv entry for this mapping to the first 3151 * page's pv list. 3152 */ 3153 pvh = pa_to_pvh(pa); 3154 va = pte1_trunc(va); 3155 pv = pmap_pvh_remove(pvh, pmap, va); 3156 KASSERT(pv != NULL, ("pmap_pv_demote_pte1: pv not found")); 3157 m = PHYS_TO_VM_PAGE(pa); 3158 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3159 /* Instantiate the remaining NPTE2_IN_PT2 - 1 pv entries. */ 3160 va_last = va + PTE1_SIZE - PAGE_SIZE; 3161 do { 3162 m++; 3163 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3164 ("pmap_pv_demote_pte1: page %p is not managed", m)); 3165 va += PAGE_SIZE; 3166 pmap_insert_entry(pmap, va, m); 3167 } while (va < va_last); 3168} 3169 3170static void 3171pmap_pv_promote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3172{ 3173 struct md_page *pvh; 3174 pv_entry_t pv; 3175 vm_offset_t va_last; 3176 vm_page_t m; 3177 3178 rw_assert(&pvh_global_lock, RA_WLOCKED); 3179 KASSERT((pa & PTE1_OFFSET) == 0, 3180 ("pmap_pv_promote_pte1: pa is not 1mpage aligned")); 3181 3182 /* 3183 * Transfer the first page's pv entry for this mapping to the 3184 * 1mpage's pv list. Aside from avoiding the cost of a call 3185 * to get_pv_entry(), a transfer avoids the possibility that 3186 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() 3187 * removes one of the mappings that is being promoted. 
3188 */ 3189 m = PHYS_TO_VM_PAGE(pa); 3190 va = pte1_trunc(va); 3191 pv = pmap_pvh_remove(&m->md, pmap, va); 3192 KASSERT(pv != NULL, ("pmap_pv_promote_pte1: pv not found")); 3193 pvh = pa_to_pvh(pa); 3194 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 3195 /* Free the remaining NPTE2_IN_PT2 - 1 pv entries. */ 3196 va_last = va + PTE1_SIZE - PAGE_SIZE; 3197 do { 3198 m++; 3199 va += PAGE_SIZE; 3200 pmap_pvh_free(&m->md, pmap, va); 3201 } while (va < va_last); 3202} 3203 3204/* 3205 * Conditionally create a pv entry. 3206 */ 3207static boolean_t 3208pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 3209{ 3210 pv_entry_t pv; 3211 3212 rw_assert(&pvh_global_lock, RA_WLOCKED); 3213 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3214 if (pv_entry_count < pv_entry_high_water && 3215 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 3216 pv->pv_va = va; 3217 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3218 return (TRUE); 3219 } else 3220 return (FALSE); 3221} 3222 3223/* 3224 * Create the pv entries for each of the pages within a section. 3225 */ 3226static boolean_t 3227pmap_pv_insert_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3228{ 3229 struct md_page *pvh; 3230 pv_entry_t pv; 3231 3232 rw_assert(&pvh_global_lock, RA_WLOCKED); 3233 if (pv_entry_count < pv_entry_high_water && 3234 (pv = get_pv_entry(pmap, TRUE)) != NULL) { 3235 pv->pv_va = va; 3236 pvh = pa_to_pvh(pa); 3237 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 3238 return (TRUE); 3239 } else 3240 return (FALSE); 3241} 3242 3243static inline void 3244pmap_tlb_flush_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t npte1) 3245{ 3246 3247 /* Kill all the small mappings or the big one only. */ 3248 if (pte1_is_section(npte1)) 3249 pmap_tlb_flush_range(pmap, pte1_trunc(va), PTE1_SIZE); 3250 else 3251 pmap_tlb_flush(pmap, pte1_trunc(va)); 3252} 3253 3254/* 3255 * Update kernel pte1 on all pmaps. 3256 * 3257 * The following function is called only on one cpu with disabled interrupts. 
 * In SMP case, smp_rendezvous_cpus() is used to stop other cpus.  This way
 * nobody can invoke explicit hardware table walk during the update of pte1.
 * Unsolicited hardware table walk can still happen, invoked by speculative
 * data or instruction prefetch or even by speculative hardware table walk.
 *
 * The break-before-make approach should be implemented here.  However, it's
 * not so easy to do that for kernel mappings as it would be unhappy to unmap
 * itself unexpectedly but voluntarily.
 */
static void
pmap_update_pte1_kernel(vm_offset_t va, pt1_entry_t npte1)
{
	pmap_t pmap;
	pt1_entry_t *pte1p;

	/*
	 * Get current pmap.  Interrupts should be disabled here
	 * so PCPU_GET() is done atomically.
	 */
	pmap = PCPU_GET(curpmap);
	if (pmap == NULL)
		pmap = kernel_pmap;

	/*
	 * (1) Change pte1 on current pmap.
	 * (2) Flush all obsolete TLB entries on current CPU.
	 * (3) Change pte1 on all pmaps.
	 * (4) Flush all obsolete TLB entries on all CPUs in SMP case.
	 */

	pte1p = pmap_pte1(pmap, va);
	pte1_store(pte1p, npte1);

	/* Kill all the small mappings or the big one only. */
	if (pte1_is_section(npte1)) {
		pmap_pte1_kern_promotions++;
		tlb_flush_range_local(pte1_trunc(va), PTE1_SIZE);
	} else {
		pmap_pte1_kern_demotions++;
		tlb_flush_local(pte1_trunc(va));
	}

	/*
	 * In SMP case, this function is called when all cpus are at smp
	 * rendezvous, so there is no need to use 'allpmaps_lock' lock here.
	 * In UP case, the function is called with this lock locked.
	 */
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		pte1p = pmap_pte1(pmap, va);
		pte1_store(pte1p, npte1);
	}

#ifdef SMP
	/* Kill all the small mappings or the big one only. */
	if (pte1_is_section(npte1))
		tlb_flush_range(pte1_trunc(va), PTE1_SIZE);
	else
		tlb_flush(pte1_trunc(va));
#endif
}

#ifdef SMP
/* Argument block for the pmap_update_pte1_action() rendezvous callback. */
struct pte1_action {
	vm_offset_t va;
	pt1_entry_t npte1;
	u_int update;		/* CPU that updates the PTE1 */
};

/*
 * Rendezvous callback: only the CPU that initiated the change performs
 * the actual kernel pte1 update; the other CPUs are merely held.
 */
static void
pmap_update_pte1_action(void *arg)
{
	struct pte1_action *act = arg;

	if (act->update == PCPU_GET(cpuid))
		pmap_update_pte1_kernel(act->va, act->npte1);
}

/*
 * Change pte1 on current pmap.
 * Note that kernel pte1 must be changed on all pmaps.
 *
 * According to the architecture reference manual published by ARM,
 * the behaviour is UNPREDICTABLE when two or more TLB entries map the same VA.
 * According to this manual, UNPREDICTABLE behaviours must never happen in
 * a viable system.  In contrast, on x86 processors, it is not specified which
 * TLB entry mapping the virtual address will be used, but the MMU doesn't
 * generate a bogus translation the way it does on Cortex-A8 rev 2 (Beaglebone
 * Black).
 *
 * It's a problem when either promotion or demotion is being done.  The pte1
 * update and appropriate TLB flush must be done atomically in general.
 */
static void
pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va,
    pt1_entry_t npte1)
{

	if (pmap == kernel_pmap) {
		struct pte1_action act;

		sched_pin();
		act.va = va;
		act.npte1 = npte1;
		act.update = PCPU_GET(cpuid);
		smp_rendezvous_cpus(all_cpus, smp_no_rendevous_barrier,
		    pmap_update_pte1_action, NULL, &act);
		sched_unpin();
	} else {
		register_t cspr;

		/*
		 * Use break-before-make approach for changing userland
		 * mappings.  It can cause L1 translation aborts on other
		 * cores in SMP case.  So, special treatment is implemented
		 * in pmap_fault().  To reduce the likelihood that another core
		 * will be affected by the broken mapping, disable interrupts
		 * until the mapping change is completed.
		 */
		cspr = disable_interrupts(PSR_I | PSR_F);
		pte1_clear(pte1p);
		pmap_tlb_flush_pte1(pmap, va, npte1);
		pte1_store(pte1p, npte1);
		restore_interrupts(cspr);
	}
}
#else
static void
pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va,
    pt1_entry_t npte1)
{

	if (pmap == kernel_pmap) {
		mtx_lock_spin(&allpmaps_lock);
		pmap_update_pte1_kernel(va, npte1);
		mtx_unlock_spin(&allpmaps_lock);
	} else {
		register_t cspr;

		/*
		 * Use break-before-make approach for changing userland
		 * mappings.  It's absolutely safe in UP case when interrupts
		 * are disabled.
		 */
		cspr = disable_interrupts(PSR_I | PSR_F);
		pte1_clear(pte1p);
		pmap_tlb_flush_pte1(pmap, va, npte1);
		pte1_store(pte1p, npte1);
		restore_interrupts(cspr);
	}
}
#endif

/*
 * Tries to promote the NPTE2_IN_PT2, contiguous 4KB page mappings that are
 * within a single page table page (PT2) to a single 1MB page mapping.
 * For promotion to occur, two conditions must be met: (1) the 4KB page
 * mappings must map aligned, contiguous physical memory and (2) the 4KB page
 * mappings must have identical characteristics.
 *
 * Managed (PG_MANAGED) mappings within the kernel address space are not
 * promoted.  The reason is that kernel PTE1s are replicated in each pmap but
 * pmap_remove_write(), pmap_clear_modify(), and pmap_clear_reference() only
 * read the PTE1 from the kernel pmap.
 */
static void
pmap_promote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
{
	pt1_entry_t npte1;
	pt2_entry_t *fpte2p, fpte2, fpte2_fav;
	pt2_entry_t *pte2p, pte2;
	vm_offset_t pteva __unused;
	vm_page_t m __unused;

	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
	    pmap, va, pte1_load(pte1p), pte1p));

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Examine the first PTE2 in the specified PT2.  Abort if this PTE2 is
	 * either invalid, unused, or does not map the first 4KB physical page
	 * within a 1MB page.
	 */
	fpte2p = pmap_pte2_quick(pmap, pte1_trunc(va));
	fpte2 = pte2_load(fpte2p);
	if ((fpte2 & ((PTE2_FRAME & PTE1_OFFSET) | PTE2_A | PTE2_V)) !=
	    (PTE2_A | PTE2_V)) {
		pmap_pte1_p_failures++;
		CTR3(KTR_PMAP, "%s: failure(1) for va %#x in pmap %p",
		    __func__, va, pmap);
		return;
	}
	if (pte2_is_managed(fpte2) && pmap == kernel_pmap) {
		pmap_pte1_p_failures++;
		CTR3(KTR_PMAP, "%s: failure(2) for va %#x in pmap %p",
		    __func__, va, pmap);
		return;
	}
	if ((fpte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
		/*
		 * When page is not modified, PTE2_RO can be set without
		 * a TLB invalidation.
		 */
		fpte2 |= PTE2_RO;
		pte2_store(fpte2p, fpte2);
	}

	/*
	 * Examine each of the other PTE2s in the specified PT2.  Abort if this
	 * PTE2 maps an unexpected 4KB physical page or does not have identical
	 * characteristics to the first PTE2.
	 */
	fpte2_fav = (fpte2 & (PTE2_FRAME | PTE2_A | PTE2_V));
	fpte2_fav += PTE1_SIZE - PTE2_SIZE;	/* examine from the end */
	for (pte2p = fpte2p + NPTE2_IN_PT2 - 1; pte2p > fpte2p; pte2p--) {
		pte2 = pte2_load(pte2p);
		if ((pte2 & (PTE2_FRAME | PTE2_A | PTE2_V)) != fpte2_fav) {
			pmap_pte1_p_failures++;
			CTR3(KTR_PMAP, "%s: failure(3) for va %#x in pmap %p",
			    __func__, va, pmap);
			return;
		}
		if ((pte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
			/*
			 * When page is not modified, PTE2_RO can be set
			 * without a TLB invalidation.  See note above.
			 */
			pte2 |= PTE2_RO;
			pte2_store(pte2p, pte2);
			pteva = pte1_trunc(va) | (pte2 & PTE1_OFFSET &
			    PTE2_FRAME);
			CTR3(KTR_PMAP, "%s: protect for va %#x in pmap %p",
			    __func__, pteva, pmap);
		}
		if ((pte2 & PTE2_PROMOTE) != (fpte2 & PTE2_PROMOTE)) {
			pmap_pte1_p_failures++;
			CTR3(KTR_PMAP, "%s: failure(4) for va %#x in pmap %p",
			    __func__, va, pmap);
			return;
		}

		fpte2_fav -= PTE2_SIZE;
	}
	/*
	 * The page table page in its current state will stay in PT2TAB
	 * until the PTE1 mapping the section is demoted by pmap_demote_pte1()
	 * or destroyed by pmap_remove_pte1().
	 *
	 * Note that L2 page table size is not equal to PAGE_SIZE.
	 */
	m = PHYS_TO_VM_PAGE(trunc_page(pte1_link_pa(pte1_load(pte1p))));
	KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size],
	    ("%s: PT2 page is out of range", __func__));
	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
	    ("%s: PT2 page's pindex is wrong", __func__));

	/*
	 * Get pte1 from pte2 format.
	 */
	npte1 = (fpte2 & PTE1_FRAME) | ATTR_TO_L1(fpte2) | PTE1_V;

	/*
	 * Promote the pv entries.
	 */
	if (pte2_is_managed(fpte2))
		pmap_pv_promote_pte1(pmap, va, pte1_pa(npte1));

	/*
	 * Promote the mappings.
	 */
	pmap_change_pte1(pmap, pte1p, va, npte1);

	pmap_pte1_promotions++;
	CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p",
	    __func__, va, pmap);

	PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n",
	    __func__, pmap, va, npte1, pte1_load(pte1p), pte1p));
}

/*
 * Zero L2 page table page.
 */
static __inline void
pmap_clear_pt2(pt2_entry_t *fpte2p)
{
	pt2_entry_t *pte2p;

	for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++)
		pte2_clear(pte2p);

}

/*
 * Removes a 1MB page mapping from the kernel pmap.
 */
static void
pmap_remove_kernel_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
{
	vm_page_t m;
	uint32_t pte1_idx;
	pt2_entry_t *fpte2p;
	vm_paddr_t pt2_pa;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	m = pmap_pt2_page(pmap, va);
	if (m == NULL)
		/*
		 * QQQ: Is this function called only on promoted pte1?
		 *      We certainly do section mappings directly
		 *      (without promotion) in kernel !!!
		 */
		panic("%s: missing pt2 page", __func__);

	pte1_idx = pte1_index(va);

	/*
	 * Initialize the L2 page table.
	 */
	fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx);
	pmap_clear_pt2(fpte2p);

	/*
	 * Remove the mapping.
	 */
	pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(m), pte1_idx);
	pmap_kenter_pte1(va, PTE1_LINK(pt2_pa));

	/*
	 * QQQ: We do not need to invalidate PT2MAP mapping
	 *      as we did not change it.  I.e. the L2 page table page
	 *      was and still is mapped the same way.
	 */
}

/*
 * Do the things to unmap a section in a process
 */
static void
pmap_remove_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva,
    struct spglist *free)
{
	pt1_entry_t opte1;
	struct md_page *pvh;
	vm_offset_t eva, va;
	vm_page_t m;

	PDEBUG(6, printf("%s(%p): va %#x pte1 %#x at %p\n", __func__, pmap, sva,
	    pte1_load(pte1p), pte1p));

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT((sva & PTE1_OFFSET) == 0,
	    ("%s: sva is not 1mpage aligned", __func__));

	/*
	 * Clear and invalidate the mapping.  It should occupy one and only TLB
	 * entry.  So, pmap_tlb_flush() called with aligned address should be
	 * sufficient.
	 */
	opte1 = pte1_load_clear(pte1p);
	pmap_tlb_flush(pmap, sva);

	if (pte1_is_wired(opte1))
		pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE;
	pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE;
	if (pte1_is_managed(opte1)) {
		pvh = pa_to_pvh(pte1_pa(opte1));
		pmap_pvh_free(pvh, pmap, sva);
		eva = sva + PTE1_SIZE;
		/* Propagate dirty/referenced state to each constituent page. */
		for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1));
		    va < eva; va += PAGE_SIZE, m++) {
			if (pte1_is_dirty(opte1))
				vm_page_dirty(m);
			if (opte1 & PTE1_A)
				vm_page_aflag_set(m, PGA_REFERENCED);
			if (TAILQ_EMPTY(&m->md.pv_list) &&
			    TAILQ_EMPTY(&pvh->pv_list))
				vm_page_aflag_clear(m, PGA_WRITEABLE);
		}
	}
	if (pmap == kernel_pmap) {
		/*
		 * L2 page table(s) can't be removed from kernel map as
		 * kernel counts on it (stuff around pmap_growkernel()).
		 */
		pmap_remove_kernel_pte1(pmap, pte1p, sva);
	} else {
		/*
		 * Get associated L2 page table page.
		 * It's possible that the page was never allocated.
		 */
		m = pmap_pt2_page(pmap, sva);
		if (m != NULL)
			pmap_unwire_pt2_all(pmap, sva, m, free);
	}
}

/*
 * Fills L2 page table page with mappings to consecutive physical pages.
 */
static __inline void
pmap_fill_pt2(pt2_entry_t *fpte2p, pt2_entry_t npte2)
{
	pt2_entry_t *pte2p;

	for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) {
		pte2_store(pte2p, npte2);
		npte2 += PTE2_SIZE;
	}
}

/*
 * Tries to demote a 1MB page mapping.  If demotion fails, the
 * 1MB page mapping is invalidated.
 */
static boolean_t
pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
{
	pt1_entry_t opte1, npte1;
	pt2_entry_t *fpte2p, npte2;
	vm_paddr_t pt2pg_pa, pt2_pa;
	vm_page_t m;
	struct spglist free;
	uint32_t pte1_idx, isnew = 0;

	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
	    pmap, va, pte1_load(pte1p), pte1p));

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	opte1 = pte1_load(pte1p);
	KASSERT(pte1_is_section(opte1), ("%s: opte1 not a section", __func__));

	if ((opte1 & PTE1_A) == 0 || (m = pmap_pt2_page(pmap, va)) == NULL) {
		KASSERT(!pte1_is_wired(opte1),
		    ("%s: PT2 page for a wired mapping is missing", __func__));

		/*
		 * Invalidate the 1MB page mapping and return
		 * "failure" if the mapping was never accessed or the
		 * allocation of the new page table page fails.
		 */
		if ((opte1 & PTE1_A) == 0 || (m = vm_page_alloc(NULL,
		    pte1_index(va) & ~PT2PG_MASK, VM_ALLOC_NOOBJ |
		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) {
			SLIST_INIT(&free);
			pmap_remove_pte1(pmap, pte1p, pte1_trunc(va), &free);
			pmap_free_zero_pages(&free);
			CTR3(KTR_PMAP, "%s: failure for va %#x in pmap %p",
			    __func__, va, pmap);
			return (FALSE);
		}
		if (va < VM_MAXUSER_ADDRESS)
			pmap->pm_stats.resident_count++;

		isnew = 1;

		/*
		 * We init all L2 page tables in the page even if
		 * we are going to change everything for one L2 page
		 * table in a while.
		 */
		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
	} else {
		if (va < VM_MAXUSER_ADDRESS) {
			if (pt2_is_empty(m, va))
				isnew = 1;	/* Demoting section w/o promotion. */
#ifdef INVARIANTS
			else
				KASSERT(pt2_is_full(m, va), ("%s: bad PT2 wire"
				    " count %u", __func__,
				    pt2_wirecount_get(m, pte1_index(va))));
#endif
		}
	}

	pt2pg_pa = VM_PAGE_TO_PHYS(m);
	pte1_idx = pte1_index(va);
	/*
	 * If the pmap is current, then the PT2MAP can provide access to
	 * the page table page (promoted L2 page tables are not unmapped).
	 * Otherwise, temporarily map the L2 page table page (m) into
	 * the kernel's address space at either PADDR1 or PADDR2.
	 *
	 * Note that L2 page table size is not equal to PAGE_SIZE.
	 */
	if (pmap_is_current(pmap))
		fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx);
	else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) {
		/* PADDR1 may be used only pinned and with pvh lock held. */
		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			tlb_flush_local((vm_offset_t)PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		fpte2p = page_pt2((vm_offset_t)PADDR1, pte1_idx);
	} else {
		mtx_lock(&PMAP2mutex);
		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
			tlb_flush((vm_offset_t)PADDR2);
		}
		fpte2p = page_pt2((vm_offset_t)PADDR2, pte1_idx);
	}
	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
	npte1 = PTE1_LINK(pt2_pa);

	KASSERT((opte1 & PTE1_A) != 0,
	    ("%s: opte1 is missing PTE1_A", __func__));
	KASSERT((opte1 & (PTE1_NM | PTE1_RO)) != PTE1_NM,
	    ("%s: opte1 has PTE1_NM", __func__));

	/*
	 * Get pte2 from pte1 format.
	 */
	npte2 = pte1_pa(opte1) | ATTR_TO_L2(opte1) | PTE2_V;

	/*
	 * If the L2 page table page is new, initialize it.  If the mapping
	 * has changed attributes, update the page table entries.
	 */
	if (isnew != 0) {
		pt2_wirecount_set(m, pte1_idx, NPTE2_IN_PT2);
		pmap_fill_pt2(fpte2p, npte2);
	} else if ((pte2_load(fpte2p) & PTE2_PROMOTE) !=
	    (npte2 & PTE2_PROMOTE))
		pmap_fill_pt2(fpte2p, npte2);

	KASSERT(pte2_pa(pte2_load(fpte2p)) == pte2_pa(npte2),
	    ("%s: fpte2p and npte2 map different physical addresses",
	    __func__));

	if (fpte2p == PADDR2)
		mtx_unlock(&PMAP2mutex);

	/*
	 * Demote the mapping.  This pmap is locked.  The old PTE1 has
	 * PTE1_A set.  If the old PTE1 has not PTE1_RO set, it also
	 * has not PTE1_NM set.  Thus, there is no danger of a race with
	 * another processor changing the setting of PTE1_A and/or PTE1_NM
	 * between the read above and the store below.
	 */
	pmap_change_pte1(pmap, pte1p, va, npte1);

	/*
	 * Demote the pv entry.  This depends on the earlier demotion
	 * of the mapping.  Specifically, the (re)creation of a per-
	 * page pv entry might trigger the execution of pmap_pv_reclaim(),
	 * which might reclaim a newly (re)created per-page pv entry
	 * and destroy the associated mapping.  In order to destroy
	 * the mapping, the PTE1 must have already changed from mapping
	 * the 1mpage to referencing the page table page.
	 */
	if (pte1_is_managed(opte1))
		pmap_pv_demote_pte1(pmap, va, pte1_pa(opte1));

	pmap_pte1_demotions++;
	CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p",
	    __func__, va, pmap);

	PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n",
	    __func__, pmap, va, npte1, pte1_load(pte1p), pte1p));
	return (TRUE);
}

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    u_int flags, int8_t psind)
{
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p;
	pt2_entry_t npte2, opte2;
	pv_entry_t pv;
	vm_paddr_t opa, pa;
	vm_page_t mpte2, om;
	boolean_t wired;

	va = trunc_page(va);
	mpte2 = NULL;
	wired = (flags & PMAP_ENTER_WIRED) != 0;

	KASSERT(va <= vm_max_kernel_address, ("%s: toobig", __func__));
	KASSERT(va < UPT2V_MIN_ADDRESS || va >= UPT2V_MAX_ADDRESS,
	    ("%s: invalid to pmap_enter page table pages (va: 0x%x)", __func__,
	    va));
	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
		VM_OBJECT_ASSERT_LOCKED(m->object);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	sched_pin();

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte2 = pmap_allocpte2(pmap, va, flags);
		if (mpte2 == NULL) {
			KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
			    ("pmap_allocpte2 failed with sleep allowed"));
			sched_unpin();
			rw_wunlock(&pvh_global_lock);
			PMAP_UNLOCK(pmap);
			return (KERN_RESOURCE_SHORTAGE);
		}
	}
	pte1p = pmap_pte1(pmap, va);
	if (pte1_is_section(pte1_load(pte1p)))
		panic("%s: attempted on 1MB page", __func__);
	pte2p = pmap_pte2_quick(pmap, va);
	if (pte2p == NULL)
		panic("%s: invalid L1 page table entry va=%#x", __func__, va);

	om = NULL;
	pa = VM_PAGE_TO_PHYS(m);
	opte2 = pte2_load(pte2p);
	opa = pte2_pa(opte2);
	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (pte2_is_valid(opte2) && (opa == pa)) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT2 pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is wired,
		 * the PT2 page will be also.
		 */
		if (wired && !pte2_is_wired(opte2))
			pmap->pm_stats.wired_count++;
		else if (!wired && pte2_is_wired(opte2))
			pmap->pm_stats.wired_count--;

		/*
		 * Remove extra pte2 reference
		 */
		if (mpte2)
			pt2_wirecount_dec(mpte2, pte1_index(va));
		if (pte2_is_managed(opte2))
			om = m;
		goto validate;
	}

	/*
	 * QQQ: We think that changing physical address on writeable mapping
	 *      is not safe.  Well, maybe on kernel address space with correct
	 *      locking, it can make a sense.  However, we have no idea why
	 *      anyone should do that on user address space.  Are we wrong?
	 */
	KASSERT((opa == 0) || (opa == pa) ||
	    !pte2_is_valid(opte2) || ((opte2 & PTE2_RO) != 0),
	    ("%s: pmap %p va %#x(%#x) opa %#x pa %#x - gotcha %#x %#x!",
	    __func__, pmap, va, opte2, opa, pa, flags, prot));

	pv = NULL;

	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		if (pte2_is_wired(opte2))
			pmap->pm_stats.wired_count--;
		if (pte2_is_managed(opte2)) {
			om = PHYS_TO_VM_PAGE(opa);
			pv = pmap_pvh_remove(&om->md, pmap, va);
		}
		/*
		 * Remove extra pte2 reference
		 */
		if (mpte2 != NULL)
			pt2_wirecount_dec(mpte2, va >> PTE1_SHIFT);
	} else
		pmap->pm_stats.resident_count++;

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("%s: managed mapping within the clean submap", __func__));
		if (pv == NULL)
			pv = get_pv_entry(pmap, FALSE);
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
	} else if (pv != NULL)
		free_pv_entry(pmap, pv);

	/*
	 * Increment counters
	 */
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	npte2 = PTE2(pa, PTE2_NM, vm_page_pte2_attr(m));
	if (prot & VM_PROT_WRITE) {
		if (pte2_is_managed(npte2))
			vm_page_aflag_set(m, PGA_WRITEABLE);
	} else
		npte2 |= PTE2_RO;
	if ((prot & VM_PROT_EXECUTE) == 0)
		npte2 |= PTE2_NX;
	if (wired)
		npte2 |= PTE2_W;
	if (va < VM_MAXUSER_ADDRESS)
		npte2 |= PTE2_U;
	if (pmap != kernel_pmap)
		npte2 |= PTE2_NG;

	/*
	 * If the mapping or permission bits are different, we need
	 * to update the pte2.
	 *
	 * QQQ: Think again and again what to do
	 *      if the mapping is going to be changed!
	 */
	if ((opte2 & ~(PTE2_NM | PTE2_A)) != (npte2 & ~(PTE2_NM | PTE2_A))) {
		/*
		 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
		 * is set.  Do it now, before the mapping is stored and made
		 * valid for hardware table walk.  If done later, there is a race
		 * for other threads of current process in lazy loading case.
		 * Don't do it for kernel memory which is mapped with exec
		 * permission even if the memory isn't going to hold executable
		 * code.  The only time when icache sync is needed is after
		 * kernel module is loaded and the relocation info is processed.
		 * And it's done in elf_cpu_load_file().
		 *
		 * QQQ: (1) Does it exist any better way where
		 *          or how to sync icache?
		 *      (2) Now, we do it on a page basis.
		 */
		if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
		    m->md.pat_mode == VM_MEMATTR_WB_WA &&
		    (opa != pa || (opte2 & PTE2_NX)))
			cache_icache_sync_fresh(va, pa, PAGE_SIZE);

		npte2 |= PTE2_A;
		if (flags & VM_PROT_WRITE)
			npte2 &= ~PTE2_NM;
		if (opte2 & PTE2_V) {
			/* Change mapping with break-before-make approach. */
			opte2 = pte2_load_clear(pte2p);
			pmap_tlb_flush(pmap, va);
			pte2_store(pte2p, npte2);
			if (opte2 & PTE2_A) {
				if (pte2_is_managed(opte2))
					vm_page_aflag_set(om, PGA_REFERENCED);
			}
			if (pte2_is_dirty(opte2)) {
				if (pte2_is_managed(opte2))
					vm_page_dirty(om);
			}
			if (pte2_is_managed(opte2) &&
			    TAILQ_EMPTY(&om->md.pv_list) &&
			    ((om->flags & PG_FICTITIOUS) != 0 ||
			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
				vm_page_aflag_clear(om, PGA_WRITEABLE);
		} else
			pte2_store(pte2p, npte2);
	}
#if 0
	else {
		/*
		 * QQQ: In time when both access and not modified bits are
		 *      emulated by software, this should not happen.  Some
		 *      analysis is needed, if this really happens.  Missing
		 *      tlb flush somewhere could be the reason.
		 */
		panic("%s: pmap %p va %#x opte2 %x npte2 %x !!", __func__, pmap,
		    va, opte2, npte2);
	}
#endif
	/*
	 * If both the L2 page table page and the reservation are fully
	 * populated, then attempt promotion.
	 */
	if ((mpte2 == NULL || pt2_is_full(mpte2, va)) &&
	    sp_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
	    vm_reserv_level_iffullpop(m) == 0)
		pmap_promote_pte1(pmap, pte1p, va);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	return (KERN_SUCCESS);
}

/*
 * Do the things to unmap a page in a process.
 */
static int
pmap_remove_pte2(pmap_t pmap, pt2_entry_t *pte2p, vm_offset_t va,
    struct spglist *free)
{
	pt2_entry_t opte2;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/* Clear and invalidate the mapping. */
	opte2 = pte2_load_clear(pte2p);
	pmap_tlb_flush(pmap, va);

	KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %#x not link pte2 %#x",
	    __func__, pmap, va, opte2));

	if (opte2 & PTE2_W)
		pmap->pm_stats.wired_count -= 1;
	pmap->pm_stats.resident_count -= 1;
	if (pte2_is_managed(opte2)) {
		m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
		if (pte2_is_dirty(opte2))
			vm_page_dirty(m);
		if (opte2 & PTE2_A)
			vm_page_aflag_set(m, PGA_REFERENCED);
		pmap_remove_entry(pmap, m, va);
	}
	return (pmap_unuse_pt2(pmap, va, free));
}

/*
 * Remove a single page from a process address space.
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free)
{
        pt2_entry_t *pte2p;

        rw_assert(&pvh_global_lock, RA_WLOCKED);
        /* pmap_pte2_quick() uses a per-CPU mapping window, so the caller
         * must already be pinned to this CPU. */
        KASSERT(curthread->td_pinned > 0,
            ("%s: curthread not pinned", __func__));
        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
        if ((pte2p = pmap_pte2_quick(pmap, va)) == NULL ||
            !pte2_is_valid(pte2_load(pte2p)))
                return;
        pmap_remove_pte2(pmap, pte2p, va, free);
}

/*
 *  Remove the given range of addresses from the specified map.
 *
 *  It is assumed that the start and end are properly
 *  rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        vm_offset_t nextva;
        pt1_entry_t *pte1p, pte1;
        pt2_entry_t *pte2p, pte2;
        struct spglist free;

        /*
         * Perform an unsynchronized read.  This is, however, safe.
         */
        if (pmap->pm_stats.resident_count == 0)
                return;

        SLIST_INIT(&free);

        rw_wlock(&pvh_global_lock);
        sched_pin();
        PMAP_LOCK(pmap);

        /*
         * Special handling of removing one page.  A very common
         * operation and easy to short circuit some code.
         */
        if (sva + PAGE_SIZE == eva) {
                pte1 = pte1_load(pmap_pte1(pmap, sva));
                if (pte1_is_link(pte1)) {
                        pmap_remove_page(pmap, sva, &free);
                        goto out;
                }
        }

        for (; sva < eva; sva = nextva) {
                /*
                 * Calculate address for next L2 page table.
                 */
                nextva = pte1_trunc(sva + PTE1_SIZE);
                if (nextva < sva)
                        nextva = eva;
                if (pmap->pm_stats.resident_count == 0)
                        break;

                pte1p = pmap_pte1(pmap, sva);
                pte1 = pte1_load(pte1p);

                /*
                 * Weed out invalid mappings. Note: we assume that the L1 page
                 * table is always allocated, and in kernel virtual.
                 */
                if (pte1 == 0)
                        continue;

                if (pte1_is_section(pte1)) {
                        /*
                         * Are we removing the entire large page?  If not,
                         * demote the mapping and fall through.
                         */
                        if (sva + PTE1_SIZE == nextva && eva >= nextva) {
                                pmap_remove_pte1(pmap, pte1p, sva, &free);
                                continue;
                        } else if (!pmap_demote_pte1(pmap, pte1p, sva)) {
                                /* The large page mapping was destroyed. */
                                continue;
                        }
#ifdef INVARIANTS
                        else {
                                /* Update pte1 after demotion. */
                                pte1 = pte1_load(pte1p);
                        }
#endif
                }

                KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
                    " is not link", __func__, pmap, sva, pte1, pte1p));

                /*
                 * Limit our scan to either the end of the va represented
                 * by the current L2 page table page, or to the end of the
                 * range being removed.
                 */
                if (nextva > eva)
                        nextva = eva;

                for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva;
                    pte2p++, sva += PAGE_SIZE) {
                        pte2 = pte2_load(pte2p);
                        if (!pte2_is_valid(pte2))
                                continue;
                        /*
                         * NOTE(review): a nonzero return presumably means
                         * the L2 page table page itself was freed, so the
                         * remaining pte2s of this range are gone as well;
                         * confirm against pmap_unuse_pt2().
                         */
                        if (pmap_remove_pte2(pmap, pte2p, sva, &free))
                                break;
                }
        }
out:
        sched_unpin();
        rw_wunlock(&pvh_global_lock);
        PMAP_UNLOCK(pmap);
        pmap_free_zero_pages(&free);
}

/*
 *  Routine: pmap_remove_all
 *  Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *  Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
        struct md_page *pvh;
        pv_entry_t pv;
        pmap_t pmap;
        pt2_entry_t *pte2p, opte2;
        pt1_entry_t *pte1p;
        vm_offset_t va;
        struct spglist free;

        KASSERT((m->oflags & VPO_UNMANAGED) == 0,
            ("%s: page %p is not managed", __func__, m));
        SLIST_INIT(&free);
        rw_wlock(&pvh_global_lock);
        sched_pin();
        if ((m->flags & PG_FICTITIOUS) != 0)
                goto small_mappings;
        /*
         * First demote every 1MB mapping of the page; the resulting 4KB
         * mappings land on m's own pv list and are removed by the loop
         * below.
         */
        pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
        while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
                va = pv->pv_va;
                pmap = PV_PMAP(pv);
                PMAP_LOCK(pmap);
                pte1p = pmap_pte1(pmap, va);
                (void)pmap_demote_pte1(pmap, pte1p, va);
                PMAP_UNLOCK(pmap);
        }
small_mappings:
        while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
                pmap = PV_PMAP(pv);
                PMAP_LOCK(pmap);
                pmap->pm_stats.resident_count--;
                pte1p = pmap_pte1(pmap, pv->pv_va);
                KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found "
                    "a 1mpage in page %p's pv list", __func__, m));
                pte2p = pmap_pte2_quick(pmap, pv->pv_va);
                opte2 = pte2_load_clear(pte2p);
                pmap_tlb_flush(pmap, pv->pv_va);
                KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %x zero pte2",
                    __func__, pmap, pv->pv_va));
                if (pte2_is_wired(opte2))
                        pmap->pm_stats.wired_count--;
                if (opte2 & PTE2_A)
                        vm_page_aflag_set(m, PGA_REFERENCED);

                /*
                 * Update the vm_page_t clean and reference bits.
                 */
                if (pte2_is_dirty(opte2))
                        vm_page_dirty(m);
                pmap_unuse_pt2(pmap, pv->pv_va, &free);
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
                free_pv_entry(pmap, pv);
                PMAP_UNLOCK(pmap);
        }
        /* No mappings remain, so the page can no longer be written. */
        vm_page_aflag_clear(m, PGA_WRITEABLE);
        sched_unpin();
        rw_wunlock(&pvh_global_lock);
        pmap_free_zero_pages(&free);
}

/*
 *  Just subroutine for pmap_remove_pages() to reasonably satisfy
 *  good coding style, a.k.a. 80 character line width limit hell.
 */
static __inline void
pmap_remove_pte1_quick(pmap_t pmap, pt1_entry_t pte1, pv_entry_t pv,
    struct spglist *free)
{
        vm_paddr_t pa;
        vm_page_t m, mt, mpt2pg;
        struct md_page *pvh;

        pa = pte1_pa(pte1);
        m = PHYS_TO_VM_PAGE(pa);

        KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
            __func__, m, m->phys_addr, pa));
        KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
            m < &vm_page_array[vm_page_array_size],
            ("%s: bad pte1 %#x", __func__, pte1));

        /* A dirty section dirties each of its constituent 4KB pages. */
        if (pte1_is_dirty(pte1)) {
                for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
                        vm_page_dirty(mt);
        }

        pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE;
        pvh = pa_to_pvh(pa);
        TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
        if (TAILQ_EMPTY(&pvh->pv_list)) {
                for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
                        if (TAILQ_EMPTY(&mt->md.pv_list))
                                vm_page_aflag_clear(mt, PGA_WRITEABLE);
        }
        mpt2pg = pmap_pt2_page(pmap, pv->pv_va);
        if (mpt2pg != NULL)
                pmap_unwire_pt2_all(pmap, pv->pv_va, mpt2pg, free);
}

/*
 *  Just subroutine for pmap_remove_pages() to reasonably satisfy
 *  good coding style, a.k.a. 80 character line width limit hell.
 */
static __inline void
pmap_remove_pte2_quick(pmap_t pmap, pt2_entry_t pte2, pv_entry_t pv,
    struct spglist *free)
{
        vm_paddr_t pa;
        vm_page_t m;
        struct md_page *pvh;

        pa = pte2_pa(pte2);
        m = PHYS_TO_VM_PAGE(pa);

        KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
            __func__, m, m->phys_addr, pa));
        KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
            m < &vm_page_array[vm_page_array_size],
            ("%s: bad pte2 %#x", __func__, pte2));

        if (pte2_is_dirty(pte2))
                vm_page_dirty(m);

        pmap->pm_stats.resident_count--;
        TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
        if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
                pvh = pa_to_pvh(pa);
                if (TAILQ_EMPTY(&pvh->pv_list))
                        vm_page_aflag_clear(m, PGA_WRITEABLE);
        }
        pmap_unuse_pt2(pmap, pv->pv_va, free);
}

/*
 *  Remove all pages from specified address space this aids process
 *  exit speeds.  Also, this code is special cased for current process
 *  only, but can have the more generic (and slightly slower) mode enabled.
 *  This is much faster than pmap_remove in the case of running down
 *  an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap)
{
        pt1_entry_t *pte1p, pte1;
        pt2_entry_t *pte2p, pte2;
        pv_entry_t pv;
        struct pv_chunk *pc, *npc;
        struct spglist free;
        int field, idx;
        int32_t bit;
        uint32_t inuse, bitmask;
        boolean_t allfree;

        /*
         * Assert that the given pmap is only active on the current
         * CPU.  Unfortunately, we cannot block another CPU from
         * activating the pmap while this function is executing.
         */
        KASSERT(pmap == vmspace_pmap(curthread->td_proc->p_vmspace),
            ("%s: non-current pmap %p", __func__, pmap));
#if defined(SMP) && defined(INVARIANTS)
        {
                cpuset_t other_cpus;

                sched_pin();
                other_cpus = pmap->pm_active;
                CPU_CLR(PCPU_GET(cpuid), &other_cpus);
                sched_unpin();
                KASSERT(CPU_EMPTY(&other_cpus),
                    ("%s: pmap %p active on other cpus", __func__, pmap));
        }
#endif
        SLIST_INIT(&free);
        rw_wlock(&pvh_global_lock);
        PMAP_LOCK(pmap);
        sched_pin();
        /* Walk the pv chunks directly instead of scanning the page tables. */
        TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
                KASSERT(pc->pc_pmap == pmap, ("%s: wrong pmap %p %p",
                    __func__, pmap, pc->pc_pmap));
                allfree = TRUE;
                for (field = 0; field < _NPCM; field++) {
                        inuse = (~(pc->pc_map[field])) & pc_freemask[field];
                        while (inuse != 0) {
                                bit = ffs(inuse) - 1;
                                bitmask = 1UL << bit;
                                idx = field * 32 + bit;
                                pv = &pc->pc_pventry[idx];
                                inuse &= ~bitmask;

                                /*
                                 * Note that we cannot remove wired pages
                                 * from a process' mapping at this time
                                 */
                                pte1p = pmap_pte1(pmap, pv->pv_va);
                                pte1 = pte1_load(pte1p);
                                if (pte1_is_section(pte1)) {
                                        if (pte1_is_wired(pte1))  {
                                                allfree = FALSE;
                                                continue;
                                        }
                                        pte1_clear(pte1p);
                                        pmap_remove_pte1_quick(pmap, pte1, pv,
                                            &free);
                                }
                                else if (pte1_is_link(pte1)) {
                                        /*
                                         * pt2map_entry() is usable here
                                         * because the pmap was asserted to be
                                         * the current one above.
                                         */
                                        pte2p = pt2map_entry(pv->pv_va);
                                        pte2 = pte2_load(pte2p);

                                        if (!pte2_is_valid(pte2)) {
                                                printf("%s: pmap %p va %#x "
                                                    "pte2 %#x\n", __func__,
                                                    pmap, pv->pv_va, pte2);
                                                panic("bad pte2");
                                        }

                                        if (pte2_is_wired(pte2)) {
                                                allfree = FALSE;
                                                continue;
                                        }
                                        pte2_clear(pte2p);
                                        pmap_remove_pte2_quick(pmap, pte2, pv,
                                            &free);
                                } else {
                                        printf("%s: pmap %p va %#x pte1 %#x\n",
                                            __func__, pmap, pv->pv_va, pte1);
                                        panic("bad pte1");
                                }

                                /* Mark free */
                                PV_STAT(pv_entry_frees++);
                                PV_STAT(pv_entry_spare++);
                                pv_entry_count--;
                                pc->pc_map[field] |= bitmask;
                        }
                }
                if (allfree) {
                        TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
                        free_pv_chunk(pc);
                }
        }
        /* One local flush of all non-global entries replaces per-va flushes. */
        tlb_flush_all_ng_local();
        sched_unpin();
        rw_wunlock(&pvh_global_lock);
        PMAP_UNLOCK(pmap);
        pmap_free_zero_pages(&free);
}

/*
 *  This code makes some *MAJOR* assumptions:
 *  1. Current pmap & pmap exists.
 *  2. Not wired.
 *  3. Read access.
 *  4. No L2 page table pages.
 *  but is *MUCH* faster than pmap_enter...
 */
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, vm_page_t mpt2pg)
{
        pt2_entry_t *pte2p, pte2;
        vm_paddr_t pa;
        struct spglist free;
        uint32_t l2prot;

        KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
            (m->oflags & VPO_UNMANAGED) != 0,
            ("%s: managed mapping within the clean submap", __func__));
        rw_assert(&pvh_global_lock, RA_WLOCKED);
        PMAP_LOCK_ASSERT(pmap, MA_OWNED);

        /*
         * In the case that a L2 page table page is not
         * resident, we are creating it here.
         */
        if (va < VM_MAXUSER_ADDRESS) {
                u_int pte1_idx;
                pt1_entry_t pte1, *pte1p;
                vm_paddr_t pt2_pa;

                /*
                 * Get L1 page table things.
                 */
                pte1_idx = pte1_index(va);
                pte1p = pmap_pte1(pmap, va);
                pte1 = pte1_load(pte1p);

                if (mpt2pg && (mpt2pg->pindex == (pte1_idx & ~PT2PG_MASK))) {
                        /*
                         * Each of NPT2_IN_PG L2 page tables on the page can
                         * come here. Make sure that associated L1 page table
                         * link is established.
                         *
                         * QQQ: It comes that we don't establish all links to
                         *      L2 page tables for newly allocated L2 page
                         *      tables page.
                         */
                        KASSERT(!pte1_is_section(pte1),
                            ("%s: pte1 %#x is section", __func__, pte1));
                        if (!pte1_is_link(pte1)) {
                                pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(mpt2pg),
                                    pte1_idx);
                                pte1_store(pte1p, PTE1_LINK(pt2_pa));
                        }
                        pt2_wirecount_inc(mpt2pg, pte1_idx);
                } else {
                        /*
                         * If the L2 page table page is mapped, we just
                         * increment the hold count, and activate it.
                         */
                        if (pte1_is_section(pte1)) {
                                return (NULL);
                        } else if (pte1_is_link(pte1)) {
                                mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
                                pt2_wirecount_inc(mpt2pg, pte1_idx);
                        } else {
                                mpt2pg = _pmap_allocpte2(pmap, va,
                                    PMAP_ENTER_NOSLEEP);
                                if (mpt2pg == NULL)
                                        return (NULL);
                        }
                }
        } else {
                mpt2pg = NULL;
        }

        /*
         * This call to pt2map_entry() makes the assumption that we are
         * entering the page into the current pmap.  In order to support
         * quick entry into any pmap, one would likely use pmap_pte2_quick().
         * But that isn't as quick as pt2map_entry().
         */
        pte2p = pt2map_entry(va);
        pte2 = pte2_load(pte2p);
        if (pte2_is_valid(pte2)) {
                if (mpt2pg != NULL) {
                        /*
                         * Remove extra pte2 reference
                         */
                        pt2_wirecount_dec(mpt2pg, pte1_index(va));
                        mpt2pg = NULL;
                }
                return (NULL);
        }

        /*
         * Enter on the PV list if part of our managed memory.
         */
        if ((m->oflags & VPO_UNMANAGED) == 0 &&
            !pmap_try_insert_pv_entry(pmap, va, m)) {
                if (mpt2pg != NULL) {
                        SLIST_INIT(&free);
                        if (pmap_unwire_pt2(pmap, va, mpt2pg, &free)) {
                                pmap_tlb_flush(pmap, va);
                                pmap_free_zero_pages(&free);
                        }

                        mpt2pg = NULL;
                }
                return (NULL);
        }

        /*
         * Increment counters
         */
        pmap->pm_stats.resident_count++;

        /*
         * Now validate mapping with RO protection
         */
        pa = VM_PAGE_TO_PHYS(m);
        l2prot = PTE2_RO | PTE2_NM;
        if (va < VM_MAXUSER_ADDRESS)
                l2prot |= PTE2_U | PTE2_NG;
        if ((prot & VM_PROT_EXECUTE) == 0)
                l2prot |= PTE2_NX;
        else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) {
                /*
                 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
                 * is set. QQQ: For more info, see comments in pmap_enter().
                 */
                cache_icache_sync_fresh(va, pa, PAGE_SIZE);
        }
        pte2_store(pte2p, PTE2(pa, l2prot, vm_page_pte2_attr(m)));

        return (mpt2pg);
}

/*
 *  Public wrapper: takes the pv list and pmap locks around
 *  pmap_enter_quick_locked() and discards the returned L2 page table page.
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{

        rw_wlock(&pvh_global_lock);
        PMAP_LOCK(pmap);
        (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
        rw_wunlock(&pvh_global_lock);
        PMAP_UNLOCK(pmap);
}

/*
 *  Tries to create 1MB page mapping.  Returns TRUE if successful and
 *  FALSE otherwise.  Fails if (1) a page table page cannot be allocated without
 *  blocking, (2) a mapping already exists at the specified virtual address, or
 *  (3) a pv entry cannot be allocated without reclaiming another pv entry.
4646 */ 4647static boolean_t 4648pmap_enter_pte1(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 4649{ 4650 pt1_entry_t *pte1p; 4651 vm_paddr_t pa; 4652 uint32_t l1prot; 4653 4654 rw_assert(&pvh_global_lock, RA_WLOCKED); 4655 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 4656 pte1p = pmap_pte1(pmap, va); 4657 if (pte1_is_valid(pte1_load(pte1p))) { 4658 CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", __func__, 4659 va, pmap); 4660 return (FALSE); 4661 } 4662 if ((m->oflags & VPO_UNMANAGED) == 0) { 4663 /* 4664 * Abort this mapping if its PV entry could not be created. 4665 */ 4666 if (!pmap_pv_insert_pte1(pmap, va, VM_PAGE_TO_PHYS(m))) { 4667 CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p", 4668 __func__, va, pmap); 4669 return (FALSE); 4670 } 4671 } 4672 /* 4673 * Increment counters. 4674 */ 4675 pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE; 4676 4677 /* 4678 * Map the section. 4679 * 4680 * QQQ: Why VM_PROT_WRITE is not evaluated and the mapping is 4681 * made readonly? 4682 */ 4683 pa = VM_PAGE_TO_PHYS(m); 4684 l1prot = PTE1_RO | PTE1_NM; 4685 if (va < VM_MAXUSER_ADDRESS) 4686 l1prot |= PTE1_U | PTE1_NG; 4687 if ((prot & VM_PROT_EXECUTE) == 0) 4688 l1prot |= PTE1_NX; 4689 else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) { 4690 /* 4691 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA 4692 * is set. QQQ: For more info, see comments in pmap_enter(). 4693 */ 4694 cache_icache_sync_fresh(va, pa, PTE1_SIZE); 4695 } 4696 pte1_store(pte1p, PTE1(pa, l1prot, ATTR_TO_L1(vm_page_pte2_attr(m)))); 4697 4698 pmap_pte1_mappings++; 4699 CTR3(KTR_PMAP, "%s: success for va %#lx in pmap %p", __func__, va, 4700 pmap); 4701 return (TRUE); 4702} 4703 4704/* 4705 * Maps a sequence of resident pages belonging to the same object. 4706 * The sequence begins with the given page m_start. This page is 4707 * mapped at the given virtual address start. 
Each subsequent page is 4708 * mapped at a virtual address that is offset from start by the same 4709 * amount as the page is offset from m_start within the object. The 4710 * last page in the sequence is the page with the largest offset from 4711 * m_start that can be mapped at a virtual address less than the given 4712 * virtual address end. Not every virtual page between start and end 4713 * is mapped; only those for which a resident page exists with the 4714 * corresponding offset from m_start are mapped. 4715 */ 4716void 4717pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 4718 vm_page_t m_start, vm_prot_t prot) 4719{ 4720 vm_offset_t va; 4721 vm_page_t m, mpt2pg; 4722 vm_pindex_t diff, psize; 4723 4724 PDEBUG(6, printf("%s: pmap %p start %#x end %#x m %p prot %#x\n", 4725 __func__, pmap, start, end, m_start, prot)); 4726 4727 VM_OBJECT_ASSERT_LOCKED(m_start->object); 4728 psize = atop(end - start); 4729 mpt2pg = NULL; 4730 m = m_start; 4731 rw_wlock(&pvh_global_lock); 4732 PMAP_LOCK(pmap); 4733 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 4734 va = start + ptoa(diff); 4735 if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end && 4736 m->psind == 1 && sp_enabled && 4737 pmap_enter_pte1(pmap, va, m, prot)) 4738 m = &m[PTE1_SIZE / PAGE_SIZE - 1]; 4739 else 4740 mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot, 4741 mpt2pg); 4742 m = TAILQ_NEXT(m, listq); 4743 } 4744 rw_wunlock(&pvh_global_lock); 4745 PMAP_UNLOCK(pmap); 4746} 4747 4748/* 4749 * This code maps large physical mmap regions into the 4750 * processor address space. Note that some shortcuts 4751 * are taken, but the code works. 
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
    vm_pindex_t pindex, vm_size_t size)
{
        pt1_entry_t *pte1p;
        vm_paddr_t pa, pte2_pa;
        vm_page_t p;
        vm_memattr_t pat_mode;
        u_int l1attr, l1prot;

        VM_OBJECT_ASSERT_WLOCKED(object);
        KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
            ("%s: non-device object", __func__));
        if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) {
                if (!vm_object_populate(object, pindex, pindex + atop(size)))
                        return;
                p = vm_page_lookup(object, pindex);
                KASSERT(p->valid == VM_PAGE_BITS_ALL,
                    ("%s: invalid page %p", __func__, p));
                pat_mode = p->md.pat_mode;

                /*
                 * Abort the mapping if the first page is not physically
                 * aligned to a 1MB page boundary.
                 */
                pte2_pa = VM_PAGE_TO_PHYS(p);
                if (pte2_pa & PTE1_OFFSET)
                        return;

                /*
                 * Skip the first page.  Abort the mapping if the rest of
                 * the pages are not physically contiguous or have differing
                 * memory attributes.
                 */
                p = TAILQ_NEXT(p, listq);
                for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size;
                    pa += PAGE_SIZE) {
                        KASSERT(p->valid == VM_PAGE_BITS_ALL,
                            ("%s: invalid page %p", __func__, p));
                        if (pa != VM_PAGE_TO_PHYS(p) ||
                            pat_mode != p->md.pat_mode)
                                return;
                        p = TAILQ_NEXT(p, listq);
                }

                /*
                 * Map using 1MB pages.
                 *
                 * QQQ: Well, we are mapping a section, so same condition must
                 * be hold like during promotion. It looks that only RW mapping
                 * is done here, so readonly mapping must be done elsewhere.
                 */
                l1prot = PTE1_U | PTE1_NG | PTE1_RW | PTE1_M | PTE1_A;
                l1attr = ATTR_TO_L1(vm_memattr_to_pte2(pat_mode));
                PMAP_LOCK(pmap);
                for (pa = pte2_pa; pa < pte2_pa + size; pa += PTE1_SIZE) {
                        pte1p = pmap_pte1(pmap, addr);
                        if (!pte1_is_valid(pte1_load(pte1p))) {
                                pte1_store(pte1p, PTE1(pa, l1prot, l1attr));
                                pmap->pm_stats.resident_count += PTE1_SIZE /
                                    PAGE_SIZE;
                                pmap_pte1_mappings++;
                        }
                        /* Else continue on if the PTE1 is already valid. */
                        addr += PTE1_SIZE;
                }
                PMAP_UNLOCK(pmap);
        }
}

/*
 *  Do the things to protect a 1mpage in a process.
 */
static void
pmap_protect_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva,
    vm_prot_t prot)
{
        pt1_entry_t npte1, opte1;
        vm_offset_t eva, va;
        vm_page_t m;

        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
        KASSERT((sva & PTE1_OFFSET) == 0,
            ("%s: sva is not 1mpage aligned", __func__));

        opte1 = npte1 = pte1_load(pte1p);
        /* Propagate a dirty section to every constituent 4KB page before
         * possibly revoking write access below. */
        if (pte1_is_managed(opte1) && pte1_is_dirty(opte1)) {
                eva = sva + PTE1_SIZE;
                for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1));
                    va < eva; va += PAGE_SIZE, m++)
                        vm_page_dirty(m);
        }
        if ((prot & VM_PROT_WRITE) == 0)
                npte1 |= PTE1_RO | PTE1_NM;
        if ((prot & VM_PROT_EXECUTE) == 0)
                npte1 |= PTE1_NX;

        /*
         * QQQ: Herein, execute permission is never set.
         *      It only can be cleared. So, no icache
         *      syncing is needed.
         */

        if (npte1 != opte1) {
                pte1_store(pte1p, npte1);
                pmap_tlb_flush(pmap, sva);
        }
}

/*
 *  Set the physical protection on the
 *  specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
        boolean_t pv_lists_locked;
        vm_offset_t nextva;
        pt1_entry_t *pte1p, pte1;
        pt2_entry_t *pte2p, opte2, npte2;

        KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
        if (prot == VM_PROT_NONE) {
                pmap_remove(pmap, sva, eva);
                return;
        }

        /* Nothing to revoke when both write and execute stay enabled. */
        if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
            (VM_PROT_WRITE | VM_PROT_EXECUTE))
                return;

        /*
         * The pv list lock is only needed if a demotion may occur; take it
         * lazily and retry from "resume" if the trylock below fails.
         */
        if (pmap_is_current(pmap))
                pv_lists_locked = FALSE;
        else {
                pv_lists_locked = TRUE;
resume:
                rw_wlock(&pvh_global_lock);
                sched_pin();
        }

        PMAP_LOCK(pmap);
        for (; sva < eva; sva = nextva) {
                /*
                 * Calculate address for next L2 page table.
                 */
                nextva = pte1_trunc(sva + PTE1_SIZE);
                if (nextva < sva)
                        nextva = eva;

                pte1p = pmap_pte1(pmap, sva);
                pte1 = pte1_load(pte1p);

                /*
                 * Weed out invalid mappings. Note: we assume that L1 page
                 * page table is always allocated, and in kernel virtual.
                 */
                if (pte1 == 0)
                        continue;

                if (pte1_is_section(pte1)) {
                        /*
                         * Are we protecting the entire large page?  If not,
                         * demote the mapping and fall through.
                         */
                        if (sva + PTE1_SIZE == nextva && eva >= nextva) {
                                pmap_protect_pte1(pmap, pte1p, sva, prot);
                                continue;
                        } else {
                                if (!pv_lists_locked) {
                                        pv_lists_locked = TRUE;
                                        if (!rw_try_wlock(&pvh_global_lock)) {
                                                PMAP_UNLOCK(pmap);
                                                goto resume;
                                        }
                                        sched_pin();
                                }
                                if (!pmap_demote_pte1(pmap, pte1p, sva)) {
                                        /*
                                         * The large page mapping
                                         * was destroyed.
                                         */
                                        continue;
                                }
#ifdef INVARIANTS
                                else {
                                        /* Update pte1 after demotion */
                                        pte1 = pte1_load(pte1p);
                                }
#endif
                        }
                }

                KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
                    " is not link", __func__, pmap, sva, pte1, pte1p));

                /*
                 * Limit our scan to either the end of the va represented
                 * by the current L2 page table page, or to the end of the
                 * range being protected.
                 */
                if (nextva > eva)
                        nextva = eva;

                for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++,
                    sva += PAGE_SIZE) {
                        vm_page_t m;

                        opte2 = npte2 = pte2_load(pte2p);
                        if (!pte2_is_valid(opte2))
                                continue;

                        if ((prot & VM_PROT_WRITE) == 0) {
                                /* Record modification before revoking write. */
                                if (pte2_is_managed(opte2) &&
                                    pte2_is_dirty(opte2)) {
                                        m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
                                        vm_page_dirty(m);
                                }
                                npte2 |= PTE2_RO | PTE2_NM;
                        }

                        if ((prot & VM_PROT_EXECUTE) == 0)
                                npte2 |= PTE2_NX;

                        /*
                         * QQQ: Herein, execute permission is never set.
                         *      It only can be cleared. So, no icache
                         *      syncing is needed.
                         */

                        if (npte2 != opte2) {
                                pte2_store(pte2p, npte2);
                                pmap_tlb_flush(pmap, sva);
                        }
                }
        }
        if (pv_lists_locked) {
                sched_unpin();
                rw_wunlock(&pvh_global_lock);
        }
        PMAP_UNLOCK(pmap);
}

/*
 *	pmap_pvh_wired_mappings:
 *
 *	Return the updated number "count" of managed mappings that are wired.
4999 */ 5000static int 5001pmap_pvh_wired_mappings(struct md_page *pvh, int count) 5002{ 5003 pmap_t pmap; 5004 pt1_entry_t pte1; 5005 pt2_entry_t pte2; 5006 pv_entry_t pv; 5007 5008 rw_assert(&pvh_global_lock, RA_WLOCKED); 5009 sched_pin(); 5010 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 5011 pmap = PV_PMAP(pv); 5012 PMAP_LOCK(pmap); 5013 pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); 5014 if (pte1_is_section(pte1)) { 5015 if (pte1_is_wired(pte1)) 5016 count++; 5017 } else { 5018 KASSERT(pte1_is_link(pte1), 5019 ("%s: pte1 %#x is not link", __func__, pte1)); 5020 pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); 5021 if (pte2_is_wired(pte2)) 5022 count++; 5023 } 5024 PMAP_UNLOCK(pmap); 5025 } 5026 sched_unpin(); 5027 return (count); 5028} 5029 5030/* 5031 * pmap_page_wired_mappings: 5032 * 5033 * Return the number of managed mappings to the given physical page 5034 * that are wired. 5035 */ 5036int 5037pmap_page_wired_mappings(vm_page_t m) 5038{ 5039 int count; 5040 5041 count = 0; 5042 if ((m->oflags & VPO_UNMANAGED) != 0) 5043 return (count); 5044 rw_wlock(&pvh_global_lock); 5045 count = pmap_pvh_wired_mappings(&m->md, count); 5046 if ((m->flags & PG_FICTITIOUS) == 0) { 5047 count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), 5048 count); 5049 } 5050 rw_wunlock(&pvh_global_lock); 5051 return (count); 5052} 5053 5054/* 5055 * Returns TRUE if any of the given mappings were used to modify 5056 * physical memory. Otherwise, returns FALSE. Both page and 1mpage 5057 * mappings are supported. 
5058 */ 5059static boolean_t 5060pmap_is_modified_pvh(struct md_page *pvh) 5061{ 5062 pv_entry_t pv; 5063 pt1_entry_t pte1; 5064 pt2_entry_t pte2; 5065 pmap_t pmap; 5066 boolean_t rv; 5067 5068 rw_assert(&pvh_global_lock, RA_WLOCKED); 5069 rv = FALSE; 5070 sched_pin(); 5071 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 5072 pmap = PV_PMAP(pv); 5073 PMAP_LOCK(pmap); 5074 pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); 5075 if (pte1_is_section(pte1)) { 5076 rv = pte1_is_dirty(pte1); 5077 } else { 5078 KASSERT(pte1_is_link(pte1), 5079 ("%s: pte1 %#x is not link", __func__, pte1)); 5080 pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); 5081 rv = pte2_is_dirty(pte2); 5082 } 5083 PMAP_UNLOCK(pmap); 5084 if (rv) 5085 break; 5086 } 5087 sched_unpin(); 5088 return (rv); 5089} 5090 5091/* 5092 * pmap_is_modified: 5093 * 5094 * Return whether or not the specified physical page was modified 5095 * in any physical maps. 5096 */ 5097boolean_t 5098pmap_is_modified(vm_page_t m) 5099{ 5100 boolean_t rv; 5101 5102 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5103 ("%s: page %p is not managed", __func__, m)); 5104 5105 /* 5106 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 5107 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 5108 * is clear, no PTE2s can have PG_M set. 5109 */ 5110 VM_OBJECT_ASSERT_WLOCKED(m->object); 5111 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 5112 return (FALSE); 5113 rw_wlock(&pvh_global_lock); 5114 rv = pmap_is_modified_pvh(&m->md) || 5115 ((m->flags & PG_FICTITIOUS) == 0 && 5116 pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 5117 rw_wunlock(&pvh_global_lock); 5118 return (rv); 5119} 5120 5121/* 5122 * pmap_is_prefaultable: 5123 * 5124 * Return whether or not the specified virtual address is eligible 5125 * for prefault. 
5126 */ 5127boolean_t 5128pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 5129{ 5130 pt1_entry_t pte1; 5131 pt2_entry_t pte2; 5132 boolean_t rv; 5133 5134 rv = FALSE; 5135 PMAP_LOCK(pmap); 5136 pte1 = pte1_load(pmap_pte1(pmap, addr)); 5137 if (pte1_is_link(pte1)) { 5138 pte2 = pte2_load(pt2map_entry(addr)); 5139 rv = !pte2_is_valid(pte2) ; 5140 } 5141 PMAP_UNLOCK(pmap); 5142 return (rv); 5143} 5144 5145/* 5146 * Returns TRUE if any of the given mappings were referenced and FALSE 5147 * otherwise. Both page and 1mpage mappings are supported. 5148 */ 5149static boolean_t 5150pmap_is_referenced_pvh(struct md_page *pvh) 5151{ 5152 5153 pv_entry_t pv; 5154 pt1_entry_t pte1; 5155 pt2_entry_t pte2; 5156 pmap_t pmap; 5157 boolean_t rv; 5158 5159 rw_assert(&pvh_global_lock, RA_WLOCKED); 5160 rv = FALSE; 5161 sched_pin(); 5162 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 5163 pmap = PV_PMAP(pv); 5164 PMAP_LOCK(pmap); 5165 pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va)); 5166 if (pte1_is_section(pte1)) { 5167 rv = (pte1 & (PTE1_A | PTE1_V)) == (PTE1_A | PTE1_V); 5168 } else { 5169 pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va)); 5170 rv = (pte2 & (PTE2_A | PTE2_V)) == (PTE2_A | PTE2_V); 5171 } 5172 PMAP_UNLOCK(pmap); 5173 if (rv) 5174 break; 5175 } 5176 sched_unpin(); 5177 return (rv); 5178} 5179 5180/* 5181 * pmap_is_referenced: 5182 * 5183 * Return whether or not the specified physical page was referenced 5184 * in any physical maps. 5185 */ 5186boolean_t 5187pmap_is_referenced(vm_page_t m) 5188{ 5189 boolean_t rv; 5190 5191 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5192 ("%s: page %p is not managed", __func__, m)); 5193 rw_wlock(&pvh_global_lock); 5194 rv = pmap_is_referenced_pvh(&m->md) || 5195 ((m->flags & PG_FICTITIOUS) == 0 && 5196 pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 5197 rw_wunlock(&pvh_global_lock); 5198 return (rv); 5199} 5200 5201/* 5202 * pmap_ts_referenced: 5203 * 5204 * Return a count of reference bits for a page, clearing those bits. 
 *  It is not necessary for every reference bit to be cleared, but it
 *  is necessary that 0 only be returned when there are truly no
 *  reference bits set.
 *
 *  As an optimization, update the page's dirty field if a modified bit is
 *  found while counting reference bits.  This opportunistic update can be
 *  performed at low cost and can eliminate the need for some future calls
 *  to pmap_is_modified().  However, since this function stops after
 *  finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
 *  dirty pages.  Those dirty pages will only be detected by a future call
 *  to pmap_is_modified().
 */
int
pmap_ts_referenced(vm_page_t m)
{
        struct md_page *pvh;
        pv_entry_t pv, pvf;
        pmap_t pmap;
        pt1_entry_t *pte1p, opte1;
        pt2_entry_t *pte2p, opte2;
        vm_paddr_t pa;
        int rtval = 0;

        KASSERT((m->oflags & VPO_UNMANAGED) == 0,
            ("%s: page %p is not managed", __func__, m));
        pa = VM_PAGE_TO_PHYS(m);
        pvh = pa_to_pvh(pa);
        rw_wlock(&pvh_global_lock);
        sched_pin();
        if ((m->flags & PG_FICTITIOUS) != 0 ||
            (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
                goto small_mappings;
        pv = pvf;
        do {
                pmap = PV_PMAP(pv);
                PMAP_LOCK(pmap);
                pte1p = pmap_pte1(pmap, pv->pv_va);
                opte1 = pte1_load(pte1p);
                if (pte1_is_dirty(opte1)) {
                        /*
                         * Although "opte1" is mapping a 1MB page, because
                         * this function is called at a 4KB page granularity,
                         * we only update the 4KB page under test.
                         */
                        vm_page_dirty(m);
                }
                if ((opte1 & PTE1_A) != 0) {
                        /*
                         * Since this reference bit is shared by 256 4KB pages,
                         * it should not be cleared every time it is tested.
                         * Apply a simple "hash" function on the physical page
                         * number, the virtual section number, and the pmap
                         * address to select one 4KB page out of the 256
                         * on which testing the reference bit will result
                         * in clearing that bit. This function is designed
                         * to avoid the selection of the same 4KB page
                         * for every 1MB page mapping.
                         *
                         * On demotion, a mapping that hasn't been referenced
                         * is simply destroyed.  To avoid the possibility of a
                         * subsequent page fault on a demoted wired mapping,
                         * always leave its reference bit set.  Moreover,
                         * since the section is wired, the current state of
                         * its reference bit won't affect page replacement.
                         */
                        if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PTE1_SHIFT) ^
                            (uintptr_t)pmap) & (NPTE2_IN_PG - 1)) == 0 &&
                            !pte1_is_wired(opte1)) {
                                pte1_clear_bit(pte1p, PTE1_A);
                                pmap_tlb_flush(pmap, pv->pv_va);
                        }
                        rtval++;
                }
                PMAP_UNLOCK(pmap);
                /* Rotate the PV list if it has more than one entry. */
                if (TAILQ_NEXT(pv, pv_next) != NULL) {
                        TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
                        TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
                }
                if (rtval >= PMAP_TS_REFERENCED_MAX)
                        goto out;
        } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
small_mappings:
        if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
                goto out;
        pv = pvf;
        do {
                pmap = PV_PMAP(pv);
                PMAP_LOCK(pmap);
                pte1p = pmap_pte1(pmap, pv->pv_va);
                KASSERT(pte1_is_link(pte1_load(pte1p)),
                    ("%s: not found a link in page %p's pv list", __func__, m));

                pte2p = pmap_pte2_quick(pmap, pv->pv_va);
                opte2 = pte2_load(pte2p);
                if (pte2_is_dirty(opte2))
                        vm_page_dirty(m);
                if ((opte2 & PTE2_A) != 0) {
                        pte2_clear_bit(pte2p, PTE2_A);
                        pmap_tlb_flush(pmap, pv->pv_va);
                        rtval++;
                }
                PMAP_UNLOCK(pmap);
                /* Rotate the PV list if it has more than one entry. */
                if (TAILQ_NEXT(pv, pv_next) != NULL) {
                        TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
                        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
                }
        } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval <
            PMAP_TS_REFERENCED_MAX);
out:
        sched_unpin();
        rw_wunlock(&pvh_global_lock);
        return (rtval);
}

/*
 *  Clear the wired attribute from the mappings for the specified range of
 *  addresses in the given pmap.  Every valid mapping within that range
 *  must have the wired attribute set.  In contrast, invalid mappings
 *  cannot have the wired attribute set, so they are ignored.
 *
 *  The wired attribute of the page table entry is not a hardware feature,
 *  so there is no need to invalidate any TLB entries.
 */
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        vm_offset_t nextva;
        pt1_entry_t *pte1p, pte1;
        pt2_entry_t *pte2p, pte2;
        boolean_t pv_lists_locked;

        if (pmap_is_current(pmap))
                pv_lists_locked = FALSE;
        else {
                pv_lists_locked = TRUE;
resume:
                rw_wlock(&pvh_global_lock);
                sched_pin();
        }
        PMAP_LOCK(pmap);
        for (; sva < eva; sva = nextva) {
                nextva = pte1_trunc(sva + PTE1_SIZE);
                if (nextva < sva)
                        nextva = eva;

                pte1p = pmap_pte1(pmap, sva);
                pte1 = pte1_load(pte1p);

                /*
                 * Weed out invalid mappings. Note: we assume that L1 page
                 * page table is always allocated, and in kernel virtual.
                 */
                if (pte1 == 0)
                        continue;

                if (pte1_is_section(pte1)) {
                        if (!pte1_is_wired(pte1))
                                panic("%s: pte1 %#x not wired", __func__, pte1);

                        /*
                         * Are we unwiring the entire large page?  If not,
                         * demote the mapping and fall through.
                         */
                        if (sva + PTE1_SIZE == nextva && eva >= nextva) {
                                pte1_clear_bit(pte1p, PTE1_W);
                                pmap->pm_stats.wired_count -= PTE1_SIZE /
                                    PAGE_SIZE;
                                continue;
                        } else {
                                if (!pv_lists_locked) {
                                        pv_lists_locked = TRUE;
                                        if (!rw_try_wlock(&pvh_global_lock)) {
                                                PMAP_UNLOCK(pmap);
                                                /* Repeat sva. */
                                                goto resume;
                                        }
                                        sched_pin();
                                }
                                if (!pmap_demote_pte1(pmap, pte1p, sva))
                                        panic("%s: demotion failed", __func__);
#ifdef INVARIANTS
                                else {
                                        /* Update pte1 after demotion */
                                        pte1 = pte1_load(pte1p);
                                }
#endif
                        }
                }

                KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
                    " is not link", __func__, pmap, sva, pte1, pte1p));

                /*
                 * Limit our scan to either the end of the va represented
                 * by the current L2 page table page, or to the end of the
                 * range being protected.
                 */
                if (nextva > eva)
                        nextva = eva;

                for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++,
                    sva += PAGE_SIZE) {
                        pte2 = pte2_load(pte2p);
                        if (!pte2_is_valid(pte2))
                                continue;
                        if (!pte2_is_wired(pte2))
                                panic("%s: pte2 %#x is missing PTE2_W",
                                    __func__, pte2);

                        /*
                         * PTE2_W must be cleared atomically.  Although the pmap
                         * lock synchronizes access to PTE2_W, another processor
                         * could be changing PTE2_NM and/or PTE2_A concurrently.
                         */
                        pte2_clear_bit(pte2p, PTE2_W);
                        pmap->pm_stats.wired_count--;
                }
        }
        if (pv_lists_locked) {
                sched_unpin();
                rw_wunlock(&pvh_global_lock);
        }
        PMAP_UNLOCK(pmap);
}

/*
 *  Clear the write and modified bits in each of the given page's mappings.
/*
 *	Clear the write and modified bits in each of the given page's mappings.
 *
 *	Section (1MB) mappings that are writable are demoted first; the
 *	resulting 4KB mappings are then write-protected individually by the
 *	small-mappings pass below.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t next_pv, pv;
	pmap_t pmap;
	pt1_entry_t *pte1p;
	pt2_entry_t *pte2p, opte2;
	vm_offset_t va;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("%s: page %p is not managed", __func__, m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&pvh_global_lock);
	sched_pin();
	if ((m->flags & PG_FICTITIOUS) != 0)
		goto small_mappings;
	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
		va = pv->pv_va;
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1p = pmap_pte1(pmap, va);
		/* Demote writable sections; read-only ones need no work. */
		if (!(pte1_load(pte1p) & PTE1_RO))
			(void)pmap_demote_pte1(pmap, pte1p, va);
		PMAP_UNLOCK(pmap);
	}
small_mappings:
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte1p = pmap_pte1(pmap, pv->pv_va);
		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found"
		    " a section in page %p's pv list", __func__, m));
		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
		opte2 = pte2_load(pte2p);
		if (!(opte2 & PTE2_RO)) {
			/*
			 * Make the mapping read-only (PTE2_RO) and mark it
			 * not-modified (PTE2_NM); capture the old modified
			 * state into the page's dirty field first.
			 */
			pte2_store(pte2p, opte2 | PTE2_RO | PTE2_NM);
			if (pte2_is_dirty(opte2))
				vm_page_dirty(m);
			pmap_tlb_flush(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
}
Depending on the advice, clear the referenced and/or 5495 * modified flags in each mapping and set the mapped page's dirty field. 5496 */ 5497void 5498pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 5499{ 5500 pt1_entry_t *pte1p, opte1; 5501 pt2_entry_t *pte2p, pte2; 5502 vm_offset_t pdnxt; 5503 vm_page_t m; 5504 boolean_t pv_lists_locked; 5505 5506 if (advice != MADV_DONTNEED && advice != MADV_FREE) 5507 return; 5508 if (pmap_is_current(pmap)) 5509 pv_lists_locked = FALSE; 5510 else { 5511 pv_lists_locked = TRUE; 5512resume: 5513 rw_wlock(&pvh_global_lock); 5514 sched_pin(); 5515 } 5516 PMAP_LOCK(pmap); 5517 for (; sva < eva; sva = pdnxt) { 5518 pdnxt = pte1_trunc(sva + PTE1_SIZE); 5519 if (pdnxt < sva) 5520 pdnxt = eva; 5521 pte1p = pmap_pte1(pmap, sva); 5522 opte1 = pte1_load(pte1p); 5523 if (!pte1_is_valid(opte1)) /* XXX */ 5524 continue; 5525 else if (pte1_is_section(opte1)) { 5526 if (!pte1_is_managed(opte1)) 5527 continue; 5528 if (!pv_lists_locked) { 5529 pv_lists_locked = TRUE; 5530 if (!rw_try_wlock(&pvh_global_lock)) { 5531 PMAP_UNLOCK(pmap); 5532 goto resume; 5533 } 5534 sched_pin(); 5535 } 5536 if (!pmap_demote_pte1(pmap, pte1p, sva)) { 5537 /* 5538 * The large page mapping was destroyed. 5539 */ 5540 continue; 5541 } 5542 5543 /* 5544 * Unless the page mappings are wired, remove the 5545 * mapping to a single page so that a subsequent 5546 * access may repromote. Since the underlying L2 page 5547 * table is fully populated, this removal never 5548 * frees a L2 page table page. 
5549 */ 5550 if (!pte1_is_wired(opte1)) { 5551 pte2p = pmap_pte2_quick(pmap, sva); 5552 KASSERT(pte2_is_valid(pte2_load(pte2p)), 5553 ("%s: invalid PTE2", __func__)); 5554 pmap_remove_pte2(pmap, pte2p, sva, NULL); 5555 } 5556 } 5557 if (pdnxt > eva) 5558 pdnxt = eva; 5559 for (pte2p = pmap_pte2_quick(pmap, sva); sva != pdnxt; pte2p++, 5560 sva += PAGE_SIZE) { 5561 pte2 = pte2_load(pte2p); 5562 if (!pte2_is_valid(pte2) || !pte2_is_managed(pte2)) 5563 continue; 5564 else if (pte2_is_dirty(pte2)) { 5565 if (advice == MADV_DONTNEED) { 5566 /* 5567 * Future calls to pmap_is_modified() 5568 * can be avoided by making the page 5569 * dirty now. 5570 */ 5571 m = PHYS_TO_VM_PAGE(pte2_pa(pte2)); 5572 vm_page_dirty(m); 5573 } 5574 pte2_set_bit(pte2p, PTE2_NM); 5575 pte2_clear_bit(pte2p, PTE2_A); 5576 } else if ((pte2 & PTE2_A) != 0) 5577 pte2_clear_bit(pte2p, PTE2_A); 5578 else 5579 continue; 5580 pmap_tlb_flush(pmap, sva); 5581 } 5582 } 5583 if (pv_lists_locked) { 5584 sched_unpin(); 5585 rw_wunlock(&pvh_global_lock); 5586 } 5587 PMAP_UNLOCK(pmap); 5588} 5589 5590/* 5591 * Clear the modify bits on the specified physical page. 5592 */ 5593void 5594pmap_clear_modify(vm_page_t m) 5595{ 5596 struct md_page *pvh; 5597 pv_entry_t next_pv, pv; 5598 pmap_t pmap; 5599 pt1_entry_t *pte1p, opte1; 5600 pt2_entry_t *pte2p, opte2; 5601 vm_offset_t va; 5602 5603 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5604 ("%s: page %p is not managed", __func__, m)); 5605 VM_OBJECT_ASSERT_WLOCKED(m->object); 5606 KASSERT(!vm_page_xbusied(m), 5607 ("%s: page %p is exclusive busy", __func__, m)); 5608 5609 /* 5610 * If the page is not PGA_WRITEABLE, then no PTE2s can have PTE2_NM 5611 * cleared. If the object containing the page is locked and the page 5612 * is not exclusive busied, then PGA_WRITEABLE cannot be concurrently 5613 * set. 
5614 */ 5615 if ((m->flags & PGA_WRITEABLE) == 0) 5616 return; 5617 rw_wlock(&pvh_global_lock); 5618 sched_pin(); 5619 if ((m->flags & PG_FICTITIOUS) != 0) 5620 goto small_mappings; 5621 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 5622 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { 5623 va = pv->pv_va; 5624 pmap = PV_PMAP(pv); 5625 PMAP_LOCK(pmap); 5626 pte1p = pmap_pte1(pmap, va); 5627 opte1 = pte1_load(pte1p); 5628 if (!(opte1 & PTE1_RO)) { 5629 if (pmap_demote_pte1(pmap, pte1p, va) && 5630 !pte1_is_wired(opte1)) { 5631 /* 5632 * Write protect the mapping to a 5633 * single page so that a subsequent 5634 * write access may repromote. 5635 */ 5636 va += VM_PAGE_TO_PHYS(m) - pte1_pa(opte1); 5637 pte2p = pmap_pte2_quick(pmap, va); 5638 opte2 = pte2_load(pte2p); 5639 if ((opte2 & PTE2_V)) { 5640 pte2_set_bit(pte2p, PTE2_NM | PTE2_RO); 5641 vm_page_dirty(m); 5642 pmap_tlb_flush(pmap, va); 5643 } 5644 } 5645 } 5646 PMAP_UNLOCK(pmap); 5647 } 5648small_mappings: 5649 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 5650 pmap = PV_PMAP(pv); 5651 PMAP_LOCK(pmap); 5652 pte1p = pmap_pte1(pmap, pv->pv_va); 5653 KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found" 5654 " a section in page %p's pv list", __func__, m)); 5655 pte2p = pmap_pte2_quick(pmap, pv->pv_va); 5656 if (pte2_is_dirty(pte2_load(pte2p))) { 5657 pte2_set_bit(pte2p, PTE2_NM); 5658 pmap_tlb_flush(pmap, pv->pv_va); 5659 } 5660 PMAP_UNLOCK(pmap); 5661 } 5662 sched_unpin(); 5663 rw_wunlock(&pvh_global_lock); 5664} 5665 5666 5667/* 5668 * Sets the memory attribute for the specified page. 
/*
 *	Sets the memory attribute for the specified page.
 *
 *	If the attribute actually changes and the page is not fictitious, the
 *	page is mapped transiently through the per-CPU CMAP2 slot and its
 *	data cache lines are written back and invalidated to PoC, so stale
 *	lines with the old attribute cannot survive.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
	pt2_entry_t *cmap2_pte2p;
	vm_memattr_t oma;
	vm_paddr_t pa;
	struct pcpu *pc;

	oma = m->md.pat_mode;
	m->md.pat_mode = ma;

	CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m,
	    VM_PAGE_TO_PHYS(m), oma, ma);
	if ((m->flags & PG_FICTITIOUS) != 0)
		return;
#if 0
	/*
	 * If "m" is a normal page, flush it from the cache.
	 *
	 * First, try to find an existing mapping of the page by sf
	 * buffer. sf_buf_invalidate_cache() modifies mapping and
	 * flushes the cache.
	 */
	if (sf_buf_invalidate_cache(m, oma))
		return;
#endif
	/*
	 * If page is not mapped by sf buffer, map the page
	 * transient and do invalidation.
	 */
	if (ma != oma) {
		pa = VM_PAGE_TO_PHYS(m);
		sched_pin();
		pc = get_pcpu();
		cmap2_pte2p = pc->pc_cmap2_pte2p;
		mtx_lock(&pc->pc_cmap_lock);
		if (pte2_load(cmap2_pte2p) != 0)
			panic("%s: CMAP2 busy", __func__);
		/* Map with the NEW attribute before flushing. */
		pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
		    vm_memattr_to_pte2(ma)));
		dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE);
		pte2_clear(cmap2_pte2p);
		tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
		sched_unpin();
		mtx_unlock(&pc->pc_cmap_lock);
	}
}
5725 */ 5726boolean_t 5727pmap_page_is_mapped(vm_page_t m) 5728{ 5729 boolean_t rv; 5730 5731 if ((m->oflags & VPO_UNMANAGED) != 0) 5732 return (FALSE); 5733 rw_wlock(&pvh_global_lock); 5734 rv = !TAILQ_EMPTY(&m->md.pv_list) || 5735 ((m->flags & PG_FICTITIOUS) == 0 && 5736 !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); 5737 rw_wunlock(&pvh_global_lock); 5738 return (rv); 5739} 5740 5741/* 5742 * Returns true if the pmap's pv is one of the first 5743 * 16 pvs linked to from this page. This count may 5744 * be changed upwards or downwards in the future; it 5745 * is only necessary that true be returned for a small 5746 * subset of pmaps for proper page aging. 5747 */ 5748boolean_t 5749pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 5750{ 5751 struct md_page *pvh; 5752 pv_entry_t pv; 5753 int loops = 0; 5754 boolean_t rv; 5755 5756 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5757 ("%s: page %p is not managed", __func__, m)); 5758 rv = FALSE; 5759 rw_wlock(&pvh_global_lock); 5760 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 5761 if (PV_PMAP(pv) == pmap) { 5762 rv = TRUE; 5763 break; 5764 } 5765 loops++; 5766 if (loops >= 16) 5767 break; 5768 } 5769 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 5770 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 5771 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 5772 if (PV_PMAP(pv) == pmap) { 5773 rv = TRUE; 5774 break; 5775 } 5776 loops++; 5777 if (loops >= 16) 5778 break; 5779 } 5780 } 5781 rw_wunlock(&pvh_global_lock); 5782 return (rv); 5783} 5784 5785/* 5786 * pmap_zero_page zeros the specified hardware page by mapping 5787 * the page into KVM and using bzero to clear its contents. 
/*
 *	pmap_zero_page zeros the specified hardware page by mapping
 *	the page into KVM and using bzero to clear its contents.
 *
 *	Uses the per-CPU CMAP2 slot, so the thread is pinned for the
 *	duration of the transient mapping.
 */
void
pmap_zero_page(vm_page_t m)
{
	pt2_entry_t *cmap2_pte2p;
	struct pcpu *pc;

	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	pagezero(pc->pc_cmap2_addr);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 *	pmap_zero_page_area zeros the specified hardware page by mapping
 *	the page into KVM and using bzero to clear its contents.
 *
 *	off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	pt2_entry_t *cmap2_pte2p;
	struct pcpu *pc;

	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	/* Whole-page case uses the optimized pagezero() path. */
	if (off == 0 && size == PAGE_SIZE)
		pagezero(pc->pc_cmap2_addr);
	else
		bzero(pc->pc_cmap2_addr + off, size);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}
/*
 *	pmap_zero_page_idle zeros the specified hardware page by mapping
 *	the page into KVM and using bzero to clear its contents.  This
 *	is intended to be called from the vm_pagezero process only and
 *	outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{

	/* Uses the dedicated CMAP3/CADDR3 slot, not the per-CPU CMAP2. */
	if (pte2_load(CMAP3) != 0)
		panic("%s: CMAP3 busy", __func__);
	sched_pin();
	pte2_store(CMAP3, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	pagezero(CADDR3);
	pte2_clear(CMAP3);
	tlb_flush((vm_offset_t)CADDR3);
	sched_unpin();
}

/*
 *	pmap_copy_page copies the specified (machine independent)
 *	page by mapping the page into virtual memory and using
 *	bcopy to copy the page, one machine dependent page at a
 *	time.
 *
 *	The source is mapped read-only through CMAP1 and the destination
 *	read-write through CMAP2.
 */
void
pmap_copy_page(vm_page_t src, vm_page_t dst)
{
	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
	struct pcpu *pc;

	sched_pin();
	pc = get_pcpu();
	cmap1_pte2p = pc->pc_cmap1_pte2p;
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap1_pte2p) != 0)
		panic("%s: CMAP1 busy", __func__);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(src),
	    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(src)));
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(dst),
	    PTE2_AP_KRW, vm_page_pte2_attr(dst)));
	bcopy(pc->pc_cmap1_addr, pc->pc_cmap2_addr, PAGE_SIZE);
	pte2_clear(cmap1_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/* Non-zero means the buffer cache may use unmapped buffers. */
int unmapped_buf_allowed = 1;

/*
 *	Copy "xfersize" bytes from page array "ma" at offset "a_offset" to
 *	page array "mb" at offset "b_offset", one page-bounded chunk at a
 *	time, using the per-CPU CMAP1 (source, read-only) and CMAP2
 *	(destination) transient mappings.
 */
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{
	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
	vm_page_t a_pg, b_pg;
	char *a_cp, *b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	struct pcpu *pc;
	int cnt;

	sched_pin();
	pc = get_pcpu();
	cmap1_pte2p = pc->pc_cmap1_pte2p;
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap1_pte2p) != 0)
		panic("pmap_copy_pages: CMAP1 busy");
	if (pte2_load(cmap2_pte2p) != 0)
		panic("pmap_copy_pages: CMAP2 busy");
	while (xfersize > 0) {
		/* Clamp the chunk so it stays inside both current pages. */
		a_pg = ma[a_offset >> PAGE_SHIFT];
		a_pg_offset = a_offset & PAGE_MASK;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		b_pg = mb[b_offset >> PAGE_SHIFT];
		b_pg_offset = b_offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(a_pg),
		    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(a_pg)));
		tlb_flush_local((vm_offset_t)pc->pc_cmap1_addr);
		pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(b_pg),
		    PTE2_AP_KRW, vm_page_pte2_attr(b_pg)));
		tlb_flush_local((vm_offset_t)pc->pc_cmap2_addr);
		a_cp = pc->pc_cmap1_addr + a_pg_offset;
		b_cp = pc->pc_cmap2_addr + b_pg_offset;
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
	pte2_clear(cmap1_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 *	Map the given page into the per-CPU quick-map slot and return the
 *	KVA.  The caller must pair this with pmap_quick_remove_page();
 *	preemption stays disabled (critical section) in between.
 */
vm_offset_t
pmap_quick_enter_page(vm_page_t m)
{
	struct pcpu *pc;
	pt2_entry_t *pte2p;

	critical_enter();
	pc = get_pcpu();
	pte2p = pc->pc_qmap_pte2p;

	KASSERT(pte2_load(pte2p) == 0, ("%s: PTE2 busy", __func__));

	pte2_store(pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	return (pc->pc_qmap_addr);
}

/*
 *	Tear down a quick mapping created by pmap_quick_enter_page() and
 *	leave the critical section entered there.
 */
void
pmap_quick_remove_page(vm_offset_t addr)
{
	struct pcpu *pc;
	pt2_entry_t *pte2p;

	pc = get_pcpu();
	pte2p = pc->pc_qmap_pte2p;

	KASSERT(addr == pc->pc_qmap_addr, ("%s: invalid address", __func__));
	KASSERT(pte2_load(pte2p) != 0, ("%s: PTE2 not in use", __func__));

	pte2_clear(pte2p);
	tlb_flush(pc->pc_qmap_addr);
	critical_exit();
}

/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
	struct spglist free;
	vm_offset_t addr;
	vm_offset_t end_addr = src_addr + len;
	vm_offset_t nextva;

	/* Only same-VA copies are attempted (e.g. fork()). */
	if (dst_addr != src_addr)
		return;

	if (!pmap_is_current(src_pmap))
		return;

	rw_wlock(&pvh_global_lock);
	/* Lock both pmaps in a canonical (address) order to avoid deadlock. */
	if (dst_pmap < src_pmap) {
		PMAP_LOCK(dst_pmap);
		PMAP_LOCK(src_pmap);
	} else {
		PMAP_LOCK(src_pmap);
		PMAP_LOCK(dst_pmap);
	}
	sched_pin();
	for (addr = src_addr; addr < end_addr; addr = nextva) {
		pt2_entry_t *src_pte2p, *dst_pte2p;
		vm_page_t dst_mpt2pg, src_mpt2pg;
		pt1_entry_t src_pte1;
		u_int pte1_idx;

		KASSERT(addr < VM_MAXUSER_ADDRESS,
		    ("%s: invalid to pmap_copy page tables", __func__));

		nextva = pte1_trunc(addr + PTE1_SIZE);
		if (nextva < addr)
			nextva = end_addr;

		pte1_idx = pte1_index(addr);
		src_pte1 = src_pmap->pm_pt1[pte1_idx];
		if (pte1_is_section(src_pte1)) {
			/* Copy whole, aligned sections only. */
			if ((addr & PTE1_OFFSET) != 0 ||
			    (addr + PTE1_SIZE) > end_addr)
				continue;
			if (dst_pmap->pm_pt1[pte1_idx] == 0 &&
			    (!pte1_is_managed(src_pte1) ||
			    pmap_pv_insert_pte1(dst_pmap, addr,
			    pte1_pa(src_pte1)))) {
				/* Share the section, cleared of PTE1_W. */
				dst_pmap->pm_pt1[pte1_idx] = src_pte1 &
				    ~PTE1_W;
				dst_pmap->pm_stats.resident_count +=
				    PTE1_SIZE / PAGE_SIZE;
				pmap_pte1_mappings++;
			}
			continue;
		} else if (!pte1_is_link(src_pte1))
			continue;

		src_mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(src_pte1));

		/*
		 * We leave PT2s to be linked from PT1 even if they are not
		 * referenced until all PT2s in a page are without reference.
		 *
		 * QQQ: It could be changed ...
		 */
#if 0 /* single_pt2_link_is_cleared */
		KASSERT(pt2_wirecount_get(src_mpt2pg, pte1_idx) > 0,
		    ("%s: source page table page is unused", __func__));
#else
		if (pt2_wirecount_get(src_mpt2pg, pte1_idx) == 0)
			continue;
#endif
		if (nextva > end_addr)
			nextva = end_addr;

		src_pte2p = pt2map_entry(addr);
		while (addr < nextva) {
			pt2_entry_t temp_pte2;
			temp_pte2 = pte2_load(src_pte2p);
			/*
			 * we only virtual copy managed pages
			 */
			if (pte2_is_managed(temp_pte2)) {
				dst_mpt2pg = pmap_allocpte2(dst_pmap, addr,
				    PMAP_ENTER_NOSLEEP);
				if (dst_mpt2pg == NULL)
					goto out;
				dst_pte2p = pmap_pte2_quick(dst_pmap, addr);
				if (!pte2_is_valid(pte2_load(dst_pte2p)) &&
				    pmap_try_insert_pv_entry(dst_pmap, addr,
				    PHYS_TO_VM_PAGE(pte2_pa(temp_pte2)))) {
					/*
					 * Clear the wired, modified, and
					 * accessed (referenced) bits
					 * during the copy.
					 */
					temp_pte2 &= ~(PTE2_W | PTE2_A);
					temp_pte2 |= PTE2_NM;
					pte2_store(dst_pte2p, temp_pte2);
					dst_pmap->pm_stats.resident_count++;
				} else {
					/* Out of pv entries: undo and stop. */
					SLIST_INIT(&free);
					if (pmap_unwire_pt2(dst_pmap, addr,
					    dst_mpt2pg, &free)) {
						pmap_tlb_flush(dst_pmap, addr);
						pmap_free_zero_pages(&free);
					}
					goto out;
				}
				if (pt2_wirecount_get(dst_mpt2pg, pte1_idx) >=
				    pt2_wirecount_get(src_mpt2pg, pte1_idx))
					break;
			}
			addr += PAGE_SIZE;
			src_pte2p++;
		}
	}
out:
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(src_pmap);
	PMAP_UNLOCK(dst_pmap);
}
/*
 *	Increase the starting virtual address of the given mapping if a
 *	different alignment might result in more section mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t pte1_offset;

	if (size < PTE1_SIZE)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	pte1_offset = offset & PTE1_OFFSET;
	/*
	 * Skip if no full section would fit anyway, or if *addr is
	 * already congruent to the backing object offset modulo 1MB.
	 */
	if (size - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) < PTE1_SIZE ||
	    (*addr & PTE1_OFFSET) == pte1_offset)
		return;
	if ((*addr & PTE1_OFFSET) < pte1_offset)
		*addr = pte1_trunc(*addr) + pte1_offset;
	else
		*addr = pte1_roundup(*addr) + pte1_offset;
}

/*
 *	Make the given thread's pmap the active one on this CPU: update the
 *	pm_active CPU sets, the PCB's saved TTB, and the hardware TTBR.
 */
void
pmap_activate(struct thread *td)
{
	pmap_t pmap, oldpmap;
	u_int cpuid, ttb;

	PDEBUG(9, printf("%s: td = %08x\n", __func__, (uint32_t)td));

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
	cpuid = PCPU_GET(cpuid);

#if defined(SMP)
	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
#else
	CPU_CLR(cpuid, &oldpmap->pm_active);
	CPU_SET(cpuid, &pmap->pm_active);
#endif

	ttb = pmap_ttb_get(pmap);

	/*
	 * pmap_activate is for the current thread on the current cpu
	 */
	td->td_pcb->pcb_pagedir = ttb;
	cp15_ttbr_set(ttb);
	PCPU_SET(curpmap, pmap);
	critical_exit();
}
/*
 *	Perform the pmap work for mincore.
 *
 *	Returns the MINCORE_* status bits for the page mapped at "addr" and,
 *	for managed mappings whose referenced/modified state may also exist
 *	in other pmaps, locks the backing physical page via *locked_pa so
 *	the caller can consult the pv lists.
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pt1_entry_t *pte1p, pte1;
	pt2_entry_t *pte2p, pte2;
	vm_paddr_t pa;
	bool managed;
	int val;

	PMAP_LOCK(pmap);
retry:
	pte1p = pmap_pte1(pmap, addr);
	pte1 = pte1_load(pte1p);
	if (pte1_is_section(pte1)) {
		/* 1MB section: synthesize the 4KB frame for "addr". */
		pa = trunc_page(pte1_pa(pte1) | (addr & PTE1_OFFSET));
		managed = pte1_is_managed(pte1);
		val = MINCORE_SUPER | MINCORE_INCORE;
		if (pte1_is_dirty(pte1))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if (pte1 & PTE1_A)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	} else if (pte1_is_link(pte1)) {
		pte2p = pmap_pte2(pmap, addr);
		pte2 = pte2_load(pte2p);
		pmap_pte2_release(pte2p);
		pa = pte2_pa(pte2);
		managed = pte2_is_managed(pte2);
		val = MINCORE_INCORE;
		if (pte2_is_dirty(pte2))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if (pte2 & PTE2_A)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	} else {
		managed = false;
		val = 0;
	}
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

/*
 *	Map a physically contiguous device-memory range at "va" with
 *	VM_MEMATTR_DEVICE attributes.  "size" must be page aligned.
 */
void
pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa)
{
	vm_offset_t sva;
	uint32_t l2attr;

	KASSERT((size & PAGE_MASK) == 0,
	    ("%s: device mapping not page-sized", __func__));

	sva = va;
	l2attr = vm_memattr_to_pte2(VM_MEMATTR_DEVICE);
	while (size != 0) {
		pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, l2attr);
		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	/* One ranged flush after all entries are stored. */
	tlb_flush_range(sva, va - sva);
}

/*
 *	Undo a pmap_kenter_device() mapping.  "size" must be page aligned.
 */
void
pmap_kremove_device(vm_offset_t va, vm_size_t size)
{
	vm_offset_t sva;

	KASSERT((size & PAGE_MASK) == 0,
	    ("%s: device mapping not page-sized", __func__));

	sva = va;
	while (size != 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	tlb_flush_range(sva, va - sva);
}

/*
 *	Record the pmap's translation table base in the given PCB so the
 *	context switch code can load it.
 */
void
pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb)
{

	pcb->pcb_pagedir = pmap_ttb_get(pmap);
}
/*
 *	Clean L1 data cache range by physical address.
 *	The range must be within a single page.
 *
 *	The page is mapped transiently through the per-CPU CMAP2 slot with
 *	the supplied memory attribute, then written back to PoU.
 */
static void
pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, uint32_t attr)
{
	pt2_entry_t *cmap2_pte2p;
	struct pcpu *pc;

	KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE,
	    ("%s: not on single page", __func__));

	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, attr));
	dcache_wb_pou((vm_offset_t)pc->pc_cmap2_addr + (pa & PAGE_MASK), size);
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 *	Sync instruction cache range which is not mapped yet.
 */
void
cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
{
	uint32_t len, offset;
	vm_page_t m;

	/* Write back d-cache on given address range. */
	offset = pa & PAGE_MASK;
	for ( ; size != 0; size -= len, pa += len, offset = 0) {
		len = min(PAGE_SIZE - offset, size);
		m = PHYS_TO_VM_PAGE(pa);
		KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
		    __func__, pa));
		pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
	}
	/*
	 * I-cache is VIPT. Only way how to flush all virtual mappings
	 * on given physical address is to invalidate all i-cache.
	 */
	icache_inv_all();
}

/*
 *	Make the instruction cache coherent with new code in the given
 *	range: write back the d-cache, then invalidate the whole i-cache.
 */
void
pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t size)
{

	/* Write back d-cache on given address range. */
	if (va >= VM_MIN_KERNEL_ADDRESS) {
		/* Kernel VA is directly usable. */
		dcache_wb_pou(va, size);
	} else {
		uint32_t len, offset;
		vm_paddr_t pa;
		vm_page_t m;

		/* User VA: resolve each page and flush by physical address. */
		offset = va & PAGE_MASK;
		for ( ; size != 0; size -= len, va += len, offset = 0) {
			pa = pmap_extract(pmap, va); /* offset is preserved */
			len = min(PAGE_SIZE - offset, size);
			m = PHYS_TO_VM_PAGE(pa);
			KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
			    __func__, pa));
			pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
		}
	}
	/*
	 * I-cache is VIPT. Only way how to flush all virtual mappings
	 * on given physical address is to invalidate all i-cache.
	 */
	icache_inv_all();
}

/*
 *	The implementation of pmap_fault() uses IN_RANGE2() macro which
 *	depends on the fact that given range size is a power of 2.
 */
CTASSERT(powerof2(NB_IN_PT1));
CTASSERT(powerof2(PT2MAP_SIZE));

/* True when "addr" falls within the power-of-2 sized range at "start". */
#define IN_RANGE2(addr, start, size)	\
    ((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1)))

/*
 *	Handle access and R/W emulation faults.
 *
 *	"far" is the faulting address, "fsr" the fault status register,
 *	"idx" the decoded fault type, and "usermode" whether the abort came
 *	from user mode.  Returns KERN_SUCCESS when the fault was fully
 *	handled here (A-bit set or NM cleared), KERN_INVALID_ADDRESS for a
 *	non-fatal abort inside page-table regions, and KERN_FAILURE when
 *	the caller must fall back to vm_fault().
 */
int
pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, bool usermode)
{
	pt1_entry_t *pte1p, pte1;
	pt2_entry_t *pte2p, pte2;

	if (pmap == NULL)
		pmap = kernel_pmap;

	/*
	 * In kernel, we should never get abort with FAR which is in range of
	 * pmap->pm_pt1 or PT2MAP address spaces. If it happens, stop here
	 * and print out a useful abort message and even get to the debugger
	 * otherwise it likely ends with never ending loop of aborts.
	 */
	if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) {
		/*
		 * All L1 tables should always be mapped and present.
		 * However, we check only current one herein. For user mode,
		 * only permission abort from malicious user is not fatal.
		 * And alignment abort as it may have higher priority.
		 */
		if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_PERM_L2)) {
			CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x",
			    __func__, pmap, pmap->pm_pt1, far);
			panic("%s: pm_pt1 abort", __func__);
		}
		return (KERN_INVALID_ADDRESS);
	}
	if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) {
		/*
		 * PT2MAP should be always mapped and present in current
		 * L1 table. However, only existing L2 tables are mapped
		 * in PT2MAP. For user mode, only L2 translation abort and
		 * permission abort from malicious user is not fatal.
		 * And alignment abort as it may have higher priority.
		 */
		if (!usermode || (idx != FAULT_ALIGN &&
		    idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) {
			CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x",
			    __func__, pmap, PT2MAP, far);
			panic("%s: PT2MAP abort", __func__);
		}
		return (KERN_INVALID_ADDRESS);
	}

	/*
	 * A pmap lock is used below for handling of access and R/W emulation
	 * aborts. They were handled by atomic operations before so some
	 * analysis of new situation is needed to answer the following question:
	 * Is it safe to use the lock even for these aborts?
	 *
	 * There may happen two cases in general:
	 *
	 * (1) Aborts while the pmap lock is locked already - this should not
	 * happen as pmap lock is not recursive. However, under pmap lock only
	 * internal kernel data should be accessed and such data should be
	 * mapped with A bit set and NM bit cleared. If double abort happens,
	 * then a mapping of data which has caused it must be fixed. Further,
	 * all new mappings are always made with A bit set and the bit can be
	 * cleared only on managed mappings.
	 *
	 * (2) Aborts while another lock(s) is/are locked - this already can
	 * happen. However, there is no difference here if it's either access or
	 * R/W emulation abort, or if it's some other abort.
	 */

	PMAP_LOCK(pmap);
#ifdef SMP
	/*
	 * Special treatment is due to break-before-make approach done when
	 * pte1 is updated for userland mapping during section promotion or
	 * demotion. If not caught here, pmap_enter() can find a section
	 * mapping on faulting address. That is not allowed.
	 */
	if (idx == FAULT_TRAN_L1 && usermode && cp15_ats1cur_check(far) == 0) {
		PMAP_UNLOCK(pmap);
		return (KERN_SUCCESS);
	}
#endif
	/*
	 * Accesss bits for page and section. Note that the entry
	 * is not in TLB yet, so TLB flush is not necessary.
	 *
	 * QQQ: This is hardware emulation, we do not call userret()
	 *      for aborts from user mode.
	 */
	if (idx == FAULT_ACCESS_L2) {
		pte2p = pt2map_entry(far);
		pte2 = pte2_load(pte2p);
		if (pte2_is_valid(pte2)) {
			pte2_store(pte2p, pte2 | PTE2_A);
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}
	if (idx == FAULT_ACCESS_L1) {
		pte1p = pmap_pte1(pmap, far);
		pte1 = pte1_load(pte1p);
		if (pte1_is_section(pte1)) {
			pte1_store(pte1p, pte1 | PTE1_A);
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}

	/*
	 * Handle modify bits for page and section. Note that the modify
	 * bit is emulated by software. So PTEx_RO is software read only
	 * bit and PTEx_NM flag is real hardware read only bit.
	 *
	 * QQQ: This is hardware emulation, we do not call userret()
	 *      for aborts from user mode.
	 */
	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L2)) {
		pte2p = pt2map_entry(far);
		pte2 = pte2_load(pte2p);
		if (pte2_is_valid(pte2) && !(pte2 & PTE2_RO) &&
		    (pte2 & PTE2_NM)) {
			pte2_store(pte2p, pte2 & ~PTE2_NM);
			tlb_flush(trunc_page(far));
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}
	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L1)) {
		pte1p = pmap_pte1(pmap, far);
		pte1 = pte1_load(pte1p);
		if (pte1_is_section(pte1) && !(pte1 & PTE1_RO) &&
		    (pte1 & PTE1_NM)) {
			pte1_store(pte1p, pte1 & ~PTE1_NM);
			tlb_flush(pte1_trunc(far));
			PMAP_UNLOCK(pmap);
			return (KERN_SUCCESS);
		}
	}

	/*
	 * QQQ: The previous code, mainly fast handling of access and
	 *      modify bits aborts, could be moved to ASM. Now we are
	 *      starting to deal with not fast aborts.
	 */

#ifdef INVARIANTS
	/*
	 * Read an entry in PT2TAB associated with both pmap and far.
	 * It's safe because PT2TAB is always mapped.
	 */
	pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, far));
	if (pte2_is_valid(pte2)) {
		/*
		 * Now, when we know that L2 page table is allocated,
		 * we can use PT2MAP to get L2 page table entry.
		 */
		pte2 = pte2_load(pt2map_entry(far));
		if (pte2_is_valid(pte2)) {
			/*
			 * If L2 page table entry is valid, make sure that
			 * L1 page table entry is valid too.  Note that we
			 * leave L2 page entries untouched when promoted.
			 */
			pte1 = pte1_load(pmap_pte1(pmap, far));
			if (!pte1_is_valid(pte1)) {
				panic("%s: missing L1 page entry (%p, %#x)",
				    __func__, pmap, far);
			}
		}
	}
#endif
	PMAP_UNLOCK(pmap);
	return (KERN_FAILURE);
}

#if defined(PMAP_DEBUG)
/*
 *  Reusing of KVA used in pmap_zero_page function !!!
 */
/*
 * Debug check: verify that the given page is entirely zero.  The page is
 * temporarily mapped through the per-CPU CMAP2 slot (the same KVA that the
 * pmap_zero_page machinery uses) and every 32-bit word is inspected; any
 * non-zero word panics.
 */
static void
pmap_zero_page_check(vm_page_t m)
{
	pt2_entry_t *cmap2_pte2p;
	uint32_t *p, *end;
	struct pcpu *pc;

	/* Pin to this CPU: CMAP2 slot and its lock are per-CPU. */
	sched_pin();
	pc = get_pcpu();
	cmap2_pte2p = pc->pc_cmap2_pte2p;
	mtx_lock(&pc->pc_cmap_lock);
	if (pte2_load(cmap2_pte2p) != 0)
		panic("%s: CMAP2 busy", __func__);
	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
	    vm_page_pte2_attr(m)));
	end = (uint32_t*)(pc->pc_cmap2_addr + PAGE_SIZE);
	for (p = (uint32_t*)pc->pc_cmap2_addr; p < end; p++)
		if (*p != 0)
			panic("%s: page %p not zero, va: %p", __func__, m,
			    pc->pc_cmap2_addr);
	/* Tear the temporary mapping down again. */
	pte2_clear(cmap2_pte2p);
	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
	sched_unpin();
	mtx_unlock(&pc->pc_cmap_lock);
}

/*
 * Dump all valid user-space L2 mappings of the process with the given pid
 * to the console, two entries per line, stopping at the first kernel
 * address.  Returns the number of valid pte2s printed.
 */
int
pmap_pid_dump(int pid)
{
	pmap_t pmap;
	struct proc *p;
	int npte2 = 0;
	int i, j, index;	/* index counts entries on the current line */

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_pid != pid || p->p_vmspace == NULL)
			continue;
		index = 0;
		pmap = vmspace_pmap(p->p_vmspace);
		for (i = 0; i < NPTE1_IN_PT1; i++) {
			pt1_entry_t pte1;
			pt2_entry_t *pte2p, pte2;
			vm_offset_t base, va;
			vm_paddr_t pa;
			vm_page_t m;

			base = i << PTE1_SHIFT;
			pte1 = pte1_load(&pmap->pm_pt1[i]);

			if (pte1_is_section(pte1)) {
				/*
				 * QQQ: Do something here!
				 */
			} else if (pte1_is_link(pte1)) {
				for (j = 0; j < NPTE2_IN_PT2; j++) {
					va = base + (j << PAGE_SHIFT);
					if (va >= VM_MIN_KERNEL_ADDRESS) {
						/* Only user mappings are
						 * dumped; finish up. */
						if (index) {
							index = 0;
							printf("\n");
						}
						sx_sunlock(&allproc_lock);
						return (npte2);
					}
					pte2p = pmap_pte2(pmap, va);
					pte2 = pte2_load(pte2p);
					pmap_pte2_release(pte2p);
					if (!pte2_is_valid(pte2))
						continue;

					pa = pte2_pa(pte2);
					m = PHYS_TO_VM_PAGE(pa);
					printf("va: 0x%x, pa: 0x%x, h: %d, w:"
					    " %d, f: 0x%x", va, pa,
					    m->hold_count, m->wire_count,
					    m->flags);
					npte2++;
					index++;
					if (index >= 2) {
						index = 0;
						printf("\n");
					} else {
						printf(" ");
					}
				}
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return (npte2);
}

#endif

#ifdef DDB
/*
 * Return a pointer to the pte2 mapping va in the given pmap, or NULL if
 * there is no L2 link for va.  For the current pmap the PT2MAP region is
 * used directly; for other pmaps the L2 page table page is temporarily
 * mapped through the reserved PMAP3/PADDR3 slot (flushed only locally,
 * hence the per-CPU ownership tracking under SMP).  DDB-context helper.
 */
static pt2_entry_t *
pmap_pte2_ddb(pmap_t pmap, vm_offset_t va)
{
	pt1_entry_t pte1;
	vm_paddr_t pt2pg_pa;

	pte1 = pte1_load(pmap_pte1(pmap, va));
	if (!pte1_is_link(pte1))
		return (NULL);

	if (pmap_is_current(pmap))
		return (pt2map_entry(va));

	/* Note that L2 page table size is not equal to PAGE_SIZE.
 */
	pt2pg_pa = trunc_page(pte1_link_pa(pte1));
	if (pte2_pa(pte2_load(PMAP3)) != pt2pg_pa) {
		/* PMAP3 maps a different page: retarget it to ours. */
		pte2_store(PMAP3, PTE2_KPT(pt2pg_pa));
#ifdef SMP
		PMAP3cpu = PCPU_GET(cpuid);
#endif
		tlb_flush_local((vm_offset_t)PADDR3);
	}
#ifdef SMP
	else if (PMAP3cpu != PCPU_GET(cpuid)) {
		/* Right page, but the local TLB may hold a stale entry
		 * flushed only on the CPU that last used PMAP3. */
		PMAP3cpu = PCPU_GET(cpuid);
		tlb_flush_local((vm_offset_t)PADDR3);
	}
#endif
	return (PADDR3 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
}

/*
 * Print the basic fields of a pmap to the console.
 */
static void
dump_pmap(pmap_t pmap)
{

	printf("pmap %p\n", pmap);
	printf(" pm_pt1: %p\n", pmap->pm_pt1);
	printf(" pm_pt2tab: %p\n", pmap->pm_pt2tab);
	printf(" pm_active: 0x%08lX\n", pmap->pm_active.__bits[0]);
}

/*
 * DDB "show pmaps": dump every pmap on the allpmaps list.
 */
DB_SHOW_COMMAND(pmaps, pmap_list_pmaps)
{

	pmap_t pmap;
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		dump_pmap(pmap);
	}
}

/*
 * Build a 3-bit memory-attribute class index from a pte2: bits 3:2 form
 * the low two bits and bit 6 the third bit.  Printed as "TEX%d" by the
 * dump routines below.
 * NOTE(review): bit names (C, B, TEX[0]) per the ARMv7 short-descriptor
 * format - confirm against pte2 layout definitions.
 */
static int
pte2_class(pt2_entry_t pte2)
{
	int cls;

	cls = (pte2 >> 2) & 0x03;
	cls |= (pte2 >> 4) & 0x04;
	return (cls);
}

/*
 * XXX: Section dumping is not implemented yet (see the QQQ note in
 * pmap_pid_dump()); this is an intentional stub.
 */
static void
dump_section(pmap_t pmap, uint32_t pte1_idx)
{
}

/*
 * Print all non-zero pte2s of the L2 page table referenced by the L1
 * link entry at pte1_idx.  With invalid_ok, invalid (but non-zero)
 * entries are expected and not flagged as errors.
 */
static void
dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok)
{
	uint32_t i;
	vm_offset_t va;
	pt2_entry_t *pte2p, pte2;
	vm_page_t m;

	va = pte1_idx << PTE1_SHIFT;
	pte2p = pmap_pte2_ddb(pmap, va);
	for (i = 0; i < NPTE2_IN_PT2; i++, pte2p++, va += PAGE_SIZE) {
		pte2 = pte2_load(pte2p);
		if (pte2 == 0)
			continue;
		if (!pte2_is_valid(pte2)) {
			printf(" 0x%08X: 0x%08X", va, pte2);
			if (!invalid_ok)
				printf(" - not valid !!!");
			printf("\n");
			continue;
		}
		m = PHYS_TO_VM_PAGE(pte2_pa(pte2));
		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2,
		    pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m);
		if (m != NULL) {
			printf(" v:%d h:%d w:%d f:0x%04X\n", m->valid,
			    m->hold_count, m->wire_count, m->flags);
		} else {
			printf("\n");
		}
	}
}

/*
 * Return TRUE iff va lies within the pv_chunk map area.
 */
static __inline boolean_t
is_pv_chunk_space(vm_offset_t va)
{

	if ((((vm_offset_t)pv_chunkbase) <= va) &&
	    (va < ((vm_offset_t)pv_chunkbase + PAGE_SIZE * pv_maxchunks)))
		return (TRUE);
	return (FALSE);
}

/*
 * DDB "show pmap [addr] [/u]": dump the L1 table of the given pmap
 * (default: current pmap).  The 'u' modifier limits the dump to user
 * addresses.
 */
DB_SHOW_COMMAND(pmap, pmap_pmap_print)
{
	/* XXX convert args. */
	pmap_t pmap = (pmap_t)addr;
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	vm_offset_t va, eva;
	vm_page_t m;
	uint32_t i;
	boolean_t invalid_ok, dump_link_ok, dump_pv_chunk;

	if (have_addr) {
		pmap_t pm;

		/* Refuse addresses that are not known pmaps. */
		LIST_FOREACH(pm, &allpmaps, pm_list)
			if (pm == pmap) break;
		if (pm == NULL) {
			printf("given pmap %p is not in allpmaps list\n", pmap);
			return;
		}
	} else
		pmap = PCPU_GET(curpmap);

	eva = (modif[0] == 'u') ? VM_MAXUSER_ADDRESS : 0xFFFFFFFF;
	dump_pv_chunk = FALSE; /* XXX evaluate from modif[] */

	printf("pmap: 0x%08X\n", (uint32_t)pmap);
	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);

	for(i = 0; i < NPTE1_IN_PT1; i++) {
		pte1 = pte1_load(&pmap->pm_pt1[i]);
		if (pte1 == 0)
			continue;
		va = i << PTE1_SHIFT;
		if (va >= eva)
			break;

		if (pte1_is_section(pte1)) {
			printf("0x%08X: Section 0x%08X, s:%d g:%d\n", va, pte1,
			    !!(pte1 & PTE1_S), !(pte1 & PTE1_NG));
			dump_section(pmap, i);
		} else if (pte1_is_link(pte1)) {
			dump_link_ok = TRUE;
			invalid_ok = FALSE;
			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
			m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X m: %p",
			    va, pte1, pte2, m);
			if (is_pv_chunk_space(va)) {
				printf(" - pv_chunk space");
				if (dump_pv_chunk)
					invalid_ok = TRUE;
				else
					dump_link_ok = FALSE;
			}
			else if (m != NULL)
				printf(" w:%d w2:%u", m->wire_count,
				    pt2_wirecount_get(m, pte1_index(va)));
			/* Cross-check the pt2tab entry against the L1 link. */
			if (pte2 == 0)
				printf(" !!! pt2tab entry is ZERO");
			else if (pte2_pa(pte1) != pte2_pa(pte2))
				printf(" !!! pt2tab entry is DIFFERENT - m: %p",
				    PHYS_TO_VM_PAGE(pte2_pa(pte2)));
			printf("\n");
			if (dump_link_ok)
				dump_link(pmap, i, invalid_ok);
		} else
			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
	}
}

/*
 * Print all valid entries of the pmap's PT2TAB (the table describing the
 * L2 page table pages).
 */
static void
dump_pt2tab(pmap_t pmap)
{
	uint32_t i;
	pt2_entry_t pte2;
	vm_offset_t va;
	vm_paddr_t pa;
	vm_page_t m;

	printf("PT2TAB:\n");
	for (i = 0; i < PT2TAB_ENTRIES; i++) {
		pte2 = pte2_load(&pmap->pm_pt2tab[i]);
		if (!pte2_is_valid(pte2))
			continue;
		va = i << PT2TAB_SHIFT;
		pa = pte2_pa(pte2);
		m = PHYS_TO_VM_PAGE(pa);
		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2,
		    pte2_class(pte2), !!(pte2 & PTE2_S), m);
		if (m != NULL)
			printf(" , h: %d, w: %d, f: 0x%04X pidx: %lld",
			    m->hold_count, m->wire_count, m->flags, m->pindex);
		printf("\n");
	}
}

/*
 * DDB "show pmap_pt2tab": dump the L1 entries covering the PT2MAP region
 * of the current pmap, followed by the PT2TAB itself.  Only the current
 * pmap is supported (no address argument).
 */
DB_SHOW_COMMAND(pmap_pt2tab, pmap_pt2tab_print)
{
	/* XXX convert args. */
	pmap_t pmap = (pmap_t)addr;
	pt1_entry_t pte1;
	pt2_entry_t pte2;
	vm_offset_t va;
	uint32_t i, start;

	if (have_addr) {
		printf("supported only on current pmap\n");
		return;
	}

	pmap = PCPU_GET(curpmap);
	printf("curpmap: 0x%08X\n", (uint32_t)pmap);
	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);

	/* Walk only the L1 entries that map the PT2MAP region. */
	start = pte1_index((vm_offset_t)PT2MAP);
	for (i = start; i < (start + NPT2_IN_PT2TAB); i++) {
		pte1 = pte1_load(&pmap->pm_pt1[i]);
		if (pte1 == 0)
			continue;
		va = i << PTE1_SHIFT;
		if (pte1_is_section(pte1)) {
			printf("0x%08X: Section 0x%08X, s:%d\n", va, pte1,
			    !!(pte1 & PTE1_S));
			dump_section(pmap, i);
		} else if (pte1_is_link(pte1)) {
			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X\n", va,
			    pte1, pte2);
			if (pte2 == 0)
				printf(" !!! pt2tab entry is ZERO\n");
		} else
			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
	}
	dump_pt2tab(pmap);
}
#endif