pmap.c revision 270439
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 1998,2000 Doug Rabson 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 * from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp 45 * with some ideas from NetBSD's alpha pmap 46 */ 47 48#include <sys/cdefs.h> 49__FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/pmap.c 270439 2014-08-24 07:53:15Z kib $"); 50 51#include "opt_pmap.h" 52 53#include <sys/param.h> 54#include <sys/efi.h> 55#include <sys/kernel.h> 56#include <sys/ktr.h> 57#include <sys/lock.h> 58#include <sys/mman.h> 59#include <sys/mutex.h> 60#include <sys/proc.h> 61#include <sys/rwlock.h> 62#include <sys/smp.h> 63#include <sys/sysctl.h> 64#include <sys/systm.h> 65 66#include <vm/vm.h> 67#include <vm/vm_param.h> 68#include <vm/vm_page.h> 69#include <vm/vm_map.h> 70#include <vm/vm_object.h> 71#include <vm/vm_pageout.h> 72#include <vm/uma.h> 73 74#include <machine/bootinfo.h> 75#include <machine/md_var.h> 76#include <machine/pal.h> 77 78/* 79 * Manages physical address maps. 80 * 81 * Since the information managed by this module is 82 * also stored by the logical address mapping module, 83 * this module may throw away valid virtual-to-physical 84 * mappings at almost any time. However, invalidations 85 * of virtual-to-physical mappings must be done as 86 * requested. 
87 * 88 * In order to cope with hardware architectures which 89 * make virtual-to-physical map invalidates expensive, 90 * this module may delay invalidate or reduced protection 91 * operations until such time as they are actually 92 * necessary. This module is given full information as 93 * to which processors are currently using which maps, 94 * and to when physical maps must be made correct. 95 */ 96 97/* 98 * Following the Linux model, region IDs are allocated in groups of 99 * eight so that a single region ID can be used for as many RRs as we 100 * want by encoding the RR number into the low bits of the ID. 101 * 102 * We reserve region ID 0 for the kernel and allocate the remaining 103 * IDs for user pmaps. 104 * 105 * Region 0-3: User virtually mapped 106 * Region 4: PBVM and special mappings 107 * Region 5: Kernel virtual memory 108 * Region 6: Direct-mapped uncacheable 109 * Region 7: Direct-mapped cacheable 110 */ 111 112/* XXX move to a header. */ 113extern uint64_t ia64_gateway_page[]; 114 115#if !defined(DIAGNOSTIC) 116#define PMAP_INLINE __inline 117#else 118#define PMAP_INLINE 119#endif 120 121#ifdef PV_STATS 122#define PV_STAT(x) do { x ; } while (0) 123#else 124#define PV_STAT(x) do { } while (0) 125#endif 126 127#define pmap_accessed(lpte) ((lpte)->pte & PTE_ACCESSED) 128#define pmap_dirty(lpte) ((lpte)->pte & PTE_DIRTY) 129#define pmap_exec(lpte) ((lpte)->pte & PTE_AR_RX) 130#define pmap_managed(lpte) ((lpte)->pte & PTE_MANAGED) 131#define pmap_ppn(lpte) ((lpte)->pte & PTE_PPN_MASK) 132#define pmap_present(lpte) ((lpte)->pte & PTE_PRESENT) 133#define pmap_prot(lpte) (((lpte)->pte & PTE_PROT_MASK) >> 56) 134#define pmap_wired(lpte) ((lpte)->pte & PTE_WIRED) 135 136#define pmap_clear_accessed(lpte) (lpte)->pte &= ~PTE_ACCESSED 137#define pmap_clear_dirty(lpte) (lpte)->pte &= ~PTE_DIRTY 138#define pmap_clear_present(lpte) (lpte)->pte &= ~PTE_PRESENT 139#define pmap_clear_wired(lpte) (lpte)->pte &= ~PTE_WIRED 140 141#define pmap_set_wired(lpte) 
(lpte)->pte |= PTE_WIRED 142 143/* 144 * Individual PV entries are stored in per-pmap chunks. This saves 145 * space by eliminating the need to record the pmap within every PV 146 * entry. 147 */ 148#if PAGE_SIZE == 8192 149#define _NPCM 6 150#define _NPCPV 337 151#define _NPCS 2 152#elif PAGE_SIZE == 16384 153#define _NPCM 11 154#define _NPCPV 677 155#define _NPCS 1 156#endif 157struct pv_chunk { 158 pmap_t pc_pmap; 159 TAILQ_ENTRY(pv_chunk) pc_list; 160 u_long pc_map[_NPCM]; /* bitmap; 1 = free */ 161 TAILQ_ENTRY(pv_chunk) pc_lru; 162 u_long pc_spare[_NPCS]; 163 struct pv_entry pc_pventry[_NPCPV]; 164}; 165 166/* 167 * The VHPT bucket head structure. 168 */ 169struct ia64_bucket { 170 uint64_t chain; 171 struct mtx mutex; 172 u_int length; 173}; 174 175/* 176 * Statically allocated kernel pmap 177 */ 178struct pmap kernel_pmap_store; 179 180vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 181vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 182 183/* 184 * Kernel virtual memory management. 185 */ 186static int nkpt; 187extern struct ia64_lpte ***ia64_kptdir; 188 189#define KPTE_DIR0_INDEX(va) \ 190 (((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1)) 191#define KPTE_DIR1_INDEX(va) \ 192 (((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1)) 193#define KPTE_PTE_INDEX(va) \ 194 (((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1)) 195#define NKPTEPG (PAGE_SIZE / sizeof(struct ia64_lpte)) 196 197vm_offset_t kernel_vm_end; 198 199/* Defaults for ptc.e. 
*/ 200static uint64_t pmap_ptc_e_base = 0; 201static uint32_t pmap_ptc_e_count1 = 1; 202static uint32_t pmap_ptc_e_count2 = 1; 203static uint32_t pmap_ptc_e_stride1 = 0; 204static uint32_t pmap_ptc_e_stride2 = 0; 205 206struct mtx pmap_ptc_mutex; 207 208/* 209 * Data for the RID allocator 210 */ 211static int pmap_ridcount; 212static int pmap_rididx; 213static int pmap_ridmapsz; 214static int pmap_ridmax; 215static uint64_t *pmap_ridmap; 216struct mtx pmap_ridmutex; 217 218static struct rwlock_padalign pvh_global_lock; 219 220/* 221 * Data for the pv entry allocation mechanism 222 */ 223static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 224static int pv_entry_count; 225 226/* 227 * Data for allocating PTEs for user processes. 228 */ 229static uma_zone_t ptezone; 230 231/* 232 * Virtual Hash Page Table (VHPT) data. 233 */ 234/* SYSCTL_DECL(_machdep); */ 235static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, ""); 236 237struct ia64_bucket *pmap_vhpt_bucket; 238 239int pmap_vhpt_nbuckets; 240SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD, 241 &pmap_vhpt_nbuckets, 0, ""); 242 243int pmap_vhpt_log2size = 0; 244TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size); 245SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD, 246 &pmap_vhpt_log2size, 0, ""); 247 248static int pmap_vhpt_inserts; 249SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD, 250 &pmap_vhpt_inserts, 0, ""); 251 252static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS); 253SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD, 254 NULL, 0, pmap_vhpt_population, "I", ""); 255 256static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va); 257 258static void free_pv_chunk(struct pv_chunk *pc); 259static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 260static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); 261static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); 262 263static void pmap_enter_quick_locked(pmap_t pmap, 
    pmap, vm_offset_t va,
		    vm_page_t m, vm_prot_t prot);
static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
		    vm_offset_t va, pv_entry_t pv, int freepte);
static int	pmap_remove_vhpt(vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
		    vm_page_t m);

/*
 * Mark every VHPT entry in the table at 'vhpt' empty: the tag value
 * 1UL << 63 can never match a real translation, and each entry is
 * pointed at its per-bucket collision chain head.
 */
static void
pmap_initialize_vhpt(vm_offset_t vhpt)
{
	struct ia64_lpte *pte;
	u_int i;

	pte = (struct ia64_lpte *)vhpt;
	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
		pte[i].pte = 0;
		pte[i].itir = 0;
		pte[i].tag = 1UL << 63;	/* Invalid tag */
		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
	}
}

#ifdef SMP
/*
 * Allocate and initialize a per-CPU VHPT (used when starting APs).
 * Returns the region 7 (cacheable direct-map) address of the table,
 * or 0 if no suitably sized/aligned contiguous memory was available.
 */
vm_offset_t
pmap_alloc_vhpt(void)
{
	vm_offset_t vhpt;
	vm_page_t m;
	vm_size_t size;

	size = 1UL << pmap_vhpt_log2size;
	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
	    VM_MEMATTR_DEFAULT);
	if (m != NULL) {
		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
		pmap_initialize_vhpt(vhpt);
		return (vhpt);
	}
	return (0);
}
#endif

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * Queries PAL for the ptc.e loop parameters, sets up the region ID
 * allocator, steals memory for the initial kernel page tables and the
 * BSP's VHPT, programs the PTA, and initializes the kernel pmap.
 */
void
pmap_bootstrap()
{
	struct ia64_pal_result res;
	vm_offset_t base;
	size_t size;
	int i, ridbits;

	/*
	 * Query the PAL Code to find the loop parameters for the
	 * ptc.e instruction.
	 */
	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
	if (res.pal_status != 0)
		panic("Can't configure ptc.e parameters");
	pmap_ptc_e_base = res.pal_result[0];
	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
	pmap_ptc_e_count2 = res.pal_result[1];
	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
	pmap_ptc_e_stride2 = res.pal_result[2];
	if (bootverbose)
		printf("ptc.e base=0x%lx, count1=%u, count2=%u, "
		       "stride1=0x%x, stride2=0x%x\n",
		       pmap_ptc_e_base,
		       pmap_ptc_e_count1,
		       pmap_ptc_e_count2,
		       pmap_ptc_e_stride1,
		       pmap_ptc_e_stride2);

	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);

	/*
	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
	 *
	 * We currently need at least 19 bits in the RID because PID_MAX
	 * can only be encoded in 17 bits and we need RIDs for 4 regions
	 * per process. With PID_MAX equalling 99999 this means that we
	 * need to be able to encode 399996 (=4*PID_MAX).
	 * The Itanium processor only has 18 bits and the architected
	 * minimum is exactly that. So, we cannot use a PID based scheme
	 * in those cases. Enter pmap_ridmap...
	 * We should avoid the map when running on a processor that has
	 * implemented enough bits. This means that we should pass the
	 * process/thread ID to pmap. This we currently don't do, so we
	 * use the map anyway. However, we don't want to allocate a map
	 * that is large enough to cover the range dictated by the number
	 * of bits in the RID, because that may result in a RID map of
	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
	 * The bottomline: we create a 32KB map when the processor only
	 * implements 18 bits (or when we can't figure it out). Otherwise
	 * we create a 64KB map.
	 */
	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		if (bootverbose)
			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
		ridbits = 18; /* guaranteed minimum */
	} else {
		ridbits = (res.pal_result[1] >> 8) & 0xff;
		if (bootverbose)
			printf("Processor supports %d Region ID bits\n",
			    ridbits);
	}
	if (ridbits > 19)
		ridbits = 19;

	pmap_ridmax = (1 << ridbits);
	pmap_ridmapsz = pmap_ridmax / 64;
	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
	/* Reserve RIDs 0..7 for the kernel. */
	pmap_ridmap[0] |= 0xff;
	pmap_rididx = 0;
	pmap_ridcount = 8;
	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);

	/*
	 * Allocate some memory for initial kernel 'page tables'.
	 */
	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
	nkpt = 0;
	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;

	/*
	 * Determine a valid (mappable) VHPT size.  Clamp to [2^16, 2^28]
	 * and round down to an even power of two.
	 */
	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
	if (pmap_vhpt_log2size == 0)
		pmap_vhpt_log2size = 20;
	else if (pmap_vhpt_log2size < 16)
		pmap_vhpt_log2size = 16;
	else if (pmap_vhpt_log2size > 28)
		pmap_vhpt_log2size = 28;
	if (pmap_vhpt_log2size & 1)
		pmap_vhpt_log2size--;

	size = 1UL << pmap_vhpt_log2size;
	base = (uintptr_t)ia64_physmem_alloc(size, size);
	if (base == 0)
		panic("Unable to allocate VHPT");

	PCPU_SET(md.vhpt, base);
	if (bootverbose)
		printf("VHPT: address=%#lx, size=%#lx\n", base, size);

	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
	    sizeof(struct ia64_bucket), PAGE_SIZE);
	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
		/* Stolen memory is zeroed. */
		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
		    MTX_NOWITNESS | MTX_SPIN);
	}

	pmap_initialize_vhpt(base);
	map_vhpt(base);
	/* Enable the VHPT walker: base, size and the 'enabled' bit. */
	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
	ia64_srlz_i();

	virtual_avail = VM_INIT_KERNEL_ADDRESS;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
		kernel_pmap->pm_rid[i] = 0;
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
	PCPU_SET(md.current_pmap, kernel_pmap);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");

	/* Region 5 is mapped via the VHPT. */
	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);

	/*
	 * Clear out any random TLB entries left over from booting.
	 */
	pmap_invalidate_all();

	map_gateway_page();
}

/*
 * Sysctl handler: report the number of live VHPT entries by summing
 * the per-bucket collision chain lengths (unlocked snapshot).
 */
static int
pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
{
	int count, error, i;

	count = 0;
	for (i = 0; i < pmap_vhpt_nbuckets; i++)
		count += pmap_vhpt_bucket[i].length;

	error = SYSCTL_OUT(req, &count, sizeof(count));
	return (error);
}

/*
 * Return the direct-mapped KVA for a page: region 6 (uncacheable) if
 * the page's memory attribute is uncacheable, region 7 (cacheable)
 * otherwise.
 */
vm_offset_t
pmap_page_to_va(vm_page_t m)
{
	vm_paddr_t pa;
	vm_offset_t va;

	pa = VM_PAGE_TO_PHYS(m);
	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
	    IA64_PHYS_TO_RR7(pa);
	return (va);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);

	TAILQ_INIT(&m->md.pv_list);
	m->md.memattr = VM_MEMATTR_DEFAULT;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{

	CTR1(KTR_PMAP, "%s()", __func__);

	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
}


/***************************************************
 * Manipulate TLBs for a pmap
 ***************************************************/

/*
 * Invalidate the translation for 'va' machine-wide: swap the invalid
 * tag into any matching VHPT entry on every CPU, then issue a global
 * purge (ptc.ga) under the PTC mutex.  The ia64_mf/ia64_srlz_i pair
 * orders the purge before subsequent instruction fetch; ia64_invala
 * flushes the local ALAT.
 */
static void
pmap_invalidate_page(vm_offset_t va)
{
	struct ia64_lpte *pte;
	struct pcpu *pc;
	uint64_t tag;
	u_int vhpt_ofs;

	critical_enter();

	/* Offset of va's entry is the same in every CPU's VHPT. */
	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
	tag = ia64_ttag(va);
	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
	}

	mtx_lock_spin(&pmap_ptc_mutex);

	ia64_ptc_ga(va, PAGE_SHIFT << 2);
	ia64_mf();
	ia64_srlz_i();

	mtx_unlock_spin(&pmap_ptc_mutex);

	ia64_invala();

	critical_exit();
}

/*
 * Purge the entire TLB on this CPU using the ptc.e loop parameters
 * obtained from PAL during bootstrap.
 */
void
pmap_invalidate_all(void)
{
	uint64_t addr;
	int i, j;

	addr = pmap_ptc_e_base;
	for (i = 0; i < pmap_ptc_e_count1; i++) {
		for (j = 0; j < pmap_ptc_e_count2; j++) {
			ia64_ptc_e(addr);
			addr += pmap_ptc_e_stride2;
		}
		addr += pmap_ptc_e_stride1;
	}
	ia64_srlz_i();
}

/*
 * Allocate a free region ID from the bitmap.  pmap_rididx remembers
 * where the last search left off; panics if the RID space is
 * exhausted.
 */
static uint32_t
pmap_allocate_rid(void)
{
	uint64_t bit, bits;
	int rid;

	mtx_lock(&pmap_ridmutex);
	if (pmap_ridcount == pmap_ridmax)
		panic("pmap_allocate_rid: All Region IDs used");

	/* Find an index with a free bit. */
	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
		pmap_rididx++;
		if (pmap_rididx == pmap_ridmapsz)
			pmap_rididx = 0;
	}
	rid = pmap_rididx * 64;

	/* Find a free bit. */
	bit = 1UL;
	while (bits & bit) {
		rid++;
		bit <<= 1;
	}

	pmap_ridmap[pmap_rididx] |= bit;
	pmap_ridcount++;
	mtx_unlock(&pmap_ridmutex);

	return rid;
}

/*
 * Return region ID 'rid' to the allocator's bitmap.
 */
static void
pmap_free_rid(uint32_t rid)
{
	uint64_t bit;
	int idx;

	idx = rid / 64;
	bit = ~(1UL << (rid & 63));

	mtx_lock(&pmap_ridmutex);
	pmap_ridmap[idx] &= bit;
	pmap_ridcount--;
	mtx_unlock(&pmap_ridmutex);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * Common pmap initialization: allocate a RID for each user region
 * and reset the pv chunk list and statistics.
 */
static void
pmap_pinit_common(pmap_t pmap)
{
	int i;

	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
		pmap->pm_rid[i] = pmap_allocate_rid();
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Initialize pmap0 (the pmap of process 0); unlike pmap_pinit this
 * must also initialize the pmap lock.
 */
void
pmap_pinit0(pmap_t pmap)
{

	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);

	PMAP_LOCK_INIT(pmap);
	pmap_pinit_common(pmap);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{

	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);

	pmap_pinit_common(pmap);
	return (1);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
655 */ 656void 657pmap_release(pmap_t pmap) 658{ 659 int i; 660 661 CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap); 662 663 for (i = 0; i < IA64_VM_MINKERN_REGION; i++) 664 if (pmap->pm_rid[i]) 665 pmap_free_rid(pmap->pm_rid[i]); 666} 667 668/* 669 * grow the number of kernel page table entries, if needed 670 */ 671void 672pmap_growkernel(vm_offset_t addr) 673{ 674 struct ia64_lpte **dir1; 675 struct ia64_lpte *leaf; 676 vm_page_t nkpg; 677 678 CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, addr); 679 680 while (kernel_vm_end <= addr) { 681 if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64) 682 panic("%s: out of kernel address space", __func__); 683 684 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)]; 685 if (dir1 == NULL) { 686 nkpg = vm_page_alloc(NULL, nkpt++, 687 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED); 688 if (!nkpg) 689 panic("%s: cannot add dir. page", __func__); 690 691 dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg); 692 bzero(dir1, PAGE_SIZE); 693 ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1; 694 } 695 696 nkpg = vm_page_alloc(NULL, nkpt++, 697 VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED); 698 if (!nkpg) 699 panic("%s: cannot add PTE page", __func__); 700 701 leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg); 702 bzero(leaf, PAGE_SIZE); 703 dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf; 704 705 kernel_vm_end += PAGE_SIZE * NKPTEPG; 706 } 707} 708 709/*************************************************** 710 * page management routines. 
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);

/*
 * Map a pv_entry back to its containing chunk; chunks are exactly one
 * page and page-aligned, so masking the low bits suffices.
 */
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

/* Free-bitmap values for a chunk with all pv entries free. */
#define	PC_FREE_FULL	0xfffffffffffffffful
#define	PC_FREE_PARTIAL	\
	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)

#if PAGE_SIZE == 8192
static const u_long pc_freemask[_NPCM] = {
	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
};
#elif PAGE_SIZE == 16384
static const u_long pc_freemask[_NPCM] = {
	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
	PC_FREE_FULL, PC_FREE_PARTIAL
};
#endif

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
    "Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
    "Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");
#endif

/*
 * We are in a serious low memory condition. Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 *
 * Walks the global chunk LRU, tearing down non-wired mappings; a
 * chunk that becomes entirely free is unmapped from its pmap and its
 * page returned to the caller.  Locks other pmaps only in the order
 * pmap > locked_pmap (or via trylock) to avoid deadlock.
 */
static vm_page_t
pmap_pv_reclaim(pmap_t locked_pmap)
{
	struct pch newtail;
	struct pv_chunk *pc;
	struct ia64_lpte *pte;
	pmap_t pmap;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t m, m_pc;
	u_long inuse;
	int bit, field, freed, idx;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	pmap = NULL;
	m_pc = NULL;
	TAILQ_INIT(&newtail);
	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
		if (pmap != pc->pc_pmap) {
			/* Drop the previous pmap's lock before switching. */
			if (pmap != NULL) {
				if (pmap != locked_pmap) {
					pmap_switch(locked_pmap);
					PMAP_UNLOCK(pmap);
				}
			}
			pmap = pc->pc_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
				pmap = NULL;
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
				continue;
			}
			pmap_switch(pmap);
		}

		/*
		 * Destroy every non-wired, 8 KB page mapping in the chunk.
		 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = ffsl(inuse) - 1;
				idx = field * sizeof(inuse) * NBBY + bit;
				pv = &pc->pc_pventry[idx];
				va = pv->pv_va;
				pte = pmap_find_vhpt(va);
				KASSERT(pte != NULL, ("pte"));
				if (pmap_wired(pte))
					continue;
				pmap_remove_vhpt(va);
				pmap_invalidate_page(va);
				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
				if (pmap_accessed(pte))
					vm_page_aflag_set(m, PGA_REFERENCED);
				if (pmap_dirty(pte))
					vm_page_dirty(m);
				pmap_free_pte(pte, va);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
				if (TAILQ_EMPTY(&m->md.pv_list))
					vm_page_aflag_clear(m, PGA_WRITEABLE);
				pc->pc_map[field] |= 1UL << bit;
				freed++;
			}
		}
		if (freed == 0) {
			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
			continue;
		}
		/* Every freed mapping is for a 8 KB page. */
		pmap->pm_stats.resident_count -= freed;
		PV_STAT(pv_entry_frees += freed);
		PV_STAT(pv_entry_spare += freed);
		pv_entry_count -= freed;
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		for (field = 0; field < _NPCM; field++)
			if (pc->pc_map[field] != pc_freemask[field]) {
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);

				/*
				 * One freed pv entry in locked_pmap is
				 * sufficient.
				 */
				if (pmap == locked_pmap)
					goto out;
				break;
			}
		if (field == _NPCM) {
			PV_STAT(pv_entry_spare -= _NPCPV);
			PV_STAT(pc_chunk_count--);
			PV_STAT(pc_chunk_frees++);
			/* Entire chunk is free; return it. */
			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
			break;
		}
	}
out:
	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
	if (pmap != NULL) {
		if (pmap != locked_pmap) {
			pmap_switch(locked_pmap);
			PMAP_UNLOCK(pmap);
		}
	}
	return (m_pc);
}

/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int bit, field, idx;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / (sizeof(u_long) * NBBY);
	bit = idx % (sizeof(u_long) * NBBY);
	pc->pc_map[field] |= 1ul << bit;
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx]) {
			/*
			 * 98% of the time, pc is already at the head of the
			 * list. If it isn't already, move it to the head.
			 */
			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
			    pc)) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			return;
		}
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

/*
 * Release a fully free pv chunk: unlink it from the global LRU and
 * return its backing page to the VM system.
 */
static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	PV_STAT(pv_entry_spare -= _NPCPV);
	PV_STAT(pc_chunk_count--);
	PV_STAT(pc_chunk_frees++);
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
	vm_page_unwire(m, 0);
	vm_page_free(m);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
942 */ 943static pv_entry_t 944get_pv_entry(pmap_t pmap, boolean_t try) 945{ 946 struct pv_chunk *pc; 947 pv_entry_t pv; 948 vm_page_t m; 949 int bit, field, idx; 950 951 rw_assert(&pvh_global_lock, RA_WLOCKED); 952 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 953 PV_STAT(pv_entry_allocs++); 954 pv_entry_count++; 955retry: 956 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 957 if (pc != NULL) { 958 for (field = 0; field < _NPCM; field++) { 959 if (pc->pc_map[field]) { 960 bit = ffsl(pc->pc_map[field]) - 1; 961 break; 962 } 963 } 964 if (field < _NPCM) { 965 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; 966 pv = &pc->pc_pventry[idx]; 967 pc->pc_map[field] &= ~(1ul << bit); 968 /* If this was the last item, move it to tail */ 969 for (field = 0; field < _NPCM; field++) 970 if (pc->pc_map[field] != 0) { 971 PV_STAT(pv_entry_spare--); 972 return (pv); /* not full, return */ 973 } 974 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 975 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 976 PV_STAT(pv_entry_spare--); 977 return (pv); 978 } 979 } 980 /* No free items, allocate another chunk */ 981 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 982 VM_ALLOC_WIRED); 983 if (m == NULL) { 984 if (try) { 985 pv_entry_count--; 986 PV_STAT(pc_chunk_tryfail++); 987 return (NULL); 988 } 989 m = pmap_pv_reclaim(pmap); 990 if (m == NULL) 991 goto retry; 992 } 993 PV_STAT(pc_chunk_count++); 994 PV_STAT(pc_chunk_allocs++); 995 pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m)); 996 pc->pc_pmap = pmap; 997 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 998 for (field = 1; field < _NPCM; field++) 999 pc->pc_map[field] = pc_freemask[field]; 1000 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1001 pv = &pc->pc_pventry[0]; 1002 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1003 PV_STAT(pv_entry_spare += _NPCPV - 1); 1004 return (pv); 1005} 1006 1007/* 1008 * Conditionally create a pv entry. 
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	rw_assert(&pvh_global_lock, RA_WLOCKED);
	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * Add an ia64_lpte to the VHPT.
 *
 * The new pte is linked at the head of the bucket's collision chain;
 * the ia64_mf ensures the pte's chain pointer is globally visible
 * before the bucket head is redirected to it.
 */
static void
pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *vhpte;
	uint64_t pte_pa;

	/* Can fault, so get it out of the way. */
	pte_pa = ia64_tpa((vm_offset_t)pte);

	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	mtx_lock_spin(&bckt->mutex);
	pte->chain = bckt->chain;
	ia64_mf();
	bckt->chain = pte_pa;

	pmap_vhpt_inserts++;
	bckt->length++;
	mtx_unlock_spin(&bckt->mutex);
}

/*
 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
 * worked or an appropriate error code otherwise.
 */
static int
pmap_remove_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	struct ia64_lpte *lpte;	/* trailing pointer: pte's predecessor */
	struct ia64_lpte *vhpte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	vhpte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)vhpte->chain;

	lpte = NULL;
	mtx_lock_spin(&bckt->mutex);
	/* Walk the chain (physical addresses, read via region 7). */
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		lpte = pte;
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	if (chain == 0) {
		mtx_unlock_spin(&bckt->mutex);
		return (ENOENT);
	}

	/* Snip this pv_entry out of the collision chain. */
	if (lpte == NULL)
		bckt->chain = pte->chain;
	else
		lpte->chain = pte->chain;
	ia64_mf();

	bckt->length--;
	mtx_unlock_spin(&bckt->mutex);
	return (0);
}

/*
 * Find the ia64_lpte for the given va, if any.
 */
static struct ia64_lpte *
pmap_find_vhpt(vm_offset_t va)
{
	struct ia64_bucket *bckt;
	struct ia64_lpte *pte;
	uint64_t chain, tag;

	tag = ia64_ttag(va);
	pte = (struct ia64_lpte *)ia64_thash(va);
	bckt = (struct ia64_bucket *)pte->chain;

	mtx_lock_spin(&bckt->mutex);
	chain = bckt->chain;
	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	while (chain != 0 && pte->tag != tag) {
		chain = pte->chain;
		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
	}
	mtx_unlock_spin(&bckt->mutex);
	return ((chain != 0) ? pte : NULL);
}

/*
 * Remove an entry from the list of managed mappings.
 *
 * If 'pv' is NULL the (pmap, va) pair is looked up on the page's pv
 * list.  Returns 0 on success, ENOENT if no matching entry exists.
 */
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
{

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	if (!pv) {
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
				break;
		}
	}

	if (pv) {
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
			vm_page_aflag_clear(m, PGA_WRITEABLE);

		free_pv_entry(pmap, pv);
		return 0;
	} else {
		return ENOENT;
	}
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	/*
	 * NOTE(review): FALSE presumably allows get_pv_entry() to
	 * reclaim/sleep so the allocation cannot fail here -- confirm
	 * against get_pv_entry().
	 */
	pv = get_pv_entry(pmap, FALSE);
	pv->pv_va = va;
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_paddr_t pa;

	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, va);

	pa = 0;
	PMAP_LOCK(pmap);
	/* Make the target pmap current so the VHPT lookup sees it. */
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte))
		pa = pmap_ppn(pte);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_page_t m;
	vm_paddr_t pa;

	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, prot=%#x)", __func__, pmap, va,
	    prot);

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
retry:
	pte = pmap_find_vhpt(va);
	if (pte != NULL && pmap_present(pte) &&
	    (pmap_prot(pte) & prot) == prot) {
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		/*
		 * Acquire the page lock; if the lock had to be dropped
		 * and reacquired the mapping may have changed, so
		 * revalidate from scratch.
		 */
		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
			goto retry;
		vm_page_hold(m);
	}
	PA_UNLOCK_COND(pa);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
1226 ***************************************************/ 1227 1228/* 1229 * Find the kernel lpte for mapping the given virtual address, which 1230 * must be in the part of region 5 which we can cover with our kernel 1231 * 'page tables'. 1232 */ 1233static struct ia64_lpte * 1234pmap_find_kpte(vm_offset_t va) 1235{ 1236 struct ia64_lpte **dir1; 1237 struct ia64_lpte *leaf; 1238 1239 KASSERT((va >> 61) == 5, 1240 ("kernel mapping 0x%lx not in region 5", va)); 1241 KASSERT(va < kernel_vm_end, 1242 ("kernel mapping 0x%lx out of range", va)); 1243 1244 dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)]; 1245 leaf = dir1[KPTE_DIR1_INDEX(va)]; 1246 return (&leaf[KPTE_PTE_INDEX(va)]); 1247} 1248 1249/* 1250 * Find a pte suitable for mapping a user-space address. If one exists 1251 * in the VHPT, that one will be returned, otherwise a new pte is 1252 * allocated. 1253 */ 1254static struct ia64_lpte * 1255pmap_find_pte(vm_offset_t va) 1256{ 1257 struct ia64_lpte *pte; 1258 1259 if (va >= VM_MAXUSER_ADDRESS) 1260 return pmap_find_kpte(va); 1261 1262 pte = pmap_find_vhpt(va); 1263 if (pte == NULL) { 1264 pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO); 1265 pte->tag = 1UL << 63; 1266 } 1267 return (pte); 1268} 1269 1270/* 1271 * Free a pte which is now unused. This simply returns it to the zone 1272 * allocator if it is a user mapping. For kernel mappings, clear the 1273 * valid bit to make it clear that the mapping is not currently used. 
 */
static void
pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
{
	if (va < VM_MAXUSER_ADDRESS)
		uma_zfree(ptezone, pte);
	else
		pmap_clear_present(pte);
}

/*
 * Set the privilege-level and access-rights fields of a PTE
 * according to the requested protection.
 */
static PMAP_INLINE void
pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
{
	static long prot2ar[4] = {
		PTE_AR_R,		/* VM_PROT_NONE */
		PTE_AR_RW,		/* VM_PROT_WRITE */
		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
	};

	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
	    ? PTE_PL_KERN : PTE_PL_USER;
	/* Index by W|X (prot >> 1): read access is always granted. */
	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
}

/*
 * Set the memory-attribute field of a PTE.
 */
static PMAP_INLINE void
pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
{

	pte->pte &= ~PTE_MA_MASK;
	pte->pte |= (ma & PTE_MA_MASK);
}

/*
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was orginally valid, then its assumed to already be in the
 * VHPT.
 * This functions does not set the protection bits.  It's expected
 * that those have been set correctly prior to calling this function.
 */
static void
pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
    boolean_t wired, boolean_t managed)
{

	/* Preserve the protection/attribute fields set by our callers. */
	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
	    PTE_AR_MASK | PTE_ED;
	pte->pte |= PTE_PRESENT;
	/*
	 * Unmanaged mappings are pre-dirtied/pre-referenced so no
	 * fault is ever taken to track those bits for them.
	 */
	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
	pte->pte |= (wired) ? PTE_WIRED : 0;
	pte->pte |= pa & PTE_PPN_MASK;

	pte->itir = PAGE_SHIFT << 2;

	/*
	 * Fence: the PTE contents must be globally visible before the
	 * tag makes the entry matchable by the VHPT walker.
	 */
	ia64_mf();

	pte->tag = ia64_ttag(va);
}

/*
 * Remove the (possibly managed) mapping represented by pte from the
 * given pmap.
 */
static int
pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
    pv_entry_t pv, int freepte)
{
	int error;
	vm_page_t m;

	/*
	 * First remove from the VHPT.
	 */
	error = pmap_remove_vhpt(va);
	KASSERT(error == 0, ("%s: pmap_remove_vhpt returned %d",
	    __func__, error));

	pmap_invalidate_page(va);

	if (pmap_wired(pte))
		pmap->pm_stats.wired_count -= 1;

	pmap->pm_stats.resident_count -= 1;
	if (pmap_managed(pte)) {
		/* Propagate hardware-maintained A/D state to the page. */
		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
		if (pmap_dirty(pte))
			vm_page_dirty(m);
		if (pmap_accessed(pte))
			vm_page_aflag_set(m, PGA_REFERENCED);

		error = pmap_remove_entry(pmap, m, va, pv);
	}
	if (freepte)
		pmap_free_pte(pte, va);

	return (error);
}

/*
 * Extract the physical page address associated with a kernel
 * virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	struct ia64_lpte *pte;
	uint64_t *pbvm_pgtbl;
	vm_paddr_t pa;
	u_int idx;

	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);

	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));

	/* Regions 6 and 7 are direct mapped. */
	if (va >= IA64_RR_BASE(6)) {
		pa = IA64_RR_MASK(va);
		goto out;
	}

	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
	if (va >= kernel_vm_end)
		goto err_out;
	if (va >= VM_INIT_KERNEL_ADDRESS) {
		pte = pmap_find_kpte(va);
		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
		goto out;
	}

	/* The PBVM page table. */
	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
		goto err_out;
	if (va >= IA64_PBVM_PGTBL) {
		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
		goto out;
	}

	/* The PBVM itself. */
	if (va >= IA64_PBVM_BASE) {
		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
		/* 8-byte entries: table size in bytes -> entry count. */
		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
			goto err_out;
		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
			goto err_out;
		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
		    (va & IA64_PBVM_PAGE_MASK);
		goto out;
	}

 err_out:
	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
	pa = 0;
	/* FALLTHROUGH */

 out:
	return (pa);
}

/*
 * Add a list of wired pages to the kva this routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page is effectively wired, but it's customary to not have
 * the PTE reflect that, nor update statistics.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	struct ia64_lpte *pte;
	int i;

	CTR4(KTR_PMAP, "%s(va=%#lx, m_p=%p, cnt=%d)", __func__, va, m, count);

	for (i = 0; i < count; i++) {
		pte = pmap_find_kpte(va);
		/* Already in the VHPT: just flush the old translation. */
		if (pmap_present(pte))
			pmap_invalidate_page(va);
		else
			pmap_enter_vhpt(pte, va);
		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
		pmap_pte_attr(pte, m[i]->md.memattr);
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
		va += PAGE_SIZE;
	}
}

/*
 * this routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
1466 */ 1467void 1468pmap_qremove(vm_offset_t va, int count) 1469{ 1470 struct ia64_lpte *pte; 1471 int i; 1472 1473 CTR3(KTR_PMAP, "%s(va=%#lx, cnt=%d)", __func__, va, count); 1474 1475 for (i = 0; i < count; i++) { 1476 pte = pmap_find_kpte(va); 1477 if (pmap_present(pte)) { 1478 pmap_remove_vhpt(va); 1479 pmap_invalidate_page(va); 1480 pmap_clear_present(pte); 1481 } 1482 va += PAGE_SIZE; 1483 } 1484} 1485 1486/* 1487 * Add a wired page to the kva. As for pmap_qenter(), it's customary 1488 * to not have the PTE reflect that, nor update statistics. 1489 */ 1490void 1491pmap_kenter(vm_offset_t va, vm_paddr_t pa) 1492{ 1493 struct ia64_lpte *pte; 1494 1495 CTR3(KTR_PMAP, "%s(va=%#lx, pa=%#lx)", __func__, va, pa); 1496 1497 pte = pmap_find_kpte(va); 1498 if (pmap_present(pte)) 1499 pmap_invalidate_page(va); 1500 else 1501 pmap_enter_vhpt(pte, va); 1502 pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL); 1503 pmap_pte_attr(pte, VM_MEMATTR_DEFAULT); 1504 pmap_set_pte(pte, va, pa, FALSE, FALSE); 1505} 1506 1507/* 1508 * Remove a page from the kva 1509 */ 1510void 1511pmap_kremove(vm_offset_t va) 1512{ 1513 struct ia64_lpte *pte; 1514 1515 CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va); 1516 1517 pte = pmap_find_kpte(va); 1518 if (pmap_present(pte)) { 1519 pmap_remove_vhpt(va); 1520 pmap_invalidate_page(va); 1521 pmap_clear_present(pte); 1522 } 1523} 1524 1525/* 1526 * Used to map a range of physical addresses into kernel 1527 * virtual address space. 1528 * 1529 * The value passed in '*virt' is a suggested virtual address for 1530 * the mapping. Architectures which can support a direct-mapped 1531 * physical to virtual region can return the appropriate address 1532 * within that region, leaving '*virt' unchanged. Other 1533 * architectures should map the pages starting at '*virt' and 1534 * update '*virt' with the first usable address after the mapped 1535 * region. 
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{

	CTR5(KTR_PMAP, "%s(va_p=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
	    virt, start, end, prot);

	/* Region 7 direct-maps physical memory; '*virt' is untouched. */
	return IA64_PHYS_TO_RR7(start);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 *
 * Sparsely used ranges are inefficiently removed.  The VHPT is
 * probed for every page within the range. XXX
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_t oldpmap;
	vm_offset_t va;
	struct ia64_lpte *pte;

	CTR4(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx)", __func__, pmap, sva,
	    eva);

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		pte = pmap_find_vhpt(va);
		if (pte != NULL)
			pmap_remove_pte(pmap, pte, va, 0, 1);
	}
	rw_wunlock(&pvh_global_lock);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
	pmap_t oldpmap;
	pv_entry_t pv;

	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	rw_wlock(&pvh_global_lock);
	/* Each iteration removes the list head until the list is empty. */
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		struct ia64_lpte *pte;
		pmap_t pmap = PV_PMAP(pv);
		vm_offset_t va = pv->pv_va;

		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
		pmap_remove_pte(pmap, pte, va, pv, 1);
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte;

	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
	    pmap, sva, eva, prot);

	/* Revoking read access amounts to removing the mappings. */
	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	/* Nothing to restrict when write and execute both remain. */
	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
		return;

	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
		panic("pmap_protect: unaligned addresses");

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	for ( ; sva < eva; sva += PAGE_SIZE) {
		/* If page is invalid, skip this page */
		pte = pmap_find_vhpt(sva);
		if (pte == NULL)
			continue;

		/* If there's no change, skip it too */
		if (pmap_prot(pte) == prot)
			continue;

		/*
		 * Save the dirty state before write permission (and
		 * with it the hardware dirty tracking) is taken away.
		 */
		if ((prot & VM_PROT_WRITE) == 0 &&
		    pmap_managed(pte) && pmap_dirty(pte)) {
			vm_paddr_t pa = pmap_ppn(pte);
			vm_page_t m = PHYS_TO_VM_PAGE(pa);

			vm_page_dirty(m);
			pmap_clear_dirty(pte);
		}

		if (prot & VM_PROT_EXECUTE)
			ia64_sync_icache(sva, PAGE_SIZE);

		pmap_pte_prot(pmap, pte, prot);
		pmap_invalidate_page(sva);
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB:  This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    u_int flags, int8_t psind __unused)
{
	pmap_t oldpmap;
	vm_offset_t pa;
	vm_offset_t opa;
	struct ia64_lpte origpte;
	struct ia64_lpte *pte;
	boolean_t icache_inval, managed, wired;

	CTR5(KTR_PMAP, "pmap_enter(pm=%p, va=%#lx, m=%p, prot=%#x, "
	    "flags=%u)", pmap, va, m, prot, flags);

	wired = (flags & PMAP_ENTER_WIRED) != 0;
	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);

	va &= ~PAGE_MASK;
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
	    ("pmap_enter: page %p is not busy", m));

	/*
	 * Find (or create) a pte for the given mapping.  On allocation
	 * failure, drop all locks, wait for memory (unless the caller
	 * asked not to sleep) and retry from scratch.
	 */
	while ((pte = pmap_find_pte(va)) == NULL) {
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
		rw_wunlock(&pvh_global_lock);
		if ((flags & PMAP_ENTER_NOSLEEP) != 0)
			return (KERN_RESOURCE_SHORTAGE);
		VM_WAIT;
		rw_wlock(&pvh_global_lock);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
	}
	origpte = *pte;
	if (!pmap_present(pte)) {
		/* opa == ~0UL flags "no previous mapping" below. */
		opa = ~0UL;
		pmap_enter_vhpt(pte, va);
	} else
		opa = pmap_ppn(pte);
	managed = FALSE;
	pa = VM_PAGE_TO_PHYS(m);

	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (opa == pa) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && !pmap_wired(&origpte))
			pmap->pm_stats.wired_count++;
		else if (!wired && pmap_wired(&origpte))
			pmap->pm_stats.wired_count--;

		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status. Otherwise,
		 * we can avoid I-cache invalidation if the page
		 * already allowed execution.
		 */
		if (managed && pmap_dirty(&origpte))
			vm_page_dirty(m);
		else if (pmap_exec(&origpte))
			icache_inval = FALSE;

		pmap_invalidate_page(va);
		goto validate;
	}

	/*
	 * Mapping has changed, invalidate old range and fall
	 * through to handle validating new mapping.
	 */
	if (opa != ~0UL) {
		pmap_remove_pte(pmap, pte, va, 0, 0);
		pmap_enter_vhpt(pte, va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		pmap_insert_entry(pmap, va, m);
		managed = TRUE;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:

	/*
	 * Now validate mapping with desired protection/wiring. This
	 * adds the pte to the VHPT if necessary.
	 */
	pmap_pte_prot(pmap, pte, prot);
	pmap_pte_attr(pte, m->md.memattr);
	pmap_set_pte(pte, va, pa, wired, managed);

	/* Invalidate the I-cache when needed. */
	if (icache_inval)
		ia64_sync_icache(va, PAGE_SIZE);

	if ((prot & VM_PROT_WRITE) != 0 && managed)
		vm_page_aflag_set(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
	return (KERN_SUCCESS);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
1835 */ 1836void 1837pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 1838 vm_page_t m_start, vm_prot_t prot) 1839{ 1840 pmap_t oldpmap; 1841 vm_page_t m; 1842 vm_pindex_t diff, psize; 1843 1844 CTR6(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, m=%p, prot=%#x)", 1845 __func__, pmap, start, end, m_start, prot); 1846 1847 VM_OBJECT_ASSERT_LOCKED(m_start->object); 1848 1849 psize = atop(end - start); 1850 m = m_start; 1851 rw_wlock(&pvh_global_lock); 1852 PMAP_LOCK(pmap); 1853 oldpmap = pmap_switch(pmap); 1854 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1855 pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot); 1856 m = TAILQ_NEXT(m, listq); 1857 } 1858 rw_wunlock(&pvh_global_lock); 1859 pmap_switch(oldpmap); 1860 PMAP_UNLOCK(pmap); 1861} 1862 1863/* 1864 * this code makes some *MAJOR* assumptions: 1865 * 1. Current pmap & pmap exists. 1866 * 2. Not wired. 1867 * 3. Read access. 1868 * 4. No page table pages. 1869 * but is *MUCH* faster than pmap_enter... 
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	pmap_t oldpmap;

	CTR5(KTR_PMAP, "%s(pm=%p, va=%#lx, m=%p, prot=%#x)", __func__, pmap,
	    va, m, prot);

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	pmap_enter_quick_locked(pmap, va, m, prot);
	rw_wunlock(&pvh_global_lock);
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Best-effort variant of pmap_enter() used with the pmap and pv
 * locks already held: silently gives up on any resource shortage
 * and never replaces an existing mapping.
 */
static void
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot)
{
	struct ia64_lpte *pte;
	boolean_t managed;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->oflags & VPO_UNMANAGED) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/* PTE allocation may fail; this entry point is best-effort. */
	if ((pte = pmap_find_pte(va)) == NULL)
		return;

	if (!pmap_present(pte)) {
		/* Enter on the PV list if the page is managed. */
		if ((m->oflags & VPO_UNMANAGED) == 0) {
			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
				/* No pv entry: undo the PTE allocation. */
				pmap_free_pte(pte, va);
				return;
			}
			managed = TRUE;
		} else
			managed = FALSE;

		/* Increment counters. */
		pmap->pm_stats.resident_count++;

		/* Initialise with R/O protection and enter into VHPT. */
		pmap_enter_vhpt(pte, va);
		pmap_pte_prot(pmap, pte,
		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
		pmap_pte_attr(pte, m->md.memattr);
		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);

		if (prot & VM_PROT_EXECUTE)
			ia64_sync_icache(va, PAGE_SIZE);
	}
}

/*
 * pmap_object_init_pt preloads the ptes for a given object
 * into the specified pmap.  This eliminates the blast of soft
 * faults on process startup and immediately after an mmap.
1934 */ 1935void 1936pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 1937 vm_pindex_t pindex, vm_size_t size) 1938{ 1939 1940 CTR6(KTR_PMAP, "%s(pm=%p, va=%#lx, obj=%p, idx=%lu, sz=%#lx)", 1941 __func__, pmap, addr, object, pindex, size); 1942 1943 VM_OBJECT_ASSERT_WLOCKED(object); 1944 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 1945 ("pmap_object_init_pt: non-device object")); 1946} 1947 1948/* 1949 * Routine: pmap_change_wiring 1950 * Function: Change the wiring attribute for a map/virtual-address 1951 * pair. 1952 * In/out conditions: 1953 * The mapping must already exist in the pmap. 1954 */ 1955void 1956pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 1957{ 1958 pmap_t oldpmap; 1959 struct ia64_lpte *pte; 1960 1961 CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, wired=%u)", __func__, pmap, va, 1962 wired); 1963 1964 PMAP_LOCK(pmap); 1965 oldpmap = pmap_switch(pmap); 1966 1967 pte = pmap_find_vhpt(va); 1968 KASSERT(pte != NULL, ("pte")); 1969 if (wired && !pmap_wired(pte)) { 1970 pmap->pm_stats.wired_count++; 1971 pmap_set_wired(pte); 1972 } else if (!wired && pmap_wired(pte)) { 1973 pmap->pm_stats.wired_count--; 1974 pmap_clear_wired(pte); 1975 } 1976 1977 pmap_switch(oldpmap); 1978 PMAP_UNLOCK(pmap); 1979} 1980 1981/* 1982 * Copy the range specified by src_addr/len 1983 * from the source map to the range dst_addr/len 1984 * in the destination map. 1985 * 1986 * This routine is only advisory and need not do anything. 1987 */ 1988void 1989pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_va, vm_size_t len, 1990 vm_offset_t src_va) 1991{ 1992 1993 CTR6(KTR_PMAP, "%s(dpm=%p, spm=%p, dva=%#lx, sz=%#lx, sva=%#lx)", 1994 __func__, dst_pmap, src_pmap, dst_va, len, src_va); 1995} 1996 1997/* 1998 * pmap_zero_page zeros the specified hardware page by 1999 * mapping it into virtual memory and using bzero to clear 2000 * its contents. 
2001 */ 2002void 2003pmap_zero_page(vm_page_t m) 2004{ 2005 void *p; 2006 2007 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m); 2008 2009 p = (void *)pmap_page_to_va(m); 2010 bzero(p, PAGE_SIZE); 2011} 2012 2013/* 2014 * pmap_zero_page_area zeros the specified hardware page by 2015 * mapping it into virtual memory and using bzero to clear 2016 * its contents. 2017 * 2018 * off and size must reside within a single page. 2019 */ 2020void 2021pmap_zero_page_area(vm_page_t m, int off, int size) 2022{ 2023 char *p; 2024 2025 CTR4(KTR_PMAP, "%s(m=%p, ofs=%d, len=%d)", __func__, m, off, size); 2026 2027 p = (void *)pmap_page_to_va(m); 2028 bzero(p + off, size); 2029} 2030 2031/* 2032 * pmap_zero_page_idle zeros the specified hardware page by 2033 * mapping it into virtual memory and using bzero to clear 2034 * its contents. This is for the vm_idlezero process. 2035 */ 2036void 2037pmap_zero_page_idle(vm_page_t m) 2038{ 2039 void *p; 2040 2041 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m); 2042 2043 p = (void *)pmap_page_to_va(m); 2044 bzero(p, PAGE_SIZE); 2045} 2046 2047/* 2048 * pmap_copy_page copies the specified (machine independent) 2049 * page by mapping the page into virtual memory and using 2050 * bcopy to copy the page, one machine dependent page at a 2051 * time. 
2052 */ 2053void 2054pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2055{ 2056 void *dst, *src; 2057 2058 CTR3(KTR_PMAP, "%s(sm=%p, dm=%p)", __func__, msrc, mdst); 2059 2060 src = (void *)pmap_page_to_va(msrc); 2061 dst = (void *)pmap_page_to_va(mdst); 2062 bcopy(src, dst, PAGE_SIZE); 2063} 2064 2065void 2066pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2067 vm_offset_t b_offset, int xfersize) 2068{ 2069 void *a_cp, *b_cp; 2070 vm_offset_t a_pg_offset, b_pg_offset; 2071 int cnt; 2072 2073 CTR6(KTR_PMAP, "%s(m0=%p, va0=%#lx, m1=%p, va1=%#lx, sz=%#x)", 2074 __func__, ma, a_offset, mb, b_offset, xfersize); 2075 2076 while (xfersize > 0) { 2077 a_pg_offset = a_offset & PAGE_MASK; 2078 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2079 a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) + 2080 a_pg_offset; 2081 b_pg_offset = b_offset & PAGE_MASK; 2082 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2083 b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) + 2084 b_pg_offset; 2085 bcopy(a_cp, b_cp, cnt); 2086 a_offset += cnt; 2087 b_offset += cnt; 2088 xfersize -= cnt; 2089 } 2090} 2091 2092/* 2093 * Returns true if the pmap's pv is one of the first 2094 * 16 pvs linked to from this page. This count may 2095 * be changed upwards or downwards in the future; it 2096 * is only necessary that true be returned for a small 2097 * subset of pmaps for proper page aging. 
2098 */ 2099boolean_t 2100pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2101{ 2102 pv_entry_t pv; 2103 int loops = 0; 2104 boolean_t rv; 2105 2106 CTR3(KTR_PMAP, "%s(pm=%p, m=%p)", __func__, pmap, m); 2107 2108 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2109 ("pmap_page_exists_quick: page %p is not managed", m)); 2110 rv = FALSE; 2111 rw_wlock(&pvh_global_lock); 2112 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2113 if (PV_PMAP(pv) == pmap) { 2114 rv = TRUE; 2115 break; 2116 } 2117 loops++; 2118 if (loops >= 16) 2119 break; 2120 } 2121 rw_wunlock(&pvh_global_lock); 2122 return (rv); 2123} 2124 2125/* 2126 * pmap_page_wired_mappings: 2127 * 2128 * Return the number of managed mappings to the given physical page 2129 * that are wired. 2130 */ 2131int 2132pmap_page_wired_mappings(vm_page_t m) 2133{ 2134 struct ia64_lpte *pte; 2135 pmap_t oldpmap, pmap; 2136 pv_entry_t pv; 2137 int count; 2138 2139 CTR2(KTR_PMAP, "%s(m=%p)", __func__, m); 2140 2141 count = 0; 2142 if ((m->oflags & VPO_UNMANAGED) != 0) 2143 return (count); 2144 rw_wlock(&pvh_global_lock); 2145 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2146 pmap = PV_PMAP(pv); 2147 PMAP_LOCK(pmap); 2148 oldpmap = pmap_switch(pmap); 2149 pte = pmap_find_vhpt(pv->pv_va); 2150 KASSERT(pte != NULL, ("pte")); 2151 if (pmap_wired(pte)) 2152 count++; 2153 pmap_switch(oldpmap); 2154 PMAP_UNLOCK(pmap); 2155 } 2156 rw_wunlock(&pvh_global_lock); 2157 return (count); 2158} 2159 2160/* 2161 * Remove all pages from specified address space 2162 * this aids process exit speeds. Also, this code 2163 * is special cased for current process only, but 2164 * can have the more generic (and slightly slower) 2165 * mode enabled. This is much faster than pmap_remove 2166 * in the case of running down an entire address space. 
2167 */ 2168void 2169pmap_remove_pages(pmap_t pmap) 2170{ 2171 struct pv_chunk *pc, *npc; 2172 struct ia64_lpte *pte; 2173 pmap_t oldpmap; 2174 pv_entry_t pv; 2175 vm_offset_t va; 2176 vm_page_t m; 2177 u_long inuse, bitmask; 2178 int allfree, bit, field, idx; 2179 2180 CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap); 2181 2182 rw_wlock(&pvh_global_lock); 2183 PMAP_LOCK(pmap); 2184 oldpmap = pmap_switch(pmap); 2185 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2186 allfree = 1; 2187 for (field = 0; field < _NPCM; field++) { 2188 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2189 while (inuse != 0) { 2190 bit = ffsl(inuse) - 1; 2191 bitmask = 1UL << bit; 2192 idx = field * sizeof(inuse) * NBBY + bit; 2193 pv = &pc->pc_pventry[idx]; 2194 inuse &= ~bitmask; 2195 va = pv->pv_va; 2196 pte = pmap_find_vhpt(va); 2197 KASSERT(pte != NULL, ("pte")); 2198 if (pmap_wired(pte)) { 2199 allfree = 0; 2200 continue; 2201 } 2202 pmap_remove_vhpt(va); 2203 pmap_invalidate_page(va); 2204 m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); 2205 if (pmap_dirty(pte)) 2206 vm_page_dirty(m); 2207 pmap_free_pte(pte, va); 2208 /* Mark free */ 2209 PV_STAT(pv_entry_frees++); 2210 PV_STAT(pv_entry_spare++); 2211 pv_entry_count--; 2212 pc->pc_map[field] |= bitmask; 2213 pmap->pm_stats.resident_count--; 2214 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2215 if (TAILQ_EMPTY(&m->md.pv_list)) 2216 vm_page_aflag_clear(m, PGA_WRITEABLE); 2217 } 2218 } 2219 if (allfree) { 2220 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2221 free_pv_chunk(pc); 2222 } 2223 } 2224 pmap_switch(oldpmap); 2225 PMAP_UNLOCK(pmap); 2226 rw_wunlock(&pvh_global_lock); 2227} 2228 2229/* 2230 * pmap_ts_referenced: 2231 * 2232 * Return a count of reference bits for a page, clearing those bits. 2233 * It is not necessary for every reference bit to be cleared, but it 2234 * is necessary that 0 only be returned when there are truly no 2235 * reference bits set. 
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	int count = 0;

	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	rw_wlock(&pvh_global_lock);
	/* Visit every mapping of the page; each pv names a (pmap, va) pair. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		/* VHPT lookup needs this pmap's region registers installed. */
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			count++;
			pmap_clear_accessed(pte);
			/* Flush so the CPU re-sets the bit on next access. */
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(&pvh_global_lock);
	return (count);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	boolean_t rv;

	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));
	rv = FALSE;

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no PTEs can be dirty.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (rv);
	rw_wlock(&pvh_global_lock);
	/* Scan mappings until the first dirty PTE is found. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_dirty(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is elgible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	struct ia64_lpte *pte;

	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, addr);

	/*
	 * Prefaultable only when no valid mapping already exists at the
	 * address.  NOTE(review): the lookup is done without switching to
	 * "pmap" or taking PMAP_LOCK, unlike the other routines here.
	 */
	pte = pmap_find_vhpt(addr);
	if (pte != NULL && pmap_present(pte))
		return (FALSE);
	return (TRUE);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	boolean_t rv;

	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	rv = FALSE;
	rw_wlock(&pvh_global_lock);
	/* Scan mappings until the first referenced PTE is found. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_accessed(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 * Apply the given advice to the specified range of addresses within the
 * given pmap.  Depending on the advice, clear the referenced and/or
 * modified flags in each mapping and set the mapped page's dirty field.
 */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_page_t m;

	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, adv=%d)", __func__,
	    pmap, sva, eva, advice);

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	for (; sva < eva; sva += PAGE_SIZE) {
		/* If page is invalid, skip this page. */
		pte = pmap_find_vhpt(sva);
		if (pte == NULL)
			continue;

		/* If it isn't managed, skip it too. */
		if (!pmap_managed(pte))
			continue;

		/* Clear its modified and referenced bits. */
		if (pmap_dirty(pte)) {
			if (advice == MADV_DONTNEED) {
				/*
				 * Future calls to pmap_is_modified() can be
				 * avoided by making the page dirty now.
				 */
				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
				vm_page_dirty(m);
			}
			pmap_clear_dirty(pte);
		} else if (!pmap_accessed(pte))
			/* Neither dirty nor accessed: nothing to flush. */
			continue;
		pmap_clear_accessed(pte);
		pmap_invalidate_page(sva);
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;

	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	KASSERT(!vm_page_xbusied(m),
	    ("pmap_clear_modify: page %p is exclusive busied", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
	 * If the object containing the page is locked and the page is not
	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&pvh_global_lock);
	/* Clear the dirty bit in every mapping of the page. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_dirty(pte)) {
			pmap_clear_dirty(pte);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(&pvh_global_lock);
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	vm_prot_t prot;

	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		prot = pmap_prot(pte);
		if ((prot & VM_PROT_WRITE) != 0) {
			/*
			 * Save hardware dirty state before the PTE loses
			 * write permission (and with it the dirty bit's
			 * ability to be set).
			 */
			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}
			prot &= ~VM_PROT_WRITE;
			pmap_pte_prot(pmap, pte, prot);
			pmap_pte_attr(pte, m->md.memattr);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	/* No mapping of the page is writable any longer. */
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
}

vm_offset_t
pmap_mapdev_priv(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
{
	/* Single-entry cache of the last translation performed. */
	static vm_offset_t last_va = 0;
	static vm_paddr_t last_pa = ~0UL;
	static vm_size_t last_sz = 0;
	struct efi_md *md;

	/* NOTE(review): "attr" is accepted but never consulted here. */
	if (pa == last_pa && sz == last_sz)
		return (last_va);

	/* Pick cached (RR7) vs. uncached (RR6) based on the EFI map. */
	md = efi_md_find(pa);
	if (md == NULL) {
		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
		    __func__, pa, pa + sz - 1);
		return (IA64_PHYS_TO_RR6(pa));
	}

	if (md->md_type == EFI_MD_TYPE_FREE) {
		/* Refuse to map ordinary RAM as device memory. */
		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
		    pa + sz - 1);
		return (0);
	}

	last_va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
	    IA64_PHYS_TO_RR6(pa);
	last_pa = pa;
	last_sz = sz;
	return (last_va);
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
{
	vm_offset_t va;

	CTR4(KTR_PMAP, "%s(pa=%#lx, sz=%#lx, attr=%#x)", __func__, pa, sz,
	    attr);

	va = pmap_mapdev_priv(pa, sz, attr);
	return ((void *)(uintptr_t)va);
}

/*
 * 'Unmap' a range mapped by pmap_mapdev_attr().  Intentionally a no-op
 * (beyond tracing): the mappings handed out are direct region-6/7
 * translations, so there is nothing to tear down.
 */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{

	CTR3(KTR_PMAP, "%s(va=%#lx, sz=%#lx)", __func__, va, size);
}

/*
 * Sets the memory attribute for the specified page.
 *
 * Rendezvous helper: issue the PAL call named by "arg" with interrupts
 * disabled on the calling CPU.
 */
static void
pmap_page_set_memattr_1(void *arg)
{
	struct ia64_pal_result res;
	register_t is;
	uintptr_t pp = (uintptr_t)arg;

	is = intr_disable();
	res = ia64_call_pal_static(pp, 0, 0, 0);
	intr_restore(is);
}

void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	void *va;

	CTR3(KTR_PMAP, "%s(m=%p, attr=%#x)", __func__, m, ma);

	rw_wlock(&pvh_global_lock);
	m->md.memattr = ma;
	/* Update the attribute in every existing mapping of the page. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		pmap_pte_attr(pte, ma);
		pmap_invalidate_page(pv->pv_va);
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(&pvh_global_lock);

	/*
	 * Transitioning to uncacheable: make prefetches visible on all
	 * CPUs, flush the page from the data cache, then drain pending
	 * memory transactions, per the PAL protocol.
	 */
	if (ma == VM_MEMATTR_UNCACHEABLE) {
#ifdef SMP
		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
		    (void *)PAL_PREFETCH_VISIBILITY);
#else
		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
#endif
		va = (void *)pmap_page_to_va(m);
		critical_enter();
		cpu_flush_dcache(va, PAGE_SIZE);
		critical_exit();
#ifdef SMP
		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
		    (void *)PAL_MC_DRAIN);
#else
		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
#endif
	}
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte, tpte;
	vm_paddr_t pa;
	int val;

	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, pa_p=%p)", __func__, pmap, addr,
	    locked_pa);

	PMAP_LOCK(pmap);
retry:
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(addr);
	if (pte != NULL) {
		/* Work on a snapshot so the live PTE can change freely. */
		tpte = *pte;
		pte = &tpte;
	}
	pmap_switch(oldpmap);
	if (pte == NULL || !pmap_present(pte)) {
		val = 0;
		goto out;
	}
	val = MINCORE_INCORE;
	if (pmap_dirty(pte))
		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
	if (pmap_accessed(pte))
		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
	    pmap_managed(pte)) {
		pa = pmap_ppn(pte);
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		/*
		 * The "out" label sits inside the else arm on purpose: both
		 * the fall-through path and the goto must release any page
		 * lock taken by vm_page_pa_tryrelock().
		 */
out:
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

/*
 * Make the pmap of the given thread's vmspace the active translation
 * context on the current CPU.
 */
void
pmap_activate(struct thread *td)
{

	CTR2(KTR_PMAP, "%s(td=%p)", __func__, td);

	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
}

pmap_t
pmap_switch(pmap_t pm)
{
	pmap_t prevpm;
	int i;

	critical_enter();
	prevpm = PCPU_GET(md.current_pmap);
	/* Already active: nothing to do. */
	if (prevpm == pm)
		goto out;
	if (pm == NULL) {
		/* No pmap: load per-region placeholder RIDs. */
		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (i << 8)|(PAGE_SHIFT << 2)|1);
		}
	} else {
		/* Load the pmap's region IDs into the region registers. */
		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
		}
	}
	PCPU_SET(md.current_pmap, pm);
	/* Serialize the data stream after the RR writes. */
	ia64_srlz_d();

out:
	critical_exit();
	return (prevpm);
}

/*
 * Synchronize the instruction cache with the data cache for the given
 * range of the given pmap, covering only pages that are actually mapped.
 */
void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
	pmap_t oldpm;
	struct ia64_lpte *pte;
	vm_offset_t lim;
	vm_size_t len;

	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, sz=%#lx)", __func__, pm, va, sz);

	/* Round the range out to 32-byte alignment. */
	sz += va & 31;
	va &= ~31;
	sz = (sz + 31) & ~31;

	PMAP_LOCK(pm);
	oldpm = pmap_switch(pm);
	/* Process page by page; unmapped pages are simply skipped. */
	while (sz > 0) {
		lim = round_page(va);
		len = MIN(lim - va, sz);
		pte = pmap_find_vhpt(va);
		if (pte != NULL && pmap_present(pte))
			ia64_sync_icache(va, len);
		va += len;
		sz -= len;
	}
	pmap_switch(oldpm);
	PMAP_UNLOCK(pm);
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{

	/* No superpage promotion on this pmap: tracing only. */
	CTR5(KTR_PMAP, "%s(obj=%p, ofs=%#lx, va_p=%p, sz=%#lx)", __func__,
	    object, offset, addr, size);
}

#include "opt_ddb.h"

#ifdef DDB

#include <ddb/ddb.h>

/* Printable names for the 32 encodable ia64 translation page sizes. */
static const char* psnames[] = {
	"1B",	"2B",	"4B",	"8B",
	"16B",	"32B",	"64B",	"128B",
	"256B",	"512B",	"1K",	"2K",
	"4K",	"8K",	"16K",	"32K",
	"64K",	"128K",	"256K",	"512K",
	"1M",	"2M",	"4M",	"8M",
	"16M",	"32M",	"64M",	"128M",
	"256M",	"512M",	"1G",	"2G"
};

/*
 * Dump the CPU's translation registers via PAL: type 0 selects the
 * instruction TRs, any other value the data TRs.
 */
static void
print_trs(int type)
{
	struct ia64_pal_result res;
	int i, maxtr;
	struct {
		pt_entry_t	pte;
		uint64_t	itir;
		uint64_t	ifa;
		struct ia64_rr	rr;
	} buf;
	static const char *manames[] = {
		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};

	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		db_printf("Can't get VM summary\n");
		return;
	}

	/* PAL_VM_SUMMARY packs the itr/dtr counts into result[0]. */
	if (type == 0)
		maxtr = (res.pal_result[0] >> 40) & 0xff;
	else
		maxtr = (res.pal_result[0] >> 32) & 0xff;

	db_printf("V RID Virtual Page Physical Page PgSz ED AR PL D A MA P KEY\n");
	for (i = 0; i <= maxtr; i++) {
		bzero(&buf, sizeof(buf));
		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
		    ia64_tpa((uint64_t)&buf));
		/* Mask out fields PAL reports as invalid for this entry. */
		if (!(res.pal_result[0] & 1))
			buf.pte &= ~PTE_AR_MASK;
		if (!(res.pal_result[0] & 2))
			buf.pte &= ~PTE_PL_MASK;
		if (!(res.pal_result[0] & 4))
			pmap_clear_dirty(&buf);
		if (!(res.pal_result[0] & 8))
			buf.pte &= ~PTE_MA_MASK;
		db_printf("%d %06x %013lx %013lx %4s %d %d %d %d %d %-3s "
		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
		    (buf.pte & PTE_ED) ? 1 : 0,
		    (int)(buf.pte & PTE_AR_MASK) >> 9,
		    (int)(buf.pte & PTE_PL_MASK) >> 7,
		    (pmap_dirty(&buf)) ? 1 : 0,
		    (pmap_accessed(&buf)) ? 1 : 0,
		    manames[(buf.pte & PTE_MA_MASK) >> 2],
		    (pmap_present(&buf)) ? 1 : 0,
		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
	}
}

/* ddb "itr": dump the instruction translation registers. */
DB_COMMAND(itr, db_itr)
{
	print_trs(0);
}

/* ddb "dtr": dump the data translation registers. */
DB_COMMAND(dtr, db_dtr)
{
	print_trs(1);
}

/* ddb "rr": dump the eight region registers. */
DB_COMMAND(rr, db_rr)
{
	int i;
	uint64_t t;
	struct ia64_rr rr;

	printf("RR RID PgSz VE\n");
	for (i = 0; i < 8; i++) {
		__asm __volatile ("mov %0=rr[%1]"
		    : "=r"(t)
		    : "r"(IA64_RR_BASE(i)));
		*(uint64_t *) &rr = t;
		printf("%d %06x %4s %d\n",
		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
	}
}

/* ddb "thash <va>": print the VHPT hash address for a VA. */
DB_COMMAND(thash, db_thash)
{
	if (!have_addr)
		return;

	db_printf("%p\n", (void *) ia64_thash(addr));
}

/* ddb "ttag <va>": print the VHPT tag for a VA. */
DB_COMMAND(ttag, db_ttag)
{
	if (!have_addr)
		return;

	db_printf("0x%lx\n", ia64_ttag(addr));
}

/* ddb "kpte <kva>": dump the kernel PTE backing a kernel VA. */
DB_COMMAND(kpte, db_kpte)
{
	struct ia64_lpte *pte;

	if (!have_addr) {
		db_printf("usage: kpte <kva>\n");
		return;
	}
	if (addr < VM_INIT_KERNEL_ADDRESS) {
		db_printf("kpte: error: invalid <kva>\n");
		return;
	}
	pte = pmap_find_kpte(addr);
	db_printf("kpte at %p:\n", pte);
	db_printf(" pte =%016lx\n", pte->pte);
	db_printf(" itir =%016lx\n", pte->itir);
	db_printf(" tag =%016lx\n", pte->tag);
	db_printf(" chain=%016lx\n", pte->chain);
}

#endif