/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
 *	JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
 */

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
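 *
 * On MIPS the TLB is software managed, so this module also maintains the
 * page tables that the TLB refill path walks: a segment table (pm_segtab)
 * pointing at leaf page-table pages on 32-bit kernels, with one extra
 * directory level on __mips_n64.  As a rough sketch (32-bit case, using
 * the index macros defined below), a lookup is:
 *
 *	pde = &pmap->pm_segtab[pmap_seg_index(va)];
 *	pte = &((pt_entry_t *)*pde)[pmap_pte_index(va)];
 *
 * Each pmap also carries a per-CPU ASID (pm_asid) so that user TLB
 * entries from different address spaces can coexist in the TLB.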
59 */ 60 61#include <sys/cdefs.h> 62__FBSDID("$FreeBSD: stable/10/sys/mips/mips/pmap.c 310133 2016-12-16 01:06:35Z jhb $"); 63 64#include "opt_ddb.h" 65#include "opt_pmap.h" 66 67#include <sys/param.h> 68#include <sys/systm.h> 69#include <sys/lock.h> 70#include <sys/mman.h> 71#include <sys/msgbuf.h> 72#include <sys/mutex.h> 73#include <sys/pcpu.h> 74#include <sys/proc.h> 75#include <sys/rwlock.h> 76#include <sys/sched.h> 77#include <sys/smp.h> 78#include <sys/sysctl.h> 79#include <sys/vmmeter.h> 80 81#ifdef DDB 82#include <ddb/ddb.h> 83#endif 84 85#include <vm/vm.h> 86#include <vm/vm_param.h> 87#include <vm/vm_kern.h> 88#include <vm/vm_page.h> 89#include <vm/vm_map.h> 90#include <vm/vm_object.h> 91#include <vm/vm_extern.h> 92#include <vm/vm_pageout.h> 93#include <vm/vm_pager.h> 94#include <vm/uma.h> 95 96#include <machine/cache.h> 97#include <machine/md_var.h> 98#include <machine/tlb.h> 99 100#undef PMAP_DEBUG 101 102#if !defined(DIAGNOSTIC) 103#define PMAP_INLINE __inline 104#else 105#define PMAP_INLINE 106#endif 107 108#ifdef PV_STATS 109#define PV_STAT(x) do { x ; } while (0) 110#else 111#define PV_STAT(x) do { } while (0) 112#endif 113 114/* 115 * Get PDEs and PTEs for user/kernel address space 116 */ 117#define pmap_seg_index(v) (((v) >> SEGSHIFT) & (NPDEPG - 1)) 118#define pmap_pde_index(v) (((v) >> PDRSHIFT) & (NPDEPG - 1)) 119#define pmap_pte_index(v) (((v) >> PAGE_SHIFT) & (NPTEPG - 1)) 120#define pmap_pde_pindex(v) ((v) >> PDRSHIFT) 121 122#ifdef __mips_n64 123#define NUPDE (NPDEPG * NPDEPG) 124#define NUSERPGTBLS (NUPDE + NPDEPG) 125#else 126#define NUPDE (NPDEPG) 127#define NUSERPGTBLS (NUPDE) 128#endif 129 130#define is_kernel_pmap(x) ((x) == kernel_pmap) 131 132struct pmap kernel_pmap_store; 133pd_entry_t *kernel_segmap; 134 135vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 136vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 137 138static int nkpt; 139unsigned pmap_max_asid; /* max ASID supported by the system */ 140 141#define PMAP_ASID_RESERVED 0 142 143vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; 144 145static void pmap_asid_alloc(pmap_t pmap); 146 147static struct rwlock_padalign pvh_global_lock; 148 149/* 150 * Data for the pv entry allocation mechanism 151 */ 152static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 153static int pv_entry_count; 154 155static void free_pv_chunk(struct pv_chunk *pc); 156static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 157static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); 158static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); 159static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 160static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 161 vm_offset_t va); 162static vm_page_t pmap_alloc_direct_page(unsigned int index, int req); 163static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 164 vm_page_t m, vm_prot_t prot, vm_page_t mpte); 165static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, 166 pd_entry_t pde); 167static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 168static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); 169static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, 170 vm_offset_t va, vm_page_t m); 171static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte); 172static void pmap_invalidate_all(pmap_t pmap); 173static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va); 174static 
void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m); 175 176static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags); 177static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags); 178static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t); 179static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot); 180 181static void pmap_invalidate_page_action(void *arg); 182static void pmap_invalidate_range_action(void *arg); 183static void pmap_update_page_action(void *arg); 184 185#ifndef __mips_n64 186/* 187 * This structure is for high memory (memory above 512Meg in 32 bit) support. 188 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to 189 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc. 190 * 191 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To 192 * access a highmem physical address on a CPU, we map the physical address to 193 * the reserved virtual address for the CPU in the kernel pagetable. This is 194 * done with interrupts disabled(although a spinlock and sched_pin would be 195 * sufficient). 196 */ 197struct local_sysmaps { 198 vm_offset_t base; 199 uint32_t saved_intr; 200 uint16_t valid1, valid2; 201}; 202static struct local_sysmaps sysmap_lmem[MAXCPU]; 203 204static __inline void 205pmap_alloc_lmem_map(void) 206{ 207 int i; 208 209 for (i = 0; i < MAXCPU; i++) { 210 sysmap_lmem[i].base = virtual_avail; 211 virtual_avail += PAGE_SIZE * 2; 212 sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0; 213 } 214} 215 216static __inline vm_offset_t 217pmap_lmem_map1(vm_paddr_t phys) 218{ 219 struct local_sysmaps *sysm; 220 pt_entry_t *pte, npte; 221 vm_offset_t va; 222 uint32_t intr; 223 int cpu; 224 225 intr = intr_disable(); 226 cpu = PCPU_GET(cpuid); 227 sysm = &sysmap_lmem[cpu]; 228 sysm->saved_intr = intr; 229 va = sysm->base; 230 npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 231 pte = pmap_pte(kernel_pmap, va); 232 *pte = npte; 233 sysm->valid1 = 1; 234 return (va); 235} 236 237static __inline vm_offset_t 238pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2) 239{ 240 struct local_sysmaps *sysm; 241 pt_entry_t *pte, npte; 242 vm_offset_t va1, va2; 243 uint32_t intr; 244 int cpu; 245 246 intr = intr_disable(); 247 cpu = PCPU_GET(cpuid); 248 sysm = &sysmap_lmem[cpu]; 249 sysm->saved_intr = intr; 250 va1 = sysm->base; 251 va2 = sysm->base + PAGE_SIZE; 252 npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 253 pte = pmap_pte(kernel_pmap, va1); 254 *pte = npte; 255 npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G; 256 pte = pmap_pte(kernel_pmap, va2); 257 *pte = npte; 258 sysm->valid1 = 1; 259 sysm->valid2 = 1; 260 return (va1); 261} 262 263static __inline void 264pmap_lmem_unmap(void) 265{ 266 struct local_sysmaps *sysm; 267 pt_entry_t *pte; 268 int cpu; 269 270 cpu = PCPU_GET(cpuid); 271 sysm = &sysmap_lmem[cpu]; 272 pte = pmap_pte(kernel_pmap, sysm->base); 273 *pte = PTE_G; 274 tlb_invalidate_address(kernel_pmap, sysm->base); 275 sysm->valid1 = 0; 276 if (sysm->valid2) { 277 pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE); 278 *pte = PTE_G; 279 tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE); 280 sysm->valid2 = 0; 281 } 282 intr_restore(sysm->saved_intr); 283} 284#else /* __mips_n64 */ 285 286static __inline void 287pmap_alloc_lmem_map(void) 288{ 289} 290 291static __inline vm_offset_t 292pmap_lmem_map1(vm_paddr_t phys) 293{ 294 295 return (0); 296} 297 298static __inline 
vm_offset_t 299pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2) 300{ 301 302 return (0); 303} 304 305static __inline vm_offset_t 306pmap_lmem_unmap(void) 307{ 308 309 return (0); 310} 311#endif /* !__mips_n64 */ 312 313/* 314 * Page table entry lookup routines. 315 */ 316static __inline pd_entry_t * 317pmap_segmap(pmap_t pmap, vm_offset_t va) 318{ 319 320 return (&pmap->pm_segtab[pmap_seg_index(va)]); 321} 322 323#ifdef __mips_n64 324static __inline pd_entry_t * 325pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 326{ 327 pd_entry_t *pde; 328 329 pde = (pd_entry_t *)*pdpe; 330 return (&pde[pmap_pde_index(va)]); 331} 332 333static __inline pd_entry_t * 334pmap_pde(pmap_t pmap, vm_offset_t va) 335{ 336 pd_entry_t *pdpe; 337 338 pdpe = pmap_segmap(pmap, va); 339 if (*pdpe == NULL) 340 return (NULL); 341 342 return (pmap_pdpe_to_pde(pdpe, va)); 343} 344#else 345static __inline pd_entry_t * 346pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va) 347{ 348 349 return (pdpe); 350} 351 352static __inline 353pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va) 354{ 355 356 return (pmap_segmap(pmap, va)); 357} 358#endif 359 360static __inline pt_entry_t * 361pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 362{ 363 pt_entry_t *pte; 364 365 pte = (pt_entry_t *)*pde; 366 return (&pte[pmap_pte_index(va)]); 367} 368 369pt_entry_t * 370pmap_pte(pmap_t pmap, vm_offset_t va) 371{ 372 pd_entry_t *pde; 373 374 pde = pmap_pde(pmap, va); 375 if (pde == NULL || *pde == NULL) 376 return (NULL); 377 378 return (pmap_pde_to_pte(pde, va)); 379} 380 381vm_offset_t 382pmap_steal_memory(vm_size_t size) 383{ 384 vm_paddr_t bank_size, pa; 385 vm_offset_t va; 386 387 size = round_page(size); 388 bank_size = phys_avail[1] - phys_avail[0]; 389 while (size > bank_size) { 390 int i; 391 392 for (i = 0; phys_avail[i + 2]; i += 2) { 393 phys_avail[i] = phys_avail[i + 2]; 394 phys_avail[i + 1] = phys_avail[i + 3]; 395 } 396 phys_avail[i] = 0; 397 phys_avail[i + 1] = 0; 398 if (!phys_avail[0]) 399 panic("pmap_steal_memory: out of memory"); 400 bank_size = phys_avail[1] - phys_avail[0]; 401 } 402 403 pa = phys_avail[0]; 404 phys_avail[0] += size; 405 if (MIPS_DIRECT_MAPPABLE(pa) == 0) 406 panic("Out of memory below 512Meg?"); 407 va = MIPS_PHYS_TO_DIRECT(pa); 408 bzero((caddr_t)va, size); 409 return (va); 410} 411 412/* 413 * Bootstrap the system enough to run with virtual memory. This 414 * assumes that the phys_avail array has been initialized. 415 */ 416static void 417pmap_create_kernel_pagetable(void) 418{ 419 int i, j; 420 vm_offset_t ptaddr; 421 pt_entry_t *pte; 422#ifdef __mips_n64 423 pd_entry_t *pde; 424 vm_offset_t pdaddr; 425 int npt, npde; 426#endif 427 428 /* 429 * Allocate segment table for the kernel 430 */ 431 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE); 432 433 /* 434 * Allocate second level page tables for the kernel 435 */ 436#ifdef __mips_n64 437 npde = howmany(NKPT, NPDEPG); 438 pdaddr = pmap_steal_memory(PAGE_SIZE * npde); 439#endif 440 nkpt = NKPT; 441 ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt); 442 443 /* 444 * The R[4-7]?00 stores only one copy of the Global bit in the 445 * translation lookaside buffer for each 2 page entry. Thus invalid 446 * entrys must have the Global bit set so when Entry LO and Entry HI 447 * G bits are anded together they will produce a global bit to store 448 * in the tlb. 
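	 *
	 * This is also why, throughout this file, an invalid PTE in the
	 * kernel pmap is written as PTE_G rather than 0: clearing G on one
	 * half of an EntryLo pair would strip the global bit from the
	 * valid kernel mapping sharing that TLB entry.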
449 */ 450 for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++) 451 *pte = PTE_G; 452 453#ifdef __mips_n64 454 for (i = 0, npt = nkpt; npt > 0; i++) { 455 kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE); 456 pde = (pd_entry_t *)kernel_segmap[i]; 457 458 for (j = 0; j < NPDEPG && npt > 0; j++, npt--) 459 pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE); 460 } 461#else 462 for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++) 463 kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE)); 464#endif 465 466 PMAP_LOCK_INIT(kernel_pmap); 467 kernel_pmap->pm_segtab = kernel_segmap; 468 CPU_FILL(&kernel_pmap->pm_active); 469 TAILQ_INIT(&kernel_pmap->pm_pvchunk); 470 kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; 471 kernel_pmap->pm_asid[0].gen = 0; 472 kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE; 473} 474 475void 476pmap_bootstrap(void) 477{ 478 int i; 479 int need_local_mappings = 0; 480 481 /* Sort. */ 482again: 483 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 484 /* 485 * Keep the memory aligned on page boundary. 486 */ 487 phys_avail[i] = round_page(phys_avail[i]); 488 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 489 490 if (i < 2) 491 continue; 492 if (phys_avail[i - 2] > phys_avail[i]) { 493 vm_paddr_t ptemp[2]; 494 495 ptemp[0] = phys_avail[i + 0]; 496 ptemp[1] = phys_avail[i + 1]; 497 498 phys_avail[i + 0] = phys_avail[i - 2]; 499 phys_avail[i + 1] = phys_avail[i - 1]; 500 501 phys_avail[i - 2] = ptemp[0]; 502 phys_avail[i - 1] = ptemp[1]; 503 goto again; 504 } 505 } 506 507 /* 508 * In 32 bit, we may have memory which cannot be mapped directly. 509 * This memory will need temporary mapping before it can be 510 * accessed. 511 */ 512 if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1)) 513 need_local_mappings = 1; 514 515 /* 516 * Copy the phys_avail[] array before we start stealing memory from it. 517 */ 518 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 519 physmem_desc[i] = phys_avail[i]; 520 physmem_desc[i + 1] = phys_avail[i + 1]; 521 } 522 523 Maxmem = atop(phys_avail[i - 1]); 524 525 if (bootverbose) { 526 printf("Physical memory chunk(s):\n"); 527 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 528 vm_paddr_t size; 529 530 size = phys_avail[i + 1] - phys_avail[i]; 531 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", 532 (uintmax_t) phys_avail[i], 533 (uintmax_t) phys_avail[i + 1] - 1, 534 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE); 535 } 536 printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem)); 537 } 538 /* 539 * Steal the message buffer from the beginning of memory. 540 */ 541 msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize); 542 msgbufinit(msgbufp, msgbufsize); 543 544 /* 545 * Steal thread0 kstack. 546 */ 547 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT); 548 549 virtual_avail = VM_MIN_KERNEL_ADDRESS; 550 virtual_end = VM_MAX_KERNEL_ADDRESS; 551 552#ifdef SMP 553 /* 554 * Steal some virtual address space to map the pcpu area. 555 */ 556 virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2); 557 pcpup = (struct pcpu *)virtual_avail; 558 virtual_avail += PAGE_SIZE * 2; 559 560 /* 561 * Initialize the wired TLB entry mapping the pcpu region for 562 * the BSP at 'pcpup'. Up until this point we were operating 563 * with the 'pcpup' for the BSP pointing to a virtual address 564 * in KSEG0 so there was no need for a TLB mapping. 
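	 *
	 * The reservation above is aligned to a 2-page boundary because a
	 * single MIPS TLB entry maps an even/odd pair of virtual pages, so
	 * one wired entry can cover both reserved pcpu pages.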
565 */ 566 mips_pcpu_tlb_init(PCPU_ADDR(0)); 567 568 if (bootverbose) 569 printf("pcpu is available at virtual address %p.\n", pcpup); 570#endif 571 572 if (need_local_mappings) 573 pmap_alloc_lmem_map(); 574 pmap_create_kernel_pagetable(); 575 pmap_max_asid = VMNUM_PIDS; 576 mips_wr_entryhi(0); 577 mips_wr_pagemask(0); 578 579 /* 580 * Initialize the global pv list lock. 581 */ 582 rw_init(&pvh_global_lock, "pmap pv global"); 583} 584 585/* 586 * Initialize a vm_page's machine-dependent fields. 587 */ 588void 589pmap_page_init(vm_page_t m) 590{ 591 592 TAILQ_INIT(&m->md.pv_list); 593 m->md.pv_flags = 0; 594} 595 596/* 597 * Initialize the pmap module. 598 * Called by vm_init, to initialize any structures that the pmap 599 * system needs to map virtual memory. 600 */ 601void 602pmap_init(void) 603{ 604} 605 606/*************************************************** 607 * Low level helper routines..... 608 ***************************************************/ 609 610#ifdef SMP 611static __inline void 612pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) 613{ 614 int cpuid, cpu, self; 615 cpuset_t active_cpus; 616 617 sched_pin(); 618 if (is_kernel_pmap(pmap)) { 619 smp_rendezvous(NULL, fn, NULL, arg); 620 goto out; 621 } 622 /* Force ASID update on inactive CPUs */ 623 CPU_FOREACH(cpu) { 624 if (!CPU_ISSET(cpu, &pmap->pm_active)) 625 pmap->pm_asid[cpu].gen = 0; 626 } 627 cpuid = PCPU_GET(cpuid); 628 /* 629 * XXX: barrier/locking for active? 630 * 631 * Take a snapshot of active here, any further changes are ignored. 632 * tlb update/invalidate should be harmless on inactive CPUs 633 */ 634 active_cpus = pmap->pm_active; 635 self = CPU_ISSET(cpuid, &active_cpus); 636 CPU_CLR(cpuid, &active_cpus); 637 /* Optimize for the case where this cpu is the only active one */ 638 if (CPU_EMPTY(&active_cpus)) { 639 if (self) 640 fn(arg); 641 } else { 642 if (self) 643 CPU_SET(cpuid, &active_cpus); 644 smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg); 645 } 646out: 647 sched_unpin(); 648} 649#else /* !SMP */ 650static __inline void 651pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) 652{ 653 int cpuid; 654 655 if (is_kernel_pmap(pmap)) { 656 fn(arg); 657 return; 658 } 659 cpuid = PCPU_GET(cpuid); 660 if (!CPU_ISSET(cpuid, &pmap->pm_active)) 661 pmap->pm_asid[cpuid].gen = 0; 662 else 663 fn(arg); 664} 665#endif /* SMP */ 666 667static void 668pmap_invalidate_all(pmap_t pmap) 669{ 670 671 pmap_call_on_active_cpus(pmap, 672 (void (*)(void *))tlb_invalidate_all_user, pmap); 673} 674 675struct pmap_invalidate_page_arg { 676 pmap_t pmap; 677 vm_offset_t va; 678}; 679 680static void 681pmap_invalidate_page_action(void *arg) 682{ 683 struct pmap_invalidate_page_arg *p = arg; 684 685 tlb_invalidate_address(p->pmap, p->va); 686} 687 688static void 689pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 690{ 691 struct pmap_invalidate_page_arg arg; 692 693 arg.pmap = pmap; 694 arg.va = va; 695 pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg); 696} 697 698struct pmap_invalidate_range_arg { 699 pmap_t pmap; 700 vm_offset_t sva; 701 vm_offset_t eva; 702}; 703 704static void 705pmap_invalidate_range_action(void *arg) 706{ 707 struct pmap_invalidate_range_arg *p = arg; 708 709 tlb_invalidate_range(p->pmap, p->sva, p->eva); 710} 711 712static void 713pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 714{ 715 struct pmap_invalidate_range_arg arg; 716 717 arg.pmap = pmap; 718 arg.sva = sva; 719 arg.eva = eva; 720 pmap_call_on_active_cpus(pmap, 
pmap_invalidate_range_action, &arg); 721} 722 723struct pmap_update_page_arg { 724 pmap_t pmap; 725 vm_offset_t va; 726 pt_entry_t pte; 727}; 728 729static void 730pmap_update_page_action(void *arg) 731{ 732 struct pmap_update_page_arg *p = arg; 733 734 tlb_update(p->pmap, p->va, p->pte); 735} 736 737static void 738pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) 739{ 740 struct pmap_update_page_arg arg; 741 742 arg.pmap = pmap; 743 arg.va = va; 744 arg.pte = pte; 745 pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg); 746} 747 748/* 749 * Routine: pmap_extract 750 * Function: 751 * Extract the physical page address associated 752 * with the given map/virtual_address pair. 753 */ 754vm_paddr_t 755pmap_extract(pmap_t pmap, vm_offset_t va) 756{ 757 pt_entry_t *pte; 758 vm_offset_t retval = 0; 759 760 PMAP_LOCK(pmap); 761 pte = pmap_pte(pmap, va); 762 if (pte) { 763 retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK); 764 } 765 PMAP_UNLOCK(pmap); 766 return (retval); 767} 768 769/* 770 * Routine: pmap_extract_and_hold 771 * Function: 772 * Atomically extract and hold the physical page 773 * with the given pmap and virtual address pair 774 * if that mapping permits the given protection. 775 */ 776vm_page_t 777pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 778{ 779 pt_entry_t pte, *ptep; 780 vm_paddr_t pa, pte_pa; 781 vm_page_t m; 782 783 m = NULL; 784 pa = 0; 785 PMAP_LOCK(pmap); 786retry: 787 ptep = pmap_pte(pmap, va); 788 if (ptep != NULL) { 789 pte = *ptep; 790 if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) || 791 (prot & VM_PROT_WRITE) == 0)) { 792 pte_pa = TLBLO_PTE_TO_PA(pte); 793 if (vm_page_pa_tryrelock(pmap, pte_pa, &pa)) 794 goto retry; 795 m = PHYS_TO_VM_PAGE(pte_pa); 796 vm_page_hold(m); 797 } 798 } 799 PA_UNLOCK_COND(pa); 800 PMAP_UNLOCK(pmap); 801 return (m); 802} 803 804/*************************************************** 805 * Low level mapping routines..... 806 ***************************************************/ 807 808/* 809 * add a wired page to the kva 810 */ 811void 812pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr) 813{ 814 pt_entry_t *pte; 815 pt_entry_t opte, npte; 816 817#ifdef PMAP_DEBUG 818 printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 819#endif 820 821 pte = pmap_pte(kernel_pmap, va); 822 opte = *pte; 823 npte = TLBLO_PA_TO_PFN(pa) | attr | PTE_D | PTE_V | PTE_G; 824 *pte = npte; 825 if (pte_test(&opte, PTE_V) && opte != npte) 826 pmap_update_page(kernel_pmap, va, npte); 827} 828 829void 830pmap_kenter(vm_offset_t va, vm_paddr_t pa) 831{ 832 833 KASSERT(is_cacheable_mem(pa), 834 ("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa)); 835 836 pmap_kenter_attr(va, pa, PTE_C_CACHE); 837} 838 839/* 840 * remove a page from the kernel pagetables 841 */ 842 /* PMAP_INLINE */ void 843pmap_kremove(vm_offset_t va) 844{ 845 pt_entry_t *pte; 846 847 /* 848 * Write back all caches from the page being destroyed 849 */ 850 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 851 852 pte = pmap_pte(kernel_pmap, va); 853 *pte = PTE_G; 854 pmap_invalidate_page(kernel_pmap, va); 855} 856 857/* 858 * Used to map a range of physical addresses into kernel 859 * virtual address space. 860 * 861 * The value passed in '*virt' is a suggested virtual address for 862 * the mapping. Architectures which can support a direct-mapped 863 * physical to virtual region can return the appropriate address 864 * within that region, leaving '*virt' unchanged. 
Other 865 * architectures should map the pages starting at '*virt' and 866 * update '*virt' with the first usable address after the mapped 867 * region. 868 * 869 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 870 */ 871vm_offset_t 872pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 873{ 874 vm_offset_t va, sva; 875 876 if (MIPS_DIRECT_MAPPABLE(end - 1)) 877 return (MIPS_PHYS_TO_DIRECT(start)); 878 879 va = sva = *virt; 880 while (start < end) { 881 pmap_kenter(va, start); 882 va += PAGE_SIZE; 883 start += PAGE_SIZE; 884 } 885 *virt = va; 886 return (sva); 887} 888 889/* 890 * Add a list of wired pages to the kva 891 * this routine is only used for temporary 892 * kernel mappings that do not need to have 893 * page modification or references recorded. 894 * Note that old mappings are simply written 895 * over. The page *must* be wired. 896 */ 897void 898pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 899{ 900 int i; 901 vm_offset_t origva = va; 902 903 for (i = 0; i < count; i++) { 904 pmap_flush_pvcache(m[i]); 905 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i])); 906 va += PAGE_SIZE; 907 } 908 909 mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count); 910} 911 912/* 913 * this routine jerks page mappings from the 914 * kernel -- it is meant only for temporary mappings. 915 */ 916void 917pmap_qremove(vm_offset_t va, int count) 918{ 919 pt_entry_t *pte; 920 vm_offset_t origva; 921 922 if (count < 1) 923 return; 924 mips_dcache_wbinv_range_index(va, PAGE_SIZE * count); 925 origva = va; 926 do { 927 pte = pmap_pte(kernel_pmap, va); 928 *pte = PTE_G; 929 va += PAGE_SIZE; 930 } while (--count > 0); 931 pmap_invalidate_range(kernel_pmap, origva, va); 932} 933 934/*************************************************** 935 * Page table page management routines..... 936 ***************************************************/ 937 938/* 939 * Decrements a page table page's wire count, which is used to record the 940 * number of valid page table entries within the page. If the wire count 941 * drops to zero, then the page table page is unmapped. Returns TRUE if the 942 * page table page was unmapped and FALSE otherwise. 943 */ 944static PMAP_INLINE boolean_t 945pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) 946{ 947 948 --m->wire_count; 949 if (m->wire_count == 0) { 950 _pmap_unwire_ptp(pmap, va, m); 951 return (TRUE); 952 } else 953 return (FALSE); 954} 955 956static void 957_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m) 958{ 959 pd_entry_t *pde; 960 961 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 962 /* 963 * unmap the page table page 964 */ 965#ifdef __mips_n64 966 if (m->pindex < NUPDE) 967 pde = pmap_pde(pmap, va); 968 else 969 pde = pmap_segmap(pmap, va); 970#else 971 pde = pmap_pde(pmap, va); 972#endif 973 *pde = 0; 974 pmap->pm_stats.resident_count--; 975 976#ifdef __mips_n64 977 if (m->pindex < NUPDE) { 978 pd_entry_t *pdp; 979 vm_page_t pdpg; 980 981 /* 982 * Recursively decrement next level pagetable refcount 983 */ 984 pdp = (pd_entry_t *)*pmap_segmap(pmap, va); 985 pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp)); 986 pmap_unwire_ptp(pmap, va, pdpg); 987 } 988#endif 989 990 /* 991 * If the page is finally unwired, simply free it. 992 */ 993 vm_page_free_zero(m); 994 atomic_subtract_int(&cnt.v_wire_count, 1); 995} 996 997/* 998 * After removing a page table entry, this routine is used to 999 * conditionally free the page, and manage the hold/wire counts. 
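 *
 * Kernel mappings never free their page table pages here (the
 * va >= VM_MAXUSER_ADDRESS check below simply returns 0), and on
 * __mips_n64 dropping the last reference to a leaf page-table page
 * also recursively unwires the page-directory page that mapped it.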
1000 */ 1001static int 1002pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde) 1003{ 1004 vm_page_t mpte; 1005 1006 if (va >= VM_MAXUSER_ADDRESS) 1007 return (0); 1008 KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0")); 1009 mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde)); 1010 return (pmap_unwire_ptp(pmap, va, mpte)); 1011} 1012 1013void 1014pmap_pinit0(pmap_t pmap) 1015{ 1016 int i; 1017 1018 PMAP_LOCK_INIT(pmap); 1019 pmap->pm_segtab = kernel_segmap; 1020 CPU_ZERO(&pmap->pm_active); 1021 for (i = 0; i < MAXCPU; i++) { 1022 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1023 pmap->pm_asid[i].gen = 0; 1024 } 1025 PCPU_SET(curpmap, pmap); 1026 TAILQ_INIT(&pmap->pm_pvchunk); 1027 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1028} 1029 1030void 1031pmap_grow_direct_page_cache() 1032{ 1033 1034#ifdef __mips_n64 1035 vm_pageout_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS); 1036#else 1037 vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS); 1038#endif 1039} 1040 1041static vm_page_t 1042pmap_alloc_direct_page(unsigned int index, int req) 1043{ 1044 vm_page_t m; 1045 1046 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED | 1047 VM_ALLOC_ZERO); 1048 if (m == NULL) 1049 return (NULL); 1050 1051 if ((m->flags & PG_ZERO) == 0) 1052 pmap_zero_page(m); 1053 1054 m->pindex = index; 1055 return (m); 1056} 1057 1058/* 1059 * Initialize a preallocated and zeroed pmap structure, 1060 * such as one in a vmspace structure. 1061 */ 1062int 1063pmap_pinit(pmap_t pmap) 1064{ 1065 vm_offset_t ptdva; 1066 vm_page_t ptdpg; 1067 int i; 1068 1069 /* 1070 * allocate the page directory page 1071 */ 1072 while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) 1073 pmap_grow_direct_page_cache(); 1074 1075 ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); 1076 pmap->pm_segtab = (pd_entry_t *)ptdva; 1077 CPU_ZERO(&pmap->pm_active); 1078 for (i = 0; i < MAXCPU; i++) { 1079 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; 1080 pmap->pm_asid[i].gen = 0; 1081 } 1082 TAILQ_INIT(&pmap->pm_pvchunk); 1083 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1084 1085 return (1); 1086} 1087 1088/* 1089 * this routine is called if the page table page is not 1090 * mapped correctly. 1091 */ 1092static vm_page_t 1093_pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags) 1094{ 1095 vm_offset_t pageva; 1096 vm_page_t m; 1097 1098 /* 1099 * Find or fabricate a new pagetable page 1100 */ 1101 if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { 1102 if ((flags & PMAP_ENTER_NOSLEEP) == 0) { 1103 PMAP_UNLOCK(pmap); 1104 rw_wunlock(&pvh_global_lock); 1105 pmap_grow_direct_page_cache(); 1106 rw_wlock(&pvh_global_lock); 1107 PMAP_LOCK(pmap); 1108 } 1109 1110 /* 1111 * Indicate the need to retry. While waiting, the page 1112 * table page may have been allocated. 1113 */ 1114 return (NULL); 1115 } 1116 1117 /* 1118 * Map the pagetable page into the process address space, if it 1119 * isn't already there. 
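	 *
	 * On __mips_n64 the new page is hooked either into the segment
	 * table (when it is itself a page-directory page, i.e.
	 * ptepindex >= NUPDE) or into its parent page directory, which is
	 * allocated first if necessary; 32-bit kernels have only the one
	 * level and always link it into pm_segtab directly.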
1120 */ 1121 pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)); 1122 1123#ifdef __mips_n64 1124 if (ptepindex >= NUPDE) { 1125 pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva; 1126 } else { 1127 pd_entry_t *pdep, *pde; 1128 int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT); 1129 int pdeindex = ptepindex & (NPDEPG - 1); 1130 vm_page_t pg; 1131 1132 pdep = &pmap->pm_segtab[segindex]; 1133 if (*pdep == NULL) { 1134 /* recurse for allocating page dir */ 1135 if (_pmap_allocpte(pmap, NUPDE + segindex, 1136 flags) == NULL) { 1137 /* alloc failed, release current */ 1138 --m->wire_count; 1139 atomic_subtract_int(&cnt.v_wire_count, 1); 1140 vm_page_free_zero(m); 1141 return (NULL); 1142 } 1143 } else { 1144 pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep)); 1145 pg->wire_count++; 1146 } 1147 /* Next level entry */ 1148 pde = (pd_entry_t *)*pdep; 1149 pde[pdeindex] = (pd_entry_t)pageva; 1150 } 1151#else 1152 pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva; 1153#endif 1154 pmap->pm_stats.resident_count++; 1155 return (m); 1156} 1157 1158static vm_page_t 1159pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) 1160{ 1161 unsigned ptepindex; 1162 pd_entry_t *pde; 1163 vm_page_t m; 1164 1165 /* 1166 * Calculate pagetable page index 1167 */ 1168 ptepindex = pmap_pde_pindex(va); 1169retry: 1170 /* 1171 * Get the page directory entry 1172 */ 1173 pde = pmap_pde(pmap, va); 1174 1175 /* 1176 * If the page table page is mapped, we just increment the hold 1177 * count, and activate it. 1178 */ 1179 if (pde != NULL && *pde != NULL) { 1180 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde)); 1181 m->wire_count++; 1182 } else { 1183 /* 1184 * Here if the pte page isn't mapped, or if it has been 1185 * deallocated. 1186 */ 1187 m = _pmap_allocpte(pmap, ptepindex, flags); 1188 if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) 1189 goto retry; 1190 } 1191 return (m); 1192} 1193 1194 1195/*************************************************** 1196 * Pmap allocation/deallocation routines. 1197 ***************************************************/ 1198 1199/* 1200 * Release any resources held by the given physical map. 1201 * Called when a pmap initialized by pmap_pinit is being released. 1202 * Should only be called if the map contains no valid mappings. 
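 *
 * At that point only the segment-table page itself remains; it is
 * simply unwired and freed, with no pv bookkeeping left to undo.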
 */
void
pmap_release(pmap_t pmap)
{
	vm_offset_t ptdva;
	vm_page_t ptdpg;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	ptdva = (vm_offset_t)pmap->pm_segtab;
	ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));

	ptdpg->wire_count--;
	atomic_subtract_int(&cnt.v_wire_count, 1);
	vm_page_free_zero(ptdpg);
}

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_page_t nkpg;
	pd_entry_t *pde, *pdpe;
	pt_entry_t *pte;
	int i;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	addr = roundup2(addr, NBSEG);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
#ifdef __mips_n64
		if (*pdpe == 0) {
			/* new intermediate page table entry */
			nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			*pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
			continue;	/* try again */
		}
#endif
		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
		if (*pde != 0) {
			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");
		nkpt++;
		*pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));

		/*
		 * The R[4-7]?00 stores only one copy of the Global bit in
		 * the translation lookaside buffer for each 2 page entry.
		 * Thus invalid entries must have the Global bit set so when
		 * Entry LO and Entry HI G bits are anded together they will
		 * produce a global bit to store in the tlb.
		 */
		pte = (pt_entry_t *)*pde;
		for (i = 0; i < NPTEPG; i++)
			pte[i] = PTE_G;

		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}

/***************************************************
 * page management routines.
1289 ***************************************************/ 1290 1291CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1292#ifdef __mips_n64 1293CTASSERT(_NPCM == 3); 1294CTASSERT(_NPCPV == 168); 1295#else 1296CTASSERT(_NPCM == 11); 1297CTASSERT(_NPCPV == 336); 1298#endif 1299 1300static __inline struct pv_chunk * 1301pv_to_chunk(pv_entry_t pv) 1302{ 1303 1304 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1305} 1306 1307#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1308 1309#ifdef __mips_n64 1310#define PC_FREE0_1 0xfffffffffffffffful 1311#define PC_FREE2 0x000000fffffffffful 1312#else 1313#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 1314#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 1315#endif 1316 1317static const u_long pc_freemask[_NPCM] = { 1318#ifdef __mips_n64 1319 PC_FREE0_1, PC_FREE0_1, PC_FREE2 1320#else 1321 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1322 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1323 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1324 PC_FREE0_9, PC_FREE10 1325#endif 1326}; 1327 1328static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 1329 1330SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1331 "Current number of pv entries"); 1332 1333#ifdef PV_STATS 1334static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1335 1336SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1337 "Current number of pv entry chunks"); 1338SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1339 "Current number of pv entry chunks allocated"); 1340SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1341 "Current number of pv entry chunks frees"); 1342SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1343 "Number of times tried to get a chunk page but failed."); 1344 1345static long pv_entry_frees, pv_entry_allocs; 1346static int pv_entry_spare; 1347 1348SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1349 "Current number of pv entry frees"); 1350SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1351 "Current number of pv entry allocs"); 1352SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1353 "Current number of spare pv entries"); 1354#endif 1355 1356/* 1357 * We are in a serious low memory condition. Resort to 1358 * drastic measures to free some pages so we can allocate 1359 * another pv entry chunk. 1360 */ 1361static vm_page_t 1362pmap_pv_reclaim(pmap_t locked_pmap) 1363{ 1364 struct pch newtail; 1365 struct pv_chunk *pc; 1366 pd_entry_t *pde; 1367 pmap_t pmap; 1368 pt_entry_t *pte, oldpte; 1369 pv_entry_t pv; 1370 vm_offset_t va; 1371 vm_page_t m, m_pc; 1372 u_long inuse; 1373 int bit, field, freed, idx; 1374 1375 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1376 pmap = NULL; 1377 m_pc = NULL; 1378 TAILQ_INIT(&newtail); 1379 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) { 1380 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1381 if (pmap != pc->pc_pmap) { 1382 if (pmap != NULL) { 1383 pmap_invalidate_all(pmap); 1384 if (pmap != locked_pmap) 1385 PMAP_UNLOCK(pmap); 1386 } 1387 pmap = pc->pc_pmap; 1388 /* Avoid deadlock and lock recursion. 
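			 * Pmaps are locked in address order: a pmap that
			 * sorts after locked_pmap may be locked outright,
			 * while any other pmap is only trylocked and its
			 * chunk is requeued and skipped if that fails.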
*/ 1389 if (pmap > locked_pmap) 1390 PMAP_LOCK(pmap); 1391 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 1392 pmap = NULL; 1393 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1394 continue; 1395 } 1396 } 1397 1398 /* 1399 * Destroy every non-wired, 4 KB page mapping in the chunk. 1400 */ 1401 freed = 0; 1402 for (field = 0; field < _NPCM; field++) { 1403 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 1404 inuse != 0; inuse &= ~(1UL << bit)) { 1405 bit = ffsl(inuse) - 1; 1406 idx = field * sizeof(inuse) * NBBY + bit; 1407 pv = &pc->pc_pventry[idx]; 1408 va = pv->pv_va; 1409 pde = pmap_pde(pmap, va); 1410 KASSERT(pde != NULL && *pde != 0, 1411 ("pmap_pv_reclaim: pde")); 1412 pte = pmap_pde_to_pte(pde, va); 1413 oldpte = *pte; 1414 if (pte_test(&oldpte, PTE_W)) 1415 continue; 1416 if (is_kernel_pmap(pmap)) 1417 *pte = PTE_G; 1418 else 1419 *pte = 0; 1420 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte)); 1421 if (pte_test(&oldpte, PTE_D)) 1422 vm_page_dirty(m); 1423 if (m->md.pv_flags & PV_TABLE_REF) 1424 vm_page_aflag_set(m, PGA_REFERENCED); 1425 m->md.pv_flags &= ~PV_TABLE_REF; 1426 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1427 if (TAILQ_EMPTY(&m->md.pv_list)) 1428 vm_page_aflag_clear(m, PGA_WRITEABLE); 1429 pc->pc_map[field] |= 1UL << bit; 1430 pmap_unuse_pt(pmap, va, *pde); 1431 freed++; 1432 } 1433 } 1434 if (freed == 0) { 1435 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1436 continue; 1437 } 1438 /* Every freed mapping is for a 4 KB page. */ 1439 pmap->pm_stats.resident_count -= freed; 1440 PV_STAT(pv_entry_frees += freed); 1441 PV_STAT(pv_entry_spare += freed); 1442 pv_entry_count -= freed; 1443 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1444 for (field = 0; field < _NPCM; field++) 1445 if (pc->pc_map[field] != pc_freemask[field]) { 1446 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 1447 pc_list); 1448 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 1449 1450 /* 1451 * One freed pv entry in locked_pmap is 1452 * sufficient. 1453 */ 1454 if (pmap == locked_pmap) 1455 goto out; 1456 break; 1457 } 1458 if (field == _NPCM) { 1459 PV_STAT(pv_entry_spare -= _NPCPV); 1460 PV_STAT(pc_chunk_count--); 1461 PV_STAT(pc_chunk_frees++); 1462 /* Entire chunk is free; return it. */ 1463 m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS( 1464 (vm_offset_t)pc)); 1465 break; 1466 } 1467 } 1468out: 1469 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 1470 if (pmap != NULL) { 1471 pmap_invalidate_all(pmap); 1472 if (pmap != locked_pmap) 1473 PMAP_UNLOCK(pmap); 1474 } 1475 return (m_pc); 1476} 1477 1478/* 1479 * free the pv_entry back to the free list 1480 */ 1481static void 1482free_pv_entry(pmap_t pmap, pv_entry_t pv) 1483{ 1484 struct pv_chunk *pc; 1485 int bit, field, idx; 1486 1487 rw_assert(&pvh_global_lock, RA_WLOCKED); 1488 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1489 PV_STAT(pv_entry_frees++); 1490 PV_STAT(pv_entry_spare++); 1491 pv_entry_count--; 1492 pc = pv_to_chunk(pv); 1493 idx = pv - &pc->pc_pventry[0]; 1494 field = idx / (sizeof(u_long) * NBBY); 1495 bit = idx % (sizeof(u_long) * NBBY); 1496 pc->pc_map[field] |= 1ul << bit; 1497 for (idx = 0; idx < _NPCM; idx++) 1498 if (pc->pc_map[idx] != pc_freemask[idx]) { 1499 /* 1500 * 98% of the time, pc is already at the head of the 1501 * list. If it isn't already, move it to the head. 
1502 */ 1503 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != 1504 pc)) { 1505 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1506 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 1507 pc_list); 1508 } 1509 return; 1510 } 1511 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1512 free_pv_chunk(pc); 1513} 1514 1515static void 1516free_pv_chunk(struct pv_chunk *pc) 1517{ 1518 vm_page_t m; 1519 1520 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1521 PV_STAT(pv_entry_spare -= _NPCPV); 1522 PV_STAT(pc_chunk_count--); 1523 PV_STAT(pc_chunk_frees++); 1524 /* entire chunk is free, return it */ 1525 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc)); 1526 vm_page_unwire(m, 0); 1527 vm_page_free(m); 1528} 1529 1530/* 1531 * get a new pv_entry, allocating a block from the system 1532 * when needed. 1533 */ 1534static pv_entry_t 1535get_pv_entry(pmap_t pmap, boolean_t try) 1536{ 1537 struct pv_chunk *pc; 1538 pv_entry_t pv; 1539 vm_page_t m; 1540 int bit, field, idx; 1541 1542 rw_assert(&pvh_global_lock, RA_WLOCKED); 1543 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1544 PV_STAT(pv_entry_allocs++); 1545 pv_entry_count++; 1546retry: 1547 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1548 if (pc != NULL) { 1549 for (field = 0; field < _NPCM; field++) { 1550 if (pc->pc_map[field]) { 1551 bit = ffsl(pc->pc_map[field]) - 1; 1552 break; 1553 } 1554 } 1555 if (field < _NPCM) { 1556 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; 1557 pv = &pc->pc_pventry[idx]; 1558 pc->pc_map[field] &= ~(1ul << bit); 1559 /* If this was the last item, move it to tail */ 1560 for (field = 0; field < _NPCM; field++) 1561 if (pc->pc_map[field] != 0) { 1562 PV_STAT(pv_entry_spare--); 1563 return (pv); /* not full, return */ 1564 } 1565 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1566 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 1567 PV_STAT(pv_entry_spare--); 1568 return (pv); 1569 } 1570 } 1571 /* No free items, allocate another chunk */ 1572 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL | 1573 VM_ALLOC_WIRED); 1574 if (m == NULL) { 1575 if (try) { 1576 pv_entry_count--; 1577 PV_STAT(pc_chunk_tryfail++); 1578 return (NULL); 1579 } 1580 m = pmap_pv_reclaim(pmap); 1581 if (m == NULL) 1582 goto retry; 1583 } 1584 PV_STAT(pc_chunk_count++); 1585 PV_STAT(pc_chunk_allocs++); 1586 pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)); 1587 pc->pc_pmap = pmap; 1588 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 1589 for (field = 1; field < _NPCM; field++) 1590 pc->pc_map[field] = pc_freemask[field]; 1591 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1592 pv = &pc->pc_pventry[0]; 1593 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1594 PV_STAT(pv_entry_spare += _NPCPV - 1); 1595 return (pv); 1596} 1597 1598static pv_entry_t 1599pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1600{ 1601 pv_entry_t pv; 1602 1603 rw_assert(&pvh_global_lock, RA_WLOCKED); 1604 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 1605 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1606 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 1607 break; 1608 } 1609 } 1610 return (pv); 1611} 1612 1613static void 1614pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1615{ 1616 pv_entry_t pv; 1617 1618 pv = pmap_pvh_remove(pvh, pmap, va); 1619 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx", 1620 (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)), 1621 (u_long)va)); 1622 free_pv_entry(pmap, pv); 1623} 1624 1625static void 1626pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1627{ 
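	/*
	 * Remove the pv entry for (pmap, va) from the page's pv list;
	 * once that list becomes empty no writable mappings of the page
	 * can remain, so PGA_WRITEABLE is cleared as well.
	 */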
1628 1629 rw_assert(&pvh_global_lock, RA_WLOCKED); 1630 pmap_pvh_free(&m->md, pmap, va); 1631 if (TAILQ_EMPTY(&m->md.pv_list)) 1632 vm_page_aflag_clear(m, PGA_WRITEABLE); 1633} 1634 1635/* 1636 * Conditionally create a pv entry. 1637 */ 1638static boolean_t 1639pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va, 1640 vm_page_t m) 1641{ 1642 pv_entry_t pv; 1643 1644 rw_assert(&pvh_global_lock, RA_WLOCKED); 1645 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1646 if ((pv = get_pv_entry(pmap, TRUE)) != NULL) { 1647 pv->pv_va = va; 1648 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1649 return (TRUE); 1650 } else 1651 return (FALSE); 1652} 1653 1654/* 1655 * pmap_remove_pte: do the things to unmap a page in a process 1656 */ 1657static int 1658pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va, 1659 pd_entry_t pde) 1660{ 1661 pt_entry_t oldpte; 1662 vm_page_t m; 1663 vm_paddr_t pa; 1664 1665 rw_assert(&pvh_global_lock, RA_WLOCKED); 1666 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1667 1668 /* 1669 * Write back all cache lines from the page being unmapped. 1670 */ 1671 mips_dcache_wbinv_range_index(va, PAGE_SIZE); 1672 1673 oldpte = *ptq; 1674 if (is_kernel_pmap(pmap)) 1675 *ptq = PTE_G; 1676 else 1677 *ptq = 0; 1678 1679 if (pte_test(&oldpte, PTE_W)) 1680 pmap->pm_stats.wired_count -= 1; 1681 1682 pmap->pm_stats.resident_count -= 1; 1683 1684 if (pte_test(&oldpte, PTE_MANAGED)) { 1685 pa = TLBLO_PTE_TO_PA(oldpte); 1686 m = PHYS_TO_VM_PAGE(pa); 1687 if (pte_test(&oldpte, PTE_D)) { 1688 KASSERT(!pte_test(&oldpte, PTE_RO), 1689 ("%s: modified page not writable: va: %p, pte: %#jx", 1690 __func__, (void *)va, (uintmax_t)oldpte)); 1691 vm_page_dirty(m); 1692 } 1693 if (m->md.pv_flags & PV_TABLE_REF) 1694 vm_page_aflag_set(m, PGA_REFERENCED); 1695 m->md.pv_flags &= ~PV_TABLE_REF; 1696 1697 pmap_remove_entry(pmap, m, va); 1698 } 1699 return (pmap_unuse_pt(pmap, va, pde)); 1700} 1701 1702/* 1703 * Remove a single page from a process address space 1704 */ 1705static void 1706pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1707{ 1708 pd_entry_t *pde; 1709 pt_entry_t *ptq; 1710 1711 rw_assert(&pvh_global_lock, RA_WLOCKED); 1712 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1713 pde = pmap_pde(pmap, va); 1714 if (pde == NULL || *pde == 0) 1715 return; 1716 ptq = pmap_pde_to_pte(pde, va); 1717 1718 /* 1719 * If there is no pte for this address, just skip it! 1720 */ 1721 if (!pte_test(ptq, PTE_V)) 1722 return; 1723 1724 (void)pmap_remove_pte(pmap, ptq, va, *pde); 1725 pmap_invalidate_page(pmap, va); 1726} 1727 1728/* 1729 * Remove the given range of addresses from the specified map. 1730 * 1731 * It is assumed that the start and end are properly 1732 * rounded to the page size. 1733 */ 1734void 1735pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1736{ 1737 pd_entry_t *pde, *pdpe; 1738 pt_entry_t *pte; 1739 vm_offset_t va, va_next; 1740 1741 /* 1742 * Perform an unsynchronized read. This is, however, safe. 1743 */ 1744 if (pmap->pm_stats.resident_count == 0) 1745 return; 1746 1747 rw_wlock(&pvh_global_lock); 1748 PMAP_LOCK(pmap); 1749 1750 /* 1751 * special handling of removing one page. a very common operation 1752 * and easy to short circuit some code. 
 */
	if ((sva + PAGE_SIZE) == eva) {
		pmap_remove_page(pmap, sva);
		goto out;
	}
	for (; sva < eva; sva = va_next) {
		pdpe = pmap_segmap(pmap, sva);
#ifdef __mips_n64
		if (*pdpe == 0) {
			va_next = (sva + NBSEG) & ~SEGMASK;
			if (va_next < sva)
				va_next = eva;
			continue;
		}
#endif
		va_next = (sva + NBPDR) & ~PDRMASK;
		if (va_next < sva)
			va_next = eva;

		pde = pmap_pdpe_to_pde(pdpe, sva);
		if (*pde == NULL)
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
		    sva += PAGE_SIZE) {
			if (!pte_test(pte, PTE_V)) {
				if (va != va_next) {
					pmap_invalidate_range(pmap, va, sva);
					va = va_next;
				}
				continue;
			}
			if (va == va_next)
				va = sva;
			if (pmap_remove_pte(pmap, pte, sva, *pde)) {
				sva += PAGE_SIZE;
				break;
			}
		}
		if (va != va_next)
			pmap_invalidate_range(pmap, va, sva);
	}
out:
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}

/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pd_entry_t *pde;
	pt_entry_t *pte, tpte;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	rw_wlock(&pvh_global_lock);

	if (m->md.pv_flags & PV_TABLE_REF)
		vm_page_aflag_set(m, PGA_REFERENCED);

	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);

		/*
		 * If this is the last mapping, write back all caches from
		 * the page being destroyed.
		 */
		if (TAILQ_NEXT(pv, pv_list) == NULL)
			mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);

		pmap->pm_stats.resident_count--;

		pde = pmap_pde(pmap, pv->pv_va);
		KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde"));
		pte = pmap_pde_to_pte(pde, pv->pv_va);

		tpte = *pte;
		if (is_kernel_pmap(pmap))
			*pte = PTE_G;
		else
			*pte = 0;

		if (pte_test(&tpte, PTE_W))
			pmap->pm_stats.wired_count--;

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (pte_test(&tpte, PTE_D)) {
			KASSERT(!pte_test(&tpte, PTE_RO),
			    ("%s: modified page not writable: va: %p, pte: %#jx",
			    __func__, (void *)pv->pv_va, (uintmax_t)tpte));
			vm_page_dirty(m);
		}
		pmap_invalidate_page(pmap, pv->pv_va);

		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		pmap_unuse_pt(pmap, pv->pv_va, *pde);
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}

	vm_page_aflag_clear(m, PGA_WRITEABLE);
	m->md.pv_flags &= ~PV_TABLE_REF;
	rw_wunlock(&pvh_global_lock);
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
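 *
 *	Write access is revoked by setting PTE_RO on each valid PTE; any
 *	dirty state (PTE_D) accumulated so far is pushed to the vm_page
 *	via vm_page_dirty() before the bit is cleared, so modifications
 *	are not lost.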
1888 */ 1889void 1890pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1891{ 1892 pt_entry_t pbits, *pte; 1893 pd_entry_t *pde, *pdpe; 1894 vm_offset_t va, va_next; 1895 vm_paddr_t pa; 1896 vm_page_t m; 1897 1898 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1899 pmap_remove(pmap, sva, eva); 1900 return; 1901 } 1902 if (prot & VM_PROT_WRITE) 1903 return; 1904 1905 PMAP_LOCK(pmap); 1906 for (; sva < eva; sva = va_next) { 1907 pdpe = pmap_segmap(pmap, sva); 1908#ifdef __mips_n64 1909 if (*pdpe == 0) { 1910 va_next = (sva + NBSEG) & ~SEGMASK; 1911 if (va_next < sva) 1912 va_next = eva; 1913 continue; 1914 } 1915#endif 1916 va_next = (sva + NBPDR) & ~PDRMASK; 1917 if (va_next < sva) 1918 va_next = eva; 1919 1920 pde = pmap_pdpe_to_pde(pdpe, sva); 1921 if (*pde == NULL) 1922 continue; 1923 1924 /* 1925 * Limit our scan to either the end of the va represented 1926 * by the current page table page, or to the end of the 1927 * range being write protected. 1928 */ 1929 if (va_next > eva) 1930 va_next = eva; 1931 1932 va = va_next; 1933 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1934 sva += PAGE_SIZE) { 1935 pbits = *pte; 1936 if (!pte_test(&pbits, PTE_V) || pte_test(&pbits, 1937 PTE_RO)) { 1938 if (va != va_next) { 1939 pmap_invalidate_range(pmap, va, sva); 1940 va = va_next; 1941 } 1942 continue; 1943 } 1944 pte_set(&pbits, PTE_RO); 1945 if (pte_test(&pbits, PTE_D)) { 1946 pte_clear(&pbits, PTE_D); 1947 if (pte_test(&pbits, PTE_MANAGED)) { 1948 pa = TLBLO_PTE_TO_PA(pbits); 1949 m = PHYS_TO_VM_PAGE(pa); 1950 vm_page_dirty(m); 1951 } 1952 if (va == va_next) 1953 va = sva; 1954 } else { 1955 /* 1956 * Unless PTE_D is set, any TLB entries 1957 * mapping "sva" don't allow write access, so 1958 * they needn't be invalidated. 1959 */ 1960 if (va != va_next) { 1961 pmap_invalidate_range(pmap, va, sva); 1962 va = va_next; 1963 } 1964 } 1965 *pte = pbits; 1966 } 1967 if (va != va_next) 1968 pmap_invalidate_range(pmap, va, sva); 1969 } 1970 PMAP_UNLOCK(pmap); 1971} 1972 1973/* 1974 * Insert the given physical page (p) at 1975 * the specified virtual address (v) in the 1976 * target physical map with the protection requested. 1977 * 1978 * If specified, the page will be wired down, meaning 1979 * that the related pte can not be reclaimed. 1980 * 1981 * NB: This is the only routine which MAY NOT lazy-evaluate 1982 * or lose information. That is, this routine must actually 1983 * insert this page into the given map NOW. 
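 *
 *	Returns KERN_SUCCESS on success, or KERN_RESOURCE_SHORTAGE when a
 *	page table page is needed but cannot be allocated and
 *	PMAP_ENTER_NOSLEEP forbids waiting for one.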
1984 */ 1985int 1986pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1987 u_int flags, int8_t psind __unused) 1988{ 1989 vm_paddr_t pa, opa; 1990 pt_entry_t *pte; 1991 pt_entry_t origpte, newpte; 1992 pv_entry_t pv; 1993 vm_page_t mpte, om; 1994 1995 va &= ~PAGE_MASK; 1996 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 1997 KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || 1998 va >= kmi.clean_eva, 1999 ("pmap_enter: managed mapping within the clean submap")); 2000 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2001 VM_OBJECT_ASSERT_LOCKED(m->object); 2002 pa = VM_PAGE_TO_PHYS(m); 2003 newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, flags, prot); 2004 if ((flags & PMAP_ENTER_WIRED) != 0) 2005 newpte |= PTE_W; 2006 if (is_kernel_pmap(pmap)) 2007 newpte |= PTE_G; 2008 if (is_cacheable_mem(pa)) 2009 newpte |= PTE_C_CACHE; 2010 else 2011 newpte |= PTE_C_UNCACHED; 2012 2013 mpte = NULL; 2014 2015 rw_wlock(&pvh_global_lock); 2016 PMAP_LOCK(pmap); 2017 2018 /* 2019 * In the case that a page table page is not resident, we are 2020 * creating it here. 2021 */ 2022 if (va < VM_MAXUSER_ADDRESS) { 2023 mpte = pmap_allocpte(pmap, va, flags); 2024 if (mpte == NULL) { 2025 KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, 2026 ("pmap_allocpte failed with sleep allowed")); 2027 rw_wunlock(&pvh_global_lock); 2028 PMAP_UNLOCK(pmap); 2029 return (KERN_RESOURCE_SHORTAGE); 2030 } 2031 } 2032 pte = pmap_pte(pmap, va); 2033 2034 /* 2035 * Page Directory table entry not valid, we need a new PT page 2036 */ 2037 if (pte == NULL) { 2038 panic("pmap_enter: invalid page directory, pdir=%p, va=%p", 2039 (void *)pmap->pm_segtab, (void *)va); 2040 } 2041 om = NULL; 2042 origpte = *pte; 2043 opa = TLBLO_PTE_TO_PA(origpte); 2044 2045 /* 2046 * Mapping has not changed, must be protection or wiring change. 2047 */ 2048 if (pte_test(&origpte, PTE_V) && opa == pa) { 2049 /* 2050 * Wiring change, just update stats. We don't worry about 2051 * wiring PT pages as they remain resident as long as there 2052 * are valid mappings in them. Hence, if a user page is 2053 * wired, the PT page will be also. 2054 */ 2055 if (pte_test(&newpte, PTE_W) && !pte_test(&origpte, PTE_W)) 2056 pmap->pm_stats.wired_count++; 2057 else if (!pte_test(&newpte, PTE_W) && pte_test(&origpte, 2058 PTE_W)) 2059 pmap->pm_stats.wired_count--; 2060 2061 KASSERT(!pte_test(&origpte, PTE_D | PTE_RO), 2062 ("%s: modified page not writable: va: %p, pte: %#jx", 2063 __func__, (void *)va, (uintmax_t)origpte)); 2064 2065 /* 2066 * Remove extra pte reference 2067 */ 2068 if (mpte) 2069 mpte->wire_count--; 2070 2071 if (pte_test(&origpte, PTE_MANAGED)) { 2072 m->md.pv_flags |= PV_TABLE_REF; 2073 om = m; 2074 newpte |= PTE_MANAGED; 2075 if (!pte_test(&newpte, PTE_RO)) 2076 vm_page_aflag_set(m, PGA_WRITEABLE); 2077 } 2078 goto validate; 2079 } 2080 2081 pv = NULL; 2082 2083 /* 2084 * Mapping has changed, invalidate old range and fall through to 2085 * handle validating new mapping. 2086 */ 2087 if (opa) { 2088 if (pte_test(&origpte, PTE_W)) 2089 pmap->pm_stats.wired_count--; 2090 2091 if (pte_test(&origpte, PTE_MANAGED)) { 2092 om = PHYS_TO_VM_PAGE(opa); 2093 pv = pmap_pvh_remove(&om->md, pmap, va); 2094 } 2095 if (mpte != NULL) { 2096 mpte->wire_count--; 2097 KASSERT(mpte->wire_count > 0, 2098 ("pmap_enter: missing reference to page table page," 2099 " va: %p", (void *)va)); 2100 } 2101 } else 2102 pmap->pm_stats.resident_count++; 2103 2104 /* 2105 * Enter on the PV list if part of our managed memory. 
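	 * A pv entry removed from the old mapping above is reused when
	 * possible; otherwise a fresh one is allocated, reclaiming pv
	 * chunks from other pmaps if memory is tight.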
2106 */ 2107 if ((m->oflags & VPO_UNMANAGED) == 0) { 2108 m->md.pv_flags |= PV_TABLE_REF; 2109 if (pv == NULL) 2110 pv = get_pv_entry(pmap, FALSE); 2111 pv->pv_va = va; 2112 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2113 newpte |= PTE_MANAGED; 2114 if (!pte_test(&newpte, PTE_RO)) 2115 vm_page_aflag_set(m, PGA_WRITEABLE); 2116 } else if (pv != NULL) 2117 free_pv_entry(pmap, pv); 2118 2119 /* 2120 * Increment counters 2121 */ 2122 if (pte_test(&newpte, PTE_W)) 2123 pmap->pm_stats.wired_count++; 2124 2125validate: 2126 2127#ifdef PMAP_DEBUG 2128 printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa); 2129#endif 2130 2131 /* 2132 * if the mapping or permission bits are different, we need to 2133 * update the pte. 2134 */ 2135 if (origpte != newpte) { 2136 *pte = newpte; 2137 if (pte_test(&origpte, PTE_V)) { 2138 if (pte_test(&origpte, PTE_MANAGED) && opa != pa) { 2139 if (om->md.pv_flags & PV_TABLE_REF) 2140 vm_page_aflag_set(om, PGA_REFERENCED); 2141 om->md.pv_flags &= ~PV_TABLE_REF; 2142 } 2143 if (pte_test(&origpte, PTE_D)) { 2144 KASSERT(!pte_test(&origpte, PTE_RO), 2145 ("pmap_enter: modified page not writable:" 2146 " va: %p, pte: %#jx", (void *)va, (uintmax_t)origpte)); 2147 if (pte_test(&origpte, PTE_MANAGED)) 2148 vm_page_dirty(om); 2149 } 2150 if (pte_test(&origpte, PTE_MANAGED) && 2151 TAILQ_EMPTY(&om->md.pv_list)) 2152 vm_page_aflag_clear(om, PGA_WRITEABLE); 2153 pmap_update_page(pmap, va, newpte); 2154 } 2155 } 2156 2157 /* 2158 * Sync I & D caches for executable pages. Do this only if the 2159 * target pmap belongs to the current process. Otherwise, an 2160 * unresolvable TLB miss may occur. 2161 */ 2162 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) && 2163 (prot & VM_PROT_EXECUTE)) { 2164 mips_icache_sync_range(va, PAGE_SIZE); 2165 mips_dcache_wbinv_range(va, PAGE_SIZE); 2166 } 2167 rw_wunlock(&pvh_global_lock); 2168 PMAP_UNLOCK(pmap); 2169 return (KERN_SUCCESS); 2170} 2171 2172/* 2173 * this code makes some *MAJOR* assumptions: 2174 * 1. Current pmap & pmap exists. 2175 * 2. Not wired. 2176 * 3. Read access. 2177 * 4. No page table pages. 2178 * but is *MUCH* faster than pmap_enter... 2179 */ 2180 2181void 2182pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2183{ 2184 2185 rw_wlock(&pvh_global_lock); 2186 PMAP_LOCK(pmap); 2187 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); 2188 rw_wunlock(&pvh_global_lock); 2189 PMAP_UNLOCK(pmap); 2190} 2191 2192static vm_page_t 2193pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2194 vm_prot_t prot, vm_page_t mpte) 2195{ 2196 pt_entry_t *pte; 2197 vm_paddr_t pa; 2198 2199 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2200 (m->oflags & VPO_UNMANAGED) != 0, 2201 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2202 rw_assert(&pvh_global_lock, RA_WLOCKED); 2203 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2204 2205 /* 2206 * In the case that a page table page is not resident, we are 2207 * creating it here. 2208 */ 2209 if (va < VM_MAXUSER_ADDRESS) { 2210 pd_entry_t *pde; 2211 unsigned ptepindex; 2212 2213 /* 2214 * Calculate pagetable page index 2215 */ 2216 ptepindex = pmap_pde_pindex(va); 2217 if (mpte && (mpte->pindex == ptepindex)) { 2218 mpte->wire_count++; 2219 } else { 2220 /* 2221 * Get the page directory entry 2222 */ 2223 pde = pmap_pde(pmap, va); 2224 2225 /* 2226 * If the page table page is mapped, we just 2227 * increment the hold count, and activate it. 
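 *
 * (The page table page's wire_count doubles as that hold count: each
 * mapping entered through the page contributes a reference, and
 * pmap_unwire_ptp() releases the page once the count drops to zero.)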
2228 */ 2229 if (pde && *pde != 0) { 2230 mpte = PHYS_TO_VM_PAGE( 2231 MIPS_DIRECT_TO_PHYS(*pde)); 2232 mpte->wire_count++; 2233 } else { 2234 mpte = _pmap_allocpte(pmap, ptepindex, 2235 PMAP_ENTER_NOSLEEP); 2236 if (mpte == NULL) 2237 return (mpte); 2238 } 2239 } 2240 } else { 2241 mpte = NULL; 2242 } 2243 2244 pte = pmap_pte(pmap, va); 2245 if (pte_test(pte, PTE_V)) { 2246 if (mpte != NULL) { 2247 mpte->wire_count--; 2248 mpte = NULL; 2249 } 2250 return (mpte); 2251 } 2252 2253 /* 2254 * Enter on the PV list if part of our managed memory. 2255 */ 2256 if ((m->oflags & VPO_UNMANAGED) == 0 && 2257 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) { 2258 if (mpte != NULL) { 2259 pmap_unwire_ptp(pmap, va, mpte); 2260 mpte = NULL; 2261 } 2262 return (mpte); 2263 } 2264 2265 /* 2266 * Increment counters 2267 */ 2268 pmap->pm_stats.resident_count++; 2269 2270 pa = VM_PAGE_TO_PHYS(m); 2271 2272 /* 2273 * Now validate mapping with RO protection 2274 */ 2275 *pte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V; 2276 if ((m->oflags & VPO_UNMANAGED) == 0) 2277 *pte |= PTE_MANAGED; 2278 2279 if (is_cacheable_mem(pa)) 2280 *pte |= PTE_C_CACHE; 2281 else 2282 *pte |= PTE_C_UNCACHED; 2283 2284 if (is_kernel_pmap(pmap)) 2285 *pte |= PTE_G; 2286 else { 2287 /* 2288 * Sync I & D caches. Do this only if the target pmap 2289 * belongs to the current process. Otherwise, an 2290 * unresolvable TLB miss may occur. */ 2291 if (pmap == &curproc->p_vmspace->vm_pmap) { 2292 va &= ~PAGE_MASK; 2293 mips_icache_sync_range(va, PAGE_SIZE); 2294 mips_dcache_wbinv_range(va, PAGE_SIZE); 2295 } 2296 } 2297 return (mpte); 2298} 2299 2300/* 2301 * Make a temporary mapping for a physical address. This is only intended 2302 * to be used for panic dumps. 2303 * 2304 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2305 */ 2306void * 2307pmap_kenter_temporary(vm_paddr_t pa, int i) 2308{ 2309 vm_offset_t va; 2310 2311 if (i != 0) 2312 printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n", 2313 __func__); 2314 2315 if (MIPS_DIRECT_MAPPABLE(pa)) { 2316 va = MIPS_PHYS_TO_DIRECT(pa); 2317 } else { 2318#ifndef __mips_n64 /* XXX : to be converted to new style */ 2319 int cpu; 2320 register_t intr; 2321 struct local_sysmaps *sysm; 2322 pt_entry_t *pte, npte; 2323 2324 /* If this is used other than for dumps, we may need to leave 2325 * interrupts disasbled on return. 
If crash dumps don't work when 2326 * we get to this point, we might want to consider this (leaving things 2327 * disabled as a starting point ;-) 2328 */ 2329 intr = intr_disable(); 2330 cpu = PCPU_GET(cpuid); 2331 sysm = &sysmap_lmem[cpu]; 2332 /* Since this is for the debugger, no locks or any other fun */ 2333 npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V | 2334 PTE_G; 2335 pte = pmap_pte(kernel_pmap, sysm->base); 2336 *pte = npte; 2337 sysm->valid1 = 1; 2338 pmap_update_page(kernel_pmap, sysm->base, npte); 2339 va = sysm->base; 2340 intr_restore(intr); 2341#endif 2342 } 2343 return ((void *)va); 2344} 2345 2346void 2347pmap_kenter_temporary_free(vm_paddr_t pa) 2348{ 2349#ifndef __mips_n64 /* XXX : to be converted to new style */ 2350 int cpu; 2351 register_t intr; 2352 struct local_sysmaps *sysm; 2353#endif 2354 2355 if (MIPS_DIRECT_MAPPABLE(pa)) { 2356 /* nothing to do for this case */ 2357 return; 2358 } 2359#ifndef __mips_n64 /* XXX : to be converted to new style */ 2360 cpu = PCPU_GET(cpuid); 2361 sysm = &sysmap_lmem[cpu]; 2362 if (sysm->valid1) { 2363 pt_entry_t *pte; 2364 2365 intr = intr_disable(); 2366 pte = pmap_pte(kernel_pmap, sysm->base); 2367 *pte = PTE_G; 2368 pmap_invalidate_page(kernel_pmap, sysm->base); 2369 intr_restore(intr); 2370 sysm->valid1 = 0; 2371 } 2372#endif 2373} 2374 2375/* 2376 * Maps a sequence of resident pages belonging to the same object. 2377 * The sequence begins with the given page m_start. This page is 2378 * mapped at the given virtual address start. Each subsequent page is 2379 * mapped at a virtual address that is offset from start by the same 2380 * amount as the page is offset from m_start within the object. The 2381 * last page in the sequence is the page with the largest offset from 2382 * m_start that can be mapped at a virtual address less than the given 2383 * virtual address end. Not every virtual page between start and end 2384 * is mapped; only those for which a resident page exists with the 2385 * corresponding offset from m_start are mapped. 2386 */ 2387void 2388pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2389 vm_page_t m_start, vm_prot_t prot) 2390{ 2391 vm_page_t m, mpte; 2392 vm_pindex_t diff, psize; 2393 2394 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2395 2396 psize = atop(end - start); 2397 mpte = NULL; 2398 m = m_start; 2399 rw_wlock(&pvh_global_lock); 2400 PMAP_LOCK(pmap); 2401 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2402 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, 2403 prot, mpte); 2404 m = TAILQ_NEXT(m, listq); 2405 } 2406 rw_wunlock(&pvh_global_lock); 2407 PMAP_UNLOCK(pmap); 2408} 2409 2410/* 2411 * pmap_object_init_pt preloads the ptes for a given object 2412 * into the specified pmap. This eliminates the blast of soft 2413 * faults on process startup and immediately after an mmap. 2414 */ 2415void 2416pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2417 vm_object_t object, vm_pindex_t pindex, vm_size_t size) 2418{ 2419 VM_OBJECT_ASSERT_WLOCKED(object); 2420 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2421 ("pmap_object_init_pt: non-device object")); 2422} 2423 2424/* 2425 * Clear the wired attribute from the mappings for the specified range of 2426 * addresses in the given pmap. Every valid mapping within that range 2427 * must have the wired attribute set. In contrast, invalid mappings 2428 * cannot have the wired attribute set, so they are ignored. 
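 *
 * Illustrative only, with hypothetical names: the expected caller is
 * the VM map layer undoing the wiring of an entire map entry, roughly
 *
 *	pmap_unwire(vmspace_pmap(vm), entry->start, entry->end);
 *
 * where "vm" and "entry" stand for the caller's vmspace and map entry.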
2429 * 2430 * The wired attribute of the page table entry is not a hardware feature, 2431 * so there is no need to invalidate any TLB entries. 2432 */ 2433void 2434pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2435{ 2436 pd_entry_t *pde, *pdpe; 2437 pt_entry_t *pte; 2438 vm_offset_t va_next; 2439 2440 PMAP_LOCK(pmap); 2441 for (; sva < eva; sva = va_next) { 2442 pdpe = pmap_segmap(pmap, sva); 2443#ifdef __mips_n64 2444 if (*pdpe == NULL) { 2445 va_next = (sva + NBSEG) & ~SEGMASK; 2446 if (va_next < sva) 2447 va_next = eva; 2448 continue; 2449 } 2450#endif 2451 va_next = (sva + NBPDR) & ~PDRMASK; 2452 if (va_next < sva) 2453 va_next = eva; 2454 pde = pmap_pdpe_to_pde(pdpe, sva); 2455 if (*pde == NULL) 2456 continue; 2457 if (va_next > eva) 2458 va_next = eva; 2459 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2460 sva += PAGE_SIZE) { 2461 if (!pte_test(pte, PTE_V)) 2462 continue; 2463 if (!pte_test(pte, PTE_W)) 2464 panic("pmap_unwire: pte %#jx is missing PG_W", 2465 (uintmax_t)*pte); 2466 pte_clear(pte, PTE_W); 2467 pmap->pm_stats.wired_count--; 2468 } 2469 } 2470 PMAP_UNLOCK(pmap); 2471} 2472 2473/* 2474 * Copy the range specified by src_addr/len 2475 * from the source map to the range dst_addr/len 2476 * in the destination map. 2477 * 2478 * This routine is only advisory and need not do anything. 2479 */ 2480 2481void 2482pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2483 vm_size_t len, vm_offset_t src_addr) 2484{ 2485} 2486 2487/* 2488 * pmap_zero_page zeros the specified hardware page by mapping 2489 * the page into KVM and using bzero to clear its contents. 2490 * 2491 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2492 */ 2493void 2494pmap_zero_page(vm_page_t m) 2495{ 2496 vm_offset_t va; 2497 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2498 2499 if (MIPS_DIRECT_MAPPABLE(phys)) { 2500 va = MIPS_PHYS_TO_DIRECT(phys); 2501 bzero((caddr_t)va, PAGE_SIZE); 2502 mips_dcache_wbinv_range(va, PAGE_SIZE); 2503 } else { 2504 va = pmap_lmem_map1(phys); 2505 bzero((caddr_t)va, PAGE_SIZE); 2506 mips_dcache_wbinv_range(va, PAGE_SIZE); 2507 pmap_lmem_unmap(); 2508 } 2509} 2510 2511/* 2512 * pmap_zero_page_area zeros the specified hardware page by mapping 2513 * the page into KVM and using bzero to clear its contents. 2514 * 2515 * off and size may not cover an area beyond a single hardware page. 2516 */ 2517void 2518pmap_zero_page_area(vm_page_t m, int off, int size) 2519{ 2520 vm_offset_t va; 2521 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2522 2523 if (MIPS_DIRECT_MAPPABLE(phys)) { 2524 va = MIPS_PHYS_TO_DIRECT(phys); 2525 bzero((char *)(caddr_t)va + off, size); 2526 mips_dcache_wbinv_range(va + off, size); 2527 } else { 2528 va = pmap_lmem_map1(phys); 2529 bzero((char *)va + off, size); 2530 mips_dcache_wbinv_range(va + off, size); 2531 pmap_lmem_unmap(); 2532 } 2533} 2534 2535void 2536pmap_zero_page_idle(vm_page_t m) 2537{ 2538 vm_offset_t va; 2539 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 2540 2541 if (MIPS_DIRECT_MAPPABLE(phys)) { 2542 va = MIPS_PHYS_TO_DIRECT(phys); 2543 bzero((caddr_t)va, PAGE_SIZE); 2544 mips_dcache_wbinv_range(va, PAGE_SIZE); 2545 } else { 2546 va = pmap_lmem_map1(phys); 2547 bzero((caddr_t)va, PAGE_SIZE); 2548 mips_dcache_wbinv_range(va, PAGE_SIZE); 2549 pmap_lmem_unmap(); 2550 } 2551} 2552 2553/* 2554 * pmap_copy_page copies the specified (machine independent) 2555 * page by mapping the page into virtual memory and using 2556 * bcopy to copy the page, one machine dependent page at a 2557 * time. 
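 *
 * The source page's user mappings are written back first
 * (pmap_flush_pvcache()) and the destination's direct-map range is
 * invalidated by index, so the bcopy() through the direct map sees
 * and leaves coherent data; when either page is not direct-mappable,
 * pmap_lmem_map2() supplies a temporary two-page window instead.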
2558 * 2559 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit. 2560 */ 2561void 2562pmap_copy_page(vm_page_t src, vm_page_t dst) 2563{ 2564 vm_offset_t va_src, va_dst; 2565 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src); 2566 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst); 2567 2568 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) { 2569 /* easy case, all can be accessed via KSEG0 */ 2570 /* 2571 * Flush all caches for VA that are mapped to this page 2572 * to make sure that data in SDRAM is up to date 2573 */ 2574 pmap_flush_pvcache(src); 2575 mips_dcache_wbinv_range_index( 2576 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE); 2577 va_src = MIPS_PHYS_TO_DIRECT(phys_src); 2578 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst); 2579 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE); 2580 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2581 } else { 2582 va_src = pmap_lmem_map2(phys_src, phys_dst); 2583 va_dst = va_src + PAGE_SIZE; 2584 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE); 2585 mips_dcache_wbinv_range(va_dst, PAGE_SIZE); 2586 pmap_lmem_unmap(); 2587 } 2588} 2589 2590int unmapped_buf_allowed; 2591 2592void 2593pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2594 vm_offset_t b_offset, int xfersize) 2595{ 2596 char *a_cp, *b_cp; 2597 vm_page_t a_m, b_m; 2598 vm_offset_t a_pg_offset, b_pg_offset; 2599 vm_paddr_t a_phys, b_phys; 2600 int cnt; 2601 2602 while (xfersize > 0) { 2603 a_pg_offset = a_offset & PAGE_MASK; 2604 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2605 a_m = ma[a_offset >> PAGE_SHIFT]; 2606 a_phys = VM_PAGE_TO_PHYS(a_m); 2607 b_pg_offset = b_offset & PAGE_MASK; 2608 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2609 b_m = mb[b_offset >> PAGE_SHIFT]; 2610 b_phys = VM_PAGE_TO_PHYS(b_m); 2611 if (MIPS_DIRECT_MAPPABLE(a_phys) && 2612 MIPS_DIRECT_MAPPABLE(b_phys)) { 2613 pmap_flush_pvcache(a_m); 2614 mips_dcache_wbinv_range_index( 2615 MIPS_PHYS_TO_DIRECT(b_phys), PAGE_SIZE); 2616 a_cp = (char *)MIPS_PHYS_TO_DIRECT(a_phys) + 2617 a_pg_offset; 2618 b_cp = (char *)MIPS_PHYS_TO_DIRECT(b_phys) + 2619 b_pg_offset; 2620 bcopy(a_cp, b_cp, cnt); 2621 mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt); 2622 } else { 2623 a_cp = (char *)pmap_lmem_map2(a_phys, b_phys); 2624 b_cp = (char *)a_cp + PAGE_SIZE; 2625 a_cp += a_pg_offset; 2626 b_cp += b_pg_offset; 2627 bcopy(a_cp, b_cp, cnt); 2628 mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt); 2629 pmap_lmem_unmap(); 2630 } 2631 a_offset += cnt; 2632 b_offset += cnt; 2633 xfersize -= cnt; 2634 } 2635} 2636 2637/* 2638 * Returns true if the pmap's pv is one of the first 2639 * 16 pvs linked to from this page. This count may 2640 * be changed upwards or downwards in the future; it 2641 * is only necessary that true be returned for a small 2642 * subset of pmaps for proper page aging. 2643 */ 2644boolean_t 2645pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2646{ 2647 pv_entry_t pv; 2648 int loops = 0; 2649 boolean_t rv; 2650 2651 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2652 ("pmap_page_exists_quick: page %p is not managed", m)); 2653 rv = FALSE; 2654 rw_wlock(&pvh_global_lock); 2655 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2656 if (PV_PMAP(pv) == pmap) { 2657 rv = TRUE; 2658 break; 2659 } 2660 loops++; 2661 if (loops >= 16) 2662 break; 2663 } 2664 rw_wunlock(&pvh_global_lock); 2665 return (rv); 2666} 2667 2668/* 2669 * Remove all pages from specified address space 2670 * this aids process exit speeds. 
Also, this code 2671 * is special cased for current process only, but 2672 * can have the more generic (and slightly slower) 2673 * mode enabled. This is much faster than pmap_remove 2674 * in the case of running down an entire address space. 2675 */ 2676void 2677pmap_remove_pages(pmap_t pmap) 2678{ 2679 pd_entry_t *pde; 2680 pt_entry_t *pte, tpte; 2681 pv_entry_t pv; 2682 vm_page_t m; 2683 struct pv_chunk *pc, *npc; 2684 u_long inuse, bitmask; 2685 int allfree, bit, field, idx; 2686 2687 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 2688 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2689 return; 2690 } 2691 rw_wlock(&pvh_global_lock); 2692 PMAP_LOCK(pmap); 2693 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2694 allfree = 1; 2695 for (field = 0; field < _NPCM; field++) { 2696 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2697 while (inuse != 0) { 2698 bit = ffsl(inuse) - 1; 2699 bitmask = 1UL << bit; 2700 idx = field * sizeof(inuse) * NBBY + bit; 2701 pv = &pc->pc_pventry[idx]; 2702 inuse &= ~bitmask; 2703 2704 pde = pmap_pde(pmap, pv->pv_va); 2705 KASSERT(pde != NULL && *pde != 0, 2706 ("pmap_remove_pages: pde")); 2707 pte = pmap_pde_to_pte(pde, pv->pv_va); 2708 if (!pte_test(pte, PTE_V)) 2709 panic("pmap_remove_pages: bad pte"); 2710 tpte = *pte; 2711 2712/* 2713 * We cannot remove wired pages from a process' mapping at this time 2714 */ 2715 if (pte_test(&tpte, PTE_W)) { 2716 allfree = 0; 2717 continue; 2718 } 2719 *pte = is_kernel_pmap(pmap) ? PTE_G : 0; 2720 2721 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte)); 2722 KASSERT(m != NULL, 2723 ("pmap_remove_pages: bad tpte %#jx", 2724 (uintmax_t)tpte)); 2725 2726 /* 2727 * Update the vm_page_t clean and reference bits. 2728 */ 2729 if (pte_test(&tpte, PTE_D)) 2730 vm_page_dirty(m); 2731 2732 /* Mark free */ 2733 PV_STAT(pv_entry_frees++); 2734 PV_STAT(pv_entry_spare++); 2735 pv_entry_count--; 2736 pc->pc_map[field] |= bitmask; 2737 pmap->pm_stats.resident_count--; 2738 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2739 if (TAILQ_EMPTY(&m->md.pv_list)) 2740 vm_page_aflag_clear(m, PGA_WRITEABLE); 2741 pmap_unuse_pt(pmap, pv->pv_va, *pde); 2742 } 2743 } 2744 if (allfree) { 2745 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2746 free_pv_chunk(pc); 2747 } 2748 } 2749 pmap_invalidate_all(pmap); 2750 PMAP_UNLOCK(pmap); 2751 rw_wunlock(&pvh_global_lock); 2752} 2753 2754/* 2755 * pmap_testbit tests bits in pte's 2756 */ 2757static boolean_t 2758pmap_testbit(vm_page_t m, int bit) 2759{ 2760 pv_entry_t pv; 2761 pmap_t pmap; 2762 pt_entry_t *pte; 2763 boolean_t rv = FALSE; 2764 2765 if (m->oflags & VPO_UNMANAGED) 2766 return (rv); 2767 2768 rw_assert(&pvh_global_lock, RA_WLOCKED); 2769 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2770 pmap = PV_PMAP(pv); 2771 PMAP_LOCK(pmap); 2772 pte = pmap_pte(pmap, pv->pv_va); 2773 rv = pte_test(pte, bit); 2774 PMAP_UNLOCK(pmap); 2775 if (rv) 2776 break; 2777 } 2778 return (rv); 2779} 2780 2781/* 2782 * pmap_page_wired_mappings: 2783 * 2784 * Return the number of managed mappings to the given physical page 2785 * that are wired. 
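 *
 * For example, a physical page that has been wired into two different
 * address spaces (two PTE_W mappings on its pv list) is reported as 2,
 * while an unmanaged page always reports 0 since it carries no pv list.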
2786 */ 2787int 2788pmap_page_wired_mappings(vm_page_t m) 2789{ 2790 pv_entry_t pv; 2791 pmap_t pmap; 2792 pt_entry_t *pte; 2793 int count; 2794 2795 count = 0; 2796 if ((m->oflags & VPO_UNMANAGED) != 0) 2797 return (count); 2798 rw_wlock(&pvh_global_lock); 2799 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2800 pmap = PV_PMAP(pv); 2801 PMAP_LOCK(pmap); 2802 pte = pmap_pte(pmap, pv->pv_va); 2803 if (pte_test(pte, PTE_W)) 2804 count++; 2805 PMAP_UNLOCK(pmap); 2806 } 2807 rw_wunlock(&pvh_global_lock); 2808 return (count); 2809} 2810 2811/* 2812 * Clear the write and modified bits in each of the given page's mappings. 2813 */ 2814void 2815pmap_remove_write(vm_page_t m) 2816{ 2817 pmap_t pmap; 2818 pt_entry_t pbits, *pte; 2819 pv_entry_t pv; 2820 2821 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2822 ("pmap_remove_write: page %p is not managed", m)); 2823 2824 /* 2825 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2826 * set by another thread while the object is locked. Thus, 2827 * if PGA_WRITEABLE is clear, no page table entries need updating. 2828 */ 2829 VM_OBJECT_ASSERT_WLOCKED(m->object); 2830 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2831 return; 2832 rw_wlock(&pvh_global_lock); 2833 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2834 pmap = PV_PMAP(pv); 2835 PMAP_LOCK(pmap); 2836 pte = pmap_pte(pmap, pv->pv_va); 2837 KASSERT(pte != NULL && pte_test(pte, PTE_V), 2838 ("page on pv_list has no pte")); 2839 pbits = *pte; 2840 if (pte_test(&pbits, PTE_D)) { 2841 pte_clear(&pbits, PTE_D); 2842 vm_page_dirty(m); 2843 } 2844 pte_set(&pbits, PTE_RO); 2845 if (pbits != *pte) { 2846 *pte = pbits; 2847 pmap_update_page(pmap, pv->pv_va, pbits); 2848 } 2849 PMAP_UNLOCK(pmap); 2850 } 2851 vm_page_aflag_clear(m, PGA_WRITEABLE); 2852 rw_wunlock(&pvh_global_lock); 2853} 2854 2855/* 2856 * pmap_ts_referenced: 2857 * 2858 * Return the count of reference bits for a page, clearing all of them. 2859 */ 2860int 2861pmap_ts_referenced(vm_page_t m) 2862{ 2863 2864 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2865 ("pmap_ts_referenced: page %p is not managed", m)); 2866 if (m->md.pv_flags & PV_TABLE_REF) { 2867 rw_wlock(&pvh_global_lock); 2868 m->md.pv_flags &= ~PV_TABLE_REF; 2869 rw_wunlock(&pvh_global_lock); 2870 return (1); 2871 } 2872 return (0); 2873} 2874 2875/* 2876 * pmap_is_modified: 2877 * 2878 * Return whether or not the specified physical page was modified 2879 * in any physical maps. 2880 */ 2881boolean_t 2882pmap_is_modified(vm_page_t m) 2883{ 2884 boolean_t rv; 2885 2886 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2887 ("pmap_is_modified: page %p is not managed", m)); 2888 2889 /* 2890 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2891 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2892 * is clear, no PTEs can have PTE_D set. 2893 */ 2894 VM_OBJECT_ASSERT_WLOCKED(m->object); 2895 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2896 return (FALSE); 2897 rw_wlock(&pvh_global_lock); 2898 rv = pmap_testbit(m, PTE_D); 2899 rw_wunlock(&pvh_global_lock); 2900 return (rv); 2901} 2902 2903/* N/C */ 2904 2905/* 2906 * pmap_is_prefaultable: 2907 * 2908 * Return whether or not the specified virtual address is elgible 2909 * for prefault. 
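 *
 * A sketch of the intended use, with hypothetical names for the MI
 * fault-ahead code: for each nearby address "addr" whose backing page
 * "m" is already resident,
 *
 *	if (pmap_is_prefaultable(pmap, addr))
 *		pmap_enter_quick(pmap, addr, m, prot);
 *
 * i.e. the check filters out addresses that already have a mapping or
 * whose page table page has not been allocated before the cheap
 * read-only pmap_enter_quick() is attempted.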
2910 */ 2911boolean_t 2912pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2913{ 2914 pd_entry_t *pde; 2915 pt_entry_t *pte; 2916 boolean_t rv; 2917 2918 rv = FALSE; 2919 PMAP_LOCK(pmap); 2920 pde = pmap_pde(pmap, addr); 2921 if (pde != NULL && *pde != 0) { 2922 pte = pmap_pde_to_pte(pde, addr); 2923 rv = (*pte == 0); 2924 } 2925 PMAP_UNLOCK(pmap); 2926 return (rv); 2927} 2928 2929/* 2930 * Apply the given advice to the specified range of addresses within the 2931 * given pmap. Depending on the advice, clear the referenced and/or 2932 * modified flags in each mapping and set the mapped page's dirty field. 2933 */ 2934void 2935pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 2936{ 2937 pd_entry_t *pde, *pdpe; 2938 pt_entry_t *pte; 2939 vm_offset_t va, va_next; 2940 vm_paddr_t pa; 2941 vm_page_t m; 2942 2943 if (advice != MADV_DONTNEED && advice != MADV_FREE) 2944 return; 2945 rw_wlock(&pvh_global_lock); 2946 PMAP_LOCK(pmap); 2947 for (; sva < eva; sva = va_next) { 2948 pdpe = pmap_segmap(pmap, sva); 2949#ifdef __mips_n64 2950 if (*pdpe == 0) { 2951 va_next = (sva + NBSEG) & ~SEGMASK; 2952 if (va_next < sva) 2953 va_next = eva; 2954 continue; 2955 } 2956#endif 2957 va_next = (sva + NBPDR) & ~PDRMASK; 2958 if (va_next < sva) 2959 va_next = eva; 2960 2961 pde = pmap_pdpe_to_pde(pdpe, sva); 2962 if (*pde == NULL) 2963 continue; 2964 2965 /* 2966 * Limit our scan to either the end of the va represented 2967 * by the current page table page, or to the end of the 2968 * range being write protected. 2969 */ 2970 if (va_next > eva) 2971 va_next = eva; 2972 2973 va = va_next; 2974 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2975 sva += PAGE_SIZE) { 2976 if (!pte_test(pte, PTE_MANAGED | PTE_V)) { 2977 if (va != va_next) { 2978 pmap_invalidate_range(pmap, va, sva); 2979 va = va_next; 2980 } 2981 continue; 2982 } 2983 pa = TLBLO_PTE_TO_PA(*pte); 2984 m = PHYS_TO_VM_PAGE(pa); 2985 m->md.pv_flags &= ~PV_TABLE_REF; 2986 if (pte_test(pte, PTE_D)) { 2987 if (advice == MADV_DONTNEED) { 2988 /* 2989 * Future calls to pmap_is_modified() 2990 * can be avoided by making the page 2991 * dirty now. 2992 */ 2993 vm_page_dirty(m); 2994 } else { 2995 pte_clear(pte, PTE_D); 2996 if (va == va_next) 2997 va = sva; 2998 } 2999 } else { 3000 /* 3001 * Unless PTE_D is set, any TLB entries 3002 * mapping "sva" don't allow write access, so 3003 * they needn't be invalidated. 3004 */ 3005 if (va != va_next) { 3006 pmap_invalidate_range(pmap, va, sva); 3007 va = va_next; 3008 } 3009 } 3010 } 3011 if (va != va_next) 3012 pmap_invalidate_range(pmap, va, sva); 3013 } 3014 rw_wunlock(&pvh_global_lock); 3015 PMAP_UNLOCK(pmap); 3016} 3017 3018/* 3019 * Clear the modify bits on the specified physical page. 3020 */ 3021void 3022pmap_clear_modify(vm_page_t m) 3023{ 3024 pmap_t pmap; 3025 pt_entry_t *pte; 3026 pv_entry_t pv; 3027 3028 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3029 ("pmap_clear_modify: page %p is not managed", m)); 3030 VM_OBJECT_ASSERT_WLOCKED(m->object); 3031 KASSERT(!vm_page_xbusied(m), 3032 ("pmap_clear_modify: page %p is exclusive busied", m)); 3033 3034 /* 3035 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set. 3036 * If the object containing the page is locked and the page is not 3037 * write busied, then PGA_WRITEABLE cannot be concurrently set. 
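 *
 * (In this file PGA_WRITEABLE is only set by pmap_enter(), which runs
 * with either the page exclusive busied or its object locked, so with
 * the object write-locked here and the page not exclusive busied no
 * new writable mapping, and therefore no new PTE_D, can appear while
 * the pv list is scanned below.)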
3038 */ 3039 if ((m->aflags & PGA_WRITEABLE) == 0) 3040 return; 3041 rw_wlock(&pvh_global_lock); 3042 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3043 pmap = PV_PMAP(pv); 3044 PMAP_LOCK(pmap); 3045 pte = pmap_pte(pmap, pv->pv_va); 3046 if (pte_test(pte, PTE_D)) { 3047 pte_clear(pte, PTE_D); 3048 pmap_update_page(pmap, pv->pv_va, *pte); 3049 } 3050 PMAP_UNLOCK(pmap); 3051 } 3052 rw_wunlock(&pvh_global_lock); 3053} 3054 3055/* 3056 * pmap_is_referenced: 3057 * 3058 * Return whether or not the specified physical page was referenced 3059 * in any physical maps. 3060 */ 3061boolean_t 3062pmap_is_referenced(vm_page_t m) 3063{ 3064 3065 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3066 ("pmap_is_referenced: page %p is not managed", m)); 3067 return ((m->md.pv_flags & PV_TABLE_REF) != 0); 3068} 3069 3070/* 3071 * Miscellaneous support routines follow 3072 */ 3073 3074/* 3075 * Map a set of physical memory pages into the kernel virtual 3076 * address space. Return a pointer to where it is mapped. This 3077 * routine is intended to be used for mapping device memory, 3078 * NOT real memory. 3079 * 3080 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit. 3081 */ 3082void * 3083pmap_mapdev(vm_paddr_t pa, vm_size_t size) 3084{ 3085 vm_offset_t va, tmpva, offset; 3086 3087 /* 3088 * KSEG1 maps only first 512M of phys address space. For 3089 * pa > 0x20000000 we should make proper mapping * using pmap_kenter. 3090 */ 3091 if (MIPS_DIRECT_MAPPABLE(pa + size - 1)) 3092 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa)); 3093 else { 3094 offset = pa & PAGE_MASK; 3095 size = roundup(size + offset, PAGE_SIZE); 3096 3097 va = kva_alloc(size); 3098 if (!va) 3099 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3100 pa = trunc_page(pa); 3101 for (tmpva = va; size > 0;) { 3102 pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED); 3103 size -= PAGE_SIZE; 3104 tmpva += PAGE_SIZE; 3105 pa += PAGE_SIZE; 3106 } 3107 } 3108 3109 return ((void *)(va + offset)); 3110} 3111 3112void 3113pmap_unmapdev(vm_offset_t va, vm_size_t size) 3114{ 3115#ifndef __mips_n64 3116 vm_offset_t base, offset; 3117 3118 /* If the address is within KSEG1 then there is nothing to do */ 3119 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END) 3120 return; 3121 3122 base = trunc_page(va); 3123 offset = va & PAGE_MASK; 3124 size = roundup(size + offset, PAGE_SIZE); 3125 kva_free(base, size); 3126#endif 3127} 3128 3129/* 3130 * perform the pmap work for mincore 3131 */ 3132int 3133pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 3134{ 3135 pt_entry_t *ptep, pte; 3136 vm_paddr_t pa; 3137 vm_page_t m; 3138 int val; 3139 3140 PMAP_LOCK(pmap); 3141retry: 3142 ptep = pmap_pte(pmap, addr); 3143 pte = (ptep != NULL) ? *ptep : 0; 3144 if (!pte_test(&pte, PTE_V)) { 3145 val = 0; 3146 goto out; 3147 } 3148 val = MINCORE_INCORE; 3149 if (pte_test(&pte, PTE_D)) 3150 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3151 pa = TLBLO_PTE_TO_PA(pte); 3152 if (pte_test(&pte, PTE_MANAGED)) { 3153 /* 3154 * This may falsely report the given address as 3155 * MINCORE_REFERENCED. Unfortunately, due to the lack of 3156 * per-PTE reference information, it is impossible to 3157 * determine if the address is MINCORE_REFERENCED. 
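 *
 * For example, a resident page mapped by a dirty (PTE_D) entry in the
 * queried pmap reports at least MINCORE_INCORE | MINCORE_MODIFIED |
 * MINCORE_MODIFIED_OTHER, and a managed page with PGA_REFERENCED set
 * additionally reports MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER.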
3158 */ 3159 m = PHYS_TO_VM_PAGE(pa); 3160 if ((m->aflags & PGA_REFERENCED) != 0) 3161 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3162 } 3163 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 3164 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && 3165 pte_test(&pte, PTE_MANAGED)) { 3166 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 3167 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 3168 goto retry; 3169 } else 3170out: 3171 PA_UNLOCK_COND(*locked_pa); 3172 PMAP_UNLOCK(pmap); 3173 return (val); 3174} 3175 3176void 3177pmap_activate(struct thread *td) 3178{ 3179 pmap_t pmap, oldpmap; 3180 struct proc *p = td->td_proc; 3181 u_int cpuid; 3182 3183 critical_enter(); 3184 3185 pmap = vmspace_pmap(p->p_vmspace); 3186 oldpmap = PCPU_GET(curpmap); 3187 cpuid = PCPU_GET(cpuid); 3188 3189 if (oldpmap) 3190 CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); 3191 CPU_SET_ATOMIC(cpuid, &pmap->pm_active); 3192 pmap_asid_alloc(pmap); 3193 if (td == curthread) { 3194 PCPU_SET(segbase, pmap->pm_segtab); 3195 mips_wr_entryhi(pmap->pm_asid[cpuid].asid); 3196 } 3197 3198 PCPU_SET(curpmap, pmap); 3199 critical_exit(); 3200} 3201 3202static void 3203pmap_sync_icache_one(void *arg __unused) 3204{ 3205 3206 mips_icache_sync_all(); 3207 mips_dcache_wbinv_all(); 3208} 3209 3210void 3211pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 3212{ 3213 3214 smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL); 3215} 3216 3217/* 3218 * Increase the starting virtual address of the given mapping if a 3219 * different alignment might result in more superpage mappings. 3220 */ 3221void 3222pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3223 vm_offset_t *addr, vm_size_t size) 3224{ 3225 vm_offset_t superpage_offset; 3226 3227 if (size < NBSEG) 3228 return; 3229 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 3230 offset += ptoa(object->pg_color); 3231 superpage_offset = offset & SEGMASK; 3232 if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG || 3233 (*addr & SEGMASK) == superpage_offset) 3234 return; 3235 if ((*addr & SEGMASK) < superpage_offset) 3236 *addr = (*addr & ~SEGMASK) + superpage_offset; 3237 else 3238 *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset; 3239} 3240 3241#ifdef DDB 3242DB_SHOW_COMMAND(ptable, ddb_pid_dump) 3243{ 3244 pmap_t pmap; 3245 struct thread *td = NULL; 3246 struct proc *p; 3247 int i, j, k; 3248 vm_paddr_t pa; 3249 vm_offset_t va; 3250 3251 if (have_addr) { 3252 td = db_lookup_thread(addr, TRUE); 3253 if (td == NULL) { 3254 db_printf("Invalid pid or tid"); 3255 return; 3256 } 3257 p = td->td_proc; 3258 if (p->p_vmspace == NULL) { 3259 db_printf("No vmspace for process"); 3260 return; 3261 } 3262 pmap = vmspace_pmap(p->p_vmspace); 3263 } else 3264 pmap = kernel_pmap; 3265 3266 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n", 3267 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid, 3268 pmap->pm_asid[0].gen); 3269 for (i = 0; i < NPDEPG; i++) { 3270 pd_entry_t *pdpe; 3271 pt_entry_t *pde; 3272 pt_entry_t pte; 3273 3274 pdpe = (pd_entry_t *)pmap->pm_segtab[i]; 3275 if (pdpe == NULL) 3276 continue; 3277 db_printf("[%4d] %p\n", i, pdpe); 3278#ifdef __mips_n64 3279 for (j = 0; j < NPDEPG; j++) { 3280 pde = (pt_entry_t *)pdpe[j]; 3281 if (pde == NULL) 3282 continue; 3283 db_printf("\t[%4d] %p\n", j, pde); 3284#else 3285 { 3286 j = 0; 3287 pde = (pt_entry_t *)pdpe; 3288#endif 3289 for (k = 0; k < NPTEPG; k++) { 3290 pte = pde[k]; 3291 if (pte == 0 || !pte_test(&pte, PTE_V)) 3292 continue; 3293 pa = 
TLBLO_PTE_TO_PA(pte); 3294 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT); 3295 db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n", 3296 k, (void *)va, (uintmax_t)pte, (uintmax_t)pa); 3297 } 3298 } 3299 } 3300} 3301#endif 3302 3303#if defined(DEBUG) 3304 3305static void pads(pmap_t pm); 3306void pmap_pvdump(vm_offset_t pa); 3307 3308/* print address space of pmap*/ 3309static void 3310pads(pmap_t pm) 3311{ 3312 unsigned va, i, j; 3313 pt_entry_t *ptep; 3314 3315 if (pm == kernel_pmap) 3316 return; 3317 for (i = 0; i < NPTEPG; i++) 3318 if (pm->pm_segtab[i]) 3319 for (j = 0; j < NPTEPG; j++) { 3320 va = (i << SEGSHIFT) + (j << PAGE_SHIFT); 3321 if (pm == kernel_pmap && va < KERNBASE) 3322 continue; 3323 if (pm != kernel_pmap && 3324 va >= VM_MAXUSER_ADDRESS) 3325 continue; 3326 ptep = pmap_pte(pm, va); 3327 if (pte_test(ptep, PTE_V)) 3328 printf("%x:%x ", va, *(int *)ptep); 3329 } 3330 3331} 3332 3333void 3334pmap_pvdump(vm_offset_t pa) 3335{ 3336 register pv_entry_t pv; 3337 vm_page_t m; 3338 3339 printf("pa %x", pa); 3340 m = PHYS_TO_VM_PAGE(pa); 3341 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3342 pv = TAILQ_NEXT(pv, pv_list)) { 3343 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); 3344 pads(pv->pv_pmap); 3345 } 3346 printf(" "); 3347} 3348 3349/* N/C */ 3350#endif 3351 3352 3353/* 3354 * Allocate TLB address space tag (called ASID or TLBPID) and return it. 3355 * It takes almost as much or more time to search the TLB for a 3356 * specific ASID and flush those entries as it does to flush the entire TLB. 3357 * Therefore, when we allocate a new ASID, we just take the next number. When 3358 * we run out of numbers, we flush the TLB, increment the generation count 3359 * and start over. ASID zero is reserved for kernel use. 3360 */ 3361static void 3362pmap_asid_alloc(pmap) 3363 pmap_t pmap; 3364{ 3365 if (pmap->pm_asid[PCPU_GET(cpuid)].asid != PMAP_ASID_RESERVED && 3366 pmap->pm_asid[PCPU_GET(cpuid)].gen == PCPU_GET(asid_generation)); 3367 else { 3368 if (PCPU_GET(next_asid) == pmap_max_asid) { 3369 tlb_invalidate_all_user(NULL); 3370 PCPU_SET(asid_generation, 3371 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK); 3372 if (PCPU_GET(asid_generation) == 0) { 3373 PCPU_SET(asid_generation, 1); 3374 } 3375 PCPU_SET(next_asid, 1); /* 0 means invalid */ 3376 } 3377 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid); 3378 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation); 3379 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1); 3380 } 3381} 3382 3383static pt_entry_t 3384init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot) 3385{ 3386 pt_entry_t rw; 3387 3388 if (!(prot & VM_PROT_WRITE)) 3389 rw = PTE_V | PTE_RO; 3390 else if ((m->oflags & VPO_UNMANAGED) == 0) { 3391 if ((access & VM_PROT_WRITE) != 0) 3392 rw = PTE_V | PTE_D; 3393 else 3394 rw = PTE_V; 3395 } else 3396 /* Needn't emulate a modified bit for unmanaged pages. */ 3397 rw = PTE_V | PTE_D; 3398 return (rw); 3399} 3400 3401/* 3402 * pmap_emulate_modified : do dirty bit emulation 3403 * 3404 * On SMP, update just the local TLB, other CPUs will update their 3405 * TLBs from PTE lazily, if they get the exception. 3406 * Returns 0 in case of sucess, 1 if the page is read only and we 3407 * need to fault. 
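 *
 * Illustrative only, with hypothetical names: the TLB-modified
 * exception path is expected to do roughly
 *
 *	pmap = &curproc->p_vmspace->vm_pmap;
 *	if (pmap_emulate_modified(pmap, badvaddr) != 0)
 *		... fall back to vm_fault() with a write fault type ...
 *
 * A zero return means the local TLB again holds a writable, dirty
 * entry and the faulting store can simply be retried.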
3408 */ 3409int 3410pmap_emulate_modified(pmap_t pmap, vm_offset_t va) 3411{ 3412 pt_entry_t *pte; 3413 3414 PMAP_LOCK(pmap); 3415 pte = pmap_pte(pmap, va); 3416 if (pte == NULL) 3417 panic("pmap_emulate_modified: can't find PTE"); 3418#ifdef SMP 3419 /* It is possible that some other CPU changed m-bit */ 3420 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) { 3421 tlb_update(pmap, va, *pte); 3422 PMAP_UNLOCK(pmap); 3423 return (0); 3424 } 3425#else 3426 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) 3427 panic("pmap_emulate_modified: invalid pte"); 3428#endif 3429 if (pte_test(pte, PTE_RO)) { 3430 PMAP_UNLOCK(pmap); 3431 return (1); 3432 } 3433 pte_set(pte, PTE_D); 3434 tlb_update(pmap, va, *pte); 3435 if (!pte_test(pte, PTE_MANAGED)) 3436 panic("pmap_emulate_modified: unmanaged page"); 3437 PMAP_UNLOCK(pmap); 3438 return (0); 3439} 3440 3441/* 3442 * Routine: pmap_kextract 3443 * Function: 3444 * Extract the physical page address associated 3445 * virtual address. 3446 */ 3447vm_paddr_t 3448pmap_kextract(vm_offset_t va) 3449{ 3450 int mapped; 3451 3452 /* 3453 * First, the direct-mapped regions. 3454 */ 3455#if defined(__mips_n64) 3456 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END) 3457 return (MIPS_XKPHYS_TO_PHYS(va)); 3458#endif 3459 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END) 3460 return (MIPS_KSEG0_TO_PHYS(va)); 3461 3462 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END) 3463 return (MIPS_KSEG1_TO_PHYS(va)); 3464 3465 /* 3466 * User virtual addresses. 3467 */ 3468 if (va < VM_MAXUSER_ADDRESS) { 3469 pt_entry_t *ptep; 3470 3471 if (curproc && curproc->p_vmspace) { 3472 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va); 3473 if (ptep) { 3474 return (TLBLO_PTE_TO_PA(*ptep) | 3475 (va & PAGE_MASK)); 3476 } 3477 return (0); 3478 } 3479 } 3480 3481 /* 3482 * Should be kernel virtual here, otherwise fail 3483 */ 3484 mapped = (va >= MIPS_KSEG2_START || va < MIPS_KSEG2_END); 3485#if defined(__mips_n64) 3486 mapped = mapped || (va >= MIPS_XKSEG_START || va < MIPS_XKSEG_END); 3487#endif 3488 /* 3489 * Kernel virtual. 3490 */ 3491 3492 if (mapped) { 3493 pt_entry_t *ptep; 3494 3495 /* Is the kernel pmap initialized? */ 3496 if (!CPU_EMPTY(&kernel_pmap->pm_active)) { 3497 /* It's inside the virtual address range */ 3498 ptep = pmap_pte(kernel_pmap, va); 3499 if (ptep) { 3500 return (TLBLO_PTE_TO_PA(*ptep) | 3501 (va & PAGE_MASK)); 3502 } 3503 } 3504 return (0); 3505 } 3506 3507 panic("%s for unknown address space %p.", __func__, (void *)va); 3508} 3509 3510 3511void 3512pmap_flush_pvcache(vm_page_t m) 3513{ 3514 pv_entry_t pv; 3515 3516 if (m != NULL) { 3517 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; 3518 pv = TAILQ_NEXT(pv, pv_list)) { 3519 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE); 3520 } 3521 } 3522} 3523
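
/*
 * Usage note (illustrative only): for kernel virtual addresses,
 * pmap_kextract() is what vtophys()-style conversions reduce to, e.g.
 *
 *	pa = pmap_kextract((vm_offset_t)some_kernel_buffer);
 *
 * where "some_kernel_buffer" is a hypothetical name for any
 * direct-mapped (KSEG0/XKPHYS) or mapped kernel (KSEG2/XKSEG) address.
 */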