/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014 The FreeBSD Foundation
 * All rights reserved.
 * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Portions of this software were developed by Andrew Turner under
 * sponsorship from The FreeBSD Foundation.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/riscv/riscv/pmap.c 338484 2018-09-05 21:28:33Z kib $");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NUPDE		(NPDEPG * NPDEPG)
#define	NUSERPGTBLS	(NUPDE + NPDEPG)

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
	(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
	PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
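
/*
 * Illustrative sketch (added commentary, not part of the build): the
 * macros above implement the "chained" PV list locking idiom.  A caller
 * that visits several pages keeps at most one PV list lock held and
 * switches it as the physical address changes:
 *
 *	struct rwlock *lock = NULL;
 *	...
 *	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 *	... manipulate m->md.pv_list ...
 *	if (lock != NULL)
 *		rw_wunlock(lock);
 *
 * pmap_enter() and pmap_remove() below both follow this pattern.
 */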
/* The list of all the user pmaps */
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;

static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");

struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

static struct rwlock_padalign pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
		    vm_page_t m, vm_prot_t prot, vm_page_t mpte,
		    struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry)	atomic_swap_64(table, entry)
#define	pmap_set(table, mask)		atomic_set_64(table, mask)
#define	pmap_load_clear(table)		atomic_swap_64(table, 0)
#define	pmap_load(table)		(*table)

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

static __inline void
pagezero(void *p)
{

	bzero(p, PAGE_SIZE);
}

#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

#define	PTE_TO_PHYS(pte)	((pte >> PTE_PPN0_S) * PAGE_SIZE)

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	vm_paddr_t phys;
	pd_entry_t *l2;

	phys = PTE_TO_PHYS(pmap_load(l1));
	l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);

	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL)
		return (NULL);
	if ((pmap_load(l1) & PTE_VALID) == 0)
		return (NULL);
	if ((pmap_load(l1) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	vm_paddr_t phys;
	pt_entry_t *l3;

	phys = PTE_TO_PHYS(pmap_load(l2));
	l3 = (pt_entry_t *)PHYS_TO_DMAP(phys);

	return (&l3[pmap_l3_index(va)]);
}

static __inline pt_entry_t *
pmap_l3(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = pmap_l2(pmap, va);
	if (l2 == NULL)
		return (NULL);
	if ((pmap_load(l2) & PTE_VALID) == 0)
		return (NULL);
	if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
		return (NULL);

	return (pmap_l2_to_l3(l2, va));
}

static __inline int
pmap_is_write(pt_entry_t entry)
{

	if (entry & (1 << PTE_TYPE_S))
		return (1);

	return (0);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return (l3 & PTE_VALID);
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	/* TODO */

	return (0);
}
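
/*
 * Illustrative sketch (added commentary, not compiled): the helpers above
 * compose into a full three-level (Sv39-style) table walk.  For a mapped
 * 4KB page,
 *
 *	pt_entry_t *l3 = pmap_l3(pmap, va);
 *	if (l3 != NULL && pmap_l3_valid(pmap_load(l3)))
 *		pa = PTE_TO_PHYS(pmap_load(l3)) | (va & L3_OFFSET);
 *
 * recovers the physical address, since PTE_TO_PHYS() shifts the PPN field
 * down by PTE_PPN0_S and rescales it by PAGE_SIZE.
 */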
#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/* Checks if the page is dirty. */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return (pte & PTE_DIRTY);
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static void
pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
    pt_entry_t entry)
{
	struct pmap *user_pmap;
	pd_entry_t *l1;

	/* Distribute new kernel L1 entry to all the user pmaps */
	if (pmap != kernel_pmap)
		return;

	LIST_FOREACH(user_pmap, &allpmaps, pm_list) {
		l1 = &user_pmap->pm_l1[l1index];
		if (entry)
			pmap_load_store(l1, entry);
		else
			pmap_load_clear(l1);
	}
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S),
	    ("Invalid bootstrap L1 table"));

	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;
	u_int ret;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	/* L2 is superpages */
	ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT;
	ret += (va & L2_OFFSET);

	return (ret);
}

static void
pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart)
{
	vm_offset_t va;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;
	pt_entry_t entry;
	pn_t pn;

	pa = kernstart & ~L1_OFFSET;
	va = DMAP_MIN_ADDRESS;
	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);

	for (; va < DMAP_MAX_ADDRESS;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		/* superpages */
		pn = (pa / PAGE_SIZE);
		entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(&l1[l1_slot], entry);
	}

	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
	cpu_tlb_flushID();
}
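
/*
 * Added commentary: pmap_bootstrap_dmap() above covers the whole DMAP
 * window with L1 superpage entries.  Each entry is built the same way as
 * any leaf PTE in this file: pn = pa / PAGE_SIZE places the full page
 * number at PTE_PPN0_S, and PTE_TYPE_SRWX marks a supervisor
 * read/write/execute leaf, so one L1 slot maps L1_SIZE bytes of physical
 * memory 1:1 into the direct map.
 */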
static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	pt_entry_t entry;
	pd_entry_t *l2;
	vm_paddr_t pa;
	u_int l2_slot;
	pn_t pn;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pn = (pa / PAGE_SIZE);
		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(&l2[l2_slot], entry);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return (l3pt);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa, min_pa;
	int i;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");

	LIST_INIT(&allpmaps);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = KERNBASE - kern_delta;

	/*
	 * Find the minimum physical address. physmap is sorted,
	 * but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
		break;
	}

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt, min_pa);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialize phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & PTE_VALID) == 0)
			break;

		/* Check locore used L2 superpages */
		KASSERT((l2[l2_slot] & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S),
		    ("Invalid bootstrap L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L2_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);

	/* Create the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L2_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa)
			continue;

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int i;

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");
}

/*
 * Normal, non-SMP, invalidation functions.
 * We inline these within pmap.c for speed.
 */
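/*
 * Added commentary: all three invalidation routines below currently fall
 * back to a bare "sfence.vm", which under the privileged spec this code
 * targets flushes the local hart's address-translation caches wholesale;
 * the per-page and ranged variants are still TODO, as the bodies note.
 */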
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	/* TODO */

	sched_pin();
	__asm __volatile("sfence.vm");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	/* TODO */

	sched_pin();
	__asm __volatile("sfence.vm");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	/* TODO */

	sched_pin();
	__asm __volatile("sfence.vm");
	sched_unpin();
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l2p, l2;
	pt_entry_t *l3p, l3;
	vm_paddr_t pa;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Start with the l2 table.  We are unable to allocate
	 * pages in the l1 table.
	 */
	l2p = pmap_l2(pmap, va);
	if (l2p != NULL) {
		l2 = pmap_load(l2p);
		if ((l2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S)) {
			l3p = pmap_l2_to_l3(l2p, va);
			if (l3p != NULL) {
				l3 = pmap_load(l3p);
				pa = PTE_TO_PHYS(l3);
				pa |= (va & L3_OFFSET);
			}
		} else {
			/* L2 is superpages */
			pa = (l2 >> PTE_PPN1_S) << L2_SHIFT;
			pa |= (va & L2_OFFSET);
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *l3p, l3;
	vm_paddr_t phys;
	vm_paddr_t pa;
	vm_page_t m;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	l3p = pmap_l3(pmap, va);
	if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
		if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) {
			phys = PTE_TO_PHYS(l3);
			if (vm_page_pa_tryrelock(pmap, phys, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE(phys);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t *l2;
	pt_entry_t *l3;
	vm_paddr_t pa;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		l2 = pmap_l2(kernel_pmap, va);
		if (l2 == NULL)
			panic("pmap_kextract: No l2");
		if ((pmap_load(l2) & PTE_TYPE_M) !=
		    (PTE_TYPE_PTR << PTE_TYPE_S)) {
			/* superpages */
			pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT;
			pa |= (va & L2_OFFSET);
			return (pa);
		}

		l3 = pmap_l2_to_l3(l2, va);
		if (l3 == NULL)
			panic("pmap_kextract: No l3...");
		pa = PTE_TO_PHYS(pmap_load(l3));
		pa |= (va & PAGE_MASK);
	}
	return (pa);
}
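
/*
 * Added commentary: for a 2MB superpage the L2 PTE carries no PPN[0]
 * bits, so both pmap_extract() and pmap_kextract() rebuild the address
 * from PPN[1] upward: (pte >> PTE_PPN1_S) << L2_SHIFT selects the 2MB
 * frame and (va & L2_OFFSET) restores the offset within it.
 */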
/***************************************************
 * Low level mapping routines.....
 ***************************************************/

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{
	pt_entry_t entry;
	pt_entry_t *l3;
	vm_offset_t va;
	pn_t pn;

	KASSERT((pa & L3_OFFSET) == 0,
	    ("pmap_kenter_device: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kenter_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));

		pn = (pa / PAGE_SIZE);
		entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l3, entry);

		PTE_SYNC(l3);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = pmap_l3(kernel_pmap, va);
	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));

	if (pmap_l3_valid_cacheable(pmap_load(l3)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(l3);
	PTE_SYNC(l3);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *l3;
	vm_offset_t va;

	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
		pmap_load_clear(l3);
		PTE_SYNC(l3);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged.  Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{

	return PHYS_TO_DMAP(start);
}
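
/*
 * Usage sketch (added commentary): mapping one page of a device at a
 * page-aligned physical address "pa" into kernel VA "va" reserved by the
 * caller, and tearing it down again:
 *
 *	pmap_kenter_device(va, PAGE_SIZE, pa);
 *	...
 *	pmap_kremove_device(va, PAGE_SIZE);
 *
 * Both addresses and the size must be page-aligned, as the KASSERTs in
 * the two functions above enforce.
 */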
/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pt_entry_t *l3, pa;
	vm_offset_t va;
	vm_page_t m;
	pt_entry_t entry;
	pn_t pn;
	int i;

	va = sva;
	for (i = 0; i < count; i++) {
		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m);
		pn = (pa / PAGE_SIZE);
		l3 = pmap_l3(kernel_pmap, va);

		entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l3, entry);

		PTE_SYNC(l3);
		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *l3;
	vm_offset_t va;

	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

	va = sva;
	while (count-- > 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("pmap_qremove: Invalid address"));

		if (pmap_l3_valid_cacheable(pmap_load(l3)))
			cpu_dcache_wb_range(va, L3_SIZE);
		pmap_load_clear(l3);
		PTE_SYNC(l3);

		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}
/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else {
		return (FALSE);
	}
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
	vm_paddr_t phys;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= NUPDE) {
		/* PD page */
		pd_entry_t *l1;
		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
		PTE_SYNC(l1);
	} else {
		/* PTE page */
		pd_entry_t *l2;
		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUPDE) {
		pd_entry_t *l1;
		/* We just released a PT, unhold the matching PD */
		vm_page_t pdpg;

		l1 = pmap_l1(pmap, va);
		phys = PTE_TO_PHYS(pmap_load(l1));
		pdpg = PHYS_TO_VM_PAGE(phys);
		pmap_unwire_l3(pmap, va, pdpg, free);
	}
	pmap_invalidate_page(pmap, va);

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

/*
 * After removing an l3 entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
    struct spglist *free)
{
	vm_paddr_t phys;
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));

	phys = PTE_TO_PHYS(ptepde);

	mpte = PHYS_TO_VM_PAGE(phys);
	return (pmap_unwire_l3(pmap, va, mpte, free));
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
	pmap->pm_l1 = kernel_pmap->pm_l1;
}

int
pmap_pinit(pmap_t pmap)
{
	vm_paddr_t l1phys;
	vm_page_t l1pt;

	/*
	 * allocate the l1 page
	 */
	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
		VM_WAIT;

	l1phys = VM_PAGE_TO_PHYS(l1pt);
	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);

	if ((l1pt->flags & PG_ZERO) == 0)
		pagezero(pmap->pm_l1);

	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

	/* Install kernel pagetables */
	memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE);

	/* Add to the list of all user pmaps */
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);

	return (1);
}
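
/*
 * Added commentary: pmap_pinit() seeds every new user L1 table with a
 * full copy of the kernel's L1 entries, and pmap_distribute_l1() keeps
 * those copies coherent afterwards whenever a kernel L1 entry changes;
 * the allpmaps list exists to make that distribution walk possible.
 */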
/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine may sleep before
 * returning NULL.  It sleeps only if a lock pointer was given.
 *
 * Note: If a page allocation fails at page table level two or three,
 * one or two pages may be held during the wait, only to be released
 * afterwards.  This conservative approach is easily argued to avoid
 * race conditions.
 */
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
	vm_page_t m, /*pdppg, */pdpg;
	pt_entry_t entry;
	vm_paddr_t phys;
	pn_t pn;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (lockp != NULL) {
			RELEASE_PV_LIST_LOCK(lockp);
			PMAP_UNLOCK(pmap);
			rw_runlock(&pvh_global_lock);
			VM_WAIT;
			rw_rlock(&pvh_global_lock);
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}

	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	if (ptepindex >= NUPDE) {
		pd_entry_t *l1;
		vm_pindex_t l1index;

		l1index = ptepindex - NUPDE;
		l1 = &pmap->pm_l1[l1index];

		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l1, entry);
		pmap_distribute_l1(pmap, l1index, entry);

		PTE_SYNC(l1);

	} else {
		vm_pindex_t l1index;
		pd_entry_t *l1, *l2;

		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
		l1 = &pmap->pm_l1[l1index];
		if (pmap_load(l1) == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
			    lockp) == NULL) {
				--m->wire_count;
				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			phys = PTE_TO_PHYS(pmap_load(l1));
			pdpg = PHYS_TO_VM_PAGE(phys);
			pdpg->wire_count++;
		}

		phys = PTE_TO_PHYS(pmap_load(l1));
		l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
		l2 = &l2[ptepindex & Ln_ADDR_MASK];

		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l2, entry);

		PTE_SYNC(l2);
	}

	pmap_resident_count_inc(pmap, 1);

	return (m);
}

static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
	vm_pindex_t ptepindex;
	pd_entry_t *l2;
	vm_paddr_t phys;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_l2_pindex(va);
retry:
	/*
	 * Get the page directory entry
	 */
	l2 = pmap_l2(pmap, va);

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (l2 != NULL && pmap_load(l2) != 0) {
		phys = PTE_TO_PHYS(pmap_load(l2));
		m = PHYS_TO_VM_PAGE(phys);
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has been
		 * deallocated.
		 */
		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
		if (m == NULL && lockp != NULL)
			goto retry;
	}
	return (m);
}
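
/*
 * Added commentary: pmap_alloc_l3() retries internally.  When
 * _pmap_alloc_l3() returns NULL with a lock pointer supplied, the pmap
 * lock and pv list lock were dropped around VM_WAIT, so the L2 lookup is
 * repeated ("goto retry") because another thread may have installed the
 * page table page in the meantime.
 */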
/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
	m->wire_count--;
	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
	vm_page_free_zero(m);

	/* Remove pmap from the allpmaps list */
	LIST_REMOVE(pmap, pm_list);

	/* Remove kernel pagetables */
	bzero(pmap->pm_l1, PAGE_SIZE);
}

#if 0
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "LU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "LU", "Amount of KVM free");
#endif /* 0 */

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_paddr_t paddr;
	vm_page_t nkpg;
	pd_entry_t *l1, *l2;
	pt_entry_t entry;
	pn_t pn;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);

	addr = roundup2(addr, L2_SIZE);
	if (addr - 1 >= vm_map_max(kernel_map))
		addr = vm_map_max(kernel_map);
	while (kernel_vm_end < addr) {
		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
		if (pmap_load(l1) == 0) {
			/* We need a new PDP entry */
			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			if ((nkpg->flags & PG_ZERO) == 0)
				pmap_zero_page(nkpg);
			paddr = VM_PAGE_TO_PHYS(nkpg);

			pn = (paddr / PAGE_SIZE);
			entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
			entry |= (pn << PTE_PPN0_S);
			pmap_load_store(l1, entry);
			pmap_distribute_l1(kernel_pmap,
			    pmap_l1_index(kernel_vm_end), entry);

			PTE_SYNC(l1);
			continue; /* try again */
		}
		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
		if ((pmap_load(l2) & PTE_REF) != 0) {
			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
				kernel_vm_end = vm_map_max(kernel_map);
				break;
			}
			continue;
		}

		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");
		if ((nkpg->flags & PG_ZERO) == 0)
			pmap_zero_page(nkpg);
		paddr = VM_PAGE_TO_PHYS(nkpg);

		pn = (paddr / PAGE_SIZE);
		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
		entry |= (pn << PTE_PPN0_S);
		pmap_load_store(l2, entry);

		PTE_SYNC(l2);
		pmap_invalidate_page(kernel_pmap, kernel_vm_end);

		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
			kernel_vm_end = vm_map_max(kernel_map);
			break;
		}
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0	0xfffffffffffffffful
#define	PC_FREE1	0xfffffffffffffffful
#define	PC_FREE2	0x000000fffffffffful

static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };

#if 0
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
	"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
	"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
	"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
	"Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
	"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
	"Current number of pv entry allocs");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
	"Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
#endif
#endif /* 0 */

/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 *
 * Returns NULL if PV entries were reclaimed from the specified pmap.
 *
 * We do not, however, unmap 2mpages because subsequent accesses will
 * allocate per-page pv entries until repromotion occurs, thereby
 * exacerbating the shortage of free pv entries.
 */
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{

	panic("RISCVTODO: reclaim_pv_chunk");
}
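
/*
 * Worked example (added commentary): a pv_chunk packs _NPCPV == 168
 * entries into one page and tracks them in three 64-bit maps, so
 * PC_FREE0/PC_FREE1 are all ones and PC_FREE2 covers the remaining
 * 168 - 128 = 40 bits (0x000000fffffffffful).  Entry index 100, say,
 * lives at field = 100 / 64 = 1, bit = 100 % 64 = 36, which is exactly
 * the arithmetic free_pv_entry() below performs.
 */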
/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 64;
	bit = idx % 64;
	pc->pc_map[field] |= 1ul << bit;
	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
	    pc->pc_map[2] != PC_FREE2) {
		/* 98% of the time, pc is already at the head of the list. */
		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
		}
		return;
	}
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	mtx_lock(&pv_chunks_mutex);
	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
#if 0 /* TODO: For minidump */
	dump_drop_page(m->phys_addr);
#endif
	vm_page_unwire(m, PQ_NONE);
	vm_page_free(m);
}

/*
 * Returns a new PV entry, allocating a new PV chunk from the system when
 * needed.  If this PV chunk allocation fails and a PV list lock pointer was
 * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
 * returned.
 *
 * The given PV list lock may be released.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = ffsl(pc->pc_map[field]) - 1;
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 64 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
			    pc->pc_map[2] == 0) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			PV_STAT(atomic_add_long(&pv_entry_count, 1));
			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
			return (pv);
		}
	}
	/* No free items, allocate another chunk */
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED);
	if (m == NULL) {
		if (lockp == NULL) {
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		m = reclaim_pv_chunk(pmap, lockp);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
#if 0 /* TODO: This is for minidump */
	dump_add_page(m->phys_addr);
#endif
	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
	pc->pc_map[1] = PC_FREE1;
	pc->pc_map[2] = PC_FREE2;
	mtx_lock(&pv_chunks_mutex);
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(atomic_add_long(&pv_entry_count, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
	return (pv);
}
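
/*
 * Added commentary: get_pv_entry() prefers partially-full chunks at the
 * head of pm_pvchunk; a chunk that becomes exhausted is moved to the
 * tail so subsequent allocations keep hitting chunks with free slots.
 * Passing a NULL lockp (as pmap_try_insert_pv_entry() below does) turns
 * a failed chunk-page allocation into a NULL return instead of reclaim.
 */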
/*
 * First find and then remove the pv entry for the specified pmap and virtual
 * address from the specified pv list.  Returns the pv entry if found and NULL
 * otherwise.  This operation can be performed on pv lists for either 4KB or
 * 2MB page mappings.
 */
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			pvh->pv_gen++;
			break;
		}
	}
	return (pv);
}

/*
 * First find and then destroy the pv entry for the specified pmap and virtual
 * address.  This operation can be performed on pv lists for either 4KB or 2MB
 * page mappings.
 */
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);

	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
	free_pv_entry(pmap, pv);
}

/*
 * Conditionally create the PV entry for a 4KB page mapping if the required
 * memory can be allocated without resorting to reclamation.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct rwlock **lockp)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/* Pass NULL instead of the lock pointer to disable reclamation. */
	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
		pv->pv_va = va;
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_l3: do the things to unmap a page in a process
 */
static int
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
	pt_entry_t old_l3;
	vm_paddr_t phys;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
		cpu_dcache_wb_range(va, L3_SIZE);
	old_l3 = pmap_load_clear(l3);
	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);
	if (old_l3 & PTE_SW_WIRED)
		pmap->pm_stats.wired_count -= 1;
	pmap_resident_count_dec(pmap, 1);
	if (old_l3 & PTE_SW_MANAGED) {
		phys = PTE_TO_PHYS(old_l3);
		m = PHYS_TO_VM_PAGE(phys);
		if (pmap_page_dirty(old_l3))
			vm_page_dirty(m);
		if (old_l3 & PTE_REF)
			vm_page_aflag_set(m, PGA_REFERENCED);
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		pmap_pvh_free(&m->md, pmap, va);
	}

	return (pmap_unuse_l3(pmap, va, l2e, free));
}
/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	struct rwlock *lock;
	vm_offset_t va, va_next;
	pd_entry_t *l1, *l2;
	pt_entry_t l3_pte, *l3;
	struct spglist free;
	int anyvalid;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	anyvalid = 0;
	SLIST_INIT(&free);

	rw_rlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	lock = NULL;
	for (; sva < eva; sva = va_next) {
		if (pmap->pm_stats.resident_count == 0)
			break;

		l1 = pmap_l1(pmap, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		/*
		 * Calculate index for next page table.
		 */
		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL)
			continue;

		l3_pte = pmap_load(l2);

		/*
		 * Weed out invalid mappings.
		 */
		if (l3_pte == 0)
			continue;
		if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (l3 == NULL)
				panic("l3 == NULL");
			if (pmap_load(l3) == 0) {
				if (va != va_next) {
					pmap_invalidate_range(pmap, va, sva);
					va = va_next;
				}
				continue;
			}
			if (va == va_next)
				va = sva;
			if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free,
			    &lock)) {
				sva += L3_SIZE;
				break;
			}
		}
		if (va != va_next)
			pmap_invalidate_range(pmap, va, sva);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	if (anyvalid)
		pmap_invalidate_all(pmap);
	rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}
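
/*
 * Added commentary: the va/va_next bookkeeping in pmap_remove() above
 * batches TLB invalidations: "va" marks the start of the current run of
 * removed L3 entries, and pmap_invalidate_range() is issued once per
 * contiguous run rather than once per page.
 */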
/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pt_entry_t *l3, tl3;
	pd_entry_t *l2, tl2;
	struct spglist free;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	SLIST_INIT(&free);
	rw_wlock(&pvh_global_lock);
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pmap_resident_count_dec(pmap, 1);
		l2 = pmap_l2(pmap, pv->pv_va);
		KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
		tl2 = pmap_load(l2);

		KASSERT((tl2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S),
		    ("pmap_remove_all: found a table when expecting "
		    "a block in %p's pv list", m));

		l3 = pmap_l2_to_l3(l2, pv->pv_va);
		if (pmap_is_current(pmap) &&
		    pmap_l3_valid_cacheable(pmap_load(l3)))
			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
		tl3 = pmap_load_clear(l3);
		PTE_SYNC(l3);
		pmap_invalidate_page(pmap, pv->pv_va);
		if (tl3 & PTE_SW_WIRED)
			pmap->pm_stats.wired_count--;
		if ((tl3 & PTE_REF) != 0)
			vm_page_aflag_set(m, PGA_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (pmap_page_dirty(tl3))
			vm_page_dirty(m);
		pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
	pmap_free_zero_pages(&free);
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t va, va_next;
	pd_entry_t *l1, *l2;
	pt_entry_t *l3p, l3;
	pt_entry_t entry;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
		return;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {

		l1 = pmap_l1(pmap, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL)
			continue;
		if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
			continue;

		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
		    sva += L3_SIZE) {
			l3 = pmap_load(l3p);
			if (pmap_l3_valid(l3)) {
				entry = pmap_load(l3p);
				entry &= ~(1 << PTE_TYPE_S);
				pmap_load_store(l3p, entry);
				PTE_SYNC(l3p);
				/* XXX: Use pmap_invalidate_range */
				pmap_invalidate_page(pmap, va);
			}
		}
	}
	PMAP_UNLOCK(pmap);

	/* TODO: Only invalidate entries we are touching */
	pmap_invalidate_all(pmap);
}
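
/*
 * Illustrative note for pmap_enter() below: the PTE type field encodes
 * privilege and permissions together, selected by the top VA bit:
 *
 *	read-only,  user:	PTE_TYPE_SURX << PTE_TYPE_S
 *	read-write, kernel:	PTE_TYPE_SRWX << PTE_TYPE_S
 *
 * This is also why pmap_protect() above can strip write permission by
 * clearing bit (1 << PTE_TYPE_S), and why pmap_is_write() tests that bit.
 */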
2009 */ 2010int 2011pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2012 u_int flags, int8_t psind __unused) 2013{ 2014 struct rwlock *lock; 2015 pd_entry_t *l1, *l2; 2016 pt_entry_t new_l3, orig_l3; 2017 pt_entry_t *l3; 2018 pv_entry_t pv; 2019 vm_paddr_t opa, pa, l2_pa, l3_pa; 2020 vm_page_t mpte, om, l2_m, l3_m; 2021 boolean_t nosleep; 2022 pt_entry_t entry; 2023 pn_t l2_pn; 2024 pn_t l3_pn; 2025 pn_t pn; 2026 2027 va = trunc_page(va); 2028 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2029 VM_OBJECT_ASSERT_LOCKED(m->object); 2030 pa = VM_PAGE_TO_PHYS(m); 2031 pn = (pa / PAGE_SIZE); 2032 2033 new_l3 = PTE_VALID; 2034 2035 if ((prot & VM_PROT_WRITE) == 0) { /* Read-only */ 2036 if ((va >> 63) == 0) /* USER */ 2037 new_l3 |= (PTE_TYPE_SURX << PTE_TYPE_S); 2038 else /* KERNEL */ 2039 new_l3 |= (PTE_TYPE_SRX << PTE_TYPE_S); 2040 } else { 2041 if ((va >> 63) == 0) /* USER */ 2042 new_l3 |= (PTE_TYPE_SURWX << PTE_TYPE_S); 2043 else /* KERNEL */ 2044 new_l3 |= (PTE_TYPE_SRWX << PTE_TYPE_S); 2045 } 2046 2047 new_l3 |= (pn << PTE_PPN0_S); 2048 if ((flags & PMAP_ENTER_WIRED) != 0) 2049 new_l3 |= PTE_SW_WIRED; 2050 2051 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 2052 2053 mpte = NULL; 2054 2055 lock = NULL; 2056 rw_rlock(&pvh_global_lock); 2057 PMAP_LOCK(pmap); 2058 2059 if (va < VM_MAXUSER_ADDRESS) { 2060 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 2061 mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); 2062 if (mpte == NULL && nosleep) { 2063 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 2064 if (lock != NULL) 2065 rw_wunlock(lock); 2066 rw_runlock(&pvh_global_lock); 2067 PMAP_UNLOCK(pmap); 2068 return (KERN_RESOURCE_SHORTAGE); 2069 } 2070 l3 = pmap_l3(pmap, va); 2071 } else { 2072 l3 = pmap_l3(pmap, va); 2073 /* TODO: This is not optimal, but should mostly work */ 2074 if (l3 == NULL) { 2075 l2 = pmap_l2(pmap, va); 2076 if (l2 == NULL) { 2077 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2078 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2079 VM_ALLOC_ZERO); 2080 if (l2_m == NULL) 2081 panic("pmap_enter: l2 pte_m == NULL"); 2082 if ((l2_m->flags & PG_ZERO) == 0) 2083 pmap_zero_page(l2_m); 2084 2085 l2_pa = VM_PAGE_TO_PHYS(l2_m); 2086 l2_pn = (l2_pa / PAGE_SIZE); 2087 2088 l1 = pmap_l1(pmap, va); 2089 entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); 2090 entry |= (l2_pn << PTE_PPN0_S); 2091 pmap_load_store(l1, entry); 2092 pmap_distribute_l1(pmap, pmap_l1_index(va), entry); 2093 PTE_SYNC(l1); 2094 2095 l2 = pmap_l1_to_l2(l1, va); 2096 } 2097 2098 KASSERT(l2 != NULL, 2099 ("No l2 table after allocating one")); 2100 2101 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2102 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 2103 if (l3_m == NULL) 2104 panic("pmap_enter: l3 pte_m == NULL"); 2105 if ((l3_m->flags & PG_ZERO) == 0) 2106 pmap_zero_page(l3_m); 2107 2108 l3_pa = VM_PAGE_TO_PHYS(l3_m); 2109 l3_pn = (l3_pa / PAGE_SIZE); 2110 entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); 2111 entry |= (l3_pn << PTE_PPN0_S); 2112 pmap_load_store(l2, entry); 2113 PTE_SYNC(l2); 2114 l3 = pmap_l2_to_l3(l2, va); 2115 } 2116 pmap_invalidate_page(pmap, va); 2117 } 2118 2119 om = NULL; 2120 orig_l3 = pmap_load(l3); 2121 opa = PTE_TO_PHYS(orig_l3); 2122 2123 /* 2124 * Is the specified virtual address already mapped? 2125 */ 2126 if (pmap_l3_valid(orig_l3)) { 2127 /* 2128 * Wiring change, just update stats. We don't worry about 2129 * wiring PT pages as they remain resident as long as there 2130 * are valid mappings in them. 
Hence, if a user page is wired, 2131 * the PT page will be also. 2132 */ 2133 if ((flags & PMAP_ENTER_WIRED) != 0 && 2134 (orig_l3 & PTE_SW_WIRED) == 0) 2135 pmap->pm_stats.wired_count++; 2136 else if ((flags & PMAP_ENTER_WIRED) == 0 && 2137 (orig_l3 & PTE_SW_WIRED) != 0) 2138 pmap->pm_stats.wired_count--; 2139 2140 /* 2141 * Remove the extra PT page reference. 2142 */ 2143 if (mpte != NULL) { 2144 mpte->wire_count--; 2145 KASSERT(mpte->wire_count > 0, 2146 ("pmap_enter: missing reference to page table page," 2147 " va: 0x%lx", va)); 2148 } 2149 2150 /* 2151 * Has the physical page changed? 2152 */ 2153 if (opa == pa) { 2154 /* 2155 * No, might be a protection or wiring change. 2156 */ 2157 if ((orig_l3 & PTE_SW_MANAGED) != 0) { 2158 new_l3 |= PTE_SW_MANAGED; 2159 if (pmap_is_write(new_l3)) 2160 vm_page_aflag_set(m, PGA_WRITEABLE); 2161 } 2162 goto validate; 2163 } 2164 2165 /* Flush the cache, there might be uncommitted data in it */ 2166 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2167 cpu_dcache_wb_range(va, L3_SIZE); 2168 } else { 2169 /* 2170 * Increment the counters. 2171 */ 2172 if ((new_l3 & PTE_SW_WIRED) != 0) 2173 pmap->pm_stats.wired_count++; 2174 pmap_resident_count_inc(pmap, 1); 2175 } 2176 /* 2177 * Enter on the PV list if part of our managed memory. 2178 */ 2179 if ((m->oflags & VPO_UNMANAGED) == 0) { 2180 new_l3 |= PTE_SW_MANAGED; 2181 pv = get_pv_entry(pmap, &lock); 2182 pv->pv_va = va; 2183 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2184 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2185 m->md.pv_gen++; 2186 if (pmap_is_write(new_l3)) 2187 vm_page_aflag_set(m, PGA_WRITEABLE); 2188 } 2189 2190 /* 2191 * Update the L3 entry. 2192 */ 2193 if (orig_l3 != 0) { 2194validate: 2195 orig_l3 = pmap_load_store(l3, new_l3); 2196 PTE_SYNC(l3); 2197 opa = PTE_TO_PHYS(orig_l3); 2198 2199 if (opa != pa) { 2200 if ((orig_l3 & PTE_SW_MANAGED) != 0) { 2201 om = PHYS_TO_VM_PAGE(opa); 2202 if (pmap_page_dirty(orig_l3)) 2203 vm_page_dirty(om); 2204 if ((orig_l3 & PTE_REF) != 0) 2205 vm_page_aflag_set(om, PGA_REFERENCED); 2206 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2207 pmap_pvh_free(&om->md, pmap, va); 2208 } 2209 } else if (pmap_page_dirty(orig_l3)) { 2210 if ((orig_l3 & PTE_SW_MANAGED) != 0) 2211 vm_page_dirty(m); 2212 } 2213 } else { 2214 pmap_load_store(l3, new_l3); 2215 PTE_SYNC(l3); 2216 } 2217 pmap_invalidate_page(pmap, va); 2218 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2219 cpu_icache_sync_range(va, PAGE_SIZE); 2220 2221 if (lock != NULL) 2222 rw_wunlock(lock); 2223 rw_runlock(&pvh_global_lock); 2224 PMAP_UNLOCK(pmap); 2225 return (KERN_SUCCESS); 2226} 2227 2228/* 2229 * Maps a sequence of resident pages belonging to the same object. 2230 * The sequence begins with the given page m_start. This page is 2231 * mapped at the given virtual address start. Each subsequent page is 2232 * mapped at a virtual address that is offset from start by the same 2233 * amount as the page is offset from m_start within the object. The 2234 * last page in the sequence is the page with the largest offset from 2235 * m_start that can be mapped at a virtual address less than the given 2236 * virtual address end. Not every virtual page between start and end 2237 * is mapped; only those for which a resident page exists with the 2238 * corresponding offset from m_start are mapped. 
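 *
 * For example (a sketch, not a statement about any particular
 * caller): if m_start->pindex is 5 and a later resident page in the
 * sequence has pindex 8, that page is mapped at start + ptoa(8 - 5),
 * provided that address is below end.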
2239 */ 2240void 2241pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2242 vm_page_t m_start, vm_prot_t prot) 2243{ 2244 struct rwlock *lock; 2245 vm_offset_t va; 2246 vm_page_t m, mpte; 2247 vm_pindex_t diff, psize; 2248 2249 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2250 2251 psize = atop(end - start); 2252 mpte = NULL; 2253 m = m_start; 2254 lock = NULL; 2255 rw_rlock(&pvh_global_lock); 2256 PMAP_LOCK(pmap); 2257 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2258 va = start + ptoa(diff); 2259 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 2260 m = TAILQ_NEXT(m, listq); 2261 } 2262 if (lock != NULL) 2263 rw_wunlock(lock); 2264 rw_runlock(&pvh_global_lock); 2265 PMAP_UNLOCK(pmap); 2266} 2267 2268/* 2269 * This code makes some *MAJOR* assumptions: 2270 * 1. The current pmap and the given pmap exist. 2271 * 2. The mapping is not wired. 2272 * 3. Read access. 2273 * 4. No page table pages. 2274 * but it is *MUCH* faster than pmap_enter... 2275 */ 2276 2277void 2278pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2279{ 2280 struct rwlock *lock; 2281 2282 lock = NULL; 2283 rw_rlock(&pvh_global_lock); 2284 PMAP_LOCK(pmap); 2285 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 2286 if (lock != NULL) 2287 rw_wunlock(lock); 2288 rw_runlock(&pvh_global_lock); 2289 PMAP_UNLOCK(pmap); 2290} 2291 2292static vm_page_t 2293pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2294 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 2295{ 2296 struct spglist free; 2297 vm_paddr_t phys; 2298 pd_entry_t *l2; 2299 pt_entry_t *l3; 2300 vm_paddr_t pa; 2301 pt_entry_t entry; 2302 pn_t pn; 2303 2304 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2305 (m->oflags & VPO_UNMANAGED) != 0, 2306 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2307 rw_assert(&pvh_global_lock, RA_LOCKED); 2308 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2309 2310 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 2311 /* 2312 * If a page table page is not resident, 2313 * it is created here. 2314 */ 2315 if (va < VM_MAXUSER_ADDRESS) { 2316 vm_pindex_t l2pindex; 2317 2318 /* 2319 * Calculate the page table page index. 2320 */ 2321 l2pindex = pmap_l2_pindex(va); 2322 if (mpte && (mpte->pindex == l2pindex)) { 2323 mpte->wire_count++; 2324 } else { 2325 /* 2326 * Get the l2 entry. 2327 */ 2328 l2 = pmap_l2(pmap, va); 2329 2330 /* 2331 * If the page table page is mapped, we just increment 2332 * its wire count and activate it. Otherwise, we 2333 * attempt to allocate a page table page. If this 2334 * attempt fails, we don't retry. Instead, we give up. 2335 */ 2336 if (l2 != NULL && pmap_load(l2) != 0) { 2337 phys = PTE_TO_PHYS(pmap_load(l2)); 2338 mpte = PHYS_TO_VM_PAGE(phys); 2339 mpte->wire_count++; 2340 } else { 2341 /* 2342 * Pass NULL instead of the PV list lock 2343 * pointer, because we don't intend to sleep. 2344 */ 2345 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 2346 if (mpte == NULL) 2347 return (mpte); 2348 } 2349 } 2350 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 2351 l3 = &l3[pmap_l3_index(va)]; 2352 } else { 2353 mpte = NULL; 2354 l3 = pmap_l3(kernel_pmap, va); 2355 } 2356 if (l3 == NULL) 2357 panic("pmap_enter_quick_locked: No l3"); 2358 if (pmap_load(l3) != 0) { 2359 if (mpte != NULL) { 2360 mpte->wire_count--; 2361 mpte = NULL; 2362 } 2363 return (mpte); 2364 } 2365 2366 /* 2367 * Enter on the PV list if part of our managed memory.
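 *
 * If a pv_entry cannot be allocated without sleeping, the mapping
 * is simply skipped and any page table page reference is undone
 * below; a later fault can still install the mapping through the
 * full pmap_enter() path.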
2368 */ 2369 if ((m->oflags & VPO_UNMANAGED) == 0 && 2370 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 2371 if (mpte != NULL) { 2372 SLIST_INIT(&free); 2373 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 2374 pmap_invalidate_page(pmap, va); 2375 pmap_free_zero_pages(&free); 2376 } 2377 mpte = NULL; 2378 } 2379 return (mpte); 2380 } 2381 2382 /* 2383 * Increment counters. 2384 */ 2385 pmap_resident_count_inc(pmap, 1); 2386 2387 pa = VM_PAGE_TO_PHYS(m); 2388 pn = (pa / PAGE_SIZE); 2389 2390 /* RISCVTODO: check permissions */ 2391 entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); 2392 entry |= (pn << PTE_PPN0_S); 2393 2394 /* 2395 * Now validate (install) the mapping. The protection is still the hard-coded RWX set above, pending the RISCVTODO; it is not reduced to read-only here. 2396 */ 2397 if ((m->oflags & VPO_UNMANAGED) == 0) 2398 entry |= PTE_SW_MANAGED; 2399 pmap_load_store(l3, entry); 2400 2401 PTE_SYNC(l3); 2402 pmap_invalidate_page(pmap, va); 2403 return (mpte); 2404} 2405 2406/* 2407 * This code maps large physical mmap regions into the 2408 * processor address space. Note that some shortcuts 2409 * are taken, but the code works; on this port the body below is currently a no-op beyond the assertions. 2410 */ 2411void 2412pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2413 vm_pindex_t pindex, vm_size_t size) 2414{ 2415 2416 VM_OBJECT_ASSERT_WLOCKED(object); 2417 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2418 ("pmap_object_init_pt: non-device object")); 2419} 2420 2421/* 2422 * Clear the wired attribute from the mappings for the specified range of 2423 * addresses in the given pmap. Every valid mapping within that range 2424 * must have the wired attribute set. In contrast, invalid mappings 2425 * cannot have the wired attribute set, so they are ignored. 2426 * 2427 * The wired attribute of the page table entry is not a hardware feature, 2428 * so there is no need to invalidate any TLB entries. 2429 */ 2430void 2431pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2432{ 2433 vm_offset_t va_next; 2434 pd_entry_t *l1, *l2; 2435 pt_entry_t *l3; 2436 boolean_t pv_lists_locked; 2437 2438 pv_lists_locked = FALSE; 2439 PMAP_LOCK(pmap); 2440 for (; sva < eva; sva = va_next) { 2441 l1 = pmap_l1(pmap, sva); 2442 if (pmap_load(l1) == 0) { 2443 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2444 if (va_next < sva) 2445 va_next = eva; 2446 continue; 2447 } 2448 2449 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2450 if (va_next < sva) 2451 va_next = eva; 2452 2453 l2 = pmap_l1_to_l2(l1, sva); 2454 if (pmap_load(l2) == 0) 2455 continue; 2456 2457 if (va_next > eva) 2458 va_next = eva; 2459 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2460 sva += L3_SIZE) { 2461 if (pmap_load(l3) == 0) 2462 continue; 2463 if ((pmap_load(l3) & PTE_SW_WIRED) == 0) 2464 panic("pmap_unwire: l3 %#jx is missing " 2465 "PTE_SW_WIRED", (uintmax_t)*l3); 2466 2467 /* 2468 * PTE_SW_WIRED must be cleared atomically. Although the pmap 2469 * lock synchronizes access to it, another processor 2470 * could be setting the referenced (PTE_REF) and/or dirty (PTE_DIRTY) bits concurrently. 2471 */ 2472 atomic_clear_long(l3, PTE_SW_WIRED); 2473 pmap->pm_stats.wired_count--; 2474 } 2475 } 2476 if (pv_lists_locked) 2477 rw_runlock(&pvh_global_lock); 2478 PMAP_UNLOCK(pmap); 2479} 2480 2481/* 2482 * Copy the range specified by src_addr/len 2483 * from the source map to the range dst_addr/len 2484 * in the destination map. 2485 * 2486 * This routine is only advisory and need not do anything.
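 *
 * This implementation takes that literally: the body below is
 * empty, so mappings in the destination pmap are instead created
 * later, on demand, by pmap_enter() and its variants.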
2487 */ 2488 2489void 2490pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2491 vm_offset_t src_addr) 2492{ 2493 2494} 2495 2496/* 2497 * pmap_zero_page zeros the specified hardware page by mapping 2498 * the page into KVM and using bzero to clear its contents. 2499 */ 2500void 2501pmap_zero_page(vm_page_t m) 2502{ 2503 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2504 2505 pagezero((void *)va); 2506} 2507 2508/* 2509 * pmap_zero_page_area zeros the specified hardware page by mapping 2510 * the page into KVM and using bzero to clear its contents. 2511 * 2512 * off and size may not cover an area beyond a single hardware page. 2513 */ 2514void 2515pmap_zero_page_area(vm_page_t m, int off, int size) 2516{ 2517 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2518 2519 if (off == 0 && size == PAGE_SIZE) 2520 pagezero((void *)va); 2521 else 2522 bzero((char *)va + off, size); 2523} 2524 2525/* 2526 * pmap_zero_page_idle zeros the specified hardware page by mapping 2527 * the page into KVM and using bzero to clear its contents. This 2528 * is intended to be called from the vm_pagezero process only and 2529 * outside of Giant. 2530 */ 2531void 2532pmap_zero_page_idle(vm_page_t m) 2533{ 2534 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2535 2536 pagezero((void *)va); 2537} 2538 2539/* 2540 * pmap_copy_page copies the specified (machine independent) 2541 * page by mapping the page into virtual memory and using 2542 * bcopy to copy the page, one machine dependent page at a 2543 * time. 2544 */ 2545void 2546pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2547{ 2548 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2549 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2550 2551 pagecopy((void *)src, (void *)dst); 2552} 2553 2554int unmapped_buf_allowed = 1; 2555 2556void 2557pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2558 vm_offset_t b_offset, int xfersize) 2559{ 2560 void *a_cp, *b_cp; 2561 vm_page_t m_a, m_b; 2562 vm_paddr_t p_a, p_b; 2563 vm_offset_t a_pg_offset, b_pg_offset; 2564 int cnt; 2565 2566 while (xfersize > 0) { 2567 a_pg_offset = a_offset & PAGE_MASK; 2568 m_a = ma[a_offset >> PAGE_SHIFT]; 2569 p_a = m_a->phys_addr; 2570 b_pg_offset = b_offset & PAGE_MASK; 2571 m_b = mb[b_offset >> PAGE_SHIFT]; 2572 p_b = m_b->phys_addr; 2573 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2574 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2575 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2576 panic("!DMAP a %lx", p_a); 2577 } else { 2578 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2579 } 2580 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2581 panic("!DMAP b %lx", p_b); 2582 } else { 2583 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2584 } 2585 bcopy(a_cp, b_cp, cnt); 2586 a_offset += cnt; 2587 b_offset += cnt; 2588 xfersize -= cnt; 2589 } 2590} 2591 2592vm_offset_t 2593pmap_quick_enter_page(vm_page_t m) 2594{ 2595 2596 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2597} 2598 2599void 2600pmap_quick_remove_page(vm_offset_t addr) 2601{ 2602} 2603 2604/* 2605 * Returns true if the pmap's pv is one of the first 2606 * 16 pvs linked to from this page. This count may 2607 * be changed upwards or downwards in the future; it 2608 * is only necessary that true be returned for a small 2609 * subset of pmaps for proper page aging. 
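 *
 * Consequently, a FALSE return does not prove that the pmap has no
 * mapping for the page; it only means that none was found among the
 * first 16 PV entries examined.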
2610 */ 2611boolean_t 2612pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2613{ 2614 struct rwlock *lock; 2615 pv_entry_t pv; 2616 int loops = 0; 2617 boolean_t rv; 2618 2619 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2620 ("pmap_page_exists_quick: page %p is not managed", m)); 2621 rv = FALSE; 2622 rw_rlock(&pvh_global_lock); 2623 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2624 rw_rlock(lock); 2625 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2626 if (PV_PMAP(pv) == pmap) { 2627 rv = TRUE; 2628 break; 2629 } 2630 loops++; 2631 if (loops >= 16) 2632 break; 2633 } 2634 rw_runlock(lock); 2635 rw_runlock(&pvh_global_lock); 2636 return (rv); 2637} 2638 2639/* 2640 * pmap_page_wired_mappings: 2641 * 2642 * Return the number of managed mappings to the given physical page 2643 * that are wired. 2644 */ 2645int 2646pmap_page_wired_mappings(vm_page_t m) 2647{ 2648 struct rwlock *lock; 2649 pmap_t pmap; 2650 pt_entry_t *l3; 2651 pv_entry_t pv; 2652 int count, md_gen; 2653 2654 if ((m->oflags & VPO_UNMANAGED) != 0) 2655 return (0); 2656 rw_rlock(&pvh_global_lock); 2657 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2658 rw_rlock(lock); 2659restart: 2660 count = 0; 2661 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2662 pmap = PV_PMAP(pv); 2663 if (!PMAP_TRYLOCK(pmap)) { 2664 md_gen = m->md.pv_gen; 2665 rw_runlock(lock); 2666 PMAP_LOCK(pmap); 2667 rw_rlock(lock); 2668 if (md_gen != m->md.pv_gen) { 2669 PMAP_UNLOCK(pmap); 2670 goto restart; 2671 } 2672 } 2673 l3 = pmap_l3(pmap, pv->pv_va); 2674 if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0) 2675 count++; 2676 PMAP_UNLOCK(pmap); 2677 } 2678 rw_runlock(lock); 2679 rw_runlock(&pvh_global_lock); 2680 return (count); 2681} 2682 2683/* 2684 * Destroy all managed, non-wired mappings in the given user-space 2685 * pmap. This pmap cannot be active on any processor besides the 2686 * caller. 2687 * 2688 * This function cannot be applied to the kernel pmap. Moreover, it 2689 * is not intended for general use. It is only to be used during 2690 * process termination. Consequently, it can be implemented in ways 2691 * that make it faster than pmap_remove(). First, it can more quickly 2692 * destroy mappings by iterating over the pmap's collection of PV 2693 * entries, rather than searching the page table. Second, it doesn't 2694 * have to test and clear the page table entries atomically, because 2695 * no processor is currently accessing the user address space. In 2696 * particular, a page table entry's dirty bit won't change state once 2697 * this function starts. 
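 *
 * A sketch of the intended call site (assumed here, not shown in
 * this file), late in process teardown:
 *
 *	pmap_remove_pages(vmspace_pmap(p->p_vmspace));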
2698 */ 2699void 2700pmap_remove_pages(pmap_t pmap) 2701{ 2702 pd_entry_t ptepde, *l2; 2703 pt_entry_t *l3, tl3; 2704 struct spglist free; 2705 vm_page_t m; 2706 pv_entry_t pv; 2707 struct pv_chunk *pc, *npc; 2708 struct rwlock *lock; 2709 int64_t bit; 2710 uint64_t inuse, bitmask; 2711 int allfree, field, freed, idx; 2712 vm_paddr_t pa; 2713 2714 lock = NULL; 2715 2716 SLIST_INIT(&free); 2717 rw_rlock(&pvh_global_lock); 2718 PMAP_LOCK(pmap); 2719 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2720 allfree = 1; 2721 freed = 0; 2722 for (field = 0; field < _NPCM; field++) { 2723 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2724 while (inuse != 0) { 2725 bit = ffsl(inuse) - 1; 2726 bitmask = 1UL << bit; 2727 idx = field * 64 + bit; 2728 pv = &pc->pc_pventry[idx]; 2729 inuse &= ~bitmask; 2730 2731 l2 = pmap_l2(pmap, pv->pv_va); 2732 ptepde = pmap_load(l2); 2733 l3 = pmap_l2_to_l3(l2, pv->pv_va); 2734 tl3 = pmap_load(l3); 2735 2736/* 2737 * We cannot remove wired pages from a process' mapping at this time 2738 */ 2739 if (tl3 & PTE_SW_WIRED) { 2740 allfree = 0; 2741 continue; 2742 } 2743 2744 pa = PTE_TO_PHYS(tl3); 2745 m = PHYS_TO_VM_PAGE(pa); 2746 KASSERT(m->phys_addr == pa, 2747 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2748 m, (uintmax_t)m->phys_addr, 2749 (uintmax_t)tl3)); 2750 2751 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 2752 m < &vm_page_array[vm_page_array_size], 2753 ("pmap_remove_pages: bad l3 %#jx", 2754 (uintmax_t)tl3)); 2755 2756 if (pmap_is_current(pmap) && 2757 pmap_l3_valid_cacheable(pmap_load(l3))) 2758 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2759 pmap_load_clear(l3); 2760 PTE_SYNC(l3); 2761 pmap_invalidate_page(pmap, pv->pv_va); 2762 2763 /* 2764 * Update the vm_page_t clean/reference bits. 2765 */ 2766 if (pmap_page_dirty(tl3)) 2767 vm_page_dirty(m); 2768 2769 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 2770 2771 /* Mark free */ 2772 pc->pc_map[field] |= bitmask; 2773 2774 pmap_resident_count_dec(pmap, 1); 2775 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2776 m->md.pv_gen++; 2777 2778 pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); 2779 freed++; 2780 } 2781 } 2782 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 2783 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 2784 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 2785 if (allfree) { 2786 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2787 free_pv_chunk(pc); 2788 } 2789 } 2790 pmap_invalidate_all(pmap); 2791 if (lock != NULL) 2792 rw_wunlock(lock); 2793 rw_runlock(&pvh_global_lock); 2794 PMAP_UNLOCK(pmap); 2795 pmap_free_zero_pages(&free); 2796} 2797 2798/* 2799 * This is used to check if a page has been accessed or modified. As we 2800 * don't have a bit to see if it has been modified we have to assume it 2801 * has been if the page is read/write. 
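 *
 * Concretely, in the helper below a "modified" test builds
 * mask = value = PTE_DIRTY, so a PTE matches only when PTE_DIRTY is
 * set; an "accessed" test does the same with PTE_REF.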
2802 */ 2803static boolean_t 2804pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 2805{ 2806 struct rwlock *lock; 2807 pv_entry_t pv; 2808 pt_entry_t *l3, mask, value; 2809 pmap_t pmap; 2810 int md_gen; 2811 boolean_t rv; 2812 2813 rv = FALSE; 2814 rw_rlock(&pvh_global_lock); 2815 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2816 rw_rlock(lock); 2817restart: 2818 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2819 pmap = PV_PMAP(pv); 2820 if (!PMAP_TRYLOCK(pmap)) { 2821 md_gen = m->md.pv_gen; 2822 rw_runlock(lock); 2823 PMAP_LOCK(pmap); 2824 rw_rlock(lock); 2825 if (md_gen != m->md.pv_gen) { 2826 PMAP_UNLOCK(pmap); 2827 goto restart; 2828 } 2829 } 2830 l3 = pmap_l3(pmap, pv->pv_va); 2831 mask = 0; 2832 value = 0; 2833 if (modified) { 2834 mask |= PTE_DIRTY; 2835 value |= PTE_DIRTY; 2836 } 2837 if (accessed) { 2838 mask |= PTE_REF; 2839 value |= PTE_REF; 2840 } 2841 2842#if 0 2843 if (modified) { 2844 mask |= ATTR_AP_RW_BIT; 2845 value |= ATTR_AP(ATTR_AP_RW); 2846 } 2847 if (accessed) { 2848 mask |= ATTR_AF | ATTR_DESCR_MASK; 2849 value |= ATTR_AF | L3_PAGE; 2850 } 2851#endif 2852 2853 rv = (pmap_load(l3) & mask) == value; 2854 PMAP_UNLOCK(pmap); 2855 if (rv) 2856 goto out; 2857 } 2858out: 2859 rw_runlock(lock); 2860 rw_runlock(&pvh_global_lock); 2861 return (rv); 2862} 2863 2864/* 2865 * pmap_is_modified: 2866 * 2867 * Return whether or not the specified physical page was modified 2868 * in any physical maps. 2869 */ 2870boolean_t 2871pmap_is_modified(vm_page_t m) 2872{ 2873 2874 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2875 ("pmap_is_modified: page %p is not managed", m)); 2876 2877 /* 2878 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2879 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2880 * is clear, no PTEs can have PG_M set. 2881 */ 2882 VM_OBJECT_ASSERT_WLOCKED(m->object); 2883 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2884 return (FALSE); 2885 return (pmap_page_test_mappings(m, FALSE, TRUE)); 2886} 2887 2888/* 2889 * pmap_is_prefaultable: 2890 * 2891 * Return whether or not the specified virtual address is eligible 2892 * for prefault. 2893 */ 2894boolean_t 2895pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2896{ 2897 pt_entry_t *l3; 2898 boolean_t rv; 2899 2900 rv = FALSE; 2901 PMAP_LOCK(pmap); 2902 l3 = pmap_l3(pmap, addr); 2903 if (l3 != NULL && pmap_load(l3) != 0) { 2904 rv = TRUE; 2905 } 2906 PMAP_UNLOCK(pmap); 2907 return (rv); 2908} 2909 2910/* 2911 * pmap_is_referenced: 2912 * 2913 * Return whether or not the specified physical page was referenced 2914 * in any physical maps. 2915 */ 2916boolean_t 2917pmap_is_referenced(vm_page_t m) 2918{ 2919 2920 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2921 ("pmap_is_referenced: page %p is not managed", m)); 2922 return (pmap_page_test_mappings(m, TRUE, FALSE)); 2923} 2924 2925/* 2926 * Clear the write and modified bits in each of the given page's mappings. 2927 */ 2928void 2929pmap_remove_write(vm_page_t m) 2930{ 2931 pmap_t pmap; 2932 struct rwlock *lock; 2933 pv_entry_t pv; 2934 pt_entry_t *l3, oldl3; 2935 pt_entry_t newl3; 2936 int md_gen; 2937 2938 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2939 ("pmap_remove_write: page %p is not managed", m)); 2940 2941 /* 2942 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2943 * set by another thread while the object is locked. Thus, 2944 * if PGA_WRITEABLE is clear, no page table entries need updating. 
2945 */ 2946 VM_OBJECT_ASSERT_WLOCKED(m->object); 2947 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2948 return; 2949 rw_rlock(&pvh_global_lock); 2950 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2951retry_pv_loop: 2952 rw_wlock(lock); 2953 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2954 pmap = PV_PMAP(pv); 2955 if (!PMAP_TRYLOCK(pmap)) { 2956 md_gen = m->md.pv_gen; 2957 rw_wunlock(lock); 2958 PMAP_LOCK(pmap); 2959 rw_wlock(lock); 2960 if (md_gen != m->md.pv_gen) { 2961 PMAP_UNLOCK(pmap); 2962 rw_wunlock(lock); 2963 goto retry_pv_loop; 2964 } 2965 } 2966 l3 = pmap_l3(pmap, pv->pv_va); 2967retry: 2968 oldl3 = pmap_load(l3); 2969 2970 if (pmap_is_write(oldl3)) { 2971 newl3 = oldl3 & ~(1 << PTE_TYPE_S); 2972 if (!atomic_cmpset_long(l3, oldl3, newl3)) 2973 goto retry; 2974 /* TODO: use pmap_page_dirty(oldl3) ? */ 2975 if ((oldl3 & PTE_REF) != 0) 2976 vm_page_dirty(m); 2977 pmap_invalidate_page(pmap, pv->pv_va); 2978 } 2979 PMAP_UNLOCK(pmap); 2980 } 2981 rw_wunlock(lock); 2982 vm_page_aflag_clear(m, PGA_WRITEABLE); 2983 rw_runlock(&pvh_global_lock); 2984} 2985 2986static __inline boolean_t 2987safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 2988{ 2989 2990 return (FALSE); 2991} 2992 2993/* 2994 * pmap_ts_referenced: 2995 * 2996 * Return a count of reference bits for a page, clearing those bits. 2997 * It is not necessary for every reference bit to be cleared, but it 2998 * is necessary that 0 only be returned when there are truly no 2999 * reference bits set. 3000 * 3001 * As an optimization, update the page's dirty field if a modified bit is 3002 * found while counting reference bits. This opportunistic update can be 3003 * performed at low cost and can eliminate the need for some future calls 3004 * to pmap_is_modified(). However, since this function stops after 3005 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 3006 * dirty pages. Those dirty pages will only be detected by a future call 3007 * to pmap_is_modified(). 3008 */ 3009int 3010pmap_ts_referenced(vm_page_t m) 3011{ 3012 pv_entry_t pv, pvf; 3013 pmap_t pmap; 3014 struct rwlock *lock; 3015 pd_entry_t *l2; 3016 pt_entry_t *l3, old_l3; 3017 vm_paddr_t pa; 3018 int cleared, md_gen, not_cleared; 3019 struct spglist free; 3020 3021 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3022 ("pmap_ts_referenced: page %p is not managed", m)); 3023 SLIST_INIT(&free); 3024 cleared = 0; 3025 pa = VM_PAGE_TO_PHYS(m); 3026 lock = PHYS_TO_PV_LIST_LOCK(pa); 3027 rw_rlock(&pvh_global_lock); 3028 rw_wlock(lock); 3029retry: 3030 not_cleared = 0; 3031 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 3032 goto out; 3033 pv = pvf; 3034 do { 3035 if (pvf == NULL) 3036 pvf = pv; 3037 pmap = PV_PMAP(pv); 3038 if (!PMAP_TRYLOCK(pmap)) { 3039 md_gen = m->md.pv_gen; 3040 rw_wunlock(lock); 3041 PMAP_LOCK(pmap); 3042 rw_wlock(lock); 3043 if (md_gen != m->md.pv_gen) { 3044 PMAP_UNLOCK(pmap); 3045 goto retry; 3046 } 3047 } 3048 l2 = pmap_l2(pmap, pv->pv_va); 3049 3050 KASSERT((pmap_load(l2) & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), 3051 ("pmap_ts_referenced: found an invalid l2 table")); 3052 3053 l3 = pmap_l2_to_l3(l2, pv->pv_va); 3054 old_l3 = pmap_load(l3); 3055 if (pmap_page_dirty(old_l3)) 3056 vm_page_dirty(m); 3057 if ((old_l3 & PTE_REF) != 0) { 3058 if (safe_to_clear_referenced(pmap, old_l3)) { 3059 /* 3060 * TODO: We don't handle the access flag 3061 * at all. We need to be able to set it in 3062 * the exception handler. 
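 *
 * In other words, reference-bit emulation would require the fault
 * path to set PTE_REF on first access. Since safe_to_clear_referenced()
 * unconditionally returns FALSE, this branch is currently
 * unreachable and the panic is purely defensive.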
3063 */ 3064 panic("RISCVTODO: safe_to_clear_referenced\n"); 3065 } else if ((old_l3 & PTE_SW_WIRED) == 0) { 3066 /* 3067 * Wired pages cannot be paged out so 3068 * doing accessed bit emulation for 3069 * them is wasted effort. We do the 3070 * hard work for unwired pages only. 3071 */ 3072 pmap_remove_l3(pmap, l3, pv->pv_va, 3073 pmap_load(l2), &free, &lock); 3074 pmap_invalidate_page(pmap, pv->pv_va); 3075 cleared++; 3076 if (pvf == pv) 3077 pvf = NULL; 3078 pv = NULL; 3079 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 3080 ("inconsistent pv lock %p %p for page %p", 3081 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 3082 } else 3083 not_cleared++; 3084 } 3085 PMAP_UNLOCK(pmap); 3086 /* Rotate the PV list if it has more than one entry. */ 3087 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 3088 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 3089 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3090 m->md.pv_gen++; 3091 } 3092 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 3093 not_cleared < PMAP_TS_REFERENCED_MAX); 3094out: 3095 rw_wunlock(lock); 3096 rw_runlock(&pvh_global_lock); 3097 pmap_free_zero_pages(&free); 3098 return (cleared + not_cleared); 3099} 3100 3101/* 3102 * Apply the given advice to the specified range of addresses within the 3103 * given pmap. Depending on the advice, clear the referenced and/or 3104 * modified flags in each mapping and set the mapped page's dirty field. 3105 */ 3106void 3107pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 3108{ 3109} 3110 3111/* 3112 * Clear the modify bits on the specified physical page. 3113 */ 3114void 3115pmap_clear_modify(vm_page_t m) 3116{ 3117 3118 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3119 ("pmap_clear_modify: page %p is not managed", m)); 3120 VM_OBJECT_ASSERT_WLOCKED(m->object); 3121 KASSERT(!vm_page_xbusied(m), 3122 ("pmap_clear_modify: page %p is exclusive busied", m)); 3123 3124 /* 3125 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 3126 * If the object containing the page is locked and the page is not 3127 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 3128 */ 3129 if ((m->aflags & PGA_WRITEABLE) == 0) 3130 return; 3131 3132 /* RISCVTODO: We lack support for tracking if a page is modified */ 3133} 3134 3135void * 3136pmap_mapbios(vm_paddr_t pa, vm_size_t size) 3137{ 3138 3139 return ((void *)PHYS_TO_DMAP(pa)); 3140} 3141 3142void 3143pmap_unmapbios(vm_paddr_t pa, vm_size_t size) 3144{ 3145} 3146 3147/* 3148 * Sets the memory attribute for the specified page. 3149 */ 3150void 3151pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3152{ 3153 3154 m->md.pv_memattr = ma; 3155 3156 /* 3157 * RISCVTODO: Implement the below (from the amd64 pmap) 3158 * If "m" is a normal page, update its direct mapping. This update 3159 * can be relied upon to perform any cache operations that are 3160 * required for data coherence. 
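 *
 * Until that is implemented, changing the attribute of a normal,
 * DMAP-covered page deliberately panics below rather than leaving
 * the direct map silently inconsistent.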
3161 */ 3162 if ((m->flags & PG_FICTITIOUS) == 0 && 3163 PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) 3164 panic("RISCVTODO: pmap_page_set_memattr"); 3165} 3166 3167/* 3168 * perform the pmap work for mincore 3169 */ 3170int 3171pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 3172{ 3173 3174 panic("RISCVTODO: pmap_mincore"); 3175} 3176 3177void 3178pmap_activate(struct thread *td) 3179{ 3180 pmap_t pmap; 3181 3182 critical_enter(); 3183 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3184 td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); 3185 3186 __asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr)); 3187 3188 pmap_invalidate_all(pmap); 3189 critical_exit(); 3190} 3191 3192void 3193pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 3194{ 3195 3196 panic("RISCVTODO: pmap_sync_icache"); 3197} 3198 3199/* 3200 * Increase the starting virtual address of the given mapping if a 3201 * different alignment might result in more superpage mappings. 3202 */ 3203void 3204pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3205 vm_offset_t *addr, vm_size_t size) 3206{ 3207} 3208 3209/** 3210 * Get the kernel virtual address of a set of physical pages. If there are 3211 * physical addresses not covered by the DMAP perform a transient mapping 3212 * that will be removed when calling pmap_unmap_io_transient. 3213 * 3214 * \param page The pages the caller wishes to obtain the virtual 3215 * address on the kernel memory map. 3216 * \param vaddr On return contains the kernel virtual memory address 3217 * of the pages passed in the page parameter. 3218 * \param count Number of pages passed in. 3219 * \param can_fault TRUE if the thread using the mapped pages can take 3220 * page faults, FALSE otherwise. 3221 * 3222 * \returns TRUE if the caller must call pmap_unmap_io_transient when 3223 * finished or FALSE otherwise. 3224 * 3225 */ 3226boolean_t 3227pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3228 boolean_t can_fault) 3229{ 3230 vm_paddr_t paddr; 3231 boolean_t needs_mapping; 3232 int error, i; 3233 3234 /* 3235 * Allocate any KVA space that we need, this is done in a separate 3236 * loop to prevent calling vmem_alloc while pinned. 3237 */ 3238 needs_mapping = FALSE; 3239 for (i = 0; i < count; i++) { 3240 paddr = VM_PAGE_TO_PHYS(page[i]); 3241 if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { 3242 error = vmem_alloc(kernel_arena, PAGE_SIZE, 3243 M_BESTFIT | M_WAITOK, &vaddr[i]); 3244 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 3245 needs_mapping = TRUE; 3246 } else { 3247 vaddr[i] = PHYS_TO_DMAP(paddr); 3248 } 3249 } 3250 3251 /* Exit early if everything is covered by the DMAP */ 3252 if (!needs_mapping) 3253 return (FALSE); 3254 3255 if (!can_fault) 3256 sched_pin(); 3257 for (i = 0; i < count; i++) { 3258 paddr = VM_PAGE_TO_PHYS(page[i]); 3259 if (paddr >= DMAP_MAX_PHYSADDR) { 3260 panic( 3261 "pmap_map_io_transient: TODO: Map out of DMAP data"); 3262 } 3263 } 3264 3265 return (needs_mapping); 3266} 3267 3268void 3269pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3270 boolean_t can_fault) 3271{ 3272 vm_paddr_t paddr; 3273 int i; 3274 3275 if (!can_fault) 3276 sched_unpin(); 3277 for (i = 0; i < count; i++) { 3278 paddr = VM_PAGE_TO_PHYS(page[i]); 3279 if (paddr >= DMAP_MAX_PHYSADDR) { 3280 panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); 3281 } 3282 } 3283} 3284