1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 2003 Peter Wemm
9 * All rights reserved.
10 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
11 * All rights reserved.
12 * Copyright (c) 2014 Andrew Turner
13 * All rights reserved.
14 * Copyright (c) 2014 The FreeBSD Foundation
15 * All rights reserved.
16 * Copyright (c) 2015-2016 Ruslan Bukin <br@bsdpad.com>
17 * All rights reserved.
18 *
19 * This code is derived from software contributed to Berkeley by
20 * the Systems Programming Group of the University of Utah Computer
21 * Science Department and William Jolitz of UUNET Technologies Inc.
22 *
23 * Portions of this software were developed by Andrew Turner under
24 * sponsorship from The FreeBSD Foundation.
25 *
26 * Portions of this software were developed by SRI International and the
27 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
28 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
29 *
30 * Portions of this software were developed by the University of Cambridge
31 * Computer Laboratory as part of the CTSRD Project, with support from the
32 * UK Higher Education Innovation Fund (HEIF).
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the University of
45 *	California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
63 */
64/*-
65 * Copyright (c) 2003 Networks Associates Technology, Inc.
66 * All rights reserved.
67 *
68 * This software was developed for the FreeBSD Project by Jake Burkholder,
69 * Safeport Network Services, and Network Associates Laboratories, the
70 * Security Research Division of Network Associates, Inc. under
71 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
72 * CHATS research program.
73 *
74 * Redistribution and use in source and binary forms, with or without
75 * modification, are permitted provided that the following conditions
76 * are met:
77 * 1. Redistributions of source code must retain the above copyright
78 *    notice, this list of conditions and the following disclaimer.
79 * 2. Redistributions in binary form must reproduce the above copyright
80 *    notice, this list of conditions and the following disclaimer in the
81 *    documentation and/or other materials provided with the distribution.
82 *
83 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
84 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
87 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93 * SUCH DAMAGE.
94 */
95
96#include <sys/cdefs.h>
97__FBSDID("$FreeBSD: stable/11/sys/riscv/riscv/pmap.c 338484 2018-09-05 21:28:33Z kib $");
98
99/*
100 *	Manages physical address maps.
101 *
102 *	Since the information managed by this module is
103 *	also stored by the logical address mapping module,
104 *	this module may throw away valid virtual-to-physical
105 *	mappings at almost any time.  However, invalidations
106 *	of virtual-to-physical mappings must be done as
107 *	requested.
108 *
109 *	In order to cope with hardware architectures which
110 *	make virtual-to-physical map invalidations expensive,
111 *	this module may delay invalidation or reduced-protection
112 *	operations until such time as they are actually
113 *	necessary.  This module is given full information as
114 *	to which processors are currently using which maps,
115 *	and to when physical maps must be made correct.
116 */
117
118#include <sys/param.h>
119#include <sys/bus.h>
120#include <sys/systm.h>
121#include <sys/kernel.h>
122#include <sys/ktr.h>
123#include <sys/lock.h>
124#include <sys/malloc.h>
125#include <sys/mman.h>
126#include <sys/msgbuf.h>
127#include <sys/mutex.h>
128#include <sys/proc.h>
129#include <sys/rwlock.h>
130#include <sys/sx.h>
131#include <sys/vmem.h>
132#include <sys/vmmeter.h>
133#include <sys/sched.h>
134#include <sys/sysctl.h>
135#include <sys/smp.h>
136
137#include <vm/vm.h>
138#include <vm/vm_param.h>
139#include <vm/vm_kern.h>
140#include <vm/vm_page.h>
141#include <vm/vm_map.h>
142#include <vm/vm_object.h>
143#include <vm/vm_extern.h>
144#include <vm/vm_pageout.h>
145#include <vm/vm_pager.h>
146#include <vm/vm_radix.h>
147#include <vm/vm_reserv.h>
148#include <vm/uma.h>
149
150#include <machine/machdep.h>
151#include <machine/md_var.h>
152#include <machine/pcb.h>
153
154#define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
155#define	NUPDE			(NPDEPG * NPDEPG)
156#define	NUSERPGTBLS		(NUPDE + NPDEPG)
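/*
 * Illustrative arithmetic only: assuming 4 KiB pages and 8-byte page table
 * entries, NPDEPG is 4096 / 8 = 512 entries per page, NUPDE is
 * 512 * 512 = 262144 page-directory entries, and NUSERPGTBLS is
 * 262144 + 512 = 262656 page table pages for a fully populated user map.
 */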
157
158#if !defined(DIAGNOSTIC)
159#ifdef __GNUC_GNU_INLINE__
160#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
161#else
162#define PMAP_INLINE	extern inline
163#endif
164#else
165#define PMAP_INLINE
166#endif
167
168#ifdef PV_STATS
169#define PV_STAT(x)	do { x ; } while (0)
170#else
171#define PV_STAT(x)	do { } while (0)
172#endif
173
174#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
175
176#define	NPV_LIST_LOCKS	MAXCPU
177
178#define	PHYS_TO_PV_LIST_LOCK(pa)	\
179			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
180
181#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
182	struct rwlock **_lockp = (lockp);		\
183	struct rwlock *_new_lock;			\
184							\
185	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
186	if (_new_lock != *_lockp) {			\
187		if (*_lockp != NULL)			\
188			rw_wunlock(*_lockp);		\
189		*_lockp = _new_lock;			\
190		rw_wlock(*_lockp);			\
191	}						\
192} while (0)
193
194#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
195			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
196
197#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
198	struct rwlock **_lockp = (lockp);		\
199							\
200	if (*_lockp != NULL) {				\
201		rw_wunlock(*_lockp);			\
202		*_lockp = NULL;				\
203	}						\
204} while (0)
205
206#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
207			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
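/*
 * Sketch of the locking protocol implied by the macros above: callers keep
 * a single pv list lock pointer, initially NULL, and switch it with
 * CHANGE_PV_LIST_LOCK_TO_VM_PAGE() before touching a page's pv list; the
 * macro drops any previously held lock and write-locks the lock that the
 * page's physical address hashes to.  For example:
 *
 *	struct rwlock *lock = NULL;
 *	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 *	 ... modify m->md.pv_list ...
 *	RELEASE_PV_LIST_LOCK(&lock);
 */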
208
209/* The list of all the user pmaps */
210LIST_HEAD(pmaplist, pmap);
211static struct pmaplist allpmaps;
212
213static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");
214
215struct pmap kernel_pmap_store;
216
217vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
218vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
219vm_offset_t kernel_vm_end = 0;
220
221struct msgbuf *msgbufp = NULL;
222
223static struct rwlock_padalign pvh_global_lock;
224
225/*
226 * Data for the pv entry allocation mechanism
227 */
228static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
229static struct mtx pv_chunks_mutex;
230static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
231
232static void	free_pv_chunk(struct pv_chunk *pc);
233static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
234static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
235static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
236static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
237static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
238		    vm_offset_t va);
239static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
240    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
241static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
242    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
243static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
244    vm_page_t m, struct rwlock **lockp);
245
246static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
247		struct rwlock **lockp);
248
249static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
250    struct spglist *free);
251static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
252
253/*
254 * These load the old table data and store the new value.
255 * They need to be atomic because the hardware page-table walker may update
256 * the table (for example the accessed/dirty bits) at the same time as the CPU.
257 */
258#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
259#define	pmap_set(table, mask) atomic_set_64(table, mask)
260#define	pmap_load_clear(table) atomic_swap_64(table, 0)
261#define	pmap_load(table) (*table)
262
263/********************/
264/* Inline functions */
265/********************/
266
267static __inline void
268pagecopy(void *s, void *d)
269{
270
271	memcpy(d, s, PAGE_SIZE);
272}
273
274static __inline void
275pagezero(void *p)
276{
277
278	bzero(p, PAGE_SIZE);
279}
280
281#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
282#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
283#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)
284
285#define	PTE_TO_PHYS(pte)	((pte >> PTE_PPN0_S) * PAGE_SIZE)
286
287static __inline pd_entry_t *
288pmap_l1(pmap_t pmap, vm_offset_t va)
289{
290
291	return (&pmap->pm_l1[pmap_l1_index(va)]);
292}
293
294static __inline pd_entry_t *
295pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
296{
297	vm_paddr_t phys;
298	pd_entry_t *l2;
299
300	phys = PTE_TO_PHYS(pmap_load(l1));
301	l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
302
303	return (&l2[pmap_l2_index(va)]);
304}
305
306static __inline pd_entry_t *
307pmap_l2(pmap_t pmap, vm_offset_t va)
308{
309	pd_entry_t *l1;
310
311	l1 = pmap_l1(pmap, va);
312	if (l1 == NULL)
313		return (NULL);
314	if ((pmap_load(l1) & PTE_VALID) == 0)
315		return (NULL);
316	if ((pmap_load(l1) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
317		return (NULL);
318
319	return (pmap_l1_to_l2(l1, va));
320}
321
322static __inline pt_entry_t *
323pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
324{
325	vm_paddr_t phys;
326	pt_entry_t *l3;
327
328	phys = PTE_TO_PHYS(pmap_load(l2));
329	l3 = (pt_entry_t *)PHYS_TO_DMAP(phys);
330
331	return (&l3[pmap_l3_index(va)]);
332}
333
334static __inline pt_entry_t *
335pmap_l3(pmap_t pmap, vm_offset_t va)
336{
337	pd_entry_t *l2;
338
339	l2 = pmap_l2(pmap, va);
340	if (l2 == NULL)
341		return (NULL);
342	if ((pmap_load(l2) & PTE_VALID) == 0)
343		return (NULL);
344	if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
345		return (NULL);
346
347	return (pmap_l2_to_l3(l2, va));
348}
349
350
351static __inline int
352pmap_is_write(pt_entry_t entry)
353{
354
355	if (entry & (1 << PTE_TYPE_S))
356		return (1);
357
358	return (0);
359}
360
361static __inline int
362pmap_is_current(pmap_t pmap)
363{
364
365	return ((pmap == pmap_kernel()) ||
366	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
367}
368
369static __inline int
370pmap_l3_valid(pt_entry_t l3)
371{
372
373	return (l3 & PTE_VALID);
374}
375
376static __inline int
377pmap_l3_valid_cacheable(pt_entry_t l3)
378{
379
380	/* TODO */
381
382	return (0);
383}
384
385#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
386
387/* Checks if the page is dirty. */
388static inline int
389pmap_page_dirty(pt_entry_t pte)
390{
391
392	return (pte & PTE_DIRTY);
393}
394
395static __inline void
396pmap_resident_count_inc(pmap_t pmap, int count)
397{
398
399	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
400	pmap->pm_stats.resident_count += count;
401}
402
403static __inline void
404pmap_resident_count_dec(pmap_t pmap, int count)
405{
406
407	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
408	KASSERT(pmap->pm_stats.resident_count >= count,
409	    ("pmap %p resident count underflow %ld %d", pmap,
410	    pmap->pm_stats.resident_count, count));
411	pmap->pm_stats.resident_count -= count;
412}
413
414static void
415pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
416    pt_entry_t entry)
417{
418	struct pmap *user_pmap;
419	pd_entry_t *l1;
420
421	/* Distribute new kernel L1 entry to all the user pmaps */
422	if (pmap != kernel_pmap)
423		return;
424
425	LIST_FOREACH(user_pmap, &allpmaps, pm_list) {
426		l1 = &user_pmap->pm_l1[l1index];
427		if (entry)
428			pmap_load_store(l1, entry);
429		else
430			pmap_load_clear(l1);
431	}
432}
433
434static pt_entry_t *
435pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
436    u_int *l2_slot)
437{
438	pt_entry_t *l2;
439	pd_entry_t *l1;
440
441	l1 = (pd_entry_t *)l1pt;
442	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
443
444	/* Check that locore used an L1 table mapping */
445	KASSERT((l1[*l1_slot] & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S),
446		("Invalid bootstrap L1 table"));
447
448	/* Find the address of the L2 table */
449	l2 = (pt_entry_t *)init_pt_va;
450	*l2_slot = pmap_l2_index(va);
451
452	return (l2);
453}
454
455static vm_paddr_t
456pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
457{
458	u_int l1_slot, l2_slot;
459	pt_entry_t *l2;
460	u_int ret;
461
462	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
463
464	/* L2 is superpages */
465	ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT;
466	ret += (va & L2_OFFSET);
467
468	return (ret);
469}
470
471static void
472pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart)
473{
474	vm_offset_t va;
475	vm_paddr_t pa;
476	pd_entry_t *l1;
477	u_int l1_slot;
478	pt_entry_t entry;
479	pn_t pn;
480
481	pa = kernstart & ~L1_OFFSET;
482	va = DMAP_MIN_ADDRESS;
483	l1 = (pd_entry_t *)l1pt;
484	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
485
486	for (; va < DMAP_MAX_ADDRESS;
487	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
488		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
489
490		/* superpages */
491		pn = (pa / PAGE_SIZE);
492		entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
493		entry |= (pn << PTE_PPN0_S);
494		pmap_load_store(&l1[l1_slot], entry);
495	}
496
497	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
498	cpu_tlb_flushID();
499}
500
501static vm_offset_t
502pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
503{
504	vm_offset_t l2pt, l3pt;
505	pt_entry_t entry;
506	pd_entry_t *l2;
507	vm_paddr_t pa;
508	u_int l2_slot;
509	pn_t pn;
510
511	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
512
513	l2 = pmap_l2(kernel_pmap, va);
514	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
515	l2pt = (vm_offset_t)l2;
516	l2_slot = pmap_l2_index(va);
517	l3pt = l3_start;
518
519	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
520		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
521
522		pa = pmap_early_vtophys(l1pt, l3pt);
523		pn = (pa / PAGE_SIZE);
524		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
525		entry |= (pn << PTE_PPN0_S);
526		pmap_load_store(&l2[l2_slot], entry);
527		l3pt += PAGE_SIZE;
528	}
529
530	/* Clean the L2 page table */
531	memset((void *)l3_start, 0, l3pt - l3_start);
532	cpu_dcache_wb_range(l3_start, l3pt - l3_start);
533
534	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
535
536	return (l3pt);
537}
538
539/*
540 *	Bootstrap the system enough to run with virtual memory.
541 */
542void
543pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
544{
545	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
546	uint64_t kern_delta;
547	pt_entry_t *l2;
548	vm_offset_t va, freemempos;
549	vm_offset_t dpcpu, msgbufpv;
550	vm_paddr_t pa, min_pa;
551	int i;
552
553	kern_delta = KERNBASE - kernstart;
554	physmem = 0;
555
556	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
557	printf("%lx\n", l1pt);
558	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
559
560	/* Set this early so we can use the pagetable walking functions */
561	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
562	PMAP_LOCK_INIT(kernel_pmap);
563
564 	/*
565	 * Initialize the global pv list lock.
566	 */
567	rw_init(&pvh_global_lock, "pmap pv global");
568
569	LIST_INIT(&allpmaps);
570
571	/* Assume the address we were loaded to is a valid physical address */
572	min_pa = KERNBASE - kern_delta;
573
574	/*
575	 * Find the minimum physical address. physmap is sorted,
576	 * but may contain empty ranges.
577	 */
578	for (i = 0; i < (physmap_idx * 2); i += 2) {
579		if (physmap[i] == physmap[i + 1])
580			continue;
581		if (physmap[i] <= min_pa)
582			min_pa = physmap[i];
583		break;
584	}
585
586	/* Create a direct map region early so we can use it for pa -> va */
587	pmap_bootstrap_dmap(l1pt, min_pa);
588
589	va = KERNBASE;
590	pa = KERNBASE - kern_delta;
591
592	/*
593	 * Start to initialize phys_avail by copying from physmap
594	 * up to the physical address KERNBASE points at.
595	 */
596	map_slot = avail_slot = 0;
597	for (; map_slot < (physmap_idx * 2); map_slot += 2) {
598		if (physmap[map_slot] == physmap[map_slot + 1])
599			continue;
600
601		phys_avail[avail_slot] = physmap[map_slot];
602		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
603		physmem += (phys_avail[avail_slot + 1] -
604		    phys_avail[avail_slot]) >> PAGE_SHIFT;
605		avail_slot += 2;
606	}
607
608	/* Add the memory before the kernel */
609	if (physmap[avail_slot] < pa) {
610		phys_avail[avail_slot] = physmap[map_slot];
611		phys_avail[avail_slot + 1] = pa;
612		physmem += (phys_avail[avail_slot + 1] -
613		    phys_avail[avail_slot]) >> PAGE_SHIFT;
614		avail_slot += 2;
615	}
616	used_map_slot = map_slot;
617
618	/*
619	 * Read the page table to find out what is already mapped.
620	 * This assumes we have mapped a block of memory from KERNBASE
621	 * using a single L1 entry.
622	 */
623	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
624
625	/* Sanity check the index, KERNBASE should be the first VA */
626	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
627
628	/* Find how many pages we have mapped */
629	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
630		if ((l2[l2_slot] & PTE_VALID) == 0)
631			break;
632
633		/* Check locore used L2 superpages */
634		KASSERT((l2[l2_slot] & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S),
635		    ("Invalid bootstrap L2 table"));
636
637		va += L2_SIZE;
638		pa += L2_SIZE;
639	}
640
641	va = roundup2(va, L2_SIZE);
642
643	freemempos = KERNBASE + kernlen;
644	freemempos = roundup2(freemempos, PAGE_SIZE);
645
646	/* Create the l3 tables for the early devmap */
647	freemempos = pmap_bootstrap_l3(l1pt,
648	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
649
650	cpu_tlb_flushID();
651
652#define alloc_pages(var, np)						\
653	(var) = freemempos;						\
654	freemempos += (np * PAGE_SIZE);					\
655	memset((char *)(var), 0, ((np) * PAGE_SIZE));
656
657	/* Allocate dynamic per-cpu area. */
658	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
659	dpcpu_init((void *)dpcpu, 0);
660
661	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
662	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
663	msgbufp = (void *)msgbufpv;
664
665	virtual_avail = roundup2(freemempos, L2_SIZE);
666	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
667	kernel_vm_end = virtual_avail;
668
669	pa = pmap_early_vtophys(l1pt, freemempos);
670
671	/* Finish initialising phys_avail */
672	map_slot = used_map_slot;
673	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
674	    map_slot < (physmap_idx * 2); map_slot += 2) {
675		if (physmap[map_slot] == physmap[map_slot + 1])
676			continue;
677
678		/* Have we used the current range? */
679		if (physmap[map_slot + 1] <= pa)
680			continue;
681
682		/* Do we need to split the entry? */
683		if (physmap[map_slot] < pa) {
684			phys_avail[avail_slot] = pa;
685			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
686		} else {
687			phys_avail[avail_slot] = physmap[map_slot];
688			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
689		}
690		physmem += (phys_avail[avail_slot + 1] -
691		    phys_avail[avail_slot]) >> PAGE_SHIFT;
692
693		avail_slot += 2;
694	}
695	phys_avail[avail_slot] = 0;
696	phys_avail[avail_slot + 1] = 0;
697
698	/*
699	 * Maxmem isn't the "maximum memory", it's one larger than the
700	 * highest page of the physical address space.  It should be
701	 * called something like "Maxphyspage".
702	 */
703	Maxmem = atop(phys_avail[avail_slot - 1]);
704
705	cpu_tlb_flushID();
706}
707
708/*
709 *	Initialize a vm_page's machine-dependent fields.
710 */
711void
712pmap_page_init(vm_page_t m)
713{
714
715	TAILQ_INIT(&m->md.pv_list);
716	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
717}
718
719/*
720 *	Initialize the pmap module.
721 *	Called by vm_init, to initialize any structures that the pmap
722 *	system needs to map virtual memory.
723 */
724void
725pmap_init(void)
726{
727	int i;
728
729	/*
730	 * Initialize the pv chunk list mutex.
731	 */
732	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
733
734	/*
735	 * Initialize the pool of pv list locks.
736	 */
737	for (i = 0; i < NPV_LIST_LOCKS; i++)
738		rw_init(&pv_list_locks[i], "pmap pv list");
739}
740
741/*
742 * Normal, non-SMP, invalidation functions.
743 * We inline these within pmap.c for speed.
744 */
745PMAP_INLINE void
746pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
747{
748
749	/* TODO */
750
751	sched_pin();
752	__asm __volatile("sfence.vm");
753	sched_unpin();
754}
755
756PMAP_INLINE void
757pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
758{
759
760	/* TODO */
761
762	sched_pin();
763	__asm __volatile("sfence.vm");
764	sched_unpin();
765}
766
767PMAP_INLINE void
768pmap_invalidate_all(pmap_t pmap)
769{
770
771	/* TODO */
772
773	sched_pin();
774	__asm __volatile("sfence.vm");
775	sched_unpin();
776}
777
778/*
779 *	Routine:	pmap_extract
780 *	Function:
781 *		Extract the physical page address associated
782 *		with the given map/virtual_address pair.
783 */
784vm_paddr_t
785pmap_extract(pmap_t pmap, vm_offset_t va)
786{
787	pd_entry_t *l2p, l2;
788	pt_entry_t *l3p, l3;
789	vm_paddr_t pa;
790
791	pa = 0;
792	PMAP_LOCK(pmap);
793	/*
794	 * Start with the l2 table. We are unable to allocate
795	 * pages in the l1 table.
796	 */
797	l2p = pmap_l2(pmap, va);
798	if (l2p != NULL) {
799		l2 = pmap_load(l2p);
800		if ((l2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S)) {
801			l3p = pmap_l2_to_l3(l2p, va);
802			if (l3p != NULL) {
803				l3 = pmap_load(l3p);
804				pa = PTE_TO_PHYS(l3);
805				pa |= (va & L3_OFFSET);
806			}
807		} else {
808			/* L2 is superpages */
809			pa = (l2 >> PTE_PPN1_S) << L2_SHIFT;
810			pa |= (va & L2_OFFSET);
811		}
812	}
813	PMAP_UNLOCK(pmap);
814	return (pa);
815}
816
817/*
818 *	Routine:	pmap_extract_and_hold
819 *	Function:
820 *		Atomically extract and hold the physical page
821 *		with the given pmap and virtual address pair
822 *		if that mapping permits the given protection.
823 */
824vm_page_t
825pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
826{
827	pt_entry_t *l3p, l3;
828	vm_paddr_t phys;
829	vm_paddr_t pa;
830	vm_page_t m;
831
832	pa = 0;
833	m = NULL;
834	PMAP_LOCK(pmap);
835retry:
836	l3p = pmap_l3(pmap, va);
837	if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
838		if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) {
839			phys = PTE_TO_PHYS(l3);
840			if (vm_page_pa_tryrelock(pmap, phys, &pa))
841				goto retry;
842			m = PHYS_TO_VM_PAGE(phys);
843			vm_page_hold(m);
844		}
845	}
846	PA_UNLOCK_COND(pa);
847	PMAP_UNLOCK(pmap);
848	return (m);
849}
850
851vm_paddr_t
852pmap_kextract(vm_offset_t va)
853{
854	pd_entry_t *l2;
855	pt_entry_t *l3;
856	vm_paddr_t pa;
857
858	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
859		pa = DMAP_TO_PHYS(va);
860	} else {
861		l2 = pmap_l2(kernel_pmap, va);
862		if (l2 == NULL)
863			panic("pmap_kextract: No l2");
864		if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) {
865			/* superpages */
866			pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT;
867			pa |= (va & L2_OFFSET);
868			return (pa);
869		}
870
871		l3 = pmap_l2_to_l3(l2, va);
872		if (l3 == NULL)
873			panic("pmap_kextract: No l3...");
874		pa = PTE_TO_PHYS(pmap_load(l3));
875		pa |= (va & PAGE_MASK);
876	}
877	return (pa);
878}
879
880/***************************************************
881 * Low level mapping routines.....
882 ***************************************************/
883
884void
885pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
886{
887	pt_entry_t entry;
888	pt_entry_t *l3;
889	vm_offset_t va;
890	pn_t pn;
891
892	KASSERT((pa & L3_OFFSET) == 0,
893	   ("pmap_kenter_device: Invalid physical address"));
894	KASSERT((sva & L3_OFFSET) == 0,
895	   ("pmap_kenter_device: Invalid virtual address"));
896	KASSERT((size & PAGE_MASK) == 0,
897	    ("pmap_kenter_device: Mapping is not page-sized"));
898
899	va = sva;
900	while (size != 0) {
901		l3 = pmap_l3(kernel_pmap, va);
902		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
903
904		pn = (pa / PAGE_SIZE);
905		entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
906		entry |= (pn << PTE_PPN0_S);
907		pmap_load_store(l3, entry);
908
909		PTE_SYNC(l3);
910
911		va += PAGE_SIZE;
912		pa += PAGE_SIZE;
913		size -= PAGE_SIZE;
914	}
915	pmap_invalidate_range(kernel_pmap, sva, va);
916}
917
918/*
919 * Remove a page from the kernel pagetables.
920 * Note: not SMP coherent.
921 */
922PMAP_INLINE void
923pmap_kremove(vm_offset_t va)
924{
925	pt_entry_t *l3;
926
927	l3 = pmap_l3(kernel_pmap, va);
928	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
929
930	if (pmap_l3_valid_cacheable(pmap_load(l3)))
931		cpu_dcache_wb_range(va, L3_SIZE);
932	pmap_load_clear(l3);
933	PTE_SYNC(l3);
934	pmap_invalidate_page(kernel_pmap, va);
935}
936
937void
938pmap_kremove_device(vm_offset_t sva, vm_size_t size)
939{
940	pt_entry_t *l3;
941	vm_offset_t va;
942
943	KASSERT((sva & L3_OFFSET) == 0,
944	   ("pmap_kremove_device: Invalid virtual address"));
945	KASSERT((size & PAGE_MASK) == 0,
946	    ("pmap_kremove_device: Mapping is not page-sized"));
947
948	va = sva;
949	while (size != 0) {
950		l3 = pmap_l3(kernel_pmap, va);
951		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
952		pmap_load_clear(l3);
953		PTE_SYNC(l3);
954
955		va += PAGE_SIZE;
956		size -= PAGE_SIZE;
957	}
958	pmap_invalidate_range(kernel_pmap, sva, va);
959}
960
961/*
962 *	Used to map a range of physical addresses into kernel
963 *	virtual address space.
964 *
965 *	The value passed in '*virt' is a suggested virtual address for
966 *	the mapping. Architectures which can support a direct-mapped
967 *	physical to virtual region can return the appropriate address
968 *	within that region, leaving '*virt' unchanged. Other
969 *	architectures should map the pages starting at '*virt' and
970 *	update '*virt' with the first usable address after the mapped
971 *	region.
972 */
973vm_offset_t
974pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
975{
976
977	return (PHYS_TO_DMAP(start));
978}
979
980
981/*
982 * Add a list of wired pages to the kva.  This routine
983 * is only used for temporary
984 * kernel mappings that do not need to have
985 * page modification or references recorded.
986 * Note that old mappings are simply written
987 * over.  The page *must* be wired.
988 * Note: SMP coherent.  Uses a ranged shootdown IPI.
989 */
990void
991pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
992{
993	pt_entry_t *l3, pa;
994	vm_offset_t va;
995	vm_page_t m;
996	pt_entry_t entry;
997	pn_t pn;
998	int i;
999
1000	va = sva;
1001	for (i = 0; i < count; i++) {
1002		m = ma[i];
1003		pa = VM_PAGE_TO_PHYS(m);
1004		pn = (pa / PAGE_SIZE);
1005		l3 = pmap_l3(kernel_pmap, va);
1006
1007		entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
1008		entry |= (pn << PTE_PPN0_S);
1009		pmap_load_store(l3, entry);
1010
1011		PTE_SYNC(l3);
1012		va += L3_SIZE;
1013	}
1014	pmap_invalidate_range(kernel_pmap, sva, va);
1015}
1016
1017/*
1018 * This routine tears out page mappings from the
1019 * kernel -- it is meant only for temporary mappings.
1020 * Note: SMP coherent.  Uses a ranged shootdown IPI.
1021 */
1022void
1023pmap_qremove(vm_offset_t sva, int count)
1024{
1025	pt_entry_t *l3;
1026	vm_offset_t va;
1027
1028	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
1029
1030	va = sva;
1031	while (count-- > 0) {
1032		l3 = pmap_l3(kernel_pmap, va);
1033		KASSERT(l3 != NULL, ("pmap_qremove: Invalid address"));
1034
1035		if (pmap_l3_valid_cacheable(pmap_load(l3)))
1036			cpu_dcache_wb_range(va, L3_SIZE);
1037		pmap_load_clear(l3);
1038		PTE_SYNC(l3);
1039
1040		va += PAGE_SIZE;
1041	}
1042	pmap_invalidate_range(kernel_pmap, sva, va);
1043}
1044
1045/***************************************************
1046 * Page table page management routines.....
1047 ***************************************************/
1048static __inline void
1049pmap_free_zero_pages(struct spglist *free)
1050{
1051	vm_page_t m;
1052
1053	while ((m = SLIST_FIRST(free)) != NULL) {
1054		SLIST_REMOVE_HEAD(free, plinks.s.ss);
1055		/* Preserve the page's PG_ZERO setting. */
1056		vm_page_free_toq(m);
1057	}
1058}
1059
1060/*
1061 * Schedule the specified unused page table page to be freed.  Specifically,
1062 * add the page to the specified list of pages that will be released to the
1063 * physical memory manager after the TLB has been updated.
1064 */
1065static __inline void
1066pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
1067    boolean_t set_PG_ZERO)
1068{
1069
1070	if (set_PG_ZERO)
1071		m->flags |= PG_ZERO;
1072	else
1073		m->flags &= ~PG_ZERO;
1074	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
1075}
1076
1077/*
1078 * Decrements a page table page's wire count, which is used to record the
1079 * number of valid page table entries within the page.  If the wire count
1080 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
1081 * page table page was unmapped and FALSE otherwise.
1082 */
1083static inline boolean_t
1084pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1085{
1086
1087	--m->wire_count;
1088	if (m->wire_count == 0) {
1089		_pmap_unwire_l3(pmap, va, m, free);
1090		return (TRUE);
1091	} else {
1092		return (FALSE);
1093	}
1094}
1095
1096static void
1097_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1098{
1099	vm_paddr_t phys;
1100
1101	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1102	/*
1103	 * unmap the page table page
1104	 */
1105	if (m->pindex >= NUPDE) {
1106		/* PD page */
1107		pd_entry_t *l1;
1108		l1 = pmap_l1(pmap, va);
1109		pmap_load_clear(l1);
1110		pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
1111		PTE_SYNC(l1);
1112	} else {
1113		/* PTE page */
1114		pd_entry_t *l2;
1115		l2 = pmap_l2(pmap, va);
1116		pmap_load_clear(l2);
1117		PTE_SYNC(l2);
1118	}
1119	pmap_resident_count_dec(pmap, 1);
1120	if (m->pindex < NUPDE) {
1121		pd_entry_t *l1;
1122		/* We just released a PT, unhold the matching PD */
1123		vm_page_t pdpg;
1124
1125		l1 = pmap_l1(pmap, va);
1126		phys = PTE_TO_PHYS(pmap_load(l1));
1127		pdpg = PHYS_TO_VM_PAGE(phys);
1128		pmap_unwire_l3(pmap, va, pdpg, free);
1129	}
1130	pmap_invalidate_page(pmap, va);
1131
1132	/*
1133	 * This is a release store so that the ordinary store unmapping
1134	 * the page table page is globally performed before TLB shoot-
1135	 * down is begun.
1136	 */
1137	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
1138
1139	/*
1140	 * Put page on a list so that it is released after
1141	 * *ALL* TLB shootdown is done
1142	 */
1143	pmap_add_delayed_free_list(m, free, TRUE);
1144}
1145
1146/*
1147 * After removing an l3 entry, this routine is used to
1148 * conditionally free the page, and manage the hold/wire counts.
1149 */
1150static int
1151pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
1152    struct spglist *free)
1153{
1154	vm_paddr_t phys;
1155	vm_page_t mpte;
1156
1157	if (va >= VM_MAXUSER_ADDRESS)
1158		return (0);
1159	KASSERT(ptepde != 0, ("pmap_unuse_l3: ptepde != 0"));
1160
1161	phys = PTE_TO_PHYS(ptepde);
1162
1163	mpte = PHYS_TO_VM_PAGE(phys);
1164	return (pmap_unwire_l3(pmap, va, mpte, free));
1165}
1166
1167void
1168pmap_pinit0(pmap_t pmap)
1169{
1170
1171	PMAP_LOCK_INIT(pmap);
1172	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1173	pmap->pm_l1 = kernel_pmap->pm_l1;
1174}
1175
1176int
1177pmap_pinit(pmap_t pmap)
1178{
1179	vm_paddr_t l1phys;
1180	vm_page_t l1pt;
1181
1182	/*
1183	 * allocate the l1 page
1184	 */
1185	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
1186	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
1187		VM_WAIT;
1188
1189	l1phys = VM_PAGE_TO_PHYS(l1pt);
1190	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
1191
1192	if ((l1pt->flags & PG_ZERO) == 0)
1193		pagezero(pmap->pm_l1);
1194
1195	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1196
1197	/* Install kernel pagetables */
1198	memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE);
1199
1200	/* Add to the list of all user pmaps */
1201	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
1202
1203	return (1);
1204}
1205
1206/*
1207 * This routine is called if the desired page table page does not exist.
1208 *
1209 * If page table page allocation fails, this routine may sleep before
1210 * returning NULL.  It sleeps only if a lock pointer was given.
1211 *
1212 * Note: If a page allocation fails at page table level two or three,
1213 * one or two pages may be held during the wait, only to be released
1214 * afterwards.  This conservative approach is easily argued to avoid
1215 * race conditions.
1216 */
1217static vm_page_t
1218_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
1219{
1220	vm_page_t m, /*pdppg, */pdpg;
1221	pt_entry_t entry;
1222	vm_paddr_t phys;
1223	pn_t pn;
1224
1225	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1226
1227	/*
1228	 * Allocate a page table page.
1229	 */
1230	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
1231	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
1232		if (lockp != NULL) {
1233			RELEASE_PV_LIST_LOCK(lockp);
1234			PMAP_UNLOCK(pmap);
1235			rw_runlock(&pvh_global_lock);
1236			VM_WAIT;
1237			rw_rlock(&pvh_global_lock);
1238			PMAP_LOCK(pmap);
1239		}
1240
1241		/*
1242		 * Indicate the need to retry.  While waiting, the page table
1243		 * page may have been allocated.
1244		 */
1245		return (NULL);
1246	}
1247
1248	if ((m->flags & PG_ZERO) == 0)
1249		pmap_zero_page(m);
1250
1251	/*
1252	 * Map the pagetable page into the process address space, if
1253	 * it isn't already there.
1254	 */
1255
1256	if (ptepindex >= NUPDE) {
1257		pd_entry_t *l1;
1258		vm_pindex_t l1index;
1259
1260		l1index = ptepindex - NUPDE;
1261		l1 = &pmap->pm_l1[l1index];
1262
1263		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
1264		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
1265		entry |= (pn << PTE_PPN0_S);
1266		pmap_load_store(l1, entry);
1267		pmap_distribute_l1(pmap, l1index, entry);
1268
1269		PTE_SYNC(l1);
1270
1271	} else {
1272		vm_pindex_t l1index;
1273		pd_entry_t *l1, *l2;
1274
1275		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
1276		l1 = &pmap->pm_l1[l1index];
1277		if (pmap_load(l1) == 0) {
1278			/* recurse for allocating page dir */
1279			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
1280			    lockp) == NULL) {
1281				--m->wire_count;
1282				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1283				vm_page_free_zero(m);
1284				return (NULL);
1285			}
1286		} else {
1287			phys = PTE_TO_PHYS(pmap_load(l1));
1288			pdpg = PHYS_TO_VM_PAGE(phys);
1289			pdpg->wire_count++;
1290		}
1291
1292		phys = PTE_TO_PHYS(pmap_load(l1));
1293		l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
1294		l2 = &l2[ptepindex & Ln_ADDR_MASK];
1295
1296		pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
1297		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
1298		entry |= (pn << PTE_PPN0_S);
1299		pmap_load_store(l2, entry);
1300
1301		PTE_SYNC(l2);
1302	}
1303
1304	pmap_resident_count_inc(pmap, 1);
1305
1306	return (m);
1307}
1308
1309static vm_page_t
1310pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
1311{
1312	vm_pindex_t ptepindex;
1313	pd_entry_t *l2;
1314	vm_paddr_t phys;
1315	vm_page_t m;
1316
1317	/*
1318	 * Calculate pagetable page index
1319	 */
1320	ptepindex = pmap_l2_pindex(va);
1321retry:
1322	/*
1323	 * Get the page directory entry
1324	 */
1325	l2 = pmap_l2(pmap, va);
1326
1327	/*
1328	 * If the page table page is mapped, we just increment the
1329	 * hold count, and activate it.
1330	 */
1331	if (l2 != NULL && pmap_load(l2) != 0) {
1332		phys = PTE_TO_PHYS(pmap_load(l2));
1333		m = PHYS_TO_VM_PAGE(phys);
1334		m->wire_count++;
1335	} else {
1336		/*
1337		 * Here if the pte page isn't mapped, or if it has been
1338		 * deallocated.
1339		 */
1340		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
1341		if (m == NULL && lockp != NULL)
1342			goto retry;
1343	}
1344	return (m);
1345}
1346
1347
1348/***************************************************
1349 * Pmap allocation/deallocation routines.
1350 ***************************************************/
1351
1352/*
1353 * Release any resources held by the given physical map.
1354 * Called when a pmap initialized by pmap_pinit is being released.
1355 * Should only be called if the map contains no valid mappings.
1356 */
1357void
1358pmap_release(pmap_t pmap)
1359{
1360	vm_page_t m;
1361
1362	KASSERT(pmap->pm_stats.resident_count == 0,
1363	    ("pmap_release: pmap resident count %ld != 0",
1364	    pmap->pm_stats.resident_count));
1365
1366	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
1367	m->wire_count--;
1368	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1369	vm_page_free_zero(m);
1370
1371	/* Remove pmap from the allpmaps list */
1372	LIST_REMOVE(pmap, pm_list);
1373
1374	/* Remove kernel pagetables */
1375	bzero(pmap->pm_l1, PAGE_SIZE);
1376}
1377
1378#if 0
1379static int
1380kvm_size(SYSCTL_HANDLER_ARGS)
1381{
1382	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
1383
1384	return sysctl_handle_long(oidp, &ksize, 0, req);
1385}
1386SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1387    0, 0, kvm_size, "LU", "Size of KVM");
1388
1389static int
1390kvm_free(SYSCTL_HANDLER_ARGS)
1391{
1392	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1393
1394	return sysctl_handle_long(oidp, &kfree, 0, req);
1395}
1396SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1397    0, 0, kvm_free, "LU", "Amount of KVM free");
1398#endif /* 0 */
1399
1400/*
1401 * grow the number of kernel page table entries, if needed
1402 */
1403void
1404pmap_growkernel(vm_offset_t addr)
1405{
1406	vm_paddr_t paddr;
1407	vm_page_t nkpg;
1408	pd_entry_t *l1, *l2;
1409	pt_entry_t entry;
1410	pn_t pn;
1411
1412	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1413
1414	addr = roundup2(addr, L2_SIZE);
1415	if (addr - 1 >= vm_map_max(kernel_map))
1416		addr = vm_map_max(kernel_map);
1417	while (kernel_vm_end < addr) {
1418		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
1419		if (pmap_load(l1) == 0) {
1420			/* We need a new PDP entry */
1421			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
1422			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
1423			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1424			if (nkpg == NULL)
1425				panic("pmap_growkernel: no memory to grow kernel");
1426			if ((nkpg->flags & PG_ZERO) == 0)
1427				pmap_zero_page(nkpg);
1428			paddr = VM_PAGE_TO_PHYS(nkpg);
1429
1430			pn = (paddr / PAGE_SIZE);
1431			entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
1432			entry |= (pn << PTE_PPN0_S);
1433			pmap_load_store(l1, entry);
1434			pmap_distribute_l1(kernel_pmap,
1435			    pmap_l1_index(kernel_vm_end), entry);
1436
1437			PTE_SYNC(l1);
1438			continue; /* try again */
1439		}
1440		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
1441		if ((pmap_load(l2) & PTE_REF) != 0) {
1442			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1443			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
1444				kernel_vm_end = vm_map_max(kernel_map);
1445				break;
1446			}
1447			continue;
1448		}
1449
1450		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
1451		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1452		    VM_ALLOC_ZERO);
1453		if (nkpg == NULL)
1454			panic("pmap_growkernel: no memory to grow kernel");
1455		if ((nkpg->flags & PG_ZERO) == 0)
1456			pmap_zero_page(nkpg);
1457		paddr = VM_PAGE_TO_PHYS(nkpg);
1458
1459		pn = (paddr / PAGE_SIZE);
1460		entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
1461		entry |= (pn << PTE_PPN0_S);
1462		pmap_load_store(l2, entry);
1463
1464		PTE_SYNC(l2);
1465		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
1466
1467		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1468		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
1469			kernel_vm_end = vm_map_max(kernel_map);
1470			break;
1471		}
1472	}
1473}
1474
1475
1476/***************************************************
1477 * page management routines.
1478 ***************************************************/
1479
1480CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1481CTASSERT(_NPCM == 3);
1482CTASSERT(_NPCPV == 168);
1483
1484static __inline struct pv_chunk *
1485pv_to_chunk(pv_entry_t pv)
1486{
1487
1488	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1489}
1490
1491#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1492
1493#define	PC_FREE0	0xfffffffffffffffful
1494#define	PC_FREE1	0xfffffffffffffffful
1495#define	PC_FREE2	0x000000fffffffffful
1496
1497static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
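/*
 * Illustrative arithmetic: each chunk tracks _NPCPV (168) pv entries in
 * three 64-bit bitmaps, so PC_FREE0 and PC_FREE1 are fully set
 * (2 * 64 = 128 entries) and PC_FREE2 sets only the remaining low
 * 40 bits (128 + 40 = 168).
 */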
1498
1499#if 0
1500#ifdef PV_STATS
1501static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1502
1503SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1504	"Current number of pv entry chunks");
1505SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1506	"Current number of pv entry chunks allocated");
1507SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1508	"Current number of pv entry chunks frees");
1509SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1510	"Number of times tried to get a chunk page but failed.");
1511
1512static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
1513static int pv_entry_spare;
1514
1515SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1516	"Current number of pv entry frees");
1517SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1518	"Current number of pv entry allocs");
1519SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1520	"Current number of pv entries");
1521SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1522	"Current number of spare pv entries");
1523#endif
1524#endif /* 0 */
1525
1526/*
1527 * We are in a serious low memory condition.  Resort to
1528 * drastic measures to free some pages so we can allocate
1529 * another pv entry chunk.
1530 *
1531 * Returns NULL if PV entries were reclaimed from the specified pmap.
1532 *
1533 * We do not, however, unmap 2mpages because subsequent accesses will
1534 * allocate per-page pv entries until repromotion occurs, thereby
1535 * exacerbating the shortage of free pv entries.
1536 */
1537static vm_page_t
1538reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
1539{
1540
1541	panic("RISCVTODO: reclaim_pv_chunk");
1542}
1543
1544/*
1545 * free the pv_entry back to the free list
1546 */
1547static void
1548free_pv_entry(pmap_t pmap, pv_entry_t pv)
1549{
1550	struct pv_chunk *pc;
1551	int idx, field, bit;
1552
1553	rw_assert(&pvh_global_lock, RA_LOCKED);
1554	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1555	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
1556	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
1557	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
1558	pc = pv_to_chunk(pv);
1559	idx = pv - &pc->pc_pventry[0];
1560	field = idx / 64;
1561	bit = idx % 64;
1562	pc->pc_map[field] |= 1ul << bit;
1563	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
1564	    pc->pc_map[2] != PC_FREE2) {
1565		/* 98% of the time, pc is already at the head of the list. */
1566		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
1567			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1568			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1569		}
1570		return;
1571	}
1572	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1573	free_pv_chunk(pc);
1574}
1575
1576static void
1577free_pv_chunk(struct pv_chunk *pc)
1578{
1579	vm_page_t m;
1580
1581	mtx_lock(&pv_chunks_mutex);
1582 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1583	mtx_unlock(&pv_chunks_mutex);
1584	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
1585	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
1586	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
1587	/* entire chunk is free, return it */
1588	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
1589#if 0 /* TODO: For minidump */
1590	dump_drop_page(m->phys_addr);
1591#endif
1592	vm_page_unwire(m, PQ_NONE);
1593	vm_page_free(m);
1594}
1595
1596/*
1597 * Returns a new PV entry, allocating a new PV chunk from the system when
1598 * needed.  If this PV chunk allocation fails and a PV list lock pointer was
1599 * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
1600 * returned.
1601 *
1602 * The given PV list lock may be released.
1603 */
1604static pv_entry_t
1605get_pv_entry(pmap_t pmap, struct rwlock **lockp)
1606{
1607	int bit, field;
1608	pv_entry_t pv;
1609	struct pv_chunk *pc;
1610	vm_page_t m;
1611
1612	rw_assert(&pvh_global_lock, RA_LOCKED);
1613	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1614	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
1615retry:
1616	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1617	if (pc != NULL) {
1618		for (field = 0; field < _NPCM; field++) {
1619			if (pc->pc_map[field]) {
1620				bit = ffsl(pc->pc_map[field]) - 1;
1621				break;
1622			}
1623		}
1624		if (field < _NPCM) {
1625			pv = &pc->pc_pventry[field * 64 + bit];
1626			pc->pc_map[field] &= ~(1ul << bit);
1627			/* If this was the last item, move it to tail */
1628			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
1629			    pc->pc_map[2] == 0) {
1630				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1631				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
1632				    pc_list);
1633			}
1634			PV_STAT(atomic_add_long(&pv_entry_count, 1));
1635			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
1636			return (pv);
1637		}
1638	}
1639	/* No free items, allocate another chunk */
1640	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
1641	    VM_ALLOC_WIRED);
1642	if (m == NULL) {
1643		if (lockp == NULL) {
1644			PV_STAT(pc_chunk_tryfail++);
1645			return (NULL);
1646		}
1647		m = reclaim_pv_chunk(pmap, lockp);
1648		if (m == NULL)
1649			goto retry;
1650	}
1651	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
1652	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
1653#if 0 /* TODO: This is for minidump */
1654	dump_add_page(m->phys_addr);
1655#endif
1656	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
1657	pc->pc_pmap = pmap;
1658	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
1659	pc->pc_map[1] = PC_FREE1;
1660	pc->pc_map[2] = PC_FREE2;
1661	mtx_lock(&pv_chunks_mutex);
1662	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1663	mtx_unlock(&pv_chunks_mutex);
1664	pv = &pc->pc_pventry[0];
1665	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1666	PV_STAT(atomic_add_long(&pv_entry_count, 1));
1667	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
1668	return (pv);
1669}
1670
1671/*
1672 * First find and then remove the pv entry for the specified pmap and virtual
1673 * address from the specified pv list.  Returns the pv entry if found and NULL
1674 * otherwise.  This operation can be performed on pv lists for either 4KB or
1675 * 2MB page mappings.
1676 */
1677static __inline pv_entry_t
1678pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1679{
1680	pv_entry_t pv;
1681
1682	rw_assert(&pvh_global_lock, RA_LOCKED);
1683	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
1684		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1685			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
1686			pvh->pv_gen++;
1687			break;
1688		}
1689	}
1690	return (pv);
1691}
1692
1693/*
1694 * First find and then destroy the pv entry for the specified pmap and virtual
1695 * address.  This operation can be performed on pv lists for either 4KB or 2MB
1696 * page mappings.
1697 */
1698static void
1699pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1700{
1701	pv_entry_t pv;
1702
1703	pv = pmap_pvh_remove(pvh, pmap, va);
1704
1705	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
1706	free_pv_entry(pmap, pv);
1707}
1708
1709/*
1710 * Conditionally create the PV entry for a 4KB page mapping if the required
1711 * memory can be allocated without resorting to reclamation.
1712 */
1713static boolean_t
1714pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
1715    struct rwlock **lockp)
1716{
1717	pv_entry_t pv;
1718
1719	rw_assert(&pvh_global_lock, RA_LOCKED);
1720	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1721	/* Pass NULL instead of the lock pointer to disable reclamation. */
1722	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
1723		pv->pv_va = va;
1724		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1725		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
1726		m->md.pv_gen++;
1727		return (TRUE);
1728	} else
1729		return (FALSE);
1730}
1731
1732/*
1733 * pmap_remove_l3: do the things to unmap a page in a process
1734 */
1735static int
1736pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
1737    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
1738{
1739	pt_entry_t old_l3;
1740	vm_paddr_t phys;
1741	vm_page_t m;
1742
1743	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1744	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
1745		cpu_dcache_wb_range(va, L3_SIZE);
1746	old_l3 = pmap_load_clear(l3);
1747	PTE_SYNC(l3);
1748	pmap_invalidate_page(pmap, va);
1749	if (old_l3 & PTE_SW_WIRED)
1750		pmap->pm_stats.wired_count -= 1;
1751	pmap_resident_count_dec(pmap, 1);
1752	if (old_l3 & PTE_SW_MANAGED) {
1753		phys = PTE_TO_PHYS(old_l3);
1754		m = PHYS_TO_VM_PAGE(phys);
1755		if (pmap_page_dirty(old_l3))
1756			vm_page_dirty(m);
1757		if (old_l3 & PTE_REF)
1758			vm_page_aflag_set(m, PGA_REFERENCED);
1759		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1760		pmap_pvh_free(&m->md, pmap, va);
1761	}
1762
1763	return (pmap_unuse_l3(pmap, va, l2e, free));
1764}
1765
1766/*
1767 *	Remove the given range of addresses from the specified map.
1768 *
1769 *	It is assumed that the start and end are properly
1770 *	rounded to the page size.
1771 */
1772void
1773pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1774{
1775	struct rwlock *lock;
1776	vm_offset_t va, va_next;
1777	pd_entry_t *l1, *l2;
1778	pt_entry_t l3_pte, *l3;
1779	struct spglist free;
1780	int anyvalid;
1781
1782	/*
1783	 * Perform an unsynchronized read.  This is, however, safe.
1784	 */
1785	if (pmap->pm_stats.resident_count == 0)
1786		return;
1787
1788	anyvalid = 0;
1789	SLIST_INIT(&free);
1790
1791	rw_rlock(&pvh_global_lock);
1792	PMAP_LOCK(pmap);
1793
1794	lock = NULL;
1795	for (; sva < eva; sva = va_next) {
1796		if (pmap->pm_stats.resident_count == 0)
1797			break;
1798
1799		l1 = pmap_l1(pmap, sva);
1800		if (pmap_load(l1) == 0) {
1801			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1802			if (va_next < sva)
1803				va_next = eva;
1804			continue;
1805		}
1806
1807		/*
1808		 * Calculate index for next page table.
1809		 */
1810		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1811		if (va_next < sva)
1812			va_next = eva;
1813
1814		l2 = pmap_l1_to_l2(l1, sva);
1815		if (l2 == NULL)
1816			continue;
1817
1818		l3_pte = pmap_load(l2);
1819
1820		/*
1821		 * Weed out invalid mappings.
1822		 */
1823		if (l3_pte == 0)
1824			continue;
1825		if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
1826			continue;
1827
1828		/*
1829		 * Limit our scan to either the end of the va represented
1830		 * by the current page table page, or to the end of the
1831		 * range being removed.
1832		 */
1833		if (va_next > eva)
1834			va_next = eva;
1835
1836		va = va_next;
1837		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
1838		    sva += L3_SIZE) {
1839			if (l3 == NULL)
1840				panic("l3 == NULL");
1841			if (pmap_load(l3) == 0) {
1842				if (va != va_next) {
1843					pmap_invalidate_range(pmap, va, sva);
1844					va = va_next;
1845				}
1846				continue;
1847			}
1848			if (va == va_next)
1849				va = sva;
1850			if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free,
1851			    &lock)) {
1852				sva += L3_SIZE;
1853				break;
1854			}
1855		}
1856		if (va != va_next)
1857			pmap_invalidate_range(pmap, va, sva);
1858	}
1859	if (lock != NULL)
1860		rw_wunlock(lock);
1861	if (anyvalid)
1862		pmap_invalidate_all(pmap);
1863	rw_runlock(&pvh_global_lock);
1864	PMAP_UNLOCK(pmap);
1865	pmap_free_zero_pages(&free);
1866}
1867
1868/*
1869 *	Routine:	pmap_remove_all
1870 *	Function:
1871 *		Removes this physical page from
1872 *		all physical maps in which it resides.
1873 *		Reflects back modify bits to the pager.
1874 *
1875 *	Notes:
1876 *		Original versions of this routine were very
1877 *		inefficient because they iteratively called
1878 *		pmap_remove (slow...)
1879 */
1880
1881void
1882pmap_remove_all(vm_page_t m)
1883{
1884	pv_entry_t pv;
1885	pmap_t pmap;
1886	pt_entry_t *l3, tl3;
1887	pd_entry_t *l2, tl2;
1888	struct spglist free;
1889
1890	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1891	    ("pmap_remove_all: page %p is not managed", m));
1892	SLIST_INIT(&free);
1893	rw_wlock(&pvh_global_lock);
1894	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1895		pmap = PV_PMAP(pv);
1896		PMAP_LOCK(pmap);
1897		pmap_resident_count_dec(pmap, 1);
1898		l2 = pmap_l2(pmap, pv->pv_va);
1899		KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
1900		tl2 = pmap_load(l2);
1901
1902		KASSERT((tl2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S),
1903		    ("pmap_remove_all: found a superpage when expecting "
1904		    "a table in %p's pv list", m));
1905
1906		l3 = pmap_l2_to_l3(l2, pv->pv_va);
1907		if (pmap_is_current(pmap) &&
1908		    pmap_l3_valid_cacheable(pmap_load(l3)))
1909			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
1910		tl3 = pmap_load_clear(l3);
1911		PTE_SYNC(l3);
1912		pmap_invalidate_page(pmap, pv->pv_va);
1913		if (tl3 & PTE_SW_WIRED)
1914			pmap->pm_stats.wired_count--;
1915		if ((tl3 & PTE_REF) != 0)
1916			vm_page_aflag_set(m, PGA_REFERENCED);
1917
1918		/*
1919		 * Update the vm_page_t clean and reference bits.
1920		 */
1921		if (pmap_page_dirty(tl3))
1922			vm_page_dirty(m);
1923		pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free);
1924		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
1925		m->md.pv_gen++;
1926		free_pv_entry(pmap, pv);
1927		PMAP_UNLOCK(pmap);
1928	}
1929	vm_page_aflag_clear(m, PGA_WRITEABLE);
1930	rw_wunlock(&pvh_global_lock);
1931	pmap_free_zero_pages(&free);
1932}
1933
1934/*
1935 *	Set the physical protection on the
1936 *	specified range of this map as requested.
1937 */
1938void
1939pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1940{
1941	vm_offset_t va, va_next;
1942	pd_entry_t *l1, *l2;
1943	pt_entry_t *l3p, l3;
1944	pt_entry_t entry;
1945
1946	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1947		pmap_remove(pmap, sva, eva);
1948		return;
1949	}
1950
1951	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
1952		return;
1953
1954	PMAP_LOCK(pmap);
1955	for (; sva < eva; sva = va_next) {
1956
1957		l1 = pmap_l1(pmap, sva);
1958		if (pmap_load(l1) == 0) {
1959			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1960			if (va_next < sva)
1961				va_next = eva;
1962			continue;
1963		}
1964
1965		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1966		if (va_next < sva)
1967			va_next = eva;
1968
1969		l2 = pmap_l1_to_l2(l1, sva);
1970		if (l2 == NULL)
1971			continue;
1972		if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
1973			continue;
1974
1975		if (va_next > eva)
1976			va_next = eva;
1977
1979		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
1980		    sva += L3_SIZE) {
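			/*
			 * Clearing the low bit of the TYPE field appears to
			 * turn a writable type (e.g. PTE_TYPE_SRWX) into the
			 * corresponding read/execute-only type in the PTE
			 * encoding used by this file; pmap_remove_write()
			 * strips write access the same way.
			 */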
1981			l3 = pmap_load(l3p);
1982			if (pmap_l3_valid(l3)) {
1983				entry = l3;
1984				entry &= ~(1 << PTE_TYPE_S);
1985				pmap_load_store(l3p, entry);
1986				PTE_SYNC(l3p);
1987				/* XXX: Use pmap_invalidate_range */
1988				pmap_invalidate_page(pmap, sva);
1989			}
1990		}
1991	}
1992	PMAP_UNLOCK(pmap);
1993
1994	/* TODO: Only invalidate entries we are touching */
1995	pmap_invalidate_all(pmap);
1996}
1997
1998/*
1999 *	Insert the given physical page (p) at
2000 *	the specified virtual address (v) in the
2001 *	target physical map with the protection requested.
2002 *
2003 *	If specified, the page will be wired down, meaning
2004 *	that the related pte can not be reclaimed.
2005 *
2006 *	NB:  This is the only routine which MAY NOT lazy-evaluate
2007 *	or lose information.  That is, this routine must actually
2008 *	insert this page into the given map NOW.
2009 */
2010int
2011pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2012    u_int flags, int8_t psind __unused)
2013{
2014	struct rwlock *lock;
2015	pd_entry_t *l1, *l2;
2016	pt_entry_t new_l3, orig_l3;
2017	pt_entry_t *l3;
2018	pv_entry_t pv;
2019	vm_paddr_t opa, pa, l2_pa, l3_pa;
2020	vm_page_t mpte, om, l2_m, l3_m;
2021	boolean_t nosleep;
2022	pt_entry_t entry;
2023	pn_t l2_pn;
2024	pn_t l3_pn;
2025	pn_t pn;
2026
2027	va = trunc_page(va);
2028	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
2029		VM_OBJECT_ASSERT_LOCKED(m->object);
2030	pa = VM_PAGE_TO_PHYS(m);
2031	pn = (pa / PAGE_SIZE);
2032
2033	new_l3 = PTE_VALID;
2034
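	/*
	 * Select the PTE type.  In the encoding used here the type field
	 * combines the privilege level (user mappings are chosen when the
	 * top VA bit is clear) with the permissions (RX vs. RWX).
	 */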
2035	if ((prot & VM_PROT_WRITE) == 0) { /* Read-only */
2036		if ((va >> 63) == 0) /* USER */
2037			new_l3 |= (PTE_TYPE_SURX << PTE_TYPE_S);
2038		else /* KERNEL */
2039			new_l3 |= (PTE_TYPE_SRX << PTE_TYPE_S);
2040	} else {
2041		if ((va >> 63) == 0) /* USER */
2042			new_l3 |= (PTE_TYPE_SURWX << PTE_TYPE_S);
2043		else /* KERNEL */
2044			new_l3 |= (PTE_TYPE_SRWX << PTE_TYPE_S);
2045	}
2046
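	/*
	 * Store the physical page number in the PPN field.  PTE_SW_WIRED is
	 * a software-only bit; wiring is not a hardware feature of the PTE.
	 */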
2047	new_l3 |= (pn << PTE_PPN0_S);
2048	if ((flags & PMAP_ENTER_WIRED) != 0)
2049		new_l3 |= PTE_SW_WIRED;
2050
2051	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
2052
2053	mpte = NULL;
2054
2055	lock = NULL;
2056	rw_rlock(&pvh_global_lock);
2057	PMAP_LOCK(pmap);
2058
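	/*
	 * For user addresses, allocate (or find and hold) the L3 page table
	 * page with pmap_alloc_l3().  For kernel addresses, missing L1 and
	 * L2 entries are created on demand below.
	 */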
2059	if (va < VM_MAXUSER_ADDRESS) {
2060		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
2061		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
2062		if (mpte == NULL && nosleep) {
2063			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
2064			if (lock != NULL)
2065				rw_wunlock(lock);
2066			rw_runlock(&pvh_global_lock);
2067			PMAP_UNLOCK(pmap);
2068			return (KERN_RESOURCE_SHORTAGE);
2069		}
2070		l3 = pmap_l3(pmap, va);
2071	} else {
2072		l3 = pmap_l3(pmap, va);
2073		/* TODO: This is not optimal, but should mostly work */
2074		if (l3 == NULL) {
2075			l2 = pmap_l2(pmap, va);
2076			if (l2 == NULL) {
2077				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2078				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2079				    VM_ALLOC_ZERO);
2080				if (l2_m == NULL)
2081					panic("pmap_enter: l2 pte_m == NULL");
2082				if ((l2_m->flags & PG_ZERO) == 0)
2083					pmap_zero_page(l2_m);
2084
2085				l2_pa = VM_PAGE_TO_PHYS(l2_m);
2086				l2_pn = (l2_pa / PAGE_SIZE);
2087
2088				l1 = pmap_l1(pmap, va);
2089				entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
2090				entry |= (l2_pn << PTE_PPN0_S);
2091				pmap_load_store(l1, entry);
2092				pmap_distribute_l1(pmap, pmap_l1_index(va), entry);
2093				PTE_SYNC(l1);
2094
2095				l2 = pmap_l1_to_l2(l1, va);
2096			}
2097
2098			KASSERT(l2 != NULL,
2099			    ("No l2 table after allocating one"));
2100
2101			l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2102			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
2103			if (l3_m == NULL)
2104				panic("pmap_enter: l3 pte_m == NULL");
2105			if ((l3_m->flags & PG_ZERO) == 0)
2106				pmap_zero_page(l3_m);
2107
2108			l3_pa = VM_PAGE_TO_PHYS(l3_m);
2109			l3_pn = (l3_pa / PAGE_SIZE);
2110			entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
2111			entry |= (l3_pn << PTE_PPN0_S);
2112			pmap_load_store(l2, entry);
2113			PTE_SYNC(l2);
2114			l3 = pmap_l2_to_l3(l2, va);
2115		}
2116		pmap_invalidate_page(pmap, va);
2117	}
2118
2119	om = NULL;
2120	orig_l3 = pmap_load(l3);
2121	opa = PTE_TO_PHYS(orig_l3);
2122
2123	/*
2124	 * Is the specified virtual address already mapped?
2125	 */
2126	if (pmap_l3_valid(orig_l3)) {
2127		/*
2128		 * Wiring change, just update stats. We don't worry about
2129		 * wiring PT pages as they remain resident as long as there
2130		 * are valid mappings in them. Hence, if a user page is wired,
2131		 * the PT page will be also.
2132		 */
2133		if ((flags & PMAP_ENTER_WIRED) != 0 &&
2134		    (orig_l3 & PTE_SW_WIRED) == 0)
2135			pmap->pm_stats.wired_count++;
2136		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
2137		    (orig_l3 & PTE_SW_WIRED) != 0)
2138			pmap->pm_stats.wired_count--;
2139
2140		/*
2141		 * Remove the extra PT page reference.
2142		 */
2143		if (mpte != NULL) {
2144			mpte->wire_count--;
2145			KASSERT(mpte->wire_count > 0,
2146			    ("pmap_enter: missing reference to page table page,"
2147			     " va: 0x%lx", va));
2148		}
2149
2150		/*
2151		 * Has the physical page changed?
2152		 */
2153		if (opa == pa) {
2154			/*
2155			 * No, might be a protection or wiring change.
2156			 */
2157			if ((orig_l3 & PTE_SW_MANAGED) != 0) {
2158				new_l3 |= PTE_SW_MANAGED;
2159				if (pmap_is_write(new_l3))
2160					vm_page_aflag_set(m, PGA_WRITEABLE);
2161			}
2162			goto validate;
2163		}
2164
2165		/* Flush the cache, there might be uncommitted data in it */
2166		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
2167			cpu_dcache_wb_range(va, L3_SIZE);
2168	} else {
2169		/*
2170		 * Increment the counters.
2171		 */
2172		if ((new_l3 & PTE_SW_WIRED) != 0)
2173			pmap->pm_stats.wired_count++;
2174		pmap_resident_count_inc(pmap, 1);
2175	}
2176	/*
2177	 * Enter on the PV list if part of our managed memory.
2178	 */
2179	if ((m->oflags & VPO_UNMANAGED) == 0) {
2180		new_l3 |= PTE_SW_MANAGED;
2181		pv = get_pv_entry(pmap, &lock);
2182		pv->pv_va = va;
2183		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
2184		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2185		m->md.pv_gen++;
2186		if (pmap_is_write(new_l3))
2187			vm_page_aflag_set(m, PGA_WRITEABLE);
2188	}
2189
2190	/*
2191	 * Update the L3 entry.
2192	 */
2193	if (orig_l3 != 0) {
2194validate:
2195		orig_l3 = pmap_load_store(l3, new_l3);
2196		PTE_SYNC(l3);
2197		opa = PTE_TO_PHYS(orig_l3);
2198
2199		if (opa != pa) {
2200			if ((orig_l3 & PTE_SW_MANAGED) != 0) {
2201				om = PHYS_TO_VM_PAGE(opa);
2202				if (pmap_page_dirty(orig_l3))
2203					vm_page_dirty(om);
2204				if ((orig_l3 & PTE_REF) != 0)
2205					vm_page_aflag_set(om, PGA_REFERENCED);
2206				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
2207				pmap_pvh_free(&om->md, pmap, va);
2208			}
2209		} else if (pmap_page_dirty(orig_l3)) {
2210			if ((orig_l3 & PTE_SW_MANAGED) != 0)
2211				vm_page_dirty(m);
2212		}
2213	} else {
2214		pmap_load_store(l3, new_l3);
2215		PTE_SYNC(l3);
2216	}
2217	pmap_invalidate_page(pmap, va);
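	/*
	 * If the mapping was entered into the current user process's pmap,
	 * synchronize the instruction cache for the new page.
	 */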
2218	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
2219		cpu_icache_sync_range(va, PAGE_SIZE);
2220
2221	if (lock != NULL)
2222		rw_wunlock(lock);
2223	rw_runlock(&pvh_global_lock);
2224	PMAP_UNLOCK(pmap);
2225	return (KERN_SUCCESS);
2226}
2227
2228/*
2229 * Maps a sequence of resident pages belonging to the same object.
2230 * The sequence begins with the given page m_start.  This page is
2231 * mapped at the given virtual address start.  Each subsequent page is
2232 * mapped at a virtual address that is offset from start by the same
2233 * amount as the page is offset from m_start within the object.  The
2234 * last page in the sequence is the page with the largest offset from
2235 * m_start that can be mapped at a virtual address less than the given
2236 * virtual address end.  Not every virtual page between start and end
2237 * is mapped; only those for which a resident page exists with the
2238 * corresponding offset from m_start are mapped.
2239 */
2240void
2241pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2242    vm_page_t m_start, vm_prot_t prot)
2243{
2244	struct rwlock *lock;
2245	vm_offset_t va;
2246	vm_page_t m, mpte;
2247	vm_pindex_t diff, psize;
2248
2249	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2250
2251	psize = atop(end - start);
2252	mpte = NULL;
2253	m = m_start;
2254	lock = NULL;
2255	rw_rlock(&pvh_global_lock);
2256	PMAP_LOCK(pmap);
2257	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2258		va = start + ptoa(diff);
2259		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2260		m = TAILQ_NEXT(m, listq);
2261	}
2262	if (lock != NULL)
2263		rw_wunlock(lock);
2264	rw_runlock(&pvh_global_lock);
2265	PMAP_UNLOCK(pmap);
2266}
2267
2268/*
2269 * This code makes some *MAJOR* assumptions:
2270 * 1. The current pmap and the given pmap exist.
2271 * 2. Not wired.
2272 * 3. Read access.
2273 * 4. No page table pages.
2274 * but is *MUCH* faster than pmap_enter...
2275 */
2276
2277void
2278pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2279{
2280	struct rwlock *lock;
2281
2282	lock = NULL;
2283	rw_rlock(&pvh_global_lock);
2284	PMAP_LOCK(pmap);
2285	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2286	if (lock != NULL)
2287		rw_wunlock(lock);
2288	rw_runlock(&pvh_global_lock);
2289	PMAP_UNLOCK(pmap);
2290}
2291
2292static vm_page_t
2293pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2294    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2295{
2296	struct spglist free;
2297	vm_paddr_t phys;
2298	pd_entry_t *l2;
2299	pt_entry_t *l3;
2300	vm_paddr_t pa;
2301	pt_entry_t entry;
2302	pn_t pn;
2303
2304	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2305	    (m->oflags & VPO_UNMANAGED) != 0,
2306	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2307	rw_assert(&pvh_global_lock, RA_LOCKED);
2308	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2309
2310	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
2311	/*
2312	 * In the case that a page table page is not
2313	 * resident, we are creating it here.
2314	 */
2315	if (va < VM_MAXUSER_ADDRESS) {
2316		vm_pindex_t l2pindex;
2317
2318		/*
2319		 * Calculate pagetable page index
2320		 */
2321		l2pindex = pmap_l2_pindex(va);
2322		if (mpte && (mpte->pindex == l2pindex)) {
2323			mpte->wire_count++;
2324		} else {
2325			/*
2326			 * Get the l2 entry
2327			 */
2328			l2 = pmap_l2(pmap, va);
2329
2330			/*
2331			 * If the page table page is mapped, we just increment
2332			 * the hold count, and activate it.  Otherwise, we
2333			 * attempt to allocate a page table page.  If this
2334			 * attempt fails, we don't retry.  Instead, we give up.
2335			 */
2336			if (l2 != NULL && pmap_load(l2) != 0) {
2337				phys = PTE_TO_PHYS(pmap_load(l2));
2338				mpte = PHYS_TO_VM_PAGE(phys);
2339				mpte->wire_count++;
2340			} else {
2341				/*
2342				 * Pass NULL instead of the PV list lock
2343				 * pointer, because we don't intend to sleep.
2344				 */
2345				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
2346				if (mpte == NULL)
2347					return (mpte);
2348			}
2349		}
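		/*
		 * Access the L3 table through the direct map using the
		 * page table page's physical address.
		 */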
2350		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
2351		l3 = &l3[pmap_l3_index(va)];
2352	} else {
2353		mpte = NULL;
2354		l3 = pmap_l3(kernel_pmap, va);
2355	}
2356	if (l3 == NULL)
2357		panic("pmap_enter_quick_locked: No l3");
2358	if (pmap_load(l3) != 0) {
2359		if (mpte != NULL) {
2360			mpte->wire_count--;
2361			mpte = NULL;
2362		}
2363		return (mpte);
2364	}
2365
2366	/*
2367	 * Enter on the PV list if part of our managed memory.
2368	 */
2369	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2370	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
2371		if (mpte != NULL) {
2372			SLIST_INIT(&free);
2373			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
2374				pmap_invalidate_page(pmap, va);
2375				pmap_free_zero_pages(&free);
2376			}
2377			mpte = NULL;
2378		}
2379		return (mpte);
2380	}
2381
2382	/*
2383	 * Increment counters
2384	 */
2385	pmap_resident_count_inc(pmap, 1);
2386
2387	pa = VM_PAGE_TO_PHYS(m);
2388	pn = (pa / PAGE_SIZE);
2389
2390	/* RISCVTODO: check permissions */
2391	entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
2392	entry |= (pn << PTE_PPN0_S);
2393
2394	/*
2395	 * Now validate the mapping (RWX for now; see the RISCVTODO above).
2396	 */
2397	if ((m->oflags & VPO_UNMANAGED) == 0)
2398		entry |= PTE_SW_MANAGED;
2399	pmap_load_store(l3, entry);
2400
2401	PTE_SYNC(l3);
2402	pmap_invalidate_page(pmap, va);
2403	return (mpte);
2404}
2405
2406/*
2407 * This code is intended to map large physical mmap regions into the
2408 * processor address space.  On this pmap it is currently a no-op
2409 * beyond the assertions below.
2410 */
2411void
2412pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2413    vm_pindex_t pindex, vm_size_t size)
2414{
2415
2416	VM_OBJECT_ASSERT_WLOCKED(object);
2417	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2418	    ("pmap_object_init_pt: non-device object"));
2419}
2420
2421/*
2422 *	Clear the wired attribute from the mappings for the specified range of
2423 *	addresses in the given pmap.  Every valid mapping within that range
2424 *	must have the wired attribute set.  In contrast, invalid mappings
2425 *	cannot have the wired attribute set, so they are ignored.
2426 *
2427 *	The wired attribute of the page table entry is not a hardware feature,
2428 *	so there is no need to invalidate any TLB entries.
2429 */
2430void
2431pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2432{
2433	vm_offset_t va_next;
2434	pd_entry_t *l1, *l2;
2435	pt_entry_t *l3;
2436	boolean_t pv_lists_locked;
2437
2438	pv_lists_locked = FALSE;
2439	PMAP_LOCK(pmap);
2440	for (; sva < eva; sva = va_next) {
2441		l1 = pmap_l1(pmap, sva);
2442		if (pmap_load(l1) == 0) {
2443			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2444			if (va_next < sva)
2445				va_next = eva;
2446			continue;
2447		}
2448
2449		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2450		if (va_next < sva)
2451			va_next = eva;
2452
2453		l2 = pmap_l1_to_l2(l1, sva);
2454		if (pmap_load(l2) == 0)
2455			continue;
2456
2457		if (va_next > eva)
2458			va_next = eva;
2459		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2460		    sva += L3_SIZE) {
2461			if (pmap_load(l3) == 0)
2462				continue;
2463			if ((pmap_load(l3) & PTE_SW_WIRED) == 0)
2464				panic("pmap_unwire: l3 %#jx is missing "
2465				    "PTE_SW_WIRED", (uintmax_t)pmap_load(l3));
2466
2467			/*
2468			 * PTE_SW_WIRED must be cleared atomically.  Although
2469			 * the pmap lock synchronizes access to it, another CPU
2470			 * could be setting PTE_DIRTY and/or PTE_REF concurrently.
2471			 */
2472			atomic_clear_long(l3, PTE_SW_WIRED);
2473			pmap->pm_stats.wired_count--;
2474		}
2475	}
2476	if (pv_lists_locked)
2477		rw_runlock(&pvh_global_lock);
2478	PMAP_UNLOCK(pmap);
2479}
2480
2481/*
2482 *	Copy the range specified by src_addr/len
2483 *	from the source map to the range dst_addr/len
2484 *	in the destination map.
2485 *
2486 *	This routine is only advisory and need not do anything.
2487 */
2488
2489void
2490pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2491    vm_offset_t src_addr)
2492{
2493
2494}
2495
2496/*
2497 *	pmap_zero_page zeros the specified hardware page by mapping
2498 *	the page into KVM and using bzero to clear its contents.
2499 */
2500void
2501pmap_zero_page(vm_page_t m)
2502{
2503	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2504
2505	pagezero((void *)va);
2506}
2507
2508/*
2509 *	pmap_zero_page_area zeros the specified hardware page by mapping
2510 *	the page into KVM and using bzero to clear its contents.
2511 *
2512 *	off and size may not cover an area beyond a single hardware page.
2513 */
2514void
2515pmap_zero_page_area(vm_page_t m, int off, int size)
2516{
2517	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2518
2519	if (off == 0 && size == PAGE_SIZE)
2520		pagezero((void *)va);
2521	else
2522		bzero((char *)va + off, size);
2523}
2524
2525/*
2526 *	pmap_zero_page_idle zeros the specified hardware page by mapping
2527 *	the page into KVM and using bzero to clear its contents.  This
2528 *	is intended to be called from the vm_pagezero process only and
2529 *	outside of Giant.
2530 */
2531void
2532pmap_zero_page_idle(vm_page_t m)
2533{
2534	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2535
2536	pagezero((void *)va);
2537}
2538
2539/*
2540 *	pmap_copy_page copies the specified (machine independent)
2541 *	page by mapping the page into virtual memory and using
2542 *	bcopy to copy the page, one machine dependent page at a
2543 *	time.
2544 */
2545void
2546pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2547{
2548	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
2549	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
2550
2551	pagecopy((void *)src, (void *)dst);
2552}
2553
2554int unmapped_buf_allowed = 1;
2555
2556void
2557pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2558    vm_offset_t b_offset, int xfersize)
2559{
2560	void *a_cp, *b_cp;
2561	vm_page_t m_a, m_b;
2562	vm_paddr_t p_a, p_b;
2563	vm_offset_t a_pg_offset, b_pg_offset;
2564	int cnt;
2565
2566	while (xfersize > 0) {
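	/*
	 * Copy xfersize bytes a fragment at a time, with each fragment
	 * bounded by the page offsets within the source and destination
	 * pages.  Both pages are expected to be covered by the DMAP.
	 */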
2567		a_pg_offset = a_offset & PAGE_MASK;
2568		m_a = ma[a_offset >> PAGE_SHIFT];
2569		p_a = m_a->phys_addr;
2570		b_pg_offset = b_offset & PAGE_MASK;
2571		m_b = mb[b_offset >> PAGE_SHIFT];
2572		p_b = m_b->phys_addr;
2573		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2574		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2575		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
2576			panic("!DMAP a %lx", p_a);
2577		} else {
2578			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
2579		}
2580		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
2581			panic("!DMAP b %lx", p_b);
2582		} else {
2583			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
2584		}
2585		bcopy(a_cp, b_cp, cnt);
2586		a_offset += cnt;
2587		b_offset += cnt;
2588		xfersize -= cnt;
2589	}
2590}
2591
2592vm_offset_t
2593pmap_quick_enter_page(vm_page_t m)
2594{
2595
2596	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
2597}
2598
2599void
2600pmap_quick_remove_page(vm_offset_t addr)
2601{
2602}
2603
2604/*
2605 * Returns true if the pmap's pv is one of the first
2606 * 16 pvs linked to from this page.  This count may
2607 * be changed upwards or downwards in the future; it
2608 * is only necessary that true be returned for a small
2609 * subset of pmaps for proper page aging.
2610 */
2611boolean_t
2612pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2613{
2614	struct rwlock *lock;
2615	pv_entry_t pv;
2616	int loops = 0;
2617	boolean_t rv;
2618
2619	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2620	    ("pmap_page_exists_quick: page %p is not managed", m));
2621	rv = FALSE;
2622	rw_rlock(&pvh_global_lock);
2623	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2624	rw_rlock(lock);
2625	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2626		if (PV_PMAP(pv) == pmap) {
2627			rv = TRUE;
2628			break;
2629		}
2630		loops++;
2631		if (loops >= 16)
2632			break;
2633	}
2634	rw_runlock(lock);
2635	rw_runlock(&pvh_global_lock);
2636	return (rv);
2637}
2638
2639/*
2640 *	pmap_page_wired_mappings:
2641 *
2642 *	Return the number of managed mappings to the given physical page
2643 *	that are wired.
2644 */
2645int
2646pmap_page_wired_mappings(vm_page_t m)
2647{
2648	struct rwlock *lock;
2649	pmap_t pmap;
2650	pt_entry_t *l3;
2651	pv_entry_t pv;
2652	int count, md_gen;
2653
2654	if ((m->oflags & VPO_UNMANAGED) != 0)
2655		return (0);
2656	rw_rlock(&pvh_global_lock);
2657	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2658	rw_rlock(lock);
2659restart:
2660	count = 0;
2661	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2662		pmap = PV_PMAP(pv);
2663		if (!PMAP_TRYLOCK(pmap)) {
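			/*
			 * Drop the pv list lock before blocking on the pmap
			 * lock, then use the pv generation count to detect
			 * whether the list changed while unlocked; if so,
			 * restart the scan.
			 */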
2664			md_gen = m->md.pv_gen;
2665			rw_runlock(lock);
2666			PMAP_LOCK(pmap);
2667			rw_rlock(lock);
2668			if (md_gen != m->md.pv_gen) {
2669				PMAP_UNLOCK(pmap);
2670				goto restart;
2671			}
2672		}
2673		l3 = pmap_l3(pmap, pv->pv_va);
2674		if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0)
2675			count++;
2676		PMAP_UNLOCK(pmap);
2677	}
2678	rw_runlock(lock);
2679	rw_runlock(&pvh_global_lock);
2680	return (count);
2681}
2682
2683/*
2684 * Destroy all managed, non-wired mappings in the given user-space
2685 * pmap.  This pmap cannot be active on any processor besides the
2686 * caller.
2687 *
2688 * This function cannot be applied to the kernel pmap.  Moreover, it
2689 * is not intended for general use.  It is only to be used during
2690 * process termination.  Consequently, it can be implemented in ways
2691 * that make it faster than pmap_remove().  First, it can more quickly
2692 * destroy mappings by iterating over the pmap's collection of PV
2693 * entries, rather than searching the page table.  Second, it doesn't
2694 * have to test and clear the page table entries atomically, because
2695 * no processor is currently accessing the user address space.  In
2696 * particular, a page table entry's dirty bit won't change state once
2697 * this function starts.
2698 */
2699void
2700pmap_remove_pages(pmap_t pmap)
2701{
2702	pd_entry_t ptepde, *l2;
2703	pt_entry_t *l3, tl3;
2704	struct spglist free;
2705	vm_page_t m;
2706	pv_entry_t pv;
2707	struct pv_chunk *pc, *npc;
2708	struct rwlock *lock;
2709	int64_t bit;
2710	uint64_t inuse, bitmask;
2711	int allfree, field, freed, idx;
2712	vm_paddr_t pa;
2713
2714	lock = NULL;
2715
2716	SLIST_INIT(&free);
2717	rw_rlock(&pvh_global_lock);
2718	PMAP_LOCK(pmap);
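	/*
	 * Walk the pmap's pv chunks directly instead of the page tables.
	 * In pc_map a clear bit marks an allocated pv entry, so the in-use
	 * entries are found by inverting the map against pc_freemask.
	 */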
2719	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2720		allfree = 1;
2721		freed = 0;
2722		for (field = 0; field < _NPCM; field++) {
2723			inuse = ~pc->pc_map[field] & pc_freemask[field];
2724			while (inuse != 0) {
2725				bit = ffsl(inuse) - 1;
2726				bitmask = 1UL << bit;
2727				idx = field * 64 + bit;
2728				pv = &pc->pc_pventry[idx];
2729				inuse &= ~bitmask;
2730
2731				l2 = pmap_l2(pmap, pv->pv_va);
2732				ptepde = pmap_load(l2);
2733				l3 = pmap_l2_to_l3(l2, pv->pv_va);
2734				tl3 = pmap_load(l3);
2735
2736/*
2737 * We cannot remove wired pages from a process' mapping at this time
2738 */
2739				if (tl3 & PTE_SW_WIRED) {
2740					allfree = 0;
2741					continue;
2742				}
2743
2744				pa = PTE_TO_PHYS(tl3);
2745				m = PHYS_TO_VM_PAGE(pa);
2746				KASSERT(m->phys_addr == pa,
2747				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
2748				    m, (uintmax_t)m->phys_addr,
2749				    (uintmax_t)tl3));
2750
2751				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
2752				    m < &vm_page_array[vm_page_array_size],
2753				    ("pmap_remove_pages: bad l3 %#jx",
2754				    (uintmax_t)tl3));
2755
2756				if (pmap_is_current(pmap) &&
2757				    pmap_l3_valid_cacheable(pmap_load(l3)))
2758					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2759				pmap_load_clear(l3);
2760				PTE_SYNC(l3);
2761				pmap_invalidate_page(pmap, pv->pv_va);
2762
2763				/*
2764				 * Update the vm_page_t clean/reference bits.
2765				 */
2766				if (pmap_page_dirty(tl3))
2767					vm_page_dirty(m);
2768
2769				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
2770
2771				/* Mark free */
2772				pc->pc_map[field] |= bitmask;
2773
2774				pmap_resident_count_dec(pmap, 1);
2775				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2776				m->md.pv_gen++;
2777
2778				pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
2779				freed++;
2780			}
2781		}
2782		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
2783		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
2784		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
2785		if (allfree) {
2786			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2787			free_pv_chunk(pc);
2788		}
2789	}
2790	pmap_invalidate_all(pmap);
2791	if (lock != NULL)
2792		rw_wunlock(lock);
2793	rw_runlock(&pvh_global_lock);
2794	PMAP_UNLOCK(pmap);
2795	pmap_free_zero_pages(&free);
2796}
2797
2798/*
2799 * This is used to check if a page has been accessed or modified, by
2800 * examining the PTE_REF (accessed) and PTE_DIRTY (modified) bits in
2801 * each of the page's mappings.
2802 */
2803static boolean_t
2804pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
2805{
2806	struct rwlock *lock;
2807	pv_entry_t pv;
2808	pt_entry_t *l3, mask, value;
2809	pmap_t pmap;
2810	int md_gen;
2811	boolean_t rv;
2812
2813	rv = FALSE;
2814	rw_rlock(&pvh_global_lock);
2815	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2816	rw_rlock(lock);
2817restart:
2818	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2819		pmap = PV_PMAP(pv);
2820		if (!PMAP_TRYLOCK(pmap)) {
2821			md_gen = m->md.pv_gen;
2822			rw_runlock(lock);
2823			PMAP_LOCK(pmap);
2824			rw_rlock(lock);
2825			if (md_gen != m->md.pv_gen) {
2826				PMAP_UNLOCK(pmap);
2827				goto restart;
2828			}
2829		}
2830		l3 = pmap_l3(pmap, pv->pv_va);
2831		mask = 0;
2832		value = 0;
2833		if (modified) {
2834			mask |= PTE_DIRTY;
2835			value |= PTE_DIRTY;
2836		}
2837		if (accessed) {
2838			mask |= PTE_REF;
2839			value |= PTE_REF;
2840		}
2841
2842#if 0
2843		if (modified) {
2844			mask |= ATTR_AP_RW_BIT;
2845			value |= ATTR_AP(ATTR_AP_RW);
2846		}
2847		if (accessed) {
2848			mask |= ATTR_AF | ATTR_DESCR_MASK;
2849			value |= ATTR_AF | L3_PAGE;
2850		}
2851#endif
2852
2853		rv = (pmap_load(l3) & mask) == value;
2854		PMAP_UNLOCK(pmap);
2855		if (rv)
2856			goto out;
2857	}
2858out:
2859	rw_runlock(lock);
2860	rw_runlock(&pvh_global_lock);
2861	return (rv);
2862}
2863
2864/*
2865 *	pmap_is_modified:
2866 *
2867 *	Return whether or not the specified physical page was modified
2868 *	in any physical maps.
2869 */
2870boolean_t
2871pmap_is_modified(vm_page_t m)
2872{
2873
2874	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2875	    ("pmap_is_modified: page %p is not managed", m));
2876
2877	/*
2878	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2879	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2880	 * is clear, no PTEs can have PG_M set.
2881	 */
2882	VM_OBJECT_ASSERT_WLOCKED(m->object);
2883	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2884		return (FALSE);
2885	return (pmap_page_test_mappings(m, FALSE, TRUE));
2886}
2887
2888/*
2889 *	pmap_is_prefaultable:
2890 *
2891 *	Return whether or not the specified virtual address is eligible
2892 *	for prefault.
2893 */
2894boolean_t
2895pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2896{
2897	pt_entry_t *l3;
2898	boolean_t rv;
2899
2900	rv = FALSE;
2901	PMAP_LOCK(pmap);
2902	l3 = pmap_l3(pmap, addr);
2903	if (l3 != NULL && pmap_load(l3) != 0) {
2904		rv = TRUE;
2905	}
2906	PMAP_UNLOCK(pmap);
2907	return (rv);
2908}
2909
2910/*
2911 *	pmap_is_referenced:
2912 *
2913 *	Return whether or not the specified physical page was referenced
2914 *	in any physical maps.
2915 */
2916boolean_t
2917pmap_is_referenced(vm_page_t m)
2918{
2919
2920	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2921	    ("pmap_is_referenced: page %p is not managed", m));
2922	return (pmap_page_test_mappings(m, TRUE, FALSE));
2923}
2924
2925/*
2926 * Clear the write and modified bits in each of the given page's mappings.
2927 */
2928void
2929pmap_remove_write(vm_page_t m)
2930{
2931	pmap_t pmap;
2932	struct rwlock *lock;
2933	pv_entry_t pv;
2934	pt_entry_t *l3, oldl3;
2935	pt_entry_t newl3;
2936	int md_gen;
2937
2938	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2939	    ("pmap_remove_write: page %p is not managed", m));
2940
2941	/*
2942	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2943	 * set by another thread while the object is locked.  Thus,
2944	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2945	 */
2946	VM_OBJECT_ASSERT_WLOCKED(m->object);
2947	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2948		return;
2949	rw_rlock(&pvh_global_lock);
2950	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2951retry_pv_loop:
2952	rw_wlock(lock);
2953	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2954		pmap = PV_PMAP(pv);
2955		if (!PMAP_TRYLOCK(pmap)) {
2956			md_gen = m->md.pv_gen;
2957			rw_wunlock(lock);
2958			PMAP_LOCK(pmap);
2959			rw_wlock(lock);
2960			if (md_gen != m->md.pv_gen) {
2961				PMAP_UNLOCK(pmap);
2962				rw_wunlock(lock);
2963				goto retry_pv_loop;
2964			}
2965		}
2966		l3 = pmap_l3(pmap, pv->pv_va);
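		/*
		 * Clear the write permission with a compare-and-set so that
		 * a concurrent update of the PTE (e.g. the reference bit
		 * being set) is not lost.
		 */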
2967retry:
2968		oldl3 = pmap_load(l3);
2969
2970		if (pmap_is_write(oldl3)) {
2971			newl3 = oldl3 & ~(1 << PTE_TYPE_S);
2972			if (!atomic_cmpset_long(l3, oldl3, newl3))
2973				goto retry;
2974			/* TODO: use pmap_page_dirty(oldl3) ? */
2975			if ((oldl3 & PTE_REF) != 0)
2976				vm_page_dirty(m);
2977			pmap_invalidate_page(pmap, pv->pv_va);
2978		}
2979		PMAP_UNLOCK(pmap);
2980	}
2981	rw_wunlock(lock);
2982	vm_page_aflag_clear(m, PGA_WRITEABLE);
2983	rw_runlock(&pvh_global_lock);
2984}
2985
2986static __inline boolean_t
2987safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
2988{
2989
2990	return (FALSE);
2991}
2992
2993/*
2994 *	pmap_ts_referenced:
2995 *
2996 *	Return a count of reference bits for a page, clearing those bits.
2997 *	It is not necessary for every reference bit to be cleared, but it
2998 *	is necessary that 0 only be returned when there are truly no
2999 *	reference bits set.
3000 *
3001 *	As an optimization, update the page's dirty field if a modified bit is
3002 *	found while counting reference bits.  This opportunistic update can be
3003 *	performed at low cost and can eliminate the need for some future calls
3004 *	to pmap_is_modified().  However, since this function stops after
3005 *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
3006 *	dirty pages.  Those dirty pages will only be detected by a future call
3007 *	to pmap_is_modified().
3008 */
3009int
3010pmap_ts_referenced(vm_page_t m)
3011{
3012	pv_entry_t pv, pvf;
3013	pmap_t pmap;
3014	struct rwlock *lock;
3015	pd_entry_t *l2;
3016	pt_entry_t *l3, old_l3;
3017	vm_paddr_t pa;
3018	int cleared, md_gen, not_cleared;
3019	struct spglist free;
3020
3021	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3022	    ("pmap_ts_referenced: page %p is not managed", m));
3023	SLIST_INIT(&free);
3024	cleared = 0;
3025	pa = VM_PAGE_TO_PHYS(m);
3026	lock = PHYS_TO_PV_LIST_LOCK(pa);
3027	rw_rlock(&pvh_global_lock);
3028	rw_wlock(lock);
3029retry:
3030	not_cleared = 0;
3031	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
3032		goto out;
3033	pv = pvf;
3034	do {
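		/*
		 * safe_to_clear_referenced() always returns FALSE here, so a
		 * referenced, unwired mapping is removed outright to emulate
		 * clearing its reference bit, while wired mappings are only
		 * counted as "not cleared".
		 */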
3035		if (pvf == NULL)
3036			pvf = pv;
3037		pmap = PV_PMAP(pv);
3038		if (!PMAP_TRYLOCK(pmap)) {
3039			md_gen = m->md.pv_gen;
3040			rw_wunlock(lock);
3041			PMAP_LOCK(pmap);
3042			rw_wlock(lock);
3043			if (md_gen != m->md.pv_gen) {
3044				PMAP_UNLOCK(pmap);
3045				goto retry;
3046			}
3047		}
3048		l2 = pmap_l2(pmap, pv->pv_va);
3049
3050		KASSERT((pmap_load(l2) & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S),
3051		    ("pmap_ts_referenced: found an invalid l2 table"));
3052
3053		l3 = pmap_l2_to_l3(l2, pv->pv_va);
3054		old_l3 = pmap_load(l3);
3055		if (pmap_page_dirty(old_l3))
3056			vm_page_dirty(m);
3057		if ((old_l3 & PTE_REF) != 0) {
3058			if (safe_to_clear_referenced(pmap, old_l3)) {
3059				/*
3060				 * TODO: We don't handle the access flag
3061				 * at all. We need to be able to set it in
3062				 * the exception handler.
3063				 */
3064				panic("RISCVTODO: safe_to_clear_referenced\n");
3065			} else if ((old_l3 & PTE_SW_WIRED) == 0) {
3066				/*
3067				 * Wired pages cannot be paged out so
3068				 * doing accessed bit emulation for
3069				 * them is wasted effort. We do the
3070				 * hard work for unwired pages only.
3071				 */
3072				pmap_remove_l3(pmap, l3, pv->pv_va,
3073				    pmap_load(l2), &free, &lock);
3074				pmap_invalidate_page(pmap, pv->pv_va);
3075				cleared++;
3076				if (pvf == pv)
3077					pvf = NULL;
3078				pv = NULL;
3079				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
3080				    ("inconsistent pv lock %p %p for page %p",
3081				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
3082			} else
3083				not_cleared++;
3084		}
3085		PMAP_UNLOCK(pmap);
3086		/* Rotate the PV list if it has more than one entry. */
3087		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
3088			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
3089			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
3090			m->md.pv_gen++;
3091		}
3092	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
3093	    not_cleared < PMAP_TS_REFERENCED_MAX);
3094out:
3095	rw_wunlock(lock);
3096	rw_runlock(&pvh_global_lock);
3097	pmap_free_zero_pages(&free);
3098	return (cleared + not_cleared);
3099}
3100
3101/*
3102 *	Apply the given advice to the specified range of addresses within the
3103 *	given pmap.  Depending on the advice, clear the referenced and/or
3104 *	modified flags in each mapping and set the mapped page's dirty field.
3105 */
3106void
3107pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
3108{
3109}
3110
3111/*
3112 *	Clear the modify bits on the specified physical page.
3113 */
3114void
3115pmap_clear_modify(vm_page_t m)
3116{
3117
3118	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3119	    ("pmap_clear_modify: page %p is not managed", m));
3120	VM_OBJECT_ASSERT_WLOCKED(m->object);
3121	KASSERT(!vm_page_xbusied(m),
3122	    ("pmap_clear_modify: page %p is exclusive busied", m));
3123
3124	/*
3125	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
3126	 * If the object containing the page is locked and the page is not
3127	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
3128	 */
3129	if ((m->aflags & PGA_WRITEABLE) == 0)
3130		return;
3131
3132	/* RISCVTODO: We lack support for tracking if a page is modified */
3133}
3134
3135void *
3136pmap_mapbios(vm_paddr_t pa, vm_size_t size)
3137{
3138
3139	return ((void *)PHYS_TO_DMAP(pa));
3140}
3141
3142void
3143pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
3144{
3145}
3146
3147/*
3148 * Sets the memory attribute for the specified page.
3149 */
3150void
3151pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3152{
3153
3154	m->md.pv_memattr = ma;
3155
3156	/*
3157	 * RISCVTODO: Implement the below (from the amd64 pmap)
3158	 * If "m" is a normal page, update its direct mapping.  This update
3159	 * can be relied upon to perform any cache operations that are
3160	 * required for data coherence.
3161	 */
3162	if ((m->flags & PG_FICTITIOUS) == 0 &&
3163	    PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
3164		panic("RISCVTODO: pmap_page_set_memattr");
3165}
3166
3167/*
3168 * perform the pmap work for mincore
3169 */
3170int
3171pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3172{
3173
3174	panic("RISCVTODO: pmap_mincore");
3175}
3176
3177void
3178pmap_activate(struct thread *td)
3179{
3180	pmap_t pmap;
3181
3182	critical_enter();
3183	pmap = vmspace_pmap(td->td_proc->p_vmspace);
3184	td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
3185
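	/*
	 * Point the page table base register ("sptbr" in the privileged
	 * spec revision targeted here) at the new pmap's L1 table, then
	 * flush any stale TLB entries.
	 */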
3186	__asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr));
3187
3188	pmap_invalidate_all(pmap);
3189	critical_exit();
3190}
3191
3192void
3193pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3194{
3195
3196	panic("RISCVTODO: pmap_sync_icache");
3197}
3198
3199/*
3200 *	Increase the starting virtual address of the given mapping if a
3201 *	different alignment might result in more superpage mappings.
3202 */
3203void
3204pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3205    vm_offset_t *addr, vm_size_t size)
3206{
3207}
3208
3209/**
3210 * Get the kernel virtual address of a set of physical pages. If there are
3211 * physical addresses not covered by the DMAP perform a transient mapping
3212 * that will be removed when calling pmap_unmap_io_transient.
3213 *
3214 * \param page        The pages the caller wishes to obtain the virtual
3215 *                    address on the kernel memory map.
3216 * \param vaddr       On return contains the kernel virtual memory address
3217 *                    of the pages passed in the page parameter.
3218 * \param count       Number of pages passed in.
3219 * \param can_fault   TRUE if the thread using the mapped pages can take
3220 *                    page faults, FALSE otherwise.
3221 *
3222 * \returns TRUE if the caller must call pmap_unmap_io_transient when
3223 *          finished or FALSE otherwise.
3224 *
3225 */
3226boolean_t
3227pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3228    boolean_t can_fault)
3229{
3230	vm_paddr_t paddr;
3231	boolean_t needs_mapping;
3232	int error, i;
3233
3234	/*
3235	 * Allocate any KVA space that we need, this is done in a separate
3236	 * loop to prevent calling vmem_alloc while pinned.
3237	 */
3238	needs_mapping = FALSE;
3239	for (i = 0; i < count; i++) {
3240		paddr = VM_PAGE_TO_PHYS(page[i]);
3241		if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
3242			error = vmem_alloc(kernel_arena, PAGE_SIZE,
3243			    M_BESTFIT | M_WAITOK, &vaddr[i]);
3244			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
3245			needs_mapping = TRUE;
3246		} else {
3247			vaddr[i] = PHYS_TO_DMAP(paddr);
3248		}
3249	}
3250
3251	/* Exit early if everything is covered by the DMAP */
3252	if (!needs_mapping)
3253		return (FALSE);
3254
3255	if (!can_fault)
3256		sched_pin();
3257	for (i = 0; i < count; i++) {
3258		paddr = VM_PAGE_TO_PHYS(page[i]);
3259		if (paddr >= DMAP_MAX_PHYSADDR) {
3260			panic(
3261			   "pmap_map_io_transient: TODO: Map out of DMAP data");
3262		}
3263	}
3264
3265	return (needs_mapping);
3266}
3267
3268void
3269pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3270    boolean_t can_fault)
3271{
3272	vm_paddr_t paddr;
3273	int i;
3274
3275	if (!can_fault)
3276		sched_unpin();
3277	for (i = 0; i < count; i++) {
3278		paddr = VM_PAGE_TO_PHYS(page[i]);
3279		if (paddr >= DMAP_MAX_PHYSADDR) {
3280			panic("RISCVTODO: pmap_unmap_io_transient: Unmap data");
3281		}
3282	}
3283}
3284