pmap.c revision 293853
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38 */
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: stable/10/sys/sparc64/sparc64/pmap.c 293853 2016-01-13 21:38:52Z marius $");
42
43/*
44 * Manages physical address maps.
45 *
46 * Since the information managed by this module is also stored by the
47 * logical address mapping module, this module may throw away valid virtual
48 * to physical mappings at almost any time.  However, invalidations of
49 * mappings must be done as requested.
50 *
51 * In order to cope with hardware architectures which make virtual to
52 * physical map invalidates expensive, this module may delay invalidate
53 * or reduced protection operations until such time as they are actually
54 * necessary.  This module is given full information as to which processors
55 * are currently using which maps, and to when physical maps must be made
56 * correct.
57 */
58
59#include "opt_kstack_pages.h"
60#include "opt_pmap.h"
61
62#include <sys/param.h>
63#include <sys/kernel.h>
64#include <sys/ktr.h>
65#include <sys/lock.h>
66#include <sys/msgbuf.h>
67#include <sys/mutex.h>
68#include <sys/proc.h>
69#include <sys/rwlock.h>
70#include <sys/smp.h>
71#include <sys/sysctl.h>
72#include <sys/systm.h>
73#include <sys/vmmeter.h>
74
75#include <dev/ofw/openfirm.h>
76
77#include <vm/vm.h>
78#include <vm/vm_param.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_page.h>
81#include <vm/vm_map.h>
82#include <vm/vm_object.h>
83#include <vm/vm_extern.h>
84#include <vm/vm_pageout.h>
85#include <vm/vm_pager.h>
86#include <vm/vm_phys.h>
87
88#include <machine/cache.h>
89#include <machine/frame.h>
90#include <machine/instr.h>
91#include <machine/md_var.h>
92#include <machine/metadata.h>
93#include <machine/ofw_mem.h>
94#include <machine/smp.h>
95#include <machine/tlb.h>
96#include <machine/tte.h>
97#include <machine/tsb.h>
98#include <machine/ver.h>
99
100/*
101 * Virtual address of message buffer
102 */
103struct msgbuf *msgbufp;
104
105/*
106 * Map of physical memory regions
107 */
108vm_paddr_t phys_avail[128];
109static struct ofw_mem_region mra[128];
110struct ofw_mem_region sparc64_memreg[128];
111int sparc64_nmemreg;
112static struct ofw_map translations[128];
113static int translations_size;
114
115static vm_offset_t pmap_idle_map;
116static vm_offset_t pmap_temp_map_1;
117static vm_offset_t pmap_temp_map_2;
118
119/*
120 * First and last available kernel virtual addresses
121 */
122vm_offset_t virtual_avail;
123vm_offset_t virtual_end;
124vm_offset_t kernel_vm_end;
125
126vm_offset_t vm_max_kernel_address;
127
128/*
129 * Kernel pmap
130 */
131struct pmap kernel_pmap_store;
132
133struct rwlock_padalign tte_list_global_lock;
134
135/*
136 * Allocate physical memory for use in pmap_bootstrap.
137 */
138static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
139
140static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
141static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
142static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
143    struct tte *tp, vm_offset_t va);
144static int pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp,
145    vm_offset_t va);
146
147/*
148 * Map the given physical page at the specified virtual address in the
149 * target pmap with the protection requested.  If specified the page
150 * will be wired down.
151 *
152 * The page queues and pmap must be locked.
153 */
154static int pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
155    vm_prot_t prot, u_int flags, int8_t psind);
156
157extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
158extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
159extern int tl1_dmmu_miss_patch_asi_1[];
160extern int tl1_dmmu_miss_patch_quad_ldd_1[];
161extern int tl1_dmmu_miss_patch_tsb_1[];
162extern int tl1_dmmu_miss_patch_tsb_2[];
163extern int tl1_dmmu_miss_patch_tsb_mask_1[];
164extern int tl1_dmmu_miss_patch_tsb_mask_2[];
165extern int tl1_dmmu_prot_patch_asi_1[];
166extern int tl1_dmmu_prot_patch_quad_ldd_1[];
167extern int tl1_dmmu_prot_patch_tsb_1[];
168extern int tl1_dmmu_prot_patch_tsb_2[];
169extern int tl1_dmmu_prot_patch_tsb_mask_1[];
170extern int tl1_dmmu_prot_patch_tsb_mask_2[];
171extern int tl1_immu_miss_patch_asi_1[];
172extern int tl1_immu_miss_patch_quad_ldd_1[];
173extern int tl1_immu_miss_patch_tsb_1[];
174extern int tl1_immu_miss_patch_tsb_2[];
175extern int tl1_immu_miss_patch_tsb_mask_1[];
176extern int tl1_immu_miss_patch_tsb_mask_2[];
177
178/*
179 * If a user pmap is processed with pmap_remove and the resident count
180 * drops to 0, there are no more pages to remove, so we need not
181 * continue.
182 */
183#define	PMAP_REMOVE_DONE(pm) \
184	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
185
186/*
187 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
188 * and pmap_protect() instead of trying each virtual address.
189 */
190#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
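/*
 * Rationale: past roughly half as many pages as the TSB has entries, a
 * single pass over the whole TSB via tsb_foreach() is expected to be
 * cheaper than a separate tsb_tte_lookup() for every page in the range.
 */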
191
192SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
193
194PMAP_STATS_VAR(pmap_nenter);
195PMAP_STATS_VAR(pmap_nenter_update);
196PMAP_STATS_VAR(pmap_nenter_replace);
197PMAP_STATS_VAR(pmap_nenter_new);
198PMAP_STATS_VAR(pmap_nkenter);
199PMAP_STATS_VAR(pmap_nkenter_oc);
200PMAP_STATS_VAR(pmap_nkenter_stupid);
201PMAP_STATS_VAR(pmap_nkremove);
202PMAP_STATS_VAR(pmap_nqenter);
203PMAP_STATS_VAR(pmap_nqremove);
204PMAP_STATS_VAR(pmap_ncache_enter);
205PMAP_STATS_VAR(pmap_ncache_enter_c);
206PMAP_STATS_VAR(pmap_ncache_enter_oc);
207PMAP_STATS_VAR(pmap_ncache_enter_cc);
208PMAP_STATS_VAR(pmap_ncache_enter_coc);
209PMAP_STATS_VAR(pmap_ncache_enter_nc);
210PMAP_STATS_VAR(pmap_ncache_enter_cnc);
211PMAP_STATS_VAR(pmap_ncache_remove);
212PMAP_STATS_VAR(pmap_ncache_remove_c);
213PMAP_STATS_VAR(pmap_ncache_remove_oc);
214PMAP_STATS_VAR(pmap_ncache_remove_cc);
215PMAP_STATS_VAR(pmap_ncache_remove_coc);
216PMAP_STATS_VAR(pmap_ncache_remove_nc);
217PMAP_STATS_VAR(pmap_nzero_page);
218PMAP_STATS_VAR(pmap_nzero_page_c);
219PMAP_STATS_VAR(pmap_nzero_page_oc);
220PMAP_STATS_VAR(pmap_nzero_page_nc);
221PMAP_STATS_VAR(pmap_nzero_page_area);
222PMAP_STATS_VAR(pmap_nzero_page_area_c);
223PMAP_STATS_VAR(pmap_nzero_page_area_oc);
224PMAP_STATS_VAR(pmap_nzero_page_area_nc);
225PMAP_STATS_VAR(pmap_nzero_page_idle);
226PMAP_STATS_VAR(pmap_nzero_page_idle_c);
227PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
228PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
229PMAP_STATS_VAR(pmap_ncopy_page);
230PMAP_STATS_VAR(pmap_ncopy_page_c);
231PMAP_STATS_VAR(pmap_ncopy_page_oc);
232PMAP_STATS_VAR(pmap_ncopy_page_nc);
233PMAP_STATS_VAR(pmap_ncopy_page_dc);
234PMAP_STATS_VAR(pmap_ncopy_page_doc);
235PMAP_STATS_VAR(pmap_ncopy_page_sc);
236PMAP_STATS_VAR(pmap_ncopy_page_soc);
237
238PMAP_STATS_VAR(pmap_nnew_thread);
239PMAP_STATS_VAR(pmap_nnew_thread_oc);
240
241static inline u_long dtlb_get_data(u_int tlb, u_int slot);
242
243/*
244 * Quick sort callbacks for comparing memory regions
245 */
246static int mr_cmp(const void *a, const void *b);
247static int om_cmp(const void *a, const void *b);
248
249static int
250mr_cmp(const void *a, const void *b)
251{
252	const struct ofw_mem_region *mra;
253	const struct ofw_mem_region *mrb;
254
255	mra = a;
256	mrb = b;
257	if (mra->mr_start < mrb->mr_start)
258		return (-1);
259	else if (mra->mr_start > mrb->mr_start)
260		return (1);
261	else
262		return (0);
263}
264
265static int
266om_cmp(const void *a, const void *b)
267{
268	const struct ofw_map *oma;
269	const struct ofw_map *omb;
270
271	oma = a;
272	omb = b;
273	if (oma->om_start < omb->om_start)
274		return (-1);
275	else if (oma->om_start > omb->om_start)
276		return (1);
277	else
278		return (0);
279}
280
281static inline u_long
282dtlb_get_data(u_int tlb, u_int slot)
283{
284	u_long data;
285	register_t s;
286
287	slot = TLB_DAR_SLOT(tlb, slot);
288	/*
289	 * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
290	 * work around errata of USIII and beyond.
291	 */
292	s = intr_disable();
293	(void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
294	data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
295	intr_restore(s);
296	return (data);
297}
298
299/*
300 * Bootstrap the system enough to run with virtual memory.
301 */
302void
303pmap_bootstrap(u_int cpu_impl)
304{
305	struct pmap *pm;
306	struct tte *tp;
307	vm_offset_t off;
308	vm_offset_t va;
309	vm_paddr_t pa;
310	vm_size_t physsz;
311	vm_size_t virtsz;
312	u_long data;
313	u_long vpn;
314	phandle_t pmem;
315	phandle_t vmem;
316	u_int dtlb_slots_avail;
317	int i;
318	int j;
319	int sz;
320	uint32_t asi;
321	uint32_t colors;
322	uint32_t ldd;
323
324	/*
325	 * Set the kernel context.
326	 */
327	pmap_set_kctx();
328
329	colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
330
331	/*
332	 * Find out what physical memory is available from the PROM and
333	 * initialize the phys_avail array.  This must be done before
334	 * pmap_bootstrap_alloc is called.
335	 */
336	if ((pmem = OF_finddevice("/memory")) == -1)
337		OF_panic("%s: finddevice /memory", __func__);
338	if ((sz = OF_getproplen(pmem, "available")) == -1)
339		OF_panic("%s: getproplen /memory/available", __func__);
340	if (sizeof(phys_avail) < sz)
341		OF_panic("%s: phys_avail too small", __func__);
342	if (sizeof(mra) < sz)
343		OF_panic("%s: mra too small", __func__);
344	bzero(mra, sz);
345	if (OF_getprop(pmem, "available", mra, sz) == -1)
346		OF_panic("%s: getprop /memory/available", __func__);
347	sz /= sizeof(*mra);
348#ifdef DIAGNOSTIC
349	OF_printf("pmap_bootstrap: physical memory\n");
350#endif
351	qsort(mra, sz, sizeof (*mra), mr_cmp);
352	physsz = 0;
353	getenv_quad("hw.physmem", &physmem);
354	physmem = btoc(physmem);
355	for (i = 0, j = 0; i < sz; i++, j += 2) {
356#ifdef DIAGNOSTIC
357		OF_printf("start=%#lx size=%#lx\n", mra[i].mr_start,
358		    mra[i].mr_size);
359#endif
360		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
361			if (btoc(physsz) < physmem) {
362				phys_avail[j] = mra[i].mr_start;
363				phys_avail[j + 1] = mra[i].mr_start +
364				    (ctob(physmem) - physsz);
365				physsz = ctob(physmem);
366			}
367			break;
368		}
369		phys_avail[j] = mra[i].mr_start;
370		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
371		physsz += mra[i].mr_size;
372	}
373	physmem = btoc(physsz);
374
375	/*
376	 * Calculate the size of kernel virtual memory, and the size and mask
377	 * for the kernel TSB based on the physical memory size but limited
378	 * by the number of dTLB slots available for locked entries if we have
379	 * to lock the TSB in the TLB (given that for spitfire-class CPUs all
380	 * of the dt64 slots can hold locked entries but there is no large
381	 * dTLB for unlocked ones, we don't use more than half of it for the
382	 * TSB).
383	 * Note that for reasons unknown OpenSolaris doesn't take advantage of
384	 * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
385	 * public documentation is available for these, the latter just might
386	 * not support it, yet.
387	 */
388	if (cpu_impl == CPU_IMPL_SPARC64V ||
389	    cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
390		tsb_kernel_ldd_phys = 1;
391		virtsz = roundup(physsz * 5 / 3, PAGE_SIZE_4M <<
392		    (PAGE_SHIFT - TTE_SHIFT));
393	} else {
394		dtlb_slots_avail = 0;
395		for (i = 0; i < dtlb_slots; i++) {
396			data = dtlb_get_data(cpu_impl ==
397			    CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
398			    TLB_DAR_T32, i);
399			if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
400				dtlb_slots_avail++;
401		}
402#ifdef SMP
403		dtlb_slots_avail -= PCPU_PAGES;
404#endif
405		if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
406		    cpu_impl < CPU_IMPL_ULTRASPARCIII)
407			dtlb_slots_avail /= 2;
408		virtsz = roundup(physsz, PAGE_SIZE_4M <<
409		    (PAGE_SHIFT - TTE_SHIFT));
410		virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
411		    (PAGE_SHIFT - TTE_SHIFT));
412	}
413	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
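	/*
	 * Each 8K page of KVA needs one TSB entry of 2^TTE_SHIFT bytes, so
	 * virtsz is scaled down by (PAGE_SHIFT - TTE_SHIFT) to get the TSB
	 * size in bytes; the mask is the entry count minus one and is used
	 * to index the TSB in the trap handlers.
	 */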
414	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
415	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
416
417	/*
418	 * Allocate the kernel TSB and lock it in the TLB if necessary.
419	 */
420	pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
421	if (pa & PAGE_MASK_4M)
422		OF_panic("%s: TSB unaligned", __func__);
423	tsb_kernel_phys = pa;
424	if (tsb_kernel_ldd_phys == 0) {
425		tsb_kernel =
426		    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
427		pmap_map_tsb();
428		bzero(tsb_kernel, tsb_kernel_size);
429	} else {
430		tsb_kernel =
431		    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
432		aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
433	}
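	/*
	 * From here on, tsb_kernel is either a TLB-locked virtual mapping of
	 * the TSB (pmap_map_tsb() case) or a direct-map address whose backing
	 * memory is accessed via the physical ASIs; the trap table is patched
	 * below to match whichever variant was chosen.
	 */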
434
435	/*
436	 * Allocate and map the dynamic per-CPU area for the BSP.
437	 */
438	pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
439	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
440
441	/*
442	 * Allocate and map the message buffer.
443	 */
444	pa = pmap_bootstrap_alloc(msgbufsize, colors);
445	msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
446
447	/*
448	 * Patch the TSB addresses and mask as well as the ASIs used to load
449	 * it into the trap table.
450	 */
451
452#define	LDDA_R_I_R(rd, imm_asi, rs1, rs2)				\
453	(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |	\
454	    EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |	\
455	    EIF_F3_RS2(rs2))
456#define	OR_R_I_R(rd, imm13, rs1)					\
457	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |	\
458	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
459#define	SETHI(rd, imm22)						\
460	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |	\
461	    EIF_IMM((imm22) >> 10, 22))
462#define	WR_R_I(rd, imm13, rs1)						\
463	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |	\
464	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
465
466#define	PATCH_ASI(addr, asi) do {					\
467	if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,			\
468	    IF_F3_RS1(addr[0])))					\
469		OF_panic("%s: patched instructions have changed",	\
470		    __func__);						\
471	addr[0] |= EIF_IMM((asi), 13);					\
472	flush(addr);							\
473} while (0)
474
475#define	PATCH_LDD(addr, asi) do {					\
476	if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,		\
477	    IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))			\
478		OF_panic("%s: patched instructions have changed",	\
479		    __func__);						\
480	addr[0] |= EIF_F3_IMM_ASI(asi);					\
481	flush(addr);							\
482} while (0)
483
484#define	PATCH_TSB(addr, val) do {					\
485	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
486	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
487	    IF_F3_RS1(addr[1]))	||					\
488	    addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))			\
489		OF_panic("%s: patched instructions have changed",	\
490		    __func__);						\
491	addr[0] |= EIF_IMM((val) >> 42, 22);				\
492	addr[1] |= EIF_IMM((val) >> 32, 10);				\
493	addr[3] |= EIF_IMM((val) >> 10, 22);				\
494	flush(addr);							\
495	flush(addr + 1);						\
496	flush(addr + 3);						\
497} while (0)
498
499#define	PATCH_TSB_MASK(addr, val) do {					\
500	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
501	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
502	    IF_F3_RS1(addr[1])))					\
503		OF_panic("%s: patched instructions have changed",	\
504		    __func__);						\
505	addr[0] |= EIF_IMM((val) >> 10, 22);				\
506	addr[1] |= EIF_IMM((val), 10);					\
507	flush(addr);							\
508	flush(addr + 1);						\
509} while (0)
510
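	/*
	 * The PATCH_* macros above first check that the trap table code still
	 * matches the templates it was assembled from (all immediate fields
	 * zero) and then OR in the run-time values: PATCH_TSB splits a 64-bit
	 * address whose low 10 bits are zero across two 22-bit SETHI
	 * immediates and one 10-bit OR immediate, PATCH_TSB_MASK splits a
	 * mask across a SETHI/OR pair, and PATCH_ASI/PATCH_LDD fill in the
	 * ASI operands of the WR and LDDA instructions.  Every patched
	 * instruction is flushed before use.
	 */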
511	if (tsb_kernel_ldd_phys == 0) {
512		asi = ASI_N;
513		ldd = ASI_NUCLEUS_QUAD_LDD;
514		off = (vm_offset_t)tsb_kernel;
515	} else {
516		asi = ASI_PHYS_USE_EC;
517		ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
518		off = (vm_offset_t)tsb_kernel_phys;
519	}
520	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
521	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
522	    tsb_kernel_phys + tsb_kernel_size - 1);
523	PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
524	PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
525	PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
526	PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
527	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
528	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
529	PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
530	PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
531	PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
532	PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
533	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
534	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
535	PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
536	PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
537	PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
538	PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
539	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
540	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
541
542	/*
543	 * Enter fake 8k pages for the 4MB kernel pages, so that
544	 * pmap_kextract() will work for them.
545	 */
546	for (i = 0; i < kernel_tlb_slots; i++) {
547		pa = kernel_tlbs[i].te_pa;
548		va = kernel_tlbs[i].te_va;
549		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
550			tp = tsb_kvtotte(va + off);
551			vpn = TV_VPN(va + off, TS_8K);
552			data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
553			    TD_SW | TD_CP | TD_CV | TD_P | TD_W;
554			pmap_bootstrap_set_tte(tp, vpn, data);
555		}
556	}
557
558	/*
559	 * Set the start and end of KVA.  The kernel is loaded starting
560	 * at the first available 4MB super page, so we advance to the
561	 * end of the last one used for it.
562	 */
563	virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
564	virtual_end = vm_max_kernel_address;
565	kernel_vm_end = vm_max_kernel_address;
566
567	/*
568	 * Allocate kva space for temporary mappings.
569	 */
570	pmap_idle_map = virtual_avail;
571	virtual_avail += PAGE_SIZE * colors;
572	pmap_temp_map_1 = virtual_avail;
573	virtual_avail += PAGE_SIZE * colors;
574	pmap_temp_map_2 = virtual_avail;
575	virtual_avail += PAGE_SIZE * colors;
576
577	/*
578	 * Allocate a kernel stack with guard page for thread0 and map it
579	 * into the kernel TSB.  We must ensure that the virtual address is
580	 * colored properly for corresponding CPUs, since we're allocating
581	 * from phys_avail so the memory won't have an associated vm_page_t.
582	 */
583	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
584	kstack0_phys = pa;
585	virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
586	kstack0 = virtual_avail;
587	virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
588	if (dcache_color_ignore == 0)
589		KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
590		    ("pmap_bootstrap: kstack0 miscolored"));
591	for (i = 0; i < KSTACK_PAGES; i++) {
592		pa = kstack0_phys + i * PAGE_SIZE;
593		va = kstack0 + i * PAGE_SIZE;
594		tp = tsb_kvtotte(va);
595		vpn = TV_VPN(va, TS_8K);
596		data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
597		    TD_CV | TD_P | TD_W;
598		pmap_bootstrap_set_tte(tp, vpn, data);
599	}
600
601	/*
602	 * Calculate the last available physical address.
603	 */
604	for (i = 0; phys_avail[i + 2] != 0; i += 2)
605		;
606	Maxmem = sparc64_btop(phys_avail[i + 1]);
607
608	/*
609	 * Add the PROM mappings to the kernel TSB.
610	 */
611	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
612		OF_panic("%s: finddevice /virtual-memory", __func__);
613	if ((sz = OF_getproplen(vmem, "translations")) == -1)
614		OF_panic("%s: getproplen translations", __func__);
615	if (sizeof(translations) < sz)
616		OF_panic("%s: translations too small", __func__);
617	bzero(translations, sz);
618	if (OF_getprop(vmem, "translations", translations, sz) == -1)
619		OF_panic("%s: getprop /virtual-memory/translations",
620		    __func__);
621	sz /= sizeof(*translations);
622	translations_size = sz;
623#ifdef DIAGNOSTIC
624	OF_printf("pmap_bootstrap: translations\n");
625#endif
626	qsort(translations, sz, sizeof (*translations), om_cmp);
627	for (i = 0; i < sz; i++) {
628#ifdef DIAGNOSTIC
629		OF_printf("translation: start=%#lx size=%#lx tte=%#lx\n",
630		    translations[i].om_start, translations[i].om_size,
631		    translations[i].om_tte);
632#endif
633		if ((translations[i].om_tte & TD_V) == 0)
634			continue;
635		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
636		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
637			continue;
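		/*
		 * Re-enter the PROM mapping one 8K page at a time, masking
		 * off the PROM's software bits and the CPU-specific
		 * diagnostic (spitfire) or reserved (cheetah) bits, adding
		 * execute permission and advancing the physical address by
		 * the page offset.
		 */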
638		for (off = 0; off < translations[i].om_size;
639		    off += PAGE_SIZE) {
640			va = translations[i].om_start + off;
641			tp = tsb_kvtotte(va);
642			vpn = TV_VPN(va, TS_8K);
643			data = ((translations[i].om_tte &
644			    ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
645			    (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
646			    cpu_impl < CPU_IMPL_ULTRASPARCIII ?
647			    (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
648			    (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
649			    (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
650			    off;
651			pmap_bootstrap_set_tte(tp, vpn, data);
652		}
653	}
654
655	/*
656	 * Get the available physical memory ranges from /memory/reg.  These
657	 * are only used for kernel dumps, but it may not be wise to do PROM
658	 * calls in that situation.
659	 */
660	if ((sz = OF_getproplen(pmem, "reg")) == -1)
661		OF_panic("%s: getproplen /memory/reg", __func__);
662	if (sizeof(sparc64_memreg) < sz)
663		OF_panic("%s: sparc64_memreg too small", __func__);
664	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
665		OF_panic("%s: getprop /memory/reg", __func__);
666	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
667
668	/*
669	 * Initialize the kernel pmap (which is statically allocated).
670	 */
671	pm = kernel_pmap;
672	PMAP_LOCK_INIT(pm);
673	for (i = 0; i < MAXCPU; i++)
674		pm->pm_context[i] = TLB_CTX_KERNEL;
675	CPU_FILL(&pm->pm_active);
676
677	/*
678	 * Initialize the global tte list lock, which is more commonly
679	 * known as the pmap pv global lock.
680	 */
681	rw_init(&tte_list_global_lock, "pmap pv global");
682
683	/*
684	 * Flush all non-locked TLB entries possibly left over by the
685	 * firmware.
686	 */
687	tlb_flush_nonlocked();
688}
689
690/*
691 * Map the 4MB kernel TSB pages.
692 */
693void
694pmap_map_tsb(void)
695{
696	vm_offset_t va;
697	vm_paddr_t pa;
698	u_long data;
699	int i;
700
701	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
702		va = (vm_offset_t)tsb_kernel + i;
703		pa = tsb_kernel_phys + i;
704		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
705		    TD_P | TD_W;
706		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
707		    TLB_TAR_CTX(TLB_CTX_KERNEL));
708		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
709	}
710}
711
712/*
713 * Set the secondary context to be the kernel context (needed for FP block
714 * operations in the kernel).
715 */
716void
717pmap_set_kctx(void)
718{
719
720	stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
721	    TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
722	flush(KERNBASE);
723}
724
725/*
726 * Allocate a physical page of memory directly from the phys_avail map.
727 * Can only be called from pmap_bootstrap before avail start and end are
728 * calculated.
729 */
730static vm_paddr_t
731pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
732{
733	vm_paddr_t pa;
734	int i;
735
736	size = roundup(size, PAGE_SIZE * colors);
737	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
738		if (phys_avail[i + 1] - phys_avail[i] < size)
739			continue;
740		pa = phys_avail[i];
741		phys_avail[i] += size;
742		return (pa);
743	}
744	OF_panic("%s: no suitable region found", __func__);
745}
746
747/*
748 * Set a TTE.  This function is intended as a helper when tsb_kernel is
749 * direct-mapped but we haven't taken over the trap table yet, as is the
750 * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
751 * the kernel TSB.
752 */
753static void
754pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
755{
756
757	if (tsb_kernel_ldd_phys == 0) {
758		tp->tte_vpn = vpn;
759		tp->tte_data = data;
760	} else {
761		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
762		    ASI_PHYS_USE_EC, vpn);
763		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
764		    ASI_PHYS_USE_EC, data);
765	}
766}
767
768/*
769 * Initialize a vm_page's machine-dependent fields.
770 */
771void
772pmap_page_init(vm_page_t m)
773{
774
775	TAILQ_INIT(&m->md.tte_list);
776	m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
777	m->md.pmap = NULL;
778}
779
780/*
781 * Initialize the pmap module.
782 */
783void
784pmap_init(void)
785{
786	vm_offset_t addr;
787	vm_size_t size;
788	int result;
789	int i;
790
791	for (i = 0; i < translations_size; i++) {
792		addr = translations[i].om_start;
793		size = translations[i].om_size;
794		if ((translations[i].om_tte & TD_V) == 0)
795			continue;
796		if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
797			continue;
798		result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
799		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
800		if (result != KERN_SUCCESS || addr != translations[i].om_start)
801			panic("pmap_init: vm_map_find");
802	}
803}
804
805/*
806 * Extract the physical page address associated with the given
807 * map/virtual_address pair.
808 */
809vm_paddr_t
810pmap_extract(pmap_t pm, vm_offset_t va)
811{
812	struct tte *tp;
813	vm_paddr_t pa;
814
815	if (pm == kernel_pmap)
816		return (pmap_kextract(va));
817	PMAP_LOCK(pm);
818	tp = tsb_tte_lookup(pm, va);
819	if (tp == NULL)
820		pa = 0;
821	else
822		pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
823	PMAP_UNLOCK(pm);
824	return (pa);
825}
826
827/*
828 * Atomically extract and hold the physical page with the given
829 * pmap and virtual address pair if that mapping permits the given
830 * protection.
831 */
832vm_page_t
833pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
834{
835	struct tte *tp;
836	vm_page_t m;
837	vm_paddr_t pa;
838
839	m = NULL;
840	pa = 0;
841	PMAP_LOCK(pm);
842retry:
843	if (pm == kernel_pmap) {
844		if (va >= VM_MIN_DIRECT_ADDRESS) {
845			tp = NULL;
846			m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
847			(void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
848			    &pa);
849			vm_page_hold(m);
850		} else {
851			tp = tsb_kvtotte(va);
852			if ((tp->tte_data & TD_V) == 0)
853				tp = NULL;
854		}
855	} else
856		tp = tsb_tte_lookup(pm, va);
857	if (tp != NULL && ((tp->tte_data & TD_SW) ||
858	    (prot & VM_PROT_WRITE) == 0)) {
859		if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
860			goto retry;
861		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
862		vm_page_hold(m);
863	}
864	PA_UNLOCK_COND(pa);
865	PMAP_UNLOCK(pm);
866	return (m);
867}
868
869/*
870 * Extract the physical page address associated with the given kernel virtual
871 * address.
872 */
873vm_paddr_t
874pmap_kextract(vm_offset_t va)
875{
876	struct tte *tp;
877
878	if (va >= VM_MIN_DIRECT_ADDRESS)
879		return (TLB_DIRECT_TO_PHYS(va));
880	tp = tsb_kvtotte(va);
881	if ((tp->tte_data & TD_V) == 0)
882		return (0);
883	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
884}
885
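/*
 * Account for a new mapping of the given page at the given virtual address
 * and decide whether it may be entered cacheably.  Returns 1 if the caller
 * may set TD_CV, or 0 if all mappings of the page must be non-cacheable
 * because of conflicting D-cache colors.
 */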
886int
887pmap_cache_enter(vm_page_t m, vm_offset_t va)
888{
889	struct tte *tp;
890	int color;
891
892	rw_assert(&tte_list_global_lock, RA_WLOCKED);
893	KASSERT((m->flags & PG_FICTITIOUS) == 0,
894	    ("pmap_cache_enter: fake page"));
895	PMAP_STATS_INC(pmap_ncache_enter);
896
897	if (dcache_color_ignore != 0)
898		return (1);
899
900	/*
901	 * Find the color for this virtual address and note the added mapping.
902	 */
903	color = DCACHE_COLOR(va);
904	m->md.colors[color]++;
905
906	/*
907	 * If all existing mappings have the same color, the mapping is
908	 * cacheable.
909	 */
910	if (m->md.color == color) {
911		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
912		    ("pmap_cache_enter: cacheable, mappings of other color"));
913		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
914			PMAP_STATS_INC(pmap_ncache_enter_c);
915		else
916			PMAP_STATS_INC(pmap_ncache_enter_oc);
917		return (1);
918	}
919
920	/*
921	 * If there are no mappings of the other color, and the page still has
922	 * the wrong color, this must be a new mapping.  Change the color to
923	 * match the new mapping, which is cacheable.  We must flush the page
924	 * from the cache now.
925	 */
926	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
927		KASSERT(m->md.colors[color] == 1,
928		    ("pmap_cache_enter: changing color, not new mapping"));
929		dcache_page_inval(VM_PAGE_TO_PHYS(m));
930		m->md.color = color;
931		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
932			PMAP_STATS_INC(pmap_ncache_enter_cc);
933		else
934			PMAP_STATS_INC(pmap_ncache_enter_coc);
935		return (1);
936	}
937
938	/*
939	 * If the mapping is already non-cacheable, just return.
940	 */
941	if (m->md.color == -1) {
942		PMAP_STATS_INC(pmap_ncache_enter_nc);
943		return (0);
944	}
945
946	PMAP_STATS_INC(pmap_ncache_enter_cnc);
947
948	/*
949	 * Mark all mappings as uncacheable, flush any lines with the other
950	 * color out of the dcache, and set the color to none (-1).
951	 */
952	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
953		atomic_clear_long(&tp->tte_data, TD_CV);
954		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
955	}
956	dcache_page_inval(VM_PAGE_TO_PHYS(m));
957	m->md.color = -1;
958	return (0);
959}
960
961static void
962pmap_cache_remove(vm_page_t m, vm_offset_t va)
963{
964	struct tte *tp;
965	int color;
966
967	rw_assert(&tte_list_global_lock, RA_WLOCKED);
968	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
969	    m->md.colors[DCACHE_COLOR(va)]);
970	KASSERT((m->flags & PG_FICTITIOUS) == 0,
971	    ("pmap_cache_remove: fake page"));
972	PMAP_STATS_INC(pmap_ncache_remove);
973
974	if (dcache_color_ignore != 0)
975		return;
976
977	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
978	    ("pmap_cache_remove: no mappings %d <= 0",
979	    m->md.colors[DCACHE_COLOR(va)]));
980
981	/*
982	 * Find the color for this virtual address and note the removal of
983	 * the mapping.
984	 */
985	color = DCACHE_COLOR(va);
986	m->md.colors[color]--;
987
988	/*
989	 * If the page is cacheable, just return and keep the same color, even
990	 * if there are no longer any mappings.
991	 */
992	if (m->md.color != -1) {
993		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
994			PMAP_STATS_INC(pmap_ncache_remove_c);
995		else
996			PMAP_STATS_INC(pmap_ncache_remove_oc);
997		return;
998	}
999
1000	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
1001	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
1002
1003	/*
1004	 * If the page is not cacheable (color is -1), and the number of
1005	 * mappings for this color is not zero, just return.  There are
1006	 * mappings of the other color still, so remain non-cacheable.
1007	 */
1008	if (m->md.colors[color] != 0) {
1009		PMAP_STATS_INC(pmap_ncache_remove_nc);
1010		return;
1011	}
1012
1013	/*
1014	 * The number of mappings for this color is now zero.  Recache the
1015	 * other colored mappings, and change the page color to the other
1016	 * color.  There should be no lines in the data cache for this page,
1017	 * so flushing should not be needed.
1018	 */
1019	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1020		atomic_set_long(&tp->tte_data, TD_CV);
1021		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1022	}
1023	m->md.color = DCACHE_OTHER_COLOR(color);
1024
1025	if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1026		PMAP_STATS_INC(pmap_ncache_remove_cc);
1027	else
1028		PMAP_STATS_INC(pmap_ncache_remove_coc);
1029}
1030
1031/*
1032 * Map a wired page into kernel virtual address space.
1033 */
1034void
1035pmap_kenter(vm_offset_t va, vm_page_t m)
1036{
1037	vm_offset_t ova;
1038	struct tte *tp;
1039	vm_page_t om;
1040	u_long data;
1041
1042	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1043	PMAP_STATS_INC(pmap_nkenter);
1044	tp = tsb_kvtotte(va);
1045	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1046	    va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1047	if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1048		CTR5(KTR_SPARE2,
1049	"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1050		    va, VM_PAGE_TO_PHYS(m), m->object,
1051		    m->object ? m->object->type : -1,
1052		    m->pindex);
1053		PMAP_STATS_INC(pmap_nkenter_oc);
1054	}
1055	if ((tp->tte_data & TD_V) != 0) {
1056		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1057		ova = TTE_GET_VA(tp);
1058		if (m == om && va == ova) {
1059			PMAP_STATS_INC(pmap_nkenter_stupid);
1060			return;
1061		}
1062		TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1063		pmap_cache_remove(om, ova);
1064		if (va != ova)
1065			tlb_page_demap(kernel_pmap, ova);
1066	}
1067	data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1068	    TD_P | TD_W;
1069	if (pmap_cache_enter(m, va) != 0)
1070		data |= TD_CV;
1071	tp->tte_vpn = TV_VPN(va, TS_8K);
1072	tp->tte_data = data;
1073	TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1074}
1075
1076/*
1077 * Map a wired page into kernel virtual address space.  This additionally
1078 * takes a flag argument which is or'ed to the TTE data.  This is used by
1079 * sparc64_bus_mem_map().
1080 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1081 * to flush entries that might still be in the cache, if applicable.
1082 */
1083void
1084pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1085{
1086	struct tte *tp;
1087
1088	tp = tsb_kvtotte(va);
1089	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1090	    va, pa, tp, tp->tte_data);
1091	tp->tte_vpn = TV_VPN(va, TS_8K);
1092	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1093}
1094
1095/*
1096 * Remove a wired page from kernel virtual address space.
1097 */
1098void
1099pmap_kremove(vm_offset_t va)
1100{
1101	struct tte *tp;
1102	vm_page_t m;
1103
1104	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1105	PMAP_STATS_INC(pmap_nkremove);
1106	tp = tsb_kvtotte(va);
1107	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1108	    tp->tte_data);
1109	if ((tp->tte_data & TD_V) == 0)
1110		return;
1111	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1112	TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1113	pmap_cache_remove(m, va);
1114	TTE_ZERO(tp);
1115}
1116
1117/*
1118 * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1119 */
1120void
1121pmap_kremove_flags(vm_offset_t va)
1122{
1123	struct tte *tp;
1124
1125	tp = tsb_kvtotte(va);
1126	CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1127	    tp->tte_data);
1128	TTE_ZERO(tp);
1129}
1130
1131/*
1132 * Map a range of physical addresses into kernel virtual address space.
1133 *
1134 * The value passed in *virt is a suggested virtual address for the mapping.
1135 * Architectures which can support a direct-mapped physical to virtual region
1136 * can return the appropriate address within that region, leaving '*virt'
1137 * unchanged.
1138 */
1139vm_offset_t
1140pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1141{
1142
1143	return (TLB_PHYS_TO_DIRECT(start));
1144}
1145
1146/*
1147 * Map a list of wired pages into kernel virtual address space.  This is
1148 * intended for temporary mappings which do not need page modification or
1149 * references recorded.  Existing mappings in the region are overwritten.
1150 */
1151void
1152pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1153{
1154	vm_offset_t va;
1155
1156	PMAP_STATS_INC(pmap_nqenter);
1157	va = sva;
1158	rw_wlock(&tte_list_global_lock);
1159	while (count-- > 0) {
1160		pmap_kenter(va, *m);
1161		va += PAGE_SIZE;
1162		m++;
1163	}
1164	rw_wunlock(&tte_list_global_lock);
1165	tlb_range_demap(kernel_pmap, sva, va);
1166}
1167
1168/*
1169 * Remove page mappings from kernel virtual address space.  Intended for
1170 * temporary mappings entered by pmap_qenter.
1171 */
1172void
1173pmap_qremove(vm_offset_t sva, int count)
1174{
1175	vm_offset_t va;
1176
1177	PMAP_STATS_INC(pmap_nqremove);
1178	va = sva;
1179	rw_wlock(&tte_list_global_lock);
1180	while (count-- > 0) {
1181		pmap_kremove(va);
1182		va += PAGE_SIZE;
1183	}
1184	rw_wunlock(&tte_list_global_lock);
1185	tlb_range_demap(kernel_pmap, sva, va);
1186}
1187
1188/*
1189 * Initialize the pmap associated with process 0.
1190 */
1191void
1192pmap_pinit0(pmap_t pm)
1193{
1194	int i;
1195
1196	PMAP_LOCK_INIT(pm);
1197	for (i = 0; i < MAXCPU; i++)
1198		pm->pm_context[i] = TLB_CTX_KERNEL;
1199	CPU_ZERO(&pm->pm_active);
1200	pm->pm_tsb = NULL;
1201	pm->pm_tsb_obj = NULL;
1202	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1203}
1204
1205/*
1206 * Initialize a preallocated and zeroed pmap structure, such as one in a
1207 * vmspace structure.
1208 */
1209int
1210pmap_pinit(pmap_t pm)
1211{
1212	vm_page_t ma[TSB_PAGES];
1213	vm_page_t m;
1214	int i;
1215
1216	/*
1217	 * Allocate KVA space for the TSB.
1218	 */
1219	if (pm->pm_tsb == NULL) {
1220		pm->pm_tsb = (struct tte *)kva_alloc(TSB_BSIZE);
1221		if (pm->pm_tsb == NULL)
1222			return (0);
1223	}
1224
1225	/*
1226	 * Allocate an object for it.
1227	 */
1228	if (pm->pm_tsb_obj == NULL)
1229		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1230
1231	for (i = 0; i < MAXCPU; i++)
1232		pm->pm_context[i] = -1;
1233	CPU_ZERO(&pm->pm_active);
1234
1235	VM_OBJECT_WLOCK(pm->pm_tsb_obj);
1236	for (i = 0; i < TSB_PAGES; i++) {
1237		m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1238		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1239		m->valid = VM_PAGE_BITS_ALL;
1240		m->md.pmap = pm;
1241		ma[i] = m;
1242	}
1243	VM_OBJECT_WUNLOCK(pm->pm_tsb_obj);
1244	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1245
1246	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1247	return (1);
1248}
1249
1250/*
1251 * Release any resources held by the given physical map.
1252 * Called when a pmap initialized by pmap_pinit is being released.
1253 * Should only be called if the map contains no valid mappings.
1254 */
1255void
1256pmap_release(pmap_t pm)
1257{
1258	vm_object_t obj;
1259	vm_page_t m;
1260#ifdef SMP
1261	struct pcpu *pc;
1262#endif
1263
1264	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1265	    pm->pm_context[curcpu], pm->pm_tsb);
1266	KASSERT(pmap_resident_count(pm) == 0,
1267	    ("pmap_release: resident pages %ld != 0",
1268	    pmap_resident_count(pm)));
1269
1270	/*
1271	 * After the pmap was freed, it might be reallocated to a new process.
1272	 * When switching, this might lead us to wrongly assume that we need
1273	 * not switch contexts because old and new pmap pointers are equal.
1274	 * Therefore, make sure that this pmap is not referenced by any PCPU
1275	 * pointer any more.  This could happen in two cases:
1276	 * - A process that referenced the pmap is currently exiting on a CPU.
1277	 *   However, it is guaranteed to not switch in any more after setting
1278	 *   its state to PRS_ZOMBIE.
1279	 * - A process that referenced this pmap ran on a CPU, but we switched
1280	 *   to a kernel thread, leaving the pmap pointer unchanged.
1281	 */
1282#ifdef SMP
1283	sched_pin();
1284	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
1285		atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
1286		    (uintptr_t)pm, (uintptr_t)NULL);
1287	sched_unpin();
1288#else
1289	critical_enter();
1290	if (PCPU_GET(pmap) == pm)
1291		PCPU_SET(pmap, NULL);
1292	critical_exit();
1293#endif
1294
1295	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1296	obj = pm->pm_tsb_obj;
1297	VM_OBJECT_WLOCK(obj);
1298	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1299	while (!TAILQ_EMPTY(&obj->memq)) {
1300		m = TAILQ_FIRST(&obj->memq);
1301		m->md.pmap = NULL;
1302		m->wire_count--;
1303		atomic_subtract_int(&cnt.v_wire_count, 1);
1304		vm_page_free_zero(m);
1305	}
1306	VM_OBJECT_WUNLOCK(obj);
1307}
1308
1309/*
1310 * Grow the number of kernel page table entries.  Unneeded.
1311 */
1312void
1313pmap_growkernel(vm_offset_t addr)
1314{
1315
1316	panic("pmap_growkernel: can't grow kernel");
1317}
1318
1319int
1320pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1321    vm_offset_t va)
1322{
1323	vm_page_t m;
1324	u_long data;
1325
1326	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1327	data = atomic_readandclear_long(&tp->tte_data);
1328	if ((data & TD_FAKE) == 0) {
1329		m = PHYS_TO_VM_PAGE(TD_PA(data));
1330		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1331		if ((data & TD_WIRED) != 0)
1332			pm->pm_stats.wired_count--;
1333		if ((data & TD_PV) != 0) {
1334			if ((data & TD_W) != 0)
1335				vm_page_dirty(m);
1336			if ((data & TD_REF) != 0)
1337				vm_page_aflag_set(m, PGA_REFERENCED);
1338			if (TAILQ_EMPTY(&m->md.tte_list))
1339				vm_page_aflag_clear(m, PGA_WRITEABLE);
1340			pm->pm_stats.resident_count--;
1341		}
1342		pmap_cache_remove(m, va);
1343	}
1344	TTE_ZERO(tp);
1345	if (PMAP_REMOVE_DONE(pm))
1346		return (0);
1347	return (1);
1348}
1349
1350/*
1351 * Remove the given range of addresses from the specified map.
1352 */
1353void
1354pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1355{
1356	struct tte *tp;
1357	vm_offset_t va;
1358
1359	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1360	    pm->pm_context[curcpu], start, end);
1361	if (PMAP_REMOVE_DONE(pm))
1362		return;
1363	rw_wlock(&tte_list_global_lock);
1364	PMAP_LOCK(pm);
1365	if (end - start > PMAP_TSB_THRESH) {
1366		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1367		tlb_context_demap(pm);
1368	} else {
1369		for (va = start; va < end; va += PAGE_SIZE)
1370			if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1371			    !pmap_remove_tte(pm, NULL, tp, va))
1372				break;
1373		tlb_range_demap(pm, start, end - 1);
1374	}
1375	PMAP_UNLOCK(pm);
1376	rw_wunlock(&tte_list_global_lock);
1377}
1378
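/*
 * Remove all managed mappings of the given page, transferring the reference
 * and modify bits to the vm_page, and clear its writeable attribute.
 */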
1379void
1380pmap_remove_all(vm_page_t m)
1381{
1382	struct pmap *pm;
1383	struct tte *tpn;
1384	struct tte *tp;
1385	vm_offset_t va;
1386
1387	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1388	    ("pmap_remove_all: page %p is not managed", m));
1389	rw_wlock(&tte_list_global_lock);
1390	for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1391		tpn = TAILQ_NEXT(tp, tte_link);
1392		if ((tp->tte_data & TD_PV) == 0)
1393			continue;
1394		pm = TTE_GET_PMAP(tp);
1395		va = TTE_GET_VA(tp);
1396		PMAP_LOCK(pm);
1397		if ((tp->tte_data & TD_WIRED) != 0)
1398			pm->pm_stats.wired_count--;
1399		if ((tp->tte_data & TD_REF) != 0)
1400			vm_page_aflag_set(m, PGA_REFERENCED);
1401		if ((tp->tte_data & TD_W) != 0)
1402			vm_page_dirty(m);
1403		tp->tte_data &= ~TD_V;
1404		tlb_page_demap(pm, va);
1405		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1406		pm->pm_stats.resident_count--;
1407		pmap_cache_remove(m, va);
1408		TTE_ZERO(tp);
1409		PMAP_UNLOCK(pm);
1410	}
1411	vm_page_aflag_clear(m, PGA_WRITEABLE);
1412	rw_wunlock(&tte_list_global_lock);
1413}
1414
1415static int
1416pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1417    vm_offset_t va)
1418{
1419	u_long data;
1420	vm_page_t m;
1421
1422	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1423	data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1424	if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1425		m = PHYS_TO_VM_PAGE(TD_PA(data));
1426		vm_page_dirty(m);
1427	}
1428	return (1);
1429}
1430
1431/*
1432 * Set the physical protection on the specified range of this map as requested.
1433 */
1434void
1435pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1436{
1437	vm_offset_t va;
1438	struct tte *tp;
1439
1440	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1441	    pm->pm_context[curcpu], sva, eva, prot);
1442
1443	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1444		pmap_remove(pm, sva, eva);
1445		return;
1446	}
1447
1448	if (prot & VM_PROT_WRITE)
1449		return;
1450
1451	PMAP_LOCK(pm);
1452	if (eva - sva > PMAP_TSB_THRESH) {
1453		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1454		tlb_context_demap(pm);
1455	} else {
1456		for (va = sva; va < eva; va += PAGE_SIZE)
1457			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1458				pmap_protect_tte(pm, NULL, tp, va);
1459		tlb_range_demap(pm, sva, eva - 1);
1460	}
1461	PMAP_UNLOCK(pm);
1462}
1463
1464/*
1465 * Map the given physical page at the specified virtual address in the
1466 * target pmap with the protection requested.  If specified the page
1467 * will be wired down.
1468 */
1469int
1470pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1471    u_int flags, int8_t psind)
1472{
1473	int rv;
1474
1475	rw_wlock(&tte_list_global_lock);
1476	PMAP_LOCK(pm);
1477	rv = pmap_enter_locked(pm, va, m, prot, flags, psind);
1478	rw_wunlock(&tte_list_global_lock);
1479	PMAP_UNLOCK(pm);
1480	return (rv);
1481}
1482
1483/*
1484 * Map the given physical page at the specified virtual address in the
1485 * target pmap with the protection requested.  If specified the page
1486 * will be wired down.
1487 *
1488 * The page queues and pmap must be locked.
1489 */
1490static int
1491pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1492    u_int flags, int8_t psind __unused)
1493{
1494	struct tte *tp;
1495	vm_paddr_t pa;
1496	vm_page_t real;
1497	u_long data;
1498	boolean_t wired;
1499
1500	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1501	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1502	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1503		VM_OBJECT_ASSERT_LOCKED(m->object);
1504	PMAP_STATS_INC(pmap_nenter);
1505	pa = VM_PAGE_TO_PHYS(m);
1506	wired = (flags & PMAP_ENTER_WIRED) != 0;
1507
1508	/*
1509	 * If this is a fake page from the device_pager, but it covers actual
1510	 * physical memory, convert to the real backing page.
1511	 */
1512	if ((m->flags & PG_FICTITIOUS) != 0) {
1513		real = vm_phys_paddr_to_vm_page(pa);
1514		if (real != NULL)
1515			m = real;
1516	}
1517
1518	CTR6(KTR_PMAP,
1519	    "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1520	    pm->pm_context[curcpu], m, va, pa, prot, wired);
1521
1522	/*
1523	 * If there is an existing mapping, and the physical address has not
1524	 * changed, this must be a protection or wiring change.
1525	 */
1526	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1527		CTR0(KTR_PMAP, "pmap_enter_locked: update");
1528		PMAP_STATS_INC(pmap_nenter_update);
1529
1530		/*
1531		 * Wiring change, just update stats.
1532		 */
1533		if (wired) {
1534			if ((tp->tte_data & TD_WIRED) == 0) {
1535				tp->tte_data |= TD_WIRED;
1536				pm->pm_stats.wired_count++;
1537			}
1538		} else {
1539			if ((tp->tte_data & TD_WIRED) != 0) {
1540				tp->tte_data &= ~TD_WIRED;
1541				pm->pm_stats.wired_count--;
1542			}
1543		}
1544
1545		/*
1546		 * Save the old bits and clear the ones we're interested in.
1547		 */
1548		data = tp->tte_data;
1549		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1550
1551		/*
1552		 * Re-grant write access if requested, else sense modify status.
1553		 */
1554		if ((prot & VM_PROT_WRITE) != 0) {
1555			tp->tte_data |= TD_SW;
1556			if (wired)
1557				tp->tte_data |= TD_W;
1558			if ((m->oflags & VPO_UNMANAGED) == 0)
1559				vm_page_aflag_set(m, PGA_WRITEABLE);
1560		} else if ((data & TD_W) != 0)
1561			vm_page_dirty(m);
1562
1563		/*
1564		 * If we're turning on execute permissions, flush the icache.
1565		 */
1566		if ((prot & VM_PROT_EXECUTE) != 0) {
1567			if ((data & TD_EXEC) == 0)
1568				icache_page_inval(pa);
1569			tp->tte_data |= TD_EXEC;
1570		}
1571
1572		/*
1573		 * Delete the old mapping.
1574		 */
1575		tlb_page_demap(pm, TTE_GET_VA(tp));
1576	} else {
1577		/*
1578		 * If there is an existing mapping, but it's for a different
1579		 * physical address, delete the old mapping.
1580		 */
1581		if (tp != NULL) {
1582			CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1583			PMAP_STATS_INC(pmap_nenter_replace);
1584			pmap_remove_tte(pm, NULL, tp, va);
1585			tlb_page_demap(pm, va);
1586		} else {
1587			CTR0(KTR_PMAP, "pmap_enter_locked: new");
1588			PMAP_STATS_INC(pmap_nenter_new);
1589		}
1590
1591		/*
1592		 * Now set up the data and install the new mapping.
1593		 */
1594		data = TD_V | TD_8K | TD_PA(pa);
1595		if (pm == kernel_pmap)
1596			data |= TD_P;
1597		if ((prot & VM_PROT_WRITE) != 0) {
1598			data |= TD_SW;
1599			if ((m->oflags & VPO_UNMANAGED) == 0)
1600				vm_page_aflag_set(m, PGA_WRITEABLE);
1601		}
1602		if (prot & VM_PROT_EXECUTE) {
1603			data |= TD_EXEC;
1604			icache_page_inval(pa);
1605		}
1606
1607		/*
1608		 * If it's wired, update stats.  We also don't need reference or
1609		 * modify tracking for wired mappings, so set the bits now.
1610		 */
1611		if (wired) {
1612			pm->pm_stats.wired_count++;
1613			data |= TD_REF | TD_WIRED;
1614			if ((prot & VM_PROT_WRITE) != 0)
1615				data |= TD_W;
1616		}
1617
1618		tsb_tte_enter(pm, m, va, TS_8K, data);
1619	}
1620
1621	return (KERN_SUCCESS);
1622}
1623
1624/*
1625 * Maps a sequence of resident pages belonging to the same object.
1626 * The sequence begins with the given page m_start.  This page is
1627 * mapped at the given virtual address start.  Each subsequent page is
1628 * mapped at a virtual address that is offset from start by the same
1629 * amount as the page is offset from m_start within the object.  The
1630 * last page in the sequence is the page with the largest offset from
1631 * m_start that can be mapped at a virtual address less than the given
1632 * virtual address end.  Not every virtual page between start and end
1633 * is mapped; only those for which a resident page exists with the
1634 * corresponding offset from m_start are mapped.
1635 */
1636void
1637pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1638    vm_page_t m_start, vm_prot_t prot)
1639{
1640	vm_page_t m;
1641	vm_pindex_t diff, psize;
1642
1643	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1644
1645	psize = atop(end - start);
1646	m = m_start;
1647	rw_wlock(&tte_list_global_lock);
1648	PMAP_LOCK(pm);
1649	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1650		pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1651		    (VM_PROT_READ | VM_PROT_EXECUTE), 0, 0);
1652		m = TAILQ_NEXT(m, listq);
1653	}
1654	rw_wunlock(&tte_list_global_lock);
1655	PMAP_UNLOCK(pm);
1656}
1657
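/*
 * Enter a single, non-wired, read-only (and possibly executable) mapping of
 * the given page; a pared-down pmap_enter() typically used for prefaulting.
 */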
1658void
1659pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1660{
1661
1662	rw_wlock(&tte_list_global_lock);
1663	PMAP_LOCK(pm);
1664	pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1665	    0, 0);
1666	rw_wunlock(&tte_list_global_lock);
1667	PMAP_UNLOCK(pm);
1668}
1669
1670void
1671pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1672    vm_pindex_t pindex, vm_size_t size)
1673{
1674
1675	VM_OBJECT_ASSERT_WLOCKED(object);
1676	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1677	    ("pmap_object_init_pt: non-device object"));
1678}
1679
1680static int
1681pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp, vm_offset_t va)
1682{
1683
1684	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1685	if ((tp->tte_data & TD_WIRED) == 0)
1686		panic("pmap_unwire_tte: tp %p is missing TD_WIRED", tp);
1687	atomic_clear_long(&tp->tte_data, TD_WIRED);
1688	pm->pm_stats.wired_count--;
1689	return (1);
1690}
1691
1692/*
1693 * Clear the wired attribute from the mappings for the specified range of
1694 * addresses in the given pmap.  Every valid mapping within that range must
1695 * have the wired attribute set.  In contrast, invalid mappings cannot have
1696 * the wired attribute set, so they are ignored.
1697 *
1698 * The wired attribute of the translation table entry is not a hardware
1699 * feature, so there is no need to invalidate any TLB entries.
1700 */
1701void
1702pmap_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1703{
1704	vm_offset_t va;
1705	struct tte *tp;
1706
1707	PMAP_LOCK(pm);
1708	if (eva - sva > PMAP_TSB_THRESH)
1709		tsb_foreach(pm, NULL, sva, eva, pmap_unwire_tte);
1710	else {
1711		for (va = sva; va < eva; va += PAGE_SIZE)
1712			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1713				pmap_unwire_tte(pm, NULL, tp, va);
1714	}
1715	PMAP_UNLOCK(pm);
1716}
1717
1718static int
1719pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1720    vm_offset_t va)
1721{
1722	vm_page_t m;
1723	u_long data;
1724
1725	if ((tp->tte_data & TD_FAKE) != 0)
1726		return (1);
1727	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1728		data = tp->tte_data &
1729		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1730		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1731		tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1732	}
1733	return (1);
1734}
1735
1736void
1737pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1738    vm_size_t len, vm_offset_t src_addr)
1739{
1740	struct tte *tp;
1741	vm_offset_t va;
1742
1743	if (dst_addr != src_addr)
1744		return;
1745	rw_wlock(&tte_list_global_lock);
1746	if (dst_pmap < src_pmap) {
1747		PMAP_LOCK(dst_pmap);
1748		PMAP_LOCK(src_pmap);
1749	} else {
1750		PMAP_LOCK(src_pmap);
1751		PMAP_LOCK(dst_pmap);
1752	}
1753	if (len > PMAP_TSB_THRESH) {
1754		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1755		    pmap_copy_tte);
1756		tlb_context_demap(dst_pmap);
1757	} else {
1758		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1759			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1760				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1761		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1762	}
1763	rw_wunlock(&tte_list_global_lock);
1764	PMAP_UNLOCK(src_pmap);
1765	PMAP_UNLOCK(dst_pmap);
1766}
1767
1768void
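/*
 * Zero a page of physical memory, using the direct map when the page's
 * D-cache color matches its physical color, a physical ASI access when the
 * page is already non-cacheable, and a temporary mapping of the correct
 * color otherwise.
 */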
1769pmap_zero_page(vm_page_t m)
1770{
1771	struct tte *tp;
1772	vm_offset_t va;
1773	vm_paddr_t pa;
1774
1775	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1776	    ("pmap_zero_page: fake page"));
1777	PMAP_STATS_INC(pmap_nzero_page);
1778	pa = VM_PAGE_TO_PHYS(m);
1779	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1780		PMAP_STATS_INC(pmap_nzero_page_c);
1781		va = TLB_PHYS_TO_DIRECT(pa);
1782		cpu_block_zero((void *)va, PAGE_SIZE);
1783	} else if (m->md.color == -1) {
1784		PMAP_STATS_INC(pmap_nzero_page_nc);
1785		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1786	} else {
1787		PMAP_STATS_INC(pmap_nzero_page_oc);
1788		PMAP_LOCK(kernel_pmap);
1789		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1790		tp = tsb_kvtotte(va);
1791		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1792		tp->tte_vpn = TV_VPN(va, TS_8K);
1793		cpu_block_zero((void *)va, PAGE_SIZE);
1794		tlb_page_demap(kernel_pmap, va);
1795		PMAP_UNLOCK(kernel_pmap);
1796	}
1797}
1798
1799void
1800pmap_zero_page_area(vm_page_t m, int off, int size)
1801{
1802	struct tte *tp;
1803	vm_offset_t va;
1804	vm_paddr_t pa;
1805
1806	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1807	    ("pmap_zero_page_area: fake page"));
1808	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1809	PMAP_STATS_INC(pmap_nzero_page_area);
1810	pa = VM_PAGE_TO_PHYS(m);
1811	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1812		PMAP_STATS_INC(pmap_nzero_page_area_c);
1813		va = TLB_PHYS_TO_DIRECT(pa);
1814		bzero((void *)(va + off), size);
1815	} else if (m->md.color == -1) {
1816		PMAP_STATS_INC(pmap_nzero_page_area_nc);
1817		aszero(ASI_PHYS_USE_EC, pa + off, size);
1818	} else {
1819		PMAP_STATS_INC(pmap_nzero_page_area_oc);
1820		PMAP_LOCK(kernel_pmap);
1821		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1822		tp = tsb_kvtotte(va);
1823		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1824		tp->tte_vpn = TV_VPN(va, TS_8K);
1825		bzero((void *)(va + off), size);
1826		tlb_page_demap(kernel_pmap, va);
1827		PMAP_UNLOCK(kernel_pmap);
1828	}
1829}
1830
1831void
1832pmap_zero_page_idle(vm_page_t m)
1833{
1834	struct tte *tp;
1835	vm_offset_t va;
1836	vm_paddr_t pa;
1837
1838	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1839	    ("pmap_zero_page_idle: fake page"));
1840	PMAP_STATS_INC(pmap_nzero_page_idle);
1841	pa = VM_PAGE_TO_PHYS(m);
1842	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1843		PMAP_STATS_INC(pmap_nzero_page_idle_c);
1844		va = TLB_PHYS_TO_DIRECT(pa);
1845		cpu_block_zero((void *)va, PAGE_SIZE);
1846	} else if (m->md.color == -1) {
1847		PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1848		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1849	} else {
1850		PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1851		va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1852		tp = tsb_kvtotte(va);
1853		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1854		tp->tte_vpn = TV_VPN(va, TS_8K);
1855		cpu_block_zero((void *)va, PAGE_SIZE);
1856		tlb_page_demap(kernel_pmap, va);
1857	}
1858}
1859
1860void
1861pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1862{
1863	vm_offset_t vdst;
1864	vm_offset_t vsrc;
1865	vm_paddr_t pdst;
1866	vm_paddr_t psrc;
1867	struct tte *tp;
1868
1869	KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1870	    ("pmap_copy_page: fake dst page"));
1871	KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1872	    ("pmap_copy_page: fake src page"));
1873	PMAP_STATS_INC(pmap_ncopy_page);
1874	pdst = VM_PAGE_TO_PHYS(mdst);
1875	psrc = VM_PAGE_TO_PHYS(msrc);
1876	if (dcache_color_ignore != 0 ||
1877	    (msrc->md.color == DCACHE_COLOR(psrc) &&
1878	    mdst->md.color == DCACHE_COLOR(pdst))) {
1879		PMAP_STATS_INC(pmap_ncopy_page_c);
1880		vdst = TLB_PHYS_TO_DIRECT(pdst);
1881		vsrc = TLB_PHYS_TO_DIRECT(psrc);
1882		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1883	} else if (msrc->md.color == -1 && mdst->md.color == -1) {
1884		PMAP_STATS_INC(pmap_ncopy_page_nc);
1885		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1886	} else if (msrc->md.color == -1) {
1887		if (mdst->md.color == DCACHE_COLOR(pdst)) {
1888			PMAP_STATS_INC(pmap_ncopy_page_dc);
1889			vdst = TLB_PHYS_TO_DIRECT(pdst);
1890			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1891			    PAGE_SIZE);
1892		} else {
1893			PMAP_STATS_INC(pmap_ncopy_page_doc);
1894			PMAP_LOCK(kernel_pmap);
1895			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1896			tp = tsb_kvtotte(vdst);
1897			tp->tte_data =
1898			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1899			tp->tte_vpn = TV_VPN(vdst, TS_8K);
1900			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1901			    PAGE_SIZE);
1902			tlb_page_demap(kernel_pmap, vdst);
1903			PMAP_UNLOCK(kernel_pmap);
1904		}
1905	} else if (mdst->md.color == -1) {
1906		if (msrc->md.color == DCACHE_COLOR(psrc)) {
1907			PMAP_STATS_INC(pmap_ncopy_page_sc);
1908			vsrc = TLB_PHYS_TO_DIRECT(psrc);
1909			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1910			    PAGE_SIZE);
1911		} else {
1912			PMAP_STATS_INC(pmap_ncopy_page_soc);
1913			PMAP_LOCK(kernel_pmap);
1914			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1915			tp = tsb_kvtotte(vsrc);
1916			tp->tte_data =
1917			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1918			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1919			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1920			    PAGE_SIZE);
1921			tlb_page_demap(kernel_pmap, vsrc);
1922			PMAP_UNLOCK(kernel_pmap);
1923		}
1924	} else {
1925		PMAP_STATS_INC(pmap_ncopy_page_oc);
1926		PMAP_LOCK(kernel_pmap);
1927		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1928		tp = tsb_kvtotte(vdst);
1929		tp->tte_data =
1930		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1931		tp->tte_vpn = TV_VPN(vdst, TS_8K);
1932		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1933		tp = tsb_kvtotte(vsrc);
1934		tp->tte_data =
1935		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1936		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1937		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1938		tlb_page_demap(kernel_pmap, vdst);
1939		tlb_page_demap(kernel_pmap, vsrc);
1940		PMAP_UNLOCK(kernel_pmap);
1941	}
1942}
1943
1944int unmapped_buf_allowed;
1945
1946void
1947pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
1948    vm_offset_t b_offset, int xfersize)
1949{
1950
1951	panic("pmap_copy_pages: not implemented");
1952}
1953
1954/*
1955 * Returns true if the pmap's pv is one of the first
1956 * 16 pvs linked to from this page.  This count may
1957 * be changed upwards or downwards in the future; it
1958 * is only necessary that true be returned for a small
1959 * subset of pmaps for proper page aging.
1960 */
1961boolean_t
1962pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1963{
1964	struct tte *tp;
1965	int loops;
1966	boolean_t rv;
1967
1968	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1969	    ("pmap_page_exists_quick: page %p is not managed", m));
1970	loops = 0;
1971	rv = FALSE;
1972	rw_wlock(&tte_list_global_lock);
1973	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1974		if ((tp->tte_data & TD_PV) == 0)
1975			continue;
1976		if (TTE_GET_PMAP(tp) == pm) {
1977			rv = TRUE;
1978			break;
1979		}
1980		if (++loops >= 16)
1981			break;
1982	}
1983	rw_wunlock(&tte_list_global_lock);
1984	return (rv);
1985}
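
/*
 * For illustration only (the caller below is hypothetical), a typical use
 * is a cheap membership test before doing more expensive per-pmap work:
 *
 *	if (pmap_page_exists_quick(pmap, m))
 *		... "m" is among the first mappings examined for "pmap" ...
 *
 * Because the scan stops after 16 list entries, a FALSE result does not
 * prove that the pmap has no mapping of the page.
 */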
1986
1987/*
1988 * Return the number of managed mappings to the given physical page
1989 * that are wired.
1990 */
1991int
1992pmap_page_wired_mappings(vm_page_t m)
1993{
1994	struct tte *tp;
1995	int count;
1996
1997	count = 0;
1998	if ((m->oflags & VPO_UNMANAGED) != 0)
1999		return (count);
2000	rw_wlock(&tte_list_global_lock);
2001	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
2002		if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
2003			count++;
2004	rw_wunlock(&tte_list_global_lock);
2005	return (count);
2006}
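
/*
 * A hypothetical use (not taken from this file) is sanity checking that a
 * page being released has no wired mappings left:
 *
 *	KASSERT(pmap_page_wired_mappings(m) == 0,
 *	    ("page %p still has wired mappings", m));
 *
 * Only mappings that are both managed (TD_PV) and wired (TD_WIRED) are
 * counted.
 */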
2007
2008/*
2009 * Remove all pages from specified address space, this aids process exit
2010 * speeds.  This is much faster than pmap_remove in the case of running down
2011 * an entire address space.  Only works for the current pmap.
2012 */
2013void
2014pmap_remove_pages(pmap_t pm)
2015{
2016
2017}
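
/*
 * The MI VM layer invokes this during address-space teardown, roughly as
 *
 *	pmap_remove_pages(vmspace_pmap(p->p_vmspace));
 *
 * (the process pointer above is illustrative).  On sparc64 this is a
 * no-op; mappings are instead reclaimed through the regular
 * pmap_remove()/pmap_release() path.
 */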
2018
2019/*
2020 * Returns TRUE if the given page has a managed mapping.
2021 */
2022boolean_t
2023pmap_page_is_mapped(vm_page_t m)
2024{
2025	struct tte *tp;
2026	boolean_t rv;
2027
2028	rv = FALSE;
2029	if ((m->oflags & VPO_UNMANAGED) != 0)
2030		return (rv);
2031	rw_wlock(&tte_list_global_lock);
2032	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
2033		if ((tp->tte_data & TD_PV) != 0) {
2034			rv = TRUE;
2035			break;
2036		}
2037	rw_wunlock(&tte_list_global_lock);
2038	return (rv);
2039}
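
/*
 * A hypothetical caller (for illustration only) might assert that a page
 * has been fully unmapped before freeing it:
 *
 *	KASSERT(!pmap_page_is_mapped(m), ("freeing mapped page %p", m));
 */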
2040
2041/*
2042 * Return a count of reference bits for a page, clearing those bits.
2043 * It is not necessary for every reference bit to be cleared, but it
2044 * is necessary that 0 only be returned when there are truly no
2045 * reference bits set.
2046 *
2047 * XXX: The exact number of bits to check and clear is a matter that
2048 * should be tested and standardized at some point in the future for
2049 * optimal aging of shared pages.
2050 */
2051int
2052pmap_ts_referenced(vm_page_t m)
2053{
2054	struct tte *tpf;
2055	struct tte *tpn;
2056	struct tte *tp;
2057	u_long data;
2058	int count;
2059
2060	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2061	    ("pmap_ts_referenced: page %p is not managed", m));
2062	count = 0;
2063	rw_wlock(&tte_list_global_lock);
2064	if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2065		tpf = tp;
2066		do {
2067			tpn = TAILQ_NEXT(tp, tte_link);
2068			TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2069			TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2070			if ((tp->tte_data & TD_PV) == 0)
2071				continue;
2072			data = atomic_clear_long(&tp->tte_data, TD_REF);
2073			if ((data & TD_REF) != 0 && ++count > 4)
2074				break;
2075		} while ((tp = tpn) != NULL && tp != tpf);
2076	}
2077	rw_wunlock(&tte_list_global_lock);
2078	return (count);
2079}
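
/*
 * A rough sketch of the intended use: the page daemon accumulates this
 * count when aging pages, along the lines of
 *
 *	act_count += pmap_ts_referenced(m);
 *
 * The tte_list is rotated as it is scanned so that repeated calls examine
 * different mappings first, and the scan stops early once more than 4
 * referenced mappings have been found.
 */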
2080
2081boolean_t
2082pmap_is_modified(vm_page_t m)
2083{
2084	struct tte *tp;
2085	boolean_t rv;
2086
2087	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2088	    ("pmap_is_modified: page %p is not managed", m));
2089	rv = FALSE;
2090
2091	/*
2092	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2093	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2094	 * is clear, no TTEs can have TD_W set.
2095	 */
2096	VM_OBJECT_ASSERT_WLOCKED(m->object);
2097	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2098		return (rv);
2099	rw_wlock(&tte_list_global_lock);
2100	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2101		if ((tp->tte_data & TD_PV) == 0)
2102			continue;
2103		if ((tp->tte_data & TD_W) != 0) {
2104			rv = TRUE;
2105			break;
2106		}
2107	}
2108	rw_wunlock(&tte_list_global_lock);
2109	return (rv);
2110}
2111
2112/*
2113 *	pmap_is_prefaultable:
2114 *
2115 *	Return whether or not the specified virtual address is eligible
2116 *	for prefault.
2117 */
2118boolean_t
2119pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2120{
2121	boolean_t rv;
2122
2123	PMAP_LOCK(pmap);
2124	rv = tsb_tte_lookup(pmap, addr) == NULL;
2125	PMAP_UNLOCK(pmap);
2126	return (rv);
2127}
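
/*
 * A minimal sketch of the intended use in the fault-ahead path (shown only
 * for illustration):
 *
 *	if (pmap_is_prefaultable(pmap, addr))
 *		pmap_enter_quick(pmap, addr, m, prot);
 *
 * On sparc64 an address is prefaultable simply when no TTE for it already
 * exists in the TSB.
 */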
2128
2129/*
2130 * Return whether or not the specified physical page was referenced
2131 * in any physical maps.
2132 */
2133boolean_t
2134pmap_is_referenced(vm_page_t m)
2135{
2136	struct tte *tp;
2137	boolean_t rv;
2138
2139	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2140	    ("pmap_is_referenced: page %p is not managed", m));
2141	rv = FALSE;
2142	rw_wlock(&tte_list_global_lock);
2143	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2144		if ((tp->tte_data & TD_PV) == 0)
2145			continue;
2146		if ((tp->tte_data & TD_REF) != 0) {
2147			rv = TRUE;
2148			break;
2149		}
2150	}
2151	rw_wunlock(&tte_list_global_lock);
2152	return (rv);
2153}
2154
2155/*
2156 * This function is advisory.
2157 */
2158void
2159pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2160{
2161}
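
/*
 * Advice such as MADV_DONTNEED or MADV_FREE is only a hint, so an empty
 * implementation is legal, merely conservative: no mappings or dirty state
 * are discarded ahead of time.
 */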
2162
2163void
2164pmap_clear_modify(vm_page_t m)
2165{
2166	struct tte *tp;
2167	u_long data;
2168
2169	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2170	    ("pmap_clear_modify: page %p is not managed", m));
2171	VM_OBJECT_ASSERT_WLOCKED(m->object);
2172	KASSERT(!vm_page_xbusied(m),
2173	    ("pmap_clear_modify: page %p is exclusive busied", m));
2174
2175	/*
2176	 * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
2177	 * If the object containing the page is locked and the page is not
2178	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2179	 */
2180	if ((m->aflags & PGA_WRITEABLE) == 0)
2181		return;
2182	rw_wlock(&tte_list_global_lock);
2183	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2184		if ((tp->tte_data & TD_PV) == 0)
2185			continue;
2186		data = atomic_clear_long(&tp->tte_data, TD_W);
2187		if ((data & TD_W) != 0)
2188			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2189	}
2190	rw_wunlock(&tte_list_global_lock);
2191}
2192
2193void
2194pmap_remove_write(vm_page_t m)
2195{
2196	struct tte *tp;
2197	u_long data;
2198
2199	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2200	    ("pmap_remove_write: page %p is not managed", m));
2201
2202	/*
2203	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2204	 * set by another thread while the object is locked.  Thus,
2205	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2206	 */
2207	VM_OBJECT_ASSERT_WLOCKED(m->object);
2208	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2209		return;
2210	rw_wlock(&tte_list_global_lock);
2211	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2212		if ((tp->tte_data & TD_PV) == 0)
2213			continue;
2214		data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2215		if ((data & TD_W) != 0) {
2216			vm_page_dirty(m);
2217			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2218		}
2219	}
2220	vm_page_aflag_clear(m, PGA_WRITEABLE);
2221	rw_wunlock(&tte_list_global_lock);
2222}
2223
2224int
2225pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
2226{
2227
2228	/* TODO */
2229	return (0);
2230}
2231
2232/*
2233 * Activate a user pmap.  The pmap must be activated before its address space
2234 * can be accessed in any way.
2235 */
2236void
2237pmap_activate(struct thread *td)
2238{
2239	struct vmspace *vm;
2240	struct pmap *pm;
2241	int context;
2242
2243	critical_enter();
2244	vm = td->td_proc->p_vmspace;
2245	pm = vmspace_pmap(vm);
2246
2247	context = PCPU_GET(tlb_ctx);
2248	if (context == PCPU_GET(tlb_ctx_max)) {
2249		tlb_flush_user();
2250		context = PCPU_GET(tlb_ctx_min);
2251	}
2252	PCPU_SET(tlb_ctx, context + 1);
2253
2254	pm->pm_context[curcpu] = context;
2255#ifdef SMP
2256	CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
2257	atomic_store_acq_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
2258#else
2259	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
2260	PCPU_SET(pmap, pm);
2261#endif
2262
2263	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2264	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2265	stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2266	    TLB_CXR_PGSZ_MASK) | context);
2267	flush(KERNBASE);
2268	critical_exit();
2269}
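
/*
 * Context numbers are handed out per CPU from the range [tlb_ctx_min,
 * tlb_ctx_max): each activation takes the next free number, and once the
 * range is exhausted all user TLB entries are flushed and allocation wraps
 * back to the minimum.  The new context is then installed in the primary
 * context register along with the pmap's TSB base.
 */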
2270
2271void
2272pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2273{
2274
2275}
2276
2277/*
2278 * Increase the starting virtual address of the given mapping if a
2279 * different alignment might result in more superpage mappings.
2280 */
2281void
2282pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2283    vm_offset_t *addr, vm_size_t size)
2284{
2285
2286}
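
/*
 * A minimal sketch of what a superpage-capable pmap would do here (purely
 * hypothetical for sparc64, which leaves the hook empty; SPGSIZE below is
 * an invented placeholder for the superpage size):
 *
 *	sp_off = offset & (SPGSIZE - 1);
 *	if ((*addr & (SPGSIZE - 1)) != sp_off)
 *		*addr = (*addr & ~(SPGSIZE - 1)) + sp_off;
 */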
2287