1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38 */
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD$");
42
43/*
44 * Manages physical address maps.
45 *
46 * Since the information managed by this module is also stored by the
47 * logical address mapping module, this module may throw away valid virtual
48 * to physical mappings at almost any time.  However, invalidations of
49 * mappings must be done as requested.
50 *
51 * In order to cope with hardware architectures which make virtual to
52 * physical map invalidates expensive, this module may delay invalidate
 * or reduced protection operations until such time as they are actually
54 * necessary.  This module is given full information as to which processors
55 * are currently using which maps, and to when physical maps must be made
56 * correct.
57 */
58
59#include "opt_kstack_pages.h"
60#include "opt_pmap.h"
61
62#include <sys/param.h>
63#include <sys/kernel.h>
64#include <sys/ktr.h>
65#include <sys/lock.h>
66#include <sys/msgbuf.h>
67#include <sys/mutex.h>
68#include <sys/proc.h>
69#include <sys/rwlock.h>
70#include <sys/smp.h>
71#include <sys/sysctl.h>
72#include <sys/systm.h>
73#include <sys/vmmeter.h>
74
75#include <dev/ofw/openfirm.h>
76
77#include <vm/vm.h>
78#include <vm/vm_param.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_page.h>
81#include <vm/vm_map.h>
82#include <vm/vm_object.h>
83#include <vm/vm_extern.h>
84#include <vm/vm_pageout.h>
85#include <vm/vm_pager.h>
86#include <vm/vm_phys.h>
87
88#include <machine/cache.h>
89#include <machine/frame.h>
90#include <machine/instr.h>
91#include <machine/md_var.h>
92#include <machine/metadata.h>
93#include <machine/ofw_mem.h>
94#include <machine/smp.h>
95#include <machine/tlb.h>
96#include <machine/tte.h>
97#include <machine/tsb.h>
98#include <machine/ver.h>
99
100/*
101 * Virtual address of message buffer
102 */
103struct msgbuf *msgbufp;
104
105/*
 * Map of physical memory regions
107 */
108vm_paddr_t phys_avail[128];
109static struct ofw_mem_region mra[128];
110struct ofw_mem_region sparc64_memreg[128];
111int sparc64_nmemreg;
112static struct ofw_map translations[128];
113static int translations_size;
114
115static vm_offset_t pmap_idle_map;
116static vm_offset_t pmap_temp_map_1;
117static vm_offset_t pmap_temp_map_2;
118
119/*
120 * First and last available kernel virtual addresses
121 */
122vm_offset_t virtual_avail;
123vm_offset_t virtual_end;
124vm_offset_t kernel_vm_end;
125
126vm_offset_t vm_max_kernel_address;
127
128/*
129 * Kernel pmap
130 */
131struct pmap kernel_pmap_store;
132
133struct rwlock_padalign tte_list_global_lock;
134
135/*
136 * Allocate physical memory for use in pmap_bootstrap.
137 */
138static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
139
140static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
141static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
142static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
143    struct tte *tp, vm_offset_t va);
144static int pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp,
145    vm_offset_t va);
146
147/*
148 * Map the given physical page at the specified virtual address in the
149 * target pmap with the protection requested.  If specified the page
150 * will be wired down.
151 *
152 * The page queues and pmap must be locked.
153 */
154static int pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
155    vm_prot_t prot, u_int flags, int8_t psind);
156
157extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
158extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
159extern int tl1_dmmu_miss_patch_asi_1[];
160extern int tl1_dmmu_miss_patch_quad_ldd_1[];
161extern int tl1_dmmu_miss_patch_tsb_1[];
162extern int tl1_dmmu_miss_patch_tsb_2[];
163extern int tl1_dmmu_miss_patch_tsb_mask_1[];
164extern int tl1_dmmu_miss_patch_tsb_mask_2[];
165extern int tl1_dmmu_prot_patch_asi_1[];
166extern int tl1_dmmu_prot_patch_quad_ldd_1[];
167extern int tl1_dmmu_prot_patch_tsb_1[];
168extern int tl1_dmmu_prot_patch_tsb_2[];
169extern int tl1_dmmu_prot_patch_tsb_mask_1[];
170extern int tl1_dmmu_prot_patch_tsb_mask_2[];
171extern int tl1_immu_miss_patch_asi_1[];
172extern int tl1_immu_miss_patch_quad_ldd_1[];
173extern int tl1_immu_miss_patch_tsb_1[];
174extern int tl1_immu_miss_patch_tsb_2[];
175extern int tl1_immu_miss_patch_tsb_mask_1[];
176extern int tl1_immu_miss_patch_tsb_mask_2[];
177
178/*
 * If a user pmap is processed with pmap_remove and the
180 * resident count drops to 0, there are no more pages to remove, so we
181 * need not continue.
182 */
183#define	PMAP_REMOVE_DONE(pm) \
184	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
185
186/*
187 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
188 * and pmap_protect() instead of trying each virtual address.
189 */
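/*
 * At that size the range covers more pages than half the TSB has entries,
 * so a single sweep over the TSB is expected to be cheaper than looking up
 * every page individually.
 */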
190#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
191
192SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
193
194PMAP_STATS_VAR(pmap_nenter);
195PMAP_STATS_VAR(pmap_nenter_update);
196PMAP_STATS_VAR(pmap_nenter_replace);
197PMAP_STATS_VAR(pmap_nenter_new);
198PMAP_STATS_VAR(pmap_nkenter);
199PMAP_STATS_VAR(pmap_nkenter_oc);
200PMAP_STATS_VAR(pmap_nkenter_stupid);
201PMAP_STATS_VAR(pmap_nkremove);
202PMAP_STATS_VAR(pmap_nqenter);
203PMAP_STATS_VAR(pmap_nqremove);
204PMAP_STATS_VAR(pmap_ncache_enter);
205PMAP_STATS_VAR(pmap_ncache_enter_c);
206PMAP_STATS_VAR(pmap_ncache_enter_oc);
207PMAP_STATS_VAR(pmap_ncache_enter_cc);
208PMAP_STATS_VAR(pmap_ncache_enter_coc);
209PMAP_STATS_VAR(pmap_ncache_enter_nc);
210PMAP_STATS_VAR(pmap_ncache_enter_cnc);
211PMAP_STATS_VAR(pmap_ncache_remove);
212PMAP_STATS_VAR(pmap_ncache_remove_c);
213PMAP_STATS_VAR(pmap_ncache_remove_oc);
214PMAP_STATS_VAR(pmap_ncache_remove_cc);
215PMAP_STATS_VAR(pmap_ncache_remove_coc);
216PMAP_STATS_VAR(pmap_ncache_remove_nc);
217PMAP_STATS_VAR(pmap_nzero_page);
218PMAP_STATS_VAR(pmap_nzero_page_c);
219PMAP_STATS_VAR(pmap_nzero_page_oc);
220PMAP_STATS_VAR(pmap_nzero_page_nc);
221PMAP_STATS_VAR(pmap_nzero_page_area);
222PMAP_STATS_VAR(pmap_nzero_page_area_c);
223PMAP_STATS_VAR(pmap_nzero_page_area_oc);
224PMAP_STATS_VAR(pmap_nzero_page_area_nc);
225PMAP_STATS_VAR(pmap_nzero_page_idle);
226PMAP_STATS_VAR(pmap_nzero_page_idle_c);
227PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
228PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
229PMAP_STATS_VAR(pmap_ncopy_page);
230PMAP_STATS_VAR(pmap_ncopy_page_c);
231PMAP_STATS_VAR(pmap_ncopy_page_oc);
232PMAP_STATS_VAR(pmap_ncopy_page_nc);
233PMAP_STATS_VAR(pmap_ncopy_page_dc);
234PMAP_STATS_VAR(pmap_ncopy_page_doc);
235PMAP_STATS_VAR(pmap_ncopy_page_sc);
236PMAP_STATS_VAR(pmap_ncopy_page_soc);
237
238PMAP_STATS_VAR(pmap_nnew_thread);
239PMAP_STATS_VAR(pmap_nnew_thread_oc);
240
241static inline u_long dtlb_get_data(u_int tlb, u_int slot);
242
243/*
 * Quick sort callbacks for comparing memory regions and PROM translations
245 */
246static int mr_cmp(const void *a, const void *b);
247static int om_cmp(const void *a, const void *b);
248
249static int
250mr_cmp(const void *a, const void *b)
251{
252	const struct ofw_mem_region *mra;
253	const struct ofw_mem_region *mrb;
254
255	mra = a;
256	mrb = b;
257	if (mra->mr_start < mrb->mr_start)
258		return (-1);
259	else if (mra->mr_start > mrb->mr_start)
260		return (1);
261	else
262		return (0);
263}
264
265static int
266om_cmp(const void *a, const void *b)
267{
268	const struct ofw_map *oma;
269	const struct ofw_map *omb;
270
271	oma = a;
272	omb = b;
273	if (oma->om_start < omb->om_start)
274		return (-1);
275	else if (oma->om_start > omb->om_start)
276		return (1);
277	else
278		return (0);
279}
280
281static inline u_long
282dtlb_get_data(u_int tlb, u_int slot)
283{
284	u_long data;
285	register_t s;
286
287	slot = TLB_DAR_SLOT(tlb, slot);
288	/*
289	 * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
290	 * work around errata of USIII and beyond.
291	 */
292	s = intr_disable();
293	(void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
294	data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
295	intr_restore(s);
296	return (data);
297}
298
299/*
300 * Bootstrap the system enough to run with virtual memory.
301 */
302void
303pmap_bootstrap(u_int cpu_impl)
304{
305	struct pmap *pm;
306	struct tte *tp;
307	vm_offset_t off;
308	vm_offset_t va;
309	vm_paddr_t pa;
310	vm_size_t physsz;
311	vm_size_t virtsz;
312	u_long data;
313	u_long vpn;
314	phandle_t pmem;
315	phandle_t vmem;
316	u_int dtlb_slots_avail;
317	int i;
318	int j;
319	int sz;
320	uint32_t asi;
321	uint32_t colors;
322	uint32_t ldd;
323
324	/*
325	 * Set the kernel context.
326	 */
327	pmap_set_kctx();
328
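	/*
	 * Use a single page color if the data cache needs no software alias
	 * handling (dcache_color_ignore != 0); otherwise track all
	 * DCACHE_COLORS colors.
	 */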
329	colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
330
331	/*
332	 * Find out what physical memory is available from the PROM and
333	 * initialize the phys_avail array.  This must be done before
334	 * pmap_bootstrap_alloc is called.
335	 */
336	if ((pmem = OF_finddevice("/memory")) == -1)
337		OF_panic("%s: finddevice /memory", __func__);
338	if ((sz = OF_getproplen(pmem, "available")) == -1)
339		OF_panic("%s: getproplen /memory/available", __func__);
340	if (sizeof(phys_avail) < sz)
341		OF_panic("%s: phys_avail too small", __func__);
342	if (sizeof(mra) < sz)
343		OF_panic("%s: mra too small", __func__);
344	bzero(mra, sz);
345	if (OF_getprop(pmem, "available", mra, sz) == -1)
346		OF_panic("%s: getprop /memory/available", __func__);
347	sz /= sizeof(*mra);
348	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
349	qsort(mra, sz, sizeof (*mra), mr_cmp);
350	physsz = 0;
351	getenv_quad("hw.physmem", &physmem);
352	physmem = btoc(physmem);
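	/*
	 * Build phys_avail as (start, end) pairs from the sorted PROM
	 * regions, clamping the total to an optional hw.physmem limit.
	 */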
353	for (i = 0, j = 0; i < sz; i++, j += 2) {
354		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
355		    mra[i].mr_size);
356		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
357			if (btoc(physsz) < physmem) {
358				phys_avail[j] = mra[i].mr_start;
359				phys_avail[j + 1] = mra[i].mr_start +
360				    (ctob(physmem) - physsz);
361				physsz = ctob(physmem);
362			}
363			break;
364		}
365		phys_avail[j] = mra[i].mr_start;
366		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
367		physsz += mra[i].mr_size;
368	}
369	physmem = btoc(physsz);
370
371	/*
372	 * Calculate the size of kernel virtual memory, and the size and mask
 * for the kernel TSB based on the physical memory size but limited
374	 * by the amount of dTLB slots available for locked entries if we have
375	 * to lock the TSB in the TLB (given that for spitfire-class CPUs all
376	 * of the dt64 slots can hold locked entries but there is no large
377	 * dTLB for unlocked ones, we don't use more than half of it for the
378	 * TSB).
379	 * Note that for reasons unknown OpenSolaris doesn't take advantage of
380	 * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
381	 * public documentation is available for these, the latter just might
382	 * not support it, yet.
383	 */
384	if (cpu_impl == CPU_IMPL_SPARC64V ||
385	    cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
386		tsb_kernel_ldd_phys = 1;
		/* Multiply before dividing so the 5/3 ratio isn't truncated. */
		virtsz = roundup(physsz * 5 / 3, PAGE_SIZE_4M <<
		    (PAGE_SHIFT - TTE_SHIFT));
389	} else {
390		dtlb_slots_avail = 0;
391		for (i = 0; i < dtlb_slots; i++) {
392			data = dtlb_get_data(cpu_impl ==
393			    CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
394			    TLB_DAR_T32, i);
395			if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
396				dtlb_slots_avail++;
397		}
398#ifdef SMP
399		dtlb_slots_avail -= PCPU_PAGES;
400#endif
401		if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
402		    cpu_impl < CPU_IMPL_ULTRASPARCIII)
403			dtlb_slots_avail /= 2;
404		virtsz = roundup(physsz, PAGE_SIZE_4M <<
405		    (PAGE_SHIFT - TTE_SHIFT));
406		virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
407		    (PAGE_SHIFT - TTE_SHIFT));
408	}
409	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
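	/*
	 * One TTE maps one 8KB page, so covering virtsz bytes of KVA takes
	 * virtsz >> (PAGE_SHIFT - TTE_SHIFT) bytes of TSB; tsb_kernel_mask
	 * is the resulting number of TTE entries minus one, used as an
	 * index mask.
	 */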
410	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
411	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
412
413	/*
414	 * Allocate the kernel TSB and lock it in the TLB if necessary.
415	 */
416	pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
417	if (pa & PAGE_MASK_4M)
418		OF_panic("%s: TSB unaligned", __func__);
419	tsb_kernel_phys = pa;
420	if (tsb_kernel_ldd_phys == 0) {
421		tsb_kernel =
422		    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
423		pmap_map_tsb();
424		bzero(tsb_kernel, tsb_kernel_size);
425	} else {
426		tsb_kernel =
427		    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
428		aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
429	}
430
431	/*
432	 * Allocate and map the dynamic per-CPU area for the BSP.
433	 */
434	pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
435	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
436
437	/*
438	 * Allocate and map the message buffer.
439	 */
440	pa = pmap_bootstrap_alloc(msgbufsize, colors);
441	msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
442
443	/*
444	 * Patch the TSB addresses and mask as well as the ASIs used to load
445	 * it into the trap table.
446	 */
447
448#define	LDDA_R_I_R(rd, imm_asi, rs1, rs2)				\
449	(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |	\
450	    EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |	\
451	    EIF_F3_RS2(rs2))
452#define	OR_R_I_R(rd, imm13, rs1)					\
453	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |	\
454	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
455#define	SETHI(rd, imm22)						\
456	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |	\
457	    EIF_IMM((imm22) >> 10, 22))
458#define	WR_R_I(rd, imm13, rs1)						\
459	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |	\
460	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
461
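/*
 * Each PATCH_* macro first checks that the instruction(s) at the patch site
 * still match the expected template with a zero immediate, then ORs the new
 * immediate into place and flushes the patched instructions from the
 * instruction cache.
 */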
462#define	PATCH_ASI(addr, asi) do {					\
463	if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,			\
464	    IF_F3_RS1(addr[0])))					\
465		OF_panic("%s: patched instructions have changed",	\
466		    __func__);						\
467	addr[0] |= EIF_IMM((asi), 13);					\
468	flush(addr);							\
469} while (0)
470
471#define	PATCH_LDD(addr, asi) do {					\
472	if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,		\
473	    IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))			\
474		OF_panic("%s: patched instructions have changed",	\
475		    __func__);						\
476	addr[0] |= EIF_F3_IMM_ASI(asi);					\
477	flush(addr);							\
478} while (0)
479
480#define	PATCH_TSB(addr, val) do {					\
481	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
482	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
483	    IF_F3_RS1(addr[1]))	||					\
484	    addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))			\
485		OF_panic("%s: patched instructions have changed",	\
486		    __func__);						\
487	addr[0] |= EIF_IMM((val) >> 42, 22);				\
488	addr[1] |= EIF_IMM((val) >> 32, 10);				\
489	addr[3] |= EIF_IMM((val) >> 10, 22);				\
490	flush(addr);							\
491	flush(addr + 1);						\
492	flush(addr + 3);						\
493} while (0)
494
495#define	PATCH_TSB_MASK(addr, val) do {					\
496	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
497	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
498	    IF_F3_RS1(addr[1])))					\
499		OF_panic("%s: patched instructions have changed",	\
500		    __func__);						\
501	addr[0] |= EIF_IMM((val) >> 10, 22);				\
502	addr[1] |= EIF_IMM((val), 10);					\
503	flush(addr);							\
504	flush(addr + 1);						\
505} while (0)
506
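	/*
	 * If the TSB is accessed with ASI_ATOMIC_QUAD_LDD_PHYS, the trap
	 * handlers use its physical address; otherwise they go through the
	 * locked virtual mapping set up by pmap_map_tsb() above.
	 */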
507	if (tsb_kernel_ldd_phys == 0) {
508		asi = ASI_N;
509		ldd = ASI_NUCLEUS_QUAD_LDD;
510		off = (vm_offset_t)tsb_kernel;
511	} else {
512		asi = ASI_PHYS_USE_EC;
513		ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
514		off = (vm_offset_t)tsb_kernel_phys;
515	}
516	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
517	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
518	    tsb_kernel_phys + tsb_kernel_size - 1);
519	PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
520	PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
521	PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
522	PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
523	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
524	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
525	PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
526	PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
527	PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
528	PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
529	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
530	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
531	PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
532	PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
533	PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
534	PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
535	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
536	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
537
538	/*
539	 * Enter fake 8k pages for the 4MB kernel pages, so that
540	 * pmap_kextract() will work for them.
541	 */
542	for (i = 0; i < kernel_tlb_slots; i++) {
543		pa = kernel_tlbs[i].te_pa;
544		va = kernel_tlbs[i].te_va;
545		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
546			tp = tsb_kvtotte(va + off);
547			vpn = TV_VPN(va + off, TS_8K);
548			data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
549			    TD_SW | TD_CP | TD_CV | TD_P | TD_W;
550			pmap_bootstrap_set_tte(tp, vpn, data);
551		}
552	}
553
554	/*
555	 * Set the start and end of KVA.  The kernel is loaded starting
556	 * at the first available 4MB super page, so we advance to the
557	 * end of the last one used for it.
558	 */
559	virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
560	virtual_end = vm_max_kernel_address;
561	kernel_vm_end = vm_max_kernel_address;
562
563	/*
564	 * Allocate kva space for temporary mappings.
565	 */
566	pmap_idle_map = virtual_avail;
567	virtual_avail += PAGE_SIZE * colors;
568	pmap_temp_map_1 = virtual_avail;
569	virtual_avail += PAGE_SIZE * colors;
570	pmap_temp_map_2 = virtual_avail;
571	virtual_avail += PAGE_SIZE * colors;
572
573	/*
574	 * Allocate a kernel stack with guard page for thread0 and map it
575	 * into the kernel TSB.  We must ensure that the virtual address is
576	 * colored properly for corresponding CPUs, since we're allocating
577	 * from phys_avail so the memory won't have an associated vm_page_t.
578	 */
579	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
580	kstack0_phys = pa;
581	virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
582	kstack0 = virtual_avail;
583	virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
584	if (dcache_color_ignore == 0)
585		KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
586		    ("pmap_bootstrap: kstack0 miscolored"));
587	for (i = 0; i < KSTACK_PAGES; i++) {
588		pa = kstack0_phys + i * PAGE_SIZE;
589		va = kstack0 + i * PAGE_SIZE;
590		tp = tsb_kvtotte(va);
591		vpn = TV_VPN(va, TS_8K);
592		data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
593		    TD_CV | TD_P | TD_W;
594		pmap_bootstrap_set_tte(tp, vpn, data);
595	}
596
597	/*
598	 * Calculate the last available physical address.
599	 */
600	for (i = 0; phys_avail[i + 2] != 0; i += 2)
601		;
602	Maxmem = sparc64_btop(phys_avail[i + 1]);
603
604	/*
605	 * Add the PROM mappings to the kernel TSB.
606	 */
607	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
608		OF_panic("%s: finddevice /virtual-memory", __func__);
609	if ((sz = OF_getproplen(vmem, "translations")) == -1)
610		OF_panic("%s: getproplen translations", __func__);
611	if (sizeof(translations) < sz)
612		OF_panic("%s: translations too small", __func__);
613	bzero(translations, sz);
614	if (OF_getprop(vmem, "translations", translations, sz) == -1)
615		OF_panic("%s: getprop /virtual-memory/translations",
616		    __func__);
617	sz /= sizeof(*translations);
618	translations_size = sz;
619	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
620	qsort(translations, sz, sizeof (*translations), om_cmp);
621	for (i = 0; i < sz; i++) {
622		CTR3(KTR_PMAP,
623		    "translation: start=%#lx size=%#lx tte=%#lx",
624		    translations[i].om_start, translations[i].om_size,
625		    translations[i].om_tte);
626		if ((translations[i].om_tte & TD_V) == 0)
627			continue;
628		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
629		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
630			continue;
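		/*
		 * Copy the PROM TTE, stripping its software bits and the
		 * CPU-specific diagnostic/reserved bits, forcing TD_EXEC and
		 * adding the page offset to the physical address.
		 */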
631		for (off = 0; off < translations[i].om_size;
632		    off += PAGE_SIZE) {
633			va = translations[i].om_start + off;
634			tp = tsb_kvtotte(va);
635			vpn = TV_VPN(va, TS_8K);
636			data = ((translations[i].om_tte &
637			    ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
638			    (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
639			    cpu_impl < CPU_IMPL_ULTRASPARCIII ?
640			    (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
641			    (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
642			    (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
643			    off;
644			pmap_bootstrap_set_tte(tp, vpn, data);
645		}
646	}
647
648	/*
 * Get the installed physical memory ranges from /memory/reg.  These
650	 * are only used for kernel dumps, but it may not be wise to do PROM
651	 * calls in that situation.
652	 */
653	if ((sz = OF_getproplen(pmem, "reg")) == -1)
654		OF_panic("%s: getproplen /memory/reg", __func__);
655	if (sizeof(sparc64_memreg) < sz)
656		OF_panic("%s: sparc64_memreg too small", __func__);
657	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
658		OF_panic("%s: getprop /memory/reg", __func__);
659	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
660
661	/*
662	 * Initialize the kernel pmap (which is statically allocated).
663	 */
664	pm = kernel_pmap;
665	PMAP_LOCK_INIT(pm);
666	for (i = 0; i < MAXCPU; i++)
667		pm->pm_context[i] = TLB_CTX_KERNEL;
668	CPU_FILL(&pm->pm_active);
669
670	/*
671	 * Initialize the global tte list lock, which is more commonly
672	 * known as the pmap pv global lock.
673	 */
674	rw_init(&tte_list_global_lock, "pmap pv global");
675
676	/*
677	 * Flush all non-locked TLB entries possibly left over by the
678	 * firmware.
679	 */
680	tlb_flush_nonlocked();
681}
682
683/*
684 * Map the 4MB kernel TSB pages.
685 */
686void
687pmap_map_tsb(void)
688{
689	vm_offset_t va;
690	vm_paddr_t pa;
691	u_long data;
692	int i;
693
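	/*
	 * Enter locked (TD_L), cacheable 4MB mappings for the TSB directly
	 * via the TLB data-in register so they are never evicted by
	 * automatic replacement.
	 */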
694	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
695		va = (vm_offset_t)tsb_kernel + i;
696		pa = tsb_kernel_phys + i;
697		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
698		    TD_P | TD_W;
699		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
700		    TLB_TAR_CTX(TLB_CTX_KERNEL));
701		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
702	}
703}
704
705/*
706 * Set the secondary context to be the kernel context (needed for FP block
707 * operations in the kernel).
708 */
709void
710pmap_set_kctx(void)
711{
712
713	stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
714	    TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
715	flush(KERNBASE);
716}
717
718/*
 * Allocate physical memory directly from the phys_avail map.
720 * Can only be called from pmap_bootstrap before avail start and end are
721 * calculated.
722 */
723static vm_paddr_t
724pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
725{
726	vm_paddr_t pa;
727	int i;
728
729	size = roundup(size, PAGE_SIZE * colors);
730	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
731		if (phys_avail[i + 1] - phys_avail[i] < size)
732			continue;
733		pa = phys_avail[i];
734		phys_avail[i] += size;
735		return (pa);
736	}
737	OF_panic("%s: no suitable region found", __func__);
738}
739
740/*
741 * Set a TTE.  This function is intended as a helper when tsb_kernel is
 * direct-mapped but we have not yet taken over the trap table, as is the
743 * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
744 * the kernel TSB.
745 */
static void
747pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
748{
749
750	if (tsb_kernel_ldd_phys == 0) {
751		tp->tte_vpn = vpn;
752		tp->tte_data = data;
753	} else {
754		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
755		    ASI_PHYS_USE_EC, vpn);
756		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
757		    ASI_PHYS_USE_EC, data);
758	}
759}
760
761/*
762 * Initialize a vm_page's machine-dependent fields.
763 */
764void
765pmap_page_init(vm_page_t m)
766{
767
768	TAILQ_INIT(&m->md.tte_list);
769	m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
770	m->md.pmap = NULL;
771}
772
773/*
774 * Initialize the pmap module.
775 */
776void
777pmap_init(void)
778{
779	vm_offset_t addr;
780	vm_size_t size;
781	int result;
782	int i;
783
784	for (i = 0; i < translations_size; i++) {
785		addr = translations[i].om_start;
786		size = translations[i].om_size;
787		if ((translations[i].om_tte & TD_V) == 0)
788			continue;
789		if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
790			continue;
791		result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
792		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
793		if (result != KERN_SUCCESS || addr != translations[i].om_start)
794			panic("pmap_init: vm_map_find");
795	}
796}
797
798/*
799 * Extract the physical page address associated with the given
800 * map/virtual_address pair.
801 */
802vm_paddr_t
803pmap_extract(pmap_t pm, vm_offset_t va)
804{
805	struct tte *tp;
806	vm_paddr_t pa;
807
808	if (pm == kernel_pmap)
809		return (pmap_kextract(va));
810	PMAP_LOCK(pm);
811	tp = tsb_tte_lookup(pm, va);
812	if (tp == NULL)
813		pa = 0;
814	else
815		pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
816	PMAP_UNLOCK(pm);
817	return (pa);
818}
819
820/*
821 * Atomically extract and hold the physical page with the given
822 * pmap and virtual address pair if that mapping permits the given
823 * protection.
824 */
825vm_page_t
826pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
827{
828	struct tte *tp;
829	vm_page_t m;
830	vm_paddr_t pa;
831
832	m = NULL;
833	pa = 0;
834	PMAP_LOCK(pm);
835retry:
836	if (pm == kernel_pmap) {
837		if (va >= VM_MIN_DIRECT_ADDRESS) {
838			tp = NULL;
839			m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
840			(void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
841			    &pa);
842			vm_page_hold(m);
843		} else {
844			tp = tsb_kvtotte(va);
845			if ((tp->tte_data & TD_V) == 0)
846				tp = NULL;
847		}
848	} else
849		tp = tsb_tte_lookup(pm, va);
850	if (tp != NULL && ((tp->tte_data & TD_SW) ||
851	    (prot & VM_PROT_WRITE) == 0)) {
852		if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
853			goto retry;
854		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
855		vm_page_hold(m);
856	}
857	PA_UNLOCK_COND(pa);
858	PMAP_UNLOCK(pm);
859	return (m);
860}
861
862/*
863 * Extract the physical page address associated with the given kernel virtual
864 * address.
865 */
866vm_paddr_t
867pmap_kextract(vm_offset_t va)
868{
869	struct tte *tp;
870
871	if (va >= VM_MIN_DIRECT_ADDRESS)
872		return (TLB_DIRECT_TO_PHYS(va));
873	tp = tsb_kvtotte(va);
874	if ((tp->tte_data & TD_V) == 0)
875		return (0);
876	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
877}
878
879int
880pmap_cache_enter(vm_page_t m, vm_offset_t va)
881{
882	struct tte *tp;
883	int color;
884
885	rw_assert(&tte_list_global_lock, RA_WLOCKED);
886	KASSERT((m->flags & PG_FICTITIOUS) == 0,
887	    ("pmap_cache_enter: fake page"));
888	PMAP_STATS_INC(pmap_ncache_enter);
889
890	if (dcache_color_ignore != 0)
891		return (1);
892
893	/*
894	 * Find the color for this virtual address and note the added mapping.
895	 */
896	color = DCACHE_COLOR(va);
897	m->md.colors[color]++;
898
899	/*
900	 * If all existing mappings have the same color, the mapping is
901	 * cacheable.
902	 */
903	if (m->md.color == color) {
904		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
905		    ("pmap_cache_enter: cacheable, mappings of other color"));
906		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
907			PMAP_STATS_INC(pmap_ncache_enter_c);
908		else
909			PMAP_STATS_INC(pmap_ncache_enter_oc);
910		return (1);
911	}
912
913	/*
914	 * If there are no mappings of the other color, and the page still has
915	 * the wrong color, this must be a new mapping.  Change the color to
916	 * match the new mapping, which is cacheable.  We must flush the page
917	 * from the cache now.
918	 */
919	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
920		KASSERT(m->md.colors[color] == 1,
921		    ("pmap_cache_enter: changing color, not new mapping"));
922		dcache_page_inval(VM_PAGE_TO_PHYS(m));
923		m->md.color = color;
924		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
925			PMAP_STATS_INC(pmap_ncache_enter_cc);
926		else
927			PMAP_STATS_INC(pmap_ncache_enter_coc);
928		return (1);
929	}
930
931	/*
932	 * If the mapping is already non-cacheable, just return.
933	 */
934	if (m->md.color == -1) {
935		PMAP_STATS_INC(pmap_ncache_enter_nc);
936		return (0);
937	}
938
939	PMAP_STATS_INC(pmap_ncache_enter_cnc);
940
941	/*
942	 * Mark all mappings as uncacheable, flush any lines with the other
943	 * color out of the dcache, and set the color to none (-1).
944	 */
945	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
946		atomic_clear_long(&tp->tte_data, TD_CV);
947		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
948	}
949	dcache_page_inval(VM_PAGE_TO_PHYS(m));
950	m->md.color = -1;
951	return (0);
952}
953
954static void
955pmap_cache_remove(vm_page_t m, vm_offset_t va)
956{
957	struct tte *tp;
958	int color;
959
960	rw_assert(&tte_list_global_lock, RA_WLOCKED);
961	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
962	    m->md.colors[DCACHE_COLOR(va)]);
963	KASSERT((m->flags & PG_FICTITIOUS) == 0,
964	    ("pmap_cache_remove: fake page"));
965	PMAP_STATS_INC(pmap_ncache_remove);
966
967	if (dcache_color_ignore != 0)
968		return;
969
970	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
971	    ("pmap_cache_remove: no mappings %d <= 0",
972	    m->md.colors[DCACHE_COLOR(va)]));
973
974	/*
975	 * Find the color for this virtual address and note the removal of
976	 * the mapping.
977	 */
978	color = DCACHE_COLOR(va);
979	m->md.colors[color]--;
980
981	/*
982	 * If the page is cacheable, just return and keep the same color, even
983	 * if there are no longer any mappings.
984	 */
985	if (m->md.color != -1) {
986		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
987			PMAP_STATS_INC(pmap_ncache_remove_c);
988		else
989			PMAP_STATS_INC(pmap_ncache_remove_oc);
990		return;
991	}
992
993	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
994	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
995
996	/*
997	 * If the page is not cacheable (color is -1), and the number of
998	 * mappings for this color is not zero, just return.  There are
999	 * mappings of the other color still, so remain non-cacheable.
1000	 */
1001	if (m->md.colors[color] != 0) {
1002		PMAP_STATS_INC(pmap_ncache_remove_nc);
1003		return;
1004	}
1005
1006	/*
1007	 * The number of mappings for this color is now zero.  Recache the
1008	 * other colored mappings, and change the page color to the other
1009	 * color.  There should be no lines in the data cache for this page,
1010	 * so flushing should not be needed.
1011	 */
1012	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1013		atomic_set_long(&tp->tte_data, TD_CV);
1014		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1015	}
1016	m->md.color = DCACHE_OTHER_COLOR(color);
1017
1018	if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1019		PMAP_STATS_INC(pmap_ncache_remove_cc);
1020	else
1021		PMAP_STATS_INC(pmap_ncache_remove_coc);
1022}
1023
1024/*
1025 * Map a wired page into kernel virtual address space.
1026 */
1027void
1028pmap_kenter(vm_offset_t va, vm_page_t m)
1029{
1030	vm_offset_t ova;
1031	struct tte *tp;
1032	vm_page_t om;
1033	u_long data;
1034
1035	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1036	PMAP_STATS_INC(pmap_nkenter);
1037	tp = tsb_kvtotte(va);
1038	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1039	    va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1040	if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1041		CTR5(KTR_SPARE2,
1042	"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1043		    va, VM_PAGE_TO_PHYS(m), m->object,
1044		    m->object ? m->object->type : -1,
1045		    m->pindex);
1046		PMAP_STATS_INC(pmap_nkenter_oc);
1047	}
1048	if ((tp->tte_data & TD_V) != 0) {
1049		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1050		ova = TTE_GET_VA(tp);
1051		if (m == om && va == ova) {
1052			PMAP_STATS_INC(pmap_nkenter_stupid);
1053			return;
1054		}
1055		TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1056		pmap_cache_remove(om, ova);
1057		if (va != ova)
1058			tlb_page_demap(kernel_pmap, ova);
1059	}
1060	data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1061	    TD_P | TD_W;
1062	if (pmap_cache_enter(m, va) != 0)
1063		data |= TD_CV;
1064	tp->tte_vpn = TV_VPN(va, TS_8K);
1065	tp->tte_data = data;
1066	TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1067}
1068
1069/*
1070 * Map a wired page into kernel virtual address space.  This additionally
1071 * takes a flag argument which is or'ed to the TTE data.  This is used by
1072 * sparc64_bus_mem_map().
1073 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1074 * to flush entries that might still be in the cache, if applicable.
1075 */
1076void
1077pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1078{
1079	struct tte *tp;
1080
1081	tp = tsb_kvtotte(va);
1082	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1083	    va, pa, tp, tp->tte_data);
1084	tp->tte_vpn = TV_VPN(va, TS_8K);
1085	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1086}
1087
1088/*
1089 * Remove a wired page from kernel virtual address space.
1090 */
1091void
1092pmap_kremove(vm_offset_t va)
1093{
1094	struct tte *tp;
1095	vm_page_t m;
1096
1097	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1098	PMAP_STATS_INC(pmap_nkremove);
1099	tp = tsb_kvtotte(va);
1100	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1101	    tp->tte_data);
1102	if ((tp->tte_data & TD_V) == 0)
1103		return;
1104	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1105	TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1106	pmap_cache_remove(m, va);
1107	TTE_ZERO(tp);
1108}
1109
1110/*
1111 * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1112 */
1113void
1114pmap_kremove_flags(vm_offset_t va)
1115{
1116	struct tte *tp;
1117
1118	tp = tsb_kvtotte(va);
1119	CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1120	    tp->tte_data);
1121	TTE_ZERO(tp);
1122}
1123
1124/*
1125 * Map a range of physical addresses into kernel virtual address space.
1126 *
1127 * The value passed in *virt is a suggested virtual address for the mapping.
1128 * Architectures which can support a direct-mapped physical to virtual region
1129 * can return the appropriate address within that region, leaving '*virt'
1130 * unchanged.
1131 */
1132vm_offset_t
1133pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1134{
1135
1136	return (TLB_PHYS_TO_DIRECT(start));
1137}
1138
1139/*
1140 * Map a list of wired pages into kernel virtual address space.  This is
1141 * intended for temporary mappings which do not need page modification or
1142 * references recorded.  Existing mappings in the region are overwritten.
1143 */
1144void
1145pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1146{
1147	vm_offset_t va;
1148
1149	PMAP_STATS_INC(pmap_nqenter);
1150	va = sva;
1151	rw_wlock(&tte_list_global_lock);
1152	while (count-- > 0) {
1153		pmap_kenter(va, *m);
1154		va += PAGE_SIZE;
1155		m++;
1156	}
1157	rw_wunlock(&tte_list_global_lock);
1158	tlb_range_demap(kernel_pmap, sva, va);
1159}
1160
1161/*
1162 * Remove page mappings from kernel virtual address space.  Intended for
1163 * temporary mappings entered by pmap_qenter.
1164 */
1165void
1166pmap_qremove(vm_offset_t sva, int count)
1167{
1168	vm_offset_t va;
1169
1170	PMAP_STATS_INC(pmap_nqremove);
1171	va = sva;
1172	rw_wlock(&tte_list_global_lock);
1173	while (count-- > 0) {
1174		pmap_kremove(va);
1175		va += PAGE_SIZE;
1176	}
1177	rw_wunlock(&tte_list_global_lock);
1178	tlb_range_demap(kernel_pmap, sva, va);
1179}
1180
1181/*
1182 * Initialize the pmap associated with process 0.
1183 */
1184void
1185pmap_pinit0(pmap_t pm)
1186{
1187	int i;
1188
1189	PMAP_LOCK_INIT(pm);
1190	for (i = 0; i < MAXCPU; i++)
1191		pm->pm_context[i] = TLB_CTX_KERNEL;
1192	CPU_ZERO(&pm->pm_active);
1193	pm->pm_tsb = NULL;
1194	pm->pm_tsb_obj = NULL;
1195	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1196}
1197
1198/*
1199 * Initialize a preallocated and zeroed pmap structure, such as one in a
1200 * vmspace structure.
1201 */
1202int
1203pmap_pinit(pmap_t pm)
1204{
1205	vm_page_t ma[TSB_PAGES];
1206	vm_page_t m;
1207	int i;
1208
1209	/*
1210	 * Allocate KVA space for the TSB.
1211	 */
1212	if (pm->pm_tsb == NULL) {
1213		pm->pm_tsb = (struct tte *)kva_alloc(TSB_BSIZE);
1214		if (pm->pm_tsb == NULL)
1215			return (0);
	}
1217
1218	/*
1219	 * Allocate an object for it.
1220	 */
1221	if (pm->pm_tsb_obj == NULL)
1222		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1223
1224	for (i = 0; i < MAXCPU; i++)
1225		pm->pm_context[i] = -1;
1226	CPU_ZERO(&pm->pm_active);
1227
1228	VM_OBJECT_WLOCK(pm->pm_tsb_obj);
1229	for (i = 0; i < TSB_PAGES; i++) {
1230		m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1231		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1232		m->valid = VM_PAGE_BITS_ALL;
1233		m->md.pmap = pm;
1234		ma[i] = m;
1235	}
1236	VM_OBJECT_WUNLOCK(pm->pm_tsb_obj);
1237	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1238
1239	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1240	return (1);
1241}
1242
1243/*
1244 * Release any resources held by the given physical map.
1245 * Called when a pmap initialized by pmap_pinit is being released.
1246 * Should only be called if the map contains no valid mappings.
1247 */
1248void
1249pmap_release(pmap_t pm)
1250{
1251	vm_object_t obj;
1252	vm_page_t m;
1253#ifdef SMP
1254	struct pcpu *pc;
1255#endif
1256
1257	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1258	    pm->pm_context[curcpu], pm->pm_tsb);
1259	KASSERT(pmap_resident_count(pm) == 0,
1260	    ("pmap_release: resident pages %ld != 0",
1261	    pmap_resident_count(pm)));
1262
1263	/*
1264	 * After the pmap was freed, it might be reallocated to a new process.
1265	 * When switching, this might lead us to wrongly assume that we need
1266	 * not switch contexts because old and new pmap pointer are equal.
1267	 * Therefore, make sure that this pmap is not referenced by any PCPU
1268	 * pointer any more.  This could happen in two cases:
1269	 * - A process that referenced the pmap is currently exiting on a CPU.
1270	 *   However, it is guaranteed to not switch in any more after setting
1271	 *   its state to PRS_ZOMBIE.
1272	 * - A process that referenced this pmap ran on a CPU, but we switched
1273	 *   to a kernel thread, leaving the pmap pointer unchanged.
1274	 */
1275#ifdef SMP
1276	sched_pin();
1277	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
1278		atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
1279		    (uintptr_t)pm, (uintptr_t)NULL);
1280	sched_unpin();
1281#else
1282	critical_enter();
1283	if (PCPU_GET(pmap) == pm)
1284		PCPU_SET(pmap, NULL);
1285	critical_exit();
1286#endif
1287
1288	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1289	obj = pm->pm_tsb_obj;
1290	VM_OBJECT_WLOCK(obj);
1291	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1292	while (!TAILQ_EMPTY(&obj->memq)) {
1293		m = TAILQ_FIRST(&obj->memq);
1294		m->md.pmap = NULL;
1295		m->wire_count--;
1296		atomic_subtract_int(&cnt.v_wire_count, 1);
1297		vm_page_free_zero(m);
1298	}
1299	VM_OBJECT_WUNLOCK(obj);
1300}
1301
1302/*
1303 * Grow the number of kernel page table entries.  Unneeded.
1304 */
1305void
1306pmap_growkernel(vm_offset_t addr)
1307{
1308
1309	panic("pmap_growkernel: can't grow kernel");
1310}
1311
1312int
1313pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1314    vm_offset_t va)
1315{
1316	vm_page_t m;
1317	u_long data;
1318
1319	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1320	data = atomic_readandclear_long(&tp->tte_data);
1321	if ((data & TD_FAKE) == 0) {
1322		m = PHYS_TO_VM_PAGE(TD_PA(data));
1323		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1324		if ((data & TD_WIRED) != 0)
1325			pm->pm_stats.wired_count--;
1326		if ((data & TD_PV) != 0) {
1327			if ((data & TD_W) != 0)
1328				vm_page_dirty(m);
1329			if ((data & TD_REF) != 0)
1330				vm_page_aflag_set(m, PGA_REFERENCED);
1331			if (TAILQ_EMPTY(&m->md.tte_list))
1332				vm_page_aflag_clear(m, PGA_WRITEABLE);
1333			pm->pm_stats.resident_count--;
1334		}
1335		pmap_cache_remove(m, va);
1336	}
1337	TTE_ZERO(tp);
1338	if (PMAP_REMOVE_DONE(pm))
1339		return (0);
1340	return (1);
1341}
1342
1343/*
1344 * Remove the given range of addresses from the specified map.
1345 */
1346void
1347pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1348{
1349	struct tte *tp;
1350	vm_offset_t va;
1351
1352	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1353	    pm->pm_context[curcpu], start, end);
1354	if (PMAP_REMOVE_DONE(pm))
1355		return;
1356	rw_wlock(&tte_list_global_lock);
1357	PMAP_LOCK(pm);
1358	if (end - start > PMAP_TSB_THRESH) {
1359		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1360		tlb_context_demap(pm);
1361	} else {
1362		for (va = start; va < end; va += PAGE_SIZE)
1363			if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1364			    !pmap_remove_tte(pm, NULL, tp, va))
1365				break;
1366		tlb_range_demap(pm, start, end - 1);
1367	}
1368	PMAP_UNLOCK(pm);
1369	rw_wunlock(&tte_list_global_lock);
1370}
1371
1372void
1373pmap_remove_all(vm_page_t m)
1374{
1375	struct pmap *pm;
1376	struct tte *tpn;
1377	struct tte *tp;
1378	vm_offset_t va;
1379
1380	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1381	    ("pmap_remove_all: page %p is not managed", m));
1382	rw_wlock(&tte_list_global_lock);
1383	for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1384		tpn = TAILQ_NEXT(tp, tte_link);
1385		if ((tp->tte_data & TD_PV) == 0)
1386			continue;
1387		pm = TTE_GET_PMAP(tp);
1388		va = TTE_GET_VA(tp);
1389		PMAP_LOCK(pm);
1390		if ((tp->tte_data & TD_WIRED) != 0)
1391			pm->pm_stats.wired_count--;
1392		if ((tp->tte_data & TD_REF) != 0)
1393			vm_page_aflag_set(m, PGA_REFERENCED);
1394		if ((tp->tte_data & TD_W) != 0)
1395			vm_page_dirty(m);
1396		tp->tte_data &= ~TD_V;
1397		tlb_page_demap(pm, va);
1398		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1399		pm->pm_stats.resident_count--;
1400		pmap_cache_remove(m, va);
1401		TTE_ZERO(tp);
1402		PMAP_UNLOCK(pm);
1403	}
1404	vm_page_aflag_clear(m, PGA_WRITEABLE);
1405	rw_wunlock(&tte_list_global_lock);
1406}
1407
1408static int
1409pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1410    vm_offset_t va)
1411{
1412	u_long data;
1413	vm_page_t m;
1414
1415	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1416	data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1417	if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1418		m = PHYS_TO_VM_PAGE(TD_PA(data));
1419		vm_page_dirty(m);
1420	}
1421	return (1);
1422}
1423
1424/*
1425 * Set the physical protection on the specified range of this map as requested.
1426 */
1427void
1428pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1429{
1430	vm_offset_t va;
1431	struct tte *tp;
1432
1433	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1434	    pm->pm_context[curcpu], sva, eva, prot);
1435
1436	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1437		pmap_remove(pm, sva, eva);
1438		return;
1439	}
1440
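	/*
	 * If write permission is retained, there is nothing to restrict.
	 */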
1441	if (prot & VM_PROT_WRITE)
1442		return;
1443
1444	PMAP_LOCK(pm);
1445	if (eva - sva > PMAP_TSB_THRESH) {
1446		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1447		tlb_context_demap(pm);
1448	} else {
1449		for (va = sva; va < eva; va += PAGE_SIZE)
1450			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1451				pmap_protect_tte(pm, NULL, tp, va);
1452		tlb_range_demap(pm, sva, eva - 1);
1453	}
1454	PMAP_UNLOCK(pm);
1455}
1456
1457/*
1458 * Map the given physical page at the specified virtual address in the
1459 * target pmap with the protection requested.  If specified the page
1460 * will be wired down.
1461 */
1462int
1463pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1464    u_int flags, int8_t psind)
1465{
1466	int rv;
1467
1468	rw_wlock(&tte_list_global_lock);
1469	PMAP_LOCK(pm);
1470	rv = pmap_enter_locked(pm, va, m, prot, flags, psind);
1471	rw_wunlock(&tte_list_global_lock);
1472	PMAP_UNLOCK(pm);
1473	return (rv);
1474}
1475
1476/*
1477 * Map the given physical page at the specified virtual address in the
1478 * target pmap with the protection requested.  If specified the page
1479 * will be wired down.
1480 *
1481 * The page queues and pmap must be locked.
1482 */
1483static int
1484pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1485    u_int flags, int8_t psind __unused)
1486{
1487	struct tte *tp;
1488	vm_paddr_t pa;
1489	vm_page_t real;
1490	u_long data;
1491	boolean_t wired;
1492
1493	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1494	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1495	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1496		VM_OBJECT_ASSERT_LOCKED(m->object);
1497	PMAP_STATS_INC(pmap_nenter);
1498	pa = VM_PAGE_TO_PHYS(m);
1499	wired = (flags & PMAP_ENTER_WIRED) != 0;
1500
1501	/*
1502	 * If this is a fake page from the device_pager, but it covers actual
1503	 * physical memory, convert to the real backing page.
1504	 */
1505	if ((m->flags & PG_FICTITIOUS) != 0) {
1506		real = vm_phys_paddr_to_vm_page(pa);
1507		if (real != NULL)
1508			m = real;
1509	}
1510
1511	CTR6(KTR_PMAP,
1512	    "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1513	    pm->pm_context[curcpu], m, va, pa, prot, wired);
1514
1515	/*
1516	 * If there is an existing mapping, and the physical address has not
	 * changed, it must be a protection or wiring change.
1518	 */
1519	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1520		CTR0(KTR_PMAP, "pmap_enter_locked: update");
1521		PMAP_STATS_INC(pmap_nenter_update);
1522
1523		/*
1524		 * Wiring change, just update stats.
1525		 */
1526		if (wired) {
1527			if ((tp->tte_data & TD_WIRED) == 0) {
1528				tp->tte_data |= TD_WIRED;
1529				pm->pm_stats.wired_count++;
1530			}
1531		} else {
1532			if ((tp->tte_data & TD_WIRED) != 0) {
1533				tp->tte_data &= ~TD_WIRED;
1534				pm->pm_stats.wired_count--;
1535			}
1536		}
1537
1538		/*
1539		 * Save the old bits and clear the ones we're interested in.
1540		 */
1541		data = tp->tte_data;
1542		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1543
1544		/*
		 * Grant write access if requested; if write permissions are
		 * being turned off instead, sense the modify status.
1546		 */
1547		if ((prot & VM_PROT_WRITE) != 0) {
1548			tp->tte_data |= TD_SW;
1549			if (wired)
1550				tp->tte_data |= TD_W;
1551			if ((m->oflags & VPO_UNMANAGED) == 0)
1552				vm_page_aflag_set(m, PGA_WRITEABLE);
1553		} else if ((data & TD_W) != 0)
1554			vm_page_dirty(m);
1555
1556		/*
1557		 * If we're turning on execute permissions, flush the icache.
1558		 */
1559		if ((prot & VM_PROT_EXECUTE) != 0) {
1560			if ((data & TD_EXEC) == 0)
1561				icache_page_inval(pa);
1562			tp->tte_data |= TD_EXEC;
1563		}
1564
1565		/*
1566		 * Delete the old mapping.
1567		 */
1568		tlb_page_demap(pm, TTE_GET_VA(tp));
1569	} else {
1570		/*
		 * If there is an existing mapping, but it is for a different
1572		 * physical address, delete the old mapping.
1573		 */
1574		if (tp != NULL) {
1575			CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1576			PMAP_STATS_INC(pmap_nenter_replace);
1577			pmap_remove_tte(pm, NULL, tp, va);
1578			tlb_page_demap(pm, va);
1579		} else {
1580			CTR0(KTR_PMAP, "pmap_enter_locked: new");
1581			PMAP_STATS_INC(pmap_nenter_new);
1582		}
1583
1584		/*
1585		 * Now set up the data and install the new mapping.
1586		 */
1587		data = TD_V | TD_8K | TD_PA(pa);
1588		if (pm == kernel_pmap)
1589			data |= TD_P;
1590		if ((prot & VM_PROT_WRITE) != 0) {
1591			data |= TD_SW;
1592			if ((m->oflags & VPO_UNMANAGED) == 0)
1593				vm_page_aflag_set(m, PGA_WRITEABLE);
1594		}
1595		if (prot & VM_PROT_EXECUTE) {
1596			data |= TD_EXEC;
1597			icache_page_inval(pa);
1598		}
1599
1600		/*
		 * If it is wired, update stats.  We also don't need reference or
1602		 * modify tracking for wired mappings, so set the bits now.
1603		 */
1604		if (wired) {
1605			pm->pm_stats.wired_count++;
1606			data |= TD_REF | TD_WIRED;
1607			if ((prot & VM_PROT_WRITE) != 0)
1608				data |= TD_W;
1609		}
1610
1611		tsb_tte_enter(pm, m, va, TS_8K, data);
1612	}
1613
1614	return (KERN_SUCCESS);
1615}
1616
1617/*
1618 * Maps a sequence of resident pages belonging to the same object.
1619 * The sequence begins with the given page m_start.  This page is
1620 * mapped at the given virtual address start.  Each subsequent page is
1621 * mapped at a virtual address that is offset from start by the same
1622 * amount as the page is offset from m_start within the object.  The
1623 * last page in the sequence is the page with the largest offset from
1624 * m_start that can be mapped at a virtual address less than the given
1625 * virtual address end.  Not every virtual page between start and end
1626 * is mapped; only those for which a resident page exists with the
1627 * corresponding offset from m_start are mapped.
1628 */
1629void
1630pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1631    vm_page_t m_start, vm_prot_t prot)
1632{
1633	vm_page_t m;
1634	vm_pindex_t diff, psize;
1635
1636	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1637
1638	psize = atop(end - start);
1639	m = m_start;
1640	rw_wlock(&tte_list_global_lock);
1641	PMAP_LOCK(pm);
1642	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1643		pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1644		    (VM_PROT_READ | VM_PROT_EXECUTE), 0, 0);
1645		m = TAILQ_NEXT(m, listq);
1646	}
1647	rw_wunlock(&tte_list_global_lock);
1648	PMAP_UNLOCK(pm);
1649}
1650
1651void
1652pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1653{
1654
1655	rw_wlock(&tte_list_global_lock);
1656	PMAP_LOCK(pm);
1657	pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1658	    0, 0);
1659	rw_wunlock(&tte_list_global_lock);
1660	PMAP_UNLOCK(pm);
1661}
1662
1663void
1664pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1665    vm_pindex_t pindex, vm_size_t size)
1666{
1667
1668	VM_OBJECT_ASSERT_WLOCKED(object);
1669	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1670	    ("pmap_object_init_pt: non-device object"));
1671}
1672
1673static int
1674pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp, vm_offset_t va)
1675{
1676
1677	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1678	if ((tp->tte_data & TD_WIRED) == 0)
1679		panic("pmap_unwire_tte: tp %p is missing TD_WIRED", tp);
1680	atomic_clear_long(&tp->tte_data, TD_WIRED);
1681	pm->pm_stats.wired_count--;
1682	return (1);
1683}
1684
1685/*
1686 * Clear the wired attribute from the mappings for the specified range of
1687 * addresses in the given pmap.  Every valid mapping within that range must
1688 * have the wired attribute set.  In contrast, invalid mappings cannot have
1689 * the wired attribute set, so they are ignored.
1690 *
1691 * The wired attribute of the translation table entry is not a hardware
1692 * feature, so there is no need to invalidate any TLB entries.
1693 */
1694void
1695pmap_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1696{
1697	vm_offset_t va;
1698	struct tte *tp;
1699
1700	PMAP_LOCK(pm);
1701	if (eva - sva > PMAP_TSB_THRESH)
1702		tsb_foreach(pm, NULL, sva, eva, pmap_unwire_tte);
1703	else {
1704		for (va = sva; va < eva; va += PAGE_SIZE)
1705			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1706				pmap_unwire_tte(pm, NULL, tp, va);
1707	}
1708	PMAP_UNLOCK(pm);
1709}
1710
1711static int
1712pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1713    vm_offset_t va)
1714{
1715	vm_page_t m;
1716	u_long data;
1717
1718	if ((tp->tte_data & TD_FAKE) != 0)
1719		return (1);
1720	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1721		data = tp->tte_data &
1722		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1723		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1724		tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1725	}
1726	return (1);
1727}
1728
1729void
1730pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1731    vm_size_t len, vm_offset_t src_addr)
1732{
1733	struct tte *tp;
1734	vm_offset_t va;
1735
1736	if (dst_addr != src_addr)
1737		return;
1738	rw_wlock(&tte_list_global_lock);
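	/*
	 * Lock the two pmaps in a fixed (address) order to avoid deadlock
	 * with other paths that lock a pair of pmaps.
	 */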
1739	if (dst_pmap < src_pmap) {
1740		PMAP_LOCK(dst_pmap);
1741		PMAP_LOCK(src_pmap);
1742	} else {
1743		PMAP_LOCK(src_pmap);
1744		PMAP_LOCK(dst_pmap);
1745	}
1746	if (len > PMAP_TSB_THRESH) {
1747		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1748		    pmap_copy_tte);
1749		tlb_context_demap(dst_pmap);
1750	} else {
1751		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1752			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1753				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1754		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1755	}
1756	rw_wunlock(&tte_list_global_lock);
1757	PMAP_UNLOCK(src_pmap);
1758	PMAP_UNLOCK(dst_pmap);
1759}
1760
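/*
 * Zero a page.  Use the cacheable direct map if the page's current color
 * matches its physical color, a physical-address store via ASI_PHYS_USE_EC
 * if the page is uncacheable, and a temporary mapping of the page's color
 * otherwise.
 */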
1761void
1762pmap_zero_page(vm_page_t m)
1763{
1764	struct tte *tp;
1765	vm_offset_t va;
1766	vm_paddr_t pa;
1767
1768	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1769	    ("pmap_zero_page: fake page"));
1770	PMAP_STATS_INC(pmap_nzero_page);
1771	pa = VM_PAGE_TO_PHYS(m);
1772	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1773		PMAP_STATS_INC(pmap_nzero_page_c);
1774		va = TLB_PHYS_TO_DIRECT(pa);
1775		cpu_block_zero((void *)va, PAGE_SIZE);
1776	} else if (m->md.color == -1) {
1777		PMAP_STATS_INC(pmap_nzero_page_nc);
1778		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1779	} else {
1780		PMAP_STATS_INC(pmap_nzero_page_oc);
1781		PMAP_LOCK(kernel_pmap);
1782		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1783		tp = tsb_kvtotte(va);
1784		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1785		tp->tte_vpn = TV_VPN(va, TS_8K);
1786		cpu_block_zero((void *)va, PAGE_SIZE);
1787		tlb_page_demap(kernel_pmap, va);
1788		PMAP_UNLOCK(kernel_pmap);
1789	}
1790}
1791
1792void
1793pmap_zero_page_area(vm_page_t m, int off, int size)
1794{
1795	struct tte *tp;
1796	vm_offset_t va;
1797	vm_paddr_t pa;
1798
1799	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1800	    ("pmap_zero_page_area: fake page"));
1801	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1802	PMAP_STATS_INC(pmap_nzero_page_area);
1803	pa = VM_PAGE_TO_PHYS(m);
1804	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1805		PMAP_STATS_INC(pmap_nzero_page_area_c);
1806		va = TLB_PHYS_TO_DIRECT(pa);
1807		bzero((void *)(va + off), size);
1808	} else if (m->md.color == -1) {
1809		PMAP_STATS_INC(pmap_nzero_page_area_nc);
1810		aszero(ASI_PHYS_USE_EC, pa + off, size);
1811	} else {
1812		PMAP_STATS_INC(pmap_nzero_page_area_oc);
1813		PMAP_LOCK(kernel_pmap);
1814		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1815		tp = tsb_kvtotte(va);
1816		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1817		tp->tte_vpn = TV_VPN(va, TS_8K);
1818		bzero((void *)(va + off), size);
1819		tlb_page_demap(kernel_pmap, va);
1820		PMAP_UNLOCK(kernel_pmap);
1821	}
1822}
1823
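/*
 * Zero a page from the idle loop.  This is the same as pmap_zero_page()
 * except that the off-color case uses the pmap_idle_map window and does
 * not take the kernel pmap lock.
 */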
1824void
1825pmap_zero_page_idle(vm_page_t m)
1826{
1827	struct tte *tp;
1828	vm_offset_t va;
1829	vm_paddr_t pa;
1830
1831	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1832	    ("pmap_zero_page_idle: fake page"));
1833	PMAP_STATS_INC(pmap_nzero_page_idle);
1834	pa = VM_PAGE_TO_PHYS(m);
1835	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1836		PMAP_STATS_INC(pmap_nzero_page_idle_c);
1837		va = TLB_PHYS_TO_DIRECT(pa);
1838		cpu_block_zero((void *)va, PAGE_SIZE);
1839	} else if (m->md.color == -1) {
1840		PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1841		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1842	} else {
1843		PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1844		va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1845		tp = tsb_kvtotte(va);
1846		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1847		tp->tte_vpn = TV_VPN(va, TS_8K);
1848		cpu_block_zero((void *)va, PAGE_SIZE);
1849		tlb_page_demap(kernel_pmap, va);
1850	}
1851}
1852
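/*
 * Copy the contents of one physical page to another.  The combination of
 * data cache colors of the source and destination pages determines the
 * access method for each side: the direct map when the color matches,
 * physical ASI copies for uncolored pages, and correctly colored temporary
 * kernel mappings otherwise.
 */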
1853void
1854pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1855{
1856	vm_offset_t vdst;
1857	vm_offset_t vsrc;
1858	vm_paddr_t pdst;
1859	vm_paddr_t psrc;
1860	struct tte *tp;
1861
1862	KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1863	    ("pmap_copy_page: fake dst page"));
1864	KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1865	    ("pmap_copy_page: fake src page"));
1866	PMAP_STATS_INC(pmap_ncopy_page);
1867	pdst = VM_PAGE_TO_PHYS(mdst);
1868	psrc = VM_PAGE_TO_PHYS(msrc);
1869	if (dcache_color_ignore != 0 ||
1870	    (msrc->md.color == DCACHE_COLOR(psrc) &&
1871	    mdst->md.color == DCACHE_COLOR(pdst))) {
1872		PMAP_STATS_INC(pmap_ncopy_page_c);
1873		vdst = TLB_PHYS_TO_DIRECT(pdst);
1874		vsrc = TLB_PHYS_TO_DIRECT(psrc);
1875		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1876	} else if (msrc->md.color == -1 && mdst->md.color == -1) {
1877		PMAP_STATS_INC(pmap_ncopy_page_nc);
1878		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1879	} else if (msrc->md.color == -1) {
1880		if (mdst->md.color == DCACHE_COLOR(pdst)) {
1881			PMAP_STATS_INC(pmap_ncopy_page_dc);
1882			vdst = TLB_PHYS_TO_DIRECT(pdst);
1883			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1884			    PAGE_SIZE);
1885		} else {
1886			PMAP_STATS_INC(pmap_ncopy_page_doc);
1887			PMAP_LOCK(kernel_pmap);
1888			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1889			tp = tsb_kvtotte(vdst);
1890			tp->tte_data =
1891			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1892			tp->tte_vpn = TV_VPN(vdst, TS_8K);
1893			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1894			    PAGE_SIZE);
1895			tlb_page_demap(kernel_pmap, vdst);
1896			PMAP_UNLOCK(kernel_pmap);
1897		}
1898	} else if (mdst->md.color == -1) {
1899		if (msrc->md.color == DCACHE_COLOR(psrc)) {
1900			PMAP_STATS_INC(pmap_ncopy_page_sc);
1901			vsrc = TLB_PHYS_TO_DIRECT(psrc);
1902			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1903			    PAGE_SIZE);
1904		} else {
1905			PMAP_STATS_INC(pmap_ncopy_page_soc);
1906			PMAP_LOCK(kernel_pmap);
1907			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1908			tp = tsb_kvtotte(vsrc);
1909			tp->tte_data =
1910			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1911			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1912			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1913			    PAGE_SIZE);
1914			tlb_page_demap(kernel_pmap, vsrc);
1915			PMAP_UNLOCK(kernel_pmap);
1916		}
1917	} else {
1918		PMAP_STATS_INC(pmap_ncopy_page_oc);
1919		PMAP_LOCK(kernel_pmap);
1920		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1921		tp = tsb_kvtotte(vdst);
1922		tp->tte_data =
1923		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1924		tp->tte_vpn = TV_VPN(vdst, TS_8K);
1925		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1926		tp = tsb_kvtotte(vsrc);
1927		tp->tte_data =
1928		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1929		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1930		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1931		tlb_page_demap(kernel_pmap, vdst);
1932		tlb_page_demap(kernel_pmap, vsrc);
1933		PMAP_UNLOCK(kernel_pmap);
1934	}
1935}
1936
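/*
 * Unmapped buffer I/O is not supported: unmapped_buf_allowed is left at
 * zero, so pmap_copy_pages() is not expected to be reached and simply
 * panics.
 */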
1937int unmapped_buf_allowed;
1938
1939void
1940pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
1941    vm_offset_t b_offset, int xfersize)
1942{
1943
1944	panic("pmap_copy_pages: not implemented");
1945}
1946
1947/*
1948 * Return TRUE if the given pmap owns one of the first 16 pv
1949 * entries linked to this page.  This limit may be raised or
1950 * lowered in the future; it is only necessary that TRUE be
1951 * returned for a small subset of the page's pmaps for proper
1952 * page aging.
1953 */
1954boolean_t
1955pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1956{
1957	struct tte *tp;
1958	int loops;
1959	boolean_t rv;
1960
1961	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1962	    ("pmap_page_exists_quick: page %p is not managed", m));
1963	loops = 0;
1964	rv = FALSE;
1965	rw_wlock(&tte_list_global_lock);
1966	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1967		if ((tp->tte_data & TD_PV) == 0)
1968			continue;
1969		if (TTE_GET_PMAP(tp) == pm) {
1970			rv = TRUE;
1971			break;
1972		}
1973		if (++loops >= 16)
1974			break;
1975	}
1976	rw_wunlock(&tte_list_global_lock);
1977	return (rv);
1978}
1979
1980/*
1981 * Return the number of managed mappings to the given physical page
1982 * that are wired.
1983 */
1984int
1985pmap_page_wired_mappings(vm_page_t m)
1986{
1987	struct tte *tp;
1988	int count;
1989
1990	count = 0;
1991	if ((m->oflags & VPO_UNMANAGED) != 0)
1992		return (count);
1993	rw_wlock(&tte_list_global_lock);
1994	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1995		if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1996			count++;
1997	rw_wunlock(&tte_list_global_lock);
1998	return (count);
1999}
2000
2001/*
2002 * Remove all pages from the specified address space; this aids process exit
2003 * speed.  This is much faster than pmap_remove() in the case of running down
2004 * an entire address space.  Only works for the current pmap.
2005 */
2006void
2007pmap_remove_pages(pmap_t pm)
2008{
2009
2010}
2011
2012/*
2013 * Returns TRUE if the given page has a managed mapping.
2014 */
2015boolean_t
2016pmap_page_is_mapped(vm_page_t m)
2017{
2018	struct tte *tp;
2019	boolean_t rv;
2020
2021	rv = FALSE;
2022	if ((m->oflags & VPO_UNMANAGED) != 0)
2023		return (rv);
2024	rw_wlock(&tte_list_global_lock);
2025	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
2026		if ((tp->tte_data & TD_PV) != 0) {
2027			rv = TRUE;
2028			break;
2029		}
2030	rw_wunlock(&tte_list_global_lock);
2031	return (rv);
2032}
2033
2034/*
2035 * Return a count of reference bits for a page, clearing those bits.
2036 * It is not necessary for every reference bit to be cleared, but it
2037 * is necessary that 0 only be returned when there are truly no
2038 * reference bits set.
2039 *
2040 * XXX: The exact number of bits to check and clear is a matter that
2041 * should be tested and standardized at some point in the future for
2042 * optimal aging of shared pages.
2043 */
2044int
2045pmap_ts_referenced(vm_page_t m)
2046{
2047	struct tte *tpf;
2048	struct tte *tpn;
2049	struct tte *tp;
2050	u_long data;
2051	int count;
2052
2053	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2054	    ("pmap_ts_referenced: page %p is not managed", m));
2055	count = 0;
2056	rw_wlock(&tte_list_global_lock);
2057	if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2058		tpf = tp;
2059		do {
2060			tpn = TAILQ_NEXT(tp, tte_link);
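			/*
			 * Move the entry to the tail of the list so that
			 * repeated calls spread the reference checks over
			 * all of the page's mappings.
			 */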
2061			TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2062			TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2063			if ((tp->tte_data & TD_PV) == 0)
2064				continue;
2065			data = atomic_clear_long(&tp->tte_data, TD_REF);
2066			if ((data & TD_REF) != 0 && ++count > 4)
2067				break;
2068		} while ((tp = tpn) != NULL && tp != tpf);
2069	}
2070	rw_wunlock(&tte_list_global_lock);
2071	return (count);
2072}
2073
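/*
 * Return TRUE if the given page has been modified, that is, if any of its
 * managed mappings has the hardware write (TD_W) bit set.
 */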
2074boolean_t
2075pmap_is_modified(vm_page_t m)
2076{
2077	struct tte *tp;
2078	boolean_t rv;
2079
2080	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2081	    ("pmap_is_modified: page %p is not managed", m));
2082	rv = FALSE;
2083
2084	/*
2085	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2086	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2087	 * is clear, no TTEs can have TD_W set.
2088	 */
2089	VM_OBJECT_ASSERT_WLOCKED(m->object);
2090	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2091		return (rv);
2092	rw_wlock(&tte_list_global_lock);
2093	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2094		if ((tp->tte_data & TD_PV) == 0)
2095			continue;
2096		if ((tp->tte_data & TD_W) != 0) {
2097			rv = TRUE;
2098			break;
2099		}
2100	}
2101	rw_wunlock(&tte_list_global_lock);
2102	return (rv);
2103}
2104
2105/*
2106 *	pmap_is_prefaultable:
2107 *
2108 *	Return whether or not the specified virtual address is eligible
2109 *	for prefault.
2110 */
2111boolean_t
2112pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2113{
2114	boolean_t rv;
2115
2116	PMAP_LOCK(pmap);
2117	rv = tsb_tte_lookup(pmap, addr) == NULL;
2118	PMAP_UNLOCK(pmap);
2119	return (rv);
2120}
2121
2122/*
2123 * Return whether or not the specified physical page was referenced
2124 * in any physical maps.
2125 */
2126boolean_t
2127pmap_is_referenced(vm_page_t m)
2128{
2129	struct tte *tp;
2130	boolean_t rv;
2131
2132	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2133	    ("pmap_is_referenced: page %p is not managed", m));
2134	rv = FALSE;
2135	rw_wlock(&tte_list_global_lock);
2136	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2137		if ((tp->tte_data & TD_PV) == 0)
2138			continue;
2139		if ((tp->tte_data & TD_REF) != 0) {
2140			rv = TRUE;
2141			break;
2142		}
2143	}
2144	rw_wunlock(&tte_list_global_lock);
2145	return (rv);
2146}
2147
2148/*
2149 * This function is only advisory, and the advice is simply ignored here.
2150 */
2151void
2152pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2153{
2154}
2155
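/*
 * Clear the modify status of the given page by removing the write (TD_W)
 * bit from each managed mapping and demapping any TLB entry that had it
 * set.
 */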
2156void
2157pmap_clear_modify(vm_page_t m)
2158{
2159	struct tte *tp;
2160	u_long data;
2161
2162	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2163	    ("pmap_clear_modify: page %p is not managed", m));
2164	VM_OBJECT_ASSERT_WLOCKED(m->object);
2165	KASSERT(!vm_page_xbusied(m),
2166	    ("pmap_clear_modify: page %p is exclusive busied", m));
2167
2168	/*
2169	 * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
2170	 * If the object containing the page is locked and the page is not
2171	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2172	 */
2173	if ((m->aflags & PGA_WRITEABLE) == 0)
2174		return;
2175	rw_wlock(&tte_list_global_lock);
2176	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2177		if ((tp->tte_data & TD_PV) == 0)
2178			continue;
2179		data = atomic_clear_long(&tp->tte_data, TD_W);
2180		if ((data & TD_W) != 0)
2181			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2182	}
2183	rw_wunlock(&tte_list_global_lock);
2184}
2185
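/*
 * Revoke write access to the given page: clear the software and hardware
 * write bits in all managed mappings, dirty the page if it was modified,
 * demap the affected TLB entries and clear PGA_WRITEABLE.
 */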
2186void
2187pmap_remove_write(vm_page_t m)
2188{
2189	struct tte *tp;
2190	u_long data;
2191
2192	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2193	    ("pmap_remove_write: page %p is not managed", m));
2194
2195	/*
2196	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2197	 * set by another thread while the object is locked.  Thus,
2198	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2199	 */
2200	VM_OBJECT_ASSERT_WLOCKED(m->object);
2201	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2202		return;
2203	rw_wlock(&tte_list_global_lock);
2204	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2205		if ((tp->tte_data & TD_PV) == 0)
2206			continue;
2207		data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2208		if ((data & TD_W) != 0) {
2209			vm_page_dirty(m);
2210			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2211		}
2212	}
2213	vm_page_aflag_clear(m, PGA_WRITEABLE);
2214	rw_wunlock(&tte_list_global_lock);
2215}
2216
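/*
 * Support for mincore(2); not yet implemented, so no residency information
 * is returned.
 */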
2217int
2218pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
2219{
2220
2221	/* TODO */
2222	return (0);
2223}
2224
2225/*
2226 * Activate a user pmap.  The pmap must be activated before its address space
2227 * can be accessed in any way.
2228 */
2229void
2230pmap_activate(struct thread *td)
2231{
2232	struct vmspace *vm;
2233	struct pmap *pm;
2234	int context;
2235
2236	critical_enter();
2237	vm = td->td_proc->p_vmspace;
2238	pm = vmspace_pmap(vm);
2239
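	/*
	 * Allocate a TLB context number for this CPU.  When the available
	 * range is exhausted, flush all user TLB entries and start over at
	 * the minimum context number.
	 */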
2240	context = PCPU_GET(tlb_ctx);
2241	if (context == PCPU_GET(tlb_ctx_max)) {
2242		tlb_flush_user();
2243		context = PCPU_GET(tlb_ctx_min);
2244	}
2245	PCPU_SET(tlb_ctx, context + 1);
2246
2247	pm->pm_context[curcpu] = context;
2248#ifdef SMP
2249	CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
2250	atomic_store_acq_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
2251#else
2252	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
2253	PCPU_SET(pmap, pm);
2254#endif
2255
2256	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2257	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2258	stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2259	    TLB_CXR_PGSZ_MASK) | context);
2260	flush(KERNBASE);
2261	critical_exit();
2262}
2263
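/*
 * Synchronize the instruction cache for the given address range.  This is
 * a no-op on sparc64.
 */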
2264void
2265pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2266{
2267
2268}
2269
2270/*
2271 * Increase the starting virtual address of the given mapping if a
2272 * different alignment might result in more superpage mappings.
2273 */
2274void
2275pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2276    vm_offset_t *addr, vm_size_t size)
2277{
2278
2279}
2280