pmap.c revision 270441
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 *    notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 *    notice, this list of conditions and the following disclaimer in the
20 *    documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
38 */
39
40#include <sys/cdefs.h>
41__FBSDID("$FreeBSD: stable/10/sys/sparc64/sparc64/pmap.c 270441 2014-08-24 07:59:01Z kib $");
42
43/*
44 * Manages physical address maps.
45 *
46 * Since the information managed by this module is also stored by the
47 * logical address mapping module, this module may throw away valid virtual
48 * to physical mappings at almost any time.  However, invalidations of
49 * mappings must be done as requested.
50 *
51 * In order to cope with hardware architectures which make virtual to
52 * physical map invalidates expensive, this module may delay invalidate
53 * or reduced protection operations until such time as they are actually
54 * necessary.  This module is given full information as to which processors
55 * are currently using which maps, and to when physical maps must be made
56 * correct.
57 */
58
59#include "opt_kstack_pages.h"
60#include "opt_pmap.h"
61
62#include <sys/param.h>
63#include <sys/kernel.h>
64#include <sys/ktr.h>
65#include <sys/lock.h>
66#include <sys/msgbuf.h>
67#include <sys/mutex.h>
68#include <sys/proc.h>
69#include <sys/rwlock.h>
70#include <sys/smp.h>
71#include <sys/sysctl.h>
72#include <sys/systm.h>
73#include <sys/vmmeter.h>
74
75#include <dev/ofw/openfirm.h>
76
77#include <vm/vm.h>
78#include <vm/vm_param.h>
79#include <vm/vm_kern.h>
80#include <vm/vm_page.h>
81#include <vm/vm_map.h>
82#include <vm/vm_object.h>
83#include <vm/vm_extern.h>
84#include <vm/vm_pageout.h>
85#include <vm/vm_pager.h>
86#include <vm/vm_phys.h>
87
88#include <machine/cache.h>
89#include <machine/frame.h>
90#include <machine/instr.h>
91#include <machine/md_var.h>
92#include <machine/metadata.h>
93#include <machine/ofw_mem.h>
94#include <machine/smp.h>
95#include <machine/tlb.h>
96#include <machine/tte.h>
97#include <machine/tsb.h>
98#include <machine/ver.h>
99
100/*
101 * Virtual address of message buffer
102 */
103struct msgbuf *msgbufp;
104
105/*
106 * Map of physical memory regions
107 */
108vm_paddr_t phys_avail[128];
109static struct ofw_mem_region mra[128];
110struct ofw_mem_region sparc64_memreg[128];
111int sparc64_nmemreg;
112static struct ofw_map translations[128];
113static int translations_size;
114
115static vm_offset_t pmap_idle_map;
116static vm_offset_t pmap_temp_map_1;
117static vm_offset_t pmap_temp_map_2;
118
119/*
120 * First and last available kernel virtual addresses
121 */
122vm_offset_t virtual_avail;
123vm_offset_t virtual_end;
124vm_offset_t kernel_vm_end;
125
126vm_offset_t vm_max_kernel_address;
127
128/*
129 * Kernel pmap
130 */
131struct pmap kernel_pmap_store;
132
133struct rwlock_padalign tte_list_global_lock;
134
135/*
136 * Allocate physical memory for use in pmap_bootstrap.
137 */
138static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
139
140static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
141static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
142static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
143    struct tte *tp, vm_offset_t va);
144
145/*
146 * Map the given physical page at the specified virtual address in the
147 * target pmap with the protection requested.  If specified the page
148 * will be wired down.
149 *
150 * The page queues and pmap must be locked.
151 */
152static int pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
153    vm_prot_t prot, u_int flags, int8_t psind);
154
155extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
156extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
157extern int tl1_dmmu_miss_patch_asi_1[];
158extern int tl1_dmmu_miss_patch_quad_ldd_1[];
159extern int tl1_dmmu_miss_patch_tsb_1[];
160extern int tl1_dmmu_miss_patch_tsb_2[];
161extern int tl1_dmmu_miss_patch_tsb_mask_1[];
162extern int tl1_dmmu_miss_patch_tsb_mask_2[];
163extern int tl1_dmmu_prot_patch_asi_1[];
164extern int tl1_dmmu_prot_patch_quad_ldd_1[];
165extern int tl1_dmmu_prot_patch_tsb_1[];
166extern int tl1_dmmu_prot_patch_tsb_2[];
167extern int tl1_dmmu_prot_patch_tsb_mask_1[];
168extern int tl1_dmmu_prot_patch_tsb_mask_2[];
169extern int tl1_immu_miss_patch_asi_1[];
170extern int tl1_immu_miss_patch_quad_ldd_1[];
171extern int tl1_immu_miss_patch_tsb_1[];
172extern int tl1_immu_miss_patch_tsb_2[];
173extern int tl1_immu_miss_patch_tsb_mask_1[];
174extern int tl1_immu_miss_patch_tsb_mask_2[];
175
176/*
177 * If a user pmap is processed with pmap_remove and the
178 * resident count drops to 0, there are no more pages to remove, so we
179 * need not continue.
180 */
181#define	PMAP_REMOVE_DONE(pm) \
182	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
183
184/*
185 * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
186 * and pmap_protect() instead of trying each virtual address.
187 */
188#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
189
190SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
191
192PMAP_STATS_VAR(pmap_nenter);
193PMAP_STATS_VAR(pmap_nenter_update);
194PMAP_STATS_VAR(pmap_nenter_replace);
195PMAP_STATS_VAR(pmap_nenter_new);
196PMAP_STATS_VAR(pmap_nkenter);
197PMAP_STATS_VAR(pmap_nkenter_oc);
198PMAP_STATS_VAR(pmap_nkenter_stupid);
199PMAP_STATS_VAR(pmap_nkremove);
200PMAP_STATS_VAR(pmap_nqenter);
201PMAP_STATS_VAR(pmap_nqremove);
202PMAP_STATS_VAR(pmap_ncache_enter);
203PMAP_STATS_VAR(pmap_ncache_enter_c);
204PMAP_STATS_VAR(pmap_ncache_enter_oc);
205PMAP_STATS_VAR(pmap_ncache_enter_cc);
206PMAP_STATS_VAR(pmap_ncache_enter_coc);
207PMAP_STATS_VAR(pmap_ncache_enter_nc);
208PMAP_STATS_VAR(pmap_ncache_enter_cnc);
209PMAP_STATS_VAR(pmap_ncache_remove);
210PMAP_STATS_VAR(pmap_ncache_remove_c);
211PMAP_STATS_VAR(pmap_ncache_remove_oc);
212PMAP_STATS_VAR(pmap_ncache_remove_cc);
213PMAP_STATS_VAR(pmap_ncache_remove_coc);
214PMAP_STATS_VAR(pmap_ncache_remove_nc);
215PMAP_STATS_VAR(pmap_nzero_page);
216PMAP_STATS_VAR(pmap_nzero_page_c);
217PMAP_STATS_VAR(pmap_nzero_page_oc);
218PMAP_STATS_VAR(pmap_nzero_page_nc);
219PMAP_STATS_VAR(pmap_nzero_page_area);
220PMAP_STATS_VAR(pmap_nzero_page_area_c);
221PMAP_STATS_VAR(pmap_nzero_page_area_oc);
222PMAP_STATS_VAR(pmap_nzero_page_area_nc);
223PMAP_STATS_VAR(pmap_nzero_page_idle);
224PMAP_STATS_VAR(pmap_nzero_page_idle_c);
225PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
226PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
227PMAP_STATS_VAR(pmap_ncopy_page);
228PMAP_STATS_VAR(pmap_ncopy_page_c);
229PMAP_STATS_VAR(pmap_ncopy_page_oc);
230PMAP_STATS_VAR(pmap_ncopy_page_nc);
231PMAP_STATS_VAR(pmap_ncopy_page_dc);
232PMAP_STATS_VAR(pmap_ncopy_page_doc);
233PMAP_STATS_VAR(pmap_ncopy_page_sc);
234PMAP_STATS_VAR(pmap_ncopy_page_soc);
235
236PMAP_STATS_VAR(pmap_nnew_thread);
237PMAP_STATS_VAR(pmap_nnew_thread_oc);
238
239static inline u_long dtlb_get_data(u_int tlb, u_int slot);
240
241/*
242 * Quick sort callout for comparing memory regions
243 */
244static int mr_cmp(const void *a, const void *b);
245static int om_cmp(const void *a, const void *b);
246
247static int
248mr_cmp(const void *a, const void *b)
249{
250	const struct ofw_mem_region *mra;
251	const struct ofw_mem_region *mrb;
252
253	mra = a;
254	mrb = b;
255	if (mra->mr_start < mrb->mr_start)
256		return (-1);
257	else if (mra->mr_start > mrb->mr_start)
258		return (1);
259	else
260		return (0);
261}
262
263static int
264om_cmp(const void *a, const void *b)
265{
266	const struct ofw_map *oma;
267	const struct ofw_map *omb;
268
269	oma = a;
270	omb = b;
271	if (oma->om_start < omb->om_start)
272		return (-1);
273	else if (oma->om_start > omb->om_start)
274		return (1);
275	else
276		return (0);
277}
278
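/*
 * Read the data word of the given dTLB entry.  This is used during
 * bootstrap to find out which slots already hold locked entries.
 */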
279static inline u_long
280dtlb_get_data(u_int tlb, u_int slot)
281{
282	u_long data;
283	register_t s;
284
285	slot = TLB_DAR_SLOT(tlb, slot);
286	/*
287	 * We read ASI_DTLB_DATA_ACCESS_REG twice back-to-back in order to
288	 * work around errata of USIII and beyond.
289	 */
290	s = intr_disable();
291	(void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
292	data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
293	intr_restore(s);
294	return (data);
295}
296
297/*
298 * Bootstrap the system enough to run with virtual memory.
299 */
300void
301pmap_bootstrap(u_int cpu_impl)
302{
303	struct pmap *pm;
304	struct tte *tp;
305	vm_offset_t off;
306	vm_offset_t va;
307	vm_paddr_t pa;
308	vm_size_t physsz;
309	vm_size_t virtsz;
310	u_long data;
311	u_long vpn;
312	phandle_t pmem;
313	phandle_t vmem;
314	u_int dtlb_slots_avail;
315	int i;
316	int j;
317	int sz;
318	uint32_t asi;
319	uint32_t colors;
320	uint32_t ldd;
321
322	/*
323	 * Set the kernel context.
324	 */
325	pmap_set_kctx();
326
327	colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
328
329	/*
330	 * Find out what physical memory is available from the PROM and
331	 * initialize the phys_avail array.  This must be done before
332	 * pmap_bootstrap_alloc is called.
333	 */
334	if ((pmem = OF_finddevice("/memory")) == -1)
335		OF_panic("%s: finddevice /memory", __func__);
336	if ((sz = OF_getproplen(pmem, "available")) == -1)
337		OF_panic("%s: getproplen /memory/available", __func__);
338	if (sizeof(phys_avail) < sz)
339		OF_panic("%s: phys_avail too small", __func__);
340	if (sizeof(mra) < sz)
341		OF_panic("%s: mra too small", __func__);
342	bzero(mra, sz);
343	if (OF_getprop(pmem, "available", mra, sz) == -1)
344		OF_panic("%s: getprop /memory/available", __func__);
345	sz /= sizeof(*mra);
346	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
347	qsort(mra, sz, sizeof (*mra), mr_cmp);
348	physsz = 0;
349	getenv_quad("hw.physmem", &physmem);
350	physmem = btoc(physmem);
351	for (i = 0, j = 0; i < sz; i++, j += 2) {
352		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
353		    mra[i].mr_size);
354		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
355			if (btoc(physsz) < physmem) {
356				phys_avail[j] = mra[i].mr_start;
357				phys_avail[j + 1] = mra[i].mr_start +
358				    (ctob(physmem) - physsz);
359				physsz = ctob(physmem);
360			}
361			break;
362		}
363		phys_avail[j] = mra[i].mr_start;
364		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
365		physsz += mra[i].mr_size;
366	}
367	physmem = btoc(physsz);
368
369	/*
370	 * Calculate the size of kernel virtual memory, and the size and mask
371 * for the kernel TSB based on the physical memory size but limited
372	 * by the amount of dTLB slots available for locked entries if we have
373	 * to lock the TSB in the TLB (given that for spitfire-class CPUs all
374	 * of the dt64 slots can hold locked entries but there is no large
375	 * dTLB for unlocked ones, we don't use more than half of it for the
376	 * TSB).
377	 * Note that for reasons unknown OpenSolaris doesn't take advantage of
378	 * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
379	 * public documentation is available for these, the latter just might
380	 * not support it, yet.
381	 */
382	if (cpu_impl == CPU_IMPL_SPARC64V ||
383	    cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
384		tsb_kernel_ldd_phys = 1;
385		virtsz = roundup(physsz * 5 / 3, PAGE_SIZE_4M <<
386		    (PAGE_SHIFT - TTE_SHIFT));
387	} else {
388		dtlb_slots_avail = 0;
389		for (i = 0; i < dtlb_slots; i++) {
390			data = dtlb_get_data(cpu_impl ==
391			    CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
392			    TLB_DAR_T32, i);
393			if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
394				dtlb_slots_avail++;
395		}
396#ifdef SMP
397		dtlb_slots_avail -= PCPU_PAGES;
398#endif
399		if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
400		    cpu_impl < CPU_IMPL_ULTRASPARCIII)
401			dtlb_slots_avail /= 2;
402		virtsz = roundup(physsz, PAGE_SIZE_4M <<
403		    (PAGE_SHIFT - TTE_SHIFT));
404		virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
405		    (PAGE_SHIFT - TTE_SHIFT));
406	}
407	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
408	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
409	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
410
411	/*
412	 * Allocate the kernel TSB and lock it in the TLB if necessary.
413	 */
414	pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
415	if (pa & PAGE_MASK_4M)
416		OF_panic("%s: TSB unaligned", __func__);
417	tsb_kernel_phys = pa;
418	if (tsb_kernel_ldd_phys == 0) {
419		tsb_kernel =
420		    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
421		pmap_map_tsb();
422		bzero(tsb_kernel, tsb_kernel_size);
423	} else {
424		tsb_kernel =
425		    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
426		aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
427	}
428
429	/*
430	 * Allocate and map the dynamic per-CPU area for the BSP.
431	 */
432	pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
433	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
434
435	/*
436	 * Allocate and map the message buffer.
437	 */
438	pa = pmap_bootstrap_alloc(msgbufsize, colors);
439	msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
440
441	/*
442	 * Patch the TSB addresses and mask as well as the ASIs used to load
443	 * it into the trap table.
444	 */
445
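/*
 * The following macros assemble the expected encodings of the to-be-patched
 * instructions (ldda, or, sethi and wr) so that the PATCH_* macros below can
 * verify that the trap table instructions still look as expected before
 * OR-ing in the run-time immediates and flushing the affected instruction
 * cache lines.
 */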
446#define	LDDA_R_I_R(rd, imm_asi, rs1, rs2)				\
447	(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |	\
448	    EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |	\
449	    EIF_F3_RS2(rs2))
450#define	OR_R_I_R(rd, imm13, rs1)					\
451	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |	\
452	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
453#define	SETHI(rd, imm22)						\
454	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |	\
455	    EIF_IMM((imm22) >> 10, 22))
456#define	WR_R_I(rd, imm13, rs1)						\
457	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |	\
458	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
459
460#define	PATCH_ASI(addr, asi) do {					\
461	if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,			\
462	    IF_F3_RS1(addr[0])))					\
463		OF_panic("%s: patched instructions have changed",	\
464		    __func__);						\
465	addr[0] |= EIF_IMM((asi), 13);					\
466	flush(addr);							\
467} while (0)
468
469#define	PATCH_LDD(addr, asi) do {					\
470	if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,		\
471	    IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))			\
472		OF_panic("%s: patched instructions have changed",	\
473		    __func__);						\
474	addr[0] |= EIF_F3_IMM_ASI(asi);					\
475	flush(addr);							\
476} while (0)
477
478#define	PATCH_TSB(addr, val) do {					\
479	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
480	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
481	    IF_F3_RS1(addr[1]))	||					\
482	    addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))			\
483		OF_panic("%s: patched instructions have changed",	\
484		    __func__);						\
485	addr[0] |= EIF_IMM((val) >> 42, 22);				\
486	addr[1] |= EIF_IMM((val) >> 32, 10);				\
487	addr[3] |= EIF_IMM((val) >> 10, 22);				\
488	flush(addr);							\
489	flush(addr + 1);						\
490	flush(addr + 3);						\
491} while (0)
492
493#define	PATCH_TSB_MASK(addr, val) do {					\
494	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
495	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
496	    IF_F3_RS1(addr[1])))					\
497		OF_panic("%s: patched instructions have changed",	\
498		    __func__);						\
499	addr[0] |= EIF_IMM((val) >> 10, 22);				\
500	addr[1] |= EIF_IMM((val), 10);					\
501	flush(addr);							\
502	flush(addr + 1);						\
503} while (0)
504
505	if (tsb_kernel_ldd_phys == 0) {
506		asi = ASI_N;
507		ldd = ASI_NUCLEUS_QUAD_LDD;
508		off = (vm_offset_t)tsb_kernel;
509	} else {
510		asi = ASI_PHYS_USE_EC;
511		ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
512		off = (vm_offset_t)tsb_kernel_phys;
513	}
514	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
515	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
516	    tsb_kernel_phys + tsb_kernel_size - 1);
517	PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
518	PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
519	PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
520	PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
521	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
522	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
523	PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
524	PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
525	PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
526	PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
527	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
528	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
529	PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
530	PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
531	PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
532	PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
533	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
534	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
535
536	/*
537	 * Enter fake 8k pages for the 4MB kernel pages, so that
538	 * pmap_kextract() will work for them.
539	 */
540	for (i = 0; i < kernel_tlb_slots; i++) {
541		pa = kernel_tlbs[i].te_pa;
542		va = kernel_tlbs[i].te_va;
543		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
544			tp = tsb_kvtotte(va + off);
545			vpn = TV_VPN(va + off, TS_8K);
546			data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
547			    TD_SW | TD_CP | TD_CV | TD_P | TD_W;
548			pmap_bootstrap_set_tte(tp, vpn, data);
549		}
550	}
551
552	/*
553	 * Set the start and end of KVA.  The kernel is loaded starting
554	 * at the first available 4MB super page, so we advance to the
555	 * end of the last one used for it.
556	 */
557	virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
558	virtual_end = vm_max_kernel_address;
559	kernel_vm_end = vm_max_kernel_address;
560
561	/*
562	 * Allocate kva space for temporary mappings.
563	 */
564	pmap_idle_map = virtual_avail;
565	virtual_avail += PAGE_SIZE * colors;
566	pmap_temp_map_1 = virtual_avail;
567	virtual_avail += PAGE_SIZE * colors;
568	pmap_temp_map_2 = virtual_avail;
569	virtual_avail += PAGE_SIZE * colors;
570
571	/*
572	 * Allocate a kernel stack with guard page for thread0 and map it
573	 * into the kernel TSB.  We must ensure that the virtual address is
574	 * colored properly for corresponding CPUs, since we're allocating
575	 * from phys_avail so the memory won't have an associated vm_page_t.
576	 */
577	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
578	kstack0_phys = pa;
579	virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
580	kstack0 = virtual_avail;
581	virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
582	if (dcache_color_ignore == 0)
583		KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
584		    ("pmap_bootstrap: kstack0 miscolored"));
585	for (i = 0; i < KSTACK_PAGES; i++) {
586		pa = kstack0_phys + i * PAGE_SIZE;
587		va = kstack0 + i * PAGE_SIZE;
588		tp = tsb_kvtotte(va);
589		vpn = TV_VPN(va, TS_8K);
590		data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
591		    TD_CV | TD_P | TD_W;
592		pmap_bootstrap_set_tte(tp, vpn, data);
593	}
594
595	/*
596	 * Calculate the last available physical address.
597	 */
598	for (i = 0; phys_avail[i + 2] != 0; i += 2)
599		;
600	Maxmem = sparc64_btop(phys_avail[i + 1]);
601
602	/*
603	 * Add the PROM mappings to the kernel TSB.
604	 */
605	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
606		OF_panic("%s: finddevice /virtual-memory", __func__);
607	if ((sz = OF_getproplen(vmem, "translations")) == -1)
608		OF_panic("%s: getproplen translations", __func__);
609	if (sizeof(translations) < sz)
610		OF_panic("%s: translations too small", __func__);
611	bzero(translations, sz);
612	if (OF_getprop(vmem, "translations", translations, sz) == -1)
613		OF_panic("%s: getprop /virtual-memory/translations",
614		    __func__);
615	sz /= sizeof(*translations);
616	translations_size = sz;
617	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
618	qsort(translations, sz, sizeof (*translations), om_cmp);
619	for (i = 0; i < sz; i++) {
620		CTR3(KTR_PMAP,
621		    "translation: start=%#lx size=%#lx tte=%#lx",
622		    translations[i].om_start, translations[i].om_size,
623		    translations[i].om_tte);
624		if ((translations[i].om_tte & TD_V) == 0)
625			continue;
626		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
627		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
628			continue;
629		for (off = 0; off < translations[i].om_size;
630		    off += PAGE_SIZE) {
631			va = translations[i].om_start + off;
632			tp = tsb_kvtotte(va);
633			vpn = TV_VPN(va, TS_8K);
634			data = ((translations[i].om_tte &
635			    ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
636			    (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
637			    cpu_impl < CPU_IMPL_ULTRASPARCIII ?
638			    (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
639			    (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
640			    (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
641			    off;
642			pmap_bootstrap_set_tte(tp, vpn, data);
643		}
644	}
645
646	/*
647	 * Get the available physical memory ranges from /memory/reg.  These
648	 * are only used for kernel dumps, but it may not be wise to do PROM
649	 * calls in that situation.
650	 */
651	if ((sz = OF_getproplen(pmem, "reg")) == -1)
652		OF_panic("%s: getproplen /memory/reg", __func__);
653	if (sizeof(sparc64_memreg) < sz)
654		OF_panic("%s: sparc64_memreg too small", __func__);
655	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
656		OF_panic("%s: getprop /memory/reg", __func__);
657	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
658
659	/*
660	 * Initialize the kernel pmap (which is statically allocated).
661	 */
662	pm = kernel_pmap;
663	PMAP_LOCK_INIT(pm);
664	for (i = 0; i < MAXCPU; i++)
665		pm->pm_context[i] = TLB_CTX_KERNEL;
666	CPU_FILL(&pm->pm_active);
667
668	/*
669	 * Initialize the global tte list lock, which is more commonly
670	 * known as the pmap pv global lock.
671	 */
672	rw_init(&tte_list_global_lock, "pmap pv global");
673
674	/*
675	 * Flush all non-locked TLB entries possibly left over by the
676	 * firmware.
677	 */
678	tlb_flush_nonlocked();
679}
680
681/*
682 * Map the 4MB kernel TSB pages.
683 */
684void
685pmap_map_tsb(void)
686{
687	vm_offset_t va;
688	vm_paddr_t pa;
689	u_long data;
690	int i;
691
692	for (i = 0; i < tsb_kernel_size; i += PAGE_SIZE_4M) {
693		va = (vm_offset_t)tsb_kernel + i;
694		pa = tsb_kernel_phys + i;
695		data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV |
696		    TD_P | TD_W;
697		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) |
698		    TLB_TAR_CTX(TLB_CTX_KERNEL));
699		stxa_sync(0, ASI_DTLB_DATA_IN_REG, data);
700	}
701}
702
703/*
704 * Set the secondary context to be the kernel context (needed for FP block
705 * operations in the kernel).
706 */
707void
708pmap_set_kctx(void)
709{
710
711	stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
712	    TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
713	flush(KERNBASE);
714}
715
716/*
717 * Allocate a physical page of memory directly from the phys_avail map.
718 * Can only be called from pmap_bootstrap before avail start and end are
719 * calculated.
720 */
721static vm_paddr_t
722pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
723{
724	vm_paddr_t pa;
725	int i;
726
727	size = roundup(size, PAGE_SIZE * colors);
728	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
729		if (phys_avail[i + 1] - phys_avail[i] < size)
730			continue;
731		pa = phys_avail[i];
732		phys_avail[i] += size;
733		return (pa);
734	}
735	OF_panic("%s: no suitable region found", __func__);
736}
737
738/*
739 * Set a TTE.  This function is intended as a helper when tsb_kernel is
740 * direct-mapped but we haven't taken over the trap table yet, as is the
741 * case when we are taking advantage of ASI_ATOMIC_QUAD_LDD_PHYS to access
742 * the kernel TSB.
743 */
744static void
745pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
746{
747
748	if (tsb_kernel_ldd_phys == 0) {
749		tp->tte_vpn = vpn;
750		tp->tte_data = data;
751	} else {
752		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
753		    ASI_PHYS_USE_EC, vpn);
754		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
755		    ASI_PHYS_USE_EC, data);
756	}
757}
758
759/*
760 * Initialize a vm_page's machine-dependent fields.
761 */
762void
763pmap_page_init(vm_page_t m)
764{
765
766	TAILQ_INIT(&m->md.tte_list);
767	m->md.color = DCACHE_COLOR(VM_PAGE_TO_PHYS(m));
768	m->md.pmap = NULL;
769}
770
771/*
772 * Initialize the pmap module.
773 */
774void
775pmap_init(void)
776{
777	vm_offset_t addr;
778	vm_size_t size;
779	int result;
780	int i;
781
782	for (i = 0; i < translations_size; i++) {
783		addr = translations[i].om_start;
784		size = translations[i].om_size;
785		if ((translations[i].om_tte & TD_V) == 0)
786			continue;
787		if (addr < VM_MIN_PROM_ADDRESS || addr > VM_MAX_PROM_ADDRESS)
788			continue;
789		result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
790		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
791		if (result != KERN_SUCCESS || addr != translations[i].om_start)
792			panic("pmap_init: vm_map_find");
793	}
794}
795
796/*
797 * Extract the physical page address associated with the given
798 * map/virtual_address pair.
799 */
800vm_paddr_t
801pmap_extract(pmap_t pm, vm_offset_t va)
802{
803	struct tte *tp;
804	vm_paddr_t pa;
805
806	if (pm == kernel_pmap)
807		return (pmap_kextract(va));
808	PMAP_LOCK(pm);
809	tp = tsb_tte_lookup(pm, va);
810	if (tp == NULL)
811		pa = 0;
812	else
813		pa = TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp));
814	PMAP_UNLOCK(pm);
815	return (pa);
816}
817
818/*
819 * Atomically extract and hold the physical page with the given
820 * pmap and virtual address pair if that mapping permits the given
821 * protection.
822 */
823vm_page_t
824pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
825{
826	struct tte *tp;
827	vm_page_t m;
828	vm_paddr_t pa;
829
830	m = NULL;
831	pa = 0;
832	PMAP_LOCK(pm);
833retry:
834	if (pm == kernel_pmap) {
835		if (va >= VM_MIN_DIRECT_ADDRESS) {
836			tp = NULL;
837			m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
838			(void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
839			    &pa);
840			vm_page_hold(m);
841		} else {
842			tp = tsb_kvtotte(va);
843			if ((tp->tte_data & TD_V) == 0)
844				tp = NULL;
845		}
846	} else
847		tp = tsb_tte_lookup(pm, va);
848	if (tp != NULL && ((tp->tte_data & TD_SW) ||
849	    (prot & VM_PROT_WRITE) == 0)) {
850		if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
851			goto retry;
852		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
853		vm_page_hold(m);
854	}
855	PA_UNLOCK_COND(pa);
856	PMAP_UNLOCK(pm);
857	return (m);
858}
859
860/*
861 * Extract the physical page address associated with the given kernel virtual
862 * address.
863 */
864vm_paddr_t
865pmap_kextract(vm_offset_t va)
866{
867	struct tte *tp;
868
869	if (va >= VM_MIN_DIRECT_ADDRESS)
870		return (TLB_DIRECT_TO_PHYS(va));
871	tp = tsb_kvtotte(va);
872	if ((tp->tte_data & TD_V) == 0)
873		return (0);
874	return (TTE_GET_PA(tp) | (va & TTE_GET_PAGE_MASK(tp)));
875}
876
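/*
 * Account for a new mapping of the given page at the given virtual address
 * and decide whether it may be entered cacheable in the virtually indexed
 * data cache.  If mappings of conflicting colors exist, all mappings of the
 * page are made uncacheable.  Returns 1 if the new mapping is cacheable and
 * 0 otherwise.
 */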
877int
878pmap_cache_enter(vm_page_t m, vm_offset_t va)
879{
880	struct tte *tp;
881	int color;
882
883	rw_assert(&tte_list_global_lock, RA_WLOCKED);
884	KASSERT((m->flags & PG_FICTITIOUS) == 0,
885	    ("pmap_cache_enter: fake page"));
886	PMAP_STATS_INC(pmap_ncache_enter);
887
888	if (dcache_color_ignore != 0)
889		return (1);
890
891	/*
892	 * Find the color for this virtual address and note the added mapping.
893	 */
894	color = DCACHE_COLOR(va);
895	m->md.colors[color]++;
896
897	/*
898	 * If all existing mappings have the same color, the mapping is
899	 * cacheable.
900	 */
901	if (m->md.color == color) {
902		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
903		    ("pmap_cache_enter: cacheable, mappings of other color"));
904		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
905			PMAP_STATS_INC(pmap_ncache_enter_c);
906		else
907			PMAP_STATS_INC(pmap_ncache_enter_oc);
908		return (1);
909	}
910
911	/*
912	 * If there are no mappings of the other color, and the page still has
913	 * the wrong color, this must be a new mapping.  Change the color to
914	 * match the new mapping, which is cacheable.  We must flush the page
915	 * from the cache now.
916	 */
917	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
918		KASSERT(m->md.colors[color] == 1,
919		    ("pmap_cache_enter: changing color, not new mapping"));
920		dcache_page_inval(VM_PAGE_TO_PHYS(m));
921		m->md.color = color;
922		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
923			PMAP_STATS_INC(pmap_ncache_enter_cc);
924		else
925			PMAP_STATS_INC(pmap_ncache_enter_coc);
926		return (1);
927	}
928
929	/*
930	 * If the mapping is already non-cacheable, just return.
931	 */
932	if (m->md.color == -1) {
933		PMAP_STATS_INC(pmap_ncache_enter_nc);
934		return (0);
935	}
936
937	PMAP_STATS_INC(pmap_ncache_enter_cnc);
938
939	/*
940	 * Mark all mappings as uncacheable, flush any lines with the other
941	 * color out of the dcache, and set the color to none (-1).
942	 */
943	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
944		atomic_clear_long(&tp->tte_data, TD_CV);
945		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
946	}
947	dcache_page_inval(VM_PAGE_TO_PHYS(m));
948	m->md.color = -1;
949	return (0);
950}
951
952static void
953pmap_cache_remove(vm_page_t m, vm_offset_t va)
954{
955	struct tte *tp;
956	int color;
957
958	rw_assert(&tte_list_global_lock, RA_WLOCKED);
959	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
960	    m->md.colors[DCACHE_COLOR(va)]);
961	KASSERT((m->flags & PG_FICTITIOUS) == 0,
962	    ("pmap_cache_remove: fake page"));
963	PMAP_STATS_INC(pmap_ncache_remove);
964
965	if (dcache_color_ignore != 0)
966		return;
967
968	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
969	    ("pmap_cache_remove: no mappings %d <= 0",
970	    m->md.colors[DCACHE_COLOR(va)]));
971
972	/*
973	 * Find the color for this virtual address and note the removal of
974	 * the mapping.
975	 */
976	color = DCACHE_COLOR(va);
977	m->md.colors[color]--;
978
979	/*
980	 * If the page is cacheable, just return and keep the same color, even
981	 * if there are no longer any mappings.
982	 */
983	if (m->md.color != -1) {
984		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
985			PMAP_STATS_INC(pmap_ncache_remove_c);
986		else
987			PMAP_STATS_INC(pmap_ncache_remove_oc);
988		return;
989	}
990
991	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
992	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
993
994	/*
995	 * If the page is not cacheable (color is -1), and the number of
996	 * mappings for this color is not zero, just return.  There are
997	 * mappings of the other color still, so remain non-cacheable.
998	 */
999	if (m->md.colors[color] != 0) {
1000		PMAP_STATS_INC(pmap_ncache_remove_nc);
1001		return;
1002	}
1003
1004	/*
1005	 * The number of mappings for this color is now zero.  Recache the
1006	 * other colored mappings, and change the page color to the other
1007	 * color.  There should be no lines in the data cache for this page,
1008	 * so flushing should not be needed.
1009	 */
1010	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1011		atomic_set_long(&tp->tte_data, TD_CV);
1012		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
1013	}
1014	m->md.color = DCACHE_OTHER_COLOR(color);
1015
1016	if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
1017		PMAP_STATS_INC(pmap_ncache_remove_cc);
1018	else
1019		PMAP_STATS_INC(pmap_ncache_remove_coc);
1020}
1021
1022/*
1023 * Map a wired page into kernel virtual address space.
1024 */
1025void
1026pmap_kenter(vm_offset_t va, vm_page_t m)
1027{
1028	vm_offset_t ova;
1029	struct tte *tp;
1030	vm_page_t om;
1031	u_long data;
1032
1033	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1034	PMAP_STATS_INC(pmap_nkenter);
1035	tp = tsb_kvtotte(va);
1036	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
1037	    va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
1038	if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
1039		CTR5(KTR_SPARE2,
1040	"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
1041		    va, VM_PAGE_TO_PHYS(m), m->object,
1042		    m->object ? m->object->type : -1,
1043		    m->pindex);
1044		PMAP_STATS_INC(pmap_nkenter_oc);
1045	}
1046	if ((tp->tte_data & TD_V) != 0) {
1047		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1048		ova = TTE_GET_VA(tp);
1049		if (m == om && va == ova) {
1050			PMAP_STATS_INC(pmap_nkenter_stupid);
1051			return;
1052		}
1053		TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
1054		pmap_cache_remove(om, ova);
1055		if (va != ova)
1056			tlb_page_demap(kernel_pmap, ova);
1057	}
1058	data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
1059	    TD_P | TD_W;
1060	if (pmap_cache_enter(m, va) != 0)
1061		data |= TD_CV;
1062	tp->tte_vpn = TV_VPN(va, TS_8K);
1063	tp->tte_data = data;
1064	TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
1065}
1066
1067/*
1068 * Map a wired page into kernel virtual address space.  This additionally
1069 * takes a flag argument which is or'ed to the TTE data.  This is used by
1070 * sparc64_bus_mem_map().
1071 * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
1072 * to flush entries that might still be in the cache, if applicable.
1073 */
1074void
1075pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
1076{
1077	struct tte *tp;
1078
1079	tp = tsb_kvtotte(va);
1080	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
1081	    va, pa, tp, tp->tte_data);
1082	tp->tte_vpn = TV_VPN(va, TS_8K);
1083	tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
1084}
1085
1086/*
1087 * Remove a wired page from kernel virtual address space.
1088 */
1089void
1090pmap_kremove(vm_offset_t va)
1091{
1092	struct tte *tp;
1093	vm_page_t m;
1094
1095	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1096	PMAP_STATS_INC(pmap_nkremove);
1097	tp = tsb_kvtotte(va);
1098	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tp,
1099	    tp->tte_data);
1100	if ((tp->tte_data & TD_V) == 0)
1101		return;
1102	m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1103	TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1104	pmap_cache_remove(m, va);
1105	TTE_ZERO(tp);
1106}
1107
1108/*
1109 * Inverse of pmap_kenter_flags, used by bus_space_unmap().
1110 */
1111void
1112pmap_kremove_flags(vm_offset_t va)
1113{
1114	struct tte *tp;
1115
1116	tp = tsb_kvtotte(va);
1117	CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tp,
1118	    tp->tte_data);
1119	TTE_ZERO(tp);
1120}
1121
1122/*
1123 * Map a range of physical addresses into kernel virtual address space.
1124 *
1125 * The value passed in *virt is a suggested virtual address for the mapping.
1126 * Architectures which can support a direct-mapped physical to virtual region
1127 * can return the appropriate address within that region, leaving '*virt'
1128 * unchanged.
1129 */
1130vm_offset_t
1131pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
1132{
1133
1134	return (TLB_PHYS_TO_DIRECT(start));
1135}
1136
1137/*
1138 * Map a list of wired pages into kernel virtual address space.  This is
1139 * intended for temporary mappings which do not need page modification or
1140 * references recorded.  Existing mappings in the region are overwritten.
1141 */
1142void
1143pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
1144{
1145	vm_offset_t va;
1146
1147	PMAP_STATS_INC(pmap_nqenter);
1148	va = sva;
1149	rw_wlock(&tte_list_global_lock);
1150	while (count-- > 0) {
1151		pmap_kenter(va, *m);
1152		va += PAGE_SIZE;
1153		m++;
1154	}
1155	rw_wunlock(&tte_list_global_lock);
1156	tlb_range_demap(kernel_pmap, sva, va);
1157}
1158
1159/*
1160 * Remove page mappings from kernel virtual address space.  Intended for
1161 * temporary mappings entered by pmap_qenter.
1162 */
1163void
1164pmap_qremove(vm_offset_t sva, int count)
1165{
1166	vm_offset_t va;
1167
1168	PMAP_STATS_INC(pmap_nqremove);
1169	va = sva;
1170	rw_wlock(&tte_list_global_lock);
1171	while (count-- > 0) {
1172		pmap_kremove(va);
1173		va += PAGE_SIZE;
1174	}
1175	rw_wunlock(&tte_list_global_lock);
1176	tlb_range_demap(kernel_pmap, sva, va);
1177}
1178
1179/*
1180 * Initialize the pmap associated with process 0.
1181 */
1182void
1183pmap_pinit0(pmap_t pm)
1184{
1185	int i;
1186
1187	PMAP_LOCK_INIT(pm);
1188	for (i = 0; i < MAXCPU; i++)
1189		pm->pm_context[i] = TLB_CTX_KERNEL;
1190	CPU_ZERO(&pm->pm_active);
1191	pm->pm_tsb = NULL;
1192	pm->pm_tsb_obj = NULL;
1193	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1194}
1195
1196/*
1197 * Initialize a preallocated and zeroed pmap structure, such as one in a
1198 * vmspace structure.
1199 */
1200int
1201pmap_pinit(pmap_t pm)
1202{
1203	vm_page_t ma[TSB_PAGES];
1204	vm_page_t m;
1205	int i;
1206
1207	/*
1208	 * Allocate KVA space for the TSB.
1209	 */
1210	if (pm->pm_tsb == NULL) {
1211		pm->pm_tsb = (struct tte *)kva_alloc(TSB_BSIZE);
1212		if (pm->pm_tsb == NULL)
1213			return (0);
1214	}
1215
1216	/*
1217	 * Allocate an object for it.
1218	 */
1219	if (pm->pm_tsb_obj == NULL)
1220		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
1221
1222	for (i = 0; i < MAXCPU; i++)
1223		pm->pm_context[i] = -1;
1224	CPU_ZERO(&pm->pm_active);
1225
1226	VM_OBJECT_WLOCK(pm->pm_tsb_obj);
1227	for (i = 0; i < TSB_PAGES; i++) {
1228		m = vm_page_grab(pm->pm_tsb_obj, i, VM_ALLOC_NOBUSY |
1229		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1230		m->valid = VM_PAGE_BITS_ALL;
1231		m->md.pmap = pm;
1232		ma[i] = m;
1233	}
1234	VM_OBJECT_WUNLOCK(pm->pm_tsb_obj);
1235	pmap_qenter((vm_offset_t)pm->pm_tsb, ma, TSB_PAGES);
1236
1237	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
1238	return (1);
1239}
1240
1241/*
1242 * Release any resources held by the given physical map.
1243 * Called when a pmap initialized by pmap_pinit is being released.
1244 * Should only be called if the map contains no valid mappings.
1245 */
1246void
1247pmap_release(pmap_t pm)
1248{
1249	vm_object_t obj;
1250	vm_page_t m;
1251#ifdef SMP
1252	struct pcpu *pc;
1253#endif
1254
1255	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
1256	    pm->pm_context[curcpu], pm->pm_tsb);
1257	KASSERT(pmap_resident_count(pm) == 0,
1258	    ("pmap_release: resident pages %ld != 0",
1259	    pmap_resident_count(pm)));
1260
1261	/*
1262	 * After the pmap was freed, it might be reallocated to a new process.
1263	 * When switching, this might lead us to wrongly assume that we need
1264	 * not switch contexts because the old and new pmap pointers are equal.
1265	 * Therefore, make sure that this pmap is not referenced by any PCPU
1266	 * pointer any more.  This could happen in two cases:
1267	 * - A process that referenced the pmap is currently exiting on a CPU.
1268	 *   However, it is guaranteed to not switch in any more after setting
1269	 *   its state to PRS_ZOMBIE.
1270	 * - A process that referenced this pmap ran on a CPU, but we switched
1271	 *   to a kernel thread, leaving the pmap pointer unchanged.
1272	 */
1273#ifdef SMP
1274	sched_pin();
1275	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
1276		atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
1277		    (uintptr_t)pm, (uintptr_t)NULL);
1278	sched_unpin();
1279#else
1280	critical_enter();
1281	if (PCPU_GET(pmap) == pm)
1282		PCPU_SET(pmap, NULL);
1283	critical_exit();
1284#endif
1285
1286	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
1287	obj = pm->pm_tsb_obj;
1288	VM_OBJECT_WLOCK(obj);
1289	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
1290	while (!TAILQ_EMPTY(&obj->memq)) {
1291		m = TAILQ_FIRST(&obj->memq);
1292		m->md.pmap = NULL;
1293		m->wire_count--;
1294		atomic_subtract_int(&cnt.v_wire_count, 1);
1295		vm_page_free_zero(m);
1296	}
1297	VM_OBJECT_WUNLOCK(obj);
1298}
1299
1300/*
1301 * Grow the number of kernel page table entries.  Unneeded.
1302 */
1303void
1304pmap_growkernel(vm_offset_t addr)
1305{
1306
1307	panic("pmap_growkernel: can't grow kernel");
1308}
1309
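/*
 * Per-TTE callback for pmap_remove(): tear down a single mapping, updating
 * the page's TTE list, the dirty and referenced bits and the pmap statistics.
 * Returns 0 once the pmap has no resident pages left so that tsb_foreach()
 * can stop early.
 */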
1310int
1311pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1312    vm_offset_t va)
1313{
1314	vm_page_t m;
1315	u_long data;
1316
1317	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1318	data = atomic_readandclear_long(&tp->tte_data);
1319	if ((data & TD_FAKE) == 0) {
1320		m = PHYS_TO_VM_PAGE(TD_PA(data));
1321		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1322		if ((data & TD_WIRED) != 0)
1323			pm->pm_stats.wired_count--;
1324		if ((data & TD_PV) != 0) {
1325			if ((data & TD_W) != 0)
1326				vm_page_dirty(m);
1327			if ((data & TD_REF) != 0)
1328				vm_page_aflag_set(m, PGA_REFERENCED);
1329			if (TAILQ_EMPTY(&m->md.tte_list))
1330				vm_page_aflag_clear(m, PGA_WRITEABLE);
1331			pm->pm_stats.resident_count--;
1332		}
1333		pmap_cache_remove(m, va);
1334	}
1335	TTE_ZERO(tp);
1336	if (PMAP_REMOVE_DONE(pm))
1337		return (0);
1338	return (1);
1339}
1340
1341/*
1342 * Remove the given range of addresses from the specified map.
1343 */
1344void
1345pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
1346{
1347	struct tte *tp;
1348	vm_offset_t va;
1349
1350	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
1351	    pm->pm_context[curcpu], start, end);
1352	if (PMAP_REMOVE_DONE(pm))
1353		return;
1354	rw_wlock(&tte_list_global_lock);
1355	PMAP_LOCK(pm);
1356	if (end - start > PMAP_TSB_THRESH) {
1357		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
1358		tlb_context_demap(pm);
1359	} else {
1360		for (va = start; va < end; va += PAGE_SIZE)
1361			if ((tp = tsb_tte_lookup(pm, va)) != NULL &&
1362			    !pmap_remove_tte(pm, NULL, tp, va))
1363				break;
1364		tlb_range_demap(pm, start, end - 1);
1365	}
1366	PMAP_UNLOCK(pm);
1367	rw_wunlock(&tte_list_global_lock);
1368}
1369
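/*
 * Remove all mappings of the given physical page from every pmap in which
 * it resides and clear the page's writeable attribute.
 */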
1370void
1371pmap_remove_all(vm_page_t m)
1372{
1373	struct pmap *pm;
1374	struct tte *tpn;
1375	struct tte *tp;
1376	vm_offset_t va;
1377
1378	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1379	    ("pmap_remove_all: page %p is not managed", m));
1380	rw_wlock(&tte_list_global_lock);
1381	for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
1382		tpn = TAILQ_NEXT(tp, tte_link);
1383		if ((tp->tte_data & TD_PV) == 0)
1384			continue;
1385		pm = TTE_GET_PMAP(tp);
1386		va = TTE_GET_VA(tp);
1387		PMAP_LOCK(pm);
1388		if ((tp->tte_data & TD_WIRED) != 0)
1389			pm->pm_stats.wired_count--;
1390		if ((tp->tte_data & TD_REF) != 0)
1391			vm_page_aflag_set(m, PGA_REFERENCED);
1392		if ((tp->tte_data & TD_W) != 0)
1393			vm_page_dirty(m);
1394		tp->tte_data &= ~TD_V;
1395		tlb_page_demap(pm, va);
1396		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
1397		pm->pm_stats.resident_count--;
1398		pmap_cache_remove(m, va);
1399		TTE_ZERO(tp);
1400		PMAP_UNLOCK(pm);
1401	}
1402	vm_page_aflag_clear(m, PGA_WRITEABLE);
1403	rw_wunlock(&tte_list_global_lock);
1404}
1405
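/*
 * Per-TTE callback for pmap_protect(): revoke write permission from a single
 * mapping, transferring the modify status to the page beforehand.
 */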
1406static int
1407pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
1408    vm_offset_t va)
1409{
1410	u_long data;
1411	vm_page_t m;
1412
1413	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1414	data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
1415	if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
1416		m = PHYS_TO_VM_PAGE(TD_PA(data));
1417		vm_page_dirty(m);
1418	}
1419	return (1);
1420}
1421
1422/*
1423 * Set the physical protection on the specified range of this map as requested.
1424 */
1425void
1426pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1427{
1428	vm_offset_t va;
1429	struct tte *tp;
1430
1431	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
1432	    pm->pm_context[curcpu], sva, eva, prot);
1433
1434	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1435		pmap_remove(pm, sva, eva);
1436		return;
1437	}
1438
1439	if (prot & VM_PROT_WRITE)
1440		return;
1441
1442	PMAP_LOCK(pm);
1443	if (eva - sva > PMAP_TSB_THRESH) {
1444		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
1445		tlb_context_demap(pm);
1446	} else {
1447		for (va = sva; va < eva; va += PAGE_SIZE)
1448			if ((tp = tsb_tte_lookup(pm, va)) != NULL)
1449				pmap_protect_tte(pm, NULL, tp, va);
1450		tlb_range_demap(pm, sva, eva - 1);
1451	}
1452	PMAP_UNLOCK(pm);
1453}
1454
1455/*
1456 * Map the given physical page at the specified virtual address in the
1457 * target pmap with the protection requested.  If specified the page
1458 * will be wired down.
1459 */
1460int
1461pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1462    u_int flags, int8_t psind)
1463{
1464	int rv;
1465
1466	rw_wlock(&tte_list_global_lock);
1467	PMAP_LOCK(pm);
1468	rv = pmap_enter_locked(pm, va, m, prot, flags, psind);
1469	rw_wunlock(&tte_list_global_lock);
1470	PMAP_UNLOCK(pm);
1471	return (rv);
1472}
1473
1474/*
1475 * Map the given physical page at the specified virtual address in the
1476 * target pmap with the protection requested.  If specified the page
1477 * will be wired down.
1478 *
1479 * The page queues and pmap must be locked.
1480 */
1481static int
1482pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1483    u_int flags, int8_t psind __unused)
1484{
1485	struct tte *tp;
1486	vm_paddr_t pa;
1487	vm_page_t real;
1488	u_long data;
1489	boolean_t wired;
1490
1491	rw_assert(&tte_list_global_lock, RA_WLOCKED);
1492	PMAP_LOCK_ASSERT(pm, MA_OWNED);
1493	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1494		VM_OBJECT_ASSERT_LOCKED(m->object);
1495	PMAP_STATS_INC(pmap_nenter);
1496	pa = VM_PAGE_TO_PHYS(m);
1497	wired = (flags & PMAP_ENTER_WIRED) != 0;
1498
1499	/*
1500	 * If this is a fake page from the device_pager, but it covers actual
1501	 * physical memory, convert to the real backing page.
1502	 */
1503	if ((m->flags & PG_FICTITIOUS) != 0) {
1504		real = vm_phys_paddr_to_vm_page(pa);
1505		if (real != NULL)
1506			m = real;
1507	}
1508
1509	CTR6(KTR_PMAP,
1510	    "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
1511	    pm->pm_context[curcpu], m, va, pa, prot, wired);
1512
1513	/*
1514	 * If there is an existing mapping, and the physical address has not
1515	 * changed, it must be a protection or wiring change.
1516	 */
1517	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
1518		CTR0(KTR_PMAP, "pmap_enter_locked: update");
1519		PMAP_STATS_INC(pmap_nenter_update);
1520
1521		/*
1522		 * Wiring change, just update stats.
1523		 */
1524		if (wired) {
1525			if ((tp->tte_data & TD_WIRED) == 0) {
1526				tp->tte_data |= TD_WIRED;
1527				pm->pm_stats.wired_count++;
1528			}
1529		} else {
1530			if ((tp->tte_data & TD_WIRED) != 0) {
1531				tp->tte_data &= ~TD_WIRED;
1532				pm->pm_stats.wired_count--;
1533			}
1534		}
1535
1536		/*
1537		 * Save the old bits and clear the ones we're interested in.
1538		 */
1539		data = tp->tte_data;
1540		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
1541
1542		/*
1543		 * If we're turning off write permissions, sense modify status.
1544		 */
1545		if ((prot & VM_PROT_WRITE) != 0) {
1546			tp->tte_data |= TD_SW;
1547			if (wired)
1548				tp->tte_data |= TD_W;
1549			if ((m->oflags & VPO_UNMANAGED) == 0)
1550				vm_page_aflag_set(m, PGA_WRITEABLE);
1551		} else if ((data & TD_W) != 0)
1552			vm_page_dirty(m);
1553
1554		/*
1555		 * If we're turning on execute permissions, flush the icache.
1556		 */
1557		if ((prot & VM_PROT_EXECUTE) != 0) {
1558			if ((data & TD_EXEC) == 0)
1559				icache_page_inval(pa);
1560			tp->tte_data |= TD_EXEC;
1561		}
1562
1563		/*
1564		 * Delete the old mapping.
1565		 */
1566		tlb_page_demap(pm, TTE_GET_VA(tp));
1567	} else {
1568		/*
1569		 * If there is an existing mapping, but it's for a different
1570		 * physical address, delete the old mapping.
1571		 */
1572		if (tp != NULL) {
1573			CTR0(KTR_PMAP, "pmap_enter_locked: replace");
1574			PMAP_STATS_INC(pmap_nenter_replace);
1575			pmap_remove_tte(pm, NULL, tp, va);
1576			tlb_page_demap(pm, va);
1577		} else {
1578			CTR0(KTR_PMAP, "pmap_enter_locked: new");
1579			PMAP_STATS_INC(pmap_nenter_new);
1580		}
1581
1582		/*
1583		 * Now set up the data and install the new mapping.
1584		 */
1585		data = TD_V | TD_8K | TD_PA(pa);
1586		if (pm == kernel_pmap)
1587			data |= TD_P;
1588		if ((prot & VM_PROT_WRITE) != 0) {
1589			data |= TD_SW;
1590			if ((m->oflags & VPO_UNMANAGED) == 0)
1591				vm_page_aflag_set(m, PGA_WRITEABLE);
1592		}
1593		if (prot & VM_PROT_EXECUTE) {
1594			data |= TD_EXEC;
1595			icache_page_inval(pa);
1596		}
1597
1598		/*
1599		 * If it's wired, update stats.  We also don't need reference or
1600		 * modify tracking for wired mappings, so set the bits now.
1601		 */
1602		if (wired) {
1603			pm->pm_stats.wired_count++;
1604			data |= TD_REF | TD_WIRED;
1605			if ((prot & VM_PROT_WRITE) != 0)
1606				data |= TD_W;
1607		}
1608
1609		tsb_tte_enter(pm, m, va, TS_8K, data);
1610	}
1611
1612	return (KERN_SUCCESS);
1613}
1614
1615/*
1616 * Maps a sequence of resident pages belonging to the same object.
1617 * The sequence begins with the given page m_start.  This page is
1618 * mapped at the given virtual address start.  Each subsequent page is
1619 * mapped at a virtual address that is offset from start by the same
1620 * amount as the page is offset from m_start within the object.  The
1621 * last page in the sequence is the page with the largest offset from
1622 * m_start that can be mapped at a virtual address less than the given
1623 * virtual address end.  Not every virtual page between start and end
1624 * is mapped; only those for which a resident page exists with the
1625 * corresponding offset from m_start are mapped.
1626 */
1627void
1628pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1629    vm_page_t m_start, vm_prot_t prot)
1630{
1631	vm_page_t m;
1632	vm_pindex_t diff, psize;
1633
1634	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1635
1636	psize = atop(end - start);
1637	m = m_start;
1638	rw_wlock(&tte_list_global_lock);
1639	PMAP_LOCK(pm);
1640	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1641		pmap_enter_locked(pm, start + ptoa(diff), m, prot &
1642		    (VM_PROT_READ | VM_PROT_EXECUTE), 0, 0);
1643		m = TAILQ_NEXT(m, listq);
1644	}
1645	rw_wunlock(&tte_list_global_lock);
1646	PMAP_UNLOCK(pm);
1647}
1648
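/*
 * Enter a mapping restricted to read and execute permissions and never
 * wired, as used by the VM system when prefaulting neighboring pages.
 */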
1649void
1650pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1651{
1652
1653	rw_wlock(&tte_list_global_lock);
1654	PMAP_LOCK(pm);
1655	pmap_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1656	    0, 0);
1657	rw_wunlock(&tte_list_global_lock);
1658	PMAP_UNLOCK(pm);
1659}
1660
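/*
 * pmap_object_init_pt() is a no-op on sparc64; it merely asserts that it is
 * only called for device-backed objects.
 */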
1661void
1662pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
1663    vm_pindex_t pindex, vm_size_t size)
1664{
1665
1666	VM_OBJECT_ASSERT_WLOCKED(object);
1667	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1668	    ("pmap_object_init_pt: non-device object"));
1669}
1670
1671/*
1672 * Change the wiring attribute for a map/virtual-address pair.
1673 * The mapping must already exist in the pmap.
1674 */
1675void
1676pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
1677{
1678	struct tte *tp;
1679	u_long data;
1680
1681	PMAP_LOCK(pm);
1682	if ((tp = tsb_tte_lookup(pm, va)) != NULL) {
1683		if (wired) {
1684			data = atomic_set_long(&tp->tte_data, TD_WIRED);
1685			if ((data & TD_WIRED) == 0)
1686				pm->pm_stats.wired_count++;
1687		} else {
1688			data = atomic_clear_long(&tp->tte_data, TD_WIRED);
1689			if ((data & TD_WIRED) != 0)
1690				pm->pm_stats.wired_count--;
1691		}
1692	}
1693	PMAP_UNLOCK(pm);
1694}
1695
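/*
 * Per-TTE callback for pmap_copy(): duplicate a non-fake mapping into the
 * destination pmap with the writable, referenced and modified bits cleared.
 */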
1696static int
1697pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
1698    vm_offset_t va)
1699{
1700	vm_page_t m;
1701	u_long data;
1702
1703	if ((tp->tte_data & TD_FAKE) != 0)
1704		return (1);
1705	if (tsb_tte_lookup(dst_pmap, va) == NULL) {
1706		data = tp->tte_data &
1707		    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
1708		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
1709		tsb_tte_enter(dst_pmap, m, va, TS_8K, data);
1710	}
1711	return (1);
1712}
1713
1714void
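/*
 * Copy the valid mappings in the given range from the source pmap to the
 * destination pmap.  This is only useful when the two address ranges are
 * identical; otherwise the function is a no-op.
 */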
1715pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
1716    vm_size_t len, vm_offset_t src_addr)
1717{
1718	struct tte *tp;
1719	vm_offset_t va;
1720
1721	if (dst_addr != src_addr)
1722		return;
1723	rw_wlock(&tte_list_global_lock);
1724	if (dst_pmap < src_pmap) {
1725		PMAP_LOCK(dst_pmap);
1726		PMAP_LOCK(src_pmap);
1727	} else {
1728		PMAP_LOCK(src_pmap);
1729		PMAP_LOCK(dst_pmap);
1730	}
1731	if (len > PMAP_TSB_THRESH) {
1732		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
1733		    pmap_copy_tte);
1734		tlb_context_demap(dst_pmap);
1735	} else {
1736		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
1737			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
1738				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
1739		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
1740	}
1741	rw_wunlock(&tte_list_global_lock);
1742	PMAP_UNLOCK(src_pmap);
1743	PMAP_UNLOCK(dst_pmap);
1744}
1745
1746void
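/*
 * Zero a page of physical memory, using the cheapest safe method for its
 * current cache color: the direct map if the page's color matches its
 * physical color, a physical-address ASI if it is uncacheable, or a
 * correctly colored temporary mapping otherwise.
 */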
1747pmap_zero_page(vm_page_t m)
1748{
1749	struct tte *tp;
1750	vm_offset_t va;
1751	vm_paddr_t pa;
1752
1753	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1754	    ("pmap_zero_page: fake page"));
1755	PMAP_STATS_INC(pmap_nzero_page);
1756	pa = VM_PAGE_TO_PHYS(m);
1757	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1758		PMAP_STATS_INC(pmap_nzero_page_c);
1759		va = TLB_PHYS_TO_DIRECT(pa);
1760		cpu_block_zero((void *)va, PAGE_SIZE);
1761	} else if (m->md.color == -1) {
1762		PMAP_STATS_INC(pmap_nzero_page_nc);
1763		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1764	} else {
1765		PMAP_STATS_INC(pmap_nzero_page_oc);
1766		PMAP_LOCK(kernel_pmap);
1767		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1768		tp = tsb_kvtotte(va);
1769		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1770		tp->tte_vpn = TV_VPN(va, TS_8K);
1771		cpu_block_zero((void *)va, PAGE_SIZE);
1772		tlb_page_demap(kernel_pmap, va);
1773		PMAP_UNLOCK(kernel_pmap);
1774	}
1775}
1776
1777void
1778pmap_zero_page_area(vm_page_t m, int off, int size)
1779{
1780	struct tte *tp;
1781	vm_offset_t va;
1782	vm_paddr_t pa;
1783
1784	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1785	    ("pmap_zero_page_area: fake page"));
1786	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
1787	PMAP_STATS_INC(pmap_nzero_page_area);
1788	pa = VM_PAGE_TO_PHYS(m);
1789	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1790		PMAP_STATS_INC(pmap_nzero_page_area_c);
1791		va = TLB_PHYS_TO_DIRECT(pa);
1792		bzero((void *)(va + off), size);
1793	} else if (m->md.color == -1) {
1794		PMAP_STATS_INC(pmap_nzero_page_area_nc);
1795		aszero(ASI_PHYS_USE_EC, pa + off, size);
1796	} else {
1797		PMAP_STATS_INC(pmap_nzero_page_area_oc);
1798		PMAP_LOCK(kernel_pmap);
1799		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
1800		tp = tsb_kvtotte(va);
1801		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1802		tp->tte_vpn = TV_VPN(va, TS_8K);
1803		bzero((void *)(va + off), size);
1804		tlb_page_demap(kernel_pmap, va);
1805		PMAP_UNLOCK(kernel_pmap);
1806	}
1807}
1808
1809void
1810pmap_zero_page_idle(vm_page_t m)
1811{
1812	struct tte *tp;
1813	vm_offset_t va;
1814	vm_paddr_t pa;
1815
1816	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1817	    ("pmap_zero_page_idle: fake page"));
1818	PMAP_STATS_INC(pmap_nzero_page_idle);
1819	pa = VM_PAGE_TO_PHYS(m);
1820	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
1821		PMAP_STATS_INC(pmap_nzero_page_idle_c);
1822		va = TLB_PHYS_TO_DIRECT(pa);
1823		cpu_block_zero((void *)va, PAGE_SIZE);
1824	} else if (m->md.color == -1) {
1825		PMAP_STATS_INC(pmap_nzero_page_idle_nc);
1826		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
1827	} else {
1828		PMAP_STATS_INC(pmap_nzero_page_idle_oc);
1829		va = pmap_idle_map + (m->md.color * PAGE_SIZE);
1830		tp = tsb_kvtotte(va);
1831		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
1832		tp->tte_vpn = TV_VPN(va, TS_8K);
1833		cpu_block_zero((void *)va, PAGE_SIZE);
1834		tlb_page_demap(kernel_pmap, va);
1835	}
1836}
1837
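/*
 * Copy the contents of one physical page to another.  Depending on the
 * D-cache colors of the source and destination pages, the copy is done
 * through the direct map, with physical-address accesses, or through
 * temporary kernel mappings of the appropriate colors.
 */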
1838void
1839pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1840{
1841	vm_offset_t vdst;
1842	vm_offset_t vsrc;
1843	vm_paddr_t pdst;
1844	vm_paddr_t psrc;
1845	struct tte *tp;
1846
1847	KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
1848	    ("pmap_copy_page: fake dst page"));
1849	KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
1850	    ("pmap_copy_page: fake src page"));
1851	PMAP_STATS_INC(pmap_ncopy_page);
1852	pdst = VM_PAGE_TO_PHYS(mdst);
1853	psrc = VM_PAGE_TO_PHYS(msrc);
1854	if (dcache_color_ignore != 0 ||
1855	    (msrc->md.color == DCACHE_COLOR(psrc) &&
1856	    mdst->md.color == DCACHE_COLOR(pdst))) {
1857		PMAP_STATS_INC(pmap_ncopy_page_c);
1858		vdst = TLB_PHYS_TO_DIRECT(pdst);
1859		vsrc = TLB_PHYS_TO_DIRECT(psrc);
1860		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1861	} else if (msrc->md.color == -1 && mdst->md.color == -1) {
1862		PMAP_STATS_INC(pmap_ncopy_page_nc);
1863		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
1864	} else if (msrc->md.color == -1) {
1865		if (mdst->md.color == DCACHE_COLOR(pdst)) {
1866			PMAP_STATS_INC(pmap_ncopy_page_dc);
1867			vdst = TLB_PHYS_TO_DIRECT(pdst);
1868			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1869			    PAGE_SIZE);
1870		} else {
1871			PMAP_STATS_INC(pmap_ncopy_page_doc);
1872			PMAP_LOCK(kernel_pmap);
1873			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1874			tp = tsb_kvtotte(vdst);
1875			tp->tte_data =
1876			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1877			tp->tte_vpn = TV_VPN(vdst, TS_8K);
1878			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
1879			    PAGE_SIZE);
1880			tlb_page_demap(kernel_pmap, vdst);
1881			PMAP_UNLOCK(kernel_pmap);
1882		}
1883	} else if (mdst->md.color == -1) {
1884		if (msrc->md.color == DCACHE_COLOR(psrc)) {
1885			PMAP_STATS_INC(pmap_ncopy_page_sc);
1886			vsrc = TLB_PHYS_TO_DIRECT(psrc);
1887			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1888			    PAGE_SIZE);
1889		} else {
1890			PMAP_STATS_INC(pmap_ncopy_page_soc);
1891			PMAP_LOCK(kernel_pmap);
1892			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
1893			tp = tsb_kvtotte(vsrc);
1894			tp->tte_data =
1895			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1896			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1897			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
1898			    PAGE_SIZE);
1899			tlb_page_demap(kernel_pmap, vsrc);
1900			PMAP_UNLOCK(kernel_pmap);
1901		}
1902	} else {
1903		PMAP_STATS_INC(pmap_ncopy_page_oc);
1904		PMAP_LOCK(kernel_pmap);
1905		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
1906		tp = tsb_kvtotte(vdst);
1907		tp->tte_data =
1908		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
1909		tp->tte_vpn = TV_VPN(vdst, TS_8K);
1910		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
1911		tp = tsb_kvtotte(vsrc);
1912		tp->tte_data =
1913		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
1914		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
1915		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
1916		tlb_page_demap(kernel_pmap, vdst);
1917		tlb_page_demap(kernel_pmap, vsrc);
1918		PMAP_UNLOCK(kernel_pmap);
1919	}
1920}
1921
1922int unmapped_buf_allowed;
1923
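/*
 * Copy xfersize bytes between the given page arrays at the given offsets.
 * This is only needed for unmapped buffers; unmapped_buf_allowed is left
 * at zero on sparc64, so callers are not expected to reach this routine.
 */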
1924void
1925pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
1926    vm_offset_t b_offset, int xfersize)
1927{
1928
1929	panic("pmap_copy_pages: not implemented");
1930}
1931
1932/*
1933 * Return TRUE if the given pmap has a mapping of this page among the
1934 * first 16 managed mappings on the page's TTE list.  This count may
1935 * be changed upwards or downwards in the future; it is only necessary
1936 * that TRUE be returned for a small subset of pmaps for proper page
1937 * aging.
1938 */
1939boolean_t
1940pmap_page_exists_quick(pmap_t pm, vm_page_t m)
1941{
1942	struct tte *tp;
1943	int loops;
1944	boolean_t rv;
1945
1946	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1947	    ("pmap_page_exists_quick: page %p is not managed", m));
1948	loops = 0;
1949	rv = FALSE;
1950	rw_wlock(&tte_list_global_lock);
1951	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
1952		if ((tp->tte_data & TD_PV) == 0)
1953			continue;
1954		if (TTE_GET_PMAP(tp) == pm) {
1955			rv = TRUE;
1956			break;
1957		}
1958		if (++loops >= 16)
1959			break;
1960	}
1961	rw_wunlock(&tte_list_global_lock);
1962	return (rv);
1963}
1964
1965/*
1966 * Return the number of managed mappings to the given physical page
1967 * that are wired.
1968 */
1969int
1970pmap_page_wired_mappings(vm_page_t m)
1971{
1972	struct tte *tp;
1973	int count;
1974
1975	count = 0;
1976	if ((m->oflags & VPO_UNMANAGED) != 0)
1977		return (count);
1978	rw_wlock(&tte_list_global_lock);
1979	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
1980		if ((tp->tte_data & (TD_PV | TD_WIRED)) == (TD_PV | TD_WIRED))
1981			count++;
1982	rw_wunlock(&tte_list_global_lock);
1983	return (count);
1984}
1985
1986/*
1987 * Remove all pages from the specified address space; this aids process exit
1988 * speed.  It is much faster than pmap_remove in the case of running down
1989 * an entire address space.  Only works for the current pmap.
1990 */
1991void
1992pmap_remove_pages(pmap_t pm)
1993{
1994
1995}
1996
1997/*
1998 * Returns TRUE if the given page has a managed mapping.
1999 */
2000boolean_t
2001pmap_page_is_mapped(vm_page_t m)
2002{
2003	struct tte *tp;
2004	boolean_t rv;
2005
2006	rv = FALSE;
2007	if ((m->oflags & VPO_UNMANAGED) != 0)
2008		return (rv);
2009	rw_wlock(&tte_list_global_lock);
2010	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link)
2011		if ((tp->tte_data & TD_PV) != 0) {
2012			rv = TRUE;
2013			break;
2014		}
2015	rw_wunlock(&tte_list_global_lock);
2016	return (rv);
2017}
2018
2019/*
2020 * Return a count of reference bits for a page, clearing those bits.
2021 * It is not necessary for every reference bit to be cleared, but it
2022 * is necessary that 0 only be returned when there are truly no
2023 * reference bits set.
2024 *
2025 * XXX: The exact number of bits to check and clear is a matter that
2026 * should be tested and standardized at some point in the future for
2027 * optimal aging of shared pages.
2028 */
2029int
2030pmap_ts_referenced(vm_page_t m)
2031{
2032	struct tte *tpf;
2033	struct tte *tpn;
2034	struct tte *tp;
2035	u_long data;
2036	int count;
2037
2038	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2039	    ("pmap_ts_referenced: page %p is not managed", m));
2040	count = 0;
2041	rw_wlock(&tte_list_global_lock);
2042	if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
2043		tpf = tp;
2044		do {
2045			tpn = TAILQ_NEXT(tp, tte_link);
2046			TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
2047			TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
2048			if ((tp->tte_data & TD_PV) == 0)
2049				continue;
2050			data = atomic_clear_long(&tp->tte_data, TD_REF);
2051			if ((data & TD_REF) != 0 && ++count > 4)
2052				break;
2053		} while ((tp = tpn) != NULL && tp != tpf);
2054	}
2055	rw_wunlock(&tte_list_global_lock);
2056	return (count);
2057}
2058
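/*
 * Return whether any managed mapping of the given page has the modified
 * (TD_W) bit set in its TTE.
 */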
2059boolean_t
2060pmap_is_modified(vm_page_t m)
2061{
2062	struct tte *tp;
2063	boolean_t rv;
2064
2065	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2066	    ("pmap_is_modified: page %p is not managed", m));
2067	rv = FALSE;
2068
2069	/*
2070	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2071	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2072	 * is clear, no TTEs can have TD_W set.
2073	 */
2074	VM_OBJECT_ASSERT_WLOCKED(m->object);
2075	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2076		return (rv);
2077	rw_wlock(&tte_list_global_lock);
2078	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2079		if ((tp->tte_data & TD_PV) == 0)
2080			continue;
2081		if ((tp->tte_data & TD_W) != 0) {
2082			rv = TRUE;
2083			break;
2084		}
2085	}
2086	rw_wunlock(&tte_list_global_lock);
2087	return (rv);
2088}
2089
2090/*
2091 *	pmap_is_prefaultable:
2092 *
2093 *	Return whether or not the specified virtual address is eligible
2094 *	for prefault.
2095 */
2096boolean_t
2097pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2098{
2099	boolean_t rv;
2100
2101	PMAP_LOCK(pmap);
2102	rv = tsb_tte_lookup(pmap, addr) == NULL;
2103	PMAP_UNLOCK(pmap);
2104	return (rv);
2105}
2106
2107/*
2108 * Return whether or not the specified physical page was referenced
2109 * in any physical maps.
2110 */
2111boolean_t
2112pmap_is_referenced(vm_page_t m)
2113{
2114	struct tte *tp;
2115	boolean_t rv;
2116
2117	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2118	    ("pmap_is_referenced: page %p is not managed", m));
2119	rv = FALSE;
2120	rw_wlock(&tte_list_global_lock);
2121	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2122		if ((tp->tte_data & TD_PV) == 0)
2123			continue;
2124		if ((tp->tte_data & TD_REF) != 0) {
2125			rv = TRUE;
2126			break;
2127		}
2128	}
2129	rw_wunlock(&tte_list_global_lock);
2130	return (rv);
2131}
2132
2133/*
2134 * This function is advisory.
2135 */
2136void
2137pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2138{
2139}
2140
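/*
 * Clear the modify bit (TD_W) in every managed mapping of the given page,
 * demapping each mapping that actually had it set.
 */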
2141void
2142pmap_clear_modify(vm_page_t m)
2143{
2144	struct tte *tp;
2145	u_long data;
2146
2147	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2148	    ("pmap_clear_modify: page %p is not managed", m));
2149	VM_OBJECT_ASSERT_WLOCKED(m->object);
2150	KASSERT(!vm_page_xbusied(m),
2151	    ("pmap_clear_modify: page %p is exclusive busied", m));
2152
2153	/*
2154	 * If the page is not PGA_WRITEABLE, then no TTEs can have TD_W set.
2155	 * If the object containing the page is locked and the page is not
2156	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2157	 */
2158	if ((m->aflags & PGA_WRITEABLE) == 0)
2159		return;
2160	rw_wlock(&tte_list_global_lock);
2161	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2162		if ((tp->tte_data & TD_PV) == 0)
2163			continue;
2164		data = atomic_clear_long(&tp->tte_data, TD_W);
2165		if ((data & TD_W) != 0)
2166			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2167	}
2168	rw_wunlock(&tte_list_global_lock);
2169}
2170
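/*
 * Revoke write access to the given page by clearing the software and
 * hardware write bits (TD_SW, TD_W) in each managed mapping, dirtying the
 * page for any mapping through which it was actually written.
 */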
2171void
2172pmap_remove_write(vm_page_t m)
2173{
2174	struct tte *tp;
2175	u_long data;
2176
2177	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2178	    ("pmap_remove_write: page %p is not managed", m));
2179
2180	/*
2181	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2182	 * set by another thread while the object is locked.  Thus,
2183	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2184	 */
2185	VM_OBJECT_ASSERT_WLOCKED(m->object);
2186	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2187		return;
2188	rw_wlock(&tte_list_global_lock);
2189	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
2190		if ((tp->tte_data & TD_PV) == 0)
2191			continue;
2192		data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
2193		if ((data & TD_W) != 0) {
2194			vm_page_dirty(m);
2195			tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
2196		}
2197	}
2198	vm_page_aflag_clear(m, PGA_WRITEABLE);
2199	rw_wunlock(&tte_list_global_lock);
2200}
2201
2202int
2203pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
2204{
2205
2206	/* TODO */
2207	return (0);
2208}
2209
2210/*
2211 * Activate a user pmap.  The pmap must be activated before its address space
2212 * can be accessed in any way.
2213 */
2214void
2215pmap_activate(struct thread *td)
2216{
2217	struct vmspace *vm;
2218	struct pmap *pm;
2219	int context;
2220
2221	critical_enter();
2222	vm = td->td_proc->p_vmspace;
2223	pm = vmspace_pmap(vm);
2224
2225	context = PCPU_GET(tlb_ctx);
2226	if (context == PCPU_GET(tlb_ctx_max)) {
2227		tlb_flush_user();
2228		context = PCPU_GET(tlb_ctx_min);
2229	}
2230	PCPU_SET(tlb_ctx, context + 1);
2231
2232	pm->pm_context[curcpu] = context;
2233#ifdef SMP
2234	CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
2235	atomic_store_acq_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
2236#else
2237	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
2238	PCPU_SET(pmap, pm);
2239#endif
2240
2241	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
2242	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
2243	stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
2244	    TLB_CXR_PGSZ_MASK) | context);
2245	flush(KERNBASE);
2246	critical_exit();
2247}
2248
2249void
2250pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2251{
2252
2253}
2254
2255/*
2256 * Increase the starting virtual address of the given mapping if a
2257 * different alignment might result in more superpage mappings.
2258 */
2259void
2260pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2261    vm_offset_t *addr, vm_size_t size)
2262{
2263
2264}
2265