pmap.c revision 261996
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/pmap.c 261996 2014-02-16 20:26:22Z marcel $");
50
51#include "opt_pmap.h"
52
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/lock.h>
56#include <sys/mman.h>
57#include <sys/mutex.h>
58#include <sys/proc.h>
59#include <sys/rwlock.h>
60#include <sys/smp.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63
64#include <vm/vm.h>
65#include <vm/vm_param.h>
66#include <vm/vm_page.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_pageout.h>
70#include <vm/uma.h>
71
72#include <machine/bootinfo.h>
73#include <machine/efi.h>
74#include <machine/md_var.h>
75#include <machine/pal.h>
76
77/*
78 *	Manages physical address maps.
79 *
80 *	Since the information managed by this module is
81 *	also stored by the logical address mapping module,
82 *	this module may throw away valid virtual-to-physical
83 *	mappings at almost any time.  However, invalidations
84 *	of virtual-to-physical mappings must be done as
85 *	requested.
86 *
87 *	In order to cope with hardware architectures which
88 *	make virtual-to-physical map invalidates expensive,
89 * this module may delay invalidation or reduced-protection
90 *	operations until such time as they are actually
91 *	necessary.  This module is given full information as
92 *	to which processors are currently using which maps,
93 *	and to when physical maps must be made correct.
94 */
95
96/*
97 * Following the Linux model, region IDs are allocated in groups of
98 * eight so that a single region ID can be used for as many RRs as we
99 * want by encoding the RR number into the low bits of the ID.
100 *
101 * We reserve region ID 0 for the kernel and allocate the remaining
102 * IDs for user pmaps.
103 *
104 * Region 0-3:	User virtually mapped
105 * Region 4:	PBVM and special mappings
106 * Region 5:	Kernel virtual memory
107 * Region 6:	Direct-mapped uncacheable
108 * Region 7:	Direct-mapped cacheable
109 */
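/*
 * For example (kernel case, as set up in pmap_bootstrap() below): the
 * kernel owns the first group of eight IDs (0-7), so region register 5
 * is programmed with RID 5, i.e. group base 0 with the region number
 * in the low bits.
 */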
110
111/* XXX move to a header. */
112extern uint64_t ia64_gateway_page[];
113
114#if !defined(DIAGNOSTIC)
115#define PMAP_INLINE __inline
116#else
117#define PMAP_INLINE
118#endif
119
120#ifdef PV_STATS
121#define PV_STAT(x)	do { x ; } while (0)
122#else
123#define PV_STAT(x)	do { } while (0)
124#endif
125
126#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
127#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
128#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
129#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
130#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
131#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
132#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
133#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
134
135#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
136#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
137#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
138#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
139
140#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
141
142/*
143 * Individual PV entries are stored in per-pmap chunks.  This saves
144 * space by eliminating the need to record the pmap within every PV
145 * entry.
146 */
147#if PAGE_SIZE == 8192
148#define	_NPCM	6
149#define	_NPCPV	337
150#define	_NPCS	2
151#elif PAGE_SIZE == 16384
152#define	_NPCM	11
153#define	_NPCPV	677
154#define	_NPCS	1
155#endif
156struct pv_chunk {
157	pmap_t			pc_pmap;
158	TAILQ_ENTRY(pv_chunk)	pc_list;
159	u_long			pc_map[_NPCM];	/* bitmap; 1 = free */
160	TAILQ_ENTRY(pv_chunk)	pc_lru;
161	u_long			pc_spare[_NPCS];
162	struct pv_entry		pc_pventry[_NPCPV];
163};
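
/*
 * The _NPCPV values above fill a chunk to exactly one page: with
 * PAGE_SIZE == 8192 the fixed fields take 104 bytes and the 337 pv
 * entries (24 bytes each: a pv_va plus a TAILQ_ENTRY) account for the
 * remaining 8088 bytes.  The CTASSERT further down enforces this.
 */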
164
165/*
166 * The VHPT bucket head structure.
167 */
168struct ia64_bucket {
169	uint64_t	chain;
170	struct mtx	mutex;
171	u_int		length;
172};
173
174/*
175 * Statically allocated kernel pmap
176 */
177struct pmap kernel_pmap_store;
178
179vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
180vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
181
182/*
183 * Kernel virtual memory management.
184 */
185static int nkpt;
186extern struct ia64_lpte ***ia64_kptdir;
187
188#define KPTE_DIR0_INDEX(va) \
189	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
190#define KPTE_DIR1_INDEX(va) \
191	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
192#define KPTE_PTE_INDEX(va) \
193	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
194#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
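/*
 * Worked example, assuming PAGE_SIZE == 8192 (PAGE_SHIFT == 13) and
 * 8-byte pointers: a leaf page holds NKPTEPG == 256 PTEs of 32 bytes
 * each, so KPTE_PTE_INDEX() takes VA bits 13..20.  A directory page
 * holds 1024 pointers, so KPTE_DIR1_INDEX() takes bits 21..30 and
 * KPTE_DIR0_INDEX() takes bits 31..40, for 2^41 bytes of mappable KVA.
 */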
195
196vm_offset_t kernel_vm_end;
197
198/* Values for ptc.e. XXX values for SKI. */
199static uint64_t pmap_ptc_e_base = 0x100000000;
200static uint64_t pmap_ptc_e_count1 = 3;
201static uint64_t pmap_ptc_e_count2 = 2;
202static uint64_t pmap_ptc_e_stride1 = 0x2000;
203static uint64_t pmap_ptc_e_stride2 = 0x100000000;
204
205struct mtx pmap_ptc_mutex;
206
207/*
208 * Data for the RID allocator
209 */
210static int pmap_ridcount;
211static int pmap_rididx;
212static int pmap_ridmapsz;
213static int pmap_ridmax;
214static uint64_t *pmap_ridmap;
215struct mtx pmap_ridmutex;
216
217static struct rwlock_padalign pvh_global_lock;
218
219/*
220 * Data for the pv entry allocation mechanism
221 */
222static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
223static int pv_entry_count;
224
225/*
226 * Data for allocating PTEs for user processes.
227 */
228static uma_zone_t ptezone;
229
230/*
231 * Virtual Hash Page Table (VHPT) data.
232 */
233/* SYSCTL_DECL(_machdep); */
234static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
235
236struct ia64_bucket *pmap_vhpt_bucket;
237
238int pmap_vhpt_nbuckets;
239SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
240    &pmap_vhpt_nbuckets, 0, "");
241
242int pmap_vhpt_log2size = 0;
243TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
244SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
245    &pmap_vhpt_log2size, 0, "");
246
247static int pmap_vhpt_inserts;
248SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
249    &pmap_vhpt_inserts, 0, "");
250
251static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
252SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
253    NULL, 0, pmap_vhpt_population, "I", "");
254
255static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
256
257static void free_pv_chunk(struct pv_chunk *pc);
258static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
259static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
260static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
261
262static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
263		    vm_page_t m, vm_prot_t prot);
264static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
265static void	pmap_invalidate_all(void);
266static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
267		    vm_offset_t va, pv_entry_t pv, int freepte);
268static int	pmap_remove_vhpt(vm_offset_t va);
269static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
270		    vm_page_t m);
271
272static void
273pmap_initialize_vhpt(vm_offset_t vhpt)
274{
275	struct ia64_lpte *pte;
276	u_int i;
277
278	pte = (struct ia64_lpte *)vhpt;
279	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
280		pte[i].pte = 0;
281		pte[i].itir = 0;
282		pte[i].tag = 1UL << 63; /* Invalid tag */
283		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
284	}
285}
286
287#ifdef SMP
288vm_offset_t
289pmap_alloc_vhpt(void)
290{
291	vm_offset_t vhpt;
292	vm_page_t m;
293	vm_size_t size;
294
295	size = 1UL << pmap_vhpt_log2size;
296	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
297	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
298	    VM_MEMATTR_DEFAULT);
299	if (m != NULL) {
300		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
301		pmap_initialize_vhpt(vhpt);
302		return (vhpt);
303	}
304	return (0);
305}
306#endif
307
308/*
309 *	Bootstrap the system enough to run with virtual memory.
310 */
311void
312pmap_bootstrap()
313{
314	struct ia64_pal_result res;
315	vm_offset_t base;
316	size_t size;
317	int i, ridbits;
318
319	/*
320	 * Query the PAL Code to find the loop parameters for the
321	 * ptc.e instruction.
322	 */
323	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
324	if (res.pal_status != 0)
325		panic("Can't configure ptc.e parameters");
326	pmap_ptc_e_base = res.pal_result[0];
327	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
328	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
329	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
330	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
331	if (bootverbose)
332		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
333		       "stride1=0x%lx, stride2=0x%lx\n",
334		       pmap_ptc_e_base,
335		       pmap_ptc_e_count1,
336		       pmap_ptc_e_count2,
337		       pmap_ptc_e_stride1,
338		       pmap_ptc_e_stride2);
339
340	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
341
342	/*
343	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
344	 *
345	 * We currently need at least 19 bits in the RID because PID_MAX
346	 * can only be encoded in 17 bits and we need RIDs for 4 regions
347	 * per process. With PID_MAX equalling 99999 this means that we
348	 * need to be able to encode 399996 (=4*PID_MAX).
349	 * The Itanium processor only has 18 bits and the architected
350	 * minimum is exactly that. So, we cannot use a PID based scheme
351	 * in those cases. Enter pmap_ridmap...
352	 * We should avoid the map when running on a processor that has
353	 * implemented enough bits. This means that we should pass the
354	 * process/thread ID to pmap. This we currently don't do, so we
355	 * use the map anyway. However, we don't want to allocate a map
356	 * that is large enough to cover the range dictated by the number
357	 * of bits in the RID, because that may result in a RID map of
358	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
359 * The bottom line: we create a 32KB map when the processor only
360	 * implements 18 bits (or when we can't figure it out). Otherwise
361	 * we create a 64KB map.
362	 */
363	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
364	if (res.pal_status != 0) {
365		if (bootverbose)
366			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
367		ridbits = 18; /* guaranteed minimum */
368	} else {
369		ridbits = (res.pal_result[1] >> 8) & 0xff;
370		if (bootverbose)
371			printf("Processor supports %d Region ID bits\n",
372			    ridbits);
373	}
374	if (ridbits > 19)
375		ridbits = 19;
376
377	pmap_ridmax = (1 << ridbits);
378	pmap_ridmapsz = pmap_ridmax / 64;
379	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
380	pmap_ridmap[0] |= 0xff;
381	pmap_rididx = 0;
382	pmap_ridcount = 8;
383	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
384
385	/*
386	 * Allocate some memory for initial kernel 'page tables'.
387	 */
388	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
389	nkpt = 0;
390	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
391
392	/*
393	 * Determine a valid (mappable) VHPT size.
394	 */
395	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
396	if (pmap_vhpt_log2size == 0)
397		pmap_vhpt_log2size = 20;
398	else if (pmap_vhpt_log2size < 16)
399		pmap_vhpt_log2size = 16;
400	else if (pmap_vhpt_log2size > 28)
401		pmap_vhpt_log2size = 28;
402	if (pmap_vhpt_log2size & 1)
403		pmap_vhpt_log2size--;
404
405	size = 1UL << pmap_vhpt_log2size;
406	base = (uintptr_t)ia64_physmem_alloc(size, size);
407	if (base == 0)
408		panic("Unable to allocate VHPT");
409
410	PCPU_SET(md.vhpt, base);
411	if (bootverbose)
412		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
413
414	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
415	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
416	    sizeof(struct ia64_bucket), PAGE_SIZE);
417	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
418		/* Stolen memory is zeroed. */
419		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
420		    MTX_NOWITNESS | MTX_SPIN);
421	}
422
423	pmap_initialize_vhpt(base);
424	map_vhpt(base);
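	/*
	 * cr.pta: bit 0 enables the VHPT walker, bits 2-7 hold the
	 * table size (log2), bit 8 selects the long (hashed) format
	 * and bits 15-63 hold the size-aligned base address.
	 */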
425	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
426	ia64_srlz_i();
427
428	virtual_avail = VM_INIT_KERNEL_ADDRESS;
429	virtual_end = VM_MAX_KERNEL_ADDRESS;
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	PMAP_LOCK_INIT(kernel_pmap);
435	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
436		kernel_pmap->pm_rid[i] = 0;
437	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
438	PCPU_SET(md.current_pmap, kernel_pmap);
439
440 	/*
441	 * Initialize the global pv list lock.
442	 */
443	rw_init(&pvh_global_lock, "pmap pv global");
444
445	/* Region 5 is mapped via the VHPT. */
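	/*
	 * The region register value encodes RID 5 in bits 8-31, the
	 * preferred page size (PAGE_SHIFT) in bits 2-7 and the VHPT
	 * walker enable in bit 0.
	 */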
446	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
447
448	/*
449	 * Clear out any random TLB entries left over from booting.
450	 */
451	pmap_invalidate_all();
452
453	map_gateway_page();
454}
455
456static int
457pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
458{
459	int count, error, i;
460
461	count = 0;
462	for (i = 0; i < pmap_vhpt_nbuckets; i++)
463		count += pmap_vhpt_bucket[i].length;
464
465	error = SYSCTL_OUT(req, &count, sizeof(count));
466	return (error);
467}
468
469vm_offset_t
470pmap_page_to_va(vm_page_t m)
471{
472	vm_paddr_t pa;
473	vm_offset_t va;
474
475	pa = VM_PAGE_TO_PHYS(m);
476	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
477	    IA64_PHYS_TO_RR7(pa);
478	return (va);
479}
480
481/*
482 *	Initialize a vm_page's machine-dependent fields.
483 */
484void
485pmap_page_init(vm_page_t m)
486{
487
488	TAILQ_INIT(&m->md.pv_list);
489	m->md.memattr = VM_MEMATTR_DEFAULT;
490}
491
492/*
493 *	Initialize the pmap module.
494 *	Called by vm_init, to initialize any structures that the pmap
495 *	system needs to map virtual memory.
496 */
497void
498pmap_init(void)
499{
500
501	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
502	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
503}
504
505
506/***************************************************
507 * Manipulate TLBs for a pmap
508 ***************************************************/
509
510static void
511pmap_invalidate_page(vm_offset_t va)
512{
513	struct ia64_lpte *pte;
514	struct pcpu *pc;
515	uint64_t tag;
516	u_int vhpt_ofs;
517
518	critical_enter();
519
520	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
521	tag = ia64_ttag(va);
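	/*
	 * Invalidate the entry for va in every CPU's VHPT by storing
	 * the invalid tag (bit 63 set), but only if the entry still
	 * holds va's tag; the global ptc.ga purge below then flushes
	 * the TLBs.
	 */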
522	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
523		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
524		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
525	}
526
527	mtx_lock_spin(&pmap_ptc_mutex);
528
529	ia64_ptc_ga(va, PAGE_SHIFT << 2);
530	ia64_mf();
531	ia64_srlz_i();
532
533	mtx_unlock_spin(&pmap_ptc_mutex);
534
535	ia64_invala();
536
537	critical_exit();
538}
539
540static void
541pmap_invalidate_all_1(void *arg)
542{
543	uint64_t addr;
544	int i, j;
545
546	critical_enter();
547	addr = pmap_ptc_e_base;
548	for (i = 0; i < pmap_ptc_e_count1; i++) {
549		for (j = 0; j < pmap_ptc_e_count2; j++) {
550			ia64_ptc_e(addr);
551			addr += pmap_ptc_e_stride2;
552		}
553		addr += pmap_ptc_e_stride1;
554	}
555	critical_exit();
556}
557
558static void
559pmap_invalidate_all(void)
560{
561
562#ifdef SMP
563	if (mp_ncpus > 1) {
564		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
565		return;
566	}
567#endif
568	pmap_invalidate_all_1(NULL);
569}
570
571static uint32_t
572pmap_allocate_rid(void)
573{
574	uint64_t bit, bits;
575	int rid;
576
577	mtx_lock(&pmap_ridmutex);
578	if (pmap_ridcount == pmap_ridmax)
579		panic("pmap_allocate_rid: All Region IDs used");
580
581	/* Find an index with a free bit. */
582	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
583		pmap_rididx++;
584		if (pmap_rididx == pmap_ridmapsz)
585			pmap_rididx = 0;
586	}
587	rid = pmap_rididx * 64;
588
589	/* Find a free bit. */
590	bit = 1UL;
591	while (bits & bit) {
592		rid++;
593		bit <<= 1;
594	}
595
596	pmap_ridmap[pmap_rididx] |= bit;
597	pmap_ridcount++;
598	mtx_unlock(&pmap_ridmutex);
599
600	return rid;
601}
602
603static void
604pmap_free_rid(uint32_t rid)
605{
606	uint64_t bit;
607	int idx;
608
609	idx = rid / 64;
610	bit = ~(1UL << (rid & 63));
611
612	mtx_lock(&pmap_ridmutex);
613	pmap_ridmap[idx] &= bit;
614	pmap_ridcount--;
615	mtx_unlock(&pmap_ridmutex);
616}
617
618/***************************************************
619 * Page table page management routines.....
620 ***************************************************/
621
622void
623pmap_pinit0(struct pmap *pmap)
624{
625
626	PMAP_LOCK_INIT(pmap);
627	/* kernel_pmap is the same as any other pmap. */
628	pmap_pinit(pmap);
629}
630
631/*
632 * Initialize a preallocated and zeroed pmap structure,
633 * such as one in a vmspace structure.
634 */
635int
636pmap_pinit(struct pmap *pmap)
637{
638	int i;
639
640	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
641		pmap->pm_rid[i] = pmap_allocate_rid();
642	TAILQ_INIT(&pmap->pm_pvchunk);
643	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
644	return (1);
645}
646
647/***************************************************
648 * Pmap allocation/deallocation routines.
649 ***************************************************/
650
651/*
652 * Release any resources held by the given physical map.
653 * Called when a pmap initialized by pmap_pinit is being released.
654 * Should only be called if the map contains no valid mappings.
655 */
656void
657pmap_release(pmap_t pmap)
658{
659	int i;
660
661	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
662		if (pmap->pm_rid[i])
663			pmap_free_rid(pmap->pm_rid[i]);
664}
665
666/*
667 * grow the number of kernel page table entries, if needed
668 */
669void
670pmap_growkernel(vm_offset_t addr)
671{
672	struct ia64_lpte **dir1;
673	struct ia64_lpte *leaf;
674	vm_page_t nkpg;
675
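	/*
	 * nkpt counts both directory and leaf pages; the limit below
	 * is the number of ia64_kptdir slots (PAGE_SIZE/8) plus the
	 * number of leaf pages they can reference ((PAGE_SIZE/8)^2,
	 * i.e. PAGE_SIZE*PAGE_SIZE/64).
	 */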
676	while (kernel_vm_end <= addr) {
677		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
678			panic("%s: out of kernel address space", __func__);
679
680		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
681		if (dir1 == NULL) {
682			nkpg = vm_page_alloc(NULL, nkpt++,
683			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
684			if (!nkpg)
685				panic("%s: cannot add dir. page", __func__);
686
687			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
688			bzero(dir1, PAGE_SIZE);
689			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
690		}
691
692		nkpg = vm_page_alloc(NULL, nkpt++,
693		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
694		if (!nkpg)
695			panic("%s: cannot add PTE page", __func__);
696
697		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
698		bzero(leaf, PAGE_SIZE);
699		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
700
701		kernel_vm_end += PAGE_SIZE * NKPTEPG;
702	}
703}
704
705/***************************************************
706 * page management routines.
707 ***************************************************/
708
709CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
710
711static __inline struct pv_chunk *
712pv_to_chunk(pv_entry_t pv)
713{
714
715	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
716}
717
718#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
719
720#define	PC_FREE_FULL	0xfffffffffffffffful
721#define	PC_FREE_PARTIAL	\
722	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
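/*
 * PC_FREE_PARTIAL masks off the map bits that have no pv entry behind
 * them: with PAGE_SIZE == 8192, _NPCPV - 5 * 64 == 17, so only the low
 * 17 bits of the last word are valid; with PAGE_SIZE == 16384 it is
 * the low 37 bits.
 */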
723
724#if PAGE_SIZE == 8192
725static const u_long pc_freemask[_NPCM] = {
726	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
727	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
728};
729#elif PAGE_SIZE == 16384
730static const u_long pc_freemask[_NPCM] = {
731	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
732	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
733	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
734	PC_FREE_FULL, PC_FREE_PARTIAL
735};
736#endif
737
738static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
739
740SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
741    "Current number of pv entries");
742
743#ifdef PV_STATS
744static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
745
746SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
747    "Current number of pv entry chunks");
748SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
749    "Current number of pv entry chunks allocated");
750SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
751    "Current number of pv entry chunks frees");
752SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
753    "Number of times tried to get a chunk page but failed.");
754
755static long pv_entry_frees, pv_entry_allocs;
756static int pv_entry_spare;
757
758SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
759    "Current number of pv entry frees");
760SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
761    "Current number of pv entry allocs");
762SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
763    "Current number of spare pv entries");
764#endif
765
766/*
767 * We are in a serious low memory condition.  Resort to
768 * drastic measures to free some pages so we can allocate
769 * another pv entry chunk.
770 */
771static vm_page_t
772pmap_pv_reclaim(pmap_t locked_pmap)
773{
774	struct pch newtail;
775	struct pv_chunk *pc;
776	struct ia64_lpte *pte;
777	pmap_t pmap;
778	pv_entry_t pv;
779	vm_offset_t va;
780	vm_page_t m, m_pc;
781	u_long inuse;
782	int bit, field, freed, idx;
783
784	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
785	pmap = NULL;
786	m_pc = NULL;
787	TAILQ_INIT(&newtail);
788	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
789		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
790		if (pmap != pc->pc_pmap) {
791			if (pmap != NULL) {
792				if (pmap != locked_pmap) {
793					pmap_switch(locked_pmap);
794					PMAP_UNLOCK(pmap);
795				}
796			}
797			pmap = pc->pc_pmap;
798			/* Avoid deadlock and lock recursion. */
799			if (pmap > locked_pmap)
800				PMAP_LOCK(pmap);
801			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
802				pmap = NULL;
803				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
804				continue;
805			}
806			pmap_switch(pmap);
807		}
808
809		/*
810		 * Destroy every non-wired, 8 KB page mapping in the chunk.
811		 */
812		freed = 0;
813		for (field = 0; field < _NPCM; field++) {
814			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
815			    inuse != 0; inuse &= ~(1UL << bit)) {
816				bit = ffsl(inuse) - 1;
817				idx = field * sizeof(inuse) * NBBY + bit;
818				pv = &pc->pc_pventry[idx];
819				va = pv->pv_va;
820				pte = pmap_find_vhpt(va);
821				KASSERT(pte != NULL, ("pte"));
822				if (pmap_wired(pte))
823					continue;
824				pmap_remove_vhpt(va);
825				pmap_invalidate_page(va);
826				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
827				if (pmap_accessed(pte))
828					vm_page_aflag_set(m, PGA_REFERENCED);
829				if (pmap_dirty(pte))
830					vm_page_dirty(m);
831				pmap_free_pte(pte, va);
832				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
833				if (TAILQ_EMPTY(&m->md.pv_list))
834					vm_page_aflag_clear(m, PGA_WRITEABLE);
835				pc->pc_map[field] |= 1UL << bit;
836				freed++;
837			}
838		}
839		if (freed == 0) {
840			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
841			continue;
842		}
843		/* Every freed mapping is for an 8 KB page. */
844		pmap->pm_stats.resident_count -= freed;
845		PV_STAT(pv_entry_frees += freed);
846		PV_STAT(pv_entry_spare += freed);
847		pv_entry_count -= freed;
848		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
849		for (field = 0; field < _NPCM; field++)
850			if (pc->pc_map[field] != pc_freemask[field]) {
851				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
852				    pc_list);
853				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
854
855				/*
856				 * One freed pv entry in locked_pmap is
857				 * sufficient.
858				 */
859				if (pmap == locked_pmap)
860					goto out;
861				break;
862			}
863		if (field == _NPCM) {
864			PV_STAT(pv_entry_spare -= _NPCPV);
865			PV_STAT(pc_chunk_count--);
866			PV_STAT(pc_chunk_frees++);
867			/* Entire chunk is free; return it. */
868			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
869			break;
870		}
871	}
872out:
873	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
874	if (pmap != NULL) {
875		if (pmap != locked_pmap) {
876			pmap_switch(locked_pmap);
877			PMAP_UNLOCK(pmap);
878		}
879	}
880	return (m_pc);
881}
882
883/*
884 * free the pv_entry back to the free list
885 */
886static void
887free_pv_entry(pmap_t pmap, pv_entry_t pv)
888{
889	struct pv_chunk *pc;
890	int bit, field, idx;
891
892	rw_assert(&pvh_global_lock, RA_WLOCKED);
893	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
894	PV_STAT(pv_entry_frees++);
895	PV_STAT(pv_entry_spare++);
896	pv_entry_count--;
897	pc = pv_to_chunk(pv);
898	idx = pv - &pc->pc_pventry[0];
899	field = idx / (sizeof(u_long) * NBBY);
900	bit = idx % (sizeof(u_long) * NBBY);
901	pc->pc_map[field] |= 1ul << bit;
902	for (idx = 0; idx < _NPCM; idx++)
903		if (pc->pc_map[idx] != pc_freemask[idx]) {
904			/*
905			 * 98% of the time, pc is already at the head of the
906			 * list.  If it isn't already, move it to the head.
907			 */
908			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
909			    pc)) {
910				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
911				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
912				    pc_list);
913			}
914			return;
915		}
916	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
917	free_pv_chunk(pc);
918}
919
920static void
921free_pv_chunk(struct pv_chunk *pc)
922{
923	vm_page_t m;
924
925 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
926	PV_STAT(pv_entry_spare -= _NPCPV);
927	PV_STAT(pc_chunk_count--);
928	PV_STAT(pc_chunk_frees++);
929	/* entire chunk is free, return it */
930	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
931	vm_page_unwire(m, 0);
932	vm_page_free(m);
933}
934
935/*
936 * get a new pv_entry, allocating a block from the system
937 * when needed.
938 */
939static pv_entry_t
940get_pv_entry(pmap_t pmap, boolean_t try)
941{
942	struct pv_chunk *pc;
943	pv_entry_t pv;
944	vm_page_t m;
945	int bit, field, idx;
946
947	rw_assert(&pvh_global_lock, RA_WLOCKED);
948	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
949	PV_STAT(pv_entry_allocs++);
950	pv_entry_count++;
951retry:
952	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
953	if (pc != NULL) {
954		for (field = 0; field < _NPCM; field++) {
955			if (pc->pc_map[field]) {
956				bit = ffsl(pc->pc_map[field]) - 1;
957				break;
958			}
959		}
960		if (field < _NPCM) {
961			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
962			pv = &pc->pc_pventry[idx];
963			pc->pc_map[field] &= ~(1ul << bit);
964			/* If this was the last item, move it to tail */
965			for (field = 0; field < _NPCM; field++)
966				if (pc->pc_map[field] != 0) {
967					PV_STAT(pv_entry_spare--);
968					return (pv);	/* not full, return */
969				}
970			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
971			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
972			PV_STAT(pv_entry_spare--);
973			return (pv);
974		}
975	}
976	/* No free items, allocate another chunk */
977	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
978	    VM_ALLOC_WIRED);
979	if (m == NULL) {
980		if (try) {
981			pv_entry_count--;
982			PV_STAT(pc_chunk_tryfail++);
983			return (NULL);
984		}
985		m = pmap_pv_reclaim(pmap);
986		if (m == NULL)
987			goto retry;
988	}
989	PV_STAT(pc_chunk_count++);
990	PV_STAT(pc_chunk_allocs++);
991	pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
992	pc->pc_pmap = pmap;
993	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
994	for (field = 1; field < _NPCM; field++)
995		pc->pc_map[field] = pc_freemask[field];
996	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
997	pv = &pc->pc_pventry[0];
998	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
999	PV_STAT(pv_entry_spare += _NPCPV - 1);
1000	return (pv);
1001}
1002
1003/*
1004 * Conditionally create a pv entry.
1005 */
1006static boolean_t
1007pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1008{
1009	pv_entry_t pv;
1010
1011	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1012	rw_assert(&pvh_global_lock, RA_WLOCKED);
1013	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1014		pv->pv_va = va;
1015		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1016		return (TRUE);
1017	} else
1018		return (FALSE);
1019}
1020
1021/*
1022 * Add an ia64_lpte to the VHPT.
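 * ia64_thash(va) yields the VHPT entry covering va; its chain field was
 * pointed at the corresponding ia64_bucket in pmap_initialize_vhpt().
 * Each bucket heads a singly-linked collision chain of PTE physical
 * addresses (terminated by 0), to which the new PTE is prepended here.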
1023 */
1024static void
1025pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1026{
1027	struct ia64_bucket *bckt;
1028	struct ia64_lpte *vhpte;
1029	uint64_t pte_pa;
1030
1031	/* Can fault, so get it out of the way. */
1032	pte_pa = ia64_tpa((vm_offset_t)pte);
1033
1034	vhpte = (struct ia64_lpte *)ia64_thash(va);
1035	bckt = (struct ia64_bucket *)vhpte->chain;
1036
1037	mtx_lock_spin(&bckt->mutex);
1038	pte->chain = bckt->chain;
1039	ia64_mf();
1040	bckt->chain = pte_pa;
1041
1042	pmap_vhpt_inserts++;
1043	bckt->length++;
1044	mtx_unlock_spin(&bckt->mutex);
1045}
1046
1047/*
1048 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1049 * worked or an appropriate error code otherwise.
1050 */
1051static int
1052pmap_remove_vhpt(vm_offset_t va)
1053{
1054	struct ia64_bucket *bckt;
1055	struct ia64_lpte *pte;
1056	struct ia64_lpte *lpte;
1057	struct ia64_lpte *vhpte;
1058	uint64_t chain, tag;
1059
1060	tag = ia64_ttag(va);
1061	vhpte = (struct ia64_lpte *)ia64_thash(va);
1062	bckt = (struct ia64_bucket *)vhpte->chain;
1063
1064	lpte = NULL;
1065	mtx_lock_spin(&bckt->mutex);
1066	chain = bckt->chain;
1067	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1068	while (chain != 0 && pte->tag != tag) {
1069		lpte = pte;
1070		chain = pte->chain;
1071		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1072	}
1073	if (chain == 0) {
1074		mtx_unlock_spin(&bckt->mutex);
1075		return (ENOENT);
1076	}
1077
1078	/* Snip this pte out of the collision chain. */
1079	if (lpte == NULL)
1080		bckt->chain = pte->chain;
1081	else
1082		lpte->chain = pte->chain;
1083	ia64_mf();
1084
1085	bckt->length--;
1086	mtx_unlock_spin(&bckt->mutex);
1087	return (0);
1088}
1089
1090/*
1091 * Find the ia64_lpte for the given va, if any.
1092 */
1093static struct ia64_lpte *
1094pmap_find_vhpt(vm_offset_t va)
1095{
1096	struct ia64_bucket *bckt;
1097	struct ia64_lpte *pte;
1098	uint64_t chain, tag;
1099
1100	tag = ia64_ttag(va);
1101	pte = (struct ia64_lpte *)ia64_thash(va);
1102	bckt = (struct ia64_bucket *)pte->chain;
1103
1104	mtx_lock_spin(&bckt->mutex);
1105	chain = bckt->chain;
1106	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1107	while (chain != 0 && pte->tag != tag) {
1108		chain = pte->chain;
1109		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1110	}
1111	mtx_unlock_spin(&bckt->mutex);
1112	return ((chain != 0) ? pte : NULL);
1113}
1114
1115/*
1116 * Remove an entry from the list of managed mappings.
1117 */
1118static int
1119pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1120{
1121
1122	rw_assert(&pvh_global_lock, RA_WLOCKED);
1123	if (!pv) {
1124		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1125			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1126				break;
1127		}
1128	}
1129
1130	if (pv) {
1131		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1132		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1133			vm_page_aflag_clear(m, PGA_WRITEABLE);
1134
1135		free_pv_entry(pmap, pv);
1136		return 0;
1137	} else {
1138		return ENOENT;
1139	}
1140}
1141
1142/*
1143 * Create a pv entry for the page at pa, mapped at
1144 * (pmap, va).
1145 */
1146static void
1147pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1148{
1149	pv_entry_t pv;
1150
1151	rw_assert(&pvh_global_lock, RA_WLOCKED);
1152	pv = get_pv_entry(pmap, FALSE);
1153	pv->pv_va = va;
1154	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1155}
1156
1157/*
1158 *	Routine:	pmap_extract
1159 *	Function:
1160 *		Extract the physical page address associated
1161 *		with the given map/virtual_address pair.
1162 */
1163vm_paddr_t
1164pmap_extract(pmap_t pmap, vm_offset_t va)
1165{
1166	struct ia64_lpte *pte;
1167	pmap_t oldpmap;
1168	vm_paddr_t pa;
1169
1170	pa = 0;
1171	PMAP_LOCK(pmap);
1172	oldpmap = pmap_switch(pmap);
1173	pte = pmap_find_vhpt(va);
1174	if (pte != NULL && pmap_present(pte))
1175		pa = pmap_ppn(pte);
1176	pmap_switch(oldpmap);
1177	PMAP_UNLOCK(pmap);
1178	return (pa);
1179}
1180
1181/*
1182 *	Routine:	pmap_extract_and_hold
1183 *	Function:
1184 *		Atomically extract and hold the physical page
1185 *		with the given pmap and virtual address pair
1186 *		if that mapping permits the given protection.
1187 */
1188vm_page_t
1189pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1190{
1191	struct ia64_lpte *pte;
1192	pmap_t oldpmap;
1193	vm_page_t m;
1194	vm_paddr_t pa;
1195
1196	pa = 0;
1197	m = NULL;
1198	PMAP_LOCK(pmap);
1199	oldpmap = pmap_switch(pmap);
1200retry:
1201	pte = pmap_find_vhpt(va);
1202	if (pte != NULL && pmap_present(pte) &&
1203	    (pmap_prot(pte) & prot) == prot) {
1204		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1205		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1206			goto retry;
1207		vm_page_hold(m);
1208	}
1209	PA_UNLOCK_COND(pa);
1210	pmap_switch(oldpmap);
1211	PMAP_UNLOCK(pmap);
1212	return (m);
1213}
1214
1215/***************************************************
1216 * Low level mapping routines.....
1217 ***************************************************/
1218
1219/*
1220 * Find the kernel lpte for mapping the given virtual address, which
1221 * must be in the part of region 5 which we can cover with our kernel
1222 * 'page tables'.
1223 */
1224static struct ia64_lpte *
1225pmap_find_kpte(vm_offset_t va)
1226{
1227	struct ia64_lpte **dir1;
1228	struct ia64_lpte *leaf;
1229
1230	KASSERT((va >> 61) == 5,
1231		("kernel mapping 0x%lx not in region 5", va));
1232	KASSERT(va < kernel_vm_end,
1233		("kernel mapping 0x%lx out of range", va));
1234
1235	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1236	leaf = dir1[KPTE_DIR1_INDEX(va)];
1237	return (&leaf[KPTE_PTE_INDEX(va)]);
1238}
1239
1240/*
1241 * Find a pte suitable for mapping a user-space address. If one exists
1242 * in the VHPT, that one will be returned, otherwise a new pte is
1243 * allocated.
1244 */
1245static struct ia64_lpte *
1246pmap_find_pte(vm_offset_t va)
1247{
1248	struct ia64_lpte *pte;
1249
1250	if (va >= VM_MAXUSER_ADDRESS)
1251		return pmap_find_kpte(va);
1252
1253	pte = pmap_find_vhpt(va);
1254	if (pte == NULL) {
1255		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1256		pte->tag = 1UL << 63;
1257	}
1258	return (pte);
1259}
1260
1261/*
1262 * Free a pte which is now unused. This simply returns it to the zone
1263 * allocator if it is a user mapping. For kernel mappings, the present
1264 * bit is cleared to mark the mapping as no longer in use.
1265 */
1266static void
1267pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1268{
1269	if (va < VM_MAXUSER_ADDRESS)
1270		uma_zfree(ptezone, pte);
1271	else
1272		pmap_clear_present(pte);
1273}
1274
1275static PMAP_INLINE void
1276pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1277{
1278	static long prot2ar[4] = {
1279		PTE_AR_R,		/* VM_PROT_NONE */
1280		PTE_AR_RW,		/* VM_PROT_WRITE */
1281		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1282		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1283	};
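	/*
	 * Index the table with (prot & VM_PROT_ALL) >> 1: read (0x1)
	 * drops out of the index because every access-rights value
	 * already permits reads, leaving write (0x2) and execute (0x4)
	 * to select among the four entries.
	 */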
1284
1285	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1286	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1287	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1288	    ? PTE_PL_KERN : PTE_PL_USER;
1289	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1290}
1291
1292static PMAP_INLINE void
1293pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1294{
1295
1296	pte->pte &= ~PTE_MA_MASK;
1297	pte->pte |= (ma & PTE_MA_MASK);
1298}
1299
1300/*
1301 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1302 * the pte was originally valid, then it's assumed to already be in the
1303 * VHPT.
1304 * This function does not set the protection bits.  It's expected
1305 * that those have been set correctly prior to calling this function.
1306 */
1307static void
1308pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1309    boolean_t wired, boolean_t managed)
1310{
1311
1312	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1313	    PTE_AR_MASK | PTE_ED;
1314	pte->pte |= PTE_PRESENT;
1315	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1316	pte->pte |= (wired) ? PTE_WIRED : 0;
1317	pte->pte |= pa & PTE_PPN_MASK;
1318
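	/* itir.ps (bits 2-7) is the page size; the key field is left 0. */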
1319	pte->itir = PAGE_SHIFT << 2;
1320
1321	pte->tag = ia64_ttag(va);
1322}
1323
1324/*
1325 * Remove the (possibly managed) mapping represented by pte from the
1326 * given pmap.
1327 */
1328static int
1329pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1330		pv_entry_t pv, int freepte)
1331{
1332	int error;
1333	vm_page_t m;
1334
1335	/*
1336	 * First remove from the VHPT.
1337	 */
1338	error = pmap_remove_vhpt(va);
1339	if (error)
1340		return (error);
1341
1342	pmap_invalidate_page(va);
1343
1344	if (pmap_wired(pte))
1345		pmap->pm_stats.wired_count -= 1;
1346
1347	pmap->pm_stats.resident_count -= 1;
1348	if (pmap_managed(pte)) {
1349		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1350		if (pmap_dirty(pte))
1351			vm_page_dirty(m);
1352		if (pmap_accessed(pte))
1353			vm_page_aflag_set(m, PGA_REFERENCED);
1354
1355		error = pmap_remove_entry(pmap, m, va, pv);
1356	}
1357	if (freepte)
1358		pmap_free_pte(pte, va);
1359
1360	return (error);
1361}
1362
1363/*
1364 * Extract the physical page address associated with a kernel
1365 * virtual address.
1366 */
1367vm_paddr_t
1368pmap_kextract(vm_offset_t va)
1369{
1370	struct ia64_lpte *pte;
1371	uint64_t *pbvm_pgtbl;
1372	vm_paddr_t pa;
1373	u_int idx;
1374
1375	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1376
1377	/* Regions 6 and 7 are direct mapped. */
1378	if (va >= IA64_RR_BASE(6)) {
1379		pa = IA64_RR_MASK(va);
1380		goto out;
1381	}
1382
1383	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1384	if (va >= kernel_vm_end)
1385		goto err_out;
1386	if (va >= VM_INIT_KERNEL_ADDRESS) {
1387		pte = pmap_find_kpte(va);
1388		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1389		goto out;
1390	}
1391
1392	/* The PBVM page table. */
1393	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1394		goto err_out;
1395	if (va >= IA64_PBVM_PGTBL) {
1396		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1397		goto out;
1398	}
1399
1400	/* The PBVM itself. */
1401	if (va >= IA64_PBVM_BASE) {
1402		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1403		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1404		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1405			goto err_out;
1406		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1407			goto err_out;
1408		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1409		    (va & IA64_PBVM_PAGE_MASK);
1410		goto out;
1411	}
1412
1413 err_out:
1414	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1415	pa = 0;
1416	/* FALLTHROUGH */
1417
1418 out:
1419	return (pa);
1420}
1421
1422/*
1423 * Add a list of wired pages to the kva.  This routine is only used for
1424 * temporary kernel mappings that do not need to have page modification
1425 * or references recorded.  Note that old mappings are simply written
1426 * over.  The page is effectively wired, but it's customary to not have
1427 * the PTE reflect that, nor update statistics.
1428 */
1429void
1430pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1431{
1432	struct ia64_lpte *pte;
1433	int i;
1434
1435	for (i = 0; i < count; i++) {
1436		pte = pmap_find_kpte(va);
1437		if (pmap_present(pte))
1438			pmap_invalidate_page(va);
1439		else
1440			pmap_enter_vhpt(pte, va);
1441		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1442		pmap_pte_attr(pte, m[i]->md.memattr);
1443		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1444		va += PAGE_SIZE;
1445	}
1446}
1447
1448/*
1449 * this routine jerks page mappings from the
1450 * kernel -- it is meant only for temporary mappings.
1451 */
1452void
1453pmap_qremove(vm_offset_t va, int count)
1454{
1455	struct ia64_lpte *pte;
1456	int i;
1457
1458	for (i = 0; i < count; i++) {
1459		pte = pmap_find_kpte(va);
1460		if (pmap_present(pte)) {
1461			pmap_remove_vhpt(va);
1462			pmap_invalidate_page(va);
1463			pmap_clear_present(pte);
1464		}
1465		va += PAGE_SIZE;
1466	}
1467}
1468
1469/*
1470 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1471 * to not have the PTE reflect that, nor update statistics.
1472 */
1473void
1474pmap_kenter(vm_offset_t va, vm_offset_t pa)
1475{
1476	struct ia64_lpte *pte;
1477
1478	pte = pmap_find_kpte(va);
1479	if (pmap_present(pte))
1480		pmap_invalidate_page(va);
1481	else
1482		pmap_enter_vhpt(pte, va);
1483	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1484	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1485	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1486}
1487
1488/*
1489 * Remove a page from the kva
1490 */
1491void
1492pmap_kremove(vm_offset_t va)
1493{
1494	struct ia64_lpte *pte;
1495
1496	pte = pmap_find_kpte(va);
1497	if (pmap_present(pte)) {
1498		pmap_remove_vhpt(va);
1499		pmap_invalidate_page(va);
1500		pmap_clear_present(pte);
1501	}
1502}
1503
1504/*
1505 *	Used to map a range of physical addresses into kernel
1506 *	virtual address space.
1507 *
1508 *	The value passed in '*virt' is a suggested virtual address for
1509 *	the mapping. Architectures which can support a direct-mapped
1510 *	physical to virtual region can return the appropriate address
1511 *	within that region, leaving '*virt' unchanged. Other
1512 *	architectures should map the pages starting at '*virt' and
1513 *	update '*virt' with the first usable address after the mapped
1514 *	region.
1515 */
1516vm_offset_t
1517pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1518{
1519	return IA64_PHYS_TO_RR7(start);
1520}
1521
1522/*
1523 *	Remove the given range of addresses from the specified map.
1524 *
1525 *	It is assumed that the start and end are properly
1526 *	rounded to the page size.
1527 *
1528 *	Sparsely used ranges are inefficiently removed.  The VHPT is
1529 *	probed for every page within the range.  XXX
1530 */
1531void
1532pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1533{
1534	pmap_t oldpmap;
1535	vm_offset_t va;
1536	struct ia64_lpte *pte;
1537
1538	/*
1539	 * Perform an unsynchronized read.  This is, however, safe.
1540	 */
1541	if (pmap->pm_stats.resident_count == 0)
1542		return;
1543
1544	rw_wlock(&pvh_global_lock);
1545	PMAP_LOCK(pmap);
1546	oldpmap = pmap_switch(pmap);
1547	for (va = sva; va < eva; va += PAGE_SIZE) {
1548		pte = pmap_find_vhpt(va);
1549		if (pte != NULL)
1550			pmap_remove_pte(pmap, pte, va, 0, 1);
1551	}
1552	rw_wunlock(&pvh_global_lock);
1553	pmap_switch(oldpmap);
1554	PMAP_UNLOCK(pmap);
1555}
1556
1557/*
1558 *	Routine:	pmap_remove_all
1559 *	Function:
1560 *		Removes this physical page from
1561 *		all physical maps in which it resides.
1562 *		Reflects back modify bits to the pager.
1563 *
1564 *	Notes:
1565 *		Original versions of this routine were very
1566 *		inefficient because they iteratively called
1567 *		pmap_remove (slow...)
1568 */
1569
1570void
1571pmap_remove_all(vm_page_t m)
1572{
1573	pmap_t oldpmap;
1574	pv_entry_t pv;
1575
1576	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1577	    ("pmap_remove_all: page %p is not managed", m));
1578	rw_wlock(&pvh_global_lock);
1579	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1580		struct ia64_lpte *pte;
1581		pmap_t pmap = PV_PMAP(pv);
1582		vm_offset_t va = pv->pv_va;
1583
1584		PMAP_LOCK(pmap);
1585		oldpmap = pmap_switch(pmap);
1586		pte = pmap_find_vhpt(va);
1587		KASSERT(pte != NULL, ("pte"));
1588		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1589			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1590		pmap_remove_pte(pmap, pte, va, pv, 1);
1591		pmap_switch(oldpmap);
1592		PMAP_UNLOCK(pmap);
1593	}
1594	vm_page_aflag_clear(m, PGA_WRITEABLE);
1595	rw_wunlock(&pvh_global_lock);
1596}
1597
1598/*
1599 *	Set the physical protection on the
1600 *	specified range of this map as requested.
1601 */
1602void
1603pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1604{
1605	pmap_t oldpmap;
1606	struct ia64_lpte *pte;
1607
1608	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1609		pmap_remove(pmap, sva, eva);
1610		return;
1611	}
1612
1613	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1614	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1615		return;
1616
1617	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1618		panic("pmap_protect: unaligned addresses");
1619
1620	PMAP_LOCK(pmap);
1621	oldpmap = pmap_switch(pmap);
1622	for ( ; sva < eva; sva += PAGE_SIZE) {
1623		/* If page is invalid, skip this page */
1624		pte = pmap_find_vhpt(sva);
1625		if (pte == NULL)
1626			continue;
1627
1628		/* If there's no change, skip it too */
1629		if (pmap_prot(pte) == prot)
1630			continue;
1631
1632		if ((prot & VM_PROT_WRITE) == 0 &&
1633		    pmap_managed(pte) && pmap_dirty(pte)) {
1634			vm_paddr_t pa = pmap_ppn(pte);
1635			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1636
1637			vm_page_dirty(m);
1638			pmap_clear_dirty(pte);
1639		}
1640
1641		if (prot & VM_PROT_EXECUTE)
1642			ia64_sync_icache(sva, PAGE_SIZE);
1643
1644		pmap_pte_prot(pmap, pte, prot);
1645		pmap_invalidate_page(sva);
1646	}
1647	pmap_switch(oldpmap);
1648	PMAP_UNLOCK(pmap);
1649}
1650
1651/*
1652 *	Insert the given physical page (p) at
1653 *	the specified virtual address (v) in the
1654 *	target physical map with the protection requested.
1655 *
1656 *	If specified, the page will be wired down, meaning
1657 *	that the related pte can not be reclaimed.
1658 *
1659 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1660 *	or lose information.  That is, this routine must actually
1661 *	insert this page into the given map NOW.
1662 */
1663void
1664pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1665    vm_prot_t prot, boolean_t wired)
1666{
1667	pmap_t oldpmap;
1668	vm_offset_t pa;
1669	vm_offset_t opa;
1670	struct ia64_lpte origpte;
1671	struct ia64_lpte *pte;
1672	boolean_t icache_inval, managed;
1673
1674	rw_wlock(&pvh_global_lock);
1675	PMAP_LOCK(pmap);
1676	oldpmap = pmap_switch(pmap);
1677
1678	va &= ~PAGE_MASK;
1679 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1680	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
1681	    ("pmap_enter: page %p is not busy", m));
1682
1683	/*
1684	 * Find (or create) a pte for the given mapping.
1685	 */
1686	while ((pte = pmap_find_pte(va)) == NULL) {
1687		pmap_switch(oldpmap);
1688		PMAP_UNLOCK(pmap);
1689		rw_wunlock(&pvh_global_lock);
1690		VM_WAIT;
1691		rw_wlock(&pvh_global_lock);
1692		PMAP_LOCK(pmap);
1693		oldpmap = pmap_switch(pmap);
1694	}
1695	origpte = *pte;
1696	if (!pmap_present(pte)) {
1697		opa = ~0UL;
1698		pmap_enter_vhpt(pte, va);
1699	} else
1700		opa = pmap_ppn(pte);
1701	managed = FALSE;
1702	pa = VM_PAGE_TO_PHYS(m);
1703
1704	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1705
1706	/*
1707	 * Mapping has not changed, must be protection or wiring change.
1708	 */
1709	if (opa == pa) {
1710		/*
1711		 * Wiring change, just update stats. We don't worry about
1712		 * wiring PT pages as they remain resident as long as there
1713		 * are valid mappings in them. Hence, if a user page is wired,
1714		 * the PT page will be also.
1715		 */
1716		if (wired && !pmap_wired(&origpte))
1717			pmap->pm_stats.wired_count++;
1718		else if (!wired && pmap_wired(&origpte))
1719			pmap->pm_stats.wired_count--;
1720
1721		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1722
1723		/*
1724		 * We might be turning off write access to the page,
1725		 * so we go ahead and sense modify status. Otherwise,
1726		 * we can avoid I-cache invalidation if the page
1727		 * already allowed execution.
1728		 */
1729		if (managed && pmap_dirty(&origpte))
1730			vm_page_dirty(m);
1731		else if (pmap_exec(&origpte))
1732			icache_inval = FALSE;
1733
1734		pmap_invalidate_page(va);
1735		goto validate;
1736	}
1737
1738	/*
1739	 * Mapping has changed, invalidate old range and fall
1740	 * through to handle validating new mapping.
1741	 */
1742	if (opa != ~0UL) {
1743		pmap_remove_pte(pmap, pte, va, 0, 0);
1744		pmap_enter_vhpt(pte, va);
1745	}
1746
1747	/*
1748	 * Enter on the PV list if part of our managed memory.
1749	 */
1750	if ((m->oflags & VPO_UNMANAGED) == 0) {
1751		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1752		    ("pmap_enter: managed mapping within the clean submap"));
1753		pmap_insert_entry(pmap, va, m);
1754		managed = TRUE;
1755	}
1756
1757	/*
1758	 * Increment counters
1759	 */
1760	pmap->pm_stats.resident_count++;
1761	if (wired)
1762		pmap->pm_stats.wired_count++;
1763
1764validate:
1765
1766	/*
1767	 * Now validate mapping with desired protection/wiring. This
1768	 * adds the pte to the VHPT if necessary.
1769	 */
1770	pmap_pte_prot(pmap, pte, prot);
1771	pmap_pte_attr(pte, m->md.memattr);
1772	pmap_set_pte(pte, va, pa, wired, managed);
1773
1774	/* Invalidate the I-cache when needed. */
1775	if (icache_inval)
1776		ia64_sync_icache(va, PAGE_SIZE);
1777
1778	if ((prot & VM_PROT_WRITE) != 0 && managed)
1779		vm_page_aflag_set(m, PGA_WRITEABLE);
1780	rw_wunlock(&pvh_global_lock);
1781	pmap_switch(oldpmap);
1782	PMAP_UNLOCK(pmap);
1783}
1784
1785/*
1786 * Maps a sequence of resident pages belonging to the same object.
1787 * The sequence begins with the given page m_start.  This page is
1788 * mapped at the given virtual address start.  Each subsequent page is
1789 * mapped at a virtual address that is offset from start by the same
1790 * amount as the page is offset from m_start within the object.  The
1791 * last page in the sequence is the page with the largest offset from
1792 * m_start that can be mapped at a virtual address less than the given
1793 * virtual address end.  Not every virtual page between start and end
1794 * is mapped; only those for which a resident page exists with the
1795 * corresponding offset from m_start are mapped.
1796 */
1797void
1798pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1799    vm_page_t m_start, vm_prot_t prot)
1800{
1801	pmap_t oldpmap;
1802	vm_page_t m;
1803	vm_pindex_t diff, psize;
1804
1805	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1806
1807	psize = atop(end - start);
1808	m = m_start;
1809	rw_wlock(&pvh_global_lock);
1810	PMAP_LOCK(pmap);
1811	oldpmap = pmap_switch(pmap);
1812	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1813		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1814		m = TAILQ_NEXT(m, listq);
1815	}
1816	rw_wunlock(&pvh_global_lock);
1817	pmap_switch(oldpmap);
1818 	PMAP_UNLOCK(pmap);
1819}
1820
1821/*
1822 * this code makes some *MAJOR* assumptions:
1823 * 1. The current pmap and the given pmap exist.
1824 * 2. Not wired.
1825 * 3. Read access.
1826 * 4. No page table pages.
1827 * but is *MUCH* faster than pmap_enter...
1828 */
1829
1830void
1831pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1832{
1833	pmap_t oldpmap;
1834
1835	rw_wlock(&pvh_global_lock);
1836	PMAP_LOCK(pmap);
1837	oldpmap = pmap_switch(pmap);
1838	pmap_enter_quick_locked(pmap, va, m, prot);
1839	rw_wunlock(&pvh_global_lock);
1840	pmap_switch(oldpmap);
1841	PMAP_UNLOCK(pmap);
1842}
1843
1844static void
1845pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1846    vm_prot_t prot)
1847{
1848	struct ia64_lpte *pte;
1849	boolean_t managed;
1850
1851	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1852	    (m->oflags & VPO_UNMANAGED) != 0,
1853	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1854	rw_assert(&pvh_global_lock, RA_WLOCKED);
1855	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1856
1857	if ((pte = pmap_find_pte(va)) == NULL)
1858		return;
1859
1860	if (!pmap_present(pte)) {
1861		/* Enter on the PV list if the page is managed. */
1862		if ((m->oflags & VPO_UNMANAGED) == 0) {
1863			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1864				pmap_free_pte(pte, va);
1865				return;
1866			}
1867			managed = TRUE;
1868		} else
1869			managed = FALSE;
1870
1871		/* Increment counters. */
1872		pmap->pm_stats.resident_count++;
1873
1874		/* Initialise with R/O protection and enter into VHPT. */
1875		pmap_enter_vhpt(pte, va);
1876		pmap_pte_prot(pmap, pte,
1877		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1878		pmap_pte_attr(pte, m->md.memattr);
1879		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1880
1881		if (prot & VM_PROT_EXECUTE)
1882			ia64_sync_icache(va, PAGE_SIZE);
1883	}
1884}
1885
1886/*
1887 * pmap_object_init_pt preloads the ptes for a given object
1888 * into the specified pmap.  This eliminates the blast of soft
1889 * faults on process startup and immediately after an mmap.
1890 */
1891void
1892pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1893		    vm_object_t object, vm_pindex_t pindex,
1894		    vm_size_t size)
1895{
1896
1897	VM_OBJECT_ASSERT_WLOCKED(object);
1898	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1899	    ("pmap_object_init_pt: non-device object"));
1900}
1901
1902/*
1903 *	Routine:	pmap_change_wiring
1904 *	Function:	Change the wiring attribute for a map/virtual-address
1905 *			pair.
1906 *	In/out conditions:
1907 *			The mapping must already exist in the pmap.
1908 */
1909void
1910pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1914{
1915	pmap_t oldpmap;
1916	struct ia64_lpte *pte;
1917
1918	PMAP_LOCK(pmap);
1919	oldpmap = pmap_switch(pmap);
1920
1921	pte = pmap_find_vhpt(va);
1922	KASSERT(pte != NULL, ("pte"));
1923	if (wired && !pmap_wired(pte)) {
1924		pmap->pm_stats.wired_count++;
1925		pmap_set_wired(pte);
1926	} else if (!wired && pmap_wired(pte)) {
1927		pmap->pm_stats.wired_count--;
1928		pmap_clear_wired(pte);
1929	}
1930
1931	pmap_switch(oldpmap);
1932	PMAP_UNLOCK(pmap);
1933}
1934
1935
1936
1937/*
1938 *	Copy the range specified by src_addr/len
1939 *	from the source map to the range dst_addr/len
1940 *	in the destination map.
1941 *
1942 *	This routine is only advisory and need not do anything.
1943 */
1944
1945void
1946pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1947	  vm_offset_t src_addr)
1948{
1949}
1950
1951
1952/*
1953 *	pmap_zero_page zeros the specified hardware page by
1954 *	mapping it into virtual memory and using bzero to clear
1955 *	its contents.
1956 */
1957
1958void
1959pmap_zero_page(vm_page_t m)
1960{
1961	void *p;
1962
1963	p = (void *)pmap_page_to_va(m);
1964	bzero(p, PAGE_SIZE);
1965}
1966
1967
1968/*
1969 *	pmap_zero_page_area zeros the specified hardware page by
1970 *	mapping it into virtual memory and using bzero to clear
1971 *	its contents.
1972 *
1973 *	off and size must reside within a single page.
1974 */
1975
1976void
1977pmap_zero_page_area(vm_page_t m, int off, int size)
1978{
1979	char *p;
1980
1981	p = (void *)pmap_page_to_va(m);
1982	bzero(p + off, size);
1983}
1984
1985
1986/*
1987 *	pmap_zero_page_idle zeros the specified hardware page by
1988 *	mapping it into virtual memory and using bzero to clear
1989 *	its contents.  This is for the vm_idlezero process.
1990 */
1991
1992void
1993pmap_zero_page_idle(vm_page_t m)
1994{
1995	void *p;
1996
1997	p = (void *)pmap_page_to_va(m);
1998	bzero(p, PAGE_SIZE);
1999}
2000
2001
2002/*
2003 *	pmap_copy_page copies the specified (machine independent)
2004 *	page by mapping the page into virtual memory and using
2005 *	bcopy to copy the page, one machine dependent page at a
2006 *	time.
2007 */
2008void
2009pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2010{
2011	void *dst, *src;
2012
2013	src = (void *)pmap_page_to_va(msrc);
2014	dst = (void *)pmap_page_to_va(mdst);
2015	bcopy(src, dst, PAGE_SIZE);
2016}
2017
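/*
 * Left at zero: the buffer cache consults this flag to decide whether it
 * may use buffers that have no kernel virtual mapping; keeping it zero
 * leaves that feature disabled on ia64.
 */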
2018int unmapped_buf_allowed;
2019
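/*
 * Copy "xfersize" bytes from the pages in "ma", starting at byte offset
 * "a_offset", to the pages in "mb", starting at byte offset "b_offset".
 * Each chunk is clamped so that it never crosses a page boundary in
 * either the source or the destination.
 */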
2020void
2021pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2022    vm_offset_t b_offset, int xfersize)
2023{
2024	void *a_cp, *b_cp;
2025	vm_offset_t a_pg_offset, b_pg_offset;
2026	int cnt;
2027
2028	while (xfersize > 0) {
2029		a_pg_offset = a_offset & PAGE_MASK;
2030		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2031		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
2032		    a_pg_offset;
2033		b_pg_offset = b_offset & PAGE_MASK;
2034		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2035		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
2036		    b_pg_offset;
2037		bcopy(a_cp, b_cp, cnt);
2038		a_offset += cnt;
2039		b_offset += cnt;
2040		xfersize -= cnt;
2041	}
2042}
2043
2044/*
2045 * Returns true if the pmap's pv is one of the first
2046 * 16 pvs linked to from this page.  This count may
2047 * be changed upwards or downwards in the future; it
2048 * is only necessary that true be returned for a small
2049 * subset of pmaps for proper page aging.
2050 */
2051boolean_t
2052pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2053{
2054	pv_entry_t pv;
2055	int loops = 0;
2056	boolean_t rv;
2057
2058	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2059	    ("pmap_page_exists_quick: page %p is not managed", m));
2060	rv = FALSE;
2061	rw_wlock(&pvh_global_lock);
2062	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2063		if (PV_PMAP(pv) == pmap) {
2064			rv = TRUE;
2065			break;
2066		}
2067		loops++;
2068		if (loops >= 16)
2069			break;
2070	}
2071	rw_wunlock(&pvh_global_lock);
2072	return (rv);
2073}
2074
2075/*
2076 *	pmap_page_wired_mappings:
2077 *
2078 *	Return the number of managed mappings to the given physical page
2079 *	that are wired.
2080 */
2081int
2082pmap_page_wired_mappings(vm_page_t m)
2083{
2084	struct ia64_lpte *pte;
2085	pmap_t oldpmap, pmap;
2086	pv_entry_t pv;
2087	int count;
2088
2089	count = 0;
2090	if ((m->oflags & VPO_UNMANAGED) != 0)
2091		return (count);
2092	rw_wlock(&pvh_global_lock);
2093	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2094		pmap = PV_PMAP(pv);
2095		PMAP_LOCK(pmap);
2096		oldpmap = pmap_switch(pmap);
2097		pte = pmap_find_vhpt(pv->pv_va);
2098		KASSERT(pte != NULL, ("pte"));
2099		if (pmap_wired(pte))
2100			count++;
2101		pmap_switch(oldpmap);
2102		PMAP_UNLOCK(pmap);
2103	}
2104	rw_wunlock(&pvh_global_lock);
2105	return (count);
2106}
2107
2108/*
2109 * Remove all non-wired pages from the specified address space;
2110 * this speeds up process exit.  Also, this code is special-cased
2111 * for the current process only, but the more generic (and slightly
2112 * slower) mode can be enabled.  This is much faster than
2113 * pmap_remove() in the case of running down an entire address
2114 * space.
2115 */
2116void
2117pmap_remove_pages(pmap_t pmap)
2118{
2119	struct pv_chunk *pc, *npc;
2120	struct ia64_lpte *pte;
2121	pmap_t oldpmap;
2122	pv_entry_t pv;
2123	vm_offset_t va;
2124	vm_page_t m;
2125	u_long inuse, bitmask;
2126	int allfree, bit, field, idx;
2127
2128	rw_wlock(&pvh_global_lock);
2129	PMAP_LOCK(pmap);
2130	oldpmap = pmap_switch(pmap);
2131	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2132		allfree = 1;
2133		for (field = 0; field < _NPCM; field++) {
2134			inuse = ~pc->pc_map[field] & pc_freemask[field];
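			/*
			 * A clear bit in pc_map[] marks an allocated pv
			 * entry; walk the in-use bits with ffsl() and turn
			 * each (field, bit) pair into an index into the
			 * chunk's pc_pventry[] array.
			 */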
2135			while (inuse != 0) {
2136				bit = ffsl(inuse) - 1;
2137				bitmask = 1UL << bit;
2138				idx = field * sizeof(inuse) * NBBY + bit;
2139				pv = &pc->pc_pventry[idx];
2140				inuse &= ~bitmask;
2141				va = pv->pv_va;
2142				pte = pmap_find_vhpt(va);
2143				KASSERT(pte != NULL, ("pte"));
2144				if (pmap_wired(pte)) {
2145					allfree = 0;
2146					continue;
2147				}
2148				pmap_remove_vhpt(va);
2149				pmap_invalidate_page(va);
2150				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2151				if (pmap_dirty(pte))
2152					vm_page_dirty(m);
2153				pmap_free_pte(pte, va);
2154				/* Mark free */
2155				PV_STAT(pv_entry_frees++);
2156				PV_STAT(pv_entry_spare++);
2157				pv_entry_count--;
2158				pc->pc_map[field] |= bitmask;
2159				pmap->pm_stats.resident_count--;
2160				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2161				if (TAILQ_EMPTY(&m->md.pv_list))
2162					vm_page_aflag_clear(m, PGA_WRITEABLE);
2163			}
2164		}
2165		if (allfree) {
2166			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2167			free_pv_chunk(pc);
2168		}
2169	}
2170	pmap_switch(oldpmap);
2171	PMAP_UNLOCK(pmap);
2172	rw_wunlock(&pvh_global_lock);
2173}
2174
2175/*
2176 *	pmap_ts_referenced:
2177 *
2178 *	Return a count of reference bits for a page, clearing those bits.
2179 *	It is not necessary for every reference bit to be cleared, but it
2180 *	is necessary that 0 only be returned when there are truly no
2181 *	reference bits set.
2182 *
2183 *	XXX: The exact number of bits to check and clear is a matter that
2184 *	should be tested and standardized at some point in the future for
2185 *	optimal aging of shared pages.
2186 */
2187int
2188pmap_ts_referenced(vm_page_t m)
2189{
2190	struct ia64_lpte *pte;
2191	pmap_t oldpmap, pmap;
2192	pv_entry_t pv;
2193	int count = 0;
2194
2195	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2196	    ("pmap_ts_referenced: page %p is not managed", m));
2197	rw_wlock(&pvh_global_lock);
2198	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2199		pmap = PV_PMAP(pv);
2200		PMAP_LOCK(pmap);
2201		oldpmap = pmap_switch(pmap);
2202		pte = pmap_find_vhpt(pv->pv_va);
2203		KASSERT(pte != NULL, ("pte"));
2204		if (pmap_accessed(pte)) {
2205			count++;
2206			pmap_clear_accessed(pte);
2207			pmap_invalidate_page(pv->pv_va);
2208		}
2209		pmap_switch(oldpmap);
2210		PMAP_UNLOCK(pmap);
2211	}
2212	rw_wunlock(&pvh_global_lock);
2213	return (count);
2214}
2215
2216/*
2217 *	pmap_is_modified:
2218 *
2219 *	Return whether or not the specified physical page was modified
2220 *	in any physical maps.
2221 */
2222boolean_t
2223pmap_is_modified(vm_page_t m)
2224{
2225	struct ia64_lpte *pte;
2226	pmap_t oldpmap, pmap;
2227	pv_entry_t pv;
2228	boolean_t rv;
2229
2230	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2231	    ("pmap_is_modified: page %p is not managed", m));
2232	rv = FALSE;
2233
2234	/*
2235	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2236	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2237	 * is clear, no PTEs can be dirty.
2238	 */
2239	VM_OBJECT_ASSERT_WLOCKED(m->object);
2240	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2241		return (rv);
2242	rw_wlock(&pvh_global_lock);
2243	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2244		pmap = PV_PMAP(pv);
2245		PMAP_LOCK(pmap);
2246		oldpmap = pmap_switch(pmap);
2247		pte = pmap_find_vhpt(pv->pv_va);
2248		pmap_switch(oldpmap);
2249		KASSERT(pte != NULL, ("pte"));
2250		rv = pmap_dirty(pte) ? TRUE : FALSE;
2251		PMAP_UNLOCK(pmap);
2252		if (rv)
2253			break;
2254	}
2255	rw_wunlock(&pvh_global_lock);
2256	return (rv);
2257}
2258
2259/*
2260 *	pmap_is_prefaultable:
2261 *
2262 *	Return whether or not the specified virtual address is eligible
2263 *	for prefault.
2264 */
2265boolean_t
2266pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2267{
2268	struct ia64_lpte *pte;
2269
2270	pte = pmap_find_vhpt(addr);
2271	if (pte != NULL && pmap_present(pte))
2272		return (FALSE);
2273	return (TRUE);
2274}
2275
2276/*
2277 *	pmap_is_referenced:
2278 *
2279 *	Return whether or not the specified physical page was referenced
2280 *	in any physical maps.
2281 */
2282boolean_t
2283pmap_is_referenced(vm_page_t m)
2284{
2285	struct ia64_lpte *pte;
2286	pmap_t oldpmap, pmap;
2287	pv_entry_t pv;
2288	boolean_t rv;
2289
2290	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2291	    ("pmap_is_referenced: page %p is not managed", m));
2292	rv = FALSE;
2293	rw_wlock(&pvh_global_lock);
2294	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2295		pmap = PV_PMAP(pv);
2296		PMAP_LOCK(pmap);
2297		oldpmap = pmap_switch(pmap);
2298		pte = pmap_find_vhpt(pv->pv_va);
2299		pmap_switch(oldpmap);
2300		KASSERT(pte != NULL, ("pte"));
2301		rv = pmap_accessed(pte) ? TRUE : FALSE;
2302		PMAP_UNLOCK(pmap);
2303		if (rv)
2304			break;
2305	}
2306	rw_wunlock(&pvh_global_lock);
2307	return (rv);
2308}
2309
2310/*
2311 *	Apply the given advice to the specified range of addresses within the
2312 *	given pmap.  Depending on the advice, clear the referenced and/or
2313 *	modified flags in each mapping and set the mapped page's dirty field.
2314 */
2315void
2316pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2317{
2318	struct ia64_lpte *pte;
2319	pmap_t oldpmap;
2320	vm_page_t m;
2321
2322	PMAP_LOCK(pmap);
2323	oldpmap = pmap_switch(pmap);
2324	for (; sva < eva; sva += PAGE_SIZE) {
2325		/* If page is invalid, skip this page. */
2326		pte = pmap_find_vhpt(sva);
2327		if (pte == NULL)
2328			continue;
2329
2330		/* If it isn't managed, skip it too. */
2331		if (!pmap_managed(pte))
2332			continue;
2333
2334		/* Clear its modified and referenced bits. */
2335		if (pmap_dirty(pte)) {
2336			if (advice == MADV_DONTNEED) {
2337				/*
2338				 * Future calls to pmap_is_modified() can be
2339				 * avoided by making the page dirty now.
2340				 */
2341				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2342				vm_page_dirty(m);
2343			}
2344			pmap_clear_dirty(pte);
2345		} else if (!pmap_accessed(pte))
2346			continue;
2347		pmap_clear_accessed(pte);
2348		pmap_invalidate_page(sva);
2349	}
2350	pmap_switch(oldpmap);
2351	PMAP_UNLOCK(pmap);
2352}
2353
2354/*
2355 *	Clear the modify bits on the specified physical page.
2356 */
2357void
2358pmap_clear_modify(vm_page_t m)
2359{
2360	struct ia64_lpte *pte;
2361	pmap_t oldpmap, pmap;
2362	pv_entry_t pv;
2363
2364	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2365	    ("pmap_clear_modify: page %p is not managed", m));
2366	VM_OBJECT_ASSERT_WLOCKED(m->object);
2367	KASSERT(!vm_page_xbusied(m),
2368	    ("pmap_clear_modify: page %p is exclusive busied", m));
2369
2370	/*
2371	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
2372	 * If the object containing the page is locked and the page is not
2373	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2374	 */
2375	if ((m->aflags & PGA_WRITEABLE) == 0)
2376		return;
2377	rw_wlock(&pvh_global_lock);
2378	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2379		pmap = PV_PMAP(pv);
2380		PMAP_LOCK(pmap);
2381		oldpmap = pmap_switch(pmap);
2382		pte = pmap_find_vhpt(pv->pv_va);
2383		KASSERT(pte != NULL, ("pte"));
2384		if (pmap_dirty(pte)) {
2385			pmap_clear_dirty(pte);
2386			pmap_invalidate_page(pv->pv_va);
2387		}
2388		pmap_switch(oldpmap);
2389		PMAP_UNLOCK(pmap);
2390	}
2391	rw_wunlock(&pvh_global_lock);
2392}
2393
2394/*
2395 * Clear the write and modified bits in each of the given page's mappings.
2396 */
2397void
2398pmap_remove_write(vm_page_t m)
2399{
2400	struct ia64_lpte *pte;
2401	pmap_t oldpmap, pmap;
2402	pv_entry_t pv;
2403	vm_prot_t prot;
2404
2405	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2406	    ("pmap_remove_write: page %p is not managed", m));
2407
2408	/*
2409	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2410	 * set by another thread while the object is locked.  Thus,
2411	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2412	 */
2413	VM_OBJECT_ASSERT_WLOCKED(m->object);
2414	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2415		return;
2416	rw_wlock(&pvh_global_lock);
2417	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2418		pmap = PV_PMAP(pv);
2419		PMAP_LOCK(pmap);
2420		oldpmap = pmap_switch(pmap);
2421		pte = pmap_find_vhpt(pv->pv_va);
2422		KASSERT(pte != NULL, ("pte"));
2423		prot = pmap_prot(pte);
2424		if ((prot & VM_PROT_WRITE) != 0) {
2425			if (pmap_dirty(pte)) {
2426				vm_page_dirty(m);
2427				pmap_clear_dirty(pte);
2428			}
2429			prot &= ~VM_PROT_WRITE;
2430			pmap_pte_prot(pmap, pte, prot);
2431			pmap_pte_attr(pte, m->md.memattr);
2432			pmap_invalidate_page(pv->pv_va);
2433		}
2434		pmap_switch(oldpmap);
2435		PMAP_UNLOCK(pmap);
2436	}
2437	vm_page_aflag_clear(m, PGA_WRITEABLE);
2438	rw_wunlock(&pvh_global_lock);
2439}
2440
2441/*
2442 * Map a set of physical memory pages into the kernel virtual
2443 * address space. Return a pointer to where it is mapped. This
2444 * routine is intended to be used for mapping device memory,
2445 * NOT real memory.
2446 */
2447void *
2448pmap_mapdev(vm_paddr_t pa, vm_size_t sz)
2449{
2450	static void *last_va = NULL;
2451	static vm_paddr_t last_pa = 0;
2452	static vm_size_t last_sz = 0;
2453	struct efi_md *md;
2454	vm_offset_t va;
2455
2456	if (pa == last_pa && sz == last_sz)
2457		return (last_va);
2458
2459	md = efi_md_find(pa);
2460	if (md == NULL) {
2461		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2462		    __func__, pa, pa + sz - 1);
2463		return ((void *)IA64_PHYS_TO_RR6(pa));
2464	}
2465
2466	if (md->md_type == EFI_MD_TYPE_FREE) {
2467		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2468		    pa + sz - 1);
2469		return (NULL);
2470	}
2471
2472	va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2473	    IA64_PHYS_TO_RR6(pa);
2474
2475	last_va = (void *)va;
2476	last_pa = pa;
2477	last_sz = sz;
2478	return (last_va);
2479}
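
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * a driver that knows the physical address "pa" and size "sz" of its
 * register window could map it with pmap_mapdev(), bail out if the
 * range turns out to be ordinary DRAM, and later hand the mapping back
 * with pmap_unmapdev(), which is a no-op here:
 *
 *	void *regs;
 *
 *	regs = pmap_mapdev(pa, sz);
 *	if (regs == NULL)
 *		return (ENXIO);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, sz);
 */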
2480
2481/*
2482 * 'Unmap' a range mapped by pmap_mapdev().
2483 */
2484void
2485pmap_unmapdev(vm_offset_t va, vm_size_t size)
2486{
2487}
2488
2489/*
2490 * Sets the memory attribute for the specified page.
2491 */
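/*
 * pmap_page_set_memattr_1() is the per-CPU (rendezvous) callback used
 * below: it issues the PAL call identified by "arg" on the local CPU
 * with interrupts disabled.
 */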
2492static void
2493pmap_page_set_memattr_1(void *arg)
2494{
2495	struct ia64_pal_result res;
2496	register_t is;
2497	uintptr_t pp = (uintptr_t)arg;
2498
2499	is = intr_disable();
2500	res = ia64_call_pal_static(pp, 0, 0, 0);
2501	intr_restore(is);
2502}
2503
2504void
2505pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2506{
2507	struct ia64_lpte *pte;
2508	pmap_t oldpmap, pmap;
2509	pv_entry_t pv;
2510	void *va;
2511
2512	rw_wlock(&pvh_global_lock);
2513	m->md.memattr = ma;
2514	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2515		pmap = PV_PMAP(pv);
2516		PMAP_LOCK(pmap);
2517		oldpmap = pmap_switch(pmap);
2518		pte = pmap_find_vhpt(pv->pv_va);
2519		KASSERT(pte != NULL, ("pte"));
2520		pmap_pte_attr(pte, ma);
2521		pmap_invalidate_page(pv->pv_va);
2522		pmap_switch(oldpmap);
2523		PMAP_UNLOCK(pmap);
2524	}
2525	rw_wunlock(&pvh_global_lock);
2526
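	/*
	 * Making a page uncacheable requires making prefetches visible
	 * and draining outstanding memory transactions on all CPUs (the
	 * PAL calls below, issued via rendezvous under SMP), with the
	 * page itself flushed from the data cache in between.
	 */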
2527	if (ma == VM_MEMATTR_UNCACHEABLE) {
2528#ifdef SMP
2529		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2530		    (void *)PAL_PREFETCH_VISIBILITY);
2531#else
2532		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2533#endif
2534		va = (void *)pmap_page_to_va(m);
2535		critical_enter();
2536		cpu_flush_dcache(va, PAGE_SIZE);
2537		critical_exit();
2538#ifdef SMP
2539		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2540		    (void *)PAL_MC_DRAIN);
2541#else
2542		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2543#endif
2544	}
2545}
2546
2547/*
2548 * Perform the pmap-level work for mincore(2).
2549 */
2550int
2551pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2552{
2553	pmap_t oldpmap;
2554	struct ia64_lpte *pte, tpte;
2555	vm_paddr_t pa;
2556	int val;
2557
2558	PMAP_LOCK(pmap);
2559retry:
2560	oldpmap = pmap_switch(pmap);
2561	pte = pmap_find_vhpt(addr);
2562	if (pte != NULL) {
2563		tpte = *pte;
2564		pte = &tpte;
2565	}
2566	pmap_switch(oldpmap);
2567	if (pte == NULL || !pmap_present(pte)) {
2568		val = 0;
2569		goto out;
2570	}
2571	val = MINCORE_INCORE;
2572	if (pmap_dirty(pte))
2573		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2574	if (pmap_accessed(pte))
2575		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2576	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2577	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2578	    pmap_managed(pte)) {
2579		pa = pmap_ppn(pte);
2580		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2581		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2582			goto retry;
2583	} else
2584out:
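		/*
		 * Reached via "goto out" when there is no valid mapping:
		 * val is 0 and PA_UNLOCK_COND() drops any physical page
		 * lock still held from an earlier pass through the retry
		 * loop.
		 */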
2585		PA_UNLOCK_COND(*locked_pa);
2586	PMAP_UNLOCK(pmap);
2587	return (val);
2588}
2589
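/*
 * Make the address space of thread "td"'s process the active translation
 * context on the current CPU.
 */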
2590void
2591pmap_activate(struct thread *td)
2592{
2593	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2594}
2595
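/*
 * Install "pm" as the current pmap on this CPU: load its region IDs
 * into the region registers for the user regions (a NULL "pm" selects
 * the per-region default IDs) and return the previously active pmap.
 */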
2596pmap_t
2597pmap_switch(pmap_t pm)
2598{
2599	pmap_t prevpm;
2600	int i;
2601
2602	critical_enter();
2603	prevpm = PCPU_GET(md.current_pmap);
2604	if (prevpm == pm)
2605		goto out;
2606	if (pm == NULL) {
2607		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2608			ia64_set_rr(IA64_RR_BASE(i),
2609			    (i << 8) | (PAGE_SHIFT << 2) | 1);
2610		}
2611	} else {
2612		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2613			ia64_set_rr(IA64_RR_BASE(i),
2614			    (pm->pm_rid[i] << 8) | (PAGE_SHIFT << 2) | 1);
2615		}
2616	}
2617	PCPU_SET(md.current_pmap, pm);
2618	ia64_srlz_d();
2619
2620out:
2621	critical_exit();
2622	return (prevpm);
2623}
2624
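/*
 * Make the instruction cache coherent with recent stores in the range
 * [va, va + sz) of pmap "pm".  The range is widened to 32-byte
 * boundaries and walked one page at a time so that only resident
 * mappings are synced.
 */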
2625void
2626pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2627{
2628	pmap_t oldpm;
2629	struct ia64_lpte *pte;
2630	vm_offset_t lim;
2631	vm_size_t len;
2632
2633	sz += va & 31;
2634	va &= ~31;
2635	sz = (sz + 31) & ~31;
2636
2637	PMAP_LOCK(pm);
2638	oldpm = pmap_switch(pm);
2639	while (sz > 0) {
2640		lim = round_page(va);
2641		len = MIN(lim - va, sz);
2642		pte = pmap_find_vhpt(va);
2643		if (pte != NULL && pmap_present(pte))
2644			ia64_sync_icache(va, len);
2645		va += len;
2646		sz -= len;
2647	}
2648	pmap_switch(oldpm);
2649	PMAP_UNLOCK(pm);
2650}
2651
2652/*
2653 *	Increase the starting virtual address of the given mapping if a
2654 *	different alignment might result in more superpage mappings.
2655 */
2656void
2657pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2658    vm_offset_t *addr, vm_size_t size)
2659{
2660}
2661
2662#include "opt_ddb.h"
2663
2664#ifdef DDB
2665
2666#include <ddb/ddb.h>
2667
2668static const char *psnames[] = {
2669	"1B",	"2B",	"4B",	"8B",
2670	"16B",	"32B",	"64B",	"128B",
2671	"256B",	"512B",	"1K",	"2K",
2672	"4K",	"8K",	"16K",	"32K",
2673	"64K",	"128K",	"256K",	"512K",
2674	"1M",	"2M",	"4M",	"8M",
2675	"16M",	"32M",	"64M",	"128M",
2676	"256M",	"512M",	"1G",	"2G"
2677};
2678
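/*
 * Dump the CPU's translation registers via PAL: print_trs(0) backs the
 * "itr" DDB command and print_trs(1) the "dtr" command defined below.
 */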
2679static void
2680print_trs(int type)
2681{
2682	struct ia64_pal_result res;
2683	int i, maxtr;
2684	struct {
2685		pt_entry_t	pte;
2686		uint64_t	itir;
2687		uint64_t	ifa;
2688		struct ia64_rr	rr;
2689	} buf;
2690	static const char *manames[] = {
2691		"WB",	"bad",	"bad",	"bad",
2692		"UC",	"UCE",	"WC",	"NaT",
2693	};
2694
2695	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2696	if (res.pal_status != 0) {
2697		db_printf("Can't get VM summary\n");
2698		return;
2699	}
2700
2701	if (type == 0)
2702		maxtr = (res.pal_result[0] >> 40) & 0xff;
2703	else
2704		maxtr = (res.pal_result[0] >> 32) & 0xff;
2705
2706	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2707	for (i = 0; i <= maxtr; i++) {
2708		bzero(&buf, sizeof(buf));
2709		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2710		    ia64_tpa((uint64_t)&buf));
2711		if (!(res.pal_result[0] & 1))
2712			buf.pte &= ~PTE_AR_MASK;
2713		if (!(res.pal_result[0] & 2))
2714			buf.pte &= ~PTE_PL_MASK;
2715		if (!(res.pal_result[0] & 4))
2716			pmap_clear_dirty(&buf);
2717		if (!(res.pal_result[0] & 8))
2718			buf.pte &= ~PTE_MA_MASK;
2719		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2720		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2721		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2722		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2723		    (buf.pte & PTE_ED) ? 1 : 0,
2724		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2725		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2726		    (pmap_dirty(&buf)) ? 1 : 0,
2727		    (pmap_accessed(&buf)) ? 1 : 0,
2728		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2729		    (pmap_present(&buf)) ? 1 : 0,
2730		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2731	}
2732}
2733
2734DB_COMMAND(itr, db_itr)
2735{
2736	print_trs(0);
2737}
2738
2739DB_COMMAND(dtr, db_dtr)
2740{
2741	print_trs(1);
2742}
2743
2744DB_COMMAND(rr, db_rr)
2745{
2746	int i;
2747	uint64_t t;
2748	struct ia64_rr rr;
2749
2750	db_printf("RR RID    PgSz VE\n");
2751	for (i = 0; i < 8; i++) {
2752		__asm __volatile ("mov %0=rr[%1]"
2753				  : "=r"(t)
2754				  : "r"(IA64_RR_BASE(i)));
2755		*(uint64_t *) &rr = t;
2756		db_printf("%d  %06x %4s %d\n",
2757		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2758	}
2759}
2760
2761DB_COMMAND(thash, db_thash)
2762{
2763	if (!have_addr)
2764		return;
2765
2766	db_printf("%p\n", (void *) ia64_thash(addr));
2767}
2768
2769DB_COMMAND(ttag, db_ttag)
2770{
2771	if (!have_addr)
2772		return;
2773
2774	db_printf("0x%lx\n", ia64_ttag(addr));
2775}
2776
2777DB_COMMAND(kpte, db_kpte)
2778{
2779	struct ia64_lpte *pte;
2780
2781	if (!have_addr) {
2782		db_printf("usage: kpte <kva>\n");
2783		return;
2784	}
2785	if (addr < VM_INIT_KERNEL_ADDRESS) {
2786		db_printf("kpte: error: invalid <kva>\n");
2787		return;
2788	}
2789	pte = pmap_find_kpte(addr);
2790	db_printf("kpte at %p:\n", pte);
2791	db_printf("  pte  =%016lx\n", pte->pte);
2792	db_printf("  itir =%016lx\n", pte->itir);
2793	db_printf("  tag  =%016lx\n", pte->tag);
2794	db_printf("  chain=%016lx\n", pte->chain);
2795}
2796
2797#endif
2798