1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/pmap.c 262002 2014-02-16 22:12:13Z marcel $");
50
51#include "opt_pmap.h"
52
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/lock.h>
56#include <sys/mman.h>
57#include <sys/mutex.h>
58#include <sys/proc.h>
59#include <sys/rwlock.h>
60#include <sys/smp.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63
64#include <vm/vm.h>
65#include <vm/vm_param.h>
66#include <vm/vm_page.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_pageout.h>
70#include <vm/uma.h>
71
72#include <machine/bootinfo.h>
73#include <machine/efi.h>
74#include <machine/md_var.h>
75#include <machine/pal.h>
76
77/*
78 *	Manages physical address maps.
79 *
80 *	Since the information managed by this module is
81 *	also stored by the logical address mapping module,
82 *	this module may throw away valid virtual-to-physical
83 *	mappings at almost any time.  However, invalidations
84 *	of virtual-to-physical mappings must be done as
85 *	requested.
86 *
87 *	In order to cope with hardware architectures which
88 * make virtual-to-physical map invalidations expensive,
89 * this module may delay invalidation or reduced-protection
90 *	operations until such time as they are actually
91 *	necessary.  This module is given full information as
92 *	to which processors are currently using which maps,
93 *	and to when physical maps must be made correct.
94 */
95
96/*
97 * Following the Linux model, region IDs are allocated in groups of
98 * eight so that a single region ID can be used for as many RRs as we
99 * want by encoding the RR number into the low bits of the ID.
100 *
101 * We reserve region ID 0 for the kernel and allocate the remaining
102 * IDs for user pmaps.
103 *
104 * Region 0-3:	User virtually mapped
105 * Region 4:	PBVM and special mappings
106 * Region 5:	Kernel virtual memory
107 * Region 6:	Direct-mapped uncacheable
108 * Region 7:	Direct-mapped cacheable
109 */
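
/*
 * For example, under this scheme the kernel claims region IDs 0..7 (see
 * pmap_bootstrap(), which marks the first eight bits of the RID map as
 * used), and a hypothetical user base ID of 8 would cover IDs 8..15,
 * leaving the low three bits free to encode the region register number.
 */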
110
111/* XXX move to a header. */
112extern uint64_t ia64_gateway_page[];
113
114#if !defined(DIAGNOSTIC)
115#define PMAP_INLINE __inline
116#else
117#define PMAP_INLINE
118#endif
119
120#ifdef PV_STATS
121#define PV_STAT(x)	do { x ; } while (0)
122#else
123#define PV_STAT(x)	do { } while (0)
124#endif
125
126#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
127#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
128#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
129#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
130#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
131#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
132#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
133#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
134
135#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
136#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
137#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
138#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
139
140#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
141
142/*
143 * Individual PV entries are stored in per-pmap chunks.  This saves
144 * space by eliminating the need to record the pmap within every PV
145 * entry.
146 */
147#if PAGE_SIZE == 8192
148#define	_NPCM	6
149#define	_NPCPV	337
150#define	_NPCS	2
151#elif PAGE_SIZE == 16384
152#define	_NPCM	11
153#define	_NPCPV	677
154#define	_NPCS	1
155#endif
156struct pv_chunk {
157	pmap_t			pc_pmap;
158	TAILQ_ENTRY(pv_chunk)	pc_list;
159	u_long			pc_map[_NPCM];	/* bitmap; 1 = free */
160	TAILQ_ENTRY(pv_chunk)	pc_lru;
161	u_long			pc_spare[_NPCS];
162	struct pv_entry		pc_pventry[_NPCPV];
163};
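
/*
 * Sizing sketch (assuming an LP64 ABI with 8-byte pointers and a 24-byte
 * struct pv_entry): with 8 KB pages the fixed part of the chunk is
 * 8 + 16 + 6*8 + 16 + 2*8 = 104 bytes and 104 + 337 * 24 == 8192, so a
 * chunk fills a page exactly; with 16 KB pages, 136 + 677 * 24 == 16384.
 * The CTASSERT below checks the same invariant at compile time.
 */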
164
165/*
166 * The VHPT bucket head structure.
167 */
168struct ia64_bucket {
169	uint64_t	chain;
170	struct mtx	mutex;
171	u_int		length;
172};
173
174/*
175 * Statically allocated kernel pmap
176 */
177struct pmap kernel_pmap_store;
178
179vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
180vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
181
182/*
183 * Kernel virtual memory management.
184 */
185static int nkpt;
186extern struct ia64_lpte ***ia64_kptdir;
187
188#define KPTE_DIR0_INDEX(va) \
189	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
190#define KPTE_DIR1_INDEX(va) \
191	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
192#define KPTE_PTE_INDEX(va) \
193	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
194#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
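
/*
 * Decoding sketch (assuming the default 8 KB page size, 32-byte PTEs and
 * 8-byte pointers): KPTE_PTE_INDEX takes bits 13..20 of a region-5 address
 * (256 PTEs per leaf page, i.e. NKPTEPG), KPTE_DIR1_INDEX takes bits 21..30
 * (1024 pointers per directory page) and KPTE_DIR0_INDEX takes bits 31..40
 * into ia64_kptdir, covering roughly 2 TB of kernel virtual address space.
 */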
195
196vm_offset_t kernel_vm_end;
197
198/* Values for ptc.e. XXX values for SKI. */
199static uint64_t pmap_ptc_e_base = 0x100000000;
200static uint64_t pmap_ptc_e_count1 = 3;
201static uint64_t pmap_ptc_e_count2 = 2;
202static uint64_t pmap_ptc_e_stride1 = 0x2000;
203static uint64_t pmap_ptc_e_stride2 = 0x100000000;
204
205struct mtx pmap_ptc_mutex;
206
207/*
208 * Data for the RID allocator
209 */
210static int pmap_ridcount;
211static int pmap_rididx;
212static int pmap_ridmapsz;
213static int pmap_ridmax;
214static uint64_t *pmap_ridmap;
215struct mtx pmap_ridmutex;
216
217static struct rwlock_padalign pvh_global_lock;
218
219/*
220 * Data for the pv entry allocation mechanism
221 */
222static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
223static int pv_entry_count;
224
225/*
226 * Data for allocating PTEs for user processes.
227 */
228static uma_zone_t ptezone;
229
230/*
231 * Virtual Hash Page Table (VHPT) data.
232 */
233/* SYSCTL_DECL(_machdep); */
234static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
235
236struct ia64_bucket *pmap_vhpt_bucket;
237
238int pmap_vhpt_nbuckets;
239SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
240    &pmap_vhpt_nbuckets, 0, "");
241
242int pmap_vhpt_log2size = 0;
243TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
244SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
245    &pmap_vhpt_log2size, 0, "");
246
247static int pmap_vhpt_inserts;
248SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
249    &pmap_vhpt_inserts, 0, "");
250
251static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
252SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
253    NULL, 0, pmap_vhpt_population, "I", "");
254
255static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
256
257static void free_pv_chunk(struct pv_chunk *pc);
258static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
259static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
260static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
261
262static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
263		    vm_page_t m, vm_prot_t prot);
264static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
265static void	pmap_invalidate_all(void);
266static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
267		    vm_offset_t va, pv_entry_t pv, int freepte);
268static int	pmap_remove_vhpt(vm_offset_t va);
269static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
270		    vm_page_t m);
271
272static void
273pmap_initialize_vhpt(vm_offset_t vhpt)
274{
275	struct ia64_lpte *pte;
276	u_int i;
277
278	pte = (struct ia64_lpte *)vhpt;
279	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
280		pte[i].pte = 0;
281		pte[i].itir = 0;
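		/*
		 * Bit 63 of the tag is the architected tag-invalid (ti)
		 * bit, so an entry initialized this way can never match
		 * a VHPT lookup.
		 */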
282		pte[i].tag = 1UL << 63; /* Invalid tag */
283		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
284	}
285}
286
287#ifdef SMP
288vm_offset_t
289pmap_alloc_vhpt(void)
290{
291	vm_offset_t vhpt;
292	vm_page_t m;
293	vm_size_t size;
294
295	size = 1UL << pmap_vhpt_log2size;
296	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
297	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
298	    VM_MEMATTR_DEFAULT);
299	if (m != NULL) {
300		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
301		pmap_initialize_vhpt(vhpt);
302		return (vhpt);
303	}
304	return (0);
305}
306#endif
307
308/*
309 *	Bootstrap the system enough to run with virtual memory.
310 */
311void
312pmap_bootstrap(void)
313{
314	struct ia64_pal_result res;
315	vm_offset_t base;
316	size_t size;
317	int i, ridbits;
318
319	/*
320	 * Query the PAL Code to find the loop parameters for the
321	 * ptc.e instruction.
322	 */
323	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
324	if (res.pal_status != 0)
325		panic("Can't configure ptc.e parameters");
326	pmap_ptc_e_base = res.pal_result[0];
327	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
328	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
329	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
330	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
331	if (bootverbose)
332		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
333		       "stride1=0x%lx, stride2=0x%lx\n",
334		       pmap_ptc_e_base,
335		       pmap_ptc_e_count1,
336		       pmap_ptc_e_count2,
337		       pmap_ptc_e_stride1,
338		       pmap_ptc_e_stride2);
339
340	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
341
342	/*
343	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
344	 *
345	 * We currently need at least 19 bits in the RID because PID_MAX
346	 * can only be encoded in 17 bits and we need RIDs for 4 regions
347	 * per process. With PID_MAX equalling 99999 this means that we
348	 * need to be able to encode 399996 (=4*PID_MAX).
349	 * The Itanium processor only has 18 bits and the architected
350	 * minimum is exactly that. So, we cannot use a PID based scheme
351	 * in those cases. Enter pmap_ridmap...
352	 * We should avoid the map when running on a processor that has
353	 * implemented enough bits. This means that we should pass the
354	 * process/thread ID to pmap. This we currently don't do, so we
355	 * use the map anyway. However, we don't want to allocate a map
356	 * that is large enough to cover the range dictated by the number
357	 * of bits in the RID, because that may result in a RID map of
358	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
359 * The bottom line: we create a 32KB map when the processor only
360	 * implements 18 bits (or when we can't figure it out). Otherwise
361	 * we create a 64KB map.
362	 */
363	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
364	if (res.pal_status != 0) {
365		if (bootverbose)
366			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
367		ridbits = 18; /* guaranteed minimum */
368	} else {
369		ridbits = (res.pal_result[1] >> 8) & 0xff;
370		if (bootverbose)
371			printf("Processor supports %d Region ID bits\n",
372			    ridbits);
373	}
374	if (ridbits > 19)
375		ridbits = 19;
376
377	pmap_ridmax = (1 << ridbits);
378	pmap_ridmapsz = pmap_ridmax / 64;
379	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
380	pmap_ridmap[0] |= 0xff;
381	pmap_rididx = 0;
382	pmap_ridcount = 8;
383	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
384
385	/*
386	 * Allocate some memory for initial kernel 'page tables'.
387	 */
388	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
389	nkpt = 0;
390	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
391
392	/*
393	 * Determine a valid (mappable) VHPT size.
394	 */
395	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
396	if (pmap_vhpt_log2size == 0)
397		pmap_vhpt_log2size = 20;
398	else if (pmap_vhpt_log2size < 16)
399		pmap_vhpt_log2size = 16;
400	else if (pmap_vhpt_log2size > 28)
401		pmap_vhpt_log2size = 28;
402	if (pmap_vhpt_log2size & 1)
403		pmap_vhpt_log2size--;
404
405	size = 1UL << pmap_vhpt_log2size;
406	base = (uintptr_t)ia64_physmem_alloc(size, size);
407	if (base == 0)
408		panic("Unable to allocate VHPT");
409
410	PCPU_SET(md.vhpt, base);
411	if (bootverbose)
412		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
413
414	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
415	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
416	    sizeof(struct ia64_bucket), PAGE_SIZE);
417	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
418		/* Stolen memory is zeroed. */
419		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
420		    MTX_NOWITNESS | MTX_SPIN);
421	}
422
423	pmap_initialize_vhpt(base);
424	map_vhpt(base);
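	/*
	 * Program the PTA: bit 0 enables the VHPT walker, bits 2..7 hold
	 * log2 of the VHPT size and bit 8 selects the long format; the
	 * size-aligned base occupies the upper bits.
	 */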
425	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
426	ia64_srlz_i();
427
428	virtual_avail = VM_INIT_KERNEL_ADDRESS;
429	virtual_end = VM_MAX_KERNEL_ADDRESS;
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	PMAP_LOCK_INIT(kernel_pmap);
435	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
436		kernel_pmap->pm_rid[i] = 0;
437	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
438	PCPU_SET(md.current_pmap, kernel_pmap);
439
440 	/*
441	 * Initialize the global pv list lock.
442	 */
443	rw_init(&pvh_global_lock, "pmap pv global");
444
445	/* Region 5 is mapped via the VHPT. */
446	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
447
448	/*
449	 * Clear out any random TLB entries left over from booting.
450	 */
451	pmap_invalidate_all();
452
453	map_gateway_page();
454}
455
456static int
457pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
458{
459	int count, error, i;
460
461	count = 0;
462	for (i = 0; i < pmap_vhpt_nbuckets; i++)
463		count += pmap_vhpt_bucket[i].length;
464
465	error = SYSCTL_OUT(req, &count, sizeof(count));
466	return (error);
467}
468
469vm_offset_t
470pmap_page_to_va(vm_page_t m)
471{
472	vm_paddr_t pa;
473	vm_offset_t va;
474
475	pa = VM_PAGE_TO_PHYS(m);
476	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
477	    IA64_PHYS_TO_RR7(pa);
478	return (va);
479}
480
481/*
482 *	Initialize a vm_page's machine-dependent fields.
483 */
484void
485pmap_page_init(vm_page_t m)
486{
487
488	TAILQ_INIT(&m->md.pv_list);
489	m->md.memattr = VM_MEMATTR_DEFAULT;
490}
491
492/*
493 *	Initialize the pmap module.
494 *	Called by vm_init, to initialize any structures that the pmap
495 *	system needs to map virtual memory.
496 */
497void
498pmap_init(void)
499{
500
501	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
502	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
503}
504
505
506/***************************************************
507 * Manipulate TLBs for a pmap
508 ***************************************************/
509
510static void
511pmap_invalidate_page(vm_offset_t va)
512{
513	struct ia64_lpte *pte;
514	struct pcpu *pc;
515	uint64_t tag;
516	u_int vhpt_ofs;
517
518	critical_enter();
519
520	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
521	tag = ia64_ttag(va);
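	/*
	 * Overwrite the matching entry in every CPU's VHPT with an
	 * invalid tag so the hardware walker cannot reinsert the stale
	 * translation; the broadcast ptc.ga below purges the TLBs.
	 */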
522	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
523		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
524		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
525	}
526
527	mtx_lock_spin(&pmap_ptc_mutex);
528
529	ia64_ptc_ga(va, PAGE_SHIFT << 2);
530	ia64_mf();
531	ia64_srlz_i();
532
533	mtx_unlock_spin(&pmap_ptc_mutex);
534
535	ia64_invala();
536
537	critical_exit();
538}
539
540static void
541pmap_invalidate_all_1(void *arg)
542{
543	uint64_t addr;
544	int i, j;
545
546	critical_enter();
547	addr = pmap_ptc_e_base;
548	for (i = 0; i < pmap_ptc_e_count1; i++) {
549		for (j = 0; j < pmap_ptc_e_count2; j++) {
550			ia64_ptc_e(addr);
551			addr += pmap_ptc_e_stride2;
552		}
553		addr += pmap_ptc_e_stride1;
554	}
555	critical_exit();
556}
557
558static void
559pmap_invalidate_all(void)
560{
561
562#ifdef SMP
563	if (mp_ncpus > 1) {
564		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
565		return;
566	}
567#endif
568	pmap_invalidate_all_1(NULL);
569}
570
571static uint32_t
572pmap_allocate_rid(void)
573{
574	uint64_t bit, bits;
575	int rid;
576
577	mtx_lock(&pmap_ridmutex);
578	if (pmap_ridcount == pmap_ridmax)
579		panic("pmap_allocate_rid: All Region IDs used");
580
581	/* Find an index with a free bit. */
582	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
583		pmap_rididx++;
584		if (pmap_rididx == pmap_ridmapsz)
585			pmap_rididx = 0;
586	}
587	rid = pmap_rididx * 64;
588
589	/* Find a free bit. */
590	bit = 1UL;
591	while (bits & bit) {
592		rid++;
593		bit <<= 1;
594	}
595
596	pmap_ridmap[pmap_rididx] |= bit;
597	pmap_ridcount++;
598	mtx_unlock(&pmap_ridmutex);
599
600	return rid;
601}
602
603static void
604pmap_free_rid(uint32_t rid)
605{
606	uint64_t bit;
607	int idx;
608
609	idx = rid / 64;
610	bit = ~(1UL << (rid & 63));
611
612	mtx_lock(&pmap_ridmutex);
613	pmap_ridmap[idx] &= bit;
614	pmap_ridcount--;
615	mtx_unlock(&pmap_ridmutex);
616}
617
618/***************************************************
619 * Page table page management routines.....
620 ***************************************************/
621
622void
623pmap_pinit0(struct pmap *pmap)
624{
625
626	PMAP_LOCK_INIT(pmap);
627	/* kernel_pmap is the same as any other pmap. */
628	pmap_pinit(pmap);
629}
630
631/*
632 * Initialize a preallocated and zeroed pmap structure,
633 * such as one in a vmspace structure.
634 */
635int
636pmap_pinit(struct pmap *pmap)
637{
638	int i;
639
640	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
641		pmap->pm_rid[i] = pmap_allocate_rid();
642	TAILQ_INIT(&pmap->pm_pvchunk);
643	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
644	return (1);
645}
646
647/***************************************************
648 * Pmap allocation/deallocation routines.
649 ***************************************************/
650
651/*
652 * Release any resources held by the given physical map.
653 * Called when a pmap initialized by pmap_pinit is being released.
654 * Should only be called if the map contains no valid mappings.
655 */
656void
657pmap_release(pmap_t pmap)
658{
659	int i;
660
661	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
662		if (pmap->pm_rid[i])
663			pmap_free_rid(pmap->pm_rid[i]);
664}
665
666/*
667 * grow the number of kernel page table entries, if needed
668 */
669void
670pmap_growkernel(vm_offset_t addr)
671{
672	struct ia64_lpte **dir1;
673	struct ia64_lpte *leaf;
674	vm_page_t nkpg;
675
676	while (kernel_vm_end <= addr) {
677		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
678			panic("%s: out of kernel address space", __func__);
679
680		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
681		if (dir1 == NULL) {
682			nkpg = vm_page_alloc(NULL, nkpt++,
683			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
684			if (!nkpg)
685				panic("%s: cannot add dir. page", __func__);
686
687			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
688			bzero(dir1, PAGE_SIZE);
689			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
690		}
691
692		nkpg = vm_page_alloc(NULL, nkpt++,
693		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
694		if (!nkpg)
695			panic("%s: cannot add PTE page", __func__);
696
697		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
698		bzero(leaf, PAGE_SIZE);
699		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
700
701		kernel_vm_end += PAGE_SIZE * NKPTEPG;
702	}
703}
704
705/***************************************************
706 * page management routines.
707 ***************************************************/
708
709CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
710
711static __inline struct pv_chunk *
712pv_to_chunk(pv_entry_t pv)
713{
714
715	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
716}
717
718#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
719
720#define	PC_FREE_FULL	0xfffffffffffffffful
721#define	PC_FREE_PARTIAL	\
722	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
723
724#if PAGE_SIZE == 8192
725static const u_long pc_freemask[_NPCM] = {
726	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
727	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
728};
729#elif PAGE_SIZE == 16384
730static const u_long pc_freemask[_NPCM] = {
731	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
732	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
733	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
734	PC_FREE_FULL, PC_FREE_PARTIAL
735};
736#endif
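
/*
 * Illustrative compile-time checks (the pc_freemask tables above encode the
 * same fact): the last map word covers only the pv entries left over after
 * the full 64-bit words, e.g. 337 = 5 * 64 + 17 with 8 KB pages and
 * 677 = 10 * 64 + 37 with 16 KB pages.
 */
CTASSERT(_NPCPV > (_NPCM - 1) * sizeof(u_long) * NBBY);
CTASSERT(_NPCPV <= _NPCM * sizeof(u_long) * NBBY);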
737
738static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
739
740SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
741    "Current number of pv entries");
742
743#ifdef PV_STATS
744static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
745
746SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
747    "Current number of pv entry chunks");
748SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
749    "Current number of pv entry chunks allocated");
750SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
751    "Current number of pv entry chunks frees");
752SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
753    "Number of times tried to get a chunk page but failed.");
754
755static long pv_entry_frees, pv_entry_allocs;
756static int pv_entry_spare;
757
758SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
759    "Current number of pv entry frees");
760SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
761    "Current number of pv entry allocs");
762SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
763    "Current number of spare pv entries");
764#endif
765
766/*
767 * We are in a serious low memory condition.  Resort to
768 * drastic measures to free some pages so we can allocate
769 * another pv entry chunk.
770 */
771static vm_page_t
772pmap_pv_reclaim(pmap_t locked_pmap)
773{
774	struct pch newtail;
775	struct pv_chunk *pc;
776	struct ia64_lpte *pte;
777	pmap_t pmap;
778	pv_entry_t pv;
779	vm_offset_t va;
780	vm_page_t m, m_pc;
781	u_long inuse;
782	int bit, field, freed, idx;
783
784	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
785	pmap = NULL;
786	m_pc = NULL;
787	TAILQ_INIT(&newtail);
788	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
789		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
790		if (pmap != pc->pc_pmap) {
791			if (pmap != NULL) {
792				if (pmap != locked_pmap) {
793					pmap_switch(locked_pmap);
794					PMAP_UNLOCK(pmap);
795				}
796			}
797			pmap = pc->pc_pmap;
798			/* Avoid deadlock and lock recursion. */
799			if (pmap > locked_pmap)
800				PMAP_LOCK(pmap);
801			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
802				pmap = NULL;
803				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
804				continue;
805			}
806			pmap_switch(pmap);
807		}
808
809		/*
810		 * Destroy every non-wired, 8 KB page mapping in the chunk.
811		 */
812		freed = 0;
813		for (field = 0; field < _NPCM; field++) {
814			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
815			    inuse != 0; inuse &= ~(1UL << bit)) {
816				bit = ffsl(inuse) - 1;
817				idx = field * sizeof(inuse) * NBBY + bit;
818				pv = &pc->pc_pventry[idx];
819				va = pv->pv_va;
820				pte = pmap_find_vhpt(va);
821				KASSERT(pte != NULL, ("pte"));
822				if (pmap_wired(pte))
823					continue;
824				pmap_remove_vhpt(va);
825				pmap_invalidate_page(va);
826				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
827				if (pmap_accessed(pte))
828					vm_page_aflag_set(m, PGA_REFERENCED);
829				if (pmap_dirty(pte))
830					vm_page_dirty(m);
831				pmap_free_pte(pte, va);
832				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
833				if (TAILQ_EMPTY(&m->md.pv_list))
834					vm_page_aflag_clear(m, PGA_WRITEABLE);
835				pc->pc_map[field] |= 1UL << bit;
836				freed++;
837			}
838		}
839		if (freed == 0) {
840			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
841			continue;
842		}
843		/* Every freed mapping is for an 8 KB page. */
844		pmap->pm_stats.resident_count -= freed;
845		PV_STAT(pv_entry_frees += freed);
846		PV_STAT(pv_entry_spare += freed);
847		pv_entry_count -= freed;
848		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
849		for (field = 0; field < _NPCM; field++)
850			if (pc->pc_map[field] != pc_freemask[field]) {
851				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
852				    pc_list);
853				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
854
855				/*
856				 * One freed pv entry in locked_pmap is
857				 * sufficient.
858				 */
859				if (pmap == locked_pmap)
860					goto out;
861				break;
862			}
863		if (field == _NPCM) {
864			PV_STAT(pv_entry_spare -= _NPCPV);
865			PV_STAT(pc_chunk_count--);
866			PV_STAT(pc_chunk_frees++);
867			/* Entire chunk is free; return it. */
868			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
869			break;
870		}
871	}
872out:
873	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
874	if (pmap != NULL) {
875		if (pmap != locked_pmap) {
876			pmap_switch(locked_pmap);
877			PMAP_UNLOCK(pmap);
878		}
879	}
880	return (m_pc);
881}
882
883/*
884 * free the pv_entry back to the free list
885 */
886static void
887free_pv_entry(pmap_t pmap, pv_entry_t pv)
888{
889	struct pv_chunk *pc;
890	int bit, field, idx;
891
892	rw_assert(&pvh_global_lock, RA_WLOCKED);
893	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
894	PV_STAT(pv_entry_frees++);
895	PV_STAT(pv_entry_spare++);
896	pv_entry_count--;
897	pc = pv_to_chunk(pv);
898	idx = pv - &pc->pc_pventry[0];
899	field = idx / (sizeof(u_long) * NBBY);
900	bit = idx % (sizeof(u_long) * NBBY);
901	pc->pc_map[field] |= 1ul << bit;
902	for (idx = 0; idx < _NPCM; idx++)
903		if (pc->pc_map[idx] != pc_freemask[idx]) {
904			/*
905			 * 98% of the time, pc is already at the head of the
906			 * list.  If it isn't already, move it to the head.
907			 */
908			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
909			    pc)) {
910				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
911				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
912				    pc_list);
913			}
914			return;
915		}
916	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
917	free_pv_chunk(pc);
918}
919
920static void
921free_pv_chunk(struct pv_chunk *pc)
922{
923	vm_page_t m;
924
925 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
926	PV_STAT(pv_entry_spare -= _NPCPV);
927	PV_STAT(pc_chunk_count--);
928	PV_STAT(pc_chunk_frees++);
929	/* entire chunk is free, return it */
930	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
931	vm_page_unwire(m, 0);
932	vm_page_free(m);
933}
934
935/*
936 * get a new pv_entry, allocating a block from the system
937 * when needed.
938 */
939static pv_entry_t
940get_pv_entry(pmap_t pmap, boolean_t try)
941{
942	struct pv_chunk *pc;
943	pv_entry_t pv;
944	vm_page_t m;
945	int bit, field, idx;
946
947	rw_assert(&pvh_global_lock, RA_WLOCKED);
948	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
949	PV_STAT(pv_entry_allocs++);
950	pv_entry_count++;
951retry:
952	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
953	if (pc != NULL) {
954		for (field = 0; field < _NPCM; field++) {
955			if (pc->pc_map[field]) {
956				bit = ffsl(pc->pc_map[field]) - 1;
957				break;
958			}
959		}
960		if (field < _NPCM) {
961			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
962			pv = &pc->pc_pventry[idx];
963			pc->pc_map[field] &= ~(1ul << bit);
964			/* If this was the last item, move it to tail */
965			for (field = 0; field < _NPCM; field++)
966				if (pc->pc_map[field] != 0) {
967					PV_STAT(pv_entry_spare--);
968					return (pv);	/* not full, return */
969				}
970			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
971			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
972			PV_STAT(pv_entry_spare--);
973			return (pv);
974		}
975	}
976	/* No free items, allocate another chunk */
977	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
978	    VM_ALLOC_WIRED);
979	if (m == NULL) {
980		if (try) {
981			pv_entry_count--;
982			PV_STAT(pc_chunk_tryfail++);
983			return (NULL);
984		}
985		m = pmap_pv_reclaim(pmap);
986		if (m == NULL)
987			goto retry;
988	}
989	PV_STAT(pc_chunk_count++);
990	PV_STAT(pc_chunk_allocs++);
991	pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
992	pc->pc_pmap = pmap;
993	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
994	for (field = 1; field < _NPCM; field++)
995		pc->pc_map[field] = pc_freemask[field];
996	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
997	pv = &pc->pc_pventry[0];
998	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
999	PV_STAT(pv_entry_spare += _NPCPV - 1);
1000	return (pv);
1001}
1002
1003/*
1004 * Conditionally create a pv entry.
1005 */
1006static boolean_t
1007pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1008{
1009	pv_entry_t pv;
1010
1011	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1012	rw_assert(&pvh_global_lock, RA_WLOCKED);
1013	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1014		pv->pv_va = va;
1015		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1016		return (TRUE);
1017	} else
1018		return (FALSE);
1019}
1020
1021/*
1022 * Add an ia64_lpte to the VHPT.
1023 */
1024static void
1025pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1026{
1027	struct ia64_bucket *bckt;
1028	struct ia64_lpte *vhpte;
1029	uint64_t pte_pa;
1030
1031	/* Can fault, so get it out of the way. */
1032	pte_pa = ia64_tpa((vm_offset_t)pte);
1033
1034	vhpte = (struct ia64_lpte *)ia64_thash(va);
1035	bckt = (struct ia64_bucket *)vhpte->chain;
1036
1037	mtx_lock_spin(&bckt->mutex);
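	/*
	 * Publish the new entry's forward link before making it reachable
	 * from the bucket head; the ia64_mf() in between keeps a racing
	 * chain walk from following an uninitialized link.
	 */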
1038	pte->chain = bckt->chain;
1039	ia64_mf();
1040	bckt->chain = pte_pa;
1041
1042	pmap_vhpt_inserts++;
1043	bckt->length++;
1044	mtx_unlock_spin(&bckt->mutex);
1045}
1046
1047/*
1048 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1049 * worked or an appropriate error code otherwise.
1050 */
1051static int
1052pmap_remove_vhpt(vm_offset_t va)
1053{
1054	struct ia64_bucket *bckt;
1055	struct ia64_lpte *pte;
1056	struct ia64_lpte *lpte;
1057	struct ia64_lpte *vhpte;
1058	uint64_t chain, tag;
1059
1060	tag = ia64_ttag(va);
1061	vhpte = (struct ia64_lpte *)ia64_thash(va);
1062	bckt = (struct ia64_bucket *)vhpte->chain;
1063
1064	lpte = NULL;
1065	mtx_lock_spin(&bckt->mutex);
1066	chain = bckt->chain;
1067	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1068	while (chain != 0 && pte->tag != tag) {
1069		lpte = pte;
1070		chain = pte->chain;
1071		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1072	}
1073	if (chain == 0) {
1074		mtx_unlock_spin(&bckt->mutex);
1075		return (ENOENT);
1076	}
1077
1078	/* Snip this pte out of the collision chain. */
1079	if (lpte == NULL)
1080		bckt->chain = pte->chain;
1081	else
1082		lpte->chain = pte->chain;
1083	ia64_mf();
1084
1085	bckt->length--;
1086	mtx_unlock_spin(&bckt->mutex);
1087	return (0);
1088}
1089
1090/*
1091 * Find the ia64_lpte for the given va, if any.
1092 */
1093static struct ia64_lpte *
1094pmap_find_vhpt(vm_offset_t va)
1095{
1096	struct ia64_bucket *bckt;
1097	struct ia64_lpte *pte;
1098	uint64_t chain, tag;
1099
1100	tag = ia64_ttag(va);
1101	pte = (struct ia64_lpte *)ia64_thash(va);
1102	bckt = (struct ia64_bucket *)pte->chain;
1103
1104	mtx_lock_spin(&bckt->mutex);
1105	chain = bckt->chain;
1106	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1107	while (chain != 0 && pte->tag != tag) {
1108		chain = pte->chain;
1109		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1110	}
1111	mtx_unlock_spin(&bckt->mutex);
1112	return ((chain != 0) ? pte : NULL);
1113}
1114
1115/*
1116 * Remove an entry from the list of managed mappings.
1117 */
1118static int
1119pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1120{
1121
1122	rw_assert(&pvh_global_lock, RA_WLOCKED);
1123	if (!pv) {
1124		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1125			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1126				break;
1127		}
1128	}
1129
1130	if (pv) {
1131		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1132		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1133			vm_page_aflag_clear(m, PGA_WRITEABLE);
1134
1135		free_pv_entry(pmap, pv);
1136		return 0;
1137	} else {
1138		return ENOENT;
1139	}
1140}
1141
1142/*
1143 * Create a pv entry for the given page at
1144 * (pmap, va).
1145 */
1146static void
1147pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1148{
1149	pv_entry_t pv;
1150
1151	rw_assert(&pvh_global_lock, RA_WLOCKED);
1152	pv = get_pv_entry(pmap, FALSE);
1153	pv->pv_va = va;
1154	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1155}
1156
1157/*
1158 *	Routine:	pmap_extract
1159 *	Function:
1160 *		Extract the physical page address associated
1161 *		with the given map/virtual_address pair.
1162 */
1163vm_paddr_t
1164pmap_extract(pmap_t pmap, vm_offset_t va)
1165{
1166	struct ia64_lpte *pte;
1167	pmap_t oldpmap;
1168	vm_paddr_t pa;
1169
1170	pa = 0;
1171	PMAP_LOCK(pmap);
1172	oldpmap = pmap_switch(pmap);
1173	pte = pmap_find_vhpt(va);
1174	if (pte != NULL && pmap_present(pte))
1175		pa = pmap_ppn(pte);
1176	pmap_switch(oldpmap);
1177	PMAP_UNLOCK(pmap);
1178	return (pa);
1179}
1180
1181/*
1182 *	Routine:	pmap_extract_and_hold
1183 *	Function:
1184 *		Atomically extract and hold the physical page
1185 *		with the given pmap and virtual address pair
1186 *		if that mapping permits the given protection.
1187 */
1188vm_page_t
1189pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1190{
1191	struct ia64_lpte *pte;
1192	pmap_t oldpmap;
1193	vm_page_t m;
1194	vm_paddr_t pa;
1195
1196	pa = 0;
1197	m = NULL;
1198	PMAP_LOCK(pmap);
1199	oldpmap = pmap_switch(pmap);
1200retry:
1201	pte = pmap_find_vhpt(va);
1202	if (pte != NULL && pmap_present(pte) &&
1203	    (pmap_prot(pte) & prot) == prot) {
1204		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1205		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1206			goto retry;
1207		vm_page_hold(m);
1208	}
1209	PA_UNLOCK_COND(pa);
1210	pmap_switch(oldpmap);
1211	PMAP_UNLOCK(pmap);
1212	return (m);
1213}
1214
1215/***************************************************
1216 * Low level mapping routines.....
1217 ***************************************************/
1218
1219/*
1220 * Find the kernel lpte for mapping the given virtual address, which
1221 * must be in the part of region 5 which we can cover with our kernel
1222 * 'page tables'.
1223 */
1224static struct ia64_lpte *
1225pmap_find_kpte(vm_offset_t va)
1226{
1227	struct ia64_lpte **dir1;
1228	struct ia64_lpte *leaf;
1229
1230	KASSERT((va >> 61) == 5,
1231		("kernel mapping 0x%lx not in region 5", va));
1232	KASSERT(va < kernel_vm_end,
1233		("kernel mapping 0x%lx out of range", va));
1234
1235	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1236	leaf = dir1[KPTE_DIR1_INDEX(va)];
1237	return (&leaf[KPTE_PTE_INDEX(va)]);
1238}
1239
1240/*
1241 * Find a pte suitable for mapping a user-space address. If one exists
1242 * in the VHPT, that one will be returned, otherwise a new pte is
1243 * allocated.
1244 */
1245static struct ia64_lpte *
1246pmap_find_pte(vm_offset_t va)
1247{
1248	struct ia64_lpte *pte;
1249
1250	if (va >= VM_MAXUSER_ADDRESS)
1251		return pmap_find_kpte(va);
1252
1253	pte = pmap_find_vhpt(va);
1254	if (pte == NULL) {
1255		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1256		pte->tag = 1UL << 63;
1257	}
1258	return (pte);
1259}
1260
1261/*
1262 * Free a pte which is now unused. This simply returns it to the zone
1263 * allocator if it is a user mapping. For kernel mappings, clear the
1264 * valid bit to make it clear that the mapping is not currently used.
1265 */
1266static void
1267pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1268{
1269	if (va < VM_MAXUSER_ADDRESS)
1270		uma_zfree(ptezone, pte);
1271	else
1272		pmap_clear_present(pte);
1273}
1274
1275static PMAP_INLINE void
1276pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1277{
1278	static long prot2ar[4] = {
1279		PTE_AR_R,		/* VM_PROT_NONE */
1280		PTE_AR_RW,		/* VM_PROT_WRITE */
1281		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1282		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1283	};
1284
1285	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1286	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1287	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1288	    ? PTE_PL_KERN : PTE_PL_USER;
1289	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1290}
1291
1292static PMAP_INLINE void
1293pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1294{
1295
1296	pte->pte &= ~PTE_MA_MASK;
1297	pte->pte |= (ma & PTE_MA_MASK);
1298}
1299
1300/*
1301 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1302 * the pte was originally valid, then it's assumed to already be in the
1303 * VHPT.
1304 * This function does not set the protection bits.  It's expected
1305 * that those have been set correctly prior to calling this function.
1306 */
1307static void
1308pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1309    boolean_t wired, boolean_t managed)
1310{
1311
1312	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1313	    PTE_AR_MASK | PTE_ED;
1314	pte->pte |= PTE_PRESENT;
1315	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1316	pte->pte |= (wired) ? PTE_WIRED : 0;
1317	pte->pte |= pa & PTE_PPN_MASK;
1318
1319	pte->itir = PAGE_SHIFT << 2;
1320
1321	ia64_mf();
1322
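	/*
	 * The tag is written last, after the fence, so the entry never
	 * appears valid to a VHPT lookup while pte/itir are still in flux.
	 */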
1323	pte->tag = ia64_ttag(va);
1324}
1325
1326/*
1327 * Remove the (possibly managed) mapping represented by pte from the
1328 * given pmap.
1329 */
1330static int
1331pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1332		pv_entry_t pv, int freepte)
1333{
1334	int error;
1335	vm_page_t m;
1336
1337	/*
1338	 * First remove from the VHPT.
1339	 */
1340	error = pmap_remove_vhpt(va);
1341	KASSERT(error == 0, ("%s: pmap_remove_vhpt returned %d",
1342	    __func__, error));
1343
1344	pmap_invalidate_page(va);
1345
1346	if (pmap_wired(pte))
1347		pmap->pm_stats.wired_count -= 1;
1348
1349	pmap->pm_stats.resident_count -= 1;
1350	if (pmap_managed(pte)) {
1351		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1352		if (pmap_dirty(pte))
1353			vm_page_dirty(m);
1354		if (pmap_accessed(pte))
1355			vm_page_aflag_set(m, PGA_REFERENCED);
1356
1357		error = pmap_remove_entry(pmap, m, va, pv);
1358	}
1359	if (freepte)
1360		pmap_free_pte(pte, va);
1361
1362	return (error);
1363}
1364
1365/*
1366 * Extract the physical page address associated with a kernel
1367 * virtual address.
1368 */
1369vm_paddr_t
1370pmap_kextract(vm_offset_t va)
1371{
1372	struct ia64_lpte *pte;
1373	uint64_t *pbvm_pgtbl;
1374	vm_paddr_t pa;
1375	u_int idx;
1376
1377	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1378
1379	/* Regions 6 and 7 are direct mapped. */
1380	if (va >= IA64_RR_BASE(6)) {
1381		pa = IA64_RR_MASK(va);
1382		goto out;
1383	}
1384
1385	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1386	if (va >= kernel_vm_end)
1387		goto err_out;
1388	if (va >= VM_INIT_KERNEL_ADDRESS) {
1389		pte = pmap_find_kpte(va);
1390		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1391		goto out;
1392	}
1393
1394	/* The PBVM page table. */
1395	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1396		goto err_out;
1397	if (va >= IA64_PBVM_PGTBL) {
1398		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1399		goto out;
1400	}
1401
1402	/* The PBVM itself. */
1403	if (va >= IA64_PBVM_BASE) {
1404		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1405		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1406		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1407			goto err_out;
1408		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1409			goto err_out;
1410		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1411		    (va & IA64_PBVM_PAGE_MASK);
1412		goto out;
1413	}
1414
1415 err_out:
1416	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1417	pa = 0;
1418	/* FALLTHROUGH */
1419
1420 out:
1421	return (pa);
1422}
1423
1424/*
1425 * Add a list of wired pages to the kva.  This routine is only used for
1426 * temporary kernel mappings that do not need to have page modification
1427 * or references recorded.  Note that old mappings are simply written
1428 * over.  The page is effectively wired, but it's customary to not have
1429 * the PTE reflect that, nor update statistics.
1430 */
1431void
1432pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1433{
1434	struct ia64_lpte *pte;
1435	int i;
1436
1437	for (i = 0; i < count; i++) {
1438		pte = pmap_find_kpte(va);
1439		if (pmap_present(pte))
1440			pmap_invalidate_page(va);
1441		else
1442			pmap_enter_vhpt(pte, va);
1443		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1444		pmap_pte_attr(pte, m[i]->md.memattr);
1445		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1446		va += PAGE_SIZE;
1447	}
1448}
1449
1450/*
1451 * this routine jerks page mappings from the
1452 * kernel -- it is meant only for temporary mappings.
1453 */
1454void
1455pmap_qremove(vm_offset_t va, int count)
1456{
1457	struct ia64_lpte *pte;
1458	int i;
1459
1460	for (i = 0; i < count; i++) {
1461		pte = pmap_find_kpte(va);
1462		if (pmap_present(pte)) {
1463			pmap_remove_vhpt(va);
1464			pmap_invalidate_page(va);
1465			pmap_clear_present(pte);
1466		}
1467		va += PAGE_SIZE;
1468	}
1469}
1470
1471/*
1472 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1473 * to not have the PTE reflect that, nor update statistics.
1474 */
1475void
1476pmap_kenter(vm_offset_t va, vm_offset_t pa)
1477{
1478	struct ia64_lpte *pte;
1479
1480	pte = pmap_find_kpte(va);
1481	if (pmap_present(pte))
1482		pmap_invalidate_page(va);
1483	else
1484		pmap_enter_vhpt(pte, va);
1485	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1486	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1487	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1488}
1489
1490/*
1491 * Remove a page from the kva
1492 */
1493void
1494pmap_kremove(vm_offset_t va)
1495{
1496	struct ia64_lpte *pte;
1497
1498	pte = pmap_find_kpte(va);
1499	if (pmap_present(pte)) {
1500		pmap_remove_vhpt(va);
1501		pmap_invalidate_page(va);
1502		pmap_clear_present(pte);
1503	}
1504}
1505
1506/*
1507 *	Used to map a range of physical addresses into kernel
1508 *	virtual address space.
1509 *
1510 *	The value passed in '*virt' is a suggested virtual address for
1511 *	the mapping. Architectures which can support a direct-mapped
1512 *	physical to virtual region can return the appropriate address
1513 *	within that region, leaving '*virt' unchanged. Other
1514 *	architectures should map the pages starting at '*virt' and
1515 *	update '*virt' with the first usable address after the mapped
1516 *	region.
1517 */
1518vm_offset_t
1519pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1520{
1521	return IA64_PHYS_TO_RR7(start);
1522}
1523
1524/*
1525 *	Remove the given range of addresses from the specified map.
1526 *
1527 *	It is assumed that the start and end are properly
1528 *	rounded to the page size.
1529 *
1530 *	Sparsely used ranges are inefficiently removed.  The VHPT is
1531 *	probed for every page within the range.  XXX
1532 */
1533void
1534pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1535{
1536	pmap_t oldpmap;
1537	vm_offset_t va;
1538	struct ia64_lpte *pte;
1539
1540	/*
1541	 * Perform an unsynchronized read.  This is, however, safe.
1542	 */
1543	if (pmap->pm_stats.resident_count == 0)
1544		return;
1545
1546	rw_wlock(&pvh_global_lock);
1547	PMAP_LOCK(pmap);
1548	oldpmap = pmap_switch(pmap);
1549	for (va = sva; va < eva; va += PAGE_SIZE) {
1550		pte = pmap_find_vhpt(va);
1551		if (pte != NULL)
1552			pmap_remove_pte(pmap, pte, va, 0, 1);
1553	}
1554	rw_wunlock(&pvh_global_lock);
1555	pmap_switch(oldpmap);
1556	PMAP_UNLOCK(pmap);
1557}
1558
1559/*
1560 *	Routine:	pmap_remove_all
1561 *	Function:
1562 *		Removes this physical page from
1563 *		all physical maps in which it resides.
1564 *		Reflects back modify bits to the pager.
1565 *
1566 *	Notes:
1567 *		Original versions of this routine were very
1568 *		inefficient because they iteratively called
1569 *		pmap_remove (slow...)
1570 */
1571
1572void
1573pmap_remove_all(vm_page_t m)
1574{
1575	pmap_t oldpmap;
1576	pv_entry_t pv;
1577
1578	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1579	    ("pmap_remove_all: page %p is not managed", m));
1580	rw_wlock(&pvh_global_lock);
1581	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1582		struct ia64_lpte *pte;
1583		pmap_t pmap = PV_PMAP(pv);
1584		vm_offset_t va = pv->pv_va;
1585
1586		PMAP_LOCK(pmap);
1587		oldpmap = pmap_switch(pmap);
1588		pte = pmap_find_vhpt(va);
1589		KASSERT(pte != NULL, ("pte"));
1590		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1591			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1592		pmap_remove_pte(pmap, pte, va, pv, 1);
1593		pmap_switch(oldpmap);
1594		PMAP_UNLOCK(pmap);
1595	}
1596	vm_page_aflag_clear(m, PGA_WRITEABLE);
1597	rw_wunlock(&pvh_global_lock);
1598}
1599
1600/*
1601 *	Set the physical protection on the
1602 *	specified range of this map as requested.
1603 */
1604void
1605pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1606{
1607	pmap_t oldpmap;
1608	struct ia64_lpte *pte;
1609
1610	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1611		pmap_remove(pmap, sva, eva);
1612		return;
1613	}
1614
1615	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1616	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1617		return;
1618
1619	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1620		panic("pmap_protect: unaligned addresses");
1621
1622	PMAP_LOCK(pmap);
1623	oldpmap = pmap_switch(pmap);
1624	for ( ; sva < eva; sva += PAGE_SIZE) {
1625		/* If page is invalid, skip this page */
1626		pte = pmap_find_vhpt(sva);
1627		if (pte == NULL)
1628			continue;
1629
1630		/* If there's no change, skip it too */
1631		if (pmap_prot(pte) == prot)
1632			continue;
1633
1634		if ((prot & VM_PROT_WRITE) == 0 &&
1635		    pmap_managed(pte) && pmap_dirty(pte)) {
1636			vm_paddr_t pa = pmap_ppn(pte);
1637			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1638
1639			vm_page_dirty(m);
1640			pmap_clear_dirty(pte);
1641		}
1642
1643		if (prot & VM_PROT_EXECUTE)
1644			ia64_sync_icache(sva, PAGE_SIZE);
1645
1646		pmap_pte_prot(pmap, pte, prot);
1647		pmap_invalidate_page(sva);
1648	}
1649	pmap_switch(oldpmap);
1650	PMAP_UNLOCK(pmap);
1651}
1652
1653/*
1654 *	Insert the given physical page (p) at
1655 *	the specified virtual address (v) in the
1656 *	target physical map with the protection requested.
1657 *
1658 *	If specified, the page will be wired down, meaning
1659 *	that the related pte can not be reclaimed.
1660 *
1661 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1662 *	or lose information.  That is, this routine must actually
1663 *	insert this page into the given map NOW.
1664 */
1665void
1666pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1667    vm_prot_t prot, boolean_t wired)
1668{
1669	pmap_t oldpmap;
1670	vm_offset_t pa;
1671	vm_offset_t opa;
1672	struct ia64_lpte origpte;
1673	struct ia64_lpte *pte;
1674	boolean_t icache_inval, managed;
1675
1676	rw_wlock(&pvh_global_lock);
1677	PMAP_LOCK(pmap);
1678	oldpmap = pmap_switch(pmap);
1679
1680	va &= ~PAGE_MASK;
1681 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1682	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
1683	    ("pmap_enter: page %p is not busy", m));
1684
1685	/*
1686	 * Find (or create) a pte for the given mapping.
1687	 */
1688	while ((pte = pmap_find_pte(va)) == NULL) {
1689		pmap_switch(oldpmap);
1690		PMAP_UNLOCK(pmap);
1691		rw_wunlock(&pvh_global_lock);
1692		VM_WAIT;
1693		rw_wlock(&pvh_global_lock);
1694		PMAP_LOCK(pmap);
1695		oldpmap = pmap_switch(pmap);
1696	}
1697	origpte = *pte;
1698	if (!pmap_present(pte)) {
1699		opa = ~0UL;
1700		pmap_enter_vhpt(pte, va);
1701	} else
1702		opa = pmap_ppn(pte);
1703	managed = FALSE;
1704	pa = VM_PAGE_TO_PHYS(m);
1705
1706	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1707
1708	/*
1709	 * Mapping has not changed, must be protection or wiring change.
1710	 */
1711	if (opa == pa) {
1712		/*
1713		 * Wiring change, just update stats. We don't worry about
1714		 * wiring PT pages as they remain resident as long as there
1715		 * are valid mappings in them. Hence, if a user page is wired,
1716		 * the PT page will be also.
1717		 */
1718		if (wired && !pmap_wired(&origpte))
1719			pmap->pm_stats.wired_count++;
1720		else if (!wired && pmap_wired(&origpte))
1721			pmap->pm_stats.wired_count--;
1722
1723		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1724
1725		/*
1726		 * We might be turning off write access to the page,
1727		 * so we go ahead and sense modify status. Otherwise,
1728		 * we can avoid I-cache invalidation if the page
1729		 * already allowed execution.
1730		 */
1731		if (managed && pmap_dirty(&origpte))
1732			vm_page_dirty(m);
1733		else if (pmap_exec(&origpte))
1734			icache_inval = FALSE;
1735
1736		pmap_invalidate_page(va);
1737		goto validate;
1738	}
1739
1740	/*
1741	 * Mapping has changed, invalidate old range and fall
1742	 * through to handle validating new mapping.
1743	 */
1744	if (opa != ~0UL) {
1745		pmap_remove_pte(pmap, pte, va, 0, 0);
1746		pmap_enter_vhpt(pte, va);
1747	}
1748
1749	/*
1750	 * Enter on the PV list if part of our managed memory.
1751	 */
1752	if ((m->oflags & VPO_UNMANAGED) == 0) {
1753		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1754		    ("pmap_enter: managed mapping within the clean submap"));
1755		pmap_insert_entry(pmap, va, m);
1756		managed = TRUE;
1757	}
1758
1759	/*
1760	 * Increment counters
1761	 */
1762	pmap->pm_stats.resident_count++;
1763	if (wired)
1764		pmap->pm_stats.wired_count++;
1765
1766validate:
1767
1768	/*
1769	 * Now validate mapping with desired protection/wiring. This
1770	 * adds the pte to the VHPT if necessary.
1771	 */
1772	pmap_pte_prot(pmap, pte, prot);
1773	pmap_pte_attr(pte, m->md.memattr);
1774	pmap_set_pte(pte, va, pa, wired, managed);
1775
1776	/* Invalidate the I-cache when needed. */
1777	if (icache_inval)
1778		ia64_sync_icache(va, PAGE_SIZE);
1779
1780	if ((prot & VM_PROT_WRITE) != 0 && managed)
1781		vm_page_aflag_set(m, PGA_WRITEABLE);
1782	rw_wunlock(&pvh_global_lock);
1783	pmap_switch(oldpmap);
1784	PMAP_UNLOCK(pmap);
1785}
1786
1787/*
1788 * Maps a sequence of resident pages belonging to the same object.
1789 * The sequence begins with the given page m_start.  This page is
1790 * mapped at the given virtual address start.  Each subsequent page is
1791 * mapped at a virtual address that is offset from start by the same
1792 * amount as the page is offset from m_start within the object.  The
1793 * last page in the sequence is the page with the largest offset from
1794 * m_start that can be mapped at a virtual address less than the given
1795 * virtual address end.  Not every virtual page between start and end
1796 * is mapped; only those for which a resident page exists with the
1797 * corresponding offset from m_start are mapped.
1798 */
1799void
1800pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1801    vm_page_t m_start, vm_prot_t prot)
1802{
1803	pmap_t oldpmap;
1804	vm_page_t m;
1805	vm_pindex_t diff, psize;
1806
1807	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1808
1809	psize = atop(end - start);
1810	m = m_start;
1811	rw_wlock(&pvh_global_lock);
1812	PMAP_LOCK(pmap);
1813	oldpmap = pmap_switch(pmap);
1814	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1815		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1816		m = TAILQ_NEXT(m, listq);
1817	}
1818	rw_wunlock(&pvh_global_lock);
1819	pmap_switch(oldpmap);
1820 	PMAP_UNLOCK(pmap);
1821}
1822
1823/*
1824 * this code makes some *MAJOR* assumptions:
1825 * 1. Current pmap & pmap exists.
1826 * 2. Not wired.
1827 * 3. Read access.
1828 * 4. No page table pages.
1829 * but is *MUCH* faster than pmap_enter...
1830 */
1831
1832void
1833pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1834{
1835	pmap_t oldpmap;
1836
1837	rw_wlock(&pvh_global_lock);
1838	PMAP_LOCK(pmap);
1839	oldpmap = pmap_switch(pmap);
1840	pmap_enter_quick_locked(pmap, va, m, prot);
1841	rw_wunlock(&pvh_global_lock);
1842	pmap_switch(oldpmap);
1843	PMAP_UNLOCK(pmap);
1844}
1845
1846static void
1847pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1848    vm_prot_t prot)
1849{
1850	struct ia64_lpte *pte;
1851	boolean_t managed;
1852
1853	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1854	    (m->oflags & VPO_UNMANAGED) != 0,
1855	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1856	rw_assert(&pvh_global_lock, RA_WLOCKED);
1857	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1858
1859	if ((pte = pmap_find_pte(va)) == NULL)
1860		return;
1861
1862	if (!pmap_present(pte)) {
1863		/* Enter on the PV list if the page is managed. */
1864		if ((m->oflags & VPO_UNMANAGED) == 0) {
1865			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1866				pmap_free_pte(pte, va);
1867				return;
1868			}
1869			managed = TRUE;
1870		} else
1871			managed = FALSE;
1872
1873		/* Increment counters. */
1874		pmap->pm_stats.resident_count++;
1875
1876		/* Initialise with R/O protection and enter into VHPT. */
1877		pmap_enter_vhpt(pte, va);
1878		pmap_pte_prot(pmap, pte,
1879		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1880		pmap_pte_attr(pte, m->md.memattr);
1881		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1882
1883		if (prot & VM_PROT_EXECUTE)
1884			ia64_sync_icache(va, PAGE_SIZE);
1885	}
1886}
1887
1888/*
1889 * pmap_object_init_pt preloads the ptes for a given object
1890 * into the specified pmap.  This eliminates the blast of soft
1891 * faults on process startup and immediately after an mmap.
1892 */
1893void
1894pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1895		    vm_object_t object, vm_pindex_t pindex,
1896		    vm_size_t size)
1897{
1898
1899	VM_OBJECT_ASSERT_WLOCKED(object);
1900	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1901	    ("pmap_object_init_pt: non-device object"));
1902}
1903
1904/*
1905 *	Routine:	pmap_change_wiring
1906 *	Function:	Change the wiring attribute for a map/virtual-address
1907 *			pair.
1908 *	In/out conditions:
1909 *			The mapping must already exist in the pmap.
1910 */
1911void
1912pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1916{
1917	pmap_t oldpmap;
1918	struct ia64_lpte *pte;
1919
1920	PMAP_LOCK(pmap);
1921	oldpmap = pmap_switch(pmap);
1922
1923	pte = pmap_find_vhpt(va);
1924	KASSERT(pte != NULL, ("pte"));
1925	if (wired && !pmap_wired(pte)) {
1926		pmap->pm_stats.wired_count++;
1927		pmap_set_wired(pte);
1928	} else if (!wired && pmap_wired(pte)) {
1929		pmap->pm_stats.wired_count--;
1930		pmap_clear_wired(pte);
1931	}
1932
1933	pmap_switch(oldpmap);
1934	PMAP_UNLOCK(pmap);
1935}
1936
1937
1938
1939/*
1940 *	Copy the range specified by src_addr/len
1941 *	from the source map to the range dst_addr/len
1942 *	in the destination map.
1943 *
1944 *	This routine is only advisory and need not do anything.
1945 */
1946
1947void
1948pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1949	  vm_offset_t src_addr)
1950{
1951}
1952
1953
1954/*
1955 *	pmap_zero_page zeros the specified hardware page by
1956 *	mapping it into virtual memory and using bzero to clear
1957 *	its contents.
1958 */
1959
1960void
1961pmap_zero_page(vm_page_t m)
1962{
1963	void *p;
1964
1965	p = (void *)pmap_page_to_va(m);
1966	bzero(p, PAGE_SIZE);
1967}
1968
1969
1970/*
1971 *	pmap_zero_page_area zeros the specified hardware page by
1972 *	mapping it into virtual memory and using bzero to clear
1973 *	its contents.
1974 *
1975 *	off and size must reside within a single page.
1976 */
1977
1978void
1979pmap_zero_page_area(vm_page_t m, int off, int size)
1980{
1981	char *p;
1982
1983	p = (char *)pmap_page_to_va(m);
1984	bzero(p + off, size);
1985}
1986
1987
1988/*
1989 *	pmap_zero_page_idle zeros the specified hardware page by
1990 *	mapping it into virtual memory and using bzero to clear
1991 *	its contents.  This is for the vm_idlezero process.
1992 */
1993
1994void
1995pmap_zero_page_idle(vm_page_t m)
1996{
1997	void *p;
1998
1999	p = (void *)pmap_page_to_va(m);
2000	bzero(p, PAGE_SIZE);
2001}
2002
2003
2004/*
2005 *	pmap_copy_page copies the specified (machine independent)
2006 *	page by mapping the page into virtual memory and using
2007 *	bcopy to copy the page, one machine dependent page at a
2008 *	time.
2009 */
2010void
2011pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2012{
2013	void *dst, *src;
2014
2015	src = (void *)pmap_page_to_va(msrc);
2016	dst = (void *)pmap_page_to_va(mdst);
2017	bcopy(src, dst, PAGE_SIZE);
2018}
2019
2020int unmapped_buf_allowed;
2021
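/*
 * Copy xfersize bytes between two arrays of pages: from ma[] starting at
 * byte offset a_offset into mb[] starting at b_offset.  The pages are
 * addressed through the direct map, so each iteration copies the largest
 * chunk that crosses a page boundary in neither the source nor the
 * destination; e.g. if the source offset is 0x100 bytes short of the end
 * of its page while the destination page has more than that remaining,
 * the first chunk is limited to 0x100 bytes by the source.
 */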
2022void
2023pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2024    vm_offset_t b_offset, int xfersize)
2025{
2026	void *a_cp, *b_cp;
2027	vm_offset_t a_pg_offset, b_pg_offset;
2028	int cnt;
2029
2030	while (xfersize > 0) {
2031		a_pg_offset = a_offset & PAGE_MASK;
2032		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2033		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
2034		    a_pg_offset;
2035		b_pg_offset = b_offset & PAGE_MASK;
2036		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2037		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
2038		    b_pg_offset;
2039		bcopy(a_cp, b_cp, cnt);
2040		a_offset += cnt;
2041		b_offset += cnt;
2042		xfersize -= cnt;
2043	}
2044}
2045
2046/*
2047 * Returns true if the pmap's pv is one of the first
2048 * 16 pvs linked to from this page.  This count may
2049 * be changed upwards or downwards in the future; it
2050 * is only necessary that true be returned for a small
2051 * subset of pmaps for proper page aging.
2052 */
2053boolean_t
2054pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2055{
2056	pv_entry_t pv;
2057	int loops = 0;
2058	boolean_t rv;
2059
2060	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2061	    ("pmap_page_exists_quick: page %p is not managed", m));
2062	rv = FALSE;
2063	rw_wlock(&pvh_global_lock);
2064	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2065		if (PV_PMAP(pv) == pmap) {
2066			rv = TRUE;
2067			break;
2068		}
2069		loops++;
2070		if (loops >= 16)
2071			break;
2072	}
2073	rw_wunlock(&pvh_global_lock);
2074	return (rv);
2075}
2076
2077/*
2078 *	pmap_page_wired_mappings:
2079 *
2080 *	Return the number of managed mappings to the given physical page
2081 *	that are wired.
2082 */
2083int
2084pmap_page_wired_mappings(vm_page_t m)
2085{
2086	struct ia64_lpte *pte;
2087	pmap_t oldpmap, pmap;
2088	pv_entry_t pv;
2089	int count;
2090
2091	count = 0;
2092	if ((m->oflags & VPO_UNMANAGED) != 0)
2093		return (count);
2094	rw_wlock(&pvh_global_lock);
2095	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2096		pmap = PV_PMAP(pv);
2097		PMAP_LOCK(pmap);
2098		oldpmap = pmap_switch(pmap);
2099		pte = pmap_find_vhpt(pv->pv_va);
2100		KASSERT(pte != NULL, ("pte"));
2101		if (pmap_wired(pte))
2102			count++;
2103		pmap_switch(oldpmap);
2104		PMAP_UNLOCK(pmap);
2105	}
2106	rw_wunlock(&pvh_global_lock);
2107	return (count);
2108}
2109
2110/*
2111 * Remove all unwired, managed mappings from the specified address
2112 * space.  This aids process exit speed; walking the pv chunks and
2113 * tearing the mappings down directly is much faster than calling
2114 * pmap_remove() on the entire address space
2117 */
2118void
2119pmap_remove_pages(pmap_t pmap)
2120{
2121	struct pv_chunk *pc, *npc;
2122	struct ia64_lpte *pte;
2123	pmap_t oldpmap;
2124	pv_entry_t pv;
2125	vm_offset_t va;
2126	vm_page_t m;
2127	u_long inuse, bitmask;
2128	int allfree, bit, field, idx;
2129
2130	rw_wlock(&pvh_global_lock);
2131	PMAP_LOCK(pmap);
2132	oldpmap = pmap_switch(pmap);
2133	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2134		allfree = 1;
2135		for (field = 0; field < _NPCM; field++) {
2136			inuse = ~pc->pc_map[field] & pc_freemask[field];
2137			while (inuse != 0) {
2138				bit = ffsl(inuse) - 1;
2139				bitmask = 1UL << bit;
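				/*
				 * Convert the (field, bit) position in the
				 * chunk's allocation bitmap into an index
				 * into the pc_pventry[] array.
				 */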
2140				idx = field * sizeof(inuse) * NBBY + bit;
2141				pv = &pc->pc_pventry[idx];
2142				inuse &= ~bitmask;
2143				va = pv->pv_va;
2144				pte = pmap_find_vhpt(va);
2145				KASSERT(pte != NULL, ("pte"));
2146				if (pmap_wired(pte)) {
2147					allfree = 0;
2148					continue;
2149				}
2150				pmap_remove_vhpt(va);
2151				pmap_invalidate_page(va);
2152				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2153				if (pmap_dirty(pte))
2154					vm_page_dirty(m);
2155				pmap_free_pte(pte, va);
2156				/* Mark free */
2157				PV_STAT(pv_entry_frees++);
2158				PV_STAT(pv_entry_spare++);
2159				pv_entry_count--;
2160				pc->pc_map[field] |= bitmask;
2161				pmap->pm_stats.resident_count--;
2162				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2163				if (TAILQ_EMPTY(&m->md.pv_list))
2164					vm_page_aflag_clear(m, PGA_WRITEABLE);
2165			}
2166		}
2167		if (allfree) {
2168			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2169			free_pv_chunk(pc);
2170		}
2171	}
2172	pmap_switch(oldpmap);
2173	PMAP_UNLOCK(pmap);
2174	rw_wunlock(&pvh_global_lock);
2175}
2176
2177/*
2178 *	pmap_ts_referenced:
2179 *
2180 *	Return a count of reference bits for a page, clearing those bits.
2181 *	It is not necessary for every reference bit to be cleared, but it
2182 *	is necessary that 0 only be returned when there are truly no
2183 *	reference bits set.
2184 *
2185 *	XXX: The exact number of bits to check and clear is a matter that
2186 *	should be tested and standardized at some point in the future for
2187 *	optimal aging of shared pages.
2188 */
2189int
2190pmap_ts_referenced(vm_page_t m)
2191{
2192	struct ia64_lpte *pte;
2193	pmap_t oldpmap, pmap;
2194	pv_entry_t pv;
2195	int count = 0;
2196
2197	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2198	    ("pmap_ts_referenced: page %p is not managed", m));
2199	rw_wlock(&pvh_global_lock);
2200	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2201		pmap = PV_PMAP(pv);
2202		PMAP_LOCK(pmap);
2203		oldpmap = pmap_switch(pmap);
2204		pte = pmap_find_vhpt(pv->pv_va);
2205		KASSERT(pte != NULL, ("pte"));
2206		if (pmap_accessed(pte)) {
2207			count++;
2208			pmap_clear_accessed(pte);
2209			pmap_invalidate_page(pv->pv_va);
2210		}
2211		pmap_switch(oldpmap);
2212		PMAP_UNLOCK(pmap);
2213	}
2214	rw_wunlock(&pvh_global_lock);
2215	return (count);
2216}
2217
2218/*
2219 *	pmap_is_modified:
2220 *
2221 *	Return whether or not the specified physical page was modified
2222 *	in any physical maps.
2223 */
2224boolean_t
2225pmap_is_modified(vm_page_t m)
2226{
2227	struct ia64_lpte *pte;
2228	pmap_t oldpmap, pmap;
2229	pv_entry_t pv;
2230	boolean_t rv;
2231
2232	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2233	    ("pmap_is_modified: page %p is not managed", m));
2234	rv = FALSE;
2235
2236	/*
2237	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2238	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2239	 * is clear, no PTEs can be dirty.
2240	 */
2241	VM_OBJECT_ASSERT_WLOCKED(m->object);
2242	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2243		return (rv);
2244	rw_wlock(&pvh_global_lock);
2245	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2246		pmap = PV_PMAP(pv);
2247		PMAP_LOCK(pmap);
2248		oldpmap = pmap_switch(pmap);
2249		pte = pmap_find_vhpt(pv->pv_va);
2250		pmap_switch(oldpmap);
2251		KASSERT(pte != NULL, ("pte"));
2252		rv = pmap_dirty(pte) ? TRUE : FALSE;
2253		PMAP_UNLOCK(pmap);
2254		if (rv)
2255			break;
2256	}
2257	rw_wunlock(&pvh_global_lock);
2258	return (rv);
2259}
2260
2261/*
2262 *	pmap_is_prefaultable:
2263 *
2264 *	Return whether or not the specified virtual address is eligible
2265 *	for prefault.
2266 */
2267boolean_t
2268pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2269{
2270	struct ia64_lpte *pte;
2271
2272	pte = pmap_find_vhpt(addr);
2273	if (pte != NULL && pmap_present(pte))
2274		return (FALSE);
2275	return (TRUE);
2276}
2277
2278/*
2279 *	pmap_is_referenced:
2280 *
2281 *	Return whether or not the specified physical page was referenced
2282 *	in any physical maps.
2283 */
2284boolean_t
2285pmap_is_referenced(vm_page_t m)
2286{
2287	struct ia64_lpte *pte;
2288	pmap_t oldpmap, pmap;
2289	pv_entry_t pv;
2290	boolean_t rv;
2291
2292	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2293	    ("pmap_is_referenced: page %p is not managed", m));
2294	rv = FALSE;
2295	rw_wlock(&pvh_global_lock);
2296	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2297		pmap = PV_PMAP(pv);
2298		PMAP_LOCK(pmap);
2299		oldpmap = pmap_switch(pmap);
2300		pte = pmap_find_vhpt(pv->pv_va);
2301		pmap_switch(oldpmap);
2302		KASSERT(pte != NULL, ("pte"));
2303		rv = pmap_accessed(pte) ? TRUE : FALSE;
2304		PMAP_UNLOCK(pmap);
2305		if (rv)
2306			break;
2307	}
2308	rw_wunlock(&pvh_global_lock);
2309	return (rv);
2310}
2311
2312/*
2313 *	Apply the given advice to the specified range of addresses within the
2314 *	given pmap.  Depending on the advice, clear the referenced and/or
2315 *	modified flags in each mapping and set the mapped page's dirty field.
2316 */
2317void
2318pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2319{
2320	struct ia64_lpte *pte;
2321	pmap_t oldpmap;
2322	vm_page_t m;
2323
2324	PMAP_LOCK(pmap);
2325	oldpmap = pmap_switch(pmap);
2326	for (; sva < eva; sva += PAGE_SIZE) {
2327		/* If page is invalid, skip this page. */
2328		pte = pmap_find_vhpt(sva);
2329		if (pte == NULL)
2330			continue;
2331
2332		/* If it isn't managed, skip it too. */
2333		if (!pmap_managed(pte))
2334			continue;
2335
2336		/* Clear its modified and referenced bits. */
2337		if (pmap_dirty(pte)) {
2338			if (advice == MADV_DONTNEED) {
2339				/*
2340				 * Future calls to pmap_is_modified() can be
2341				 * avoided by making the page dirty now.
2342				 */
2343				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2344				vm_page_dirty(m);
2345			}
2346			pmap_clear_dirty(pte);
2347		} else if (!pmap_accessed(pte))
2348			continue;
2349		pmap_clear_accessed(pte);
2350		pmap_invalidate_page(sva);
2351	}
2352	pmap_switch(oldpmap);
2353	PMAP_UNLOCK(pmap);
2354}
2355
2356/*
2357 *	Clear the modify bits on the specified physical page.
2358 */
2359void
2360pmap_clear_modify(vm_page_t m)
2361{
2362	struct ia64_lpte *pte;
2363	pmap_t oldpmap, pmap;
2364	pv_entry_t pv;
2365
2366	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2367	    ("pmap_clear_modify: page %p is not managed", m));
2368	VM_OBJECT_ASSERT_WLOCKED(m->object);
2369	KASSERT(!vm_page_xbusied(m),
2370	    ("pmap_clear_modify: page %p is exclusive busied", m));
2371
2372	/*
2373	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
2374	 * If the object containing the page is locked and the page is not
2375	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2376	 */
2377	if ((m->aflags & PGA_WRITEABLE) == 0)
2378		return;
2379	rw_wlock(&pvh_global_lock);
2380	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2381		pmap = PV_PMAP(pv);
2382		PMAP_LOCK(pmap);
2383		oldpmap = pmap_switch(pmap);
2384		pte = pmap_find_vhpt(pv->pv_va);
2385		KASSERT(pte != NULL, ("pte"));
2386		if (pmap_dirty(pte)) {
2387			pmap_clear_dirty(pte);
2388			pmap_invalidate_page(pv->pv_va);
2389		}
2390		pmap_switch(oldpmap);
2391		PMAP_UNLOCK(pmap);
2392	}
2393	rw_wunlock(&pvh_global_lock);
2394}
2395
2396/*
2397 * Clear the write and modified bits in each of the given page's mappings.
2398 */
2399void
2400pmap_remove_write(vm_page_t m)
2401{
2402	struct ia64_lpte *pte;
2403	pmap_t oldpmap, pmap;
2404	pv_entry_t pv;
2405	vm_prot_t prot;
2406
2407	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2408	    ("pmap_remove_write: page %p is not managed", m));
2409
2410	/*
2411	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2412	 * set by another thread while the object is locked.  Thus,
2413	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2414	 */
2415	VM_OBJECT_ASSERT_WLOCKED(m->object);
2416	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2417		return;
2418	rw_wlock(&pvh_global_lock);
2419	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2420		pmap = PV_PMAP(pv);
2421		PMAP_LOCK(pmap);
2422		oldpmap = pmap_switch(pmap);
2423		pte = pmap_find_vhpt(pv->pv_va);
2424		KASSERT(pte != NULL, ("pte"));
2425		prot = pmap_prot(pte);
2426		if ((prot & VM_PROT_WRITE) != 0) {
2427			if (pmap_dirty(pte)) {
2428				vm_page_dirty(m);
2429				pmap_clear_dirty(pte);
2430			}
2431			prot &= ~VM_PROT_WRITE;
2432			pmap_pte_prot(pmap, pte, prot);
2433			pmap_pte_attr(pte, m->md.memattr);
2434			pmap_invalidate_page(pv->pv_va);
2435		}
2436		pmap_switch(oldpmap);
2437		PMAP_UNLOCK(pmap);
2438	}
2439	vm_page_aflag_clear(m, PGA_WRITEABLE);
2440	rw_wunlock(&pvh_global_lock);
2441}
2442
2443/*
2444 * Map a set of physical memory pages into the kernel virtual
2445 * address space. Return a pointer to where it is mapped. This
2446 * routine is intended to be used for mapping device memory,
2447 * NOT real memory.
2448 */
2449void *
2450pmap_mapdev(vm_paddr_t pa, vm_size_t sz)
2451{
2452	static void *last_va = NULL;
2453	static vm_paddr_t last_pa = 0;
2454	static vm_size_t last_sz = 0;
2455	struct efi_md *md;
2456	vm_offset_t va;
2457
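	/*
	 * Short-circuit repeated requests for the same range with a
	 * one-entry cache of the last translation.
	 */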
2458	if (pa == last_pa && sz == last_sz)
2459		return (last_va);
2460
2461	md = efi_md_find(pa);
2462	if (md == NULL) {
2463		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2464		    __func__, pa, pa + sz - 1);
2465		return ((void *)IA64_PHYS_TO_RR6(pa));
2466	}
2467
2468	if (md->md_type == EFI_MD_TYPE_FREE) {
2469		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2470		    pa + sz - 1);
2471		return (NULL);
2472	}
2473
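	/*
	 * Use the cacheable direct map in region 7 when the EFI descriptor
	 * permits write-back accesses; otherwise use the uncacheable direct
	 * map in region 6.
	 */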
2474	va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2475	    IA64_PHYS_TO_RR6(pa);
2476
2477	last_va = (void *)va;
2478	last_pa = pa;
2479	last_sz = sz;
2480	return (last_va);
2481}
2482
2483/*
2484 * 'Unmap' a range mapped by pmap_mapdev().
2485 */
2486void
2487pmap_unmapdev(vm_offset_t va, vm_size_t size)
2488{
2489}
2490
2491/*
2492 * Sets the memory attribute for the specified page.
2493 */
2494static void
2495pmap_page_set_memattr_1(void *arg)
2496{
2497	struct ia64_pal_result res;
2498	register_t is;
2499	uintptr_t pp = (uintptr_t)arg;
2500
2501	is = intr_disable();
2502	res = ia64_call_pal_static(pp, 0, 0, 0);
2503	intr_restore(is);
2504}
2505
2506void
2507pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2508{
2509	struct ia64_lpte *pte;
2510	pmap_t oldpmap, pmap;
2511	pv_entry_t pv;
2512	void *va;
2513
2514	rw_wlock(&pvh_global_lock);
2515	m->md.memattr = ma;
2516	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2517		pmap = PV_PMAP(pv);
2518		PMAP_LOCK(pmap);
2519		oldpmap = pmap_switch(pmap);
2520		pte = pmap_find_vhpt(pv->pv_va);
2521		KASSERT(pte != NULL, ("pte"));
2522		pmap_pte_attr(pte, ma);
2523		pmap_invalidate_page(pv->pv_va);
2524		pmap_switch(oldpmap);
2525		PMAP_UNLOCK(pmap);
2526	}
2527	rw_wunlock(&pvh_global_lock);
2528
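	/*
	 * When a page becomes uncacheable, make outstanding prefetches
	 * visible on all CPUs (PAL_PREFETCH_VISIBILITY), flush the page
	 * from the local data cache and then have all CPUs drain pending
	 * memory transactions (PAL_MC_DRAIN).
	 */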
2529	if (ma == VM_MEMATTR_UNCACHEABLE) {
2530#ifdef SMP
2531		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2532		    (void *)PAL_PREFETCH_VISIBILITY);
2533#else
2534		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2535#endif
2536		va = (void *)pmap_page_to_va(m);
2537		critical_enter();
2538		cpu_flush_dcache(va, PAGE_SIZE);
2539		critical_exit();
2540#ifdef SMP
2541		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2542		    (void *)PAL_MC_DRAIN);
2543#else
2544		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2545#endif
2546	}
2547}
2548
2549/*
2550 * perform the pmap work for mincore
2551 */
2552int
2553pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2554{
2555	pmap_t oldpmap;
2556	struct ia64_lpte *pte, tpte;
2557	vm_paddr_t pa;
2558	int val;
2559
2560	PMAP_LOCK(pmap);
2561retry:
2562	oldpmap = pmap_switch(pmap);
2563	pte = pmap_find_vhpt(addr);
2564	if (pte != NULL) {
2565		tpte = *pte;
2566		pte = &tpte;
2567	}
2568	pmap_switch(oldpmap);
2569	if (pte == NULL || !pmap_present(pte)) {
2570		val = 0;
2571		goto out;
2572	}
2573	val = MINCORE_INCORE;
2574	if (pmap_dirty(pte))
2575		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2576	if (pmap_accessed(pte))
2577		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2578	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2579	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2580	    pmap_managed(pte)) {
2581		pa = pmap_ppn(pte);
2582		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2583		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2584			goto retry;
2585	} else
2586out:
2587		PA_UNLOCK_COND(*locked_pa);
2588	PMAP_UNLOCK(pmap);
2589	return (val);
2590}
2591
2592void
2593pmap_activate(struct thread *td)
2594{
2595	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2596}
2597
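/*
 * Make the given address space current on this CPU by loading its region
 * IDs into region registers 0 .. IA64_VM_MINKERN_REGION - 1.  Each register
 * is built as (rid << 8) | (PAGE_SHIFT << 2) | 1, i.e. the RID in bits 8
 * and up, the translation page size in bits 2-7 and the VHPT walker enable
 * bit in bit 0.  A NULL pmap installs RID i for region i.  The previously
 * active pmap is returned so the caller can restore it:
 *
 *	oldpmap = pmap_switch(pmap);
 *	... operate on pmap's mappings through the VHPT ...
 *	pmap_switch(oldpmap);
 */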
2598pmap_t
2599pmap_switch(pmap_t pm)
2600{
2601	pmap_t prevpm;
2602	int i;
2603
2604	critical_enter();
2605	prevpm = PCPU_GET(md.current_pmap);
2606	if (prevpm == pm)
2607		goto out;
2608	if (pm == NULL) {
2609		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2610			ia64_set_rr(IA64_RR_BASE(i),
2611			    (i << 8)|(PAGE_SHIFT << 2)|1);
2612		}
2613	} else {
2614		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2615			ia64_set_rr(IA64_RR_BASE(i),
2616			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2617		}
2618	}
2619	PCPU_SET(md.current_pmap, pm);
2620	ia64_srlz_d();
2621
2622out:
2623	critical_exit();
2624	return (prevpm);
2625}
2626
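/*
 * Make the instruction cache coherent with earlier stores to the given
 * range of the pmap.  The range is first widened to 32-byte boundaries
 * (presumably the minimum flush-cache granularity) and then walked one
 * page at a time, calling ia64_sync_icache() only for addresses that are
 * actually mapped.
 */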
2627void
2628pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2629{
2630	pmap_t oldpm;
2631	struct ia64_lpte *pte;
2632	vm_offset_t lim;
2633	vm_size_t len;
2634
2635	sz += va & 31;
2636	va &= ~31;
2637	sz = (sz + 31) & ~31;
2638
2639	PMAP_LOCK(pm);
2640	oldpm = pmap_switch(pm);
2641	while (sz > 0) {
2642		lim = round_page(va);
2643		len = MIN(lim - va, sz);
2644		pte = pmap_find_vhpt(va);
2645		if (pte != NULL && pmap_present(pte))
2646			ia64_sync_icache(va, len);
2647		va += len;
2648		sz -= len;
2649	}
2650	pmap_switch(oldpm);
2651	PMAP_UNLOCK(pm);
2652}
2653
2654/*
2655 *	Increase the starting virtual address of the given mapping if a
2656 *	different alignment might result in more superpage mappings.
2657 */
2658void
2659pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2660    vm_offset_t *addr, vm_size_t size)
2661{
2662}
2663
2664#include "opt_ddb.h"
2665
2666#ifdef DDB
2667
2668#include <ddb/ddb.h>
2669
2670static const char *psnames[] = {
2671	"1B",	"2B",	"4B",	"8B",
2672	"16B",	"32B",	"64B",	"128B",
2673	"256B",	"512B",	"1K",	"2K",
2674	"4K",	"8K",	"16K",	"32K",
2675	"64K",	"128K",	"256K",	"512K",
2676	"1M",	"2M",	"4M",	"8M",
2677	"16M",	"32M",	"64M",	"128M",
2678	"256M",	"512M",	"1G",	"2G"
2679};
2680
2681static void
2682print_trs(int type)
2683{
2684	struct ia64_pal_result res;
2685	int i, maxtr;
2686	struct {
2687		pt_entry_t	pte;
2688		uint64_t	itir;
2689		uint64_t	ifa;
2690		struct ia64_rr	rr;
2691	} buf;
2692	static const char *manames[] = {
2693		"WB",	"bad",	"bad",	"bad",
2694		"UC",	"UCE",	"WC",	"NaT",
2695	};
2696
2697	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2698	if (res.pal_status != 0) {
2699		db_printf("Can't get VM summary\n");
2700		return;
2701	}
2702
2703	if (type == 0)
2704		maxtr = (res.pal_result[0] >> 40) & 0xff;
2705	else
2706		maxtr = (res.pal_result[0] >> 32) & 0xff;
2707
2708	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2709	for (i = 0; i <= maxtr; i++) {
2710		bzero(&buf, sizeof(buf));
2711		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2712		    ia64_tpa((uint64_t)&buf));
2713		if (!(res.pal_result[0] & 1))
2714			buf.pte &= ~PTE_AR_MASK;
2715		if (!(res.pal_result[0] & 2))
2716			buf.pte &= ~PTE_PL_MASK;
2717		if (!(res.pal_result[0] & 4))
2718			pmap_clear_dirty(&buf);
2719		if (!(res.pal_result[0] & 8))
2720			buf.pte &= ~PTE_MA_MASK;
2721		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2722		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2723		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2724		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2725		    (buf.pte & PTE_ED) ? 1 : 0,
2726		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2727		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2728		    (pmap_dirty(&buf)) ? 1 : 0,
2729		    (pmap_accessed(&buf)) ? 1 : 0,
2730		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2731		    (pmap_present(&buf)) ? 1 : 0,
2732		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2733	}
2734}
2735
2736DB_COMMAND(itr, db_itr)
2737{
2738	print_trs(0);
2739}
2740
2741DB_COMMAND(dtr, db_dtr)
2742{
2743	print_trs(1);
2744}
2745
2746DB_COMMAND(rr, db_rr)
2747{
2748	int i;
2749	uint64_t t;
2750	struct ia64_rr rr;
2751
2752	printf("RR RID    PgSz VE\n");
2753	for (i = 0; i < 8; i++) {
2754		__asm __volatile ("mov %0=rr[%1]"
2755				  : "=r"(t)
2756				  : "r"(IA64_RR_BASE(i)));
2757		*(uint64_t *) &rr = t;
2758		printf("%d  %06x %4s %d\n",
2759		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2760	}
2761}
2762
2763DB_COMMAND(thash, db_thash)
2764{
2765	if (!have_addr)
2766		return;
2767
2768	db_printf("%p\n", (void *) ia64_thash(addr));
2769}
2770
2771DB_COMMAND(ttag, db_ttag)
2772{
2773	if (!have_addr)
2774		return;
2775
2776	db_printf("0x%lx\n", ia64_ttag(addr));
2777}
2778
2779DB_COMMAND(kpte, db_kpte)
2780{
2781	struct ia64_lpte *pte;
2782
2783	if (!have_addr) {
2784		db_printf("usage: kpte <kva>\n");
2785		return;
2786	}
2787	if (addr < VM_INIT_KERNEL_ADDRESS) {
2788		db_printf("kpte: error: invalid <kva>\n");
2789		return;
2790	}
2791	pte = pmap_find_kpte(addr);
2792	db_printf("kpte at %p:\n", pte);
2793	db_printf("  pte  =%016lx\n", pte->pte);
2794	db_printf("  itir =%016lx\n", pte->itir);
2795	db_printf("  tag  =%016lx\n", pte->tag);
2796	db_printf("  chain=%016lx\n", pte->chain);
2797}
2798
2799#endif
2800