1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/pmap.c 268201 2014-07-02 23:57:55Z marcel $");
50
51#include "opt_pmap.h"
52
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/ktr.h>
56#include <sys/lock.h>
57#include <sys/mman.h>
58#include <sys/mutex.h>
59#include <sys/proc.h>
60#include <sys/rwlock.h>
61#include <sys/smp.h>
62#include <sys/sysctl.h>
63#include <sys/systm.h>
64
65#include <vm/vm.h>
66#include <vm/vm_param.h>
67#include <vm/vm_page.h>
68#include <vm/vm_map.h>
69#include <vm/vm_object.h>
70#include <vm/vm_pageout.h>
71#include <vm/uma.h>
72
73#include <machine/bootinfo.h>
74#include <machine/efi.h>
75#include <machine/md_var.h>
76#include <machine/pal.h>
77
78/*
79 *	Manages physical address maps.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
90 * this module may delay invalidation or reduced-protection
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 * and as to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Region 0-3:	User virtually mapped
106 * Region 4:	PBVM and special mappings
107 * Region 5:	Kernel virtual memory
108 * Region 6:	Direct-mapped uncacheable
109 * Region 7:	Direct-mapped cacheable
110 */
111
112/* XXX move to a header. */
113extern uint64_t ia64_gateway_page[];
114
115#if !defined(DIAGNOSTIC)
116#define PMAP_INLINE __inline
117#else
118#define PMAP_INLINE
119#endif
120
121#ifdef PV_STATS
122#define PV_STAT(x)	do { x ; } while (0)
123#else
124#define PV_STAT(x)	do { } while (0)
125#endif
126
127#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
128#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
129#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
130#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
131#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
132#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
133#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
134#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
135
136#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
137#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
138#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
139#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
140
141#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
142
143/*
144 * Individual PV entries are stored in per-pmap chunks.  This saves
145 * space by eliminating the need to record the pmap within every PV
146 * entry.
147 */
148#if PAGE_SIZE == 8192
149#define	_NPCM	6
150#define	_NPCPV	337
151#define	_NPCS	2
152#elif PAGE_SIZE == 16384
153#define	_NPCM	11
154#define	_NPCPV	677
155#define	_NPCS	1
156#endif
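/*
 * The constants above make each pv_chunk fill exactly one page (see the
 * CTASSERT below): with 8 KB pages the 104-byte chunk header leaves room
 * for 337 24-byte pv entries (104 + 337 * 24 == 8192), which in turn need
 * ceil(337 / 64) = 6 bitmap words.  The 16 KB case works out the same way
 * (677 entries, 11 bitmap words).
 */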
157struct pv_chunk {
158	pmap_t			pc_pmap;
159	TAILQ_ENTRY(pv_chunk)	pc_list;
160	u_long			pc_map[_NPCM];	/* bitmap; 1 = free */
161	TAILQ_ENTRY(pv_chunk)	pc_lru;
162	u_long			pc_spare[_NPCS];
163	struct pv_entry		pc_pventry[_NPCPV];
164};
165
166/*
167 * The VHPT bucket head structure.
168 */
169struct ia64_bucket {
170	uint64_t	chain;
171	struct mtx	mutex;
172	u_int		length;
173};
174
175/*
176 * Statically allocated kernel pmap
177 */
178struct pmap kernel_pmap_store;
179
180vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
181vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
182
183/*
184 * Kernel virtual memory management.
185 */
186static int nkpt;
187extern struct ia64_lpte ***ia64_kptdir;
188
189#define KPTE_DIR0_INDEX(va) \
190	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
191#define KPTE_DIR1_INDEX(va) \
192	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
193#define KPTE_PTE_INDEX(va) \
194	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
195#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
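/*
 * The kernel 'page tables' form a 3-level radix tree rooted at ia64_kptdir:
 * the level 0 and level 1 directory pages each hold PAGE_SIZE / 8 pointers
 * and every leaf page holds NKPTEPG PTEs, so the macros above simply extract
 * successive bit fields from the VA.
 */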
196
197vm_offset_t kernel_vm_end;
198
199/* Defaults for ptc.e. */
200static uint64_t pmap_ptc_e_base = 0;
201static uint32_t pmap_ptc_e_count1 = 1;
202static uint32_t pmap_ptc_e_count2 = 1;
203static uint32_t pmap_ptc_e_stride1 = 0;
204static uint32_t pmap_ptc_e_stride2 = 0;
205
206struct mtx pmap_ptc_mutex;
207
208/*
209 * Data for the RID allocator
210 */
211static int pmap_ridcount;
212static int pmap_rididx;
213static int pmap_ridmapsz;
214static int pmap_ridmax;
215static uint64_t *pmap_ridmap;
216struct mtx pmap_ridmutex;
217
218static struct rwlock_padalign pvh_global_lock;
219
220/*
221 * Data for the pv entry allocation mechanism
222 */
223static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
224static int pv_entry_count;
225
226/*
227 * Data for allocating PTEs for user processes.
228 */
229static uma_zone_t ptezone;
230
231/*
232 * Virtual Hash Page Table (VHPT) data.
233 */
234/* SYSCTL_DECL(_machdep); */
235static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
236
237struct ia64_bucket *pmap_vhpt_bucket;
238
239int pmap_vhpt_nbuckets;
240SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
241    &pmap_vhpt_nbuckets, 0, "");
242
243int pmap_vhpt_log2size = 0;
244TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
245SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
246    &pmap_vhpt_log2size, 0, "");
247
248static int pmap_vhpt_inserts;
249SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
250    &pmap_vhpt_inserts, 0, "");
251
252static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
253SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
254    NULL, 0, pmap_vhpt_population, "I", "");
255
256static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
257
258static void free_pv_chunk(struct pv_chunk *pc);
259static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
260static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
261static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
262
263static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
264		    vm_page_t m, vm_prot_t prot);
265static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
266static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
267		    vm_offset_t va, pv_entry_t pv, int freepte);
268static int	pmap_remove_vhpt(vm_offset_t va);
269static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
270		    vm_page_t m);
271
272static void
273pmap_initialize_vhpt(vm_offset_t vhpt)
274{
275	struct ia64_lpte *pte;
276	u_int i;
277
278	pte = (struct ia64_lpte *)vhpt;
279	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
280		pte[i].pte = 0;
281		pte[i].itir = 0;
282		pte[i].tag = 1UL << 63; /* Invalid tag */
283		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
284	}
285}
286
287#ifdef SMP
288vm_offset_t
289pmap_alloc_vhpt(void)
290{
291	vm_offset_t vhpt;
292	vm_page_t m;
293	vm_size_t size;
294
295	size = 1UL << pmap_vhpt_log2size;
296	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
297	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
298	    VM_MEMATTR_DEFAULT);
299	if (m != NULL) {
300		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
301		pmap_initialize_vhpt(vhpt);
302		return (vhpt);
303	}
304	return (0);
305}
306#endif
307
308/*
309 *	Bootstrap the system enough to run with virtual memory.
310 */
311void
312pmap_bootstrap()
313{
314	struct ia64_pal_result res;
315	vm_offset_t base;
316	size_t size;
317	int i, ridbits;
318
319	/*
320	 * Query the PAL Code to find the loop parameters for the
321	 * ptc.e instruction.
322	 */
323	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
324	if (res.pal_status != 0)
325		panic("Can't configure ptc.e parameters");
326	pmap_ptc_e_base = res.pal_result[0];
327	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
328	pmap_ptc_e_count2 = res.pal_result[1];
329	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
330	pmap_ptc_e_stride2 = res.pal_result[2];
331	if (bootverbose)
332		printf("ptc.e base=0x%lx, count1=%u, count2=%u, "
333		       "stride1=0x%x, stride2=0x%x\n",
334		       pmap_ptc_e_base,
335		       pmap_ptc_e_count1,
336		       pmap_ptc_e_count2,
337		       pmap_ptc_e_stride1,
338		       pmap_ptc_e_stride2);
339
340	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
341
342	/*
343	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
344	 *
345	 * We currently need at least 19 bits in the RID because PID_MAX
346	 * can only be encoded in 17 bits and we need RIDs for 4 regions
347	 * per process. With PID_MAX equalling 99999 this means that we
348	 * need to be able to encode 399996 (=4*PID_MAX).
349	 * The Itanium processor only has 18 bits and the architected
350	 * minimum is exactly that. So, we cannot use a PID based scheme
351	 * in those cases. Enter pmap_ridmap...
352	 * We could avoid the map when running on a processor that
353	 * implements enough bits, but that would require passing the
354	 * process/thread ID to pmap, which we currently don't do, so we
355	 * use the map anyway. However, we don't want to allocate a map
356	 * that is large enough to cover the range dictated by the number
357	 * of bits in the RID, because that may result in a RID map of
358	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
359	 * The bottom line: we create a 32KB map when the processor only
360	 * implements 18 bits (or when we can't figure it out). Otherwise
361	 * we create a 64KB map.
362	 */
363	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
364	if (res.pal_status != 0) {
365		if (bootverbose)
366			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
367		ridbits = 18; /* guaranteed minimum */
368	} else {
369		ridbits = (res.pal_result[1] >> 8) & 0xff;
370		if (bootverbose)
371			printf("Processor supports %d Region ID bits\n",
372			    ridbits);
373	}
374	if (ridbits > 19)
375		ridbits = 19;
376
377	pmap_ridmax = (1 << ridbits);
378	pmap_ridmapsz = pmap_ridmax / 64;
379	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
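	/* Mark RIDs 0-7 as allocated; they are reserved for the kernel. */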
380	pmap_ridmap[0] |= 0xff;
381	pmap_rididx = 0;
382	pmap_ridcount = 8;
383	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
384
385	/*
386	 * Allocate some memory for initial kernel 'page tables'.
387	 */
388	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
389	nkpt = 0;
390	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
391
392	/*
393	 * Determine a valid (mappable) VHPT size.
394	 */
395	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
396	if (pmap_vhpt_log2size == 0)
397		pmap_vhpt_log2size = 20;
398	else if (pmap_vhpt_log2size < 16)
399		pmap_vhpt_log2size = 16;
400	else if (pmap_vhpt_log2size > 28)
401		pmap_vhpt_log2size = 28;
402	if (pmap_vhpt_log2size & 1)
403		pmap_vhpt_log2size--;
404
405	size = 1UL << pmap_vhpt_log2size;
406	base = (uintptr_t)ia64_physmem_alloc(size, size);
407	if (base == 0)
408		panic("Unable to allocate VHPT");
409
410	PCPU_SET(md.vhpt, base);
411	if (bootverbose)
412		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
413
414	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
415	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
416	    sizeof(struct ia64_bucket), PAGE_SIZE);
417	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
418		/* Stolen memory is zeroed. */
419		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
420		    MTX_NOWITNESS | MTX_SPIN);
421	}
422
423	pmap_initialize_vhpt(base);
424	map_vhpt(base);
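	/*
	 * Point cr.pta at the VHPT.  Going by the architected PTA layout,
	 * this sets the base (bits 63:15), vf=1 (bit 8: long format),
	 * size = log2size (bits 7:2) and ve=1 (bit 0: walker enabled).
	 */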
425	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
426	ia64_srlz_i();
427
428	virtual_avail = VM_INIT_KERNEL_ADDRESS;
429	virtual_end = VM_MAX_KERNEL_ADDRESS;
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	PMAP_LOCK_INIT(kernel_pmap);
435	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
436		kernel_pmap->pm_rid[i] = 0;
437	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
438	PCPU_SET(md.current_pmap, kernel_pmap);
439
440 	/*
441	 * Initialize the global pv list lock.
442	 */
443	rw_init(&pvh_global_lock, "pmap pv global");
444
445	/* Region 5 is mapped via the VHPT. */
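	/* The RR value: rid=5 in bits 31:8, ps=PAGE_SHIFT in bits 7:2, ve=1. */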
446	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
447
448	/*
449	 * Clear out any random TLB entries left over from booting.
450	 */
451	pmap_invalidate_all();
452
453	map_gateway_page();
454}
455
456static int
457pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
458{
459	int count, error, i;
460
461	count = 0;
462	for (i = 0; i < pmap_vhpt_nbuckets; i++)
463		count += pmap_vhpt_bucket[i].length;
464
465	error = SYSCTL_OUT(req, &count, sizeof(count));
466	return (error);
467}
468
469vm_offset_t
470pmap_page_to_va(vm_page_t m)
471{
472	vm_paddr_t pa;
473	vm_offset_t va;
474
475	pa = VM_PAGE_TO_PHYS(m);
476	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
477	    IA64_PHYS_TO_RR7(pa);
478	return (va);
479}
480
481/*
482 *	Initialize a vm_page's machine-dependent fields.
483 */
484void
485pmap_page_init(vm_page_t m)
486{
487
488	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
489
490	TAILQ_INIT(&m->md.pv_list);
491	m->md.memattr = VM_MEMATTR_DEFAULT;
492}
493
494/*
495 *	Initialize the pmap module.
496 *	Called by vm_init, to initialize any structures that the pmap
497 *	system needs to map virtual memory.
498 */
499void
500pmap_init(void)
501{
502
503	CTR1(KTR_PMAP, "%s()", __func__);
504
505	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
506	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
507}
508
509
510/***************************************************
511 * Manipulate TLBs for a pmap
512 ***************************************************/
513
514static void
515pmap_invalidate_page(vm_offset_t va)
516{
517	struct ia64_lpte *pte;
518	struct pcpu *pc;
519	uint64_t tag;
520	u_int vhpt_ofs;
521
522	critical_enter();
523
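	/*
	 * Invalidate the tag of the matching entry in every CPU's VHPT so
	 * that a stale translation can no longer be found there.  The
	 * compare-and-set only touches entries that still carry our tag.
	 */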
524	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
525	tag = ia64_ttag(va);
526	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
527		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
528		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
529	}
530
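	/*
	 * ptc.ga purges the translation from every TLB in the coherence
	 * domain.  The spin lock serializes the purge, since at most one
	 * global ptc is supposed to be in flight at a time.
	 */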
531	mtx_lock_spin(&pmap_ptc_mutex);
532
533	ia64_ptc_ga(va, PAGE_SHIFT << 2);
534	ia64_mf();
535	ia64_srlz_i();
536
537	mtx_unlock_spin(&pmap_ptc_mutex);
538
539	ia64_invala();
540
541	critical_exit();
542}
543
544void
545pmap_invalidate_all(void)
546{
547	uint64_t addr;
548	int i, j;
549
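	/*
	 * Issue ptc.e over the base/count/stride loop reported by
	 * PAL_PTCE_INFO (see pmap_bootstrap()) to purge the entire
	 * local TLB.
	 */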
550	addr = pmap_ptc_e_base;
551	for (i = 0; i < pmap_ptc_e_count1; i++) {
552		for (j = 0; j < pmap_ptc_e_count2; j++) {
553			ia64_ptc_e(addr);
554			addr += pmap_ptc_e_stride2;
555		}
556		addr += pmap_ptc_e_stride1;
557	}
558	ia64_srlz_i();
559}
560
561static uint32_t
562pmap_allocate_rid(void)
563{
564	uint64_t bit, bits;
565	int rid;
566
567	mtx_lock(&pmap_ridmutex);
568	if (pmap_ridcount == pmap_ridmax)
569		panic("pmap_allocate_rid: All Region IDs used");
570
571	/* Find an index with a free bit. */
572	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
573		pmap_rididx++;
574		if (pmap_rididx == pmap_ridmapsz)
575			pmap_rididx = 0;
576	}
577	rid = pmap_rididx * 64;
578
579	/* Find a free bit. */
580	bit = 1UL;
581	while (bits & bit) {
582		rid++;
583		bit <<= 1;
584	}
585
586	pmap_ridmap[pmap_rididx] |= bit;
587	pmap_ridcount++;
588	mtx_unlock(&pmap_ridmutex);
589
590	return rid;
591}
592
593static void
594pmap_free_rid(uint32_t rid)
595{
596	uint64_t bit;
597	int idx;
598
599	idx = rid / 64;
600	bit = ~(1UL << (rid & 63));
601
602	mtx_lock(&pmap_ridmutex);
603	pmap_ridmap[idx] &= bit;
604	pmap_ridcount--;
605	mtx_unlock(&pmap_ridmutex);
606}
607
608/***************************************************
609 * Page table page management routines.....
610 ***************************************************/
611
612static void
613pmap_pinit_common(pmap_t pmap)
614{
615	int i;
616
617	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
618		pmap->pm_rid[i] = pmap_allocate_rid();
619	TAILQ_INIT(&pmap->pm_pvchunk);
620	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
621}
622
623void
624pmap_pinit0(pmap_t pmap)
625{
626
627	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
628
629	PMAP_LOCK_INIT(pmap);
630	pmap_pinit_common(pmap);
631}
632
633/*
634 * Initialize a preallocated and zeroed pmap structure,
635 * such as one in a vmspace structure.
636 */
637int
638pmap_pinit(pmap_t pmap)
639{
640
641	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
642
643	pmap_pinit_common(pmap);
644	return (1);
645}
646
647/***************************************************
648 * Pmap allocation/deallocation routines.
649 ***************************************************/
650
651/*
652 * Release any resources held by the given physical map.
653 * Called when a pmap initialized by pmap_pinit is being released.
654 * Should only be called if the map contains no valid mappings.
655 */
656void
657pmap_release(pmap_t pmap)
658{
659	int i;
660
661	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
662
663	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
664		if (pmap->pm_rid[i])
665			pmap_free_rid(pmap->pm_rid[i]);
666}
667
668/*
669 * grow the number of kernel page table entries, if needed
670 */
671void
672pmap_growkernel(vm_offset_t addr)
673{
674	struct ia64_lpte **dir1;
675	struct ia64_lpte *leaf;
676	vm_page_t nkpg;
677
678	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, addr);
679
680	while (kernel_vm_end <= addr) {
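		/*
		 * nkpt counts both directory and leaf pages: the root can
		 * point at PAGE_SIZE/8 level 1 directories and each of
		 * those at PAGE_SIZE/8 leaf pages, hence the limit of
		 * PAGE_SIZE/8 + (PAGE_SIZE/8)^2 page table pages.
		 */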
681		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
682			panic("%s: out of kernel address space", __func__);
683
684		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
685		if (dir1 == NULL) {
686			nkpg = vm_page_alloc(NULL, nkpt++,
687			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
688			if (!nkpg)
689				panic("%s: cannot add dir. page", __func__);
690
691			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
692			bzero(dir1, PAGE_SIZE);
693			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
694		}
695
696		nkpg = vm_page_alloc(NULL, nkpt++,
697		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
698		if (!nkpg)
699			panic("%s: cannot add PTE page", __func__);
700
701		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
702		bzero(leaf, PAGE_SIZE);
703		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
704
705		kernel_vm_end += PAGE_SIZE * NKPTEPG;
706	}
707}
708
709/***************************************************
710 * page management routines.
711 ***************************************************/
712
713CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
714
715static __inline struct pv_chunk *
716pv_to_chunk(pv_entry_t pv)
717{
718
719	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
720}
721
722#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
723
724#define	PC_FREE_FULL	0xfffffffffffffffful
725#define	PC_FREE_PARTIAL	\
726	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
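/*
 * Only the low _NPCPV % 64 bits (17 with 8 KB pages, 37 with 16 KB pages)
 * of the last pc_map word correspond to real pv entries; PC_FREE_PARTIAL
 * has just those bits set.
 */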
727
728#if PAGE_SIZE == 8192
729static const u_long pc_freemask[_NPCM] = {
730	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
731	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
732};
733#elif PAGE_SIZE == 16384
734static const u_long pc_freemask[_NPCM] = {
735	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
736	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
737	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
738	PC_FREE_FULL, PC_FREE_PARTIAL
739};
740#endif
741
742static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
743
744SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
745    "Current number of pv entries");
746
747#ifdef PV_STATS
748static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
749
750SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
751    "Current number of pv entry chunks");
752SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
753    "Current number of pv entry chunks allocated");
754SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
755    "Current number of pv entry chunks frees");
756SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
757    "Number of times tried to get a chunk page but failed.");
758
759static long pv_entry_frees, pv_entry_allocs;
760static int pv_entry_spare;
761
762SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
763    "Current number of pv entry frees");
764SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
765    "Current number of pv entry allocs");
766SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
767    "Current number of spare pv entries");
768#endif
769
770/*
771 * We are in a serious low memory condition.  Resort to
772 * drastic measures to free some pages so we can allocate
773 * another pv entry chunk.
774 */
775static vm_page_t
776pmap_pv_reclaim(pmap_t locked_pmap)
777{
778	struct pch newtail;
779	struct pv_chunk *pc;
780	struct ia64_lpte *pte;
781	pmap_t pmap;
782	pv_entry_t pv;
783	vm_offset_t va;
784	vm_page_t m, m_pc;
785	u_long inuse;
786	int bit, field, freed, idx;
787
788	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
789	pmap = NULL;
790	m_pc = NULL;
791	TAILQ_INIT(&newtail);
792	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
793		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
794		if (pmap != pc->pc_pmap) {
795			if (pmap != NULL) {
796				if (pmap != locked_pmap) {
797					pmap_switch(locked_pmap);
798					PMAP_UNLOCK(pmap);
799				}
800			}
801			pmap = pc->pc_pmap;
802			/* Avoid deadlock and lock recursion. */
803			if (pmap > locked_pmap)
804				PMAP_LOCK(pmap);
805			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
806				pmap = NULL;
807				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
808				continue;
809			}
810			pmap_switch(pmap);
811		}
812
813		/*
814		 * Destroy every non-wired, 8 KB page mapping in the chunk.
815		 */
816		freed = 0;
817		for (field = 0; field < _NPCM; field++) {
818			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
819			    inuse != 0; inuse &= ~(1UL << bit)) {
820				bit = ffsl(inuse) - 1;
821				idx = field * sizeof(inuse) * NBBY + bit;
822				pv = &pc->pc_pventry[idx];
823				va = pv->pv_va;
824				pte = pmap_find_vhpt(va);
825				KASSERT(pte != NULL, ("pte"));
826				if (pmap_wired(pte))
827					continue;
828				pmap_remove_vhpt(va);
829				pmap_invalidate_page(va);
830				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
831				if (pmap_accessed(pte))
832					vm_page_aflag_set(m, PGA_REFERENCED);
833				if (pmap_dirty(pte))
834					vm_page_dirty(m);
835				pmap_free_pte(pte, va);
836				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
837				if (TAILQ_EMPTY(&m->md.pv_list))
838					vm_page_aflag_clear(m, PGA_WRITEABLE);
839				pc->pc_map[field] |= 1UL << bit;
840				freed++;
841			}
842		}
843		if (freed == 0) {
844			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
845			continue;
846		}
847		/* Every freed mapping is for an 8 KB page. */
848		pmap->pm_stats.resident_count -= freed;
849		PV_STAT(pv_entry_frees += freed);
850		PV_STAT(pv_entry_spare += freed);
851		pv_entry_count -= freed;
852		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
853		for (field = 0; field < _NPCM; field++)
854			if (pc->pc_map[field] != pc_freemask[field]) {
855				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
856				    pc_list);
857				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
858
859				/*
860				 * One freed pv entry in locked_pmap is
861				 * sufficient.
862				 */
863				if (pmap == locked_pmap)
864					goto out;
865				break;
866			}
867		if (field == _NPCM) {
868			PV_STAT(pv_entry_spare -= _NPCPV);
869			PV_STAT(pc_chunk_count--);
870			PV_STAT(pc_chunk_frees++);
871			/* Entire chunk is free; return it. */
872			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
873			break;
874		}
875	}
876out:
877	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
878	if (pmap != NULL) {
879		if (pmap != locked_pmap) {
880			pmap_switch(locked_pmap);
881			PMAP_UNLOCK(pmap);
882		}
883	}
884	return (m_pc);
885}
886
887/*
888 * free the pv_entry back to the free list
889 */
890static void
891free_pv_entry(pmap_t pmap, pv_entry_t pv)
892{
893	struct pv_chunk *pc;
894	int bit, field, idx;
895
896	rw_assert(&pvh_global_lock, RA_WLOCKED);
897	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
898	PV_STAT(pv_entry_frees++);
899	PV_STAT(pv_entry_spare++);
900	pv_entry_count--;
901	pc = pv_to_chunk(pv);
902	idx = pv - &pc->pc_pventry[0];
903	field = idx / (sizeof(u_long) * NBBY);
904	bit = idx % (sizeof(u_long) * NBBY);
905	pc->pc_map[field] |= 1ul << bit;
906	for (idx = 0; idx < _NPCM; idx++)
907		if (pc->pc_map[idx] != pc_freemask[idx]) {
908			/*
909			 * 98% of the time, pc is already at the head of the
910			 * list.  If it isn't already, move it to the head.
911			 */
912			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
913			    pc)) {
914				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
915				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
916				    pc_list);
917			}
918			return;
919		}
920	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
921	free_pv_chunk(pc);
922}
923
924static void
925free_pv_chunk(struct pv_chunk *pc)
926{
927	vm_page_t m;
928
929 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
930	PV_STAT(pv_entry_spare -= _NPCPV);
931	PV_STAT(pc_chunk_count--);
932	PV_STAT(pc_chunk_frees++);
933	/* entire chunk is free, return it */
934	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
935	vm_page_unwire(m, 0);
936	vm_page_free(m);
937}
938
939/*
940 * get a new pv_entry, allocating a block from the system
941 * when needed.
942 */
943static pv_entry_t
944get_pv_entry(pmap_t pmap, boolean_t try)
945{
946	struct pv_chunk *pc;
947	pv_entry_t pv;
948	vm_page_t m;
949	int bit, field, idx;
950
951	rw_assert(&pvh_global_lock, RA_WLOCKED);
952	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
953	PV_STAT(pv_entry_allocs++);
954	pv_entry_count++;
955retry:
956	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
957	if (pc != NULL) {
958		for (field = 0; field < _NPCM; field++) {
959			if (pc->pc_map[field]) {
960				bit = ffsl(pc->pc_map[field]) - 1;
961				break;
962			}
963		}
964		if (field < _NPCM) {
965			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
966			pv = &pc->pc_pventry[idx];
967			pc->pc_map[field] &= ~(1ul << bit);
968			/* If this was the last item, move it to tail */
969			for (field = 0; field < _NPCM; field++)
970				if (pc->pc_map[field] != 0) {
971					PV_STAT(pv_entry_spare--);
972					return (pv);	/* not full, return */
973				}
974			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
975			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
976			PV_STAT(pv_entry_spare--);
977			return (pv);
978		}
979	}
980	/* No free items, allocate another chunk */
981	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
982	    VM_ALLOC_WIRED);
983	if (m == NULL) {
984		if (try) {
985			pv_entry_count--;
986			PV_STAT(pc_chunk_tryfail++);
987			return (NULL);
988		}
989		m = pmap_pv_reclaim(pmap);
990		if (m == NULL)
991			goto retry;
992	}
993	PV_STAT(pc_chunk_count++);
994	PV_STAT(pc_chunk_allocs++);
995	pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
996	pc->pc_pmap = pmap;
997	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
998	for (field = 1; field < _NPCM; field++)
999		pc->pc_map[field] = pc_freemask[field];
1000	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1001	pv = &pc->pc_pventry[0];
1002	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1003	PV_STAT(pv_entry_spare += _NPCPV - 1);
1004	return (pv);
1005}
1006
1007/*
1008 * Conditionally create a pv entry.
1009 */
1010static boolean_t
1011pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1012{
1013	pv_entry_t pv;
1014
1015	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1016	rw_assert(&pvh_global_lock, RA_WLOCKED);
1017	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1018		pv->pv_va = va;
1019		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1020		return (TRUE);
1021	} else
1022		return (FALSE);
1023}
1024
1025/*
1026 * Add an ia64_lpte to the VHPT.
1027 */
1028static void
1029pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1030{
1031	struct ia64_bucket *bckt;
1032	struct ia64_lpte *vhpte;
1033	uint64_t pte_pa;
1034
1035	/* Can fault, so get it out of the way. */
1036	pte_pa = ia64_tpa((vm_offset_t)pte);
1037
1038	vhpte = (struct ia64_lpte *)ia64_thash(va);
1039	bckt = (struct ia64_bucket *)vhpte->chain;
1040
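	/*
	 * Insert the pte at the head of the bucket's collision chain.
	 * The fence makes the new entry's chain pointer globally visible
	 * before the bucket head points at it, so a lockless walker never
	 * follows an uninitialized link.
	 */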
1041	mtx_lock_spin(&bckt->mutex);
1042	pte->chain = bckt->chain;
1043	ia64_mf();
1044	bckt->chain = pte_pa;
1045
1046	pmap_vhpt_inserts++;
1047	bckt->length++;
1048	mtx_unlock_spin(&bckt->mutex);
1049}
1050
1051/*
1052 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1053 * worked or an appropriate error code otherwise.
1054 */
1055static int
1056pmap_remove_vhpt(vm_offset_t va)
1057{
1058	struct ia64_bucket *bckt;
1059	struct ia64_lpte *pte;
1060	struct ia64_lpte *lpte;
1061	struct ia64_lpte *vhpte;
1062	uint64_t chain, tag;
1063
1064	tag = ia64_ttag(va);
1065	vhpte = (struct ia64_lpte *)ia64_thash(va);
1066	bckt = (struct ia64_bucket *)vhpte->chain;
1067
1068	lpte = NULL;
1069	mtx_lock_spin(&bckt->mutex);
1070	chain = bckt->chain;
1071	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1072	while (chain != 0 && pte->tag != tag) {
1073		lpte = pte;
1074		chain = pte->chain;
1075		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1076	}
1077	if (chain == 0) {
1078		mtx_unlock_spin(&bckt->mutex);
1079		return (ENOENT);
1080	}
1081
1082	/* Snip this pte out of the collision chain. */
1083	if (lpte == NULL)
1084		bckt->chain = pte->chain;
1085	else
1086		lpte->chain = pte->chain;
1087	ia64_mf();
1088
1089	bckt->length--;
1090	mtx_unlock_spin(&bckt->mutex);
1091	return (0);
1092}
1093
1094/*
1095 * Find the ia64_lpte for the given va, if any.
1096 */
1097static struct ia64_lpte *
1098pmap_find_vhpt(vm_offset_t va)
1099{
1100	struct ia64_bucket *bckt;
1101	struct ia64_lpte *pte;
1102	uint64_t chain, tag;
1103
1104	tag = ia64_ttag(va);
1105	pte = (struct ia64_lpte *)ia64_thash(va);
1106	bckt = (struct ia64_bucket *)pte->chain;
1107
1108	mtx_lock_spin(&bckt->mutex);
1109	chain = bckt->chain;
1110	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1111	while (chain != 0 && pte->tag != tag) {
1112		chain = pte->chain;
1113		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1114	}
1115	mtx_unlock_spin(&bckt->mutex);
1116	return ((chain != 0) ? pte : NULL);
1117}
1118
1119/*
1120 * Remove an entry from the list of managed mappings.
1121 */
1122static int
1123pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1124{
1125
1126	rw_assert(&pvh_global_lock, RA_WLOCKED);
1127	if (!pv) {
1128		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1129			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1130				break;
1131		}
1132	}
1133
1134	if (pv) {
1135		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1136		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1137			vm_page_aflag_clear(m, PGA_WRITEABLE);
1138
1139		free_pv_entry(pmap, pv);
1140		return 0;
1141	} else {
1142		return ENOENT;
1143	}
1144}
1145
1146/*
1147 * Create a pv entry for the page at pa for the given
1148 * (pmap, va) mapping.
1149 */
1150static void
1151pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1152{
1153	pv_entry_t pv;
1154
1155	rw_assert(&pvh_global_lock, RA_WLOCKED);
1156	pv = get_pv_entry(pmap, FALSE);
1157	pv->pv_va = va;
1158	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1159}
1160
1161/*
1162 *	Routine:	pmap_extract
1163 *	Function:
1164 *		Extract the physical page address associated
1165 *		with the given map/virtual_address pair.
1166 */
1167vm_paddr_t
1168pmap_extract(pmap_t pmap, vm_offset_t va)
1169{
1170	struct ia64_lpte *pte;
1171	pmap_t oldpmap;
1172	vm_paddr_t pa;
1173
1174	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, va);
1175
1176	pa = 0;
1177	PMAP_LOCK(pmap);
1178	oldpmap = pmap_switch(pmap);
1179	pte = pmap_find_vhpt(va);
1180	if (pte != NULL && pmap_present(pte))
1181		pa = pmap_ppn(pte);
1182	pmap_switch(oldpmap);
1183	PMAP_UNLOCK(pmap);
1184	return (pa);
1185}
1186
1187/*
1188 *	Routine:	pmap_extract_and_hold
1189 *	Function:
1190 *		Atomically extract and hold the physical page
1191 *		with the given pmap and virtual address pair
1192 *		if that mapping permits the given protection.
1193 */
1194vm_page_t
1195pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1196{
1197	struct ia64_lpte *pte;
1198	pmap_t oldpmap;
1199	vm_page_t m;
1200	vm_paddr_t pa;
1201
1202	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, prot=%#x)", __func__, pmap, va,
1203	    prot);
1204
1205	pa = 0;
1206	m = NULL;
1207	PMAP_LOCK(pmap);
1208	oldpmap = pmap_switch(pmap);
1209retry:
1210	pte = pmap_find_vhpt(va);
1211	if (pte != NULL && pmap_present(pte) &&
1212	    (pmap_prot(pte) & prot) == prot) {
1213		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1214		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1215			goto retry;
1216		vm_page_hold(m);
1217	}
1218	PA_UNLOCK_COND(pa);
1219	pmap_switch(oldpmap);
1220	PMAP_UNLOCK(pmap);
1221	return (m);
1222}
1223
1224/***************************************************
1225 * Low level mapping routines.....
1226 ***************************************************/
1227
1228/*
1229 * Find the kernel lpte for mapping the given virtual address, which
1230 * must be in the part of region 5 which we can cover with our kernel
1231 * 'page tables'.
1232 */
1233static struct ia64_lpte *
1234pmap_find_kpte(vm_offset_t va)
1235{
1236	struct ia64_lpte **dir1;
1237	struct ia64_lpte *leaf;
1238
1239	KASSERT((va >> 61) == 5,
1240		("kernel mapping 0x%lx not in region 5", va));
1241	KASSERT(va < kernel_vm_end,
1242		("kernel mapping 0x%lx out of range", va));
1243
1244	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1245	leaf = dir1[KPTE_DIR1_INDEX(va)];
1246	return (&leaf[KPTE_PTE_INDEX(va)]);
1247}
1248
1249/*
1250 * Find a pte suitable for mapping a user-space address. If one exists
1251 * in the VHPT, that one will be returned; otherwise a new pte is
1252 * allocated.
1253 */
1254static struct ia64_lpte *
1255pmap_find_pte(vm_offset_t va)
1256{
1257	struct ia64_lpte *pte;
1258
1259	if (va >= VM_MAXUSER_ADDRESS)
1260		return pmap_find_kpte(va);
1261
1262	pte = pmap_find_vhpt(va);
1263	if (pte == NULL) {
1264		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1265		pte->tag = 1UL << 63;
1266	}
1267	return (pte);
1268}
1269
1270/*
1271 * Free a pte which is now unused.  A user pte is simply returned to
1272 * the zone allocator.  For kernel mappings, the present bit is cleared
1273 * instead, marking the mapping as no longer in use.
1274 */
1275static void
1276pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1277{
1278	if (va < VM_MAXUSER_ADDRESS)
1279		uma_zfree(ptezone, pte);
1280	else
1281		pmap_clear_present(pte);
1282}
1283
1284static PMAP_INLINE void
1285pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1286{
1287	static long prot2ar[4] = {
1288		PTE_AR_R,		/* VM_PROT_NONE */
1289		PTE_AR_RW,		/* VM_PROT_WRITE */
1290		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1291		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1292	};
1293
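	/*
	 * The requested protection is kept in the PTE's software field
	 * (bits 56-58, see pmap_prot()), while prot2ar[] is indexed by the
	 * write and execute bits only: the shift by one drops the read bit.
	 */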
1294	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1295	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1296	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1297	    ? PTE_PL_KERN : PTE_PL_USER;
1298	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1299}
1300
1301static PMAP_INLINE void
1302pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1303{
1304
1305	pte->pte &= ~PTE_MA_MASK;
1306	pte->pte |= (ma & PTE_MA_MASK);
1307}
1308
1309/*
1310 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1311 * the pte was originally valid, then it's assumed to already be in the
1312 * VHPT.
1313 * This function does not set the protection bits.  It's expected
1314 * that those have been set correctly prior to calling this function.
1315 */
1316static void
1317pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1318    boolean_t wired, boolean_t managed)
1319{
1320
1321	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1322	    PTE_AR_MASK | PTE_ED;
1323	pte->pte |= PTE_PRESENT;
1324	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1325	pte->pte |= (wired) ? PTE_WIRED : 0;
1326	pte->pte |= pa & PTE_PPN_MASK;
1327
1328	pte->itir = PAGE_SHIFT << 2;
1329
1330	ia64_mf();
1331
1332	pte->tag = ia64_ttag(va);
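	/*
	 * Writing the tag last, after the fence, keeps the entry from
	 * matching a lookup before the rest of its contents are visible.
	 */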
1333}
1334
1335/*
1336 * Remove the (possibly managed) mapping represented by pte from the
1337 * given pmap.
1338 */
1339static int
1340pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1341		pv_entry_t pv, int freepte)
1342{
1343	int error;
1344	vm_page_t m;
1345
1346	/*
1347	 * First remove from the VHPT.
1348	 */
1349	error = pmap_remove_vhpt(va);
1350	KASSERT(error == 0, ("%s: pmap_remove_vhpt returned %d",
1351	    __func__, error));
1352
1353	pmap_invalidate_page(va);
1354
1355	if (pmap_wired(pte))
1356		pmap->pm_stats.wired_count -= 1;
1357
1358	pmap->pm_stats.resident_count -= 1;
1359	if (pmap_managed(pte)) {
1360		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1361		if (pmap_dirty(pte))
1362			vm_page_dirty(m);
1363		if (pmap_accessed(pte))
1364			vm_page_aflag_set(m, PGA_REFERENCED);
1365
1366		error = pmap_remove_entry(pmap, m, va, pv);
1367	}
1368	if (freepte)
1369		pmap_free_pte(pte, va);
1370
1371	return (error);
1372}
1373
1374/*
1375 * Extract the physical page address associated with a kernel
1376 * virtual address.
1377 */
1378vm_paddr_t
1379pmap_kextract(vm_offset_t va)
1380{
1381	struct ia64_lpte *pte;
1382	uint64_t *pbvm_pgtbl;
1383	vm_paddr_t pa;
1384	u_int idx;
1385
1386	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1387
1388	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1389
1390	/* Regions 6 and 7 are direct mapped. */
1391	if (va >= IA64_RR_BASE(6)) {
1392		pa = IA64_RR_MASK(va);
1393		goto out;
1394	}
1395
1396	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1397	if (va >= kernel_vm_end)
1398		goto err_out;
1399	if (va >= VM_INIT_KERNEL_ADDRESS) {
1400		pte = pmap_find_kpte(va);
1401		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1402		goto out;
1403	}
1404
1405	/* The PBVM page table. */
1406	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1407		goto err_out;
1408	if (va >= IA64_PBVM_PGTBL) {
1409		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1410		goto out;
1411	}
1412
1413	/* The PBVM itself. */
1414	if (va >= IA64_PBVM_BASE) {
1415		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1416		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1417		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1418			goto err_out;
1419		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1420			goto err_out;
1421		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1422		    (va & IA64_PBVM_PAGE_MASK);
1423		goto out;
1424	}
1425
1426 err_out:
1427	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1428	pa = 0;
1429	/* FALLTHROUGH */
1430
1431 out:
1432	return (pa);
1433}
1434
1435/*
1436 * Add a list of wired pages to the kva.  This routine is only used for
1437 * temporary kernel mappings that do not need to have page modification
1438 * or references recorded.  Note that old mappings are simply written
1439 * over.  The page is effectively wired, but it's customary to not have
1440 * the PTE reflect that, nor update statistics.
1441 */
1442void
1443pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1444{
1445	struct ia64_lpte *pte;
1446	int i;
1447
1448	CTR4(KTR_PMAP, "%s(va=%#lx, m_p=%p, cnt=%d)", __func__, va, m, count);
1449
1450	for (i = 0; i < count; i++) {
1451		pte = pmap_find_kpte(va);
1452		if (pmap_present(pte))
1453			pmap_invalidate_page(va);
1454		else
1455			pmap_enter_vhpt(pte, va);
1456		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1457		pmap_pte_attr(pte, m[i]->md.memattr);
1458		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1459		va += PAGE_SIZE;
1460	}
1461}
1462
1463/*
1464 * this routine jerks page mappings from the
1465 * kernel -- it is meant only for temporary mappings.
1466 */
1467void
1468pmap_qremove(vm_offset_t va, int count)
1469{
1470	struct ia64_lpte *pte;
1471	int i;
1472
1473	CTR3(KTR_PMAP, "%s(va=%#lx, cnt=%d)", __func__, va, count);
1474
1475	for (i = 0; i < count; i++) {
1476		pte = pmap_find_kpte(va);
1477		if (pmap_present(pte)) {
1478			pmap_remove_vhpt(va);
1479			pmap_invalidate_page(va);
1480			pmap_clear_present(pte);
1481		}
1482		va += PAGE_SIZE;
1483	}
1484}
1485
1486/*
1487 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1488 * to not have the PTE reflect that, nor update statistics.
1489 */
1490void
1491pmap_kenter(vm_offset_t va, vm_paddr_t pa)
1492{
1493	struct ia64_lpte *pte;
1494
1495	CTR3(KTR_PMAP, "%s(va=%#lx, pa=%#lx)", __func__, va, pa);
1496
1497	pte = pmap_find_kpte(va);
1498	if (pmap_present(pte))
1499		pmap_invalidate_page(va);
1500	else
1501		pmap_enter_vhpt(pte, va);
1502	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1503	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1504	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1505}
1506
1507/*
1508 * Remove a page from the kva
1509 */
1510void
1511pmap_kremove(vm_offset_t va)
1512{
1513	struct ia64_lpte *pte;
1514
1515	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1516
1517	pte = pmap_find_kpte(va);
1518	if (pmap_present(pte)) {
1519		pmap_remove_vhpt(va);
1520		pmap_invalidate_page(va);
1521		pmap_clear_present(pte);
1522	}
1523}
1524
1525/*
1526 *	Used to map a range of physical addresses into kernel
1527 *	virtual address space.
1528 *
1529 *	The value passed in '*virt' is a suggested virtual address for
1530 *	the mapping. Architectures which can support a direct-mapped
1531 *	physical to virtual region can return the appropriate address
1532 *	within that region, leaving '*virt' unchanged. Other
1533 *	architectures should map the pages starting at '*virt' and
1534 *	update '*virt' with the first usable address after the mapped
1535 *	region.
1536 */
1537vm_offset_t
1538pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1539{
1540
1541	CTR5(KTR_PMAP, "%s(va_p=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1542	    virt, start, end, prot);
1543
1544	return IA64_PHYS_TO_RR7(start);
1545}
1546
1547/*
1548 *	Remove the given range of addresses from the specified map.
1549 *
1550 *	It is assumed that the start and end are properly
1551 *	rounded to the page size.
1552 *
1553 *	Sparsely used ranges are inefficiently removed.  The VHPT is
1554 *	probed for every page within the range.  XXX
1555 */
1556void
1557pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1558{
1559	pmap_t oldpmap;
1560	vm_offset_t va;
1561	struct ia64_lpte *pte;
1562
1563	CTR4(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx)", __func__, pmap, sva,
1564	    eva);
1565
1566	/*
1567	 * Perform an unsynchronized read.  This is, however, safe.
1568	 */
1569	if (pmap->pm_stats.resident_count == 0)
1570		return;
1571
1572	rw_wlock(&pvh_global_lock);
1573	PMAP_LOCK(pmap);
1574	oldpmap = pmap_switch(pmap);
1575	for (va = sva; va < eva; va += PAGE_SIZE) {
1576		pte = pmap_find_vhpt(va);
1577		if (pte != NULL)
1578			pmap_remove_pte(pmap, pte, va, 0, 1);
1579	}
1580	rw_wunlock(&pvh_global_lock);
1581	pmap_switch(oldpmap);
1582	PMAP_UNLOCK(pmap);
1583}
1584
1585/*
1586 *	Routine:	pmap_remove_all
1587 *	Function:
1588 *		Removes this physical page from
1589 *		all physical maps in which it resides.
1590 *		Reflects back modify bits to the pager.
1591 *
1592 *	Notes:
1593 *		Original versions of this routine were very
1594 *		inefficient because they iteratively called
1595 *		pmap_remove (slow...)
1596 */
1597void
1598pmap_remove_all(vm_page_t m)
1599{
1600	pmap_t oldpmap;
1601	pv_entry_t pv;
1602
1603	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
1604
1605	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1606	    ("pmap_remove_all: page %p is not managed", m));
1607	rw_wlock(&pvh_global_lock);
1608	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1609		struct ia64_lpte *pte;
1610		pmap_t pmap = PV_PMAP(pv);
1611		vm_offset_t va = pv->pv_va;
1612
1613		PMAP_LOCK(pmap);
1614		oldpmap = pmap_switch(pmap);
1615		pte = pmap_find_vhpt(va);
1616		KASSERT(pte != NULL, ("pte"));
1617		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1618			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1619		pmap_remove_pte(pmap, pte, va, pv, 1);
1620		pmap_switch(oldpmap);
1621		PMAP_UNLOCK(pmap);
1622	}
1623	vm_page_aflag_clear(m, PGA_WRITEABLE);
1624	rw_wunlock(&pvh_global_lock);
1625}
1626
1627/*
1628 *	Set the physical protection on the
1629 *	specified range of this map as requested.
1630 */
1631void
1632pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1633{
1634	pmap_t oldpmap;
1635	struct ia64_lpte *pte;
1636
1637	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1638	    pmap, sva, eva, prot);
1639
1640	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1641		pmap_remove(pmap, sva, eva);
1642		return;
1643	}
1644
1645	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1646	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1647		return;
1648
1649	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1650		panic("pmap_protect: unaligned addresses");
1651
1652	PMAP_LOCK(pmap);
1653	oldpmap = pmap_switch(pmap);
1654	for ( ; sva < eva; sva += PAGE_SIZE) {
1655		/* If page is invalid, skip this page */
1656		pte = pmap_find_vhpt(sva);
1657		if (pte == NULL)
1658			continue;
1659
1660		/* If there's no change, skip it too */
1661		if (pmap_prot(pte) == prot)
1662			continue;
1663
1664		if ((prot & VM_PROT_WRITE) == 0 &&
1665		    pmap_managed(pte) && pmap_dirty(pte)) {
1666			vm_paddr_t pa = pmap_ppn(pte);
1667			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1668
1669			vm_page_dirty(m);
1670			pmap_clear_dirty(pte);
1671		}
1672
1673		if (prot & VM_PROT_EXECUTE)
1674			ia64_sync_icache(sva, PAGE_SIZE);
1675
1676		pmap_pte_prot(pmap, pte, prot);
1677		pmap_invalidate_page(sva);
1678	}
1679	pmap_switch(oldpmap);
1680	PMAP_UNLOCK(pmap);
1681}
1682
1683/*
1684 *	Insert the given physical page (p) at
1685 *	the specified virtual address (v) in the
1686 *	target physical map with the protection requested.
1687 *
1688 *	If specified, the page will be wired down, meaning
1689 *	that the related pte can not be reclaimed.
1690 *
1691 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1692 *	or lose information.  That is, this routine must actually
1693 *	insert this page into the given map NOW.
1694 */
1695void
1696pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1697    vm_prot_t prot, boolean_t wired)
1698{
1699	pmap_t oldpmap;
1700	vm_offset_t pa;
1701	vm_offset_t opa;
1702	struct ia64_lpte origpte;
1703	struct ia64_lpte *pte;
1704	boolean_t icache_inval, managed;
1705
1706	CTR6(KTR_PMAP, "pmap_enter(pm=%p, va=%#lx, acc=%#x, m=%p, prot=%#x, "
1707	    "wired=%u)", pmap, va, access, m, prot, wired);
1708
1709	rw_wlock(&pvh_global_lock);
1710	PMAP_LOCK(pmap);
1711	oldpmap = pmap_switch(pmap);
1712
1713	va &= ~PAGE_MASK;
1714 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1715	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
1716	    ("pmap_enter: page %p is not busy", m));
1717
1718	/*
1719	 * Find (or create) a pte for the given mapping.
1720	 */
1721	while ((pte = pmap_find_pte(va)) == NULL) {
1722		pmap_switch(oldpmap);
1723		PMAP_UNLOCK(pmap);
1724		rw_wunlock(&pvh_global_lock);
1725		VM_WAIT;
1726		rw_wlock(&pvh_global_lock);
1727		PMAP_LOCK(pmap);
1728		oldpmap = pmap_switch(pmap);
1729	}
1730	origpte = *pte;
1731	if (!pmap_present(pte)) {
1732		opa = ~0UL;
1733		pmap_enter_vhpt(pte, va);
1734	} else
1735		opa = pmap_ppn(pte);
1736	managed = FALSE;
1737	pa = VM_PAGE_TO_PHYS(m);
1738
1739	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1740
1741	/*
1742	 * Mapping has not changed, must be protection or wiring change.
1743	 */
1744	if (opa == pa) {
1745		/*
1746		 * Wiring change, just update stats. We don't worry about
1747		 * wiring PT pages as they remain resident as long as there
1748		 * are valid mappings in them. Hence, if a user page is wired,
1749		 * the PT page will be also.
1750		 */
1751		if (wired && !pmap_wired(&origpte))
1752			pmap->pm_stats.wired_count++;
1753		else if (!wired && pmap_wired(&origpte))
1754			pmap->pm_stats.wired_count--;
1755
1756		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1757
1758		/*
1759		 * We might be turning off write access to the page,
1760		 * so we go ahead and sense modify status. Otherwise,
1761		 * we can avoid I-cache invalidation if the page
1762		 * already allowed execution.
1763		 */
1764		if (managed && pmap_dirty(&origpte))
1765			vm_page_dirty(m);
1766		else if (pmap_exec(&origpte))
1767			icache_inval = FALSE;
1768
1769		pmap_invalidate_page(va);
1770		goto validate;
1771	}
1772
1773	/*
1774	 * Mapping has changed, invalidate old range and fall
1775	 * through to handle validating new mapping.
1776	 */
1777	if (opa != ~0UL) {
1778		pmap_remove_pte(pmap, pte, va, 0, 0);
1779		pmap_enter_vhpt(pte, va);
1780	}
1781
1782	/*
1783	 * Enter on the PV list if part of our managed memory.
1784	 */
1785	if ((m->oflags & VPO_UNMANAGED) == 0) {
1786		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1787		    ("pmap_enter: managed mapping within the clean submap"));
1788		pmap_insert_entry(pmap, va, m);
1789		managed = TRUE;
1790	}
1791
1792	/*
1793	 * Increment counters
1794	 */
1795	pmap->pm_stats.resident_count++;
1796	if (wired)
1797		pmap->pm_stats.wired_count++;
1798
1799validate:
1800
1801	/*
1802	 * Now validate mapping with desired protection/wiring. This
1803	 * adds the pte to the VHPT if necessary.
1804	 */
1805	pmap_pte_prot(pmap, pte, prot);
1806	pmap_pte_attr(pte, m->md.memattr);
1807	pmap_set_pte(pte, va, pa, wired, managed);
1808
1809	/* Invalidate the I-cache when needed. */
1810	if (icache_inval)
1811		ia64_sync_icache(va, PAGE_SIZE);
1812
1813	if ((prot & VM_PROT_WRITE) != 0 && managed)
1814		vm_page_aflag_set(m, PGA_WRITEABLE);
1815	rw_wunlock(&pvh_global_lock);
1816	pmap_switch(oldpmap);
1817	PMAP_UNLOCK(pmap);
1818}
1819
1820/*
1821 * Maps a sequence of resident pages belonging to the same object.
1822 * The sequence begins with the given page m_start.  This page is
1823 * mapped at the given virtual address start.  Each subsequent page is
1824 * mapped at a virtual address that is offset from start by the same
1825 * amount as the page is offset from m_start within the object.  The
1826 * last page in the sequence is the page with the largest offset from
1827 * m_start that can be mapped at a virtual address less than the given
1828 * virtual address end.  Not every virtual page between start and end
1829 * is mapped; only those for which a resident page exists with the
1830 * corresponding offset from m_start are mapped.
1831 */
1832void
1833pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1834    vm_page_t m_start, vm_prot_t prot)
1835{
1836	pmap_t oldpmap;
1837	vm_page_t m;
1838	vm_pindex_t diff, psize;
1839
1840	CTR6(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, m=%p, prot=%#x)",
1841	    __func__, pmap, start, end, m_start, prot);
1842
1843	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1844
1845	psize = atop(end - start);
1846	m = m_start;
1847	rw_wlock(&pvh_global_lock);
1848	PMAP_LOCK(pmap);
1849	oldpmap = pmap_switch(pmap);
1850	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1851		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1852		m = TAILQ_NEXT(m, listq);
1853	}
1854	rw_wunlock(&pvh_global_lock);
1855	pmap_switch(oldpmap);
1856 	PMAP_UNLOCK(pmap);
1857}
1858
1859/*
1860 * This code makes some *MAJOR* assumptions:
1861 * 1. The current pmap and the given pmap exist.
1862 * 2. Not wired.
1863 * 3. Read access.
1864 * 4. No page table pages.
1865 * but is *MUCH* faster than pmap_enter...
1866 */
1867void
1868pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1869{
1870	pmap_t oldpmap;
1871
1872	CTR5(KTR_PMAP, "%s(pm=%p, va=%#lx, m=%p, prot=%#x)", __func__, pmap,
1873	    va, m, prot);
1874
1875	rw_wlock(&pvh_global_lock);
1876	PMAP_LOCK(pmap);
1877	oldpmap = pmap_switch(pmap);
1878	pmap_enter_quick_locked(pmap, va, m, prot);
1879	rw_wunlock(&pvh_global_lock);
1880	pmap_switch(oldpmap);
1881	PMAP_UNLOCK(pmap);
1882}
1883
1884static void
1885pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1886    vm_prot_t prot)
1887{
1888	struct ia64_lpte *pte;
1889	boolean_t managed;
1890
1891	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1892	    (m->oflags & VPO_UNMANAGED) != 0,
1893	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1894	rw_assert(&pvh_global_lock, RA_WLOCKED);
1895	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1896
1897	if ((pte = pmap_find_pte(va)) == NULL)
1898		return;
1899
1900	if (!pmap_present(pte)) {
1901		/* Enter on the PV list if the page is managed. */
1902		if ((m->oflags & VPO_UNMANAGED) == 0) {
1903			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1904				pmap_free_pte(pte, va);
1905				return;
1906			}
1907			managed = TRUE;
1908		} else
1909			managed = FALSE;
1910
1911		/* Increment counters. */
1912		pmap->pm_stats.resident_count++;
1913
1914		/* Initialize with R/O protection and enter into VHPT. */
1915		pmap_enter_vhpt(pte, va);
1916		pmap_pte_prot(pmap, pte,
1917		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1918		pmap_pte_attr(pte, m->md.memattr);
1919		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1920
1921		if (prot & VM_PROT_EXECUTE)
1922			ia64_sync_icache(va, PAGE_SIZE);
1923	}
1924}
1925
1926/*
1927 * pmap_object_init_pt preloads the ptes for a given object
1928 * into the specified pmap.  This eliminates the blast of soft
1929 * faults on process startup and immediately after an mmap.
1930 */
1931void
1932pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
1933    vm_pindex_t pindex, vm_size_t size)
1934{
1935
1936	CTR6(KTR_PMAP, "%s(pm=%p, va=%#lx, obj=%p, idx=%lu, sz=%#lx)",
1937	    __func__, pmap, addr, object, pindex, size);
1938
1939	VM_OBJECT_ASSERT_WLOCKED(object);
1940	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1941	    ("pmap_object_init_pt: non-device object"));
1942}
1943
1944/*
1945 *	Routine:	pmap_change_wiring
1946 *	Function:	Change the wiring attribute for a map/virtual-address
1947 *			pair.
1948 *	In/out conditions:
1949 *			The mapping must already exist in the pmap.
1950 */
1951void
1952pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1953{
1954	pmap_t oldpmap;
1955	struct ia64_lpte *pte;
1956
1957	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, wired=%u)", __func__, pmap, va,
1958	    wired);
1959
1960	PMAP_LOCK(pmap);
1961	oldpmap = pmap_switch(pmap);
1962
1963	pte = pmap_find_vhpt(va);
1964	KASSERT(pte != NULL, ("pte"));
1965	if (wired && !pmap_wired(pte)) {
1966		pmap->pm_stats.wired_count++;
1967		pmap_set_wired(pte);
1968	} else if (!wired && pmap_wired(pte)) {
1969		pmap->pm_stats.wired_count--;
1970		pmap_clear_wired(pte);
1971	}
1972
1973	pmap_switch(oldpmap);
1974	PMAP_UNLOCK(pmap);
1975}
1976
1977/*
1978 *	Copy the range specified by src_addr/len
1979 *	from the source map to the range dst_addr/len
1980 *	in the destination map.
1981 *
1982 *	This routine is only advisory and need not do anything.
1983 */
1984void
1985pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_va, vm_size_t len,
1986    vm_offset_t src_va)
1987{
1988
1989	CTR6(KTR_PMAP, "%s(dpm=%p, spm=%p, dva=%#lx, sz=%#lx, sva=%#lx)",
1990	    __func__, dst_pmap, src_pmap, dst_va, len, src_va);
1991}
1992
1993/*
1994 *	pmap_zero_page zeros the specified hardware page by
1995 *	mapping it into virtual memory and using bzero to clear
1996 *	its contents.
1997 */
1998void
1999pmap_zero_page(vm_page_t m)
2000{
2001	void *p;
2002
2003	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2004
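	/*
	 * No transient mapping is needed here; pmap_page_to_va()
	 * presumably returns an address in the identity-mapped kernel
	 * region through which every page is directly addressable.
	 */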
2005	p = (void *)pmap_page_to_va(m);
2006	bzero(p, PAGE_SIZE);
2007}
2008
2009/*
2010 *	pmap_zero_page_area zeros the specified hardware page by
2011 *	mapping it into virtual memory and using bzero to clear
2012 *	its contents.
2013 *
2014 *	off and size must reside within a single page.
2015 */
2016void
2017pmap_zero_page_area(vm_page_t m, int off, int size)
2018{
2019	char *p;
2020
2021	CTR4(KTR_PMAP, "%s(m=%p, ofs=%d, len=%d)", __func__, m, off, size);
2022
2023	p = (void *)pmap_page_to_va(m);
2024	bzero(p + off, size);
2025}
2026
2027/*
2028 *	pmap_zero_page_idle zeros the specified hardware page by
2029 *	mapping it into virtual memory and using bzero to clear
2030 *	its contents.  This is for the vm_idlezero process.
2031 */
2032void
2033pmap_zero_page_idle(vm_page_t m)
2034{
2035	void *p;
2036
2037	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2038
2039	p = (void *)pmap_page_to_va(m);
2040	bzero(p, PAGE_SIZE);
2041}
2042
2043/*
2044 *	pmap_copy_page copies the specified (machine independent)
2045 *	page by mapping the page into virtual memory and using
2046 *	bcopy to copy the page, one machine dependent page at a
2047 *	time.
2048 */
2049void
2050pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2051{
2052	void *dst, *src;
2053
2054	CTR3(KTR_PMAP, "%s(sm=%p, dm=%p)", __func__, msrc, mdst);
2055
2056	src = (void *)pmap_page_to_va(msrc);
2057	dst = (void *)pmap_page_to_va(mdst);
2058	bcopy(src, dst, PAGE_SIZE);
2059}
2060
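/*
 * Copy xfersize bytes from the ma[] pages starting at byte offset
 * a_offset to the mb[] pages starting at byte offset b_offset.  The
 * loop below clips each copy to the smaller of the two remaining page
 * fragments, so neither offset needs to be page aligned.
 */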
2061void
2062pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2063    vm_offset_t b_offset, int xfersize)
2064{
2065	void *a_cp, *b_cp;
2066	vm_offset_t a_pg_offset, b_pg_offset;
2067	int cnt;
2068
2069	CTR6(KTR_PMAP, "%s(m0=%p, va0=%#lx, m1=%p, va1=%#lx, sz=%#x)",
2070	    __func__, ma, a_offset, mb, b_offset, xfersize);
2071
2072	while (xfersize > 0) {
2073		a_pg_offset = a_offset & PAGE_MASK;
2074		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2075		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
2076		    a_pg_offset;
2077		b_pg_offset = b_offset & PAGE_MASK;
2078		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2079		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
2080		    b_pg_offset;
2081		bcopy(a_cp, b_cp, cnt);
2082		a_offset += cnt;
2083		b_offset += cnt;
2084		xfersize -= cnt;
2085	}
2086}
2087
2088/*
2089 * Returns true if the pmap's pv is one of the first
2090 * 16 pvs linked to from this page.  This count may
2091 * be changed upwards or downwards in the future; it
2092 * is only necessary that true be returned for a small
2093 * subset of pmaps for proper page aging.
2094 */
2095boolean_t
2096pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2097{
2098	pv_entry_t pv;
2099	int loops = 0;
2100	boolean_t rv;
2101
2102	CTR3(KTR_PMAP, "%s(pm=%p, m=%p)", __func__, pmap, m);
2103
2104	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2105	    ("pmap_page_exists_quick: page %p is not managed", m));
2106	rv = FALSE;
2107	rw_wlock(&pvh_global_lock);
2108	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2109		if (PV_PMAP(pv) == pmap) {
2110			rv = TRUE;
2111			break;
2112		}
2113		loops++;
2114		if (loops >= 16)
2115			break;
2116	}
2117	rw_wunlock(&pvh_global_lock);
2118	return (rv);
2119}
2120
2121/*
2122 *	pmap_page_wired_mappings:
2123 *
2124 *	Return the number of managed mappings to the given physical page
2125 *	that are wired.
2126 */
2127int
2128pmap_page_wired_mappings(vm_page_t m)
2129{
2130	struct ia64_lpte *pte;
2131	pmap_t oldpmap, pmap;
2132	pv_entry_t pv;
2133	int count;
2134
2135	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2136
2137	count = 0;
2138	if ((m->oflags & VPO_UNMANAGED) != 0)
2139		return (count);
2140	rw_wlock(&pvh_global_lock);
2141	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2142		pmap = PV_PMAP(pv);
2143		PMAP_LOCK(pmap);
2144		oldpmap = pmap_switch(pmap);
2145		pte = pmap_find_vhpt(pv->pv_va);
2146		KASSERT(pte != NULL, ("pte"));
2147		if (pmap_wired(pte))
2148			count++;
2149		pmap_switch(oldpmap);
2150		PMAP_UNLOCK(pmap);
2151	}
2152	rw_wunlock(&pvh_global_lock);
2153	return (count);
2154}
2155
2156/*
2157 * Remove all pages from the specified address space; this aids
2158 * process exit speeds.  Also, this code is special-cased for the
2159 * current process only, but can have the more generic (and
2160 * slightly slower) mode enabled.  This is much faster than
2161 * pmap_remove in the case of running down an entire address
2162 * space.
2163 */
2164void
2165pmap_remove_pages(pmap_t pmap)
2166{
2167	struct pv_chunk *pc, *npc;
2168	struct ia64_lpte *pte;
2169	pmap_t oldpmap;
2170	pv_entry_t pv;
2171	vm_offset_t va;
2172	vm_page_t m;
2173	u_long inuse, bitmask;
2174	int allfree, bit, field, idx;
2175
2176	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
2177
2178	rw_wlock(&pvh_global_lock);
2179	PMAP_LOCK(pmap);
2180	oldpmap = pmap_switch(pmap);
2181	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2182		allfree = 1;
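		/*
		 * Every clear bit in pc_map marks an allocated pv entry
		 * in this chunk; walk them with ffsl() and tear down the
		 * corresponding mappings, skipping wired ones.
		 */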
2183		for (field = 0; field < _NPCM; field++) {
2184			inuse = ~pc->pc_map[field] & pc_freemask[field];
2185			while (inuse != 0) {
2186				bit = ffsl(inuse) - 1;
2187				bitmask = 1UL << bit;
2188				idx = field * sizeof(inuse) * NBBY + bit;
2189				pv = &pc->pc_pventry[idx];
2190				inuse &= ~bitmask;
2191				va = pv->pv_va;
2192				pte = pmap_find_vhpt(va);
2193				KASSERT(pte != NULL, ("pte"));
2194				if (pmap_wired(pte)) {
2195					allfree = 0;
2196					continue;
2197				}
2198				pmap_remove_vhpt(va);
2199				pmap_invalidate_page(va);
2200				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2201				if (pmap_dirty(pte))
2202					vm_page_dirty(m);
2203				pmap_free_pte(pte, va);
2204				/* Mark free */
2205				PV_STAT(pv_entry_frees++);
2206				PV_STAT(pv_entry_spare++);
2207				pv_entry_count--;
2208				pc->pc_map[field] |= bitmask;
2209				pmap->pm_stats.resident_count--;
2210				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2211				if (TAILQ_EMPTY(&m->md.pv_list))
2212					vm_page_aflag_clear(m, PGA_WRITEABLE);
2213			}
2214		}
2215		if (allfree) {
2216			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2217			free_pv_chunk(pc);
2218		}
2219	}
2220	pmap_switch(oldpmap);
2221	PMAP_UNLOCK(pmap);
2222	rw_wunlock(&pvh_global_lock);
2223}
2224
2225/*
2226 *	pmap_ts_referenced:
2227 *
2228 *	Return a count of reference bits for a page, clearing those bits.
2229 *	It is not necessary for every reference bit to be cleared, but it
2230 *	is necessary that 0 only be returned when there are truly no
2231 *	reference bits set.
2232 *
2233 *	XXX: The exact number of bits to check and clear is a matter that
2234 *	should be tested and standardized at some point in the future for
2235 *	optimal aging of shared pages.
2236 */
2237int
2238pmap_ts_referenced(vm_page_t m)
2239{
2240	struct ia64_lpte *pte;
2241	pmap_t oldpmap, pmap;
2242	pv_entry_t pv;
2243	int count = 0;
2244
2245	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2246
2247	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2248	    ("pmap_ts_referenced: page %p is not managed", m));
2249	rw_wlock(&pvh_global_lock);
2250	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2251		pmap = PV_PMAP(pv);
2252		PMAP_LOCK(pmap);
2253		oldpmap = pmap_switch(pmap);
2254		pte = pmap_find_vhpt(pv->pv_va);
2255		KASSERT(pte != NULL, ("pte"));
2256		if (pmap_accessed(pte)) {
2257			count++;
2258			pmap_clear_accessed(pte);
2259			pmap_invalidate_page(pv->pv_va);
2260		}
2261		pmap_switch(oldpmap);
2262		PMAP_UNLOCK(pmap);
2263	}
2264	rw_wunlock(&pvh_global_lock);
2265	return (count);
2266}
2267
2268/*
2269 *	pmap_is_modified:
2270 *
2271 *	Return whether or not the specified physical page was modified
2272 *	in any physical maps.
2273 */
2274boolean_t
2275pmap_is_modified(vm_page_t m)
2276{
2277	struct ia64_lpte *pte;
2278	pmap_t oldpmap, pmap;
2279	pv_entry_t pv;
2280	boolean_t rv;
2281
2282	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2283
2284	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2285	    ("pmap_is_modified: page %p is not managed", m));
2286	rv = FALSE;
2287
2288	/*
2289	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2290	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2291	 * is clear, no PTEs can be dirty.
2292	 */
2293	VM_OBJECT_ASSERT_WLOCKED(m->object);
2294	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2295		return (rv);
2296	rw_wlock(&pvh_global_lock);
2297	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2298		pmap = PV_PMAP(pv);
2299		PMAP_LOCK(pmap);
2300		oldpmap = pmap_switch(pmap);
2301		pte = pmap_find_vhpt(pv->pv_va);
2302		pmap_switch(oldpmap);
2303		KASSERT(pte != NULL, ("pte"));
2304		rv = pmap_dirty(pte) ? TRUE : FALSE;
2305		PMAP_UNLOCK(pmap);
2306		if (rv)
2307			break;
2308	}
2309	rw_wunlock(&pvh_global_lock);
2310	return (rv);
2311}
2312
2313/*
2314 *	pmap_is_prefaultable:
2315 *
2316 *	Return whether or not the specified virtual address is eligible
2317 *	for prefault.
2318 */
2319boolean_t
2320pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2321{
2322	struct ia64_lpte *pte;
2323
2324	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, addr);
2325
2326	pte = pmap_find_vhpt(addr);
2327	if (pte != NULL && pmap_present(pte))
2328		return (FALSE);
2329	return (TRUE);
2330}
2331
2332/*
2333 *	pmap_is_referenced:
2334 *
2335 *	Return whether or not the specified physical page was referenced
2336 *	in any physical maps.
2337 */
2338boolean_t
2339pmap_is_referenced(vm_page_t m)
2340{
2341	struct ia64_lpte *pte;
2342	pmap_t oldpmap, pmap;
2343	pv_entry_t pv;
2344	boolean_t rv;
2345
2346	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2347
2348	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2349	    ("pmap_is_referenced: page %p is not managed", m));
2350	rv = FALSE;
2351	rw_wlock(&pvh_global_lock);
2352	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2353		pmap = PV_PMAP(pv);
2354		PMAP_LOCK(pmap);
2355		oldpmap = pmap_switch(pmap);
2356		pte = pmap_find_vhpt(pv->pv_va);
2357		pmap_switch(oldpmap);
2358		KASSERT(pte != NULL, ("pte"));
2359		rv = pmap_accessed(pte) ? TRUE : FALSE;
2360		PMAP_UNLOCK(pmap);
2361		if (rv)
2362			break;
2363	}
2364	rw_wunlock(&pvh_global_lock);
2365	return (rv);
2366}
2367
2368/*
2369 *	Apply the given advice to the specified range of addresses within the
2370 *	given pmap.  Depending on the advice, clear the referenced and/or
2371 *	modified flags in each mapping and set the mapped page's dirty field.
2372 */
2373void
2374pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2375{
2376	struct ia64_lpte *pte;
2377	pmap_t oldpmap;
2378	vm_page_t m;
2379
2380	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, adv=%d)", __func__,
2381	    pmap, sva, eva, advice);
2382
2383	PMAP_LOCK(pmap);
2384	oldpmap = pmap_switch(pmap);
2385	for (; sva < eva; sva += PAGE_SIZE) {
2386		/* If page is invalid, skip this page. */
2387		pte = pmap_find_vhpt(sva);
2388		if (pte == NULL)
2389			continue;
2390
2391		/* If it isn't managed, skip it too. */
2392		if (!pmap_managed(pte))
2393			continue;
2394
2395		/* Clear its modified and referenced bits. */
2396		if (pmap_dirty(pte)) {
2397			if (advice == MADV_DONTNEED) {
2398				/*
2399				 * Future calls to pmap_is_modified() can be
2400				 * avoided by making the page dirty now.
2401				 */
2402				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2403				vm_page_dirty(m);
2404			}
2405			pmap_clear_dirty(pte);
2406		} else if (!pmap_accessed(pte))
2407			continue;
2408		pmap_clear_accessed(pte);
2409		pmap_invalidate_page(sva);
2410	}
2411	pmap_switch(oldpmap);
2412	PMAP_UNLOCK(pmap);
2413}
2414
2415/*
2416 *	Clear the modify bits on the specified physical page.
2417 */
2418void
2419pmap_clear_modify(vm_page_t m)
2420{
2421	struct ia64_lpte *pte;
2422	pmap_t oldpmap, pmap;
2423	pv_entry_t pv;
2424
2425	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2426
2427	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2428	    ("pmap_clear_modify: page %p is not managed", m));
2429	VM_OBJECT_ASSERT_WLOCKED(m->object);
2430	KASSERT(!vm_page_xbusied(m),
2431	    ("pmap_clear_modify: page %p is exclusive busied", m));
2432
2433	/*
2434	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
2435	 * If the object containing the page is locked and the page is not
2436	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2437	 */
2438	if ((m->aflags & PGA_WRITEABLE) == 0)
2439		return;
2440	rw_wlock(&pvh_global_lock);
2441	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2442		pmap = PV_PMAP(pv);
2443		PMAP_LOCK(pmap);
2444		oldpmap = pmap_switch(pmap);
2445		pte = pmap_find_vhpt(pv->pv_va);
2446		KASSERT(pte != NULL, ("pte"));
2447		if (pmap_dirty(pte)) {
2448			pmap_clear_dirty(pte);
2449			pmap_invalidate_page(pv->pv_va);
2450		}
2451		pmap_switch(oldpmap);
2452		PMAP_UNLOCK(pmap);
2453	}
2454	rw_wunlock(&pvh_global_lock);
2455}
2456
2457/*
2458 * Clear the write and modified bits in each of the given page's mappings.
2459 */
2460void
2461pmap_remove_write(vm_page_t m)
2462{
2463	struct ia64_lpte *pte;
2464	pmap_t oldpmap, pmap;
2465	pv_entry_t pv;
2466	vm_prot_t prot;
2467
2468	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2469
2470	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2471	    ("pmap_remove_write: page %p is not managed", m));
2472
2473	/*
2474	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2475	 * set by another thread while the object is locked.  Thus,
2476	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2477	 */
2478	VM_OBJECT_ASSERT_WLOCKED(m->object);
2479	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2480		return;
2481	rw_wlock(&pvh_global_lock);
2482	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2483		pmap = PV_PMAP(pv);
2484		PMAP_LOCK(pmap);
2485		oldpmap = pmap_switch(pmap);
2486		pte = pmap_find_vhpt(pv->pv_va);
2487		KASSERT(pte != NULL, ("pte"));
2488		prot = pmap_prot(pte);
2489		if ((prot & VM_PROT_WRITE) != 0) {
2490			if (pmap_dirty(pte)) {
2491				vm_page_dirty(m);
2492				pmap_clear_dirty(pte);
2493			}
2494			prot &= ~VM_PROT_WRITE;
2495			pmap_pte_prot(pmap, pte, prot);
2496			pmap_pte_attr(pte, m->md.memattr);
2497			pmap_invalidate_page(pv->pv_va);
2498		}
2499		pmap_switch(oldpmap);
2500		PMAP_UNLOCK(pmap);
2501	}
2502	vm_page_aflag_clear(m, PGA_WRITEABLE);
2503	rw_wunlock(&pvh_global_lock);
2504}
2505
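/*
 * Translate a physical address range to a device-accessible KVA.  No
 * page tables are used: the address is formed in one of the ia64
 * identity-mapped kernel regions, region 7 for write-back (cacheable)
 * memory and region 6 for uncacheable memory, based on the EFI memory
 * descriptor covering the range.  The most recent translation is
 * remembered in the statics below to make repeated lookups cheap.
 */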
2506vm_offset_t
2507pmap_mapdev_priv(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2508{
2509	static vm_offset_t last_va = 0;
2510	static vm_paddr_t last_pa = ~0UL;
2511	static vm_size_t last_sz = 0;
2512	struct efi_md *md;
2513
2514	if (pa == last_pa && sz == last_sz)
2515		return (last_va);
2516
2517	md = efi_md_find(pa);
2518	if (md == NULL) {
2519		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2520		    __func__, pa, pa + sz - 1);
2521		return (IA64_PHYS_TO_RR6(pa));
2522	}
2523
2524	if (md->md_type == EFI_MD_TYPE_FREE) {
2525		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2526		    pa + sz - 1);
2527		return (0);
2528	}
2529
2530	last_va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2531	    IA64_PHYS_TO_RR6(pa);
2532	last_pa = pa;
2533	last_sz = sz;
2534	return (last_va);
2535}
2536
2537/*
2538 * Map a set of physical memory pages into the kernel virtual
2539 * address space. Return a pointer to where it is mapped. This
2540 * routine is intended to be used for mapping device memory,
2541 * NOT real memory.
2542 */
2543void *
2544pmap_mapdev_attr(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2545{
2546	vm_offset_t va;
2547
2548	CTR4(KTR_PMAP, "%s(pa=%#lx, sz=%#lx, attr=%#x)", __func__, pa, sz,
2549	    attr);
2550
2551	va = pmap_mapdev_priv(pa, sz, attr);
2552	return ((void *)(uintptr_t)va);
2553}
2554
2555/*
2556 * 'Unmap' a range mapped by pmap_mapdev_attr().
2557 */
2558void
2559pmap_unmapdev(vm_offset_t va, vm_size_t size)
2560{
2561
2562	CTR3(KTR_PMAP, "%s(va=%#lx, sz=%#lx)", __func__, va, size);
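
	/*
	 * Nothing to do: pmap_mapdev_attr() returns addresses in the
	 * identity-mapped regions, so there is no mapping to tear down.
	 */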
2563}
2564
2565/*
2566 * Sets the memory attribute for the specified page.
2567 */
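/*
 * Rendezvous callback: issue the PAL call given by the argument on the
 * local CPU with interrupts disabled.
 */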
2568static void
2569pmap_page_set_memattr_1(void *arg)
2570{
2571	struct ia64_pal_result res;
2572	register_t is;
2573	uintptr_t pp = (uintptr_t)arg;
2574
2575	is = intr_disable();
2576	res = ia64_call_pal_static(pp, 0, 0, 0);
2577	intr_restore(is);
2578}
2579
2580void
2581pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2582{
2583	struct ia64_lpte *pte;
2584	pmap_t oldpmap, pmap;
2585	pv_entry_t pv;
2586	void *va;
2587
2588	CTR3(KTR_PMAP, "%s(m=%p, attr=%#x)", __func__, m, ma);
2589
2590	rw_wlock(&pvh_global_lock);
2591	m->md.memattr = ma;
2592	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2593		pmap = PV_PMAP(pv);
2594		PMAP_LOCK(pmap);
2595		oldpmap = pmap_switch(pmap);
2596		pte = pmap_find_vhpt(pv->pv_va);
2597		KASSERT(pte != NULL, ("pte"));
2598		pmap_pte_attr(pte, ma);
2599		pmap_invalidate_page(pv->pv_va);
2600		pmap_switch(oldpmap);
2601		PMAP_UNLOCK(pmap);
2602	}
2603	rw_wunlock(&pvh_global_lock);
2604
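	/*
	 * When the page is made uncacheable, follow what appears to be
	 * the architected attribute-change sequence: make outstanding
	 * prefetches visible on every CPU (PAL_PREFETCH_VISIBILITY),
	 * flush the page from the data cache, and finally drain pending
	 * memory transactions (PAL_MC_DRAIN).
	 */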
2605	if (ma == VM_MEMATTR_UNCACHEABLE) {
2606#ifdef SMP
2607		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2608		    (void *)PAL_PREFETCH_VISIBILITY);
2609#else
2610		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2611#endif
2612		va = (void *)pmap_page_to_va(m);
2613		critical_enter();
2614		cpu_flush_dcache(va, PAGE_SIZE);
2615		critical_exit();
2616#ifdef SMP
2617		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2618		    (void *)PAL_MC_DRAIN);
2619#else
2620		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2621#endif
2622	}
2623}
2624
2625/*
2626 * perform the pmap work for mincore
2627 */
2628int
2629pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2630{
2631	pmap_t oldpmap;
2632	struct ia64_lpte *pte, tpte;
2633	vm_paddr_t pa;
2634	int val;
2635
2636	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, pa_p=%p)", __func__, pmap, addr,
2637	    locked_pa);
2638
2639	PMAP_LOCK(pmap);
2640retry:
2641	oldpmap = pmap_switch(pmap);
2642	pte = pmap_find_vhpt(addr);
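	/*
	 * Copy the PTE into a local so it can still be examined after
	 * switching back to the previous pmap.
	 */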
2643	if (pte != NULL) {
2644		tpte = *pte;
2645		pte = &tpte;
2646	}
2647	pmap_switch(oldpmap);
2648	if (pte == NULL || !pmap_present(pte)) {
2649		val = 0;
2650		goto out;
2651	}
2652	val = MINCORE_INCORE;
2653	if (pmap_dirty(pte))
2654		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2655	if (pmap_accessed(pte))
2656		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2657	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2658	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2659	    pmap_managed(pte)) {
2660		pa = pmap_ppn(pte);
2661		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2662		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2663			goto retry;
2664	} else
2665out:
2666		PA_UNLOCK_COND(*locked_pa);
2667	PMAP_UNLOCK(pmap);
2668	return (val);
2669}
2670
2671/*
2672 * Activate the pmap of the given thread's process on the current CPU.
2673 */
2674void
2675pmap_activate(struct thread *td)
2676{
2677
2678	CTR2(KTR_PMAP, "%s(td=%p)", __func__, td);
2679
2680	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2681}
2682
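/*
 * Load the region registers of the current CPU for the given pmap and
 * return the previously active pmap.  Each region register appears to
 * be laid out as (rid << 8) | (ps << 2) | ve, so the stores below set
 * the pmap's region ID, a preferred page size of PAGE_SHIFT and the
 * VHPT walker enable bit.  A NULL pmap reinstalls fixed per-region IDs.
 */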
2683pmap_t
2684pmap_switch(pmap_t pm)
2685{
2686	pmap_t prevpm;
2687	int i;
2688
2689	critical_enter();
2690	prevpm = PCPU_GET(md.current_pmap);
2691	if (prevpm == pm)
2692		goto out;
2693	if (pm == NULL) {
2694		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2695			ia64_set_rr(IA64_RR_BASE(i),
2696			    (i << 8)|(PAGE_SHIFT << 2)|1);
2697		}
2698	} else {
2699		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2700			ia64_set_rr(IA64_RR_BASE(i),
2701			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2702		}
2703	}
2704	PCPU_SET(md.current_pmap, pm);
2705	ia64_srlz_d();
2706
2707out:
2708	critical_exit();
2709	return (prevpm);
2710}
2711
2712/*
2713 * Synchronize the instruction cache for the given range of the pmap.
2714 */
2715void
2716pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2717{
2718	pmap_t oldpm;
2719	struct ia64_lpte *pte;
2720	vm_offset_t lim;
2721	vm_size_t len;
2722
2723	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, sz=%#lx)", __func__, pm, va, sz);
2724
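	/*
	 * Round the range out to 32-byte blocks, presumably the smallest
	 * cache-line size ia64_sync_icache() has to handle, then flush
	 * page by page, skipping holes in the mapping.
	 */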
2725	sz += va & 31;
2726	va &= ~31;
2727	sz = (sz + 31) & ~31;
2728
2729	PMAP_LOCK(pm);
2730	oldpm = pmap_switch(pm);
2731	while (sz > 0) {
2732		lim = round_page(va);
2733		len = MIN(lim - va, sz);
2734		pte = pmap_find_vhpt(va);
2735		if (pte != NULL && pmap_present(pte))
2736			ia64_sync_icache(va, len);
2737		va += len;
2738		sz -= len;
2739	}
2740	pmap_switch(oldpm);
2741	PMAP_UNLOCK(pm);
2742}
2743
2744/*
2745 *	Increase the starting virtual address of the given mapping if a
2746 *	different alignment might result in more superpage mappings.
2747 */
2748void
2749pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2750    vm_offset_t *addr, vm_size_t size)
2751{
2752
2753	CTR5(KTR_PMAP, "%s(obj=%p, ofs=%#lx, va_p=%p, sz=%#lx)", __func__,
2754	    object, offset, addr, size);
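
	/* No superpage alignment adjustment is performed by this pmap. */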
2755}
2756
2757#include "opt_ddb.h"
2758
2759#ifdef DDB
2760
2761#include <ddb/ddb.h>
2762
2763static const char*	psnames[] = {
2764	"1B",	"2B",	"4B",	"8B",
2765	"16B",	"32B",	"64B",	"128B",
2766	"256B",	"512B",	"1K",	"2K",
2767	"4K",	"8K",	"16K",	"32K",
2768	"64K",	"128K",	"256K",	"512K",
2769	"1M",	"2M",	"4M",	"8M",
2770	"16M",	"32M",	"64M",	"128M",
2771	"256M",	"512M",	"1G",	"2G"
2772};
2773
2774static void
2775print_trs(int type)
2776{
2777	struct ia64_pal_result res;
2778	int i, maxtr;
2779	struct {
2780		pt_entry_t	pte;
2781		uint64_t	itir;
2782		uint64_t	ifa;
2783		struct ia64_rr	rr;
2784	} buf;
2785	static const char *manames[] = {
2786		"WB",	"bad",	"bad",	"bad",
2787		"UC",	"UCE",	"WC",	"NaT",
2788	};
2789
2790	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2791	if (res.pal_status != 0) {
2792		db_printf("Can't get VM summary\n");
2793		return;
2794	}
2795
2796	if (type == 0)
2797		maxtr = (res.pal_result[0] >> 40) & 0xff;
2798	else
2799		maxtr = (res.pal_result[0] >> 32) & 0xff;
2800
2801	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2802	for (i = 0; i <= maxtr; i++) {
2803		bzero(&buf, sizeof(buf));
2804		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2805		    ia64_tpa((uint64_t)&buf));
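		/*
		 * pal_result[0] appears to carry per-field validity bits;
		 * clear any field that was not returned as valid so it
		 * prints as zero below.
		 */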
2806		if (!(res.pal_result[0] & 1))
2807			buf.pte &= ~PTE_AR_MASK;
2808		if (!(res.pal_result[0] & 2))
2809			buf.pte &= ~PTE_PL_MASK;
2810		if (!(res.pal_result[0] & 4))
2811			pmap_clear_dirty(&buf);
2812		if (!(res.pal_result[0] & 8))
2813			buf.pte &= ~PTE_MA_MASK;
2814		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2815		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2816		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2817		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2818		    (buf.pte & PTE_ED) ? 1 : 0,
2819		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2820		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2821		    (pmap_dirty(&buf)) ? 1 : 0,
2822		    (pmap_accessed(&buf)) ? 1 : 0,
2823		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2824		    (pmap_present(&buf)) ? 1 : 0,
2825		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2826	}
2827}
2828
2829DB_COMMAND(itr, db_itr)
2830{
2831	print_trs(0);
2832}
2833
2834DB_COMMAND(dtr, db_dtr)
2835{
2836	print_trs(1);
2837}
2838
2839DB_COMMAND(rr, db_rr)
2840{
2841	int i;
2842	uint64_t t;
2843	struct ia64_rr rr;
2844
2845	printf("RR RID    PgSz VE\n");
2846	for (i = 0; i < 8; i++) {
2847		__asm __volatile ("mov %0=rr[%1]"
2848				  : "=r"(t)
2849				  : "r"(IA64_RR_BASE(i)));
2850		*(uint64_t *) &rr = t;
2851		printf("%d  %06x %4s %d\n",
2852		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2853	}
2854}
2855
2856DB_COMMAND(thash, db_thash)
2857{
2858	if (!have_addr)
2859		return;
2860
2861	db_printf("%p\n", (void *) ia64_thash(addr));
2862}
2863
2864DB_COMMAND(ttag, db_ttag)
2865{
2866	if (!have_addr)
2867		return;
2868
2869	db_printf("0x%lx\n", ia64_ttag(addr));
2870}
2871
2872DB_COMMAND(kpte, db_kpte)
2873{
2874	struct ia64_lpte *pte;
2875
2876	if (!have_addr) {
2877		db_printf("usage: kpte <kva>\n");
2878		return;
2879	}
2880	if (addr < VM_INIT_KERNEL_ADDRESS) {
2881		db_printf("kpte: error: invalid <kva>\n");
2882		return;
2883	}
2884	pte = pmap_find_kpte(addr);
2885	db_printf("kpte at %p:\n", pte);
2886	db_printf("  pte  =%016lx\n", pte->pte);
2887	db_printf("  itir =%016lx\n", pte->itir);
2888	db_printf("  tag  =%016lx\n", pte->tag);
2889	db_printf("  chain=%016lx\n", pte->chain);
2890}
2891
2892#endif
2893