1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/pmap.c 270439 2014-08-24 07:53:15Z kib $");
50
51#include "opt_pmap.h"
52
53#include <sys/param.h>
54#include <sys/efi.h>
55#include <sys/kernel.h>
56#include <sys/ktr.h>
57#include <sys/lock.h>
58#include <sys/mman.h>
59#include <sys/mutex.h>
60#include <sys/proc.h>
61#include <sys/rwlock.h>
62#include <sys/smp.h>
63#include <sys/sysctl.h>
64#include <sys/systm.h>
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/vm_page.h>
69#include <vm/vm_map.h>
70#include <vm/vm_object.h>
71#include <vm/vm_pageout.h>
72#include <vm/uma.h>
73
74#include <machine/bootinfo.h>
75#include <machine/md_var.h>
76#include <machine/pal.h>
77
78/*
79 *	Manages physical address maps.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
90 * this module may delay invalidation or reduced-protection
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Region 0-3:	User virtually mapped
106 * Region 4:	PBVM and special mappings
107 * Region 5:	Kernel virtual memory
108 * Region 6:	Direct-mapped uncacheable
109 * Region 7:	Direct-mapped cacheable
110 */
111
112/* XXX move to a header. */
113extern uint64_t ia64_gateway_page[];
114
115#if !defined(DIAGNOSTIC)
116#define PMAP_INLINE __inline
117#else
118#define PMAP_INLINE
119#endif
120
121#ifdef PV_STATS
122#define PV_STAT(x)	do { x ; } while (0)
123#else
124#define PV_STAT(x)	do { } while (0)
125#endif
126
127#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
128#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
129#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
130#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
131#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
132#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
133#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
134#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
135
136#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
137#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
138#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
139#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
140
141#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
142
143/*
144 * Individual PV entries are stored in per-pmap chunks.  This saves
145 * space by eliminating the need to record the pmap within every PV
146 * entry.
147 */
148#if PAGE_SIZE == 8192
149#define	_NPCM	6
150#define	_NPCPV	337
151#define	_NPCS	2
152#elif PAGE_SIZE == 16384
153#define	_NPCM	11
154#define	_NPCPV	677
155#define	_NPCS	1
156#endif
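/*
 * The values above are chosen so that a struct pv_chunk fills exactly
 * one page (see the CTASSERT further down).  Assuming the usual
 * 24-byte pv_entry (a va plus TAILQ linkage), the 8 KB case is
 * 337 * 24 = 8088 bytes of entries plus a 104-byte chunk header, i.e.
 * 8192 bytes; the 16 KB case is 677 * 24 + 136 = 16384.  _NPCM is the
 * number of 64-bit bitmap words needed to cover _NPCPV entries.
 */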
157struct pv_chunk {
158	pmap_t			pc_pmap;
159	TAILQ_ENTRY(pv_chunk)	pc_list;
160	u_long			pc_map[_NPCM];	/* bitmap; 1 = free */
161	TAILQ_ENTRY(pv_chunk)	pc_lru;
162	u_long			pc_spare[_NPCS];
163	struct pv_entry		pc_pventry[_NPCPV];
164};
165
166/*
167 * The VHPT bucket head structure.
168 */
169struct ia64_bucket {
170	uint64_t	chain;
171	struct mtx	mutex;
172	u_int		length;
173};
174
175/*
176 * Statically allocated kernel pmap
177 */
178struct pmap kernel_pmap_store;
179
180vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
181vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
182
183/*
184 * Kernel virtual memory management.
185 */
186static int nkpt;
187extern struct ia64_lpte ***ia64_kptdir;
188
189#define KPTE_DIR0_INDEX(va) \
190	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
191#define KPTE_DIR1_INDEX(va) \
192	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
193#define KPTE_PTE_INDEX(va) \
194	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
195#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
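/*
 * With 8 KB pages (PAGE_SHIFT == 13) and a 32-byte struct ia64_lpte,
 * the macros above split a region 5 offset into a 10-bit first-level
 * index (bits 31..40), a 10-bit second-level index (bits 21..30) and
 * an 8-bit leaf index (bits 13..20); each directory page holds
 * PAGE_SIZE/8 pointers and each leaf page holds NKPTEPG PTEs.  Other
 * page sizes shift the split accordingly.
 */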
196
197vm_offset_t kernel_vm_end;
198
199/* Defaults for ptc.e. */
200static uint64_t pmap_ptc_e_base = 0;
201static uint32_t pmap_ptc_e_count1 = 1;
202static uint32_t pmap_ptc_e_count2 = 1;
203static uint32_t pmap_ptc_e_stride1 = 0;
204static uint32_t pmap_ptc_e_stride2 = 0;
205
206struct mtx pmap_ptc_mutex;
207
208/*
209 * Data for the RID allocator
210 */
211static int pmap_ridcount;
212static int pmap_rididx;
213static int pmap_ridmapsz;
214static int pmap_ridmax;
215static uint64_t *pmap_ridmap;
216struct mtx pmap_ridmutex;
217
218static struct rwlock_padalign pvh_global_lock;
219
220/*
221 * Data for the pv entry allocation mechanism
222 */
223static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
224static int pv_entry_count;
225
226/*
227 * Data for allocating PTEs for user processes.
228 */
229static uma_zone_t ptezone;
230
231/*
232 * Virtual Hash Page Table (VHPT) data.
233 */
234/* SYSCTL_DECL(_machdep); */
235static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
236
237struct ia64_bucket *pmap_vhpt_bucket;
238
239int pmap_vhpt_nbuckets;
240SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
241    &pmap_vhpt_nbuckets, 0, "");
242
243int pmap_vhpt_log2size = 0;
244TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
245SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
246    &pmap_vhpt_log2size, 0, "");
247
248static int pmap_vhpt_inserts;
249SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
250    &pmap_vhpt_inserts, 0, "");
251
252static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
253SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
254    NULL, 0, pmap_vhpt_population, "I", "");
255
256static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
257
258static void free_pv_chunk(struct pv_chunk *pc);
259static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
260static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
261static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
262
263static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
264		    vm_page_t m, vm_prot_t prot);
265static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
266static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
267		    vm_offset_t va, pv_entry_t pv, int freepte);
268static int	pmap_remove_vhpt(vm_offset_t va);
269static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
270		    vm_page_t m);
271
272static void
273pmap_initialize_vhpt(vm_offset_t vhpt)
274{
275	struct ia64_lpte *pte;
276	u_int i;
277
278	pte = (struct ia64_lpte *)vhpt;
279	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
280		pte[i].pte = 0;
281		pte[i].itir = 0;
282		pte[i].tag = 1UL << 63; /* Invalid tag */
283		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
284	}
285}
286
287#ifdef SMP
288vm_offset_t
289pmap_alloc_vhpt(void)
290{
291	vm_offset_t vhpt;
292	vm_page_t m;
293	vm_size_t size;
294
295	size = 1UL << pmap_vhpt_log2size;
296	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
297	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
298	    VM_MEMATTR_DEFAULT);
299	if (m != NULL) {
300		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
301		pmap_initialize_vhpt(vhpt);
302		return (vhpt);
303	}
304	return (0);
305}
306#endif
307
308/*
309 *	Bootstrap the system enough to run with virtual memory.
310 */
311void
312pmap_bootstrap(void)
313{
314	struct ia64_pal_result res;
315	vm_offset_t base;
316	size_t size;
317	int i, ridbits;
318
319	/*
320	 * Query the PAL Code to find the loop parameters for the
321	 * ptc.e instruction.
322	 */
323	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
324	if (res.pal_status != 0)
325		panic("Can't configure ptc.e parameters");
326	pmap_ptc_e_base = res.pal_result[0];
327	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
328	pmap_ptc_e_count2 = res.pal_result[1];
329	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
330	pmap_ptc_e_stride2 = res.pal_result[2];
331	if (bootverbose)
332		printf("ptc.e base=0x%lx, count1=%u, count2=%u, "
333		       "stride1=0x%x, stride2=0x%x\n",
334		       pmap_ptc_e_base,
335		       pmap_ptc_e_count1,
336		       pmap_ptc_e_count2,
337		       pmap_ptc_e_stride1,
338		       pmap_ptc_e_stride2);
339
340	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
341
342	/*
343	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
344	 *
345	 * We currently need at least 19 bits in the RID because PID_MAX
346	 * can only be encoded in 17 bits and we need RIDs for 4 regions
347	 * per process. With PID_MAX equalling 99999 this means that we
348	 * need to be able to encode 399996 (=4*PID_MAX).
349	 * The Itanium processor only has 18 bits and the architected
350	 * minimum is exactly that. So, we cannot use a PID based scheme
351	 * in those cases. Enter pmap_ridmap...
352	 * We should avoid the map when running on a processor that has
353	 * implemented enough bits. This means that we should pass the
354	 * process/thread ID to pmap. This we currently don't do, so we
355	 * use the map anyway. However, we don't want to allocate a map
356	 * that is large enough to cover the range dictated by the number
357	 * of bits in the RID, because that may result in a RID map of
358	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
359 * The bottom line: we create a 32KB map when the processor only
360	 * implements 18 bits (or when we can't figure it out). Otherwise
361	 * we create a 64KB map.
362	 */
363	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
364	if (res.pal_status != 0) {
365		if (bootverbose)
366			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
367		ridbits = 18; /* guaranteed minimum */
368	} else {
369		ridbits = (res.pal_result[1] >> 8) & 0xff;
370		if (bootverbose)
371			printf("Processor supports %d Region ID bits\n",
372			    ridbits);
373	}
374	if (ridbits > 19)
375		ridbits = 19;
376
377	pmap_ridmax = (1 << ridbits);
378	pmap_ridmapsz = pmap_ridmax / 64;
379	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
380	pmap_ridmap[0] |= 0xff;
381	pmap_rididx = 0;
382	pmap_ridcount = 8;
383	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
384
385	/*
386	 * Allocate some memory for initial kernel 'page tables'.
387	 */
388	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
389	nkpt = 0;
390	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
391
392	/*
393	 * Determine a valid (mappable) VHPT size.
394	 */
395	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
396	if (pmap_vhpt_log2size == 0)
397		pmap_vhpt_log2size = 20;
398	else if (pmap_vhpt_log2size < 16)
399		pmap_vhpt_log2size = 16;
400	else if (pmap_vhpt_log2size > 28)
401		pmap_vhpt_log2size = 28;
402	if (pmap_vhpt_log2size & 1)
403		pmap_vhpt_log2size--;
404
405	size = 1UL << pmap_vhpt_log2size;
406	base = (uintptr_t)ia64_physmem_alloc(size, size);
407	if (base == 0)
408		panic("Unable to allocate VHPT");
409
410	PCPU_SET(md.vhpt, base);
411	if (bootverbose)
412		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
413
414	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
415	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
416	    sizeof(struct ia64_bucket), PAGE_SIZE);
417	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
418		/* Stolen memory is zeroed. */
419		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
420		    MTX_NOWITNESS | MTX_SPIN);
421	}
422
423	pmap_initialize_vhpt(base);
424	map_vhpt(base);
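	/*
	 * Program the PTA register: the value below packs the walker
	 * enable bit (bit 0), the table size (pmap_vhpt_log2size in bits
	 * 2..7) and the long-format bit (bit 8) underneath the
	 * size-aligned VHPT base address.
	 */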
425	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
426	ia64_srlz_i();
427
428	virtual_avail = VM_INIT_KERNEL_ADDRESS;
429	virtual_end = VM_MAX_KERNEL_ADDRESS;
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	PMAP_LOCK_INIT(kernel_pmap);
435	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
436		kernel_pmap->pm_rid[i] = 0;
437	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
438	PCPU_SET(md.current_pmap, kernel_pmap);
439
440 	/*
441	 * Initialize the global pv list lock.
442	 */
443	rw_init(&pvh_global_lock, "pmap pv global");
444
445	/* Region 5 is mapped via the VHPT. */
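	/*
	 * A region register packs the RID (bits 8 and up), the preferred
	 * page size (bits 2..7) and the VHPT-walker enable bit (bit 0).
	 */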
446	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
447
448	/*
449	 * Clear out any random TLB entries left over from booting.
450	 */
451	pmap_invalidate_all();
452
453	map_gateway_page();
454}
455
456static int
457pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
458{
459	int count, error, i;
460
461	count = 0;
462	for (i = 0; i < pmap_vhpt_nbuckets; i++)
463		count += pmap_vhpt_bucket[i].length;
464
465	error = SYSCTL_OUT(req, &count, sizeof(count));
466	return (error);
467}
468
469vm_offset_t
470pmap_page_to_va(vm_page_t m)
471{
472	vm_paddr_t pa;
473	vm_offset_t va;
474
475	pa = VM_PAGE_TO_PHYS(m);
476	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
477	    IA64_PHYS_TO_RR7(pa);
478	return (va);
479}
480
481/*
482 *	Initialize a vm_page's machine-dependent fields.
483 */
484void
485pmap_page_init(vm_page_t m)
486{
487
488	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
489
490	TAILQ_INIT(&m->md.pv_list);
491	m->md.memattr = VM_MEMATTR_DEFAULT;
492}
493
494/*
495 *	Initialize the pmap module.
496 *	Called by vm_init, to initialize any structures that the pmap
497 *	system needs to map virtual memory.
498 */
499void
500pmap_init(void)
501{
502
503	CTR1(KTR_PMAP, "%s()", __func__);
504
505	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
506	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
507}
508
509
510/***************************************************
511 * Manipulate TLBs for a pmap
512 ***************************************************/
513
514static void
515pmap_invalidate_page(vm_offset_t va)
516{
517	struct ia64_lpte *pte;
518	struct pcpu *pc;
519	uint64_t tag;
520	u_int vhpt_ofs;
521
522	critical_enter();
523
524	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
525	tag = ia64_ttag(va);
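	/*
	 * Invalidate any matching entry in each CPU's VHPT by atomically
	 * swapping its tag for the invalid tag (bit 63 set), but only if
	 * the entry still carries this va's tag.
	 */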
526	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
527		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
528		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
529	}
530
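	/*
	 * Purge the translation from every TLB in the coherence domain.
	 * The spin lock serializes ptc.ga issuers, since only one global
	 * purge is supposed to be in flight at a time.
	 */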
531	mtx_lock_spin(&pmap_ptc_mutex);
532
533	ia64_ptc_ga(va, PAGE_SHIFT << 2);
534	ia64_mf();
535	ia64_srlz_i();
536
537	mtx_unlock_spin(&pmap_ptc_mutex);
538
539	ia64_invala();
540
541	critical_exit();
542}
543
544void
545pmap_invalidate_all(void)
546{
547	uint64_t addr;
548	int i, j;
549
550	addr = pmap_ptc_e_base;
551	for (i = 0; i < pmap_ptc_e_count1; i++) {
552		for (j = 0; j < pmap_ptc_e_count2; j++) {
553			ia64_ptc_e(addr);
554			addr += pmap_ptc_e_stride2;
555		}
556		addr += pmap_ptc_e_stride1;
557	}
558	ia64_srlz_i();
559}
560
561static uint32_t
562pmap_allocate_rid(void)
563{
564	uint64_t bit, bits;
565	int rid;
566
567	mtx_lock(&pmap_ridmutex);
568	if (pmap_ridcount == pmap_ridmax)
569		panic("pmap_allocate_rid: All Region IDs used");
570
571	/* Find an index with a free bit. */
572	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
573		pmap_rididx++;
574		if (pmap_rididx == pmap_ridmapsz)
575			pmap_rididx = 0;
576	}
577	rid = pmap_rididx * 64;
578
579	/* Find a free bit. */
580	bit = 1UL;
581	while (bits & bit) {
582		rid++;
583		bit <<= 1;
584	}
585
586	pmap_ridmap[pmap_rididx] |= bit;
587	pmap_ridcount++;
588	mtx_unlock(&pmap_ridmutex);
589
590	return rid;
591}
592
593static void
594pmap_free_rid(uint32_t rid)
595{
596	uint64_t bit;
597	int idx;
598
599	idx = rid / 64;
600	bit = ~(1UL << (rid & 63));
601
602	mtx_lock(&pmap_ridmutex);
603	pmap_ridmap[idx] &= bit;
604	pmap_ridcount--;
605	mtx_unlock(&pmap_ridmutex);
606}
607
608/***************************************************
609 * Page table page management routines.....
610 ***************************************************/
611
612static void
613pmap_pinit_common(pmap_t pmap)
614{
615	int i;
616
617	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
618		pmap->pm_rid[i] = pmap_allocate_rid();
619	TAILQ_INIT(&pmap->pm_pvchunk);
620	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
621}
622
623void
624pmap_pinit0(pmap_t pmap)
625{
626
627	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
628
629	PMAP_LOCK_INIT(pmap);
630	pmap_pinit_common(pmap);
631}
632
633/*
634 * Initialize a preallocated and zeroed pmap structure,
635 * such as one in a vmspace structure.
636 */
637int
638pmap_pinit(pmap_t pmap)
639{
640
641	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
642
643	pmap_pinit_common(pmap);
644	return (1);
645}
646
647/***************************************************
648 * Pmap allocation/deallocation routines.
649 ***************************************************/
650
651/*
652 * Release any resources held by the given physical map.
653 * Called when a pmap initialized by pmap_pinit is being released.
654 * Should only be called if the map contains no valid mappings.
655 */
656void
657pmap_release(pmap_t pmap)
658{
659	int i;
660
661	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
662
663	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
664		if (pmap->pm_rid[i])
665			pmap_free_rid(pmap->pm_rid[i]);
666}
667
668/*
669 * grow the number of kernel page table entries, if needed
670 */
671void
672pmap_growkernel(vm_offset_t addr)
673{
674	struct ia64_lpte **dir1;
675	struct ia64_lpte *leaf;
676	vm_page_t nkpg;
677
678	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, addr);
679
680	while (kernel_vm_end <= addr) {
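		/*
		 * The hard limit below is the number of directory pages
		 * (PAGE_SIZE/8, one per ia64_kptdir slot) plus the maximum
		 * number of leaf PTE pages ((PAGE_SIZE/8)^2, i.e.
		 * PAGE_SIZE*PAGE_SIZE/64) the directories can reference;
		 * nkpt counts both kinds of page.
		 */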
681		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
682			panic("%s: out of kernel address space", __func__);
683
684		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
685		if (dir1 == NULL) {
686			nkpg = vm_page_alloc(NULL, nkpt++,
687			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
688			if (!nkpg)
689				panic("%s: cannot add dir. page", __func__);
690
691			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
692			bzero(dir1, PAGE_SIZE);
693			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
694		}
695
696		nkpg = vm_page_alloc(NULL, nkpt++,
697		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
698		if (!nkpg)
699			panic("%s: cannot add PTE page", __func__);
700
701		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
702		bzero(leaf, PAGE_SIZE);
703		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
704
705		kernel_vm_end += PAGE_SIZE * NKPTEPG;
706	}
707}
708
709/***************************************************
710 * page management routines.
711 ***************************************************/
712
713CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
714
715static __inline struct pv_chunk *
716pv_to_chunk(pv_entry_t pv)
717{
718
719	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
720}
721
722#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
723
724#define	PC_FREE_FULL	0xfffffffffffffffful
725#define	PC_FREE_PARTIAL	\
726	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
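/*
 * PC_FREE_PARTIAL masks the last word of pc_map: only the low
 * _NPCPV - 64 * (_NPCM - 1) bits are backed by real pv entries
 * (337 = 5 * 64 + 17 for 8 KB pages, 677 = 10 * 64 + 37 for 16 KB).
 */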
727
728#if PAGE_SIZE == 8192
729static const u_long pc_freemask[_NPCM] = {
730	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
731	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
732};
733#elif PAGE_SIZE == 16384
734static const u_long pc_freemask[_NPCM] = {
735	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
736	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
737	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
738	PC_FREE_FULL, PC_FREE_PARTIAL
739};
740#endif
741
742static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
743
744SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
745    "Current number of pv entries");
746
747#ifdef PV_STATS
748static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
749
750SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
751    "Current number of pv entry chunks");
752SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
753    "Current number of pv entry chunks allocated");
754SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
755    "Current number of pv entry chunk frees");
756SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
757    "Number of failed attempts to get a chunk page.");
758
759static long pv_entry_frees, pv_entry_allocs;
760static int pv_entry_spare;
761
762SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
763    "Current number of pv entry frees");
764SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
765    "Current number of pv entry allocs");
766SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
767    "Current number of spare pv entries");
768#endif
769
770/*
771 * We are in a serious low memory condition.  Resort to
772 * drastic measures to free some pages so we can allocate
773 * another pv entry chunk.
774 */
775static vm_page_t
776pmap_pv_reclaim(pmap_t locked_pmap)
777{
778	struct pch newtail;
779	struct pv_chunk *pc;
780	struct ia64_lpte *pte;
781	pmap_t pmap;
782	pv_entry_t pv;
783	vm_offset_t va;
784	vm_page_t m, m_pc;
785	u_long inuse;
786	int bit, field, freed, idx;
787
788	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
789	pmap = NULL;
790	m_pc = NULL;
791	TAILQ_INIT(&newtail);
792	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
793		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
794		if (pmap != pc->pc_pmap) {
795			if (pmap != NULL) {
796				if (pmap != locked_pmap) {
797					pmap_switch(locked_pmap);
798					PMAP_UNLOCK(pmap);
799				}
800			}
801			pmap = pc->pc_pmap;
802			/* Avoid deadlock and lock recursion. */
803			if (pmap > locked_pmap)
804				PMAP_LOCK(pmap);
805			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
806				pmap = NULL;
807				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
808				continue;
809			}
810			pmap_switch(pmap);
811		}
812
813		/*
814		 * Destroy every non-wired, 8 KB page mapping in the chunk.
815		 */
816		freed = 0;
817		for (field = 0; field < _NPCM; field++) {
818			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
819			    inuse != 0; inuse &= ~(1UL << bit)) {
820				bit = ffsl(inuse) - 1;
821				idx = field * sizeof(inuse) * NBBY + bit;
822				pv = &pc->pc_pventry[idx];
823				va = pv->pv_va;
824				pte = pmap_find_vhpt(va);
825				KASSERT(pte != NULL, ("pte"));
826				if (pmap_wired(pte))
827					continue;
828				pmap_remove_vhpt(va);
829				pmap_invalidate_page(va);
830				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
831				if (pmap_accessed(pte))
832					vm_page_aflag_set(m, PGA_REFERENCED);
833				if (pmap_dirty(pte))
834					vm_page_dirty(m);
835				pmap_free_pte(pte, va);
836				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
837				if (TAILQ_EMPTY(&m->md.pv_list))
838					vm_page_aflag_clear(m, PGA_WRITEABLE);
839				pc->pc_map[field] |= 1UL << bit;
840				freed++;
841			}
842		}
843		if (freed == 0) {
844			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
845			continue;
846		}
847		/* Every freed mapping is for an 8 KB page. */
848		pmap->pm_stats.resident_count -= freed;
849		PV_STAT(pv_entry_frees += freed);
850		PV_STAT(pv_entry_spare += freed);
851		pv_entry_count -= freed;
852		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
853		for (field = 0; field < _NPCM; field++)
854			if (pc->pc_map[field] != pc_freemask[field]) {
855				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
856				    pc_list);
857				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
858
859				/*
860				 * One freed pv entry in locked_pmap is
861				 * sufficient.
862				 */
863				if (pmap == locked_pmap)
864					goto out;
865				break;
866			}
867		if (field == _NPCM) {
868			PV_STAT(pv_entry_spare -= _NPCPV);
869			PV_STAT(pc_chunk_count--);
870			PV_STAT(pc_chunk_frees++);
871			/* Entire chunk is free; return it. */
872			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
873			break;
874		}
875	}
876out:
877	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
878	if (pmap != NULL) {
879		if (pmap != locked_pmap) {
880			pmap_switch(locked_pmap);
881			PMAP_UNLOCK(pmap);
882		}
883	}
884	return (m_pc);
885}
886
887/*
888 * free the pv_entry back to the free list
889 */
890static void
891free_pv_entry(pmap_t pmap, pv_entry_t pv)
892{
893	struct pv_chunk *pc;
894	int bit, field, idx;
895
896	rw_assert(&pvh_global_lock, RA_WLOCKED);
897	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
898	PV_STAT(pv_entry_frees++);
899	PV_STAT(pv_entry_spare++);
900	pv_entry_count--;
901	pc = pv_to_chunk(pv);
902	idx = pv - &pc->pc_pventry[0];
903	field = idx / (sizeof(u_long) * NBBY);
904	bit = idx % (sizeof(u_long) * NBBY);
905	pc->pc_map[field] |= 1ul << bit;
906	for (idx = 0; idx < _NPCM; idx++)
907		if (pc->pc_map[idx] != pc_freemask[idx]) {
908			/*
909			 * 98% of the time, pc is already at the head of the
910			 * list.  If it isn't already, move it to the head.
911			 */
912			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
913			    pc)) {
914				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
915				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
916				    pc_list);
917			}
918			return;
919		}
920	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
921	free_pv_chunk(pc);
922}
923
924static void
925free_pv_chunk(struct pv_chunk *pc)
926{
927	vm_page_t m;
928
929 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
930	PV_STAT(pv_entry_spare -= _NPCPV);
931	PV_STAT(pc_chunk_count--);
932	PV_STAT(pc_chunk_frees++);
933	/* entire chunk is free, return it */
934	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
935	vm_page_unwire(m, 0);
936	vm_page_free(m);
937}
938
939/*
940 * get a new pv_entry, allocating a block from the system
941 * when needed.
942 */
943static pv_entry_t
944get_pv_entry(pmap_t pmap, boolean_t try)
945{
946	struct pv_chunk *pc;
947	pv_entry_t pv;
948	vm_page_t m;
949	int bit, field, idx;
950
951	rw_assert(&pvh_global_lock, RA_WLOCKED);
952	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
953	PV_STAT(pv_entry_allocs++);
954	pv_entry_count++;
955retry:
956	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
957	if (pc != NULL) {
958		for (field = 0; field < _NPCM; field++) {
959			if (pc->pc_map[field]) {
960				bit = ffsl(pc->pc_map[field]) - 1;
961				break;
962			}
963		}
964		if (field < _NPCM) {
965			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
966			pv = &pc->pc_pventry[idx];
967			pc->pc_map[field] &= ~(1ul << bit);
968			/* If this was the last item, move it to tail */
969			for (field = 0; field < _NPCM; field++)
970				if (pc->pc_map[field] != 0) {
971					PV_STAT(pv_entry_spare--);
972					return (pv);	/* not full, return */
973				}
974			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
975			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
976			PV_STAT(pv_entry_spare--);
977			return (pv);
978		}
979	}
980	/* No free items, allocate another chunk */
981	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
982	    VM_ALLOC_WIRED);
983	if (m == NULL) {
984		if (try) {
985			pv_entry_count--;
986			PV_STAT(pc_chunk_tryfail++);
987			return (NULL);
988		}
989		m = pmap_pv_reclaim(pmap);
990		if (m == NULL)
991			goto retry;
992	}
993	PV_STAT(pc_chunk_count++);
994	PV_STAT(pc_chunk_allocs++);
995	pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
996	pc->pc_pmap = pmap;
997	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
998	for (field = 1; field < _NPCM; field++)
999		pc->pc_map[field] = pc_freemask[field];
1000	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1001	pv = &pc->pc_pventry[0];
1002	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1003	PV_STAT(pv_entry_spare += _NPCPV - 1);
1004	return (pv);
1005}
1006
1007/*
1008 * Conditionally create a pv entry.
1009 */
1010static boolean_t
1011pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1012{
1013	pv_entry_t pv;
1014
1015	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1016	rw_assert(&pvh_global_lock, RA_WLOCKED);
1017	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1018		pv->pv_va = va;
1019		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1020		return (TRUE);
1021	} else
1022		return (FALSE);
1023}
1024
1025/*
1026 * Add an ia64_lpte to the VHPT.
1027 */
1028static void
1029pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1030{
1031	struct ia64_bucket *bckt;
1032	struct ia64_lpte *vhpte;
1033	uint64_t pte_pa;
1034
1035	/* Can fault, so get it out of the way. */
1036	pte_pa = ia64_tpa((vm_offset_t)pte);
1037
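	/*
	 * ia64_thash() yields the address of this va's entry in the local
	 * VHPT; its chain field was pointed at the shared ia64_bucket
	 * head by pmap_initialize_vhpt().
	 */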
1038	vhpte = (struct ia64_lpte *)ia64_thash(va);
1039	bckt = (struct ia64_bucket *)vhpte->chain;
1040
1041	mtx_lock_spin(&bckt->mutex);
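	/*
	 * Link the new pte at the head of the bucket's collision chain.
	 * Its chain pointer is made globally visible (ia64_mf) before the
	 * bucket head is updated, so that a concurrent VHPT walk never
	 * follows a torn chain.
	 */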
1042	pte->chain = bckt->chain;
1043	ia64_mf();
1044	bckt->chain = pte_pa;
1045
1046	pmap_vhpt_inserts++;
1047	bckt->length++;
1048	mtx_unlock_spin(&bckt->mutex);
1049}
1050
1051/*
1052 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1053 * worked or an appropriate error code otherwise.
1054 */
1055static int
1056pmap_remove_vhpt(vm_offset_t va)
1057{
1058	struct ia64_bucket *bckt;
1059	struct ia64_lpte *pte;
1060	struct ia64_lpte *lpte;
1061	struct ia64_lpte *vhpte;
1062	uint64_t chain, tag;
1063
1064	tag = ia64_ttag(va);
1065	vhpte = (struct ia64_lpte *)ia64_thash(va);
1066	bckt = (struct ia64_bucket *)vhpte->chain;
1067
1068	lpte = NULL;
1069	mtx_lock_spin(&bckt->mutex);
1070	chain = bckt->chain;
1071	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1072	while (chain != 0 && pte->tag != tag) {
1073		lpte = pte;
1074		chain = pte->chain;
1075		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1076	}
1077	if (chain == 0) {
1078		mtx_unlock_spin(&bckt->mutex);
1079		return (ENOENT);
1080	}
1081
1082	/* Snip this pv_entry out of the collision chain. */
1083	if (lpte == NULL)
1084		bckt->chain = pte->chain;
1085	else
1086		lpte->chain = pte->chain;
1087	ia64_mf();
1088
1089	bckt->length--;
1090	mtx_unlock_spin(&bckt->mutex);
1091	return (0);
1092}
1093
1094/*
1095 * Find the ia64_lpte for the given va, if any.
1096 */
1097static struct ia64_lpte *
1098pmap_find_vhpt(vm_offset_t va)
1099{
1100	struct ia64_bucket *bckt;
1101	struct ia64_lpte *pte;
1102	uint64_t chain, tag;
1103
1104	tag = ia64_ttag(va);
1105	pte = (struct ia64_lpte *)ia64_thash(va);
1106	bckt = (struct ia64_bucket *)pte->chain;
1107
1108	mtx_lock_spin(&bckt->mutex);
1109	chain = bckt->chain;
1110	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1111	while (chain != 0 && pte->tag != tag) {
1112		chain = pte->chain;
1113		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1114	}
1115	mtx_unlock_spin(&bckt->mutex);
1116	return ((chain != 0) ? pte : NULL);
1117}
1118
1119/*
1120 * Remove an entry from the list of managed mappings.
1121 */
1122static int
1123pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1124{
1125
1126	rw_assert(&pvh_global_lock, RA_WLOCKED);
1127	if (!pv) {
1128		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1129			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1130				break;
1131		}
1132	}
1133
1134	if (pv) {
1135		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1136		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1137			vm_page_aflag_clear(m, PGA_WRITEABLE);
1138
1139		free_pv_entry(pmap, pv);
1140		return 0;
1141	} else {
1142		return ENOENT;
1143	}
1144}
1145
1146/*
1147 * Create a pv entry for page at pa for
1148 * (pmap, va).
1149 */
1150static void
1151pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1152{
1153	pv_entry_t pv;
1154
1155	rw_assert(&pvh_global_lock, RA_WLOCKED);
1156	pv = get_pv_entry(pmap, FALSE);
1157	pv->pv_va = va;
1158	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1159}
1160
1161/*
1162 *	Routine:	pmap_extract
1163 *	Function:
1164 *		Extract the physical page address associated
1165 *		with the given map/virtual_address pair.
1166 */
1167vm_paddr_t
1168pmap_extract(pmap_t pmap, vm_offset_t va)
1169{
1170	struct ia64_lpte *pte;
1171	pmap_t oldpmap;
1172	vm_paddr_t pa;
1173
1174	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, va);
1175
1176	pa = 0;
1177	PMAP_LOCK(pmap);
1178	oldpmap = pmap_switch(pmap);
1179	pte = pmap_find_vhpt(va);
1180	if (pte != NULL && pmap_present(pte))
1181		pa = pmap_ppn(pte);
1182	pmap_switch(oldpmap);
1183	PMAP_UNLOCK(pmap);
1184	return (pa);
1185}
1186
1187/*
1188 *	Routine:	pmap_extract_and_hold
1189 *	Function:
1190 *		Atomically extract and hold the physical page
1191 *		with the given pmap and virtual address pair
1192 *		if that mapping permits the given protection.
1193 */
1194vm_page_t
1195pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1196{
1197	struct ia64_lpte *pte;
1198	pmap_t oldpmap;
1199	vm_page_t m;
1200	vm_paddr_t pa;
1201
1202	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, prot=%#x)", __func__, pmap, va,
1203	    prot);
1204
1205	pa = 0;
1206	m = NULL;
1207	PMAP_LOCK(pmap);
1208	oldpmap = pmap_switch(pmap);
1209retry:
1210	pte = pmap_find_vhpt(va);
1211	if (pte != NULL && pmap_present(pte) &&
1212	    (pmap_prot(pte) & prot) == prot) {
1213		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1214		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1215			goto retry;
1216		vm_page_hold(m);
1217	}
1218	PA_UNLOCK_COND(pa);
1219	pmap_switch(oldpmap);
1220	PMAP_UNLOCK(pmap);
1221	return (m);
1222}
1223
1224/***************************************************
1225 * Low level mapping routines.....
1226 ***************************************************/
1227
1228/*
1229 * Find the kernel lpte for mapping the given virtual address, which
1230 * must be in the part of region 5 which we can cover with our kernel
1231 * 'page tables'.
1232 */
1233static struct ia64_lpte *
1234pmap_find_kpte(vm_offset_t va)
1235{
1236	struct ia64_lpte **dir1;
1237	struct ia64_lpte *leaf;
1238
1239	KASSERT((va >> 61) == 5,
1240		("kernel mapping 0x%lx not in region 5", va));
1241	KASSERT(va < kernel_vm_end,
1242		("kernel mapping 0x%lx out of range", va));
1243
1244	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1245	leaf = dir1[KPTE_DIR1_INDEX(va)];
1246	return (&leaf[KPTE_PTE_INDEX(va)]);
1247}
1248
1249/*
1250 * Find a pte suitable for mapping a user-space address. If one exists
1251 * in the VHPT, that one will be returned, otherwise a new pte is
1252 * allocated.
1253 */
1254static struct ia64_lpte *
1255pmap_find_pte(vm_offset_t va)
1256{
1257	struct ia64_lpte *pte;
1258
1259	if (va >= VM_MAXUSER_ADDRESS)
1260		return pmap_find_kpte(va);
1261
1262	pte = pmap_find_vhpt(va);
1263	if (pte == NULL) {
1264		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1265		pte->tag = 1UL << 63;
1266	}
1267	return (pte);
1268}
1269
1270/*
1271 * Free a pte which is now unused. This simply returns it to the zone
1272 * allocator if it is a user mapping. For kernel mappings, clear the
1273 * valid bit to make it clear that the mapping is not currently used.
1274 */
1275static void
1276pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1277{
1278	if (va < VM_MAXUSER_ADDRESS)
1279		uma_zfree(ptezone, pte);
1280	else
1281		pmap_clear_present(pte);
1282}
1283
1284static PMAP_INLINE void
1285pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1286{
1287	static long prot2ar[4] = {
1288		PTE_AR_R,		/* VM_PROT_NONE */
1289		PTE_AR_RW,		/* VM_PROT_WRITE */
1290		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1291		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1292	};
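	/*
	 * prot2ar is indexed by (prot & VM_PROT_ALL) >> 1, which folds
	 * VM_PROT_READ away: every encoding grants read, so only the
	 * write and execute bits select the access-rights value.
	 */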
1293
1294	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1295	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1296	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1297	    ? PTE_PL_KERN : PTE_PL_USER;
1298	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1299}
1300
1301static PMAP_INLINE void
1302pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1303{
1304
1305	pte->pte &= ~PTE_MA_MASK;
1306	pte->pte |= (ma & PTE_MA_MASK);
1307}
1308
1309/*
1310 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1311 * the pte was originally valid, then it's assumed to already be in the
1312 * VHPT.
1313 * This function does not set the protection bits.  It's expected
1314 * that those have been set correctly prior to calling this function.
1315 */
1316static void
1317pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1318    boolean_t wired, boolean_t managed)
1319{
1320
1321	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1322	    PTE_AR_MASK | PTE_ED;
1323	pte->pte |= PTE_PRESENT;
1324	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1325	pte->pte |= (wired) ? PTE_WIRED : 0;
1326	pte->pte |= pa & PTE_PPN_MASK;
1327
1328	pte->itir = PAGE_SHIFT << 2;
1329
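	/*
	 * The pte word and itir are written first; only after the memory
	 * fence is the tag made valid, so a hardware walker cannot match
	 * a half-initialized entry.
	 */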
1330	ia64_mf();
1331
1332	pte->tag = ia64_ttag(va);
1333}
1334
1335/*
1336 * Remove the (possibly managed) mapping represented by pte from the
1337 * given pmap.
1338 */
1339static int
1340pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1341		pv_entry_t pv, int freepte)
1342{
1343	int error;
1344	vm_page_t m;
1345
1346	/*
1347	 * First remove from the VHPT.
1348	 */
1349	error = pmap_remove_vhpt(va);
1350	KASSERT(error == 0, ("%s: pmap_remove_vhpt returned %d",
1351	    __func__, error));
1352
1353	pmap_invalidate_page(va);
1354
1355	if (pmap_wired(pte))
1356		pmap->pm_stats.wired_count -= 1;
1357
1358	pmap->pm_stats.resident_count -= 1;
1359	if (pmap_managed(pte)) {
1360		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1361		if (pmap_dirty(pte))
1362			vm_page_dirty(m);
1363		if (pmap_accessed(pte))
1364			vm_page_aflag_set(m, PGA_REFERENCED);
1365
1366		error = pmap_remove_entry(pmap, m, va, pv);
1367	}
1368	if (freepte)
1369		pmap_free_pte(pte, va);
1370
1371	return (error);
1372}
1373
1374/*
1375 * Extract the physical page address associated with a kernel
1376 * virtual address.
1377 */
1378vm_paddr_t
1379pmap_kextract(vm_offset_t va)
1380{
1381	struct ia64_lpte *pte;
1382	uint64_t *pbvm_pgtbl;
1383	vm_paddr_t pa;
1384	u_int idx;
1385
1386	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1387
1388	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1389
1390	/* Regions 6 and 7 are direct mapped. */
1391	if (va >= IA64_RR_BASE(6)) {
1392		pa = IA64_RR_MASK(va);
1393		goto out;
1394	}
1395
1396	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1397	if (va >= kernel_vm_end)
1398		goto err_out;
1399	if (va >= VM_INIT_KERNEL_ADDRESS) {
1400		pte = pmap_find_kpte(va);
1401		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1402		goto out;
1403	}
1404
1405	/* The PBVM page table. */
1406	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1407		goto err_out;
1408	if (va >= IA64_PBVM_PGTBL) {
1409		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1410		goto out;
1411	}
1412
1413	/* The PBVM itself. */
1414	if (va >= IA64_PBVM_BASE) {
1415		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1416		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1417		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1418			goto err_out;
1419		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1420			goto err_out;
1421		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1422		    (va & IA64_PBVM_PAGE_MASK);
1423		goto out;
1424	}
1425
1426 err_out:
1427	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1428	pa = 0;
1429	/* FALLTHROUGH */
1430
1431 out:
1432	return (pa);
1433}
1434
1435/*
1436 * Add a list of wired pages to the kva.  This routine is only used for
1437 * temporary kernel mappings that do not need to have page modification
1438 * or references recorded.  Note that old mappings are simply written
1439 * over.  The page is effectively wired, but it's customary to not have
1440 * the PTE reflect that, nor update statistics.
1441 */
1442void
1443pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1444{
1445	struct ia64_lpte *pte;
1446	int i;
1447
1448	CTR4(KTR_PMAP, "%s(va=%#lx, m_p=%p, cnt=%d)", __func__, va, m, count);
1449
1450	for (i = 0; i < count; i++) {
1451		pte = pmap_find_kpte(va);
1452		if (pmap_present(pte))
1453			pmap_invalidate_page(va);
1454		else
1455			pmap_enter_vhpt(pte, va);
1456		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1457		pmap_pte_attr(pte, m[i]->md.memattr);
1458		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1459		va += PAGE_SIZE;
1460	}
1461}
1462
1463/*
1464 * this routine jerks page mappings from the
1465 * kernel -- it is meant only for temporary mappings.
1466 */
1467void
1468pmap_qremove(vm_offset_t va, int count)
1469{
1470	struct ia64_lpte *pte;
1471	int i;
1472
1473	CTR3(KTR_PMAP, "%s(va=%#lx, cnt=%d)", __func__, va, count);
1474
1475	for (i = 0; i < count; i++) {
1476		pte = pmap_find_kpte(va);
1477		if (pmap_present(pte)) {
1478			pmap_remove_vhpt(va);
1479			pmap_invalidate_page(va);
1480			pmap_clear_present(pte);
1481		}
1482		va += PAGE_SIZE;
1483	}
1484}
1485
1486/*
1487 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1488 * to not have the PTE reflect that, nor update statistics.
1489 */
1490void
1491pmap_kenter(vm_offset_t va, vm_paddr_t pa)
1492{
1493	struct ia64_lpte *pte;
1494
1495	CTR3(KTR_PMAP, "%s(va=%#lx, pa=%#lx)", __func__, va, pa);
1496
1497	pte = pmap_find_kpte(va);
1498	if (pmap_present(pte))
1499		pmap_invalidate_page(va);
1500	else
1501		pmap_enter_vhpt(pte, va);
1502	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1503	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1504	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1505}
1506
1507/*
1508 * Remove a page from the kva
1509 */
1510void
1511pmap_kremove(vm_offset_t va)
1512{
1513	struct ia64_lpte *pte;
1514
1515	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1516
1517	pte = pmap_find_kpte(va);
1518	if (pmap_present(pte)) {
1519		pmap_remove_vhpt(va);
1520		pmap_invalidate_page(va);
1521		pmap_clear_present(pte);
1522	}
1523}
1524
1525/*
1526 *	Used to map a range of physical addresses into kernel
1527 *	virtual address space.
1528 *
1529 *	The value passed in '*virt' is a suggested virtual address for
1530 *	the mapping. Architectures which can support a direct-mapped
1531 *	physical to virtual region can return the appropriate address
1532 *	within that region, leaving '*virt' unchanged. Other
1533 *	architectures should map the pages starting at '*virt' and
1534 *	update '*virt' with the first usable address after the mapped
1535 *	region.
1536 */
1537vm_offset_t
1538pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1539{
1540
1541	CTR5(KTR_PMAP, "%s(va_p=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1542	    virt, start, end, prot);
1543
1544	return IA64_PHYS_TO_RR7(start);
1545}
1546
1547/*
1548 *	Remove the given range of addresses from the specified map.
1549 *
1550 *	It is assumed that the start and end are properly
1551 *	rounded to the page size.
1552 *
1553 *	Sparsely used ranges are inefficiently removed.  The VHPT is
1554 *	probed for every page within the range.  XXX
1555 */
1556void
1557pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1558{
1559	pmap_t oldpmap;
1560	vm_offset_t va;
1561	struct ia64_lpte *pte;
1562
1563	CTR4(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx)", __func__, pmap, sva,
1564	    eva);
1565
1566	/*
1567	 * Perform an unsynchronized read.  This is, however, safe.
1568	 */
1569	if (pmap->pm_stats.resident_count == 0)
1570		return;
1571
1572	rw_wlock(&pvh_global_lock);
1573	PMAP_LOCK(pmap);
1574	oldpmap = pmap_switch(pmap);
1575	for (va = sva; va < eva; va += PAGE_SIZE) {
1576		pte = pmap_find_vhpt(va);
1577		if (pte != NULL)
1578			pmap_remove_pte(pmap, pte, va, 0, 1);
1579	}
1580	rw_wunlock(&pvh_global_lock);
1581	pmap_switch(oldpmap);
1582	PMAP_UNLOCK(pmap);
1583}
1584
1585/*
1586 *	Routine:	pmap_remove_all
1587 *	Function:
1588 *		Removes this physical page from
1589 *		all physical maps in which it resides.
1590 *		Reflects back modify bits to the pager.
1591 *
1592 *	Notes:
1593 *		Original versions of this routine were very
1594 *		inefficient because they iteratively called
1595 *		pmap_remove (slow...)
1596 */
1597void
1598pmap_remove_all(vm_page_t m)
1599{
1600	pmap_t oldpmap;
1601	pv_entry_t pv;
1602
1603	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
1604
1605	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1606	    ("pmap_remove_all: page %p is not managed", m));
1607	rw_wlock(&pvh_global_lock);
1608	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1609		struct ia64_lpte *pte;
1610		pmap_t pmap = PV_PMAP(pv);
1611		vm_offset_t va = pv->pv_va;
1612
1613		PMAP_LOCK(pmap);
1614		oldpmap = pmap_switch(pmap);
1615		pte = pmap_find_vhpt(va);
1616		KASSERT(pte != NULL, ("pte"));
1617		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1618			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1619		pmap_remove_pte(pmap, pte, va, pv, 1);
1620		pmap_switch(oldpmap);
1621		PMAP_UNLOCK(pmap);
1622	}
1623	vm_page_aflag_clear(m, PGA_WRITEABLE);
1624	rw_wunlock(&pvh_global_lock);
1625}
1626
1627/*
1628 *	Set the physical protection on the
1629 *	specified range of this map as requested.
1630 */
1631void
1632pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1633{
1634	pmap_t oldpmap;
1635	struct ia64_lpte *pte;
1636
1637	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1638	    pmap, sva, eva, prot);
1639
1640	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1641		pmap_remove(pmap, sva, eva);
1642		return;
1643	}
1644
1645	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1646	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1647		return;
1648
1649	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1650		panic("pmap_protect: unaligned addresses");
1651
1652	PMAP_LOCK(pmap);
1653	oldpmap = pmap_switch(pmap);
1654	for ( ; sva < eva; sva += PAGE_SIZE) {
1655		/* If page is invalid, skip this page */
1656		pte = pmap_find_vhpt(sva);
1657		if (pte == NULL)
1658			continue;
1659
1660		/* If there's no change, skip it too */
1661		if (pmap_prot(pte) == prot)
1662			continue;
1663
1664		if ((prot & VM_PROT_WRITE) == 0 &&
1665		    pmap_managed(pte) && pmap_dirty(pte)) {
1666			vm_paddr_t pa = pmap_ppn(pte);
1667			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1668
1669			vm_page_dirty(m);
1670			pmap_clear_dirty(pte);
1671		}
1672
1673		if (prot & VM_PROT_EXECUTE)
1674			ia64_sync_icache(sva, PAGE_SIZE);
1675
1676		pmap_pte_prot(pmap, pte, prot);
1677		pmap_invalidate_page(sva);
1678	}
1679	pmap_switch(oldpmap);
1680	PMAP_UNLOCK(pmap);
1681}
1682
1683/*
1684 *	Insert the given physical page (p) at
1685 *	the specified virtual address (v) in the
1686 *	target physical map with the protection requested.
1687 *
1688 *	If specified, the page will be wired down, meaning
1689 *	that the related pte can not be reclaimed.
1690 *
1691 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1692 *	or lose information.  That is, this routine must actually
1693 *	insert this page into the given map NOW.
1694 */
1695int
1696pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1697    u_int flags, int8_t psind __unused)
1698{
1699	pmap_t oldpmap;
1700	vm_offset_t pa;
1701	vm_offset_t opa;
1702	struct ia64_lpte origpte;
1703	struct ia64_lpte *pte;
1704	boolean_t icache_inval, managed, wired;
1705
1706	CTR5(KTR_PMAP, "pmap_enter(pm=%p, va=%#lx, m=%p, prot=%#x, "
1707	    "flags=%u)", pmap, va, m, prot, flags);
1708
1709	wired = (flags & PMAP_ENTER_WIRED) != 0;
1710	rw_wlock(&pvh_global_lock);
1711	PMAP_LOCK(pmap);
1712	oldpmap = pmap_switch(pmap);
1713
1714	va &= ~PAGE_MASK;
1715 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1716	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
1717	    ("pmap_enter: page %p is not busy", m));
1718
1719	/*
1720	 * Find (or create) a pte for the given mapping.
1721	 */
1722	while ((pte = pmap_find_pte(va)) == NULL) {
1723		pmap_switch(oldpmap);
1724		PMAP_UNLOCK(pmap);
1725		rw_wunlock(&pvh_global_lock);
1726		if ((flags & PMAP_ENTER_NOSLEEP) != 0)
1727			return (KERN_RESOURCE_SHORTAGE);
1728		VM_WAIT;
1729		rw_wlock(&pvh_global_lock);
1730		PMAP_LOCK(pmap);
1731		oldpmap = pmap_switch(pmap);
1732	}
1733	origpte = *pte;
1734	if (!pmap_present(pte)) {
1735		opa = ~0UL;
1736		pmap_enter_vhpt(pte, va);
1737	} else
1738		opa = pmap_ppn(pte);
1739	managed = FALSE;
1740	pa = VM_PAGE_TO_PHYS(m);
1741
1742	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1743
1744	/*
1745	 * Mapping has not changed, must be protection or wiring change.
1746	 */
1747	if (opa == pa) {
1748		/*
1749		 * Wiring change, just update stats. We don't worry about
1750		 * wiring PT pages as they remain resident as long as there
1751		 * are valid mappings in them. Hence, if a user page is wired,
1752		 * the PT page will be also.
1753		 */
1754		if (wired && !pmap_wired(&origpte))
1755			pmap->pm_stats.wired_count++;
1756		else if (!wired && pmap_wired(&origpte))
1757			pmap->pm_stats.wired_count--;
1758
1759		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1760
1761		/*
1762		 * We might be turning off write access to the page,
1763		 * so we go ahead and sense modify status. Otherwise,
1764		 * we can avoid I-cache invalidation if the page
1765		 * already allowed execution.
1766		 */
1767		if (managed && pmap_dirty(&origpte))
1768			vm_page_dirty(m);
1769		else if (pmap_exec(&origpte))
1770			icache_inval = FALSE;
1771
1772		pmap_invalidate_page(va);
1773		goto validate;
1774	}
1775
1776	/*
1777	 * Mapping has changed, invalidate old range and fall
1778	 * through to handle validating new mapping.
1779	 */
1780	if (opa != ~0UL) {
1781		pmap_remove_pte(pmap, pte, va, 0, 0);
1782		pmap_enter_vhpt(pte, va);
1783	}
1784
1785	/*
1786	 * Enter on the PV list if part of our managed memory.
1787	 */
1788	if ((m->oflags & VPO_UNMANAGED) == 0) {
1789		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1790		    ("pmap_enter: managed mapping within the clean submap"));
1791		pmap_insert_entry(pmap, va, m);
1792		managed = TRUE;
1793	}
1794
1795	/*
1796	 * Increment counters
1797	 */
1798	pmap->pm_stats.resident_count++;
1799	if (wired)
1800		pmap->pm_stats.wired_count++;
1801
1802validate:
1803
1804	/*
1805	 * Now validate mapping with desired protection/wiring. This
1806	 * adds the pte to the VHPT if necessary.
1807	 */
1808	pmap_pte_prot(pmap, pte, prot);
1809	pmap_pte_attr(pte, m->md.memattr);
1810	pmap_set_pte(pte, va, pa, wired, managed);
1811
1812	/* Invalidate the I-cache when needed. */
1813	if (icache_inval)
1814		ia64_sync_icache(va, PAGE_SIZE);
1815
1816	if ((prot & VM_PROT_WRITE) != 0 && managed)
1817		vm_page_aflag_set(m, PGA_WRITEABLE);
1818	rw_wunlock(&pvh_global_lock);
1819	pmap_switch(oldpmap);
1820	PMAP_UNLOCK(pmap);
1821	return (KERN_SUCCESS);
1822}
1823
1824/*
1825 * Maps a sequence of resident pages belonging to the same object.
1826 * The sequence begins with the given page m_start.  This page is
1827 * mapped at the given virtual address start.  Each subsequent page is
1828 * mapped at a virtual address that is offset from start by the same
1829 * amount as the page is offset from m_start within the object.  The
1830 * last page in the sequence is the page with the largest offset from
1831 * m_start that can be mapped at a virtual address less than the given
1832 * virtual address end.  Not every virtual page between start and end
1833 * is mapped; only those for which a resident page exists with the
1834 * corresponding offset from m_start are mapped.
1835 */
1836void
1837pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1838    vm_page_t m_start, vm_prot_t prot)
1839{
1840	pmap_t oldpmap;
1841	vm_page_t m;
1842	vm_pindex_t diff, psize;
1843
1844	CTR6(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, m=%p, prot=%#x)",
1845	    __func__, pmap, start, end, m_start, prot);
1846
1847	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1848
1849	psize = atop(end - start);
1850	m = m_start;
1851	rw_wlock(&pvh_global_lock);
1852	PMAP_LOCK(pmap);
1853	oldpmap = pmap_switch(pmap);
1854	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1855		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1856		m = TAILQ_NEXT(m, listq);
1857	}
1858	rw_wunlock(&pvh_global_lock);
1859	pmap_switch(oldpmap);
1860 	PMAP_UNLOCK(pmap);
1861}
1862
1863/*
1864 * this code makes some *MAJOR* assumptions:
1865 * 1. The current pmap and the given pmap exist.
1866 * 2. Not wired.
1867 * 3. Read access.
1868 * 4. No page table pages.
1869 * but is *MUCH* faster than pmap_enter...
1870 */
1871void
1872pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1873{
1874	pmap_t oldpmap;
1875
1876	CTR5(KTR_PMAP, "%s(pm=%p, va=%#lx, m=%p, prot=%#x)", __func__, pmap,
1877	    va, m, prot);
1878
1879	rw_wlock(&pvh_global_lock);
1880	PMAP_LOCK(pmap);
1881	oldpmap = pmap_switch(pmap);
1882	pmap_enter_quick_locked(pmap, va, m, prot);
1883	rw_wunlock(&pvh_global_lock);
1884	pmap_switch(oldpmap);
1885	PMAP_UNLOCK(pmap);
1886}
1887
1888static void
1889pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1890    vm_prot_t prot)
1891{
1892	struct ia64_lpte *pte;
1893	boolean_t managed;
1894
1895	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1896	    (m->oflags & VPO_UNMANAGED) != 0,
1897	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1898	rw_assert(&pvh_global_lock, RA_WLOCKED);
1899	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1900
1901	if ((pte = pmap_find_pte(va)) == NULL)
1902		return;
1903
1904	if (!pmap_present(pte)) {
1905		/* Enter on the PV list if the page is managed. */
1906		if ((m->oflags & VPO_UNMANAGED) == 0) {
1907			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1908				pmap_free_pte(pte, va);
1909				return;
1910			}
1911			managed = TRUE;
1912		} else
1913			managed = FALSE;
1914
1915		/* Increment counters. */
1916		pmap->pm_stats.resident_count++;
1917
1918		/* Initialise with R/O protection and enter into VHPT. */
1919		pmap_enter_vhpt(pte, va);
1920		pmap_pte_prot(pmap, pte,
1921		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1922		pmap_pte_attr(pte, m->md.memattr);
1923		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1924
1925		if (prot & VM_PROT_EXECUTE)
1926			ia64_sync_icache(va, PAGE_SIZE);
1927	}
1928}
1929
1930/*
1931 * pmap_object_init_pt preloads the ptes for a given object
1932 * into the specified pmap.  This eliminates the blast of soft
1933 * faults on process startup and immediately after an mmap.
1934 */
1935void
1936pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
1937    vm_pindex_t pindex, vm_size_t size)
1938{
1939
1940	CTR6(KTR_PMAP, "%s(pm=%p, va=%#lx, obj=%p, idx=%lu, sz=%#lx)",
1941	    __func__, pmap, addr, object, pindex, size);
1942
1943	VM_OBJECT_ASSERT_WLOCKED(object);
1944	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1945	    ("pmap_object_init_pt: non-device object"));
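	/*
	 * Nothing else to do here: this pmap does not preload PTEs;
	 * mappings are instead created lazily at fault time.
	 */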
1946}
1947
1948/*
1949 *	Routine:	pmap_change_wiring
1950 *	Function:	Change the wiring attribute for a map/virtual-address
1951 *			pair.
1952 *	In/out conditions:
1953 *			The mapping must already exist in the pmap.
1954 */
1955void
1956pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1957{
1958	pmap_t oldpmap;
1959	struct ia64_lpte *pte;
1960
1961	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, wired=%u)", __func__, pmap, va,
1962	    wired);
1963
1964	PMAP_LOCK(pmap);
1965	oldpmap = pmap_switch(pmap);
1966
1967	pte = pmap_find_vhpt(va);
1968	KASSERT(pte != NULL, ("pte"));
1969	if (wired && !pmap_wired(pte)) {
1970		pmap->pm_stats.wired_count++;
1971		pmap_set_wired(pte);
1972	} else if (!wired && pmap_wired(pte)) {
1973		pmap->pm_stats.wired_count--;
1974		pmap_clear_wired(pte);
1975	}
1976
1977	pmap_switch(oldpmap);
1978	PMAP_UNLOCK(pmap);
1979}
1980
1981/*
1982 *	Copy the range specified by src_addr/len
1983 *	from the source map to the range dst_addr/len
1984 *	in the destination map.
1985 *
1986 *	This routine is only advisory and need not do anything.
1987 */
1988void
1989pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_va, vm_size_t len,
1990    vm_offset_t src_va)
1991{
1992
1993	CTR6(KTR_PMAP, "%s(dpm=%p, spm=%p, dva=%#lx, sz=%#lx, sva=%#lx)",
1994	    __func__, dst_pmap, src_pmap, dst_va, len, src_va);
1995}
1996
1997/*
1998 *	pmap_zero_page zeros the specified hardware page by
1999 *	mapping it into virtual memory and using bzero to clear
2000 *	its contents.
2001 */
2002void
2003pmap_zero_page(vm_page_t m)
2004{
2005	void *p;
2006
2007	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2008
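	/*
	 * The page is addressed through the kernel's direct mapping, so
	 * no transient mapping has to be set up.
	 */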
2009	p = (void *)pmap_page_to_va(m);
2010	bzero(p, PAGE_SIZE);
2011}
2012
2013/*
2014 *	pmap_zero_page_area zeros the specified hardware page by
2015 *	mapping it into virtual memory and using bzero to clear
2016 *	its contents.
2017 *
2018 *	off and size must reside within a single page.
2019 */
2020void
2021pmap_zero_page_area(vm_page_t m, int off, int size)
2022{
2023	char *p;
2024
2025	CTR4(KTR_PMAP, "%s(m=%p, ofs=%d, len=%d)", __func__, m, off, size);
2026
2027	p = (char *)pmap_page_to_va(m);
2028	bzero(p + off, size);
2029}
2030
2031/*
2032 *	pmap_zero_page_idle zeros the specified hardware page by
2033 *	mapping it into virtual memory and using bzero to clear
2034 *	its contents.  This is for the vm_idlezero process.
2035 */
2036void
2037pmap_zero_page_idle(vm_page_t m)
2038{
2039	void *p;
2040
2041	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2042
2043	p = (void *)pmap_page_to_va(m);
2044	bzero(p, PAGE_SIZE);
2045}
2046
2047/*
2048 *	pmap_copy_page copies the specified (machine-independent)
2049 *	page by mapping the page into virtual memory and using
2050 *	bcopy to copy the page, one machine-dependent page at a
2051 *	time.
2052 */
2053void
2054pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2055{
2056	void *dst, *src;
2057
2058	CTR3(KTR_PMAP, "%s(sm=%p, dm=%p)", __func__, msrc, mdst);
2059
2060	src = (void *)pmap_page_to_va(msrc);
2061	dst = (void *)pmap_page_to_va(mdst);
2062	bcopy(src, dst, PAGE_SIZE);
2063}
2064
2065void
2066pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2067    vm_offset_t b_offset, int xfersize)
2068{
2069	void *a_cp, *b_cp;
2070	vm_offset_t a_pg_offset, b_pg_offset;
2071	int cnt;
2072
2073	CTR6(KTR_PMAP, "%s(m0=%p, va0=%#lx, m1=%p, va1=%#lx, sz=%#x)",
2074	    __func__, ma, a_offset, mb, b_offset, xfersize);
2075
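	/*
	 * Copy in pieces, clamping each piece so that it crosses a page
	 * boundary in neither the source nor the destination.
	 */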
2076	while (xfersize > 0) {
2077		a_pg_offset = a_offset & PAGE_MASK;
2078		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2079		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
2080		    a_pg_offset;
2081		b_pg_offset = b_offset & PAGE_MASK;
2082		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2083		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
2084		    b_pg_offset;
2085		bcopy(a_cp, b_cp, cnt);
2086		a_offset += cnt;
2087		b_offset += cnt;
2088		xfersize -= cnt;
2089	}
2090}
2091
2092/*
2093 * Returns true if the pmap's pv is one of the first
2094 * 16 pvs linked to from this page.  This count may
2095 * be changed upwards or downwards in the future; it
2096 * is only necessary that true be returned for a small
2097 * subset of pmaps for proper page aging.
2098 */
2099boolean_t
2100pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2101{
2102	pv_entry_t pv;
2103	int loops = 0;
2104	boolean_t rv;
2105
2106	CTR3(KTR_PMAP, "%s(pm=%p, m=%p)", __func__, pmap, m);
2107
2108	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2109	    ("pmap_page_exists_quick: page %p is not managed", m));
2110	rv = FALSE;
2111	rw_wlock(&pvh_global_lock);
2112	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2113		if (PV_PMAP(pv) == pmap) {
2114			rv = TRUE;
2115			break;
2116		}
2117		loops++;
2118		if (loops >= 16)
2119			break;
2120	}
2121	rw_wunlock(&pvh_global_lock);
2122	return (rv);
2123}
2124
2125/*
2126 *	pmap_page_wired_mappings:
2127 *
2128 *	Return the number of managed mappings to the given physical page
2129 *	that are wired.
2130 */
2131int
2132pmap_page_wired_mappings(vm_page_t m)
2133{
2134	struct ia64_lpte *pte;
2135	pmap_t oldpmap, pmap;
2136	pv_entry_t pv;
2137	int count;
2138
2139	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2140
2141	count = 0;
2142	if ((m->oflags & VPO_UNMANAGED) != 0)
2143		return (count);
2144	rw_wlock(&pvh_global_lock);
2145	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2146		pmap = PV_PMAP(pv);
2147		PMAP_LOCK(pmap);
2148		oldpmap = pmap_switch(pmap);
2149		pte = pmap_find_vhpt(pv->pv_va);
2150		KASSERT(pte != NULL, ("pte"));
2151		if (pmap_wired(pte))
2152			count++;
2153		pmap_switch(oldpmap);
2154		PMAP_UNLOCK(pmap);
2155	}
2156	rw_wunlock(&pvh_global_lock);
2157	return (count);
2158}
2159
2160/*
2161 * Remove all pages from the specified address space;
2162 * this aids process exit speeds.  Also, this code
2163 * is special-cased for the current process only, but
2164 * can have the more generic (and slightly slower)
2165 * mode enabled.  This is much faster than pmap_remove
2166 * in the case of running down an entire address space.
2167 */
2168void
2169pmap_remove_pages(pmap_t pmap)
2170{
2171	struct pv_chunk *pc, *npc;
2172	struct ia64_lpte *pte;
2173	pmap_t oldpmap;
2174	pv_entry_t pv;
2175	vm_offset_t va;
2176	vm_page_t m;
2177	u_long inuse, bitmask;
2178	int allfree, bit, field, idx;
2179
2180	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
2181
2182	rw_wlock(&pvh_global_lock);
2183	PMAP_LOCK(pmap);
2184	oldpmap = pmap_switch(pmap);
2185	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2186		allfree = 1;
2187		for (field = 0; field < _NPCM; field++) {
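			/*
			 * Bits set in inuse identify the pv entries that are
			 * still allocated in this word of the chunk's bitmap.
			 */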
2188			inuse = ~pc->pc_map[field] & pc_freemask[field];
2189			while (inuse != 0) {
2190				bit = ffsl(inuse) - 1;
2191				bitmask = 1UL << bit;
2192				idx = field * sizeof(inuse) * NBBY + bit;
2193				pv = &pc->pc_pventry[idx];
2194				inuse &= ~bitmask;
2195				va = pv->pv_va;
2196				pte = pmap_find_vhpt(va);
2197				KASSERT(pte != NULL, ("pte"));
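				/*
				 * Leave wired mappings alone; they also keep
				 * this chunk from being freed.
				 */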
2198				if (pmap_wired(pte)) {
2199					allfree = 0;
2200					continue;
2201				}
2202				pmap_remove_vhpt(va);
2203				pmap_invalidate_page(va);
2204				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2205				if (pmap_dirty(pte))
2206					vm_page_dirty(m);
2207				pmap_free_pte(pte, va);
2208				/* Mark free */
2209				PV_STAT(pv_entry_frees++);
2210				PV_STAT(pv_entry_spare++);
2211				pv_entry_count--;
2212				pc->pc_map[field] |= bitmask;
2213				pmap->pm_stats.resident_count--;
2214				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2215				if (TAILQ_EMPTY(&m->md.pv_list))
2216					vm_page_aflag_clear(m, PGA_WRITEABLE);
2217			}
2218		}
2219		if (allfree) {
2220			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2221			free_pv_chunk(pc);
2222		}
2223	}
2224	pmap_switch(oldpmap);
2225	PMAP_UNLOCK(pmap);
2226	rw_wunlock(&pvh_global_lock);
2227}
2228
2229/*
2230 *	pmap_ts_referenced:
2231 *
2232 *	Return a count of reference bits for a page, clearing those bits.
2233 *	It is not necessary for every reference bit to be cleared, but it
2234 *	is necessary that 0 only be returned when there are truly no
2235 *	reference bits set.
2236 *
2237 *	XXX: The exact number of bits to check and clear is a matter that
2238 *	should be tested and standardized at some point in the future for
2239 *	optimal aging of shared pages.
2240 */
2241int
2242pmap_ts_referenced(vm_page_t m)
2243{
2244	struct ia64_lpte *pte;
2245	pmap_t oldpmap, pmap;
2246	pv_entry_t pv;
2247	int count = 0;
2248
2249	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2250
2251	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2252	    ("pmap_ts_referenced: page %p is not managed", m));
2253	rw_wlock(&pvh_global_lock);
2254	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2255		pmap = PV_PMAP(pv);
2256		PMAP_LOCK(pmap);
2257		oldpmap = pmap_switch(pmap);
2258		pte = pmap_find_vhpt(pv->pv_va);
2259		KASSERT(pte != NULL, ("pte"));
2260		if (pmap_accessed(pte)) {
2261			count++;
2262			pmap_clear_accessed(pte);
2263			pmap_invalidate_page(pv->pv_va);
2264		}
2265		pmap_switch(oldpmap);
2266		PMAP_UNLOCK(pmap);
2267	}
2268	rw_wunlock(&pvh_global_lock);
2269	return (count);
2270}
2271
2272/*
2273 *	pmap_is_modified:
2274 *
2275 *	Return whether or not the specified physical page was modified
2276 *	in any physical maps.
2277 */
2278boolean_t
2279pmap_is_modified(vm_page_t m)
2280{
2281	struct ia64_lpte *pte;
2282	pmap_t oldpmap, pmap;
2283	pv_entry_t pv;
2284	boolean_t rv;
2285
2286	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2287
2288	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2289	    ("pmap_is_modified: page %p is not managed", m));
2290	rv = FALSE;
2291
2292	/*
2293	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2294	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2295	 * is clear, no PTEs can be dirty.
2296	 */
2297	VM_OBJECT_ASSERT_WLOCKED(m->object);
2298	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2299		return (rv);
2300	rw_wlock(&pvh_global_lock);
2301	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2302		pmap = PV_PMAP(pv);
2303		PMAP_LOCK(pmap);
2304		oldpmap = pmap_switch(pmap);
2305		pte = pmap_find_vhpt(pv->pv_va);
2306		pmap_switch(oldpmap);
2307		KASSERT(pte != NULL, ("pte"));
2308		rv = pmap_dirty(pte) ? TRUE : FALSE;
2309		PMAP_UNLOCK(pmap);
2310		if (rv)
2311			break;
2312	}
2313	rw_wunlock(&pvh_global_lock);
2314	return (rv);
2315}
2316
2317/*
2318 *	pmap_is_prefaultable:
2319 *
2320 *	Return whether or not the specified virtual address is eligible
2321 *	for prefault.
2322 */
2323boolean_t
2324pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2325{
2326	struct ia64_lpte *pte;
2327
2328	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, addr);
2329
2330	pte = pmap_find_vhpt(addr);
2331	if (pte != NULL && pmap_present(pte))
2332		return (FALSE);
2333	return (TRUE);
2334}
2335
2336/*
2337 *	pmap_is_referenced:
2338 *
2339 *	Return whether or not the specified physical page was referenced
2340 *	in any physical maps.
2341 */
2342boolean_t
2343pmap_is_referenced(vm_page_t m)
2344{
2345	struct ia64_lpte *pte;
2346	pmap_t oldpmap, pmap;
2347	pv_entry_t pv;
2348	boolean_t rv;
2349
2350	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2351
2352	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2353	    ("pmap_is_referenced: page %p is not managed", m));
2354	rv = FALSE;
2355	rw_wlock(&pvh_global_lock);
2356	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2357		pmap = PV_PMAP(pv);
2358		PMAP_LOCK(pmap);
2359		oldpmap = pmap_switch(pmap);
2360		pte = pmap_find_vhpt(pv->pv_va);
2361		pmap_switch(oldpmap);
2362		KASSERT(pte != NULL, ("pte"));
2363		rv = pmap_accessed(pte) ? TRUE : FALSE;
2364		PMAP_UNLOCK(pmap);
2365		if (rv)
2366			break;
2367	}
2368	rw_wunlock(&pvh_global_lock);
2369	return (rv);
2370}
2371
2372/*
2373 *	Apply the given advice to the specified range of addresses within the
2374 *	given pmap.  Depending on the advice, clear the referenced and/or
2375 *	modified flags in each mapping and set the mapped page's dirty field.
2376 */
2377void
2378pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2379{
2380	struct ia64_lpte *pte;
2381	pmap_t oldpmap;
2382	vm_page_t m;
2383
2384	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, adv=%d)", __func__,
2385	    pmap, sva, eva, advice);
2386
2387	PMAP_LOCK(pmap);
2388	oldpmap = pmap_switch(pmap);
2389	for (; sva < eva; sva += PAGE_SIZE) {
2390		/* If there is no mapping at this address, skip the page. */
2391		pte = pmap_find_vhpt(sva);
2392		if (pte == NULL)
2393			continue;
2394
2395		/* If it isn't managed, skip it too. */
2396		if (!pmap_managed(pte))
2397			continue;
2398
2399		/* Clear its modified and referenced bits. */
2400		if (pmap_dirty(pte)) {
2401			if (advice == MADV_DONTNEED) {
2402				/*
2403				 * Future calls to pmap_is_modified() can be
2404				 * avoided by making the page dirty now.
2405				 */
2406				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2407				vm_page_dirty(m);
2408			}
2409			pmap_clear_dirty(pte);
2410		} else if (!pmap_accessed(pte))
2411			continue;
2412		pmap_clear_accessed(pte);
2413		pmap_invalidate_page(sva);
2414	}
2415	pmap_switch(oldpmap);
2416	PMAP_UNLOCK(pmap);
2417}
2418
2419/*
2420 *	Clear the modify bits on the specified physical page.
2421 */
2422void
2423pmap_clear_modify(vm_page_t m)
2424{
2425	struct ia64_lpte *pte;
2426	pmap_t oldpmap, pmap;
2427	pv_entry_t pv;
2428
2429	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2430
2431	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2432	    ("pmap_clear_modify: page %p is not managed", m));
2433	VM_OBJECT_ASSERT_WLOCKED(m->object);
2434	KASSERT(!vm_page_xbusied(m),
2435	    ("pmap_clear_modify: page %p is exclusive busied", m));
2436
2437	/*
2438	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
2439	 * If the object containing the page is locked and the page is not
2440	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2441	 */
2442	if ((m->aflags & PGA_WRITEABLE) == 0)
2443		return;
2444	rw_wlock(&pvh_global_lock);
2445	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2446		pmap = PV_PMAP(pv);
2447		PMAP_LOCK(pmap);
2448		oldpmap = pmap_switch(pmap);
2449		pte = pmap_find_vhpt(pv->pv_va);
2450		KASSERT(pte != NULL, ("pte"));
2451		if (pmap_dirty(pte)) {
2452			pmap_clear_dirty(pte);
2453			pmap_invalidate_page(pv->pv_va);
2454		}
2455		pmap_switch(oldpmap);
2456		PMAP_UNLOCK(pmap);
2457	}
2458	rw_wunlock(&pvh_global_lock);
2459}
2460
2461/*
2462 * Clear the write and modified bits in each of the given page's mappings.
2463 */
2464void
2465pmap_remove_write(vm_page_t m)
2466{
2467	struct ia64_lpte *pte;
2468	pmap_t oldpmap, pmap;
2469	pv_entry_t pv;
2470	vm_prot_t prot;
2471
2472	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2473
2474	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2475	    ("pmap_remove_write: page %p is not managed", m));
2476
2477	/*
2478	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2479	 * set by another thread while the object is locked.  Thus,
2480	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2481	 */
2482	VM_OBJECT_ASSERT_WLOCKED(m->object);
2483	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2484		return;
2485	rw_wlock(&pvh_global_lock);
2486	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2487		pmap = PV_PMAP(pv);
2488		PMAP_LOCK(pmap);
2489		oldpmap = pmap_switch(pmap);
2490		pte = pmap_find_vhpt(pv->pv_va);
2491		KASSERT(pte != NULL, ("pte"));
2492		prot = pmap_prot(pte);
2493		if ((prot & VM_PROT_WRITE) != 0) {
2494			if (pmap_dirty(pte)) {
2495				vm_page_dirty(m);
2496				pmap_clear_dirty(pte);
2497			}
2498			prot &= ~VM_PROT_WRITE;
2499			pmap_pte_prot(pmap, pte, prot);
2500			pmap_pte_attr(pte, m->md.memattr);
2501			pmap_invalidate_page(pv->pv_va);
2502		}
2503		pmap_switch(oldpmap);
2504		PMAP_UNLOCK(pmap);
2505	}
2506	vm_page_aflag_clear(m, PGA_WRITEABLE);
2507	rw_wunlock(&pvh_global_lock);
2508}
2509
2510vm_offset_t
2511pmap_mapdev_priv(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2512{
2513	static vm_offset_t last_va = 0;
2514	static vm_paddr_t last_pa = ~0UL;
2515	static vm_size_t last_sz = 0;
2516	struct efi_md *md;
2517
2518	if (pa == last_pa && sz == last_sz)
2519		return (last_va);
2520
2521	md = efi_md_find(pa);
2522	if (md == NULL) {
2523		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2524		    __func__, pa, pa + sz - 1);
2525		return (IA64_PHYS_TO_RR6(pa));
2526	}
2527
2528	if (md->md_type == EFI_MD_TYPE_FREE) {
2529		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2530		    pa + sz - 1);
2531		return (0);
2532	}
2533
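	/*
	 * Memory that supports write-back caching is mapped through the
	 * cacheable direct map in region 7; anything else is mapped
	 * uncacheable through region 6.
	 */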
2534	last_va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2535	    IA64_PHYS_TO_RR6(pa);
2536	last_pa = pa;
2537	last_sz = sz;
2538	return (last_va);
2539}
2540
2541/*
2542 * Map a set of physical memory pages into the kernel virtual
2543 * address space. Return a pointer to where it is mapped. This
2544 * routine is intended to be used for mapping device memory,
2545 * NOT real memory.
2546 */
2547void *
2548pmap_mapdev_attr(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2549{
2550	vm_offset_t va;
2551
2552	CTR4(KTR_PMAP, "%s(pa=%#lx, sz=%#lx, attr=%#x)", __func__, pa, sz,
2553	    attr);
2554
2555	va = pmap_mapdev_priv(pa, sz, attr);
2556	return ((void *)(uintptr_t)va);
2557}
2558
2559/*
2560 * 'Unmap' a range mapped by pmap_mapdev_attr().
2561 */
2562void
2563pmap_unmapdev(vm_offset_t va, vm_size_t size)
2564{
2565
2566	CTR3(KTR_PMAP, "%s(va=%#lx, sz=%#lx)", __func__, va, size);
2567}
2568
2569/*
2570 * Sets the memory attribute for the specified page.
2571 */
2572static void
2573pmap_page_set_memattr_1(void *arg)
2574{
2575	struct ia64_pal_result res;
2576	register_t is;
2577	uintptr_t pp = (uintptr_t)arg;
2578
2579	is = intr_disable();
2580	res = ia64_call_pal_static(pp, 0, 0, 0);
2581	intr_restore(is);
2582}
2583
2584void
2585pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2586{
2587	struct ia64_lpte *pte;
2588	pmap_t oldpmap, pmap;
2589	pv_entry_t pv;
2590	void *va;
2591
2592	CTR3(KTR_PMAP, "%s(m=%p, attr=%#x)", __func__, m, ma);
2593
2594	rw_wlock(&pvh_global_lock);
2595	m->md.memattr = ma;
2596	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2597		pmap = PV_PMAP(pv);
2598		PMAP_LOCK(pmap);
2599		oldpmap = pmap_switch(pmap);
2600		pte = pmap_find_vhpt(pv->pv_va);
2601		KASSERT(pte != NULL, ("pte"));
2602		pmap_pte_attr(pte, ma);
2603		pmap_invalidate_page(pv->pv_va);
2604		pmap_switch(oldpmap);
2605		PMAP_UNLOCK(pmap);
2606	}
2607	rw_wunlock(&pvh_global_lock);
2608
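	/*
	 * Making a page uncacheable requires flushing it from the data
	 * cache, bracketed by PAL_PREFETCH_VISIBILITY and PAL_MC_DRAIN
	 * calls on every CPU.
	 */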
2609	if (ma == VM_MEMATTR_UNCACHEABLE) {
2610#ifdef SMP
2611		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2612		    (void *)PAL_PREFETCH_VISIBILITY);
2613#else
2614		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2615#endif
2616		va = (void *)pmap_page_to_va(m);
2617		critical_enter();
2618		cpu_flush_dcache(va, PAGE_SIZE);
2619		critical_exit();
2620#ifdef SMP
2621		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2622		    (void *)PAL_MC_DRAIN);
2623#else
2624		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2625#endif
2626	}
2627}
2628
2629/*
2630 * Perform the pmap work for mincore().
2631 */
2632int
2633pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2634{
2635	pmap_t oldpmap;
2636	struct ia64_lpte *pte, tpte;
2637	vm_paddr_t pa;
2638	int val;
2639
2640	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, pa_p=%p)", __func__, pmap, addr,
2641	    locked_pa);
2642
2643	PMAP_LOCK(pmap);
2644retry:
2645	oldpmap = pmap_switch(pmap);
2646	pte = pmap_find_vhpt(addr);
2647	if (pte != NULL) {
2648		tpte = *pte;
2649		pte = &tpte;
2650	}
2651	pmap_switch(oldpmap);
2652	if (pte == NULL || !pmap_present(pte)) {
2653		val = 0;
2654		goto out;
2655	}
2656	val = MINCORE_INCORE;
2657	if (pmap_dirty(pte))
2658		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2659	if (pmap_accessed(pte))
2660		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2661	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2662	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2663	    pmap_managed(pte)) {
2664		pa = pmap_ppn(pte);
2665		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
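		/*
		 * Retry the lookup if taking the page lock forced the pmap
		 * lock to be dropped and reacquired.
		 */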
2666		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2667			goto retry;
2668	} else
2669out:
2670		PA_UNLOCK_COND(*locked_pa);
2671	PMAP_UNLOCK(pmap);
2672	return (val);
2673}
2674
2675/*
2676 * Make the pmap of the given thread's process the active pmap on this CPU.
2677 */
2678void
2679pmap_activate(struct thread *td)
2680{
2681
2682	CTR2(KTR_PMAP, "%s(td=%p)", __func__, td);
2683
2684	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2685}
2686
2687pmap_t
2688pmap_switch(pmap_t pm)
2689{
2690	pmap_t prevpm;
2691	int i;
2692
2693	critical_enter();
2694	prevpm = PCPU_GET(md.current_pmap);
2695	if (prevpm == pm)
2696		goto out;
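	/*
	 * Program the region registers for the user regions.  Each value
	 * holds the RID in bits 8..31, the preferred page size in bits
	 * 2..7 and the VHPT-walker enable in bit 0.
	 */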
2697	if (pm == NULL) {
2698		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2699			ia64_set_rr(IA64_RR_BASE(i),
2700			    (i << 8)|(PAGE_SHIFT << 2)|1);
2701		}
2702	} else {
2703		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2704			ia64_set_rr(IA64_RR_BASE(i),
2705			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2706		}
2707	}
2708	PCPU_SET(md.current_pmap, pm);
2709	ia64_srlz_d();
2710
2711out:
2712	critical_exit();
2713	return (prevpm);
2714}
2715
2716/*
2717 * Synchronize the I-cache with the D-cache for the given range of the pmap.
2718 */
2719void
2720pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2721{
2722	pmap_t oldpm;
2723	struct ia64_lpte *pte;
2724	vm_offset_t lim;
2725	vm_size_t len;
2726
2727	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, sz=%#lx)", __func__, pm, va, sz);
2728
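	/*
	 * Round the range out to 32-byte boundaries, the granularity
	 * assumed here for instruction cache synchronization.
	 */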
2729	sz += va & 31;
2730	va &= ~31;
2731	sz = (sz + 31) & ~31;
2732
2733	PMAP_LOCK(pm);
2734	oldpm = pmap_switch(pm);
2735	while (sz > 0) {
2736		lim = round_page(va);
2737		len = MIN(lim - va, sz);
2738		pte = pmap_find_vhpt(va);
2739		if (pte != NULL && pmap_present(pte))
2740			ia64_sync_icache(va, len);
2741		va += len;
2742		sz -= len;
2743	}
2744	pmap_switch(oldpm);
2745	PMAP_UNLOCK(pm);
2746}
2747
2748/*
2749 *	Increase the starting virtual address of the given mapping if a
2750 *	different alignment might result in more superpage mappings.
2751 */
2752void
2753pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2754    vm_offset_t *addr, vm_size_t size)
2755{
2756
2757	CTR5(KTR_PMAP, "%s(obj=%p, ofs=%#lx, va_p=%p, sz=%#lx)", __func__,
2758	    object, offset, addr, size);
2759}
2760
2761#include "opt_ddb.h"
2762
2763#ifdef DDB
2764
2765#include <ddb/ddb.h>
2766
2767static const char*	psnames[] = {
2768	"1B",	"2B",	"4B",	"8B",
2769	"16B",	"32B",	"64B",	"128B",
2770	"256B",	"512B",	"1K",	"2K",
2771	"4K",	"8K",	"16K",	"32K",
2772	"64K",	"128K",	"256K",	"512K",
2773	"1M",	"2M",	"4M",	"8M",
2774	"16M",	"32M",	"64M",	"128M",
2775	"256M",	"512M",	"1G",	"2G"
2776};
2777
2778static void
2779print_trs(int type)
2780{
2781	struct ia64_pal_result res;
2782	int i, maxtr;
2783	struct {
2784		pt_entry_t	pte;
2785		uint64_t	itir;
2786		uint64_t	ifa;
2787		struct ia64_rr	rr;
2788	} buf;
2789	static const char *manames[] = {
2790		"WB",	"bad",	"bad",	"bad",
2791		"UC",	"UCE",	"WC",	"NaT",
2792	};
2793
2794	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2795	if (res.pal_status != 0) {
2796		db_printf("Can't get VM summary\n");
2797		return;
2798	}
2799
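	/*
	 * PAL_VM_SUMMARY reports the highest implemented instruction
	 * (type 0) and data (type 1) translation register indices.
	 */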
2800	if (type == 0)
2801		maxtr = (res.pal_result[0] >> 40) & 0xff;
2802	else
2803		maxtr = (res.pal_result[0] >> 32) & 0xff;
2804
2805	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2806	for (i = 0; i <= maxtr; i++) {
2807		bzero(&buf, sizeof(buf));
2808		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2809		    ia64_tpa((uint64_t)&buf));
2810		if (!(res.pal_result[0] & 1))
2811			buf.pte &= ~PTE_AR_MASK;
2812		if (!(res.pal_result[0] & 2))
2813			buf.pte &= ~PTE_PL_MASK;
2814		if (!(res.pal_result[0] & 4))
2815			pmap_clear_dirty(&buf);
2816		if (!(res.pal_result[0] & 8))
2817			buf.pte &= ~PTE_MA_MASK;
2818		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2819		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2820		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2821		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2822		    (buf.pte & PTE_ED) ? 1 : 0,
2823		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2824		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2825		    (pmap_dirty(&buf)) ? 1 : 0,
2826		    (pmap_accessed(&buf)) ? 1 : 0,
2827		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2828		    (pmap_present(&buf)) ? 1 : 0,
2829		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2830	}
2831}
2832
2833DB_COMMAND(itr, db_itr)
2834{
2835	print_trs(0);
2836}
2837
2838DB_COMMAND(dtr, db_dtr)
2839{
2840	print_trs(1);
2841}
2842
2843DB_COMMAND(rr, db_rr)
2844{
2845	int i;
2846	uint64_t t;
2847	struct ia64_rr rr;
2848
2849	db_printf("RR RID    PgSz VE\n");
2850	for (i = 0; i < 8; i++) {
2851		__asm __volatile ("mov %0=rr[%1]"
2852				  : "=r"(t)
2853				  : "r"(IA64_RR_BASE(i)));
2854		*(uint64_t *) &rr = t;
2855		db_printf("%d  %06x %4s %d\n",
2856		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2857	}
2858}
2859
2860DB_COMMAND(thash, db_thash)
2861{
2862	if (!have_addr)
2863		return;
2864
2865	db_printf("%p\n", (void *) ia64_thash(addr));
2866}
2867
2868DB_COMMAND(ttag, db_ttag)
2869{
2870	if (!have_addr)
2871		return;
2872
2873	db_printf("0x%lx\n", ia64_ttag(addr));
2874}
2875
2876DB_COMMAND(kpte, db_kpte)
2877{
2878	struct ia64_lpte *pte;
2879
2880	if (!have_addr) {
2881		db_printf("usage: kpte <kva>\n");
2882		return;
2883	}
2884	if (addr < VM_INIT_KERNEL_ADDRESS) {
2885		db_printf("kpte: error: invalid <kva>\n");
2886		return;
2887	}
2888	pte = pmap_find_kpte(addr);
2889	db_printf("kpte at %p:\n", pte);
2890	db_printf("  pte  =%016lx\n", pte->pte);
2891	db_printf("  itir =%016lx\n", pte->itir);
2892	db_printf("  tag  =%016lx\n", pte->tag);
2893	db_printf("  chain=%016lx\n", pte->chain);
2894}
2895
2896#endif
2897