pmap.c revision 268189
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: stable/10/sys/ia64/ia64/pmap.c 268189 2014-07-02 22:19:59Z marcel $");
50
51#include "opt_pmap.h"
52
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/lock.h>
56#include <sys/mman.h>
57#include <sys/mutex.h>
58#include <sys/proc.h>
59#include <sys/rwlock.h>
60#include <sys/smp.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63
64#include <vm/vm.h>
65#include <vm/vm_param.h>
66#include <vm/vm_page.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_pageout.h>
70#include <vm/uma.h>
71
72#include <machine/bootinfo.h>
73#include <machine/efi.h>
74#include <machine/md_var.h>
75#include <machine/pal.h>
76
77/*
78 *	Manages physical address maps.
79 *
80 *	Since the information managed by this module is
81 *	also stored by the logical address mapping module,
82 *	this module may throw away valid virtual-to-physical
83 *	mappings at almost any time.  However, invalidations
84 *	of virtual-to-physical mappings must be done as
85 *	requested.
86 *
87 *	In order to cope with hardware architectures which
88 *	make virtual-to-physical map invalidates expensive,
89 * this module may delay invalidation or reduced-protection
90 *	operations until such time as they are actually
91 *	necessary.  This module is given full information as
92 *	to which processors are currently using which maps,
93 *	and to when physical maps must be made correct.
94 */
95
96/*
97 * Following the Linux model, region IDs are allocated in groups of
98 * eight so that a single region ID can be used for as many RRs as we
99 * want by encoding the RR number into the low bits of the ID.
100 *
101 * We reserve region ID 0 for the kernel and allocate the remaining
102 * IDs for user pmaps.
103 *
104 * Region 0-3:	User virtually mapped
105 * Region 4:	PBVM and special mappings
106 * Region 5:	Kernel virtual memory
107 * Region 6:	Direct-mapped uncacheable
108 * Region 7:	Direct-mapped cacheable
109 */
110
111/* XXX move to a header. */
112extern uint64_t ia64_gateway_page[];
113
114#if !defined(DIAGNOSTIC)
115#define PMAP_INLINE __inline
116#else
117#define PMAP_INLINE
118#endif
119
120#ifdef PV_STATS
121#define PV_STAT(x)	do { x ; } while (0)
122#else
123#define PV_STAT(x)	do { } while (0)
124#endif
125
126#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
127#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
128#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
129#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
130#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
131#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
132#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
133#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
134
135#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
136#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
137#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
138#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
139
140#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
141
142/*
143 * Individual PV entries are stored in per-pmap chunks.  This saves
144 * space by eliminating the need to record the pmap within every PV
145 * entry.
146 */
147#if PAGE_SIZE == 8192
148#define	_NPCM	6
149#define	_NPCPV	337
150#define	_NPCS	2
151#elif PAGE_SIZE == 16384
152#define	_NPCM	11
153#define	_NPCPV	677
154#define	_NPCS	1
155#endif
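/*
 * _NPCPV is chosen so that a pv_chunk fills a page exactly (see the
 * CTASSERT further down), _NPCM is the number of 64-bit words needed
 * for the free bitmap and _NPCS pads the header out to that size.
 */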
156struct pv_chunk {
157	pmap_t			pc_pmap;
158	TAILQ_ENTRY(pv_chunk)	pc_list;
159	u_long			pc_map[_NPCM];	/* bitmap; 1 = free */
160	TAILQ_ENTRY(pv_chunk)	pc_lru;
161	u_long			pc_spare[_NPCS];
162	struct pv_entry		pc_pventry[_NPCPV];
163};
164
165/*
166 * The VHPT bucket head structure.
167 */
168struct ia64_bucket {
169	uint64_t	chain;
170	struct mtx	mutex;
171	u_int		length;
172};
173
174/*
175 * Statically allocated kernel pmap
176 */
177struct pmap kernel_pmap_store;
178
179vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
180vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
181
182/*
183 * Kernel virtual memory management.
184 */
185static int nkpt;
186extern struct ia64_lpte ***ia64_kptdir;
187
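/*
 * The kernel 'page tables' are a three-level tree rooted at ia64_kptdir:
 * one root page of pointers to second-level directory pages, each of
 * which points to leaf pages of ia64_lptes.  The macros below split a
 * region 5 VA into the three indices; the shift amounts follow from a
 * directory page holding PAGE_SIZE/8 pointers and a leaf page holding
 * PAGE_SIZE/32 (i.e. NKPTEPG) PTEs.
 */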
188#define KPTE_DIR0_INDEX(va) \
189	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
190#define KPTE_DIR1_INDEX(va) \
191	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
192#define KPTE_PTE_INDEX(va) \
193	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
194#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
195
196vm_offset_t kernel_vm_end;
197
198/* Defaults for ptc.e. */
199static uint64_t pmap_ptc_e_base = 0;
200static uint32_t pmap_ptc_e_count1 = 1;
201static uint32_t pmap_ptc_e_count2 = 1;
202static uint32_t pmap_ptc_e_stride1 = 0;
203static uint32_t pmap_ptc_e_stride2 = 0;
204
205struct mtx pmap_ptc_mutex;
206
207/*
208 * Data for the RID allocator
209 */
210static int pmap_ridcount;
211static int pmap_rididx;
212static int pmap_ridmapsz;
213static int pmap_ridmax;
214static uint64_t *pmap_ridmap;
215struct mtx pmap_ridmutex;
216
217static struct rwlock_padalign pvh_global_lock;
218
219/*
220 * Data for the pv entry allocation mechanism
221 */
222static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
223static int pv_entry_count;
224
225/*
226 * Data for allocating PTEs for user processes.
227 */
228static uma_zone_t ptezone;
229
230/*
231 * Virtual Hash Page Table (VHPT) data.
232 */
233/* SYSCTL_DECL(_machdep); */
234static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
235
236struct ia64_bucket *pmap_vhpt_bucket;
237
238int pmap_vhpt_nbuckets;
239SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
240    &pmap_vhpt_nbuckets, 0, "");
241
242int pmap_vhpt_log2size = 0;
243TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
244SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
245    &pmap_vhpt_log2size, 0, "");
246
247static int pmap_vhpt_inserts;
248SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
249    &pmap_vhpt_inserts, 0, "");
250
251static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
252SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
253    NULL, 0, pmap_vhpt_population, "I", "");
254
255static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
256
257static void free_pv_chunk(struct pv_chunk *pc);
258static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
259static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
260static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
261
262static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
263		    vm_page_t m, vm_prot_t prot);
264static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
265static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
266		    vm_offset_t va, pv_entry_t pv, int freepte);
267static int	pmap_remove_vhpt(vm_offset_t va);
268static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
269		    vm_page_t m);
270
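/*
 * Initialize the long-format VHPT: every entry starts out invalid (the
 * tag's top bit is the architected invalid marker, which ia64_ttag()
 * never sets, so the walker can never match the slot) and its chain
 * word points at the per-bucket head used for software collision
 * chaining.
 */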
271static void
272pmap_initialize_vhpt(vm_offset_t vhpt)
273{
274	struct ia64_lpte *pte;
275	u_int i;
276
277	pte = (struct ia64_lpte *)vhpt;
278	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
279		pte[i].pte = 0;
280		pte[i].itir = 0;
281		pte[i].tag = 1UL << 63; /* Invalid tag */
282		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
283	}
284}
285
286#ifdef SMP
287vm_offset_t
288pmap_alloc_vhpt(void)
289{
290	vm_offset_t vhpt;
291	vm_page_t m;
292	vm_size_t size;
293
294	size = 1UL << pmap_vhpt_log2size;
295	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
296	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
297	    VM_MEMATTR_DEFAULT);
298	if (m != NULL) {
299		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
300		pmap_initialize_vhpt(vhpt);
301		return (vhpt);
302	}
303	return (0);
304}
305#endif
306
307/*
308 *	Bootstrap the system enough to run with virtual memory.
309 */
310void
311pmap_bootstrap()
312{
313	struct ia64_pal_result res;
314	vm_offset_t base;
315	size_t size;
316	int i, ridbits;
317
318	/*
319	 * Query the PAL Code to find the loop parameters for the
320	 * ptc.e instruction.
321	 */
322	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
323	if (res.pal_status != 0)
324		panic("Can't configure ptc.e parameters");
325	pmap_ptc_e_base = res.pal_result[0];
326	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
327	pmap_ptc_e_count2 = res.pal_result[1];
328	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
329	pmap_ptc_e_stride2 = res.pal_result[2];
330	if (bootverbose)
331		printf("ptc.e base=0x%lx, count1=%u, count2=%u, "
332		       "stride1=0x%x, stride2=0x%x\n",
333		       pmap_ptc_e_base,
334		       pmap_ptc_e_count1,
335		       pmap_ptc_e_count2,
336		       pmap_ptc_e_stride1,
337		       pmap_ptc_e_stride2);
338
339	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
340
341	/*
342	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
343	 *
344	 * We currently need at least 19 bits in the RID because PID_MAX
345	 * can only be encoded in 17 bits and we need RIDs for 4 regions
346	 * per process. With PID_MAX equalling 99999 this means that we
347	 * need to be able to encode 399996 (=4*PID_MAX).
348	 * The Itanium processor only has 18 bits and the architected
349	 * minimum is exactly that. So, we cannot use a PID based scheme
350	 * in those cases. Enter pmap_ridmap...
351	 * We should avoid the map when running on a processor that has
352	 * implemented enough bits. This means that we should pass the
353	 * process/thread ID to pmap. This we currently don't do, so we
354	 * use the map anyway. However, we don't want to allocate a map
355	 * that is large enough to cover the range dictated by the number
356	 * of bits in the RID, because that may result in a RID map of
357	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
358	 * The bottom line: we create a 32KB map when the processor only
359	 * implements 18 bits (or when we can't figure it out). Otherwise
360	 * we create a 64KB map.
361	 */
362	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
363	if (res.pal_status != 0) {
364		if (bootverbose)
365			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
366		ridbits = 18; /* guaranteed minimum */
367	} else {
368		ridbits = (res.pal_result[1] >> 8) & 0xff;
369		if (bootverbose)
370			printf("Processor supports %d Region ID bits\n",
371			    ridbits);
372	}
373	if (ridbits > 19)
374		ridbits = 19;
375
376	pmap_ridmax = (1 << ridbits);
377	pmap_ridmapsz = pmap_ridmax / 64;
378	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
379	pmap_ridmap[0] |= 0xff;
380	pmap_rididx = 0;
381	pmap_ridcount = 8;
382	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
383
384	/*
385	 * Allocate some memory for initial kernel 'page tables'.
386	 */
387	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
388	nkpt = 0;
389	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
390
391	/*
392	 * Determine a valid (mappable) VHPT size.
393	 */
394	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
395	if (pmap_vhpt_log2size == 0)
396		pmap_vhpt_log2size = 20;
397	else if (pmap_vhpt_log2size < 16)
398		pmap_vhpt_log2size = 16;
399	else if (pmap_vhpt_log2size > 28)
400		pmap_vhpt_log2size = 28;
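	/*
	 * Keep log2size even: the VHPT has to be mappable (see above) and
	 * the architected page sizes from 64K up come in powers of four,
	 * so an even log2 lets map_vhpt() cover the whole table with a
	 * single translation.
	 */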
401	if (pmap_vhpt_log2size & 1)
402		pmap_vhpt_log2size--;
403
404	size = 1UL << pmap_vhpt_log2size;
405	base = (uintptr_t)ia64_physmem_alloc(size, size);
406	if (base == 0)
407		panic("Unable to allocate VHPT");
408
409	PCPU_SET(md.vhpt, base);
410	if (bootverbose)
411		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
412
413	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
414	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
415	    sizeof(struct ia64_bucket), PAGE_SIZE);
416	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
417		/* Stolen memory is zeroed. */
418		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
419		    MTX_NOWITNESS | MTX_SPIN);
420	}
421
422	pmap_initialize_vhpt(base);
423	map_vhpt(base);
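	/*
	 * Program the PTA register: the VHPT base address, the table size
	 * in bits 2..7 (log2size << 2), the long-format bit (1 << 8) and
	 * the walker enable bit in bit 0.
	 */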
424	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
425	ia64_srlz_i();
426
427	virtual_avail = VM_INIT_KERNEL_ADDRESS;
428	virtual_end = VM_MAX_KERNEL_ADDRESS;
429
430	/*
431	 * Initialize the kernel pmap (which is statically allocated).
432	 */
433	PMAP_LOCK_INIT(kernel_pmap);
434	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
435		kernel_pmap->pm_rid[i] = 0;
436	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
437	PCPU_SET(md.current_pmap, kernel_pmap);
438
439 	/*
440	 * Initialize the global pv list lock.
441	 */
442	rw_init(&pvh_global_lock, "pmap pv global");
443
444	/* Region 5 is mapped via the VHPT. */
445	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
446
447	/*
448	 * Clear out any random TLB entries left over from booting.
449	 */
450	pmap_invalidate_all();
451
452	map_gateway_page();
453}
454
455static int
456pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
457{
458	int count, error, i;
459
460	count = 0;
461	for (i = 0; i < pmap_vhpt_nbuckets; i++)
462		count += pmap_vhpt_bucket[i].length;
463
464	error = SYSCTL_OUT(req, &count, sizeof(count));
465	return (error);
466}
467
468vm_offset_t
469pmap_page_to_va(vm_page_t m)
470{
471	vm_paddr_t pa;
472	vm_offset_t va;
473
474	pa = VM_PAGE_TO_PHYS(m);
475	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
476	    IA64_PHYS_TO_RR7(pa);
477	return (va);
478}
479
480/*
481 *	Initialize a vm_page's machine-dependent fields.
482 */
483void
484pmap_page_init(vm_page_t m)
485{
486
487	TAILQ_INIT(&m->md.pv_list);
488	m->md.memattr = VM_MEMATTR_DEFAULT;
489}
490
491/*
492 *	Initialize the pmap module.
493 *	Called by vm_init, to initialize any structures that the pmap
494 *	system needs to map virtual memory.
495 */
496void
497pmap_init(void)
498{
499
500	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
501	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
502}
503
504
505/***************************************************
506 * Manipulate TLBs for a pmap
507 ***************************************************/
508
509static void
510pmap_invalidate_page(vm_offset_t va)
511{
512	struct ia64_lpte *pte;
513	struct pcpu *pc;
514	uint64_t tag;
515	u_int vhpt_ofs;
516
517	critical_enter();
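	/*
	 * Invalidate the page in two steps: first knock any matching
	 * entry out of every CPU's VHPT by swapping in the invalid tag,
	 * then purge the TLBs with a global ptc.ga.  pmap_ptc_mutex
	 * serializes the purge; global purges must not overlap.
	 */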
518
519	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
520	tag = ia64_ttag(va);
521	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
522		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
523		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
524	}
525
526	mtx_lock_spin(&pmap_ptc_mutex);
527
528	ia64_ptc_ga(va, PAGE_SHIFT << 2);
529	ia64_mf();
530	ia64_srlz_i();
531
532	mtx_unlock_spin(&pmap_ptc_mutex);
533
534	ia64_invala();
535
536	critical_exit();
537}
538
539void
540pmap_invalidate_all(void)
541{
542	uint64_t addr;
543	int i, j;
544
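	/*
	 * Walk the ptc.e loop described by PAL_PTCE_INFO: count1 outer
	 * iterations of stride1 and count2 inner iterations of stride2
	 * cover the entire TLB.
	 */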
545	addr = pmap_ptc_e_base;
546	for (i = 0; i < pmap_ptc_e_count1; i++) {
547		for (j = 0; j < pmap_ptc_e_count2; j++) {
548			ia64_ptc_e(addr);
549			addr += pmap_ptc_e_stride2;
550		}
551		addr += pmap_ptc_e_stride1;
552	}
553	ia64_srlz_i();
554}
555
556static uint32_t
557pmap_allocate_rid(void)
558{
559	uint64_t bit, bits;
560	int rid;
561
562	mtx_lock(&pmap_ridmutex);
563	if (pmap_ridcount == pmap_ridmax)
564		panic("pmap_allocate_rid: All Region IDs used");
565
566	/* Find an index with a free bit. */
567	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
568		pmap_rididx++;
569		if (pmap_rididx == pmap_ridmapsz)
570			pmap_rididx = 0;
571	}
572	rid = pmap_rididx * 64;
573
574	/* Find a free bit. */
575	bit = 1UL;
576	while (bits & bit) {
577		rid++;
578		bit <<= 1;
579	}
580
581	pmap_ridmap[pmap_rididx] |= bit;
582	pmap_ridcount++;
583	mtx_unlock(&pmap_ridmutex);
584
585	return rid;
586}
587
588static void
589pmap_free_rid(uint32_t rid)
590{
591	uint64_t bit;
592	int idx;
593
594	idx = rid / 64;
595	bit = ~(1UL << (rid & 63));
596
597	mtx_lock(&pmap_ridmutex);
598	pmap_ridmap[idx] &= bit;
599	pmap_ridcount--;
600	mtx_unlock(&pmap_ridmutex);
601}
602
603/***************************************************
604 * Page table page management routines.....
605 ***************************************************/
606
607void
608pmap_pinit0(struct pmap *pmap)
609{
610
611	PMAP_LOCK_INIT(pmap);
612	/* kernel_pmap is the same as any other pmap. */
613	pmap_pinit(pmap);
614}
615
616/*
617 * Initialize a preallocated and zeroed pmap structure,
618 * such as one in a vmspace structure.
619 */
620int
621pmap_pinit(struct pmap *pmap)
622{
623	int i;
624
625	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
626		pmap->pm_rid[i] = pmap_allocate_rid();
627	TAILQ_INIT(&pmap->pm_pvchunk);
628	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
629	return (1);
630}
631
632/***************************************************
633 * Pmap allocation/deallocation routines.
634 ***************************************************/
635
636/*
637 * Release any resources held by the given physical map.
638 * Called when a pmap initialized by pmap_pinit is being released.
639 * Should only be called if the map contains no valid mappings.
640 */
641void
642pmap_release(pmap_t pmap)
643{
644	int i;
645
646	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
647		if (pmap->pm_rid[i])
648			pmap_free_rid(pmap->pm_rid[i]);
649}
650
651/*
652 * grow the number of kernel page table entries, if needed
653 */
654void
655pmap_growkernel(vm_offset_t addr)
656{
657	struct ia64_lpte **dir1;
658	struct ia64_lpte *leaf;
659	vm_page_t nkpg;
660
661	while (kernel_vm_end <= addr) {
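		/*
		 * nkpt counts both second-level directory pages and leaf
		 * PTE pages.  The root directory holds PAGE_SIZE/8
		 * pointers and each second-level page another PAGE_SIZE/8,
		 * which gives the hard limit checked here.
		 */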
662		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
663			panic("%s: out of kernel address space", __func__);
664
665		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
666		if (dir1 == NULL) {
667			nkpg = vm_page_alloc(NULL, nkpt++,
668			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
669			if (!nkpg)
670				panic("%s: cannot add dir. page", __func__);
671
672			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
673			bzero(dir1, PAGE_SIZE);
674			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
675		}
676
677		nkpg = vm_page_alloc(NULL, nkpt++,
678		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
679		if (!nkpg)
680			panic("%s: cannot add PTE page", __func__);
681
682		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
683		bzero(leaf, PAGE_SIZE);
684		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
685
686		kernel_vm_end += PAGE_SIZE * NKPTEPG;
687	}
688}
689
690/***************************************************
691 * page management routines.
692 ***************************************************/
693
694CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
695
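/*
 * pv chunks are page-sized and page-aligned (they are carved straight
 * out of a page's direct-mapped address), so masking off the page
 * offset of a pv_entry pointer recovers its enclosing chunk.
 */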
696static __inline struct pv_chunk *
697pv_to_chunk(pv_entry_t pv)
698{
699
700	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
701}
702
703#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
704
705#define	PC_FREE_FULL	0xfffffffffffffffful
706#define	PC_FREE_PARTIAL	\
707	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
708
709#if PAGE_SIZE == 8192
710static const u_long pc_freemask[_NPCM] = {
711	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
712	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
713};
714#elif PAGE_SIZE == 16384
715static const u_long pc_freemask[_NPCM] = {
716	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
717	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
718	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
719	PC_FREE_FULL, PC_FREE_PARTIAL
720};
721#endif
722
723static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
724
725SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
726    "Current number of pv entries");
727
728#ifdef PV_STATS
729static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
730
731SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
732    "Current number of pv entry chunks");
733SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
734    "Current number of pv entry chunks allocated");
735SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
736    "Current number of pv entry chunks frees");
737SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
738    "Number of times tried to get a chunk page but failed.");
739
740static long pv_entry_frees, pv_entry_allocs;
741static int pv_entry_spare;
742
743SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
744    "Current number of pv entry frees");
745SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
746    "Current number of pv entry allocs");
747SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
748    "Current number of spare pv entries");
749#endif
750
751/*
752 * We are in a serious low memory condition.  Resort to
753 * drastic measures to free some pages so we can allocate
754 * another pv entry chunk.
755 */
756static vm_page_t
757pmap_pv_reclaim(pmap_t locked_pmap)
758{
759	struct pch newtail;
760	struct pv_chunk *pc;
761	struct ia64_lpte *pte;
762	pmap_t pmap;
763	pv_entry_t pv;
764	vm_offset_t va;
765	vm_page_t m, m_pc;
766	u_long inuse;
767	int bit, field, freed, idx;
768
769	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
770	pmap = NULL;
771	m_pc = NULL;
772	TAILQ_INIT(&newtail);
773	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
774		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
775		if (pmap != pc->pc_pmap) {
776			if (pmap != NULL) {
777				if (pmap != locked_pmap) {
778					pmap_switch(locked_pmap);
779					PMAP_UNLOCK(pmap);
780				}
781			}
782			pmap = pc->pc_pmap;
783			/* Avoid deadlock and lock recursion. */
784			if (pmap > locked_pmap)
785				PMAP_LOCK(pmap);
786			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
787				pmap = NULL;
788				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
789				continue;
790			}
791			pmap_switch(pmap);
792		}
793
794		/*
795		 * Destroy every non-wired, 8 KB page mapping in the chunk.
796		 */
797		freed = 0;
798		for (field = 0; field < _NPCM; field++) {
799			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
800			    inuse != 0; inuse &= ~(1UL << bit)) {
801				bit = ffsl(inuse) - 1;
802				idx = field * sizeof(inuse) * NBBY + bit;
803				pv = &pc->pc_pventry[idx];
804				va = pv->pv_va;
805				pte = pmap_find_vhpt(va);
806				KASSERT(pte != NULL, ("pte"));
807				if (pmap_wired(pte))
808					continue;
809				pmap_remove_vhpt(va);
810				pmap_invalidate_page(va);
811				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
812				if (pmap_accessed(pte))
813					vm_page_aflag_set(m, PGA_REFERENCED);
814				if (pmap_dirty(pte))
815					vm_page_dirty(m);
816				pmap_free_pte(pte, va);
817				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
818				if (TAILQ_EMPTY(&m->md.pv_list))
819					vm_page_aflag_clear(m, PGA_WRITEABLE);
820				pc->pc_map[field] |= 1UL << bit;
821				freed++;
822			}
823		}
824		if (freed == 0) {
825			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
826			continue;
827		}
828		/* Every freed mapping is for an 8 KB page. */
829		pmap->pm_stats.resident_count -= freed;
830		PV_STAT(pv_entry_frees += freed);
831		PV_STAT(pv_entry_spare += freed);
832		pv_entry_count -= freed;
833		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
834		for (field = 0; field < _NPCM; field++)
835			if (pc->pc_map[field] != pc_freemask[field]) {
836				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
837				    pc_list);
838				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
839
840				/*
841				 * One freed pv entry in locked_pmap is
842				 * sufficient.
843				 */
844				if (pmap == locked_pmap)
845					goto out;
846				break;
847			}
848		if (field == _NPCM) {
849			PV_STAT(pv_entry_spare -= _NPCPV);
850			PV_STAT(pc_chunk_count--);
851			PV_STAT(pc_chunk_frees++);
852			/* Entire chunk is free; return it. */
853			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
854			break;
855		}
856	}
857out:
858	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
859	if (pmap != NULL) {
860		if (pmap != locked_pmap) {
861			pmap_switch(locked_pmap);
862			PMAP_UNLOCK(pmap);
863		}
864	}
865	return (m_pc);
866}
867
868/*
869 * free the pv_entry back to the free list
870 */
871static void
872free_pv_entry(pmap_t pmap, pv_entry_t pv)
873{
874	struct pv_chunk *pc;
875	int bit, field, idx;
876
877	rw_assert(&pvh_global_lock, RA_WLOCKED);
878	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
879	PV_STAT(pv_entry_frees++);
880	PV_STAT(pv_entry_spare++);
881	pv_entry_count--;
882	pc = pv_to_chunk(pv);
883	idx = pv - &pc->pc_pventry[0];
884	field = idx / (sizeof(u_long) * NBBY);
885	bit = idx % (sizeof(u_long) * NBBY);
886	pc->pc_map[field] |= 1ul << bit;
887	for (idx = 0; idx < _NPCM; idx++)
888		if (pc->pc_map[idx] != pc_freemask[idx]) {
889			/*
890			 * 98% of the time, pc is already at the head of the
891			 * list.  If it isn't already, move it to the head.
892			 */
893			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
894			    pc)) {
895				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
896				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
897				    pc_list);
898			}
899			return;
900		}
901	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
902	free_pv_chunk(pc);
903}
904
905static void
906free_pv_chunk(struct pv_chunk *pc)
907{
908	vm_page_t m;
909
910 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
911	PV_STAT(pv_entry_spare -= _NPCPV);
912	PV_STAT(pc_chunk_count--);
913	PV_STAT(pc_chunk_frees++);
914	/* entire chunk is free, return it */
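	/*
	 * Chunks live in the region 7 direct map, so stripping the
	 * region bits yields the physical address of the backing page.
	 */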
915	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
916	vm_page_unwire(m, 0);
917	vm_page_free(m);
918}
919
920/*
921 * get a new pv_entry, allocating a block from the system
922 * when needed.
923 */
924static pv_entry_t
925get_pv_entry(pmap_t pmap, boolean_t try)
926{
927	struct pv_chunk *pc;
928	pv_entry_t pv;
929	vm_page_t m;
930	int bit, field, idx;
931
932	rw_assert(&pvh_global_lock, RA_WLOCKED);
933	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
934	PV_STAT(pv_entry_allocs++);
935	pv_entry_count++;
936retry:
937	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
938	if (pc != NULL) {
939		for (field = 0; field < _NPCM; field++) {
940			if (pc->pc_map[field]) {
941				bit = ffsl(pc->pc_map[field]) - 1;
942				break;
943			}
944		}
945		if (field < _NPCM) {
946			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
947			pv = &pc->pc_pventry[idx];
948			pc->pc_map[field] &= ~(1ul << bit);
949			/* If the chunk is now full, move it to the tail. */
950			for (field = 0; field < _NPCM; field++)
951				if (pc->pc_map[field] != 0) {
952					PV_STAT(pv_entry_spare--);
953					return (pv);	/* not full, return */
954				}
955			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
956			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
957			PV_STAT(pv_entry_spare--);
958			return (pv);
959		}
960	}
961	/* No free items, allocate another chunk */
962	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
963	    VM_ALLOC_WIRED);
964	if (m == NULL) {
965		if (try) {
966			pv_entry_count--;
967			PV_STAT(pc_chunk_tryfail++);
968			return (NULL);
969		}
970		m = pmap_pv_reclaim(pmap);
971		if (m == NULL)
972			goto retry;
973	}
974	PV_STAT(pc_chunk_count++);
975	PV_STAT(pc_chunk_allocs++);
976	pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
977	pc->pc_pmap = pmap;
978	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
979	for (field = 1; field < _NPCM; field++)
980		pc->pc_map[field] = pc_freemask[field];
981	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
982	pv = &pc->pc_pventry[0];
983	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
984	PV_STAT(pv_entry_spare += _NPCPV - 1);
985	return (pv);
986}
987
988/*
989 * Conditionally create a pv entry.
990 */
991static boolean_t
992pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
993{
994	pv_entry_t pv;
995
996	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
997	rw_assert(&pvh_global_lock, RA_WLOCKED);
998	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
999		pv->pv_va = va;
1000		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1001		return (TRUE);
1002	} else
1003		return (FALSE);
1004}
1005
1006/*
1007 * Add an ia64_lpte to the VHPT.
1008 */
1009static void
1010pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1011{
1012	struct ia64_bucket *bckt;
1013	struct ia64_lpte *vhpte;
1014	uint64_t pte_pa;
1015
1016	/* Can fault, so get it out of the way. */
1017	pte_pa = ia64_tpa((vm_offset_t)pte);
1018
1019	vhpte = (struct ia64_lpte *)ia64_thash(va);
1020	bckt = (struct ia64_bucket *)vhpte->chain;
1021
1022	mtx_lock_spin(&bckt->mutex);
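	/*
	 * Publish the entry: link it to the existing chain first, fence,
	 * and only then make it reachable from the bucket head.
	 */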
1023	pte->chain = bckt->chain;
1024	ia64_mf();
1025	bckt->chain = pte_pa;
1026
1027	pmap_vhpt_inserts++;
1028	bckt->length++;
1029	mtx_unlock_spin(&bckt->mutex);
1030}
1031
1032/*
1033 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1034 * worked or an appropriate error code otherwise.
1035 */
1036static int
1037pmap_remove_vhpt(vm_offset_t va)
1038{
1039	struct ia64_bucket *bckt;
1040	struct ia64_lpte *pte;
1041	struct ia64_lpte *lpte;
1042	struct ia64_lpte *vhpte;
1043	uint64_t chain, tag;
1044
1045	tag = ia64_ttag(va);
1046	vhpte = (struct ia64_lpte *)ia64_thash(va);
1047	bckt = (struct ia64_bucket *)vhpte->chain;
1048
1049	lpte = NULL;
1050	mtx_lock_spin(&bckt->mutex);
1051	chain = bckt->chain;
1052	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1053	while (chain != 0 && pte->tag != tag) {
1054		lpte = pte;
1055		chain = pte->chain;
1056		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1057	}
1058	if (chain == 0) {
1059		mtx_unlock_spin(&bckt->mutex);
1060		return (ENOENT);
1061	}
1062
1063	/* Snip this pte out of the collision chain. */
1064	if (lpte == NULL)
1065		bckt->chain = pte->chain;
1066	else
1067		lpte->chain = pte->chain;
1068	ia64_mf();
1069
1070	bckt->length--;
1071	mtx_unlock_spin(&bckt->mutex);
1072	return (0);
1073}
1074
1075/*
1076 * Find the ia64_lpte for the given va, if any.
1077 */
1078static struct ia64_lpte *
1079pmap_find_vhpt(vm_offset_t va)
1080{
1081	struct ia64_bucket *bckt;
1082	struct ia64_lpte *pte;
1083	uint64_t chain, tag;
1084
1085	tag = ia64_ttag(va);
1086	pte = (struct ia64_lpte *)ia64_thash(va);
1087	bckt = (struct ia64_bucket *)pte->chain;
1088
1089	mtx_lock_spin(&bckt->mutex);
1090	chain = bckt->chain;
1091	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1092	while (chain != 0 && pte->tag != tag) {
1093		chain = pte->chain;
1094		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1095	}
1096	mtx_unlock_spin(&bckt->mutex);
1097	return ((chain != 0) ? pte : NULL);
1098}
1099
1100/*
1101 * Remove an entry from the list of managed mappings.
1102 */
1103static int
1104pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1105{
1106
1107	rw_assert(&pvh_global_lock, RA_WLOCKED);
1108	if (!pv) {
1109		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1110			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1111				break;
1112		}
1113	}
1114
1115	if (pv) {
1116		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1117		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1118			vm_page_aflag_clear(m, PGA_WRITEABLE);
1119
1120		free_pv_entry(pmap, pv);
1121		return 0;
1122	} else {
1123		return ENOENT;
1124	}
1125}
1126
1127/*
1128 * Create a pv entry for the page mapped at
1129 * (pmap, va).
1130 */
1131static void
1132pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1133{
1134	pv_entry_t pv;
1135
1136	rw_assert(&pvh_global_lock, RA_WLOCKED);
1137	pv = get_pv_entry(pmap, FALSE);
1138	pv->pv_va = va;
1139	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1140}
1141
1142/*
1143 *	Routine:	pmap_extract
1144 *	Function:
1145 *		Extract the physical page address associated
1146 *		with the given map/virtual_address pair.
1147 */
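/*
 * The VHPT hash and tag depend on the region IDs currently loaded in
 * the region registers, so the pmap is switched in before probing it
 * and switched back out afterwards.
 */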
1148vm_paddr_t
1149pmap_extract(pmap_t pmap, vm_offset_t va)
1150{
1151	struct ia64_lpte *pte;
1152	pmap_t oldpmap;
1153	vm_paddr_t pa;
1154
1155	pa = 0;
1156	PMAP_LOCK(pmap);
1157	oldpmap = pmap_switch(pmap);
1158	pte = pmap_find_vhpt(va);
1159	if (pte != NULL && pmap_present(pte))
1160		pa = pmap_ppn(pte);
1161	pmap_switch(oldpmap);
1162	PMAP_UNLOCK(pmap);
1163	return (pa);
1164}
1165
1166/*
1167 *	Routine:	pmap_extract_and_hold
1168 *	Function:
1169 *		Atomically extract and hold the physical page
1170 *		with the given pmap and virtual address pair
1171 *		if that mapping permits the given protection.
1172 */
1173vm_page_t
1174pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1175{
1176	struct ia64_lpte *pte;
1177	pmap_t oldpmap;
1178	vm_page_t m;
1179	vm_paddr_t pa;
1180
1181	pa = 0;
1182	m = NULL;
1183	PMAP_LOCK(pmap);
1184	oldpmap = pmap_switch(pmap);
1185retry:
1186	pte = pmap_find_vhpt(va);
1187	if (pte != NULL && pmap_present(pte) &&
1188	    (pmap_prot(pte) & prot) == prot) {
1189		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1190		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1191			goto retry;
1192		vm_page_hold(m);
1193	}
1194	PA_UNLOCK_COND(pa);
1195	pmap_switch(oldpmap);
1196	PMAP_UNLOCK(pmap);
1197	return (m);
1198}
1199
1200/***************************************************
1201 * Low level mapping routines.....
1202 ***************************************************/
1203
1204/*
1205 * Find the kernel lpte for mapping the given virtual address, which
1206 * must be in the part of region 5 which we can cover with our kernel
1207 * 'page tables'.
1208 */
1209static struct ia64_lpte *
1210pmap_find_kpte(vm_offset_t va)
1211{
1212	struct ia64_lpte **dir1;
1213	struct ia64_lpte *leaf;
1214
1215	KASSERT((va >> 61) == 5,
1216		("kernel mapping 0x%lx not in region 5", va));
1217	KASSERT(va < kernel_vm_end,
1218		("kernel mapping 0x%lx out of range", va));
1219
1220	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1221	leaf = dir1[KPTE_DIR1_INDEX(va)];
1222	return (&leaf[KPTE_PTE_INDEX(va)]);
1223}
1224
1225/*
1226 * Find a pte suitable for mapping a user-space address. If one exists
1227 * in the VHPT, that one will be returned, otherwise a new pte is
1228 * allocated.
1229 */
1230static struct ia64_lpte *
1231pmap_find_pte(vm_offset_t va)
1232{
1233	struct ia64_lpte *pte;
1234
1235	if (va >= VM_MAXUSER_ADDRESS)
1236		return pmap_find_kpte(va);
1237
1238	pte = pmap_find_vhpt(va);
1239	if (pte == NULL) {
1240		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1241		if (pte != NULL)	/* M_NOWAIT allocation can fail. */
			pte->tag = 1UL << 63;
1242	}
1243	return (pte);
1244}
1245
1246/*
1247 * Free a pte which is now unused. This simply returns it to the zone
1248 * allocator if it is a user mapping. For kernel mappings, clear the
1249 * valid bit to make it clear that the mapping is not currently used.
1250 */
1251static void
1252pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1253{
1254	if (va < VM_MAXUSER_ADDRESS)
1255		uma_zfree(ptezone, pte);
1256	else
1257		pmap_clear_present(pte);
1258}
1259
1260static PMAP_INLINE void
1261pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1262{
1263	static long prot2ar[4] = {
1264		PTE_AR_R,		/* VM_PROT_NONE */
1265		PTE_AR_RW,		/* VM_PROT_WRITE */
1266		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1267		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1268	};
1269
1270	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1271	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1272	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1273	    ? PTE_PL_KERN : PTE_PL_USER;
1274	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1275}
1276
1277static PMAP_INLINE void
1278pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1279{
1280
1281	pte->pte &= ~PTE_MA_MASK;
1282	pte->pte |= (ma & PTE_MA_MASK);
1283}
1284
1285/*
1286 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1287 * the pte was originally valid, then it is assumed to already be in the
1288 * VHPT.
1289 * This function does not set the protection bits.  It's expected
1290 * that those have been set correctly prior to calling this function.
1291 */
1292static void
1293pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1294    boolean_t wired, boolean_t managed)
1295{
1296
1297	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1298	    PTE_AR_MASK | PTE_ED;
1299	pte->pte |= PTE_PRESENT;
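	/*
	 * Unmanaged mappings are pre-marked dirty and accessed: nobody
	 * tracks their reference/modify state, so there is no point in
	 * taking faults later just to set those bits.
	 */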
1300	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1301	pte->pte |= (wired) ? PTE_WIRED : 0;
1302	pte->pte |= pa & PTE_PPN_MASK;
1303
1304	pte->itir = PAGE_SHIFT << 2;
1305
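	/*
	 * Make the rest of the entry visible before writing the tag;
	 * the tag is what the VHPT walker matches on, so it must be
	 * updated last.
	 */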
1306	ia64_mf();
1307
1308	pte->tag = ia64_ttag(va);
1309}
1310
1311/*
1312 * Remove the (possibly managed) mapping represented by pte from the
1313 * given pmap.
1314 */
1315static int
1316pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1317		pv_entry_t pv, int freepte)
1318{
1319	int error;
1320	vm_page_t m;
1321
1322	/*
1323	 * First remove from the VHPT.
1324	 */
1325	error = pmap_remove_vhpt(va);
1326	KASSERT(error == 0, ("%s: pmap_remove_vhpt returned %d",
1327	    __func__, error));
1328
1329	pmap_invalidate_page(va);
1330
1331	if (pmap_wired(pte))
1332		pmap->pm_stats.wired_count -= 1;
1333
1334	pmap->pm_stats.resident_count -= 1;
1335	if (pmap_managed(pte)) {
1336		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1337		if (pmap_dirty(pte))
1338			vm_page_dirty(m);
1339		if (pmap_accessed(pte))
1340			vm_page_aflag_set(m, PGA_REFERENCED);
1341
1342		error = pmap_remove_entry(pmap, m, va, pv);
1343	}
1344	if (freepte)
1345		pmap_free_pte(pte, va);
1346
1347	return (error);
1348}
1349
1350/*
1351 * Extract the physical page address associated with a kernel
1352 * virtual address.
1353 */
1354vm_paddr_t
1355pmap_kextract(vm_offset_t va)
1356{
1357	struct ia64_lpte *pte;
1358	uint64_t *pbvm_pgtbl;
1359	vm_paddr_t pa;
1360	u_int idx;
1361
1362	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1363
1364	/* Regions 6 and 7 are direct mapped. */
1365	if (va >= IA64_RR_BASE(6)) {
1366		pa = IA64_RR_MASK(va);
1367		goto out;
1368	}
1369
1370	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1371	if (va >= kernel_vm_end)
1372		goto err_out;
1373	if (va >= VM_INIT_KERNEL_ADDRESS) {
1374		pte = pmap_find_kpte(va);
1375		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1376		goto out;
1377	}
1378
1379	/* The PBVM page table. */
1380	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1381		goto err_out;
1382	if (va >= IA64_PBVM_PGTBL) {
1383		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1384		goto out;
1385	}
1386
1387	/* The PBVM itself. */
1388	if (va >= IA64_PBVM_BASE) {
1389		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1390		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1391		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1392			goto err_out;
1393		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1394			goto err_out;
1395		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1396		    (va & IA64_PBVM_PAGE_MASK);
1397		goto out;
1398	}
1399
1400 err_out:
1401	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1402	pa = 0;
1403	/* FALLTHROUGH */
1404
1405 out:
1406	return (pa);
1407}
1408
1409/*
1410 * Add a list of wired pages to the kva.  This routine is only used for
1411 * temporary kernel mappings that do not need to have page modification
1412 * or references recorded.  Note that old mappings are simply written
1413 * over.  The page is effectively wired, but it's customary to not have
1414 * the PTE reflect that, nor update statistics.
1415 */
1416void
1417pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1418{
1419	struct ia64_lpte *pte;
1420	int i;
1421
1422	for (i = 0; i < count; i++) {
1423		pte = pmap_find_kpte(va);
1424		if (pmap_present(pte))
1425			pmap_invalidate_page(va);
1426		else
1427			pmap_enter_vhpt(pte, va);
1428		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1429		pmap_pte_attr(pte, m[i]->md.memattr);
1430		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1431		va += PAGE_SIZE;
1432	}
1433}
1434
1435/*
1436 * this routine jerks page mappings from the
1437 * kernel -- it is meant only for temporary mappings.
1438 */
1439void
1440pmap_qremove(vm_offset_t va, int count)
1441{
1442	struct ia64_lpte *pte;
1443	int i;
1444
1445	for (i = 0; i < count; i++) {
1446		pte = pmap_find_kpte(va);
1447		if (pmap_present(pte)) {
1448			pmap_remove_vhpt(va);
1449			pmap_invalidate_page(va);
1450			pmap_clear_present(pte);
1451		}
1452		va += PAGE_SIZE;
1453	}
1454}
1455
1456/*
1457 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1458 * to not have the PTE reflect that, nor update statistics.
1459 */
1460void
1461pmap_kenter(vm_offset_t va, vm_offset_t pa)
1462{
1463	struct ia64_lpte *pte;
1464
1465	pte = pmap_find_kpte(va);
1466	if (pmap_present(pte))
1467		pmap_invalidate_page(va);
1468	else
1469		pmap_enter_vhpt(pte, va);
1470	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1471	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1472	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1473}
1474
1475/*
1476 * Remove a page from the kva
1477 */
1478void
1479pmap_kremove(vm_offset_t va)
1480{
1481	struct ia64_lpte *pte;
1482
1483	pte = pmap_find_kpte(va);
1484	if (pmap_present(pte)) {
1485		pmap_remove_vhpt(va);
1486		pmap_invalidate_page(va);
1487		pmap_clear_present(pte);
1488	}
1489}
1490
1491/*
1492 *	Used to map a range of physical addresses into kernel
1493 *	virtual address space.
1494 *
1495 *	The value passed in '*virt' is a suggested virtual address for
1496 *	the mapping. Architectures which can support a direct-mapped
1497 *	physical to virtual region can return the appropriate address
1498 *	within that region, leaving '*virt' unchanged. Other
1499 *	architectures should map the pages starting at '*virt' and
1500 *	update '*virt' with the first usable address after the mapped
1501 *	region.
1502 */
1503vm_offset_t
1504pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1505{
1506	return IA64_PHYS_TO_RR7(start);
1507}
1508
1509/*
1510 *	Remove the given range of addresses from the specified map.
1511 *
1512 *	It is assumed that the start and end are properly
1513 *	rounded to the page size.
1514 *
1515 *	Sparsely used ranges are inefficiently removed.  The VHPT is
1516 *	probed for every page within the range.  XXX
1517 */
1518void
1519pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1520{
1521	pmap_t oldpmap;
1522	vm_offset_t va;
1523	struct ia64_lpte *pte;
1524
1525	/*
1526	 * Perform an unsynchronized read.  This is, however, safe.
1527	 */
1528	if (pmap->pm_stats.resident_count == 0)
1529		return;
1530
1531	rw_wlock(&pvh_global_lock);
1532	PMAP_LOCK(pmap);
1533	oldpmap = pmap_switch(pmap);
1534	for (va = sva; va < eva; va += PAGE_SIZE) {
1535		pte = pmap_find_vhpt(va);
1536		if (pte != NULL)
1537			pmap_remove_pte(pmap, pte, va, 0, 1);
1538	}
1539	rw_wunlock(&pvh_global_lock);
1540	pmap_switch(oldpmap);
1541	PMAP_UNLOCK(pmap);
1542}
1543
1544/*
1545 *	Routine:	pmap_remove_all
1546 *	Function:
1547 *		Removes this physical page from
1548 *		all physical maps in which it resides.
1549 *		Reflects back modify bits to the pager.
1550 *
1551 *	Notes:
1552 *		Original versions of this routine were very
1553 *		inefficient because they iteratively called
1554 *		pmap_remove (slow...)
1555 */
1556
1557void
1558pmap_remove_all(vm_page_t m)
1559{
1560	pmap_t oldpmap;
1561	pv_entry_t pv;
1562
1563	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1564	    ("pmap_remove_all: page %p is not managed", m));
1565	rw_wlock(&pvh_global_lock);
1566	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1567		struct ia64_lpte *pte;
1568		pmap_t pmap = PV_PMAP(pv);
1569		vm_offset_t va = pv->pv_va;
1570
1571		PMAP_LOCK(pmap);
1572		oldpmap = pmap_switch(pmap);
1573		pte = pmap_find_vhpt(va);
1574		KASSERT(pte != NULL, ("pte"));
1575		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1576			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1577		pmap_remove_pte(pmap, pte, va, pv, 1);
1578		pmap_switch(oldpmap);
1579		PMAP_UNLOCK(pmap);
1580	}
1581	vm_page_aflag_clear(m, PGA_WRITEABLE);
1582	rw_wunlock(&pvh_global_lock);
1583}
1584
1585/*
1586 *	Set the physical protection on the
1587 *	specified range of this map as requested.
1588 */
1589void
1590pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1591{
1592	pmap_t oldpmap;
1593	struct ia64_lpte *pte;
1594
1595	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1596		pmap_remove(pmap, sva, eva);
1597		return;
1598	}
1599
1600	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1601	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1602		return;
1603
1604	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1605		panic("pmap_protect: unaligned addresses");
1606
1607	PMAP_LOCK(pmap);
1608	oldpmap = pmap_switch(pmap);
1609	for ( ; sva < eva; sva += PAGE_SIZE) {
1610		/* If page is invalid, skip this page */
1611		pte = pmap_find_vhpt(sva);
1612		if (pte == NULL)
1613			continue;
1614
1615		/* If there's no change, skip it too */
1616		if (pmap_prot(pte) == prot)
1617			continue;
1618
1619		if ((prot & VM_PROT_WRITE) == 0 &&
1620		    pmap_managed(pte) && pmap_dirty(pte)) {
1621			vm_paddr_t pa = pmap_ppn(pte);
1622			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1623
1624			vm_page_dirty(m);
1625			pmap_clear_dirty(pte);
1626		}
1627
1628		if (prot & VM_PROT_EXECUTE)
1629			ia64_sync_icache(sva, PAGE_SIZE);
1630
1631		pmap_pte_prot(pmap, pte, prot);
1632		pmap_invalidate_page(sva);
1633	}
1634	pmap_switch(oldpmap);
1635	PMAP_UNLOCK(pmap);
1636}
1637
1638/*
1639 *	Insert the given physical page (p) at
1640 *	the specified virtual address (v) in the
1641 *	target physical map with the protection requested.
1642 *
1643 *	If specified, the page will be wired down, meaning
1644 *	that the related pte can not be reclaimed.
1645 *
1646 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1647 *	or lose information.  That is, this routine must actually
1648 *	insert this page into the given map NOW.
1649 */
1650void
1651pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1652    vm_prot_t prot, boolean_t wired)
1653{
1654	pmap_t oldpmap;
1655	vm_offset_t pa;
1656	vm_offset_t opa;
1657	struct ia64_lpte origpte;
1658	struct ia64_lpte *pte;
1659	boolean_t icache_inval, managed;
1660
1661	rw_wlock(&pvh_global_lock);
1662	PMAP_LOCK(pmap);
1663	oldpmap = pmap_switch(pmap);
1664
1665	va &= ~PAGE_MASK;
1666 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1667	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
1668	    ("pmap_enter: page %p is not busy", m));
1669
1670	/*
1671	 * Find (or create) a pte for the given mapping.
1672	 */
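	/*
	 * pmap_find_pte() may need to allocate a pte and can fail; in
	 * that case drop the locks, wait for the VM system to reclaim
	 * some memory and retry.
	 */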
1673	while ((pte = pmap_find_pte(va)) == NULL) {
1674		pmap_switch(oldpmap);
1675		PMAP_UNLOCK(pmap);
1676		rw_wunlock(&pvh_global_lock);
1677		VM_WAIT;
1678		rw_wlock(&pvh_global_lock);
1679		PMAP_LOCK(pmap);
1680		oldpmap = pmap_switch(pmap);
1681	}
1682	origpte = *pte;
1683	if (!pmap_present(pte)) {
1684		opa = ~0UL;
1685		pmap_enter_vhpt(pte, va);
1686	} else
1687		opa = pmap_ppn(pte);
1688	managed = FALSE;
1689	pa = VM_PAGE_TO_PHYS(m);
1690
1691	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1692
1693	/*
1694	 * Mapping has not changed, must be protection or wiring change.
1695	 */
1696	if (opa == pa) {
1697		/*
1698		 * Wiring change, just update stats. We don't worry about
1699		 * wiring PT pages as they remain resident as long as there
1700		 * are valid mappings in them. Hence, if a user page is wired,
1701		 * the PT page will be also.
1702		 */
1703		if (wired && !pmap_wired(&origpte))
1704			pmap->pm_stats.wired_count++;
1705		else if (!wired && pmap_wired(&origpte))
1706			pmap->pm_stats.wired_count--;
1707
1708		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1709
1710		/*
1711		 * We might be turning off write access to the page,
1712		 * so we go ahead and sense modify status. Otherwise,
1713		 * we can avoid I-cache invalidation if the page
1714		 * already allowed execution.
1715		 */
1716		if (managed && pmap_dirty(&origpte))
1717			vm_page_dirty(m);
1718		else if (pmap_exec(&origpte))
1719			icache_inval = FALSE;
1720
1721		pmap_invalidate_page(va);
1722		goto validate;
1723	}
1724
1725	/*
1726	 * Mapping has changed, invalidate old range and fall
1727	 * through to handle validating new mapping.
1728	 */
1729	if (opa != ~0UL) {
1730		pmap_remove_pte(pmap, pte, va, 0, 0);
1731		pmap_enter_vhpt(pte, va);
1732	}
1733
1734	/*
1735	 * Enter on the PV list if part of our managed memory.
1736	 */
1737	if ((m->oflags & VPO_UNMANAGED) == 0) {
1738		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1739		    ("pmap_enter: managed mapping within the clean submap"));
1740		pmap_insert_entry(pmap, va, m);
1741		managed = TRUE;
1742	}
1743
1744	/*
1745	 * Increment counters
1746	 */
1747	pmap->pm_stats.resident_count++;
1748	if (wired)
1749		pmap->pm_stats.wired_count++;
1750
1751validate:
1752
1753	/*
1754	 * Now validate mapping with desired protection/wiring. This
1755	 * adds the pte to the VHPT if necessary.
1756	 */
1757	pmap_pte_prot(pmap, pte, prot);
1758	pmap_pte_attr(pte, m->md.memattr);
1759	pmap_set_pte(pte, va, pa, wired, managed);
1760
1761	/* Invalidate the I-cache when needed. */
1762	if (icache_inval)
1763		ia64_sync_icache(va, PAGE_SIZE);
1764
1765	if ((prot & VM_PROT_WRITE) != 0 && managed)
1766		vm_page_aflag_set(m, PGA_WRITEABLE);
1767	rw_wunlock(&pvh_global_lock);
1768	pmap_switch(oldpmap);
1769	PMAP_UNLOCK(pmap);
1770}
1771
1772/*
1773 * Maps a sequence of resident pages belonging to the same object.
1774 * The sequence begins with the given page m_start.  This page is
1775 * mapped at the given virtual address start.  Each subsequent page is
1776 * mapped at a virtual address that is offset from start by the same
1777 * amount as the page is offset from m_start within the object.  The
1778 * last page in the sequence is the page with the largest offset from
1779 * m_start that can be mapped at a virtual address less than the given
1780 * virtual address end.  Not every virtual page between start and end
1781 * is mapped; only those for which a resident page exists with the
1782 * corresponding offset from m_start are mapped.
1783 */
1784void
1785pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1786    vm_page_t m_start, vm_prot_t prot)
1787{
1788	pmap_t oldpmap;
1789	vm_page_t m;
1790	vm_pindex_t diff, psize;
1791
1792	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1793
1794	psize = atop(end - start);
1795	m = m_start;
1796	rw_wlock(&pvh_global_lock);
1797	PMAP_LOCK(pmap);
1798	oldpmap = pmap_switch(pmap);
1799	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1800		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1801		m = TAILQ_NEXT(m, listq);
1802	}
1803	rw_wunlock(&pvh_global_lock);
1804	pmap_switch(oldpmap);
1805 	PMAP_UNLOCK(pmap);
1806}
1807
1808/*
1809 * this code makes some *MAJOR* assumptions:
1810 * 1. The current pmap and the given pmap exist.
1811 * 2. Not wired.
1812 * 3. Read access.
1813 * 4. No page table pages.
1814 * but is *MUCH* faster than pmap_enter...
1815 */
1816
1817void
1818pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1819{
1820	pmap_t oldpmap;
1821
1822	rw_wlock(&pvh_global_lock);
1823	PMAP_LOCK(pmap);
1824	oldpmap = pmap_switch(pmap);
1825	pmap_enter_quick_locked(pmap, va, m, prot);
1826	rw_wunlock(&pvh_global_lock);
1827	pmap_switch(oldpmap);
1828	PMAP_UNLOCK(pmap);
1829}
1830
1831static void
1832pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1833    vm_prot_t prot)
1834{
1835	struct ia64_lpte *pte;
1836	boolean_t managed;
1837
1838	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1839	    (m->oflags & VPO_UNMANAGED) != 0,
1840	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1841	rw_assert(&pvh_global_lock, RA_WLOCKED);
1842	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1843
1844	if ((pte = pmap_find_pte(va)) == NULL)
1845		return;
1846
1847	if (!pmap_present(pte)) {
1848		/* Enter on the PV list if the page is managed. */
1849		if ((m->oflags & VPO_UNMANAGED) == 0) {
1850			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1851				pmap_free_pte(pte, va);
1852				return;
1853			}
1854			managed = TRUE;
1855		} else
1856			managed = FALSE;
1857
1858		/* Increment counters. */
1859		pmap->pm_stats.resident_count++;
1860
1861		/* Initialise with R/O protection and enter into VHPT. */
1862		pmap_enter_vhpt(pte, va);
1863		pmap_pte_prot(pmap, pte,
1864		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1865		pmap_pte_attr(pte, m->md.memattr);
1866		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1867
1868		if (prot & VM_PROT_EXECUTE)
1869			ia64_sync_icache(va, PAGE_SIZE);
1870	}
1871}
1872
1873/*
1874 * pmap_object_init_pt preloads the ptes for a given object
1875 * into the specified pmap.  This eliminates the blast of soft
1876 * faults on process startup and immediately after an mmap.
1877 */
1878void
1879pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1880		    vm_object_t object, vm_pindex_t pindex,
1881		    vm_size_t size)
1882{
1883
1884	VM_OBJECT_ASSERT_WLOCKED(object);
1885	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1886	    ("pmap_object_init_pt: non-device object"));
1887}
1888
1889/*
1890 *	Routine:	pmap_change_wiring
1891 *	Function:	Change the wiring attribute for a map/virtual-address
1892 *			pair.
1893 *	In/out conditions:
1894 *			The mapping must already exist in the pmap.
1895 */
1896void
1897pmap_change_wiring(pmap, va, wired)
1898	register pmap_t pmap;
1899	vm_offset_t va;
1900	boolean_t wired;
1901{
1902	pmap_t oldpmap;
1903	struct ia64_lpte *pte;
1904
1905	PMAP_LOCK(pmap);
1906	oldpmap = pmap_switch(pmap);
1907
1908	pte = pmap_find_vhpt(va);
1909	KASSERT(pte != NULL, ("pte"));
1910	if (wired && !pmap_wired(pte)) {
1911		pmap->pm_stats.wired_count++;
1912		pmap_set_wired(pte);
1913	} else if (!wired && pmap_wired(pte)) {
1914		pmap->pm_stats.wired_count--;
1915		pmap_clear_wired(pte);
1916	}
1917
1918	pmap_switch(oldpmap);
1919	PMAP_UNLOCK(pmap);
1920}
1921
1922
1923
1924/*
1925 *	Copy the range specified by src_addr/len
1926 *	from the source map to the range dst_addr/len
1927 *	in the destination map.
1928 *
1929 *	This routine is only advisory and need not do anything.
1930 */
1931
1932void
1933pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1934	  vm_offset_t src_addr)
1935{
1936}
1937
1938
1939/*
1940 *	pmap_zero_page zeros the specified hardware page by
1941 *	mapping it into virtual memory and using bzero to clear
1942 *	its contents.
1943 */
1944
1945void
1946pmap_zero_page(vm_page_t m)
1947{
1948	void *p;
1949
1950	p = (void *)pmap_page_to_va(m);
1951	bzero(p, PAGE_SIZE);
1952}
1953
1954
1955/*
1956 *	pmap_zero_page_area zeros the specified area of a hardware
1957 *	page by mapping the page into virtual memory and using bzero
1958 *	to clear the requested range.
1959 *
1960 *	off and size must reside within a single page.
1961 */
1962
1963void
1964pmap_zero_page_area(vm_page_t m, int off, int size)
1965{
1966	char *p;
1967
1968	p = (void *)pmap_page_to_va(m);
1969	bzero(p + off, size);
1970}
1971
1972
1973/*
1974 *	pmap_zero_page_idle zeros the specified hardware page by
1975 *	mapping it into virtual memory and using bzero to clear
1976 *	its contents.  This is for the vm_idlezero process.
1977 */
1978
1979void
1980pmap_zero_page_idle(vm_page_t m)
1981{
1982	void *p;
1983
1984	p = (void *)pmap_page_to_va(m);
1985	bzero(p, PAGE_SIZE);
1986}
1987
1988
1989/*
1990 *	pmap_copy_page copies the specified (machine independent)
1991 *	page by mapping both source and destination pages into
1992 *	virtual memory and using bcopy to copy their contents.
1994 */
1995void
1996pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1997{
1998	void *dst, *src;
1999
2000	src = (void *)pmap_page_to_va(msrc);
2001	dst = (void *)pmap_page_to_va(mdst);
2002	bcopy(src, dst, PAGE_SIZE);
2003}
2004
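/* Zero: the buffer cache may not use unmapped buffers on ia64. */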
2005int unmapped_buf_allowed;
2006
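/*
 * Copy xfersize bytes from the pages in ma[] starting at byte offset
 * a_offset to the pages in mb[] starting at b_offset, splitting the
 * copy into chunks that do not cross a page boundary on either side.
 */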
2007void
2008pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2009    vm_offset_t b_offset, int xfersize)
2010{
2011	void *a_cp, *b_cp;
2012	vm_offset_t a_pg_offset, b_pg_offset;
2013	int cnt;
2014
2015	while (xfersize > 0) {
2016		a_pg_offset = a_offset & PAGE_MASK;
2017		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2018		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
2019		    a_pg_offset;
2020		b_pg_offset = b_offset & PAGE_MASK;
2021		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2022		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
2023		    b_pg_offset;
2024		bcopy(a_cp, b_cp, cnt);
2025		a_offset += cnt;
2026		b_offset += cnt;
2027		xfersize -= cnt;
2028	}
2029}
2030
2031/*
2032 * Returns true if the pmap's pv is one of the first
2033 * 16 pvs linked to from this page.  This count may
2034 * be changed upwards or downwards in the future; it
2035 * is only necessary that true be returned for a small
2036 * subset of pmaps for proper page aging.
2037 */
2038boolean_t
2039pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2040{
2041	pv_entry_t pv;
2042	int loops = 0;
2043	boolean_t rv;
2044
2045	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2046	    ("pmap_page_exists_quick: page %p is not managed", m));
2047	rv = FALSE;
2048	rw_wlock(&pvh_global_lock);
2049	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2050		if (PV_PMAP(pv) == pmap) {
2051			rv = TRUE;
2052			break;
2053		}
2054		loops++;
2055		if (loops >= 16)
2056			break;
2057	}
2058	rw_wunlock(&pvh_global_lock);
2059	return (rv);
2060}
2061
2062/*
2063 *	pmap_page_wired_mappings:
2064 *
2065 *	Return the number of managed mappings to the given physical page
2066 *	that are wired.
2067 */
2068int
2069pmap_page_wired_mappings(vm_page_t m)
2070{
2071	struct ia64_lpte *pte;
2072	pmap_t oldpmap, pmap;
2073	pv_entry_t pv;
2074	int count;
2075
2076	count = 0;
2077	if ((m->oflags & VPO_UNMANAGED) != 0)
2078		return (count);
2079	rw_wlock(&pvh_global_lock);
2080	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2081		pmap = PV_PMAP(pv);
2082		PMAP_LOCK(pmap);
2083		oldpmap = pmap_switch(pmap);
2084		pte = pmap_find_vhpt(pv->pv_va);
2085		KASSERT(pte != NULL, ("pte"));
2086		if (pmap_wired(pte))
2087			count++;
2088		pmap_switch(oldpmap);
2089		PMAP_UNLOCK(pmap);
2090	}
2091	rw_wunlock(&pvh_global_lock);
2092	return (count);
2093}
2094
2095/*
2096 * Remove all pages from the specified address space; this aids
2097 * process exit speeds.  This is much faster than pmap_remove in
2098 * the case of running down an entire address space.  Wired
2099 * mappings are skipped.
2102 */
2103void
2104pmap_remove_pages(pmap_t pmap)
2105{
2106	struct pv_chunk *pc, *npc;
2107	struct ia64_lpte *pte;
2108	pmap_t oldpmap;
2109	pv_entry_t pv;
2110	vm_offset_t va;
2111	vm_page_t m;
2112	u_long inuse, bitmask;
2113	int allfree, bit, field, idx;
2114
2115	rw_wlock(&pvh_global_lock);
2116	PMAP_LOCK(pmap);
2117	oldpmap = pmap_switch(pmap);
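	/*
	 * Walk the pmap's pv chunks.  Each chunk carries a bitmap
	 * (pc_map[]) with a clear bit for every pv entry that is in
	 * use, so scanning the inverted bitmap visits every live
	 * mapping in the chunk.
	 */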
2118	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2119		allfree = 1;
2120		for (field = 0; field < _NPCM; field++) {
2121			inuse = ~pc->pc_map[field] & pc_freemask[field];
2122			while (inuse != 0) {
2123				bit = ffsl(inuse) - 1;
2124				bitmask = 1UL << bit;
2125				idx = field * sizeof(inuse) * NBBY + bit;
2126				pv = &pc->pc_pventry[idx];
2127				inuse &= ~bitmask;
2128				va = pv->pv_va;
2129				pte = pmap_find_vhpt(va);
2130				KASSERT(pte != NULL, ("pte"));
2131				if (pmap_wired(pte)) {
2132					allfree = 0;
2133					continue;
2134				}
2135				pmap_remove_vhpt(va);
2136				pmap_invalidate_page(va);
2137				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2138				if (pmap_dirty(pte))
2139					vm_page_dirty(m);
2140				pmap_free_pte(pte, va);
2141				/* Mark free */
2142				PV_STAT(pv_entry_frees++);
2143				PV_STAT(pv_entry_spare++);
2144				pv_entry_count--;
2145				pc->pc_map[field] |= bitmask;
2146				pmap->pm_stats.resident_count--;
2147				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2148				if (TAILQ_EMPTY(&m->md.pv_list))
2149					vm_page_aflag_clear(m, PGA_WRITEABLE);
2150			}
2151		}
2152		if (allfree) {
2153			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2154			free_pv_chunk(pc);
2155		}
2156	}
2157	pmap_switch(oldpmap);
2158	PMAP_UNLOCK(pmap);
2159	rw_wunlock(&pvh_global_lock);
2160}
2161
2162/*
2163 *	pmap_ts_referenced:
2164 *
2165 *	Return a count of reference bits for a page, clearing those bits.
2166 *	It is not necessary for every reference bit to be cleared, but it
2167 *	is necessary that 0 only be returned when there are truly no
2168 *	reference bits set.
2169 *
2170 *	XXX: The exact number of bits to check and clear is a matter that
2171 *	should be tested and standardized at some point in the future for
2172 *	optimal aging of shared pages.
2173 */
2174int
2175pmap_ts_referenced(vm_page_t m)
2176{
2177	struct ia64_lpte *pte;
2178	pmap_t oldpmap, pmap;
2179	pv_entry_t pv;
2180	int count = 0;
2181
2182	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2183	    ("pmap_ts_referenced: page %p is not managed", m));
2184	rw_wlock(&pvh_global_lock);
2185	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2186		pmap = PV_PMAP(pv);
2187		PMAP_LOCK(pmap);
2188		oldpmap = pmap_switch(pmap);
2189		pte = pmap_find_vhpt(pv->pv_va);
2190		KASSERT(pte != NULL, ("pte"));
2191		if (pmap_accessed(pte)) {
2192			count++;
2193			pmap_clear_accessed(pte);
2194			pmap_invalidate_page(pv->pv_va);
2195		}
2196		pmap_switch(oldpmap);
2197		PMAP_UNLOCK(pmap);
2198	}
2199	rw_wunlock(&pvh_global_lock);
2200	return (count);
2201}
2202
2203/*
2204 *	pmap_is_modified:
2205 *
2206 *	Return whether or not the specified physical page was modified
2207 *	in any physical maps.
2208 */
2209boolean_t
2210pmap_is_modified(vm_page_t m)
2211{
2212	struct ia64_lpte *pte;
2213	pmap_t oldpmap, pmap;
2214	pv_entry_t pv;
2215	boolean_t rv;
2216
2217	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2218	    ("pmap_is_modified: page %p is not managed", m));
2219	rv = FALSE;
2220
2221	/*
2222	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2223	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2224	 * is clear, no PTEs can be dirty.
2225	 */
2226	VM_OBJECT_ASSERT_WLOCKED(m->object);
2227	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2228		return (rv);
2229	rw_wlock(&pvh_global_lock);
2230	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2231		pmap = PV_PMAP(pv);
2232		PMAP_LOCK(pmap);
2233		oldpmap = pmap_switch(pmap);
2234		pte = pmap_find_vhpt(pv->pv_va);
2235		pmap_switch(oldpmap);
2236		KASSERT(pte != NULL, ("pte"));
2237		rv = pmap_dirty(pte) ? TRUE : FALSE;
2238		PMAP_UNLOCK(pmap);
2239		if (rv)
2240			break;
2241	}
2242	rw_wunlock(&pvh_global_lock);
2243	return (rv);
2244}
2245
2246/*
2247 *	pmap_is_prefaultable:
2248 *
2249 *	Return whether or not the specified virtual address is eligible
2250 *	for prefault.
2251 */
2252boolean_t
2253pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2254{
2255	struct ia64_lpte *pte;
2256
2257	pte = pmap_find_vhpt(addr);
2258	if (pte != NULL && pmap_present(pte))
2259		return (FALSE);
2260	return (TRUE);
2261}
2262
2263/*
2264 *	pmap_is_referenced:
2265 *
2266 *	Return whether or not the specified physical page was referenced
2267 *	in any physical maps.
2268 */
2269boolean_t
2270pmap_is_referenced(vm_page_t m)
2271{
2272	struct ia64_lpte *pte;
2273	pmap_t oldpmap, pmap;
2274	pv_entry_t pv;
2275	boolean_t rv;
2276
2277	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2278	    ("pmap_is_referenced: page %p is not managed", m));
2279	rv = FALSE;
2280	rw_wlock(&pvh_global_lock);
2281	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2282		pmap = PV_PMAP(pv);
2283		PMAP_LOCK(pmap);
2284		oldpmap = pmap_switch(pmap);
2285		pte = pmap_find_vhpt(pv->pv_va);
2286		pmap_switch(oldpmap);
2287		KASSERT(pte != NULL, ("pte"));
2288		rv = pmap_accessed(pte) ? TRUE : FALSE;
2289		PMAP_UNLOCK(pmap);
2290		if (rv)
2291			break;
2292	}
2293	rw_wunlock(&pvh_global_lock);
2294	return (rv);
2295}
2296
2297/*
2298 *	Apply the given advice to the specified range of addresses within the
2299 *	given pmap.  Depending on the advice, clear the referenced and/or
2300 *	modified flags in each mapping and set the mapped page's dirty field.
2301 */
2302void
2303pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2304{
2305	struct ia64_lpte *pte;
2306	pmap_t oldpmap;
2307	vm_page_t m;
2308
2309	PMAP_LOCK(pmap);
2310	oldpmap = pmap_switch(pmap);
2311	for (; sva < eva; sva += PAGE_SIZE) {
2312		/* If page is invalid, skip this page. */
2313		pte = pmap_find_vhpt(sva);
2314		if (pte == NULL)
2315			continue;
2316
2317		/* If it isn't managed, skip it too. */
2318		if (!pmap_managed(pte))
2319			continue;
2320
2321		/* Clear its modified and referenced bits. */
2322		if (pmap_dirty(pte)) {
2323			if (advice == MADV_DONTNEED) {
2324				/*
2325				 * Future calls to pmap_is_modified() can be
2326				 * avoided by making the page dirty now.
2327				 */
2328				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2329				vm_page_dirty(m);
2330			}
2331			pmap_clear_dirty(pte);
2332		} else if (!pmap_accessed(pte))
2333			continue;
2334		pmap_clear_accessed(pte);
2335		pmap_invalidate_page(sva);
2336	}
2337	pmap_switch(oldpmap);
2338	PMAP_UNLOCK(pmap);
2339}
2340
2341/*
2342 *	Clear the modify bits on the specified physical page.
2343 */
2344void
2345pmap_clear_modify(vm_page_t m)
2346{
2347	struct ia64_lpte *pte;
2348	pmap_t oldpmap, pmap;
2349	pv_entry_t pv;
2350
2351	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2352	    ("pmap_clear_modify: page %p is not managed", m));
2353	VM_OBJECT_ASSERT_WLOCKED(m->object);
2354	KASSERT(!vm_page_xbusied(m),
2355	    ("pmap_clear_modify: page %p is exclusive busied", m));
2356
2357	/*
2358	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
2359	 * If the object containing the page is locked and the page is not
2360	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2361	 */
2362	if ((m->aflags & PGA_WRITEABLE) == 0)
2363		return;
2364	rw_wlock(&pvh_global_lock);
2365	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2366		pmap = PV_PMAP(pv);
2367		PMAP_LOCK(pmap);
2368		oldpmap = pmap_switch(pmap);
2369		pte = pmap_find_vhpt(pv->pv_va);
2370		KASSERT(pte != NULL, ("pte"));
2371		if (pmap_dirty(pte)) {
2372			pmap_clear_dirty(pte);
2373			pmap_invalidate_page(pv->pv_va);
2374		}
2375		pmap_switch(oldpmap);
2376		PMAP_UNLOCK(pmap);
2377	}
2378	rw_wunlock(&pvh_global_lock);
2379}
2380
2381/*
2382 * Clear the write and modified bits in each of the given page's mappings.
2383 */
2384void
2385pmap_remove_write(vm_page_t m)
2386{
2387	struct ia64_lpte *pte;
2388	pmap_t oldpmap, pmap;
2389	pv_entry_t pv;
2390	vm_prot_t prot;
2391
2392	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2393	    ("pmap_remove_write: page %p is not managed", m));
2394
2395	/*
2396	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2397	 * set by another thread while the object is locked.  Thus,
2398	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2399	 */
2400	VM_OBJECT_ASSERT_WLOCKED(m->object);
2401	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2402		return;
2403	rw_wlock(&pvh_global_lock);
2404	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2405		pmap = PV_PMAP(pv);
2406		PMAP_LOCK(pmap);
2407		oldpmap = pmap_switch(pmap);
2408		pte = pmap_find_vhpt(pv->pv_va);
2409		KASSERT(pte != NULL, ("pte"));
2410		prot = pmap_prot(pte);
2411		if ((prot & VM_PROT_WRITE) != 0) {
2412			if (pmap_dirty(pte)) {
2413				vm_page_dirty(m);
2414				pmap_clear_dirty(pte);
2415			}
2416			prot &= ~VM_PROT_WRITE;
2417			pmap_pte_prot(pmap, pte, prot);
2418			pmap_pte_attr(pte, m->md.memattr);
2419			pmap_invalidate_page(pv->pv_va);
2420		}
2421		pmap_switch(oldpmap);
2422		PMAP_UNLOCK(pmap);
2423	}
2424	vm_page_aflag_clear(m, PGA_WRITEABLE);
2425	rw_wunlock(&pvh_global_lock);
2426}
2427
2428/*
2429 * Map a set of physical memory pages into the kernel virtual
2430 * address space. Return a pointer to where it is mapped. This
2431 * routine is intended to be used for mapping device memory,
2432 * NOT real memory.
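 *
 * A minimal, hypothetical caller (all names are illustrative):
 *
 *	regs = pmap_mapdev(start, size);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, size);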
2433 */
2434void *
2435pmap_mapdev(vm_paddr_t pa, vm_size_t sz)
2436{
2437	static void *last_va = NULL;
2438	static vm_paddr_t last_pa = 0;
2439	static vm_size_t last_sz = 0;
2440	struct efi_md *md;
2441	vm_offset_t va;
2442
2443	if (pa == last_pa && sz == last_sz)
2444		return (last_va);
2445
2446	md = efi_md_find(pa);
2447	if (md == NULL) {
2448		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2449		    __func__, pa, pa + sz - 1);
2450		return ((void *)IA64_PHYS_TO_RR6(pa));
2451	}
2452
2453	if (md->md_type == EFI_MD_TYPE_FREE) {
2454		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2455		    pa + sz - 1);
2456		return (NULL);
2457	}
2458
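	/*
	 * Use the cacheable region 7 identity mapping when the EFI
	 * descriptor allows write-back; otherwise fall back to the
	 * uncacheable region 6 mapping.
	 */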
2459	va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2460	    IA64_PHYS_TO_RR6(pa);
2461
2462	last_va = (void *)va;
2463	last_pa = pa;
2464	last_sz = sz;
2465	return (last_va);
2466}
2467
2468/*
2469 * 'Unmap' a range mapped by pmap_mapdev().
2470 */
2471void
2472pmap_unmapdev(vm_offset_t va, vm_size_t size)
2473{
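	/*
	 * Nothing to do: pmap_mapdev() returns addresses in the
	 * direct-mapped regions, so there is nothing to tear down.
	 */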
2474}
2475
2476/*
2477 * Sets the memory attribute for the specified page.
2478 */
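/*
 * Rendezvous callback for pmap_page_set_memattr(): issue the PAL call
 * selected by 'arg' on this CPU with interrupts disabled.
 */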
2479static void
2480pmap_page_set_memattr_1(void *arg)
2481{
2482	struct ia64_pal_result res;
2483	register_t is;
2484	uintptr_t pp = (uintptr_t)arg;
2485
2486	is = intr_disable();
2487	res = ia64_call_pal_static(pp, 0, 0, 0);
2488	intr_restore(is);
2489}
2490
2491void
2492pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2493{
2494	struct ia64_lpte *pte;
2495	pmap_t oldpmap, pmap;
2496	pv_entry_t pv;
2497	void *va;
2498
2499	rw_wlock(&pvh_global_lock);
2500	m->md.memattr = ma;
2501	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2502		pmap = PV_PMAP(pv);
2503		PMAP_LOCK(pmap);
2504		oldpmap = pmap_switch(pmap);
2505		pte = pmap_find_vhpt(pv->pv_va);
2506		KASSERT(pte != NULL, ("pte"));
2507		pmap_pte_attr(pte, ma);
2508		pmap_invalidate_page(pv->pv_va);
2509		pmap_switch(oldpmap);
2510		PMAP_UNLOCK(pmap);
2511	}
2512	rw_wunlock(&pvh_global_lock);
2513
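	/*
	 * When a page becomes uncacheable, make outstanding prefetches
	 * visible on all CPUs (PAL_PREFETCH_VISIBILITY), flush the page
	 * from the local data cache, and drain pending machine check
	 * state (PAL_MC_DRAIN) on all CPUs.
	 */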
2514	if (ma == VM_MEMATTR_UNCACHEABLE) {
2515#ifdef SMP
2516		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2517		    (void *)PAL_PREFETCH_VISIBILITY);
2518#else
2519		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2520#endif
2521		va = (void *)pmap_page_to_va(m);
2522		critical_enter();
2523		cpu_flush_dcache(va, PAGE_SIZE);
2524		critical_exit();
2525#ifdef SMP
2526		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2527		    (void *)PAL_MC_DRAIN);
2528#else
2529		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2530#endif
2531	}
2532}
2533
2534/*
2535 * Perform the pmap work for mincore(2).
2536 */
2537int
2538pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2539{
2540	pmap_t oldpmap;
2541	struct ia64_lpte *pte, tpte;
2542	vm_paddr_t pa;
2543	int val;
2544
2545	PMAP_LOCK(pmap);
2546retry:
2547	oldpmap = pmap_switch(pmap);
2548	pte = pmap_find_vhpt(addr);
2549	if (pte != NULL) {
2550		tpte = *pte;
2551		pte = &tpte;
2552	}
2553	pmap_switch(oldpmap);
2554	if (pte == NULL || !pmap_present(pte)) {
2555		val = 0;
2556		goto out;
2557	}
2558	val = MINCORE_INCORE;
2559	if (pmap_dirty(pte))
2560		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2561	if (pmap_accessed(pte))
2562		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2563	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2564	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2565	    pmap_managed(pte)) {
2566		pa = pmap_ppn(pte);
2567		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2568		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2569			goto retry;
2570	} else
2571out:
2572		PA_UNLOCK_COND(*locked_pa);
2573	PMAP_UNLOCK(pmap);
2574	return (val);
2575}
2576
2577void
2578pmap_activate(struct thread *td)
2579{
2580	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2581}
2582
2583pmap_t
2584pmap_switch(pmap_t pm)
2585{
2586	pmap_t prevpm;
2587	int i;
2588
2589	critical_enter();
2590	prevpm = PCPU_GET(md.current_pmap);
2591	if (prevpm == pm)
2592		goto out;
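	/*
	 * Program the region registers for the user regions: bits 8 and
	 * up hold the region ID, bits 2-7 the preferred page size, and
	 * bit 0 (VE) enables the VHPT walker.  With no pmap, the region
	 * number itself is used as the RID.
	 */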
2593	if (pm == NULL) {
2594		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2595			ia64_set_rr(IA64_RR_BASE(i),
2596			    (i << 8)|(PAGE_SHIFT << 2)|1);
2597		}
2598	} else {
2599		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2600			ia64_set_rr(IA64_RR_BASE(i),
2601			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2602		}
2603	}
2604	PCPU_SET(md.current_pmap, pm);
2605	ia64_srlz_d();
2606
2607out:
2608	critical_exit();
2609	return (prevpm);
2610}
2611
2612void
2613pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2614{
2615	pmap_t oldpm;
2616	struct ia64_lpte *pte;
2617	vm_offset_t lim;
2618	vm_size_t len;
2619
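	/* Round the range out to 32-byte blocks. */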
2620	sz += va & 31;
2621	va &= ~31;
2622	sz = (sz + 31) & ~31;
2623
2624	PMAP_LOCK(pm);
2625	oldpm = pmap_switch(pm);
2626	while (sz > 0) {
2627		lim = round_page(va + 1);	/* advance even if va is aligned */
2628		len = MIN(lim - va, sz);
2629		pte = pmap_find_vhpt(va);
2630		if (pte != NULL && pmap_present(pte))
2631			ia64_sync_icache(va, len);
2632		va += len;
2633		sz -= len;
2634	}
2635	pmap_switch(oldpm);
2636	PMAP_UNLOCK(pm);
2637}
2638
2639/*
2640 *	Increase the starting virtual address of the given mapping if a
2641 *	different alignment might result in more superpage mappings.
2642 */
2643void
2644pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2645    vm_offset_t *addr, vm_size_t size)
2646{
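	/* No superpage alignment is done on ia64. */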
2647}
2648
2649#include "opt_ddb.h"
2650
2651#ifdef DDB
2652
2653#include <ddb/ddb.h>
2654
2655static const char *psnames[] = {
2656	"1B",	"2B",	"4B",	"8B",
2657	"16B",	"32B",	"64B",	"128B",
2658	"256B",	"512B",	"1K",	"2K",
2659	"4K",	"8K",	"16K",	"32K",
2660	"64K",	"128K",	"256K",	"512K",
2661	"1M",	"2M",	"4M",	"8M",
2662	"16M",	"32M",	"64M",	"128M",
2663	"256M",	"512M",	"1G",	"2G"
2664};
2665
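/*
 * Dump the CPU's translation registers via PAL_VM_TR_READ; type 0
 * selects the instruction TRs and type 1 the data TRs.
 */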
2666static void
2667print_trs(int type)
2668{
2669	struct ia64_pal_result res;
2670	int i, maxtr;
2671	struct {
2672		pt_entry_t	pte;
2673		uint64_t	itir;
2674		uint64_t	ifa;
2675		struct ia64_rr	rr;
2676	} buf;
2677	static const char *manames[] = {
2678		"WB",	"bad",	"bad",	"bad",
2679		"UC",	"UCE",	"WC",	"NaT",
2680	};
2681
2682	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2683	if (res.pal_status != 0) {
2684		db_printf("Can't get VM summary\n");
2685		return;
2686	}
2687
2688	if (type == 0)
2689		maxtr = (res.pal_result[0] >> 40) & 0xff;
2690	else
2691		maxtr = (res.pal_result[0] >> 32) & 0xff;
2692
2693	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2694	for (i = 0; i <= maxtr; i++) {
2695		bzero(&buf, sizeof(buf));
2696		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2697		    ia64_tpa((uint64_t)&buf));
2698		if (!(res.pal_result[0] & 1))
2699			buf.pte &= ~PTE_AR_MASK;
2700		if (!(res.pal_result[0] & 2))
2701			buf.pte &= ~PTE_PL_MASK;
2702		if (!(res.pal_result[0] & 4))
2703			pmap_clear_dirty(&buf);
2704		if (!(res.pal_result[0] & 8))
2705			buf.pte &= ~PTE_MA_MASK;
2706		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2707		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2708		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2709		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2710		    (buf.pte & PTE_ED) ? 1 : 0,
2711		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2712		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2713		    (pmap_dirty(&buf)) ? 1 : 0,
2714		    (pmap_accessed(&buf)) ? 1 : 0,
2715		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2716		    (pmap_present(&buf)) ? 1 : 0,
2717		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2718	}
2719}
2720
2721DB_COMMAND(itr, db_itr)
2722{
2723	print_trs(0);
2724}
2725
2726DB_COMMAND(dtr, db_dtr)
2727{
2728	print_trs(1);
2729}
2730
2731DB_COMMAND(rr, db_rr)
2732{
2733	int i;
2734	uint64_t t;
2735	struct ia64_rr rr;
2736
2737	printf("RR RID    PgSz VE\n");
2738	for (i = 0; i < 8; i++) {
2739		__asm __volatile ("mov %0=rr[%1]"
2740				  : "=r"(t)
2741				  : "r"(IA64_RR_BASE(i)));
2742		*(uint64_t *) &rr = t;
2743		printf("%d  %06x %4s %d\n",
2744		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2745	}
2746}
2747
2748DB_COMMAND(thash, db_thash)
2749{
2750	if (!have_addr)
2751		return;
2752
2753	db_printf("%p\n", (void *) ia64_thash(addr));
2754}
2755
2756DB_COMMAND(ttag, db_ttag)
2757{
2758	if (!have_addr)
2759		return;
2760
2761	db_printf("0x%lx\n", ia64_ttag(addr));
2762}
2763
2764DB_COMMAND(kpte, db_kpte)
2765{
2766	struct ia64_lpte *pte;
2767
2768	if (!have_addr) {
2769		db_printf("usage: kpte <kva>\n");
2770		return;
2771	}
2772	if (addr < VM_INIT_KERNEL_ADDRESS) {
2773		db_printf("kpte: error: invalid <kva>\n");
2774		return;
2775	}
2776	pte = pmap_find_kpte(addr);
2777	db_printf("kpte at %p:\n", pte);
2778	db_printf("  pte  =%016lx\n", pte->pte);
2779	db_printf("  itir =%016lx\n", pte->itir);
2780	db_printf("  tag  =%016lx\n", pte->tag);
2781	db_printf("  chain=%016lx\n", pte->chain);
2782}
2783
2784#endif
2785