mmu_oea64.c revision 215163
1/*-
2 * Copyright (c) 2001 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *        This product includes software developed by the NetBSD
19 *        Foundation, Inc. and its contributors.
20 * 4. Neither the name of The NetBSD Foundation nor the names of its
21 *    contributors may be used to endorse or promote products derived
22 *    from this software without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36/*-
37 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
38 * Copyright (C) 1995, 1996 TooLs GmbH.
39 * All rights reserved.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 *    notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 *    notice, this list of conditions and the following disclaimer in the
48 *    documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 *    must display the following acknowledgement:
51 *	This product includes software developed by TooLs GmbH.
52 * 4. The name of TooLs GmbH may not be used to endorse or promote products
53 *    derived from this software without specific prior written permission.
54 *
55 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
58 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
61 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
62 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
63 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
64 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 *
66 * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $
67 */
68/*-
69 * Copyright (C) 2001 Benno Rice.
70 * All rights reserved.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the above copyright
76 *    notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 *    notice, this list of conditions and the following disclaimer in the
79 *    documentation and/or other materials provided with the distribution.
80 *
81 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
82 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
83 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
84 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
85 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
86 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
87 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
88 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
89 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
90 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
91 */
92
93#include <sys/cdefs.h>
94__FBSDID("$FreeBSD: head/sys/powerpc/aim/mmu_oea64.c 215163 2010-11-12 05:12:38Z nwhitehorn $");
95
96/*
97 * Manages physical address maps.
98 *
99 * In addition to hardware address maps, this module is called upon to
100 * provide software-use-only maps which may or may not be stored in the
101 * same form as hardware maps.  These pseudo-maps are used to store
102 * intermediate results from copy operations to and from address spaces.
103 *
104 * Since the information managed by this module is also stored by the
105 * logical address mapping module, this module may throw away valid virtual
106 * to physical mappings at almost any time.  However, invalidations of
107 * mappings must be done as requested.
108 *
109 * In order to cope with hardware architectures which make virtual to
110 * physical map invalidates expensive, this module may delay invalidate
111 * reduced protection operations until such time as they are actually
112 * necessary.  This module is given full information as to which processors
113 * are currently using which maps, and to when physical maps must be made
114 * correct.
115 */
116
117#include "opt_kstack_pages.h"
118
119#include <sys/param.h>
120#include <sys/kernel.h>
121#include <sys/ktr.h>
122#include <sys/lock.h>
123#include <sys/msgbuf.h>
124#include <sys/mutex.h>
125#include <sys/proc.h>
126#include <sys/sysctl.h>
127#include <sys/systm.h>
128#include <sys/vmmeter.h>
129
130#include <sys/kdb.h>
131
132#include <dev/ofw/openfirm.h>
133
134#include <vm/vm.h>
135#include <vm/vm_param.h>
136#include <vm/vm_kern.h>
137#include <vm/vm_page.h>
138#include <vm/vm_map.h>
139#include <vm/vm_object.h>
140#include <vm/vm_extern.h>
141#include <vm/vm_pageout.h>
142#include <vm/vm_pager.h>
143#include <vm/uma.h>
144
145#include <machine/_inttypes.h>
146#include <machine/cpu.h>
147#include <machine/platform.h>
148#include <machine/frame.h>
149#include <machine/md_var.h>
150#include <machine/psl.h>
151#include <machine/bat.h>
152#include <machine/hid.h>
153#include <machine/pte.h>
154#include <machine/sr.h>
155#include <machine/trap.h>
156#include <machine/mmuvar.h>
157
158#include "mmu_if.h"
159
160#define	MOEA_DEBUG
161
162#define TODO	panic("%s: not implemented", __func__);
163void moea64_release_vsid(uint64_t vsid);
164uintptr_t moea64_get_unique_vsid(void);
165
166static __inline register_t
167cntlzd(volatile register_t a) {
168	register_t b;
169	__asm ("cntlzd %0, %1" : "=r"(b) : "r"(a));
170	return b;
171}
172
173#define	PTESYNC()	__asm __volatile("ptesync");
174#define	TLBSYNC()	__asm __volatile("tlbsync; ptesync");
175#define	SYNC()		__asm __volatile("sync");
176#define	EIEIO()		__asm __volatile("eieio");
177
178/*
179 * The tlbie instruction must be executed in 64-bit mode
180 * so we have to twiddle MSR[SF] around every invocation.
181 * Just to add to the fun, exceptions must be off as well
182 * so that we can't trap in 64-bit mode. What a pain.
183 */
184struct mtx	tlbie_mutex;
185
186static __inline void
187TLBIE(uint64_t vpn) {
188#ifndef __powerpc64__
189	register_t vpn_hi, vpn_lo;
190	register_t msr;
191	register_t scratch;
192#endif
193
194	vpn <<= ADDR_PIDX_SHFT;
195	vpn &= ~(0xffffULL << 48);
196
197	mtx_lock_spin(&tlbie_mutex);
198#ifdef __powerpc64__
199	__asm __volatile("\
200	    ptesync; \
201	    tlbie %0; \
202	    eieio; \
203	    tlbsync; \
204	    ptesync;"
205	:: "r"(vpn) : "memory");
206#else
207	vpn_hi = (uint32_t)(vpn >> 32);
208	vpn_lo = (uint32_t)vpn;
209
210	__asm __volatile("\
211	    mfmsr %0; \
212	    mr %1, %0; \
213	    insrdi %1,%5,1,0; \
214	    mtmsrd %1; isync; \
215	    ptesync; \
216	    \
217	    sld %1,%2,%4; \
218	    or %1,%1,%3; \
219	    tlbie %1; \
220	    \
221	    mtmsrd %0; isync; \
222	    eieio; \
223	    tlbsync; \
224	    ptesync;"
225	: "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1)
226	    : "memory");
227#endif
228	mtx_unlock_spin(&tlbie_mutex);
229}
230
231#define DISABLE_TRANS(msr)	msr = mfmsr(); mtmsr(msr & ~PSL_DR); isync()
232#define ENABLE_TRANS(msr)	mtmsr(msr); isync()
233
234#define	VSID_MAKE(sr, hash)	((sr) | (((hash) & 0xfffff) << 4))
235#define	VSID_TO_HASH(vsid)	(((vsid) >> 4) & 0xfffff)
236#define	VSID_HASH_MASK		0x0000007fffffffffULL
237
238#define	PVO_PTEGIDX_MASK	0x007UL		/* which PTEG slot */
239#define	PVO_PTEGIDX_VALID	0x008UL		/* slot is valid */
240#define	PVO_WIRED		0x010UL		/* PVO entry is wired */
241#define	PVO_MANAGED		0x020UL		/* PVO entry is managed */
242#define	PVO_BOOTSTRAP		0x080UL		/* PVO entry allocated during
243						   bootstrap */
244#define PVO_FAKE		0x100UL		/* fictitious phys page */
245#define PVO_LARGE		0x200UL		/* large page */
246#define	PVO_VADDR(pvo)		((pvo)->pvo_vaddr & ~ADDR_POFF)
247#define PVO_ISFAKE(pvo)		((pvo)->pvo_vaddr & PVO_FAKE)
248#define	PVO_PTEGIDX_GET(pvo)	((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK)
249#define	PVO_PTEGIDX_ISSET(pvo)	((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID)
250#define	PVO_PTEGIDX_CLR(pvo)	\
251	((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK)))
252#define	PVO_PTEGIDX_SET(pvo, i)	\
253	((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID))
254#define	PVO_VSID(pvo)		((pvo)->pvo_vpn >> 16)
255
256#define	MOEA_PVO_CHECK(pvo)
257
258#define LOCK_TABLE() mtx_lock(&moea64_table_mutex)
259#define UNLOCK_TABLE() mtx_unlock(&moea64_table_mutex);
260#define ASSERT_TABLE_LOCK() mtx_assert(&moea64_table_mutex, MA_OWNED)
261
262struct ofw_map {
263	cell_t	om_va;
264	cell_t	om_len;
265	cell_t	om_pa_hi;
266	cell_t	om_pa_lo;
267	cell_t	om_mode;
268};
269
270/*
271 * Map of physical memory regions.
272 */
273static struct	mem_region *regions;
274static struct	mem_region *pregions;
275static u_int	phys_avail_count;
276static int	regions_sz, pregions_sz;
277
278extern void bs_remap_earlyboot(void);
279
280
281/*
282 * Lock for the pteg and pvo tables.
283 */
284struct mtx	moea64_table_mutex;
285struct mtx	moea64_slb_mutex;
286
287/*
288 * PTEG data.
289 */
290static struct	lpteg *moea64_pteg_table;
291u_int		moea64_pteg_count;
292u_int		moea64_pteg_mask;
293
294/*
295 * PVO data.
296 */
297struct	pvo_head *moea64_pvo_table;		/* pvo entries by pteg index */
298struct	pvo_head moea64_pvo_kunmanaged =	/* list of unmanaged pages */
299    LIST_HEAD_INITIALIZER(moea64_pvo_kunmanaged);
300
301uma_zone_t	moea64_upvo_zone; /* zone for pvo entries for unmanaged pages */
302uma_zone_t	moea64_mpvo_zone; /* zone for pvo entries for managed pages */
303
304#define	BPVO_POOL_SIZE	327680
305static struct	pvo_entry *moea64_bpvo_pool;
306static int	moea64_bpvo_pool_index = 0;
307
308#define	VSID_NBPW	(sizeof(u_int32_t) * 8)
309#ifdef __powerpc64__
310#define	NVSIDS		(NPMAPS * 16)
311#define VSID_HASHMASK	0xffffffffUL
312#else
313#define NVSIDS		NPMAPS
314#define VSID_HASHMASK	0xfffffUL
315#endif
316static u_int	moea64_vsid_bitmap[NVSIDS / VSID_NBPW];
317
318static boolean_t moea64_initialized = FALSE;
319
320/*
321 * Statistics.
322 */
323u_int	moea64_pte_valid = 0;
324u_int	moea64_pte_overflow = 0;
325u_int	moea64_pvo_entries = 0;
326u_int	moea64_pvo_enter_calls = 0;
327u_int	moea64_pvo_remove_calls = 0;
328SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD,
329    &moea64_pte_valid, 0, "");
330SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD,
331    &moea64_pte_overflow, 0, "");
332SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD,
333    &moea64_pvo_entries, 0, "");
334SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD,
335    &moea64_pvo_enter_calls, 0, "");
336SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD,
337    &moea64_pvo_remove_calls, 0, "");
338
339vm_offset_t	moea64_scratchpage_va[2];
340uint64_t	moea64_scratchpage_vpn[2];
341struct	lpte 	*moea64_scratchpage_pte[2];
342struct	mtx	moea64_scratchpage_mtx;
343
344uint64_t 	moea64_large_page_mask = 0;
345int		moea64_large_page_size = 0;
346int		moea64_large_page_shift = 0;
347
348/*
349 * Allocate physical memory for use in moea64_bootstrap.
350 */
351static vm_offset_t	moea64_bootstrap_alloc(vm_size_t, u_int);
352
353/*
354 * PTE calls.
355 */
356static int		moea64_pte_insert(u_int, struct lpte *);
357
358/*
359 * PVO calls.
360 */
361static int	moea64_pvo_enter(pmap_t, uma_zone_t, struct pvo_head *,
362		    vm_offset_t, vm_offset_t, uint64_t, int);
363static void	moea64_pvo_remove(struct pvo_entry *);
364static struct	pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t);
365static struct	lpte *moea64_pvo_to_pte(const struct pvo_entry *);
366
367/*
368 * Utility routines.
369 */
370static void		moea64_bootstrap(mmu_t mmup,
371			    vm_offset_t kernelstart, vm_offset_t kernelend);
372static void		moea64_cpu_bootstrap(mmu_t, int ap);
373static void		moea64_enter_locked(pmap_t, vm_offset_t, vm_page_t,
374			    vm_prot_t, boolean_t);
375static boolean_t	moea64_query_bit(vm_page_t, u_int64_t);
376static u_int		moea64_clear_bit(vm_page_t, u_int64_t);
377static void		moea64_kremove(mmu_t, vm_offset_t);
378static void		moea64_syncicache(pmap_t pmap, vm_offset_t va,
379			    vm_offset_t pa, vm_size_t sz);
380static void		tlbia(void);
381#ifdef __powerpc64__
382static void		slbia(void);
383#endif
384
385/*
386 * Kernel MMU interface
387 */
388void moea64_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t);
389void moea64_clear_modify(mmu_t, vm_page_t);
390void moea64_clear_reference(mmu_t, vm_page_t);
391void moea64_copy_page(mmu_t, vm_page_t, vm_page_t);
392void moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t);
393void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
394    vm_prot_t);
395void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
396vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t);
397vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t);
398void moea64_init(mmu_t);
399boolean_t moea64_is_modified(mmu_t, vm_page_t);
400boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
401boolean_t moea64_is_referenced(mmu_t, vm_page_t);
402boolean_t moea64_ts_referenced(mmu_t, vm_page_t);
403vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int);
404boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t);
405int moea64_page_wired_mappings(mmu_t, vm_page_t);
406void moea64_pinit(mmu_t, pmap_t);
407void moea64_pinit0(mmu_t, pmap_t);
408void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
409void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
410void moea64_qremove(mmu_t, vm_offset_t, int);
411void moea64_release(mmu_t, pmap_t);
412void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
413void moea64_remove_all(mmu_t, vm_page_t);
414void moea64_remove_write(mmu_t, vm_page_t);
415void moea64_zero_page(mmu_t, vm_page_t);
416void moea64_zero_page_area(mmu_t, vm_page_t, int, int);
417void moea64_zero_page_idle(mmu_t, vm_page_t);
418void moea64_activate(mmu_t, struct thread *);
419void moea64_deactivate(mmu_t, struct thread *);
420void *moea64_mapdev(mmu_t, vm_offset_t, vm_size_t);
421void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t);
422void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t);
423vm_offset_t moea64_kextract(mmu_t, vm_offset_t);
424void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma);
425void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma);
426void moea64_kenter(mmu_t, vm_offset_t, vm_offset_t);
427boolean_t moea64_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t);
428static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t);
429
430static mmu_method_t moea64_methods[] = {
431	MMUMETHOD(mmu_change_wiring,	moea64_change_wiring),
432	MMUMETHOD(mmu_clear_modify,	moea64_clear_modify),
433	MMUMETHOD(mmu_clear_reference,	moea64_clear_reference),
434	MMUMETHOD(mmu_copy_page,	moea64_copy_page),
435	MMUMETHOD(mmu_enter,		moea64_enter),
436	MMUMETHOD(mmu_enter_object,	moea64_enter_object),
437	MMUMETHOD(mmu_enter_quick,	moea64_enter_quick),
438	MMUMETHOD(mmu_extract,		moea64_extract),
439	MMUMETHOD(mmu_extract_and_hold,	moea64_extract_and_hold),
440	MMUMETHOD(mmu_init,		moea64_init),
441	MMUMETHOD(mmu_is_modified,	moea64_is_modified),
442	MMUMETHOD(mmu_is_prefaultable,	moea64_is_prefaultable),
443	MMUMETHOD(mmu_is_referenced,	moea64_is_referenced),
444	MMUMETHOD(mmu_ts_referenced,	moea64_ts_referenced),
445	MMUMETHOD(mmu_map,     		moea64_map),
446	MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick),
447	MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings),
448	MMUMETHOD(mmu_pinit,		moea64_pinit),
449	MMUMETHOD(mmu_pinit0,		moea64_pinit0),
450	MMUMETHOD(mmu_protect,		moea64_protect),
451	MMUMETHOD(mmu_qenter,		moea64_qenter),
452	MMUMETHOD(mmu_qremove,		moea64_qremove),
453	MMUMETHOD(mmu_release,		moea64_release),
454	MMUMETHOD(mmu_remove,		moea64_remove),
455	MMUMETHOD(mmu_remove_all,      	moea64_remove_all),
456	MMUMETHOD(mmu_remove_write,	moea64_remove_write),
457	MMUMETHOD(mmu_sync_icache,	moea64_sync_icache),
458	MMUMETHOD(mmu_zero_page,       	moea64_zero_page),
459	MMUMETHOD(mmu_zero_page_area,	moea64_zero_page_area),
460	MMUMETHOD(mmu_zero_page_idle,	moea64_zero_page_idle),
461	MMUMETHOD(mmu_activate,		moea64_activate),
462	MMUMETHOD(mmu_deactivate,      	moea64_deactivate),
463	MMUMETHOD(mmu_page_set_memattr,	moea64_page_set_memattr),
464
465	/* Internal interfaces */
466	MMUMETHOD(mmu_bootstrap,       	moea64_bootstrap),
467	MMUMETHOD(mmu_cpu_bootstrap,   	moea64_cpu_bootstrap),
468	MMUMETHOD(mmu_mapdev,		moea64_mapdev),
469	MMUMETHOD(mmu_mapdev_attr,	moea64_mapdev_attr),
470	MMUMETHOD(mmu_unmapdev,		moea64_unmapdev),
471	MMUMETHOD(mmu_kextract,		moea64_kextract),
472	MMUMETHOD(mmu_kenter,		moea64_kenter),
473	MMUMETHOD(mmu_kenter_attr,	moea64_kenter_attr),
474	MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped),
475
476	{ 0, 0 }
477};
478
479MMU_DEF(oea64_mmu, MMU_TYPE_G5, moea64_methods, 0);
480
481static __inline u_int
482va_to_pteg(uint64_t vsid, vm_offset_t addr, int large)
483{
484	uint64_t hash;
485	int shift;
486
487	shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT;
488	hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >>
489	    shift);
490	return (hash & moea64_pteg_mask);
491}
492
493static __inline struct pvo_head *
494vm_page_to_pvoh(vm_page_t m)
495{
496
497	return (&m->md.mdpg_pvoh);
498}
499
500static __inline void
501moea64_attr_clear(vm_page_t m, u_int64_t ptebit)
502{
503
504	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
505	m->md.mdpg_attrs &= ~ptebit;
506}
507
508static __inline u_int64_t
509moea64_attr_fetch(vm_page_t m)
510{
511
512	return (m->md.mdpg_attrs);
513}
514
515static __inline void
516moea64_attr_save(vm_page_t m, u_int64_t ptebit)
517{
518
519	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
520	m->md.mdpg_attrs |= ptebit;
521}
522
523static __inline void
524moea64_pte_create(struct lpte *pt, uint64_t vsid, vm_offset_t va,
525    uint64_t pte_lo, int flags)
526{
527
528	ASSERT_TABLE_LOCK();
529
530	/*
531	 * Construct a PTE.  Default to IMB initially.  Valid bit only gets
532	 * set when the real pte is set in memory.
533	 *
534	 * Note: Don't set the valid bit for correct operation of tlb update.
535	 */
536	pt->pte_hi = (vsid << LPTE_VSID_SHIFT) |
537	    (((uint64_t)(va & ADDR_PIDX) >> ADDR_API_SHFT64) & LPTE_API);
538
539	if (flags & PVO_LARGE)
540		pt->pte_hi |= LPTE_BIG;
541
542	pt->pte_lo = pte_lo;
543}
544
545static __inline void
546moea64_pte_synch(struct lpte *pt, struct lpte *pvo_pt)
547{
548
549	ASSERT_TABLE_LOCK();
550
551	pvo_pt->pte_lo |= pt->pte_lo & (LPTE_REF | LPTE_CHG);
552}
553
554static __inline void
555moea64_pte_clear(struct lpte *pt, uint64_t vpn, u_int64_t ptebit)
556{
557	ASSERT_TABLE_LOCK();
558
559	/*
560	 * As shown in Section 7.6.3.2.3
561	 */
562	pt->pte_lo &= ~ptebit;
563	TLBIE(vpn);
564}
565
566static __inline void
567moea64_pte_set(struct lpte *pt, struct lpte *pvo_pt)
568{
569
570	ASSERT_TABLE_LOCK();
571	pvo_pt->pte_hi |= LPTE_VALID;
572
573	/*
574	 * Update the PTE as defined in section 7.6.3.1.
575	 * Note that the REF/CHG bits are from pvo_pt and thus should have
576	 * been saved so this routine can restore them (if desired).
577	 */
578	pt->pte_lo = pvo_pt->pte_lo;
579	EIEIO();
580	pt->pte_hi = pvo_pt->pte_hi;
581	PTESYNC();
582	moea64_pte_valid++;
583}
584
585static __inline void
586moea64_pte_unset(struct lpte *pt, struct lpte *pvo_pt, uint64_t vpn)
587{
588	ASSERT_TABLE_LOCK();
589	pvo_pt->pte_hi &= ~LPTE_VALID;
590
591	/*
592	 * Force the reg & chg bits back into the PTEs.
593	 */
594	SYNC();
595
596	/*
597	 * Invalidate the pte.
598	 */
599	pt->pte_hi &= ~LPTE_VALID;
600	TLBIE(vpn);
601
602	/*
603	 * Save the reg & chg bits.
604	 */
605	moea64_pte_synch(pt, pvo_pt);
606	moea64_pte_valid--;
607}
608
609static __inline void
610moea64_pte_change(struct lpte *pt, struct lpte *pvo_pt, uint64_t vpn)
611{
612
613	/*
614	 * Invalidate the PTE
615	 */
616	moea64_pte_unset(pt, pvo_pt, vpn);
617	moea64_pte_set(pt, pvo_pt);
618}
619
620static __inline uint64_t
621moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma)
622{
623	uint64_t pte_lo;
624	int i;
625
626	if (ma != VM_MEMATTR_DEFAULT) {
627		switch (ma) {
628		case VM_MEMATTR_UNCACHEABLE:
629			return (LPTE_I | LPTE_G);
630		case VM_MEMATTR_WRITE_COMBINING:
631		case VM_MEMATTR_WRITE_BACK:
632		case VM_MEMATTR_PREFETCHABLE:
633			return (LPTE_I);
634		case VM_MEMATTR_WRITE_THROUGH:
635			return (LPTE_W | LPTE_M);
636		}
637	}
638
639	/*
640	 * Assume the page is cache inhibited and access is guarded unless
641	 * it's in our available memory array.
642	 */
643	pte_lo = LPTE_I | LPTE_G;
644	for (i = 0; i < pregions_sz; i++) {
645		if ((pa >= pregions[i].mr_start) &&
646		    (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
647			pte_lo &= ~(LPTE_I | LPTE_G);
648			pte_lo |= LPTE_M;
649			break;
650		}
651	}
652
653	return pte_lo;
654}
655
656/*
657 * Quick sort callout for comparing memory regions.
658 */
659static int	mr_cmp(const void *a, const void *b);
660static int	om_cmp(const void *a, const void *b);
661
662static int
663mr_cmp(const void *a, const void *b)
664{
665	const struct	mem_region *regiona;
666	const struct	mem_region *regionb;
667
668	regiona = a;
669	regionb = b;
670	if (regiona->mr_start < regionb->mr_start)
671		return (-1);
672	else if (regiona->mr_start > regionb->mr_start)
673		return (1);
674	else
675		return (0);
676}
677
678static int
679om_cmp(const void *a, const void *b)
680{
681	const struct	ofw_map *mapa;
682	const struct	ofw_map *mapb;
683
684	mapa = a;
685	mapb = b;
686	if (mapa->om_pa_hi < mapb->om_pa_hi)
687		return (-1);
688	else if (mapa->om_pa_hi > mapb->om_pa_hi)
689		return (1);
690	else if (mapa->om_pa_lo < mapb->om_pa_lo)
691		return (-1);
692	else if (mapa->om_pa_lo > mapb->om_pa_lo)
693		return (1);
694	else
695		return (0);
696}
697
698static void
699moea64_cpu_bootstrap(mmu_t mmup, int ap)
700{
701	int i = 0;
702	#ifdef __powerpc64__
703	struct slb *slb = PCPU_GET(slb);
704	#endif
705
706	/*
707	 * Initialize segment registers and MMU
708	 */
709
710	mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); isync();
711
712	/*
713	 * Install kernel SLB entries
714	 */
715
716	#ifdef __powerpc64__
717		slbia();
718
719		for (i = 0; i < 64; i++) {
720			if (!(slb[i].slbe & SLBE_VALID))
721				continue;
722
723			__asm __volatile ("slbmte %0, %1" ::
724			    "r"(slb[i].slbv), "r"(slb[i].slbe));
725		}
726	#else
727		for (i = 0; i < 16; i++)
728			mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]);
729	#endif
730
731	/*
732	 * Install page table
733	 */
734
735	__asm __volatile ("ptesync; mtsdr1 %0; isync"
736	    :: "r"((uintptr_t)moea64_pteg_table
737		     | (64 - cntlzd(moea64_pteg_mask >> 11))));
738	tlbia();
739}
740
741static void
742moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz)
743{
744	struct ofw_map	translations[sz/sizeof(struct ofw_map)];
745	register_t	msr;
746	vm_offset_t	off;
747	vm_paddr_t	pa_base;
748	int		i, ofw_mappings;
749
750	bzero(translations, sz);
751	if (OF_getprop(mmu, "translations", translations, sz) == -1)
752		panic("moea64_bootstrap: can't get ofw translations");
753
754	CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations");
755	sz /= sizeof(*translations);
756	qsort(translations, sz, sizeof (*translations), om_cmp);
757
758	for (i = 0, ofw_mappings = 0; i < sz; i++) {
759		CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x",
760		    (uint32_t)(translations[i].om_pa_lo), translations[i].om_va,
761		    translations[i].om_len);
762
763		if (translations[i].om_pa_lo % PAGE_SIZE)
764			panic("OFW translation not page-aligned!");
765
766		pa_base = translations[i].om_pa_lo;
767
768	      #ifdef __powerpc64__
769		pa_base += (vm_offset_t)translations[i].om_pa_hi << 32;
770	      #else
771		if (translations[i].om_pa_hi)
772			panic("OFW translations above 32-bit boundary!");
773	      #endif
774
775		/* Now enter the pages for this mapping */
776
777		DISABLE_TRANS(msr);
778		for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) {
779			if (moea64_pvo_find_va(kernel_pmap,
780			    translations[i].om_va + off) != NULL)
781				continue;
782
783			moea64_kenter(mmup, translations[i].om_va + off,
784			    pa_base + off);
785
786			ofw_mappings++;
787		}
788		ENABLE_TRANS(msr);
789	}
790}
791
792#ifdef __powerpc64__
793static void
794moea64_probe_large_page(void)
795{
796	uint16_t pvr = mfpvr() >> 16;
797
798	switch (pvr) {
799	case IBM970:
800	case IBM970FX:
801	case IBM970MP:
802		powerpc_sync(); isync();
803		mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG);
804		powerpc_sync(); isync();
805
806		/* FALLTHROUGH */
807	case IBMCELLBE:
808		moea64_large_page_size = 0x1000000; /* 16 MB */
809		moea64_large_page_shift = 24;
810		break;
811	default:
812		moea64_large_page_size = 0;
813	}
814
815	moea64_large_page_mask = moea64_large_page_size - 1;
816}
817
818static void
819moea64_bootstrap_slb_prefault(vm_offset_t va, int large)
820{
821	struct slb *cache;
822	struct slb entry;
823	uint64_t esid, slbe;
824	uint64_t i;
825
826	cache = PCPU_GET(slb);
827	esid = va >> ADDR_SR_SHFT;
828	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
829
830	for (i = 0; i < 64; i++) {
831		if (cache[i].slbe == (slbe | i))
832			return;
833	}
834
835	entry.slbe = slbe;
836	entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT;
837	if (large)
838		entry.slbv |= SLBV_L;
839
840	slb_insert_kernel(entry.slbe, entry.slbv);
841}
842#endif
843
844static void
845moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
846    vm_offset_t kernelend)
847{
848	register_t msr;
849	vm_paddr_t pa;
850	vm_offset_t size, off;
851	uint64_t pte_lo;
852	int i;
853
854	if (moea64_large_page_size == 0)
855		hw_direct_map = 0;
856
857	DISABLE_TRANS(msr);
858	if (hw_direct_map) {
859		PMAP_LOCK(kernel_pmap);
860		for (i = 0; i < pregions_sz; i++) {
861		  for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
862		     pregions[i].mr_size; pa += moea64_large_page_size) {
863			pte_lo = LPTE_M;
864
865			/*
866			 * Set memory access as guarded if prefetch within
867			 * the page could exit the available physmem area.
868			 */
869			if (pa & moea64_large_page_mask) {
870				pa &= moea64_large_page_mask;
871				pte_lo |= LPTE_G;
872			}
873			if (pa + moea64_large_page_size >
874			    pregions[i].mr_start + pregions[i].mr_size)
875				pte_lo |= LPTE_G;
876
877			moea64_pvo_enter(kernel_pmap, moea64_upvo_zone,
878				    &moea64_pvo_kunmanaged, pa, pa,
879				    pte_lo, PVO_WIRED | PVO_LARGE |
880				    VM_PROT_EXECUTE);
881		  }
882		}
883		PMAP_UNLOCK(kernel_pmap);
884	} else {
885		size = moea64_pteg_count * sizeof(struct lpteg);
886		off = (vm_offset_t)(moea64_pteg_table);
887		for (pa = off; pa < off + size; pa += PAGE_SIZE)
888			moea64_kenter(mmup, pa, pa);
889		size = sizeof(struct pvo_head) * moea64_pteg_count;
890		off = (vm_offset_t)(moea64_pvo_table);
891		for (pa = off; pa < off + size; pa += PAGE_SIZE)
892			moea64_kenter(mmup, pa, pa);
893		size = BPVO_POOL_SIZE*sizeof(struct pvo_entry);
894		off = (vm_offset_t)(moea64_bpvo_pool);
895		for (pa = off; pa < off + size; pa += PAGE_SIZE)
896		moea64_kenter(mmup, pa, pa);
897
898		/*
899		 * Map certain important things, like ourselves.
900		 *
901		 * NOTE: We do not map the exception vector space. That code is
902		 * used only in real mode, and leaving it unmapped allows us to
903		 * catch NULL pointer deferences, instead of making NULL a valid
904		 * address.
905		 */
906
907		for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
908		    pa += PAGE_SIZE)
909			moea64_kenter(mmup, pa, pa);
910	}
911	ENABLE_TRANS(msr);
912}
913
914static void
915moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
916{
917	ihandle_t	mmui;
918	phandle_t	chosen;
919	phandle_t	mmu;
920	size_t		sz;
921	int		i, j;
922	vm_size_t	size, physsz, hwphyssz;
923	vm_offset_t	pa, va;
924	register_t	msr;
925	void		*dpcpu;
926
927#ifndef __powerpc64__
928	/* We don't have a direct map since there is no BAT */
929	hw_direct_map = 0;
930
931	/* Make sure battable is zero, since we have no BAT */
932	for (i = 0; i < 16; i++) {
933		battable[i].batu = 0;
934		battable[i].batl = 0;
935	}
936#else
937	moea64_probe_large_page();
938
939	/* Use a direct map if we have large page support */
940	if (moea64_large_page_size > 0)
941		hw_direct_map = 1;
942	else
943		hw_direct_map = 0;
944#endif
945
946	/* Get physical memory regions from firmware */
947	mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
948	CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
949
950	qsort(pregions, pregions_sz, sizeof(*pregions), mr_cmp);
951	if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz)
952		panic("moea64_bootstrap: phys_avail too small");
953	qsort(regions, regions_sz, sizeof(*regions), mr_cmp);
954	phys_avail_count = 0;
955	physsz = 0;
956	hwphyssz = 0;
957	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
958	for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
959		CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start,
960		    regions[i].mr_start + regions[i].mr_size,
961		    regions[i].mr_size);
962		if (hwphyssz != 0 &&
963		    (physsz + regions[i].mr_size) >= hwphyssz) {
964			if (physsz < hwphyssz) {
965				phys_avail[j] = regions[i].mr_start;
966				phys_avail[j + 1] = regions[i].mr_start +
967				    hwphyssz - physsz;
968				physsz = hwphyssz;
969				phys_avail_count++;
970			}
971			break;
972		}
973		phys_avail[j] = regions[i].mr_start;
974		phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
975		phys_avail_count++;
976		physsz += regions[i].mr_size;
977	}
978
979	/* Check for overlap with the kernel and exception vectors */
980	for (j = 0; j < 2*phys_avail_count; j+=2) {
981		if (phys_avail[j] < EXC_LAST)
982			phys_avail[j] += EXC_LAST;
983
984		if (kernelstart >= phys_avail[j] &&
985		    kernelstart < phys_avail[j+1]) {
986			if (kernelend < phys_avail[j+1]) {
987				phys_avail[2*phys_avail_count] =
988				    (kernelend & ~PAGE_MASK) + PAGE_SIZE;
989				phys_avail[2*phys_avail_count + 1] =
990				    phys_avail[j+1];
991				phys_avail_count++;
992			}
993
994			phys_avail[j+1] = kernelstart & ~PAGE_MASK;
995		}
996
997		if (kernelend >= phys_avail[j] &&
998		    kernelend < phys_avail[j+1]) {
999			if (kernelstart > phys_avail[j]) {
1000				phys_avail[2*phys_avail_count] = phys_avail[j];
1001				phys_avail[2*phys_avail_count + 1] =
1002				    kernelstart & ~PAGE_MASK;
1003				phys_avail_count++;
1004			}
1005
1006			phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE;
1007		}
1008	}
1009
1010	physmem = btoc(physsz);
1011
1012	/*
1013	 * Allocate PTEG table.
1014	 */
1015#ifdef PTEGCOUNT
1016	moea64_pteg_count = PTEGCOUNT;
1017#else
1018	moea64_pteg_count = 0x1000;
1019
1020	while (moea64_pteg_count < physmem)
1021		moea64_pteg_count <<= 1;
1022
1023	moea64_pteg_count >>= 1;
1024#endif /* PTEGCOUNT */
1025
1026	size = moea64_pteg_count * sizeof(struct lpteg);
1027	CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes",
1028	    moea64_pteg_count, size);
1029
1030	/*
1031	 * We now need to allocate memory. This memory, to be allocated,
1032	 * has to reside in a page table. The page table we are about to
1033	 * allocate. We don't have BAT. So drop to data real mode for a minute
1034	 * as a measure of last resort. We do this a couple times.
1035	 */
1036
1037	moea64_pteg_table = (struct lpteg *)moea64_bootstrap_alloc(size, size);
1038	DISABLE_TRANS(msr);
1039	bzero((void *)moea64_pteg_table, moea64_pteg_count * sizeof(struct lpteg));
1040	ENABLE_TRANS(msr);
1041
1042	moea64_pteg_mask = moea64_pteg_count - 1;
1043
1044	CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table);
1045
1046	/*
1047	 * Allocate pv/overflow lists.
1048	 */
1049	size = sizeof(struct pvo_head) * moea64_pteg_count;
1050
1051	moea64_pvo_table = (struct pvo_head *)moea64_bootstrap_alloc(size,
1052	    PAGE_SIZE);
1053	CTR1(KTR_PMAP, "moea64_bootstrap: PVO table at %p", moea64_pvo_table);
1054
1055	DISABLE_TRANS(msr);
1056	for (i = 0; i < moea64_pteg_count; i++)
1057		LIST_INIT(&moea64_pvo_table[i]);
1058	ENABLE_TRANS(msr);
1059
1060	/*
1061	 * Initialize the lock that synchronizes access to the pteg and pvo
1062	 * tables.
1063	 */
1064	mtx_init(&moea64_table_mutex, "pmap table", NULL, MTX_DEF |
1065	    MTX_RECURSE);
1066	mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF);
1067
1068	/*
1069	 * Initialize the TLBIE lock. TLBIE can only be executed by one CPU.
1070	 */
1071	mtx_init(&tlbie_mutex, "tlbie mutex", NULL, MTX_SPIN);
1072
1073	/*
1074	 * Initialise the unmanaged pvo pool.
1075	 */
1076	moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc(
1077		BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0);
1078	moea64_bpvo_pool_index = 0;
1079
1080	/*
1081	 * Make sure kernel vsid is allocated as well as VSID 0.
1082	 */
1083	#ifndef __powerpc64__
1084	moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW]
1085		|= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
1086	moea64_vsid_bitmap[0] |= 1;
1087	#endif
1088
1089	/*
1090	 * Initialize the kernel pmap (which is statically allocated).
1091	 */
1092	#ifdef __powerpc64__
1093	for (i = 0; i < 64; i++) {
1094		pcpup->pc_slb[i].slbv = 0;
1095		pcpup->pc_slb[i].slbe = 0;
1096	}
1097	#else
1098	for (i = 0; i < 16; i++)
1099		kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
1100	#endif
1101
1102	kernel_pmap->pmap_phys = kernel_pmap;
1103	kernel_pmap->pm_active = ~0;
1104
1105	PMAP_LOCK_INIT(kernel_pmap);
1106
1107	/*
1108	 * Now map in all the other buffers we allocated earlier
1109	 */
1110
1111	moea64_setup_direct_map(mmup, kernelstart, kernelend);
1112
1113	/*
1114	 * Set up the Open Firmware pmap and add its mappings if not in real
1115	 * mode.
1116	 */
1117
1118	chosen = OF_finddevice("/chosen");
1119	if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) {
1120	    mmu = OF_instance_to_package(mmui);
1121	    if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1)
1122		sz = 0;
1123	    if (sz > 6144 /* tmpstksz - 2 KB headroom */)
1124		panic("moea64_bootstrap: too many ofw translations");
1125
1126	    if (sz > 0)
1127		moea64_add_ofw_mappings(mmup, mmu, sz);
1128	}
1129
1130	/*
1131	 * Calculate the last available physical address.
1132	 */
1133	for (i = 0; phys_avail[i + 2] != 0; i += 2)
1134		;
1135	Maxmem = powerpc_btop(phys_avail[i + 1]);
1136
1137	/*
1138	 * Initialize MMU and remap early physical mappings
1139	 */
1140	moea64_cpu_bootstrap(mmup,0);
1141	mtmsr(mfmsr() | PSL_DR | PSL_IR); isync();
1142	pmap_bootstrapped++;
1143	bs_remap_earlyboot();
1144
1145	/*
1146	 * Set the start and end of kva.
1147	 */
1148	virtual_avail = VM_MIN_KERNEL_ADDRESS;
1149	virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS;
1150
1151	/*
1152	 * Map the entire KVA range into the SLB. We must not fault there.
1153	 */
1154	#ifdef __powerpc64__
1155	for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
1156		moea64_bootstrap_slb_prefault(va, 0);
1157	#endif
1158
1159	/*
1160	 * Figure out how far we can extend virtual_end into segment 16
1161	 * without running into existing mappings. Segment 16 is guaranteed
1162	 * to contain neither RAM nor devices (at least on Apple hardware),
1163	 * but will generally contain some OFW mappings we should not
1164	 * step on.
1165	 */
1166
1167	#ifndef __powerpc64__	/* KVA is in high memory on PPC64 */
1168	PMAP_LOCK(kernel_pmap);
1169	while (virtual_end < VM_MAX_KERNEL_ADDRESS &&
1170	    moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL)
1171		virtual_end += PAGE_SIZE;
1172	PMAP_UNLOCK(kernel_pmap);
1173	#endif
1174
1175	/*
1176	 * Allocate some things for page zeroing. We put this directly
1177	 * in the page table, marked with LPTE_LOCKED, to avoid any
1178	 * of the PVO book-keeping or other parts of the VM system
1179	 * from even knowing that this hack exists.
1180	 */
1181
1182	if (!hw_direct_map) {
1183		mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL,
1184		    MTX_DEF);
1185		for (i = 0; i < 2; i++) {
1186			struct lpte pt;
1187			uint64_t vsid;
1188			int pteidx, ptegidx;
1189
1190			moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE;
1191			virtual_end -= PAGE_SIZE;
1192
1193			LOCK_TABLE();
1194
1195			vsid = va_to_vsid(kernel_pmap,
1196			    moea64_scratchpage_va[i]);
1197			moea64_pte_create(&pt, vsid, moea64_scratchpage_va[i],
1198			    LPTE_NOEXEC, 0);
1199			pt.pte_hi |= LPTE_LOCKED;
1200
1201			moea64_scratchpage_vpn[i] = (vsid << 16) |
1202			    ((moea64_scratchpage_va[i] & ADDR_PIDX) >>
1203			    ADDR_PIDX_SHFT);
1204			ptegidx = va_to_pteg(vsid, moea64_scratchpage_va[i], 0);
1205			pteidx = moea64_pte_insert(ptegidx, &pt);
1206			if (pt.pte_hi & LPTE_HID)
1207				ptegidx ^= moea64_pteg_mask;
1208
1209			moea64_scratchpage_pte[i] =
1210			    &moea64_pteg_table[ptegidx].pt[pteidx];
1211
1212			UNLOCK_TABLE();
1213		}
1214	}
1215
1216	/*
1217	 * Allocate a kernel stack with a guard page for thread0 and map it
1218	 * into the kernel page map.
1219	 */
1220	pa = moea64_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE);
1221	va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
1222	virtual_avail = va + KSTACK_PAGES * PAGE_SIZE;
1223	CTR2(KTR_PMAP, "moea_bootstrap: kstack0 at %#x (%#x)", pa, va);
1224	thread0.td_kstack = va;
1225	thread0.td_kstack_pages = KSTACK_PAGES;
1226	for (i = 0; i < KSTACK_PAGES; i++) {
1227		moea64_kenter(mmup, va, pa);
1228		pa += PAGE_SIZE;
1229		va += PAGE_SIZE;
1230	}
1231
1232	/*
1233	 * Allocate virtual address space for the message buffer.
1234	 */
1235	pa = msgbuf_phys = moea64_bootstrap_alloc(MSGBUF_SIZE, PAGE_SIZE);
1236	msgbufp = (struct msgbuf *)virtual_avail;
1237	va = virtual_avail;
1238	virtual_avail += round_page(MSGBUF_SIZE);
1239	while (va < virtual_avail) {
1240		moea64_kenter(mmup, va, pa);
1241		pa += PAGE_SIZE;
1242		va += PAGE_SIZE;
1243	}
1244
1245	/*
1246	 * Allocate virtual address space for the dynamic percpu area.
1247	 */
1248	pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
1249	dpcpu = (void *)virtual_avail;
1250	va = virtual_avail;
1251	virtual_avail += DPCPU_SIZE;
1252	while (va < virtual_avail) {
1253		moea64_kenter(mmup, va, pa);
1254		pa += PAGE_SIZE;
1255		va += PAGE_SIZE;
1256	}
1257	dpcpu_init(dpcpu, 0);
1258}
1259
1260/*
1261 * Activate a user pmap.  The pmap must be activated before its address
1262 * space can be accessed in any way.
1263 */
1264void
1265moea64_activate(mmu_t mmu, struct thread *td)
1266{
1267	pmap_t	pm;
1268
1269	pm = &td->td_proc->p_vmspace->vm_pmap;
1270	pm->pm_active |= PCPU_GET(cpumask);
1271
1272	#ifdef __powerpc64__
1273	PCPU_SET(userslb, pm->pm_slb);
1274	#else
1275	PCPU_SET(curpmap, pm->pmap_phys);
1276	#endif
1277}
1278
1279void
1280moea64_deactivate(mmu_t mmu, struct thread *td)
1281{
1282	pmap_t	pm;
1283
1284	pm = &td->td_proc->p_vmspace->vm_pmap;
1285	pm->pm_active &= ~(PCPU_GET(cpumask));
1286	#ifdef __powerpc64__
1287	PCPU_SET(userslb, NULL);
1288	#else
1289	PCPU_SET(curpmap, NULL);
1290	#endif
1291}
1292
1293void
1294moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired)
1295{
1296	struct	pvo_entry *pvo;
1297	struct	lpte *pt;
1298	uint64_t vsid;
1299	int	i, ptegidx;
1300
1301	PMAP_LOCK(pm);
1302	pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
1303
1304	if (pvo != NULL) {
1305		LOCK_TABLE();
1306		pt = moea64_pvo_to_pte(pvo);
1307
1308		if (wired) {
1309			if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
1310				pm->pm_stats.wired_count++;
1311			pvo->pvo_vaddr |= PVO_WIRED;
1312			pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED;
1313		} else {
1314			if ((pvo->pvo_vaddr & PVO_WIRED) != 0)
1315				pm->pm_stats.wired_count--;
1316			pvo->pvo_vaddr &= ~PVO_WIRED;
1317			pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED;
1318		}
1319
1320		if (pt != NULL) {
1321			/* Update wiring flag in page table. */
1322			moea64_pte_change(pt, &pvo->pvo_pte.lpte,
1323			    pvo->pvo_vpn);
1324		} else if (wired) {
1325			/*
1326			 * If we are wiring the page, and it wasn't in the
1327			 * page table before, add it.
1328			 */
1329			vsid = PVO_VSID(pvo);
1330			ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo),
1331			    pvo->pvo_vaddr & PVO_LARGE);
1332
1333			i = moea64_pte_insert(ptegidx, &pvo->pvo_pte.lpte);
1334			if (i >= 0) {
1335				PVO_PTEGIDX_CLR(pvo);
1336				PVO_PTEGIDX_SET(pvo, i);
1337			}
1338		}
1339
1340		UNLOCK_TABLE();
1341	}
1342	PMAP_UNLOCK(pm);
1343}
1344
1345/*
1346 * This goes through and sets the physical address of our
1347 * special scratch PTE to the PA we want to zero or copy. Because
1348 * of locking issues (this can get called in pvo_enter() by
1349 * the UMA allocator), we can't use most other utility functions here
1350 */
1351
1352static __inline
1353void moea64_set_scratchpage_pa(int which, vm_offset_t pa) {
1354
1355	KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!"));
1356	mtx_assert(&moea64_scratchpage_mtx, MA_OWNED);
1357
1358	moea64_scratchpage_pte[which]->pte_hi &= ~LPTE_VALID;
1359	TLBIE(moea64_scratchpage_vpn[which]);
1360
1361	moea64_scratchpage_pte[which]->pte_lo &=
1362	    ~(LPTE_WIMG | LPTE_RPGN);
1363	moea64_scratchpage_pte[which]->pte_lo |=
1364	    moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa;
1365	EIEIO();
1366
1367	moea64_scratchpage_pte[which]->pte_hi |= LPTE_VALID;
1368	PTESYNC(); isync();
1369}
1370
1371void
1372moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst)
1373{
1374	vm_offset_t	dst;
1375	vm_offset_t	src;
1376
1377	dst = VM_PAGE_TO_PHYS(mdst);
1378	src = VM_PAGE_TO_PHYS(msrc);
1379
1380	if (hw_direct_map) {
1381		kcopy((void *)src, (void *)dst, PAGE_SIZE);
1382	} else {
1383		mtx_lock(&moea64_scratchpage_mtx);
1384
1385		moea64_set_scratchpage_pa(0,src);
1386		moea64_set_scratchpage_pa(1,dst);
1387
1388		kcopy((void *)moea64_scratchpage_va[0],
1389		    (void *)moea64_scratchpage_va[1], PAGE_SIZE);
1390
1391		mtx_unlock(&moea64_scratchpage_mtx);
1392	}
1393}
1394
1395void
1396moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size)
1397{
1398	vm_offset_t pa = VM_PAGE_TO_PHYS(m);
1399
1400	if (!moea64_initialized)
1401		panic("moea64_zero_page: can't zero pa %#" PRIxPTR, pa);
1402	if (size + off > PAGE_SIZE)
1403		panic("moea64_zero_page: size + off > PAGE_SIZE");
1404
1405	if (hw_direct_map) {
1406		bzero((caddr_t)pa + off, size);
1407	} else {
1408		mtx_lock(&moea64_scratchpage_mtx);
1409		moea64_set_scratchpage_pa(0,pa);
1410		bzero((caddr_t)moea64_scratchpage_va[0] + off, size);
1411		mtx_unlock(&moea64_scratchpage_mtx);
1412	}
1413}
1414
1415/*
1416 * Zero a page of physical memory by temporarily mapping it
1417 */
1418void
1419moea64_zero_page(mmu_t mmu, vm_page_t m)
1420{
1421	vm_offset_t pa = VM_PAGE_TO_PHYS(m);
1422	vm_offset_t va, off;
1423
1424	if (!moea64_initialized)
1425		panic("moea64_zero_page: can't zero pa %#zx", pa);
1426
1427	if (!hw_direct_map) {
1428		mtx_lock(&moea64_scratchpage_mtx);
1429
1430		moea64_set_scratchpage_pa(0,pa);
1431		va = moea64_scratchpage_va[0];
1432	} else {
1433		va = pa;
1434	}
1435
1436	for (off = 0; off < PAGE_SIZE; off += cacheline_size)
1437		__asm __volatile("dcbz 0,%0" :: "r"(va + off));
1438
1439	if (!hw_direct_map)
1440		mtx_unlock(&moea64_scratchpage_mtx);
1441}
1442
1443void
1444moea64_zero_page_idle(mmu_t mmu, vm_page_t m)
1445{
1446
1447	moea64_zero_page(mmu, m);
1448}
1449
1450/*
1451 * Map the given physical page at the specified virtual address in the
1452 * target pmap with the protection requested.  If specified the page
1453 * will be wired down.
1454 */
1455void
1456moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
1457    vm_prot_t prot, boolean_t wired)
1458{
1459
1460	vm_page_lock_queues();
1461	PMAP_LOCK(pmap);
1462	moea64_enter_locked(pmap, va, m, prot, wired);
1463	vm_page_unlock_queues();
1464	PMAP_UNLOCK(pmap);
1465}
1466
1467/*
1468 * Map the given physical page at the specified virtual address in the
1469 * target pmap with the protection requested.  If specified the page
1470 * will be wired down.
1471 *
1472 * The page queues and pmap must be locked.
1473 */
1474
1475static void
1476moea64_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1477    boolean_t wired)
1478{
1479	struct		pvo_head *pvo_head;
1480	uma_zone_t	zone;
1481	vm_page_t	pg;
1482	uint64_t	pte_lo;
1483	u_int		pvo_flags;
1484	int		error;
1485
1486	if (!moea64_initialized) {
1487		pvo_head = &moea64_pvo_kunmanaged;
1488		pg = NULL;
1489		zone = moea64_upvo_zone;
1490		pvo_flags = 0;
1491	} else {
1492		pvo_head = vm_page_to_pvoh(m);
1493		pg = m;
1494		zone = moea64_mpvo_zone;
1495		pvo_flags = PVO_MANAGED;
1496	}
1497
1498	if (pmap_bootstrapped)
1499		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1500	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1501	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1502	    (m->oflags & VPO_BUSY) != 0 || VM_OBJECT_LOCKED(m->object),
1503	    ("moea64_enter_locked: page %p is not busy", m));
1504
1505	/* XXX change the pvo head for fake pages */
1506	if ((m->flags & PG_FICTITIOUS) == PG_FICTITIOUS) {
1507		pvo_flags &= ~PVO_MANAGED;
1508		pvo_head = &moea64_pvo_kunmanaged;
1509		zone = moea64_upvo_zone;
1510	}
1511
1512	pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m));
1513
1514	if (prot & VM_PROT_WRITE) {
1515		pte_lo |= LPTE_BW;
1516		if (pmap_bootstrapped &&
1517		    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
1518			vm_page_flag_set(m, PG_WRITEABLE);
1519	} else
1520		pte_lo |= LPTE_BR;
1521
1522	if (prot & VM_PROT_EXECUTE)
1523		pvo_flags |= VM_PROT_EXECUTE;
1524
1525	if (wired)
1526		pvo_flags |= PVO_WIRED;
1527
1528	if ((m->flags & PG_FICTITIOUS) != 0)
1529		pvo_flags |= PVO_FAKE;
1530
1531	error = moea64_pvo_enter(pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m),
1532	    pte_lo, pvo_flags);
1533
1534	/*
1535	 * Flush the page from the instruction cache if this page is
1536	 * mapped executable and cacheable.
1537	 */
1538	if ((pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
1539		moea64_syncicache(pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE);
1540	}
1541}
1542
1543static void
1544moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz)
1545{
1546
1547	/*
1548	 * This is much trickier than on older systems because
1549	 * we can't sync the icache on physical addresses directly
1550	 * without a direct map. Instead we check a couple of cases
1551	 * where the memory is already mapped in and, failing that,
1552	 * use the same trick we use for page zeroing to create
1553	 * a temporary mapping for this physical address.
1554	 */
1555
1556	if (!pmap_bootstrapped) {
1557		/*
1558		 * If PMAP is not bootstrapped, we are likely to be
1559		 * in real mode.
1560		 */
1561		__syncicache((void *)pa, sz);
1562	} else if (pmap == kernel_pmap) {
1563		__syncicache((void *)va, sz);
1564	} else if (hw_direct_map) {
1565		__syncicache((void *)pa, sz);
1566	} else {
1567		/* Use the scratch page to set up a temp mapping */
1568
1569		mtx_lock(&moea64_scratchpage_mtx);
1570
1571		moea64_set_scratchpage_pa(1,pa & ~ADDR_POFF);
1572		__syncicache((void *)(moea64_scratchpage_va[1] +
1573		    (va & ADDR_POFF)), sz);
1574
1575		mtx_unlock(&moea64_scratchpage_mtx);
1576	}
1577}
1578
1579/*
1580 * Maps a sequence of resident pages belonging to the same object.
1581 * The sequence begins with the given page m_start.  This page is
1582 * mapped at the given virtual address start.  Each subsequent page is
1583 * mapped at a virtual address that is offset from start by the same
1584 * amount as the page is offset from m_start within the object.  The
1585 * last page in the sequence is the page with the largest offset from
1586 * m_start that can be mapped at a virtual address less than the given
1587 * virtual address end.  Not every virtual page between start and end
1588 * is mapped; only those for which a resident page exists with the
1589 * corresponding offset from m_start are mapped.
1590 */
1591void
1592moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end,
1593    vm_page_t m_start, vm_prot_t prot)
1594{
1595	vm_page_t m;
1596	vm_pindex_t diff, psize;
1597
1598	psize = atop(end - start);
1599	m = m_start;
1600	vm_page_lock_queues();
1601	PMAP_LOCK(pm);
1602	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1603		moea64_enter_locked(pm, start + ptoa(diff), m, prot &
1604		    (VM_PROT_READ | VM_PROT_EXECUTE), FALSE);
1605		m = TAILQ_NEXT(m, listq);
1606	}
1607	vm_page_unlock_queues();
1608	PMAP_UNLOCK(pm);
1609}
1610
1611void
1612moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m,
1613    vm_prot_t prot)
1614{
1615
1616	vm_page_lock_queues();
1617	PMAP_LOCK(pm);
1618	moea64_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1619	    FALSE);
1620	vm_page_unlock_queues();
1621	PMAP_UNLOCK(pm);
1622}
1623
1624vm_paddr_t
1625moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va)
1626{
1627	struct	pvo_entry *pvo;
1628	vm_paddr_t pa;
1629
1630	PMAP_LOCK(pm);
1631	pvo = moea64_pvo_find_va(pm, va);
1632	if (pvo == NULL)
1633		pa = 0;
1634	else
1635		pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) |
1636		    (va - PVO_VADDR(pvo));
1637	PMAP_UNLOCK(pm);
1638	return (pa);
1639}
1640
1641/*
1642 * Atomically extract and hold the physical page with the given
1643 * pmap and virtual address pair if that mapping permits the given
1644 * protection.
1645 */
1646vm_page_t
1647moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1648{
1649	struct	pvo_entry *pvo;
1650	vm_page_t m;
1651        vm_paddr_t pa;
1652
1653	m = NULL;
1654	pa = 0;
1655	PMAP_LOCK(pmap);
1656retry:
1657	pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1658	if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) &&
1659	    ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW ||
1660	     (prot & VM_PROT_WRITE) == 0)) {
1661		if (vm_page_pa_tryrelock(pmap,
1662			pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa))
1663			goto retry;
1664		m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN);
1665		vm_page_hold(m);
1666	}
1667	PA_UNLOCK_COND(pa);
1668	PMAP_UNLOCK(pmap);
1669	return (m);
1670}
1671
1672static void *
1673moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
1674{
1675	/*
1676	 * This entire routine is a horrible hack to avoid bothering kmem
1677	 * for new KVA addresses. Because this can get called from inside
1678	 * kmem allocation routines, calling kmem for a new address here
1679	 * can lead to multiply locking non-recursive mutexes.
1680	 */
1681	static vm_pindex_t color;
1682        vm_offset_t va;
1683
1684        vm_page_t m;
1685        int pflags, needed_lock;
1686
1687	*flags = UMA_SLAB_PRIV;
1688	needed_lock = !PMAP_LOCKED(kernel_pmap);
1689
1690	if (needed_lock)
1691		PMAP_LOCK(kernel_pmap);
1692
1693        if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
1694                pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
1695        else
1696                pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;
1697        if (wait & M_ZERO)
1698                pflags |= VM_ALLOC_ZERO;
1699
1700        for (;;) {
1701                m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
1702                if (m == NULL) {
1703                        if (wait & M_NOWAIT)
1704                                return (NULL);
1705                        VM_WAIT;
1706                } else
1707                        break;
1708        }
1709
1710	va = VM_PAGE_TO_PHYS(m);
1711
1712	moea64_pvo_enter(kernel_pmap, moea64_upvo_zone,
1713	    &moea64_pvo_kunmanaged, va, VM_PAGE_TO_PHYS(m), LPTE_M,
1714	    PVO_WIRED | PVO_BOOTSTRAP);
1715
1716	if (needed_lock)
1717		PMAP_UNLOCK(kernel_pmap);
1718
1719	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
1720                bzero((void *)va, PAGE_SIZE);
1721
1722	return (void *)va;
1723}
1724
1725void
1726moea64_init(mmu_t mmu)
1727{
1728
1729	CTR0(KTR_PMAP, "moea64_init");
1730
1731	moea64_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry),
1732	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1733	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
1734	moea64_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry),
1735	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1736	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
1737
1738	if (!hw_direct_map) {
1739		uma_zone_set_allocf(moea64_upvo_zone,moea64_uma_page_alloc);
1740		uma_zone_set_allocf(moea64_mpvo_zone,moea64_uma_page_alloc);
1741	}
1742
1743	moea64_initialized = TRUE;
1744}
1745
1746boolean_t
1747moea64_is_referenced(mmu_t mmu, vm_page_t m)
1748{
1749
1750	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1751	    ("moea64_is_referenced: page %p is not managed", m));
1752	return (moea64_query_bit(m, PTE_REF));
1753}
1754
1755boolean_t
1756moea64_is_modified(mmu_t mmu, vm_page_t m)
1757{
1758
1759	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1760	    ("moea64_is_modified: page %p is not managed", m));
1761
1762	/*
1763	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
1764	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
1765	 * is clear, no PTEs can have LPTE_CHG set.
1766	 */
1767	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1768	if ((m->oflags & VPO_BUSY) == 0 &&
1769	    (m->flags & PG_WRITEABLE) == 0)
1770		return (FALSE);
1771	return (moea64_query_bit(m, LPTE_CHG));
1772}
1773
1774boolean_t
1775moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va)
1776{
1777	struct pvo_entry *pvo;
1778	boolean_t rv;
1779
1780	PMAP_LOCK(pmap);
1781	pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1782	rv = pvo == NULL || (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0;
1783	PMAP_UNLOCK(pmap);
1784	return (rv);
1785}
1786
1787void
1788moea64_clear_reference(mmu_t mmu, vm_page_t m)
1789{
1790
1791	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1792	    ("moea64_clear_reference: page %p is not managed", m));
1793	moea64_clear_bit(m, LPTE_REF);
1794}
1795
1796void
1797moea64_clear_modify(mmu_t mmu, vm_page_t m)
1798{
1799
1800	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1801	    ("moea64_clear_modify: page %p is not managed", m));
1802	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1803	KASSERT((m->oflags & VPO_BUSY) == 0,
1804	    ("moea64_clear_modify: page %p is busy", m));
1805
1806	/*
1807	 * If the page is not PG_WRITEABLE, then no PTEs can have LPTE_CHG
1808	 * set.  If the object containing the page is locked and the page is
1809	 * not VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
1810	 */
1811	if ((m->flags & PG_WRITEABLE) == 0)
1812		return;
1813	moea64_clear_bit(m, LPTE_CHG);
1814}
1815
1816/*
1817 * Clear the write and modified bits in each of the given page's mappings.
1818 */
1819void
1820moea64_remove_write(mmu_t mmu, vm_page_t m)
1821{
1822	struct	pvo_entry *pvo;
1823	struct	lpte *pt;
1824	pmap_t	pmap;
1825	uint64_t lo;
1826
1827	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1828	    ("moea64_remove_write: page %p is not managed", m));
1829
1830	/*
1831	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
1832	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
1833	 * is clear, no page table entries need updating.
1834	 */
1835	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1836	if ((m->oflags & VPO_BUSY) == 0 &&
1837	    (m->flags & PG_WRITEABLE) == 0)
1838		return;
1839	vm_page_lock_queues();
1840	lo = moea64_attr_fetch(m);
1841	SYNC();
1842	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
1843		pmap = pvo->pvo_pmap;
1844		PMAP_LOCK(pmap);
1845		LOCK_TABLE();
1846		if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) {
1847			pt = moea64_pvo_to_pte(pvo);
1848			pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP;
1849			pvo->pvo_pte.lpte.pte_lo |= LPTE_BR;
1850			if (pt != NULL) {
1851				moea64_pte_synch(pt, &pvo->pvo_pte.lpte);
1852				lo |= pvo->pvo_pte.lpte.pte_lo;
1853				pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG;
1854				moea64_pte_change(pt, &pvo->pvo_pte.lpte,
1855				    pvo->pvo_vpn);
1856				if (pvo->pvo_pmap == kernel_pmap)
1857					isync();
1858			}
1859		}
1860		UNLOCK_TABLE();
1861		PMAP_UNLOCK(pmap);
1862	}
1863	if ((lo & LPTE_CHG) != 0) {
1864		moea64_attr_clear(m, LPTE_CHG);
1865		vm_page_dirty(m);
1866	}
1867	vm_page_flag_clear(m, PG_WRITEABLE);
1868	vm_page_unlock_queues();
1869}
1870
1871/*
1872 *	moea64_ts_referenced:
1873 *
1874 *	Return a count of reference bits for a page, clearing those bits.
1875 *	It is not necessary for every reference bit to be cleared, but it
1876 *	is necessary that 0 only be returned when there are truly no
1877 *	reference bits set.
1878 *
1879 *	XXX: The exact number of bits to check and clear is a matter that
1880 *	should be tested and standardized at some point in the future for
1881 *	optimal aging of shared pages.
1882 */
1883boolean_t
1884moea64_ts_referenced(mmu_t mmu, vm_page_t m)
1885{
1886
1887	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1888	    ("moea64_ts_referenced: page %p is not managed", m));
1889	return (moea64_clear_bit(m, LPTE_REF));
1890}
1891
1892/*
1893 * Modify the WIMG settings of all mappings for a page.
1894 */
1895void
1896moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma)
1897{
1898	struct	pvo_entry *pvo;
1899	struct  pvo_head *pvo_head;
1900	struct	lpte *pt;
1901	pmap_t	pmap;
1902	uint64_t lo;
1903
1904	if (m->flags & PG_FICTITIOUS) {
1905		m->md.mdpg_cache_attrs = ma;
1906		return;
1907	}
1908
1909	vm_page_lock_queues();
1910	pvo_head = vm_page_to_pvoh(m);
1911	lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma);
1912	LIST_FOREACH(pvo, pvo_head, pvo_vlink) {
1913		pmap = pvo->pvo_pmap;
1914		PMAP_LOCK(pmap);
1915		LOCK_TABLE();
1916		pt = moea64_pvo_to_pte(pvo);
1917		pvo->pvo_pte.lpte.pte_lo &= ~LPTE_WIMG;
1918		pvo->pvo_pte.lpte.pte_lo |= lo;
1919		if (pt != NULL) {
1920			moea64_pte_change(pt, &pvo->pvo_pte.lpte,
1921			    pvo->pvo_vpn);
1922			if (pvo->pvo_pmap == kernel_pmap)
1923				isync();
1924		}
1925		UNLOCK_TABLE();
1926		PMAP_UNLOCK(pmap);
1927	}
1928	m->md.mdpg_cache_attrs = ma;
1929	vm_page_unlock_queues();
1930}
1931
1932/*
1933 * Map a wired page into kernel virtual address space.
1934 */
1935void
1936moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma)
1937{
1938	uint64_t	pte_lo;
1939	int		error;
1940
1941	pte_lo = moea64_calc_wimg(pa, ma);
1942
1943	PMAP_LOCK(kernel_pmap);
1944	error = moea64_pvo_enter(kernel_pmap, moea64_upvo_zone,
1945	    &moea64_pvo_kunmanaged, va, pa, pte_lo,
1946	    PVO_WIRED | VM_PROT_EXECUTE);
1947
1948	if (error != 0 && error != ENOENT)
1949		panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va,
1950		    pa, error);
1951
1952	/*
1953	 * Flush the memory from the instruction cache.
1954	 */
1955	if ((pte_lo & (LPTE_I | LPTE_G)) == 0) {
1956		__syncicache((void *)va, PAGE_SIZE);
1957	}
1958	PMAP_UNLOCK(kernel_pmap);
1959}
1960
1961void
1962moea64_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa)
1963{
1964
1965	moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT);
1966}
1967
1968/*
1969 * Extract the physical page address associated with the given kernel virtual
1970 * address.
1971 */
1972vm_offset_t
1973moea64_kextract(mmu_t mmu, vm_offset_t va)
1974{
1975	struct		pvo_entry *pvo;
1976	vm_paddr_t pa;
1977
1978	/*
1979	 * Shortcut the direct-mapped case when applicable.  We never put
1980	 * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS.
1981	 */
1982	if (va < VM_MIN_KERNEL_ADDRESS)
1983		return (va);
1984
1985	PMAP_LOCK(kernel_pmap);
1986	pvo = moea64_pvo_find_va(kernel_pmap, va);
1987	KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR,
1988	    va));
1989	pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) + (va - PVO_VADDR(pvo));
1990	PMAP_UNLOCK(kernel_pmap);
1991	return (pa);
1992}
1993
1994/*
1995 * Remove a wired page from kernel virtual address space.
1996 */
1997void
1998moea64_kremove(mmu_t mmu, vm_offset_t va)
1999{
2000	moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE);
2001}
2002
2003/*
2004 * Map a range of physical addresses into kernel virtual address space.
2005 *
2006 * The value passed in *virt is a suggested virtual address for the mapping.
2007 * Architectures which can support a direct-mapped physical to virtual region
2008 * can return the appropriate address within that region, leaving '*virt'
2009 * unchanged.  We cannot and therefore do not; *virt is updated with the
2010 * first usable address after the mapped region.
2011 */
2012vm_offset_t
2013moea64_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
2014    vm_offset_t pa_end, int prot)
2015{
2016	vm_offset_t	sva, va;
2017
2018	sva = *virt;
2019	va = sva;
2020	for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
2021		moea64_kenter(mmu, va, pa_start);
2022	*virt = va;
2023
2024	return (sva);
2025}
2026
2027/*
2028 * Returns true if the pmap's pv is one of the first
2029 * 16 pvs linked to from this page.  This count may
2030 * be changed upwards or downwards in the future; it
2031 * is only necessary that true be returned for a small
2032 * subset of pmaps for proper page aging.
2033 */
2034boolean_t
2035moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
2036{
2037        int loops;
2038	struct pvo_entry *pvo;
2039	boolean_t rv;
2040
2041	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2042	    ("moea64_page_exists_quick: page %p is not managed", m));
2043	loops = 0;
2044	rv = FALSE;
2045	vm_page_lock_queues();
2046	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2047		if (pvo->pvo_pmap == pmap) {
2048			rv = TRUE;
2049			break;
2050		}
2051		if (++loops >= 16)
2052			break;
2053	}
2054	vm_page_unlock_queues();
2055	return (rv);
2056}
2057
2058/*
2059 * Return the number of managed mappings to the given physical page
2060 * that are wired.
2061 */
2062int
2063moea64_page_wired_mappings(mmu_t mmu, vm_page_t m)
2064{
2065	struct pvo_entry *pvo;
2066	int count;
2067
2068	count = 0;
2069	if ((m->flags & PG_FICTITIOUS) != 0)
2070		return (count);
2071	vm_page_lock_queues();
2072	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
2073		if ((pvo->pvo_vaddr & PVO_WIRED) != 0)
2074			count++;
2075	vm_page_unlock_queues();
2076	return (count);
2077}
2078
2079static uintptr_t	moea64_vsidcontext;
2080
2081uintptr_t
2082moea64_get_unique_vsid(void) {
2083	u_int entropy;
2084	register_t hash;
2085	uint32_t mask;
2086	int i;
2087
2088	entropy = 0;
2089	__asm __volatile("mftb %0" : "=r"(entropy));
2090
2091	mtx_lock(&moea64_slb_mutex);
2092	for (i = 0; i < NVSIDS; i += VSID_NBPW) {
2093		u_int	n;
2094
2095		/*
2096		 * Create a new value by mutiplying by a prime and adding in
2097		 * entropy from the timebase register.  This is to make the
2098		 * VSID more random so that the PT hash function collides
2099		 * less often.  (Note that the prime casues gcc to do shifts
2100		 * instead of a multiply.)
2101		 */
2102		moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
2103		hash = moea64_vsidcontext & (NVSIDS - 1);
2104		if (hash == 0)		/* 0 is special, avoid it */
2105			continue;
2106		n = hash >> 5;
2107		mask = 1 << (hash & (VSID_NBPW - 1));
2108		hash = (moea64_vsidcontext & VSID_HASHMASK);
2109		if (moea64_vsid_bitmap[n] & mask) {	/* collision? */
2110			/* anything free in this bucket? */
2111			if (moea64_vsid_bitmap[n] == 0xffffffff) {
2112				entropy = (moea64_vsidcontext >> 20);
2113				continue;
2114			}
2115			i = ffs(~moea64_vsid_bitmap[n]) - 1;
2116			mask = 1 << i;
2117			hash &= VSID_HASHMASK & ~(VSID_NBPW - 1);
2118			hash |= i;
2119		}
2120		KASSERT(!(moea64_vsid_bitmap[n] & mask),
2121		    ("Allocating in-use VSID %#zx\n", hash));
2122		moea64_vsid_bitmap[n] |= mask;
2123		mtx_unlock(&moea64_slb_mutex);
2124		return (hash);
2125	}
2126
2127	mtx_unlock(&moea64_slb_mutex);
2128	panic("%s: out of segments",__func__);
2129}
2130
2131#ifdef __powerpc64__
2132void
2133moea64_pinit(mmu_t mmu, pmap_t pmap)
2134{
2135	PMAP_LOCK_INIT(pmap);
2136
2137	pmap->pm_slb_tree_root = slb_alloc_tree();
2138	pmap->pm_slb = slb_alloc_user_cache();
2139	pmap->pm_slb_len = 0;
2140}
2141#else
2142void
2143moea64_pinit(mmu_t mmu, pmap_t pmap)
2144{
2145	int	i;
2146	uint32_t hash;
2147
2148	PMAP_LOCK_INIT(pmap);
2149
2150	if (pmap_bootstrapped)
2151		pmap->pmap_phys = (pmap_t)moea64_kextract(mmu,
2152		    (vm_offset_t)pmap);
2153	else
2154		pmap->pmap_phys = pmap;
2155
2156	/*
2157	 * Allocate some segment registers for this pmap.
2158	 */
2159	hash = moea64_get_unique_vsid();
2160
2161	for (i = 0; i < 16; i++)
2162		pmap->pm_sr[i] = VSID_MAKE(i, hash);
2163
2164	KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0"));
2165}
2166#endif
2167
2168/*
2169 * Initialize the pmap associated with process 0.
2170 */
2171void
2172moea64_pinit0(mmu_t mmu, pmap_t pm)
2173{
2174	moea64_pinit(mmu, pm);
2175	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
2176}
2177
2178/*
2179 * Set the physical protection on the specified range of this map as requested.
2180 */
2181void
2182moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2183    vm_prot_t prot)
2184{
2185	struct	pvo_entry *pvo;
2186	struct	lpte *pt;
2187
2188	CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva,
2189	    eva, prot);
2190
2191
2192	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
2193	    ("moea64_protect: non current pmap"));
2194
2195	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2196		moea64_remove(mmu, pm, sva, eva);
2197		return;
2198	}
2199
2200	vm_page_lock_queues();
2201	PMAP_LOCK(pm);
2202	for (; sva < eva; sva += PAGE_SIZE) {
2203		pvo = moea64_pvo_find_va(pm, sva);
2204		if (pvo == NULL)
2205			continue;
2206
2207		/*
2208		 * Grab the PTE pointer before we diddle with the cached PTE
2209		 * copy.
2210		 */
2211		LOCK_TABLE();
2212		pt = moea64_pvo_to_pte(pvo);
2213
2214		/*
2215		 * Change the protection of the page.
2216		 */
2217		pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP;
2218		pvo->pvo_pte.lpte.pte_lo |= LPTE_BR;
2219		pvo->pvo_pte.lpte.pte_lo &= ~LPTE_NOEXEC;
2220		if ((prot & VM_PROT_EXECUTE) == 0)
2221			pvo->pvo_pte.lpte.pte_lo |= LPTE_NOEXEC;
2222
2223		/*
2224		 * If the PVO is in the page table, update that pte as well.
2225		 */
2226		if (pt != NULL) {
2227			moea64_pte_change(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn);
2228			if ((pvo->pvo_pte.lpte.pte_lo &
2229			    (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
2230				moea64_syncicache(pm, sva,
2231				    pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN,
2232				    PAGE_SIZE);
2233			}
2234		}
2235		UNLOCK_TABLE();
2236	}
2237	vm_page_unlock_queues();
2238	PMAP_UNLOCK(pm);
2239}
2240
2241/*
2242 * Map a list of wired pages into kernel virtual address space.  This is
2243 * intended for temporary mappings which do not need page modification or
2244 * references recorded.  Existing mappings in the region are overwritten.
2245 */
2246void
2247moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count)
2248{
2249	while (count-- > 0) {
2250		moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
2251		va += PAGE_SIZE;
2252		m++;
2253	}
2254}
2255
2256/*
2257 * Remove page mappings from kernel virtual address space.  Intended for
2258 * temporary mappings entered by moea64_qenter.
2259 */
2260void
2261moea64_qremove(mmu_t mmu, vm_offset_t va, int count)
2262{
2263	while (count-- > 0) {
2264		moea64_kremove(mmu, va);
2265		va += PAGE_SIZE;
2266	}
2267}
2268
2269void
2270moea64_release_vsid(uint64_t vsid)
2271{
2272	int idx, mask;
2273
2274	mtx_lock(&moea64_slb_mutex);
2275	idx = vsid & (NVSIDS-1);
2276	mask = 1 << (idx % VSID_NBPW);
2277	idx /= VSID_NBPW;
2278	KASSERT(moea64_vsid_bitmap[idx] & mask,
2279	    ("Freeing unallocated VSID %#jx", vsid));
2280	moea64_vsid_bitmap[idx] &= ~mask;
2281	mtx_unlock(&moea64_slb_mutex);
2282}
2283
2284
2285void
2286moea64_release(mmu_t mmu, pmap_t pmap)
2287{
2288
2289	/*
2290	 * Free segment registers' VSIDs
2291	 */
2292    #ifdef __powerpc64__
2293	slb_free_tree(pmap);
2294	slb_free_user_cache(pmap->pm_slb);
2295    #else
2296	KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));
2297
2298	moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
2299    #endif
2300
2301	PMAP_LOCK_DESTROY(pmap);
2302}
2303
2304/*
2305 * Remove the given range of addresses from the specified map.
2306 */
2307void
2308moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
2309{
2310	struct	pvo_entry *pvo;
2311
2312	vm_page_lock_queues();
2313	PMAP_LOCK(pm);
2314	for (; sva < eva; sva += PAGE_SIZE) {
2315		pvo = moea64_pvo_find_va(pm, sva);
2316		if (pvo != NULL)
2317			moea64_pvo_remove(pvo);
2318	}
2319	vm_page_unlock_queues();
2320	PMAP_UNLOCK(pm);
2321}
2322
2323/*
2324 * Remove physical page from all pmaps in which it resides. moea64_pvo_remove()
2325 * will reflect changes in pte's back to the vm_page.
2326 */
2327void
2328moea64_remove_all(mmu_t mmu, vm_page_t m)
2329{
2330	struct  pvo_head *pvo_head;
2331	struct	pvo_entry *pvo, *next_pvo;
2332	pmap_t	pmap;
2333
2334	vm_page_lock_queues();
2335	pvo_head = vm_page_to_pvoh(m);
2336	for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
2337		next_pvo = LIST_NEXT(pvo, pvo_vlink);
2338
2339		MOEA_PVO_CHECK(pvo);	/* sanity check */
2340		pmap = pvo->pvo_pmap;
2341		PMAP_LOCK(pmap);
2342		moea64_pvo_remove(pvo);
2343		PMAP_UNLOCK(pmap);
2344	}
2345	if ((m->flags & PG_WRITEABLE) && moea64_is_modified(mmu, m)) {
2346		moea64_attr_clear(m, LPTE_CHG);
2347		vm_page_dirty(m);
2348	}
2349	vm_page_flag_clear(m, PG_WRITEABLE);
2350	vm_page_unlock_queues();
2351}
2352
2353/*
2354 * Allocate a physical page of memory directly from the phys_avail map.
2355 * Can only be called from moea64_bootstrap before avail start and end are
2356 * calculated.
2357 */
2358static vm_offset_t
2359moea64_bootstrap_alloc(vm_size_t size, u_int align)
2360{
2361	vm_offset_t	s, e;
2362	int		i, j;
2363
2364	size = round_page(size);
2365	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
2366		if (align != 0)
2367			s = (phys_avail[i] + align - 1) & ~(align - 1);
2368		else
2369			s = phys_avail[i];
2370		e = s + size;
2371
2372		if (s < phys_avail[i] || e > phys_avail[i + 1])
2373			continue;
2374
2375		if (s + size > platform_real_maxaddr())
2376			continue;
2377
2378		if (s == phys_avail[i]) {
2379			phys_avail[i] += size;
2380		} else if (e == phys_avail[i + 1]) {
2381			phys_avail[i + 1] -= size;
2382		} else {
2383			for (j = phys_avail_count * 2; j > i; j -= 2) {
2384				phys_avail[j] = phys_avail[j - 2];
2385				phys_avail[j + 1] = phys_avail[j - 1];
2386			}
2387
2388			phys_avail[i + 3] = phys_avail[i + 1];
2389			phys_avail[i + 1] = s;
2390			phys_avail[i + 2] = e;
2391			phys_avail_count++;
2392		}
2393
2394		return (s);
2395	}
2396	panic("moea64_bootstrap_alloc: could not allocate memory");
2397}
2398
2399static void
2400tlbia(void)
2401{
2402	vm_offset_t i;
2403	#ifndef __powerpc64__
2404	register_t msr, scratch;
2405	#endif
2406
2407	TLBSYNC();
2408
2409	for (i = 0; i < 0xFF000; i += 0x00001000) {
2410		#ifdef __powerpc64__
2411		__asm __volatile("tlbiel %0" :: "r"(i));
2412		#else
2413		__asm __volatile("\
2414		    mfmsr %0; \
2415		    mr %1, %0; \
2416		    insrdi %1,%3,1,0; \
2417		    mtmsrd %1; \
2418		    isync; \
2419		    \
2420		    tlbiel %2; \
2421		    \
2422		    mtmsrd %0; \
2423		    isync;"
2424		: "=r"(msr), "=r"(scratch) : "r"(i), "r"(1));
2425		#endif
2426	}
2427
2428	EIEIO();
2429	TLBSYNC();
2430}
2431
2432#ifdef __powerpc64__
2433static void
2434slbia(void)
2435{
2436	register_t seg0;
2437
2438	__asm __volatile ("slbia");
2439	__asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0));
2440}
2441#endif
2442
2443static int
2444moea64_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head,
2445    vm_offset_t va, vm_offset_t pa, uint64_t pte_lo, int flags)
2446{
2447	struct	 pvo_entry *pvo;
2448	uint64_t vsid;
2449	int	 first;
2450	u_int	 ptegidx;
2451	int	 i;
2452	int      bootstrap;
2453
2454	/*
2455	 * One nasty thing that can happen here is that the UMA calls to
2456	 * allocate new PVOs need to map more memory, which calls pvo_enter(),
2457	 * which calls UMA...
2458	 *
2459	 * We break the loop by detecting recursion and allocating out of
2460	 * the bootstrap pool.
2461	 */
2462
2463	first = 0;
2464	bootstrap = (flags & PVO_BOOTSTRAP);
2465
2466	if (!moea64_initialized)
2467		bootstrap = 1;
2468
2469	/*
2470	 * Compute the PTE Group index.
2471	 */
2472	va &= ~ADDR_POFF;
2473	vsid = va_to_vsid(pm, va);
2474	ptegidx = va_to_pteg(vsid, va, flags & PVO_LARGE);
2475
2476	/*
2477	 * Remove any existing mapping for this page.  Reuse the pvo entry if
2478	 * there is a mapping.
2479	 */
2480	LOCK_TABLE();
2481
2482	moea64_pvo_enter_calls++;
2483
2484	LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
2485		if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
2486			if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa &&
2487			    (pvo->pvo_pte.lpte.pte_lo & LPTE_PP) ==
2488			    (pte_lo & LPTE_PP)) {
2489			    	if (!(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID)) {
2490					/* Re-insert if spilled */
2491					i = moea64_pte_insert(ptegidx,
2492					    &pvo->pvo_pte.lpte);
2493					if (i >= 0)
2494						PVO_PTEGIDX_SET(pvo, i);
2495					moea64_pte_overflow--;
2496				}
2497				UNLOCK_TABLE();
2498				return (0);
2499			}
2500			moea64_pvo_remove(pvo);
2501			break;
2502		}
2503	}
2504
2505	/*
2506	 * If we aren't overwriting a mapping, try to allocate.
2507	 */
2508	if (bootstrap) {
2509		if (moea64_bpvo_pool_index >= BPVO_POOL_SIZE) {
2510			panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd",
2511			      moea64_bpvo_pool_index, BPVO_POOL_SIZE,
2512			      BPVO_POOL_SIZE * sizeof(struct pvo_entry));
2513		}
2514		pvo = &moea64_bpvo_pool[moea64_bpvo_pool_index];
2515		moea64_bpvo_pool_index++;
2516		bootstrap = 1;
2517	} else {
2518		/*
2519		 * Note: drop the table lock around the UMA allocation in
2520		 * case the UMA allocator needs to manipulate the page
2521		 * table. The mapping we are working with is already
2522		 * protected by the PMAP lock.
2523		 */
2524		UNLOCK_TABLE();
2525		pvo = uma_zalloc(zone, M_NOWAIT);
2526		LOCK_TABLE();
2527	}
2528
2529	if (pvo == NULL) {
2530		UNLOCK_TABLE();
2531		return (ENOMEM);
2532	}
2533
2534	moea64_pvo_entries++;
2535	pvo->pvo_vaddr = va;
2536	pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
2537	    | (vsid << 16);
2538	pvo->pvo_pmap = pm;
2539	LIST_INSERT_HEAD(&moea64_pvo_table[ptegidx], pvo, pvo_olink);
2540	pvo->pvo_vaddr &= ~ADDR_POFF;
2541
2542	if (!(flags & VM_PROT_EXECUTE))
2543		pte_lo |= LPTE_NOEXEC;
2544	if (flags & PVO_WIRED)
2545		pvo->pvo_vaddr |= PVO_WIRED;
2546	if (pvo_head != &moea64_pvo_kunmanaged)
2547		pvo->pvo_vaddr |= PVO_MANAGED;
2548	if (bootstrap)
2549		pvo->pvo_vaddr |= PVO_BOOTSTRAP;
2550	if (flags & PVO_FAKE)
2551		pvo->pvo_vaddr |= PVO_FAKE;
2552	if (flags & PVO_LARGE)
2553		pvo->pvo_vaddr |= PVO_LARGE;
2554
2555	moea64_pte_create(&pvo->pvo_pte.lpte, vsid, va,
2556	    (uint64_t)(pa) | pte_lo, flags);
2557
2558	/*
2559	 * Remember if the list was empty and therefore will be the first
2560	 * item.
2561	 */
2562	if (LIST_FIRST(pvo_head) == NULL)
2563		first = 1;
2564	LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
2565
2566	if (pvo->pvo_vaddr & PVO_WIRED) {
2567		pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED;
2568		pm->pm_stats.wired_count++;
2569	}
2570	pm->pm_stats.resident_count++;
2571
2572	/*
2573	 * We hope this succeeds but it isn't required.
2574	 */
2575	i = moea64_pte_insert(ptegidx, &pvo->pvo_pte.lpte);
2576	if (i >= 0) {
2577		PVO_PTEGIDX_SET(pvo, i);
2578	} else {
2579		panic("moea64_pvo_enter: overflow");
2580		moea64_pte_overflow++;
2581	}
2582
2583	if (pm == kernel_pmap)
2584		isync();
2585
2586	UNLOCK_TABLE();
2587
2588#ifdef __powerpc64__
2589	/*
2590	 * Make sure all our bootstrap mappings are in the SLB as soon
2591	 * as virtual memory is switched on.
2592	 */
2593	if (!pmap_bootstrapped)
2594		moea64_bootstrap_slb_prefault(va, flags & PVO_LARGE);
2595#endif
2596
2597	return (first ? ENOENT : 0);
2598}
2599
2600static void
2601moea64_pvo_remove(struct pvo_entry *pvo)
2602{
2603	struct	lpte *pt;
2604
2605	/*
2606	 * If there is an active pte entry, we need to deactivate it (and
2607	 * save the ref & cfg bits).
2608	 */
2609	LOCK_TABLE();
2610	pt = moea64_pvo_to_pte(pvo);
2611	if (pt != NULL) {
2612		moea64_pte_unset(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn);
2613		PVO_PTEGIDX_CLR(pvo);
2614	} else {
2615		moea64_pte_overflow--;
2616	}
2617
2618	/*
2619	 * Update our statistics.
2620	 */
2621	pvo->pvo_pmap->pm_stats.resident_count--;
2622	if (pvo->pvo_vaddr & PVO_WIRED)
2623		pvo->pvo_pmap->pm_stats.wired_count--;
2624
2625	/*
2626	 * Save the REF/CHG bits into their cache if the page is managed.
2627	 */
2628	if ((pvo->pvo_vaddr & (PVO_MANAGED|PVO_FAKE)) == PVO_MANAGED) {
2629		struct	vm_page *pg;
2630
2631		pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN);
2632		if (pg != NULL) {
2633			moea64_attr_save(pg, pvo->pvo_pte.lpte.pte_lo &
2634			    (LPTE_REF | LPTE_CHG));
2635		}
2636	}
2637
2638	/*
2639	 * Remove this PVO from the PV list.
2640	 */
2641	LIST_REMOVE(pvo, pvo_vlink);
2642
2643	/*
2644	 * Remove this from the overflow list and return it to the pool
2645	 * if we aren't going to reuse it.
2646	 */
2647	LIST_REMOVE(pvo, pvo_olink);
2648
2649	moea64_pvo_entries--;
2650	moea64_pvo_remove_calls++;
2651
2652	UNLOCK_TABLE();
2653
2654	if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
2655		uma_zfree((pvo->pvo_vaddr & PVO_MANAGED) ? moea64_mpvo_zone :
2656		    moea64_upvo_zone, pvo);
2657}
2658
2659static struct pvo_entry *
2660moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
2661{
2662	struct		pvo_entry *pvo;
2663	int		ptegidx;
2664	uint64_t	vsid;
2665	#ifdef __powerpc64__
2666	uint64_t	slbv;
2667
2668	if (pm == kernel_pmap) {
2669		slbv = kernel_va_to_slbv(va);
2670	} else {
2671		struct slb *slb;
2672		slb = user_va_to_slb_entry(pm, va);
2673		/* The page is not mapped if the segment isn't */
2674		if (slb == NULL)
2675			return NULL;
2676		slbv = slb->slbv;
2677	}
2678
2679	vsid = (slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
2680	if (slbv & SLBV_L)
2681		va &= ~moea64_large_page_mask;
2682	else
2683		va &= ~ADDR_POFF;
2684	ptegidx = va_to_pteg(vsid, va, slbv & SLBV_L);
2685	#else
2686	va &= ~ADDR_POFF;
2687	vsid = va_to_vsid(pm, va);
2688	ptegidx = va_to_pteg(vsid, va, 0);
2689	#endif
2690
2691	LOCK_TABLE();
2692	LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
2693		if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va)
2694			break;
2695	}
2696	UNLOCK_TABLE();
2697
2698	return (pvo);
2699}
2700
2701static struct lpte *
2702moea64_pvo_to_pte(const struct pvo_entry *pvo)
2703{
2704	struct lpte 	*pt;
2705	int		pteidx, ptegidx;
2706	uint64_t	vsid;
2707
2708	ASSERT_TABLE_LOCK();
2709
2710	/* If the PTEG index is not set, then there is no page table entry */
2711	if (!PVO_PTEGIDX_ISSET(pvo))
2712		return (NULL);
2713
2714	/*
2715	 * Calculate the ptegidx
2716	 */
2717	vsid = PVO_VSID(pvo);
2718	ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo),
2719	    pvo->pvo_vaddr & PVO_LARGE);
2720
2721	/*
2722	 * We can find the actual pte entry without searching by grabbing
2723	 * the PTEG index from 3 unused bits in pvo_vaddr and by
2724	 * noticing the HID bit.
2725	 */
2726	if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID)
2727		ptegidx ^= moea64_pteg_mask;
2728
2729	pteidx = (ptegidx << 3) | PVO_PTEGIDX_GET(pvo);
2730
2731	if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) &&
2732	    !PVO_PTEGIDX_ISSET(pvo)) {
2733		panic("moea64_pvo_to_pte: pvo %p has valid pte in pvo but no "
2734		    "valid pte index", pvo);
2735	}
2736
2737	if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0 &&
2738	    PVO_PTEGIDX_ISSET(pvo)) {
2739		panic("moea64_pvo_to_pte: pvo %p has valid pte index in pvo "
2740		    "pvo but no valid pte", pvo);
2741	}
2742
2743	pt = &moea64_pteg_table[pteidx >> 3].pt[pteidx & 7];
2744	if ((pt->pte_hi ^ (pvo->pvo_pte.lpte.pte_hi & ~LPTE_VALID)) ==
2745	    LPTE_VALID) {
2746		if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0) {
2747			panic("moea64_pvo_to_pte: pvo %p has valid pte in "
2748			    "moea64_pteg_table %p but invalid in pvo", pvo, pt);
2749		}
2750
2751		if (((pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo) &
2752		    ~(LPTE_M|LPTE_CHG|LPTE_REF)) != 0) {
2753			panic("moea64_pvo_to_pte: pvo %p pte does not match "
2754			    "pte %p in moea64_pteg_table difference is %#x",
2755			    pvo, pt,
2756			    (uint32_t)(pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo));
2757		}
2758
2759		return (pt);
2760	}
2761
2762	if (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) {
2763		panic("moea64_pvo_to_pte: pvo %p has invalid pte %p in "
2764		    "moea64_pteg_table but valid in pvo", pvo, pt);
2765	}
2766
2767	return (NULL);
2768}
2769
2770static __inline int
2771moea64_pte_spillable_ident(u_int ptegidx)
2772{
2773	struct	lpte *pt;
2774	int	i, j, k;
2775
2776	/* Start at a random slot */
2777	i = mftb() % 8;
2778	k = -1;
2779	for (j = 0; j < 8; j++) {
2780		pt = &moea64_pteg_table[ptegidx].pt[(i + j) % 8];
2781		if (pt->pte_hi & (LPTE_LOCKED | LPTE_WIRED))
2782			continue;
2783
2784		/* This is a candidate, so remember it */
2785		k = (i + j) % 8;
2786
2787		/* Try to get a page that has not been used lately */
2788		if (!(pt->pte_lo & LPTE_REF))
2789			return (k);
2790	}
2791
2792	return (k);
2793}
2794
2795static int
2796moea64_pte_insert(u_int ptegidx, struct lpte *pvo_pt)
2797{
2798	struct	lpte *pt;
2799	struct	pvo_entry *pvo;
2800	u_int	pteg_bktidx;
2801	int	i;
2802
2803	ASSERT_TABLE_LOCK();
2804
2805	/*
2806	 * First try primary hash.
2807	 */
2808	pteg_bktidx = ptegidx;
2809	for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) {
2810		if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) {
2811			pvo_pt->pte_hi &= ~LPTE_HID;
2812			moea64_pte_set(pt, pvo_pt);
2813			return (i);
2814		}
2815	}
2816
2817	/*
2818	 * Now try secondary hash.
2819	 */
2820	pteg_bktidx ^= moea64_pteg_mask;
2821	for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) {
2822		if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) {
2823			pvo_pt->pte_hi |= LPTE_HID;
2824			moea64_pte_set(pt, pvo_pt);
2825			return (i);
2826		}
2827	}
2828
2829	/*
2830	 * Out of luck. Find a PTE to sacrifice.
2831	 */
2832	pteg_bktidx = ptegidx;
2833	i = moea64_pte_spillable_ident(pteg_bktidx);
2834	if (i < 0) {
2835		pteg_bktidx ^= moea64_pteg_mask;
2836		i = moea64_pte_spillable_ident(pteg_bktidx);
2837	}
2838
2839	if (i < 0) {
2840		/* No freeable slots in either PTEG? We're hosed. */
2841		panic("moea64_pte_insert: overflow");
2842		return (-1);
2843	}
2844
2845	if (pteg_bktidx == ptegidx)
2846		pvo_pt->pte_hi &= ~LPTE_HID;
2847	else
2848		pvo_pt->pte_hi |= LPTE_HID;
2849
2850	/*
2851	 * Synchronize the sacrifice PTE with its PVO, then mark both
2852	 * invalid. The PVO will be reused when/if the VM system comes
2853	 * here after a fault.
2854	 */
2855	pt = &moea64_pteg_table[pteg_bktidx].pt[i];
2856
2857	if (pt->pte_hi & LPTE_HID)
2858		pteg_bktidx ^= moea64_pteg_mask; /* PTEs indexed by primary */
2859
2860	LIST_FOREACH(pvo, &moea64_pvo_table[pteg_bktidx], pvo_olink) {
2861		if (pvo->pvo_pte.lpte.pte_hi == pt->pte_hi) {
2862			KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID,
2863			    ("Invalid PVO for valid PTE!"));
2864			moea64_pte_unset(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn);
2865			PVO_PTEGIDX_CLR(pvo);
2866			moea64_pte_overflow++;
2867			break;
2868		}
2869	}
2870
2871	KASSERT(pvo->pvo_pte.lpte.pte_hi == pt->pte_hi,
2872	   ("Unable to find PVO for spilled PTE"));
2873
2874	/*
2875	 * Set the new PTE.
2876	 */
2877	moea64_pte_set(pt, pvo_pt);
2878
2879	return (i);
2880}
2881
2882static boolean_t
2883moea64_query_bit(vm_page_t m, u_int64_t ptebit)
2884{
2885	struct	pvo_entry *pvo;
2886	struct	lpte *pt;
2887
2888	if (moea64_attr_fetch(m) & ptebit)
2889		return (TRUE);
2890
2891	vm_page_lock_queues();
2892
2893	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2894		MOEA_PVO_CHECK(pvo);	/* sanity check */
2895
2896		/*
2897		 * See if we saved the bit off.  If so, cache it and return
2898		 * success.
2899		 */
2900		if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
2901			moea64_attr_save(m, ptebit);
2902			MOEA_PVO_CHECK(pvo);	/* sanity check */
2903			vm_page_unlock_queues();
2904			return (TRUE);
2905		}
2906	}
2907
2908	/*
2909	 * No luck, now go through the hard part of looking at the PTEs
2910	 * themselves.  Sync so that any pending REF/CHG bits are flushed to
2911	 * the PTEs.
2912	 */
2913	SYNC();
2914	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2915		MOEA_PVO_CHECK(pvo);	/* sanity check */
2916
2917		/*
2918		 * See if this pvo has a valid PTE.  if so, fetch the
2919		 * REF/CHG bits from the valid PTE.  If the appropriate
2920		 * ptebit is set, cache it and return success.
2921		 */
2922		LOCK_TABLE();
2923		pt = moea64_pvo_to_pte(pvo);
2924		if (pt != NULL) {
2925			moea64_pte_synch(pt, &pvo->pvo_pte.lpte);
2926			if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
2927				UNLOCK_TABLE();
2928
2929				moea64_attr_save(m, ptebit);
2930				MOEA_PVO_CHECK(pvo);	/* sanity check */
2931				vm_page_unlock_queues();
2932				return (TRUE);
2933			}
2934		}
2935		UNLOCK_TABLE();
2936	}
2937
2938	vm_page_unlock_queues();
2939	return (FALSE);
2940}
2941
2942static u_int
2943moea64_clear_bit(vm_page_t m, u_int64_t ptebit)
2944{
2945	u_int	count;
2946	struct	pvo_entry *pvo;
2947	struct	lpte *pt;
2948
2949	vm_page_lock_queues();
2950
2951	/*
2952	 * Clear the cached value.
2953	 */
2954	moea64_attr_clear(m, ptebit);
2955
2956	/*
2957	 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
2958	 * we can reset the right ones).  note that since the pvo entries and
2959	 * list heads are accessed via BAT0 and are never placed in the page
2960	 * table, we don't have to worry about further accesses setting the
2961	 * REF/CHG bits.
2962	 */
2963	SYNC();
2964
2965	/*
2966	 * For each pvo entry, clear the pvo's ptebit.  If this pvo has a
2967	 * valid pte clear the ptebit from the valid pte.
2968	 */
2969	count = 0;
2970	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2971		MOEA_PVO_CHECK(pvo);	/* sanity check */
2972
2973		LOCK_TABLE();
2974		pt = moea64_pvo_to_pte(pvo);
2975		if (pt != NULL) {
2976			moea64_pte_synch(pt, &pvo->pvo_pte.lpte);
2977			if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
2978				count++;
2979				moea64_pte_clear(pt, pvo->pvo_vpn, ptebit);
2980			}
2981		}
2982		pvo->pvo_pte.lpte.pte_lo &= ~ptebit;
2983		MOEA_PVO_CHECK(pvo);	/* sanity check */
2984		UNLOCK_TABLE();
2985	}
2986
2987	vm_page_unlock_queues();
2988	return (count);
2989}
2990
2991boolean_t
2992moea64_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
2993{
2994	struct pvo_entry *pvo;
2995	vm_offset_t ppa;
2996	int error = 0;
2997
2998	PMAP_LOCK(kernel_pmap);
2999	for (ppa = pa & ~ADDR_POFF; ppa < pa + size; ppa += PAGE_SIZE) {
3000		pvo = moea64_pvo_find_va(kernel_pmap, ppa);
3001		if (pvo == NULL ||
3002		    (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) != ppa) {
3003			error = EFAULT;
3004			break;
3005		}
3006	}
3007	PMAP_UNLOCK(kernel_pmap);
3008
3009	return (error);
3010}
3011
3012/*
3013 * Map a set of physical memory pages into the kernel virtual
3014 * address space. Return a pointer to where it is mapped. This
3015 * routine is intended to be used for mapping device memory,
3016 * NOT real memory.
3017 */
3018void *
3019moea64_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma)
3020{
3021	vm_offset_t va, tmpva, ppa, offset;
3022
3023	ppa = trunc_page(pa);
3024	offset = pa & PAGE_MASK;
3025	size = roundup(offset + size, PAGE_SIZE);
3026
3027	va = kmem_alloc_nofault(kernel_map, size);
3028
3029	if (!va)
3030		panic("moea64_mapdev: Couldn't alloc kernel virtual memory");
3031
3032	for (tmpva = va; size > 0;) {
3033		moea64_kenter_attr(mmu, tmpva, ppa, ma);
3034		size -= PAGE_SIZE;
3035		tmpva += PAGE_SIZE;
3036		ppa += PAGE_SIZE;
3037	}
3038
3039	return ((void *)(va + offset));
3040}
3041
3042void *
3043moea64_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size)
3044{
3045
3046	return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT);
3047}
3048
3049void
3050moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
3051{
3052	vm_offset_t base, offset;
3053
3054	base = trunc_page(va);
3055	offset = va & PAGE_MASK;
3056	size = roundup(offset + size, PAGE_SIZE);
3057
3058	kmem_free(kernel_map, base, size);
3059}
3060
3061static void
3062moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
3063{
3064	struct pvo_entry *pvo;
3065	vm_offset_t lim;
3066	vm_paddr_t pa;
3067	vm_size_t len;
3068
3069	PMAP_LOCK(pm);
3070	while (sz > 0) {
3071		lim = round_page(va);
3072		len = MIN(lim - va, sz);
3073		pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
3074		if (pvo != NULL) {
3075			pa = (pvo->pvo_pte.pte.pte_lo & LPTE_RPGN) |
3076			    (va & ADDR_POFF);
3077			moea64_syncicache(pm, va, pa, len);
3078		}
3079		va += len;
3080		sz -= len;
3081	}
3082	PMAP_UNLOCK(pm);
3083}
3084