1/*
2 * Copyright 2013, winocm. <winocm@icloud.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 *   Redistributions of source code must retain the above copyright notice, this
9 *   list of conditions and the following disclaimer.
10 *
11 *   Redistributions in binary form must reproduce the above copyright notice, this
12 *   list of conditions and the following disclaimer in the documentation and/or
13 *   other materials provided with the distribution.
14 *
15 *   If you are going to use this software in any form that does not involve
16 *   releasing the source to this project or improving it, let me know beforehand.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29/*
30 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
31 *
32 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
33 *
34 * This file contains Original Code and/or Modifications of Original Code
35 * as defined in and that are subject to the Apple Public Source License
36 * Version 2.0 (the 'License'). You may not use this file except in
37 * compliance with the License. The rights granted to you under the License
38 * may not be used to create, or enable the creation or redistribution of,
39 * unlawful or unlicensed copies of an Apple operating system, or to
40 * circumvent, violate, or enable the circumvention or violation of, any
41 * terms of an Apple operating system software license agreement.
42 *
43 * Please obtain a copy of the License at
44 * http://www.opensource.apple.com/apsl/ and read it before using this file.
45 *
46 * The Original Code and all software distributed under the License are
47 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
48 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
49 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
50 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
51 * Please see the License for the specific language governing rights and
52 * limitations under the License.
53 *
54 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
55 */
56/*
57 * @OSF_COPYRIGHT@
58 */
59/*
60 * Mach Operating System
61 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
62 * All Rights Reserved.
63 *
64 * Permission to use, copy, modify and distribute this software and its
65 * documentation is hereby granted, provided that both the copyright
66 * notice and this permission notice appear in all copies of the
67 * software, derivative works or modified versions, and any portions
68 * thereof, and that both notices appear in supporting documentation.
69 *
70 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
71 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
72 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
73 *
74 * Carnegie Mellon requests users of this software to return to
75 *
76 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
77 *  School of Computer Science
78 *  Carnegie Mellon University
79 *  Pittsburgh PA 15213-3890
80 *
81 * any improvements or extensions that they make and grant Carnegie Mellon
82 * the rights to redistribute these changes.
83 */
84/*-
85 * Copyright (c) 2010 The NetBSD Foundation, Inc.
86 * All rights reserved.
87 *
88 * This code is derived from software contributed to The NetBSD Foundation
89 * by Matt Thomas at 3am Software Foundry.
90 *
91 * Redistribution and use in source and binary forms, with or without
92 * modification, are permitted provided that the following conditions
93 * are met:
94 * 1. Redistributions of source code must retain the above copyright
95 *    notice, this list of conditions and the following disclaimer.
96 * 2. Redistributions in binary form must reproduce the above copyright
97 *    notice, this list of conditions and the following disclaimer in the
98 *    documentation and/or other materials provided with the distribution.
99 *
100 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
101 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
102 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
103 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
104 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
105 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
106 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
107 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
108 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
109 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
110 * POSSIBILITY OF SUCH DAMAGE.
111 */
112/*
113 * ARM physical memory map.
114 *
115 * Version 1.2b2, 'The Rewrite'.
116 *
117 * I'm sorry. This pmap sucks, but it sucks 'less' than the previous one did.
118 *
119 * Todo: fix pmap_nest, pmap_copy, pmap_unnest, pmap_enter_options, pmap_remove/pmap_remove_region
120 *
121 * And make pmap_create use an ASID bitmap too ifdef _ARM_ARCH_7
122 */
123
124#include <mach_debug.h>
125#include <debug.h>
126#include <mach/vm_types.h>
127#include <mach/vm_param.h>
128#include <mach/thread_status.h>
129#include <kern/misc_protos.h>
130#include <kern/assert.h>
131#include <kern/cpu_number.h>
132#include <kern/thread.h>
133#include <arm/pmap.h>
134#include <arm/mp.h>
135#include <arm/misc_protos.h>
136#include <kern/ledger.h>
137#include <kern/zalloc.h>
138#include <kern/lock.h>
139#include <kern/kalloc.h>
140#include <vm/vm_protos.h>
141#include <vm/vm_map.h>
142#include <vm/vm_kern.h>
143#include <mach/vm_param.h>
144#include <mach/vm_prot.h>
145#include <vm/vm_object.h>
146#include <vm/vm_page.h>
147#include <vm/cpm.h>
148#include <arm/cpu_capabilities.h>
149#include <arm/arch.h>
150#include <arm/pmap_asid.h>
151#include <arm/cpufunc.h>
152#include "proc_reg.h"
153
154/*
155 * The pv_head_table contains a 'trunk' of mappings for each physical
156 * page, one mapping exists for each page. Pages that are mapped in
157 * multiple pmaps (i.e: nested pmaps from say, the Dyld shared region)
158 * have multiple 'pv_nexts'. These are considered leaf mappings. Code should
159 * go through the leaf mappings if accessing/modifying page entries.
160 *
161 * -- With love, winocm.
162 */
163
164#define VM_MEM_WIRED            0x4
165#define _1KB                    1 * 1024
166#define _1MB                    1 * 1024 * _1KB
167
168/** Core Structures */
169typedef struct __pv_entry__ {
170    struct __pv_entry__ *pv_next;   /* Next PV entry. */
171    pmap_t pv_pmap;             /* Where does our mapping lie? */
172    vm_offset_t pv_address_va;  /* Virtual Address for the mapping. */
173    uint32_t pv_flags;          /* Pmap Flags */
174} pv_entry, *pv_entry_t;
175
176typedef enum {
177    ARM_PAGE_TRANSLATION_FAULT = 0x00,  /* 0b00 */
178    ARM_PAGE_PAGE_TABLE = 0x01, /* 0b01 */
179    ARM_PAGE_SECTION = 0x02,    /* 0b10 */
180    ARM_PAGE_MASK_VALUE = 0x03, /* 0b11 */
181} pmap_arm_l1_page_types_t;
182
183typedef enum {
184    ARM_PTE_DESCRIPTOR_64K = 0x01,  /* 0b01 */
185    ARM_PTE_DESCRIPTOR_4K = 0x02,   /* 0b1X */
186} pmap_arm_l2_page_types_t;
187
188extern vm_offset_t vm_kernel_stext;
189extern vm_offset_t vm_kernel_etext;
190
191/** Global variables */
192boolean_t pmap_initialized = FALSE; /* Is the pmap system initialized? */
193static struct vm_object pmap_object_store;  /* Storage object for the actual VM thing. */
194vm_object_t pmap_object;        /* The real VM object. */
195extern uint32_t first_avail, avail_end; /* End/begin of Managed RAM space. */
196struct zone *pmap_zone;         /* Zone of pmap structures */
197struct zone *pve_zone;          /* Pmap Virtual Entry zone. */
198pv_entry_t pv_head_table;       /* Start of PV entries. */
199static pmap_paddr_t avail_remaining;    /* Remaining avaialable pages. */
200uint32_t virt_begin, virt_end;  /* Virtual Address Space. */
201uint32_t avail_start, vm_first_phys;
202vm_page_t commpage;
203uint64_t pmap_nesting_size_min = 0x8000000;
204uint64_t pmap_nesting_size_max = 0x8000000;
205
206int allow_data_exec = 0;        /* no exec from data, embedded is hardcore like that */
207int allow_stack_exec = 0;       /* No apps may execute from the stack by default */
208int nx_enabled = 1;
209
210/* THE kernel pmap. */
211struct pmap kernel_pmap_store;
212pmap_t kernel_pmap = &kernel_pmap_store;
213
214/** Locking Primitives */
215lock_t pmap_system_lock;
216#define SPLVM(spl)          spl = splhigh();
217#define SPLX(spl)           splx(spl);
218
219#define PMAP_LOCK(pmap) {               \
220    simple_lock(&(pmap)->lock);         \
221}
222
223#define PMAP_UNLOCK(pmap) {             \
224    simple_unlock(&(pmap)->lock);       \
225}
226
227#define ppn_to_pai
228
229/** The Free List. */
230pv_entry_t pv_free_list;        /* The free list should be populated when the pmaps are not locked. */
231decl_simple_lock_data(, pv_free_list_lock);
232
233#define PV_ALLOC(pv_e) {                \
234    simple_lock(&pv_free_list_lock);    \
235    if((pv_e = pv_free_list) != 0) {    \
236        pv_free_list = pv_e->pv_next;   \
237    }                                   \
238    simple_unlock(&pv_free_list_lock);  \
239}
240
241#define PV_FREE(pv_e) {                 \
242    simple_lock(&pv_free_list_lock);    \
243    pv_e->pv_next = pv_free_list;       \
244    pv_free_list = pv_e;                \
245    simple_unlock(&pv_free_list_lock);  \
246}
247
248/*
249 *  For each vm_page_t, there is a list of all currently
250 *  valid virtual mappings of that page.  An entry is
251 *  a pv_rooted_entry_t; the list is the pv_table.
252 *
253 *      N.B.  with the new combo rooted/hashed scheme it is
254 *      only possibly to remove individual non-rooted entries
255 *      if they are found via the hashed chains as there is no
256 *      way to unlink the singly linked hashed entries if navigated to
257 *      via the queue list off the rooted entries.  Think of it as
258 *      hash/walk/pull, keeping track of the prev pointer while walking
259 *      the singly linked hash list.  All of this is to save memory and
260 *      keep both types of pv_entries as small as possible.
261 */
262
263/*
264
265PV HASHING Changes - JK 1/2007
266
267Pve's establish physical to virtual mappings.  These are used for aliasing of a
268physical page to (potentially many) virtual addresses within pmaps. In the previous
269implementation the structure of the pv_entries (each 16 bytes in size) was
270
271typedef struct pv_entry {
272    struct pv_entry_t    next;
273    pmap_t                    pmap;
274    vm_map_offset_t   va;
275} *pv_entry_t;
276
277An initial array of these is created at boot time, one per physical page of memory,
278indexed by the physical page number. Additionally, a pool of entries is created from a
279pv_zone to be used as needed by pmap_enter() when it is creating new mappings.
280Originally, we kept this pool around because the code in pmap_enter() was unable to
281block if it needed an entry and none were available - we'd panic.  Some time ago I
282restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing
283a pv structure and restart, removing a panic from the code (in the case of the kernel
284pmap we cannot block and still panic, so, we keep a separate hot pool for use only on
285kernel pmaps).  The pool has not been removed since there is a large performance gain
286keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
287
288As pmap_enter() created new mappings it linked the new pve's for them off the fixed
289pv array for that ppn (off the next pointer).  These pve's are accessed for several
290operations, one of them being address space teardown.  In that case, we basically do this
291
292    for (every page/pte in the space) {
293        calc pve_ptr from the ppn in the pte
294        for (every pv in the list for the ppn) {
295            if (this pv is for this pmap/vaddr) {
296                do housekeeping
297                unlink/free the pv
298            }
299        }
300    }
301
302The problem arose when we were running, say 8000 (or even 2000) apache or other processes
303and one or all terminate. The list hanging off each pv array entry could have thousands of
304entries.  We were continuously linearly searching each of these lists as we stepped through
305the address space we were tearing down.  Because of the locks we hold, likely taking a cache
306miss for each node,  and interrupt disabling for MP issues the system became completely
307unresponsive for many seconds while we did this.
308
309Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn
310for operations like pmap_page_protect and finding and modifying/removing a single pve as
311part of pmap_enter processing) has led to modifying the pve structures and databases.
312
313There are now two types of pve structures.  A "rooted" structure which is basically the
314original structure accessed in an array by ppn, and a ''hashed'' structure accessed on a
315hash list via a hash of [pmap, vaddr].  These have been designed with the two goals of
316minimizing wired memory and making the lookup of a ppn faster.  Since a vast majority of
317pages in the system are not aliased and hence represented by a single pv entry I've kept
318the rooted entry size as small as possible because there is one of these dedicated for
319every physical page of memory.  The hashed pve's are larger due to the addition of the hash
320link and the ppn entry needed for matching while running the hash list to find the entry we
321are looking for.  This way, only systems that have lots of aliasing (like 2000+ httpd procs)
322will pay the extra memory price. Both structures have the same first three fields allowing
323some simplification in the code.
324
325They have these shapes
326
327typedef struct pv_rooted_entry {
328        queue_head_t qlink;
329        vm_map_offset_t va;
330        pmap_t          pmap;
331} *pv_rooted_entry_t;
332
333typedef struct pv_hashed_entry {
334  queue_head_t qlink;
335  vm_map_offset_t va;
336  pmap_t        pmap;
337  ppnum_t ppn;
338  struct pv_hashed_entry *nexth;
339} *pv_hashed_entry_t;
340
341The main flow difference is that the code is now aware of the rooted entry and the hashed
342entries.  Code that runs the pv list still starts with the rooted entry and then continues
343down the qlink onto the hashed entries.  Code that is looking up a specific pv entry first
344checks the rooted entry and then hashes and runs the hash list for the match. The hash list
345lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
346
347*/
348
349/*
350 * OS level page bits.
351 */
352typedef enum {
353    PMAP_OSPTE_TYPE_VALID = 0x0,
354    PMAP_OSPTE_TYPE_WIRED = 0x1,
355    PMAP_OSPTE_TYPE_REFERENCED = 0x2,
356    PMAP_OSPTE_TYPE_MODIFIED = 0x4,
357    PMAP_OSPTE_TYPE_NOENCRYPT = 0x8,
358    PMAP_OSPTE_TYPE_NOCACHE = 0x10,
359    PMAP_OSPTE_TYPE_PTA = 0x20,
360} __internal_pmap_ospte_bits_t;
361
362/*
363 * The PV rooted hash stuff is from xnu-1228/osfmk/i386/pmap.c
364 */
365
366typedef struct pv_rooted_entry {    /* first three entries must match pv_hashed_entry_t */
367    queue_head_t qlink;
368    vm_map_offset_t va;         /* virtual address for mapping */
369    pmap_t pmap;                /* pmap where mapping lies */
370    uint32_t flags;             /* address flags */
371} *pv_rooted_entry_t;
372
373#define PV_ROOTED_ENTRY_NULL    ((pv_rooted_entry_t) 0)
374
375pv_rooted_entry_t pv_head_hash_table;   /* array of entries, one per page */
376
377typedef struct pv_hashed_entry {    /* first three entries must match pv_rooted_entry_t */
378    queue_head_t qlink;
379    vm_map_offset_t va;
380    pmap_t pmap;
381    ppnum_t ppn;
382    struct pv_hashed_entry *nexth;
383} *pv_hashed_entry_t;
384
385#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
386
387#define NPVHASH 4095            /* MUST BE 2^N - 1 */
388pv_hashed_entry_t *pv_hash_table;   /* hash lists */
389
390uint32_t npvhash = 0;
391
392/* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
393// #define PV_DEBUG 1
394#define kprintf(args...)
395
396
397#ifdef PV_DEBUG
398#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
399#else
400#define CHK_NPVHASH()
401#endif
402
403/*
404 *  pv_list entries are kept on a list that can only be accessed
405 *  with the pmap system locked (at SPLVM, not in the cpus_active set).
406 *  The list is refilled from the pv_hashed_list_zone if it becomes empty.
407 */
408pv_rooted_entry_t pv_hash_free_list = PV_ROOTED_ENTRY_NULL; /* free list at SPLVM */
409pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
410pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
411decl_simple_lock_data(, pv_hashed_free_list_lock)
412    decl_simple_lock_data(, pv_hashed_kern_free_list_lock)
413    decl_simple_lock_data(, pv_hash_table_lock)
414
415int pv_free_count = 0;
416int pv_hashed_free_count = 0;
417int pv_kern_free_count = 0;
418int pv_hashed_kern_free_count = 0;
419#define PV_HASHED_LOW_WATER_MARK 5000
420#define PV_HASHED_KERN_LOW_WATER_MARK 100
421#define PV_HASHED_ALLOC_CHUNK 2000
422#define PV_HASHED_KERN_ALLOC_CHUNK 50
423thread_call_t mapping_adjust_call;
424static thread_call_data_t mapping_adjust_call_data;
425uint32_t mappingrecurse = 0;
426
427#define PV_HASHED_ALLOC(pvh_e) { \
428    simple_lock(&pv_hashed_free_list_lock); \
429    if ((pvh_e = pv_hashed_free_list) != 0) { \
430      pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
431            pv_hashed_free_count--; \
432            if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \
433              if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
434                thread_call_enter(mapping_adjust_call); \
435    } \
436    simple_unlock(&pv_hashed_free_list_lock); \
437}
438
439#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {   \
440    simple_lock(&pv_hashed_free_list_lock); \
441    pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;    \
442    pv_hashed_free_list = pvh_eh; \
443        pv_hashed_free_count += pv_cnt; \
444    simple_unlock(&pv_hashed_free_list_lock); \
445}
446
447#define PV_HASHED_KERN_ALLOC(pvh_e) { \
448    simple_lock(&pv_hashed_kern_free_list_lock); \
449    if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
450      pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;  \
451            pv_hashed_kern_free_count--; \
452            if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
453              if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
454                thread_call_enter(mapping_adjust_call); \
455    } \
456    simple_unlock(&pv_hashed_kern_free_list_lock); \
457}
458
459#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {   \
460    simple_lock(&pv_hashed_kern_free_list_lock); \
461    pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
462    pv_hashed_kern_free_list = pvh_eh; \
463        pv_hashed_kern_free_count += pv_cnt; \
464    simple_unlock(&pv_hashed_kern_free_list_lock); \
465}
466
467zone_t pv_hashed_list_zone;     /* zone of pv_hashed_entry structures */
468
469#define pvhash(idx)         (&pv_hash_table[idx])
470
471/** Useful Macros */
472#define pa_index(pa)        (atop(pa))
473#define pai_to_pvh(pai)     (&pv_head_hash_table[pai - atop(gPhysBase)])
474
475/*
476 *  Each entry in the pv_head_table is locked by a full spinlock in the
477 *  pv_lock_table.  The lock bits are accessed by the physical
478 *  address of the page they lock.
479 */
480
481char *pv_lock_table;            /* pointer to array of bits */
482#define pv_lock_table_size(n)   (((n) * sizeof(uint32_t)))
483
484char *pv_hash_lock_table;
485#define pv_hash_lock_table_size(n)  (((n) * sizeof(uint32_t)))
486
487/*
488 * Locking protocols
489 */
490
491#define bit_lock(pai, l)    //lck_spin_lock((uint32_t*)(l) + pai);
492#define bit_unlock(pai, l)  //lck_spin_unlock((uint32_t*)(l) + pai);
493
494#define lock_pvh_pai(pai)       bit_lock(pai - atop(gPhysBase), (void *)pv_lock_table)
495#define unlock_pvh_pai(pai)     bit_unlock(pai - atop(gPhysBase), (void *)pv_lock_table)
496
497#define lock_hash_hash(hash)    bit_lock(hash, (void *)pv_hash_lock_table)
498#define unlock_hash_hash(hash)  bit_unlock(hash, (void *)pv_hash_lock_table)
499
500#define LOCK_PV_HASH(hash)  lock_hash_hash(hash)
501#define UNLOCK_PV_HASH(hash)    unlock_hash_hash(hash)
502
503#define LOCK_PVH(index)     {          \
504    mp_disable_preemption();           \
505    lock_pvh_pai(index);               \
506}
507
508#define UNLOCK_PVH(index)  {      \
509    unlock_pvh_pai(index);        \
510    mp_enable_preemption();       \
511}
512
513/** ASID stuff */
514
515#define KERNEL_ASID_PID 0
516
517static vm_offset_t pm_asid_hint = KERNEL_ASID_PID + 1;
518static u_long pm_asid_bitmap[256 / (sizeof(u_long) * 8)];
519
520static u_long pm_asid_max = 255;
521static u_long pm_asids_free = 254;  /* One is reserved by the Kernel ASID */
522
523#define __BITMAP_SET(bm, n) \
524    ((bm)[(n) / (8*sizeof(bm[0]))] |= 1LU << ((n) % (8*sizeof(bm[0]))))
525#define __BITMAP_CLR(bm, n) \
526    ((bm)[(n) / (8*sizeof(bm[0]))] &= ~(1LU << ((n) % (8*sizeof(bm[0])))))
527#define __BITMAP_ISSET_P(bm, n) \
528    (((bm)[(n) / (8*sizeof(bm[0]))] & (1LU << ((n) % (8*sizeof(bm[0]))))) != 0)
529
530#define TLBINFO_ASID_MARK_USED(ti, asid) \
531    __BITMAP_SET((ti), (asid))
532#define TLBINFO_ASID_INUSE_P(ti, asid) \
533    __BITMAP_ISSET_P((ti), (asid))
534
535/** Template PTEs */
536
537/*
538 * Protection flags for various requested VM definitions, all of them are in here.
539 * These are per ARMv6/ARM11JZF-S defintions.
540 */
541arm_l2_t arm_pte_prot_templates[] = {
542    {.l2.nx = TRUE,.l2.ap = 0x00,.l2.apx = 0},  /* Privileged   ---     User    --- */
543    {.l2.nx = TRUE,.l2.ap = 0x01,.l2.apx = 0},  /* Privileged   RW-     User    --- */
544    {.l2.nx = TRUE,.l2.ap = 0x02,.l2.apx = 0},  /* Privileged   RW-     User    R-- */
545    {.l2.nx = TRUE,.l2.ap = 0x03,.l2.apx = 0},  /* Privileged   RW-     User    RW- */
546
547    {.l2.nx = FALSE,.l2.ap = 0x00,.l2.apx = 0}, /* Privileged   --X     User    --X */
548    {.l2.nx = FALSE,.l2.ap = 0x01,.l2.apx = 0}, /* Privileged   RWX     User    --X */
549    {.l2.nx = FALSE,.l2.ap = 0x02,.l2.apx = 0}, /* Privileged   RWX     User    R-X */
550    {.l2.nx = FALSE,.l2.ap = 0x03,.l2.apx = 0}, /* Privileged   RWX     User    RWX */
551
552    {.l2.nx = TRUE,.l2.ap = 0x00,.l2.apx = 1},  /* Privileged   ---     User    --- */
553    {.l2.nx = TRUE,.l2.ap = 0x01,.l2.apx = 1},  /* Privileged   R--     User    --- */
554    {.l2.nx = TRUE,.l2.ap = 0x02,.l2.apx = 1},  /* Privileged   R--     User    R-- */
555    {.l2.nx = TRUE,.l2.ap = 0x03,.l2.apx = 1},  /* Privileged   R--     User    R-- */
556
557    {.l2.nx = FALSE,.l2.ap = 0x00,.l2.apx = 1}, /* Privileged   --X     User    --X */
558    {.l2.nx = FALSE,.l2.ap = 0x01,.l2.apx = 1}, /* Privileged   R-X     User    --X */
559    {.l2.nx = FALSE,.l2.ap = 0x02,.l2.apx = 1}, /* Privileged   R-X     User    R-X */
560    {.l2.nx = FALSE,.l2.ap = 0x03,.l2.apx = 1}, /* Privileged   R-X     User    R-X */
561};
562
563uint64_t pmap_pv_hashlist_walks = 0;
564uint64_t pmap_pv_hashlist_cnts = 0;
565uint32_t pmap_pv_hashlist_max = 0;
566
567unsigned int inuse_ptepages_count = 0;
568unsigned int bootstrap_wired_pages = 0;
569int pt_fake_zone_index = -1;
570
571uint32_t alloc_ptepages_count __attribute__ ((aligned(8))) = 0LL;   /* aligned for atomic access */
572extern uint32_t pmap_asid_ncpus;
573
574/*
575 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
576 * !!!!!!!! Make SURE this remains in sync with arm_pte_prot_templates. !!!!!!!!!
577 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
578 */
579typedef enum {
580    ARM_PTE_PROT_KERNEL_NONE_USER_NONE,
581    ARM_PTE_PROT_KERNEL_RW_USER_NONE,
582    ARM_PTE_PROT_KERNEL_RW_USER_R,
583    ARM_PTE_PROT_KERNEL_RW_USER_RW,
584    ARM_PTE_PROT_KERNEL_X_USER_X,
585    ARM_PTE_PROT_KERNEL_RWX_USER_X,
586    ARM_PTE_PROT_KERNEL_RWX_USER_RX,
587    ARM_PTE_PROT_KERNEL_RWX_USER_RWX,
588    ARM_PTE_PROT_KERNEL_NONE_USER_NONE_2,
589    ARM_PTE_PROT_KERNEL_R_USER_NONE,
590    ARM_PTE_PROT_KERNEL_R_USER_R,
591    ARM_PTE_PROT_KERNEL_R_USER_R_2,
592    ARM_PTE_PROT_KERNEL_X_USER_X_2,
593    ARM_PTE_PROT_KERNEL_RX_USER_X,
594    ARM_PTE_PROT_KERNEL_RX_USER_X_2,
595    ARM_PTE_PROT_KERNEL_RX_USER_RX,
596    ARM_PTE_PROT_KERNEL_RX_USER_RX_2,
597} arm_prot_pte_definitions;
598
599/*
600 * Type Extension bits for ARM V6 and V7 MMU
601 *
602 * TEX C B                                    Shared
603 * 000 0 0  Strong order                      yes
604 * 000 0 1  Shared device                     yes
605 * 000 1 0  write through, no write alloc     S-bit
606 * 000 1 1  write back, no write alloc        S-bit
607 * 001 0 0  non-cacheable                     S-bit
608 * 001 0 1  reserved
609 * 001 1 0  reserved
610 * 001 1 1  write back, write alloc           S-bit
611 * 010 0 0  Non-shared device                 no
612 * 010 0 1  reserved
613 * 010 1 X  reserved
614 * 011 X X  reserved
615 * 1BB A A  BB for internal, AA for external  S-bit
616 *
617 *    BB    internal cache
618 *    0 0   Non-cacheable non-buffered
619 *    0 1   Write back, write alloc, buffered
620 *    1 0   Write through, no write alloc, buffered
621 *          (non-cacheable for MPCore)
622 *    1 1   Write back, no write alloc, buffered
623 *          (write back, write alloc for MPCore)
624 *
625 *    AA    external cache
626 *    0 0   Non-cacheable non-buffered
627 *    0 1   Write back, write alloc, buffered
628 *    1 0   Write through, no write alloc, buffered
629 *    1 1   Write back, no write alloc, buffered
630 */
631#define ARM_L2_C_BIT            0x00000004
632#define ARM_L2_B_BIT            0x00000008
633#define ARM_L2_4KB_TEX(x)       ((x & 0x7) << 6)  /* Type Extension */
634
635#define ARM_CACHEBIT_NONE_NO_BUFFERED         0
636#define ARM_CACHEBIT_WB_WA_BUFFERED           1
637#define ARM_CACHEBIT_WT_NWA_BUFFERED          2
638#define ARM_CACHEBIT_WB_NWA_BUFFERED          3
639
640#define ARM_L2_TEX_000          0
641#define ARM_L2_TEX_001          1
642#define ARM_L2_TEX_010          2
643#define ARM_L2_TEX_011          3
644#define ARM_L2_TEX_100          4
645#define ARM_L2_TEX_101          5
646#define ARM_L2_TEX_110          6
647#define ARM_L2_TEX_111          7
648
649/** Functions */
650
651extern int pt_fake_zone_index;
652static inline void PMAP_ZINFO_PALLOC(pmap_t pmap, vm_size_t bytes)
653{
654    thread_t thr = current_thread();
655    task_t task;
656    zinfo_usage_t zinfo;
657
658    pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
659
660    if (pt_fake_zone_index != -1 && (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
661        OSAddAtomic64(bytes, (int64_t *) & zinfo[pt_fake_zone_index].alloc);
662}
663
664static inline void PMAP_ZINFO_PFREE(pmap_t pmap, vm_size_t bytes)
665{
666    thread_t thr = current_thread();
667    task_t task;
668    zinfo_usage_t zinfo;
669
670    pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
671
672    if (pt_fake_zone_index != -1 && (task = thr->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
673        OSAddAtomic64(bytes, (int64_t *) & zinfo[pt_fake_zone_index].free);
674}
675
676static inline void PMAP_ZINFO_SALLOC(pmap_t pmap, vm_size_t bytes)
677{
678    pmap_ledger_credit(pmap, task_ledgers.tkm_shared, bytes);
679}
680
681static inline void PMAP_ZINFO_SFREE(pmap_t pmap, vm_size_t bytes)
682{
683    pmap_ledger_debit(pmap, task_ledgers.tkm_shared, bytes);
684}
685
686static inline uint32_t pvhashidx(pmap_t pmap, vm_map_offset_t va)
687{
688    return ((uint32_t) (uintptr_t) pmap ^ ((uint32_t) (va >> PAGE_SHIFT) & 0xFFFFFFFF)) & npvhash;
689}
690
691static inline void pv_hash_add(pv_hashed_entry_t pvh_e, pv_rooted_entry_t pv_h)
692{
693    pv_hashed_entry_t *hashp;
694    int pvhash_idx;
695
696    CHK_NPVHASH();
697    pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
698    LOCK_PV_HASH(pvhash_idx);
699    insque(&pvh_e->qlink, &pv_h->qlink);
700    hashp = pvhash(pvhash_idx);
701#if PV_DEBUG
702    if (NULL == hashp)
703        panic("pv_hash_add(%p) null hash bucket", pvh_e);
704#endif
705    pvh_e->nexth = *hashp;
706    *hashp = pvh_e;
707    UNLOCK_PV_HASH(pvhash_idx);
708}
709
710/*
711 * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
712 * properly deals with the anchor.
713 * must be called with the hash locked, does not unlock it
714 */
715
716static inline void pmap_pvh_unlink(pv_hashed_entry_t pvh)
717{
718    pv_hashed_entry_t curh;
719    pv_hashed_entry_t *pprevh;
720    int pvhash_idx;
721
722    CHK_NPVHASH();
723    pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
724
725    pprevh = pvhash(pvhash_idx);
726
727#if PV_DEBUG
728    if (NULL == *pprevh)
729        panic("pvh_unlink null anchor");    /* JK DEBUG */
730#endif
731    curh = *pprevh;
732
733    while (PV_HASHED_ENTRY_NULL != curh) {
734        if (pvh == curh)
735            break;
736        pprevh = &curh->nexth;
737        curh = curh->nexth;
738    }
739    if (PV_HASHED_ENTRY_NULL == curh)
740        panic("pmap_pvh_unlink no pvh");
741    *pprevh = pvh->nexth;
742    return;
743}
744
745static inline void pv_hash_remove(pv_hashed_entry_t pvh_e)
746{
747    int pvhash_idx;
748
749    CHK_NPVHASH();
750    pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
751    LOCK_PV_HASH(pvhash_idx);
752    remque(&pvh_e->qlink);
753    pmap_pvh_unlink(pvh_e);
754    UNLOCK_PV_HASH(pvhash_idx);
755}
756
757static inline boolean_t popcnt1(uint64_t distance)
758{
759    return ((distance & (distance - 1)) == 0);
760}
761
762/*
763 * Routines to handle suppression of/recovery from some forms of pagetable corruption
764 * incidents observed in the field. These can be either software induced (wild
765 * stores to the mapwindows where applicable, use after free errors
766 * (typically of pages addressed physically), mis-directed DMAs etc., or due
767 * to DRAM/memory hierarchy/interconnect errors. Given the theoretical rarity of these errors,
768 * the recording mechanism is deliberately not MP-safe. The overarching goal is to
769 * still assert on potential software races, but attempt recovery from incidents
770 * identifiable as occurring due to issues beyond the control of the pmap module.
771 * The latter includes single-bit errors and malformed pagetable entries.
772 * We currently limit ourselves to recovery/suppression of one incident per
773 * PMAP_PAGETABLE_CORRUPTION_INTERVAL seconds, and details of the incident
774 * are logged.
775 * Assertions are not suppressed if kernel debugging is enabled. (DRK 09)
776 */
777
778typedef enum {
779    PTE_VALID = 0x0,
780    PTE_INVALID = 0x1,
781    PTE_RSVD = 0x2,
782    PTE_SUPERVISOR = 0x4,
783    PTE_BITFLIP = 0x8,
784    PV_BITFLIP = 0x10,
785    PTE_INVALID_CACHEABILITY = 0x20
786} pmap_pagetable_corruption_t;
787
788typedef enum {
789    ROOT_PRESENT = 0,
790    ROOT_ABSENT = 1
791} pmap_pv_assertion_t;
792
793typedef enum {
794    PMAP_ACTION_IGNORE = 0x0,
795    PMAP_ACTION_ASSERT = 0x1,
796    PMAP_ACTION_RETRY = 0x2,
797    PMAP_ACTION_RETRY_RELOCK = 0x4
798} pmap_pagetable_corruption_action_t;
799
800#define PMAP_PAGETABLE_CORRUPTION_INTERVAL (6ULL * 3600ULL)
801extern uint64_t pmap_pagetable_corruption_interval_abstime;
802
803extern uint32_t pmap_pagetable_corruption_incidents;
804#define PMAP_PAGETABLE_CORRUPTION_MAX_LOG (8)
805typedef struct {
806    pmap_pv_assertion_t incident;
807    pmap_pagetable_corruption_t reason;
808    pmap_pagetable_corruption_action_t action;
809    pmap_t pmap;
810    vm_map_offset_t vaddr;
811    pt_entry_t pte;
812    ppnum_t ppn;
813    pmap_t pvpmap;
814    vm_map_offset_t pvva;
815    uint64_t abstime;
816} pmap_pagetable_corruption_record_t;
817
818pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
819uint32_t pmap_pagetable_corruption_incidents;
820uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
821uint64_t pmap_pagetable_corruption_interval_abstime;
822thread_call_t pmap_pagetable_corruption_log_call;
823static thread_call_data_t pmap_pagetable_corruption_log_call_data;
824boolean_t pmap_pagetable_corruption_timeout = FALSE;
825
826static inline void pmap_pagetable_corruption_log(pmap_pv_assertion_t incident, pmap_pagetable_corruption_t suppress_reason, pmap_pagetable_corruption_action_t action, pmap_t pmap, vm_map_offset_t vaddr, pt_entry_t * ptep, ppnum_t ppn, pmap_t pvpmap, vm_map_offset_t pvva)
827{
828    uint32_t pmap_pagetable_corruption_log_index;
829    pmap_pagetable_corruption_log_index = pmap_pagetable_corruption_incidents++ % PMAP_PAGETABLE_CORRUPTION_MAX_LOG;
830    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].incident = incident;
831    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].reason = suppress_reason;
832    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].action = action;
833    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pmap = pmap;
834    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].vaddr = vaddr;
835    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pte = *ptep;
836    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].ppn = ppn;
837    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvpmap = pvpmap;
838    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].pvva = pvva;
839    pmap_pagetable_corruption_records[pmap_pagetable_corruption_log_index].abstime = mach_absolute_time();
840    /*
841     * Asynchronously log
842     */
843    thread_call_enter(pmap_pagetable_corruption_log_call);
844}
845
846static inline pmap_pagetable_corruption_action_t pmap_classify_pagetable_corruption(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t * ppnp, pt_entry_t * ptep, pmap_pv_assertion_t incident)
847{
848    pmap_pagetable_corruption_action_t action = PMAP_ACTION_ASSERT;
849    pmap_pagetable_corruption_t suppress_reason = PTE_VALID;
850    ppnum_t suppress_ppn = 0;
851    pt_entry_t cpte = *ptep;
852    ppnum_t cpn = pa_index((cpte) & L2_ADDR_MASK);
853    ppnum_t ppn = *ppnp;
854    pv_rooted_entry_t pv_h = pai_to_pvh((ppn));
855    pv_rooted_entry_t pv_e = pv_h;
856    uint32_t bitdex;
857    pmap_t pvpmap = pv_h->pmap;
858    vm_map_offset_t pvva = pv_h->va;
859    boolean_t ppcd = FALSE;
860
861    /*
862     * Ideally, we'd consult the Mach VM here to definitively determine
863     * * the nature of the mapping for this address space and address.
864     * * As that would be a layering violation in this context, we
865     * * use various heuristics to recover from single bit errors,
866     * * malformed pagetable entries etc. These are not intended
867     * * to be comprehensive.
868     */
869
870    /*
871     * Correct potential single bit errors in either (but not both) element
872     * of the PV
873     */
874    do {
875        if ((popcnt1((uintptr_t) pv_e->pmap ^ (uintptr_t) pmap) && pv_e->va == vaddr) || (pv_e->pmap == pmap && popcnt1(pv_e->va ^ vaddr))) {
876            pv_e->pmap = pmap;
877            pv_e->va = vaddr;
878            suppress_reason = PV_BITFLIP;
879            action = PMAP_ACTION_RETRY;
880            goto pmap_cpc_exit;
881        }
882    } while (((pv_e = (pv_rooted_entry_t) queue_next(&pv_e->qlink))) && (pv_e != pv_h));
883
884    /*
885     * Discover root entries with a Hamming
886     * * distance of 1 from the supplied
887     * * physical page frame.
888     */
889    for (bitdex = 0; bitdex < (sizeof(ppnum_t) << 3); bitdex++) {
890        ppnum_t npn = cpn ^ (ppnum_t) (1ULL << bitdex);
891        {
892            pv_rooted_entry_t npv_h = pai_to_pvh((npn));
893            if (npv_h->va == vaddr && npv_h->pmap == pmap) {
894                suppress_reason = PTE_BITFLIP;
895                suppress_ppn = npn;
896                action = PMAP_ACTION_RETRY_RELOCK;
897                UNLOCK_PVH((ppn));
898                *ppnp = npn;
899                goto pmap_cpc_exit;
900            }
901        }
902    }
903
904    if (pmap == kernel_pmap) {
905        action = PMAP_ACTION_ASSERT;
906        goto pmap_cpc_exit;
907    }
908
909    /*
910     * Check for malformed/inconsistent entries
911     */
912
913    if ((pmap != kernel_pmap) && ((cpte & L2_ACCESS_USER) == 0)) {
914        action = PMAP_ACTION_IGNORE;
915        suppress_reason = PTE_SUPERVISOR;
916    }
917 pmap_cpc_exit:
918    PE_parse_boot_argn("-pmap_pagetable_corruption_deassert", &ppcd, sizeof(ppcd));
919
920    if (debug_boot_arg && !ppcd) {
921        action = PMAP_ACTION_ASSERT;
922    }
923
924    if ((mach_absolute_time() - pmap_pagetable_corruption_last_abstime) < pmap_pagetable_corruption_interval_abstime) {
925        action = PMAP_ACTION_ASSERT;
926        pmap_pagetable_corruption_timeout = TRUE;
927    } else {
928        pmap_pagetable_corruption_last_abstime = mach_absolute_time();
929    }
930    pmap_pagetable_corruption_log(incident, suppress_reason, action, pmap, vaddr, &cpte, *ppnp, pvpmap, pvva);
931    return action;
932}
933
934/*
935 * Remove pv list entry.
936 * Called with pv_head_table entry locked.
937 * Returns pv entry to be freed (or NULL).
938 */
939static inline __attribute__ ((always_inline)) pv_hashed_entry_t pmap_pv_remove(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t * ppnp, pt_entry_t * pte)
940{
941    pv_hashed_entry_t pvh_e;
942    pv_rooted_entry_t pv_h;
943    pv_hashed_entry_t *pprevh;
944    int pvhash_idx;
945    uint32_t pv_cnt;
946    ppnum_t ppn;
947
948 pmap_pv_remove_retry:
949    ppn = *ppnp;
950    pvh_e = PV_HASHED_ENTRY_NULL;
951    pv_h = pai_to_pvh((ppn));
952
953    if (__improbable(pv_h->pmap == PMAP_NULL)) {
954        pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_ABSENT);
955        if (pac == PMAP_ACTION_IGNORE)
956            goto pmap_pv_remove_exit;
957        else if (pac == PMAP_ACTION_ASSERT)
958            panic("pmap_pv_remove(%p,0x%x,0x%x, 0x%x, %p, %p): null pv_list!", pmap, vaddr, ppn, *pte, ppnp, pte);
959        else if (pac == PMAP_ACTION_RETRY_RELOCK) {
960            LOCK_PVH((*ppnp));
961            goto pmap_pv_remove_retry;
962        } else if (pac == PMAP_ACTION_RETRY)
963            goto pmap_pv_remove_retry;
964    }
965
966    if (pv_h->va == vaddr && pv_h->pmap == pmap) {
967        /*
968         * Header is the pv_rooted_entry.
969         * We can't free that. If there is a queued
970         * entry after this one we remove that
971         * from the ppn queue, we remove it from the hash chain
972         * and copy it to the rooted entry. Then free it instead.
973         */
974        pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
975        if (pv_h != (pv_rooted_entry_t) pvh_e) {
976            /*
977             * Entry queued to root, remove this from hash
978             * and install as new root.
979             */
980            CHK_NPVHASH();
981            pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
982            LOCK_PV_HASH(pvhash_idx);
983            remque(&pvh_e->qlink);
984            pprevh = pvhash(pvhash_idx);
985            if (PV_HASHED_ENTRY_NULL == *pprevh) {
986                panic("pmap_pv_remove(%p,0x%x,0x%x): " "empty hash, removing rooted", pmap, vaddr, ppn);
987            }
988            pmap_pvh_unlink(pvh_e);
989            UNLOCK_PV_HASH(pvhash_idx);
990            pv_h->pmap = pvh_e->pmap;
991            pv_h->va = pvh_e->va;   /* dispose of pvh_e */
992        } else {
993            /*
994             * none queued after rooted
995             */
996            pv_h->pmap = PMAP_NULL;
997            pvh_e = PV_HASHED_ENTRY_NULL;
998        }
999    } else {
1000        /*
1001         * not removing rooted pv. find it on hash chain, remove from
1002         * ppn queue and hash chain and free it
1003         */
1004        CHK_NPVHASH();
1005        pvhash_idx = pvhashidx(pmap, vaddr);
1006        LOCK_PV_HASH(pvhash_idx);
1007        pprevh = pvhash(pvhash_idx);
1008        if (PV_HASHED_ENTRY_NULL == *pprevh) {
1009            panic("pmap_pv_remove(%p,0x%x,0x%x, 0x%x, %p): empty hash", pmap, vaddr, ppn, *pte, pte);
1010        }
1011        pvh_e = *pprevh;
1012        pmap_pv_hashlist_walks++;
1013        pv_cnt = 0;
1014        while (PV_HASHED_ENTRY_NULL != pvh_e) {
1015            pv_cnt++;
1016            if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn)
1017                break;
1018            pprevh = &pvh_e->nexth;
1019            pvh_e = pvh_e->nexth;
1020        }
1021
1022        if (PV_HASHED_ENTRY_NULL == pvh_e) {
1023            pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT);
1024
1025            if (pac == PMAP_ACTION_ASSERT)
1026                panic("pmap_pv_remove(%p, 0x%x, 0x%x, 0x%x, %p, %p): pv not on hash, head: %p, 0x%x", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, pv_h->va);
1027            else {
1028                UNLOCK_PV_HASH(pvhash_idx);
1029                if (pac == PMAP_ACTION_RETRY_RELOCK) {
1030                    LOCK_PVH(ppn_to_pai(*ppnp));
1031                    goto pmap_pv_remove_retry;
1032                } else if (pac == PMAP_ACTION_RETRY) {
1033                    goto pmap_pv_remove_retry;
1034                } else if (pac == PMAP_ACTION_IGNORE) {
1035                    goto pmap_pv_remove_exit;
1036                }
1037            }
1038        }
1039
1040        pmap_pv_hashlist_cnts += pv_cnt;
1041        if (pmap_pv_hashlist_max < pv_cnt)
1042            pmap_pv_hashlist_max = pv_cnt;
1043        *pprevh = pvh_e->nexth;
1044        remque(&pvh_e->qlink);
1045        UNLOCK_PV_HASH(pvhash_idx);
1046    }
1047 pmap_pv_remove_exit:
1048    return pvh_e;
1049}
1050
1051__private_extern__ void pmap_pagetable_corruption_msg_log(int (*log_func) (const char *fmt, ...) __printflike(1, 2))
1052{
1053    if (pmap_pagetable_corruption_incidents > 0) {
1054        int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
1055        (*log_func) ("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
1056        for (i = 0; i < e; i++) {
1057            (*log_func) ("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident, pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
1058        }
1059    }
1060}
1061
1062static inline void pmap_pagetable_corruption_log_setup(void)
1063{
1064    if (pmap_pagetable_corruption_log_call == NULL) {
1065        nanotime_to_absolutetime(20000, 0, &pmap_pagetable_corruption_interval_abstime);
1066        thread_call_setup(&pmap_pagetable_corruption_log_call_data, (thread_call_func_t) pmap_pagetable_corruption_msg_log, (thread_call_param_t) & printf);
1067        pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
1068    }
1069}
1070
1071/**
1072 * pmap_vm_prot_to_page_flags
1073 */
1074uint32_t pmap_vm_prot_to_page_flags(pmap_t pmap, vm_prot_t prot, int wired, int nx)
1075{
1076    arm_l2_t *current_l2 = &arm_pte_prot_templates[0];
1077    pt_entry_t pte = 0;
1078
1079    /*
1080     * Pmaps other than the kernel one will always have user accessible pages.
1081     */
1082    if (pmap != kernel_pmap)
1083        pte |= L2_ACCESS_USER;
1084    pte |= L2_ACCESS_PRW;
1085
1086    /*
1087     * Enforce Read-Write if necessary.
1088     */
1089    if (prot & VM_PROT_WRITE)
1090        pte &= ~(L2_ACCESS_APX);    /* APX-bit, RW? */
1091    else
1092        pte |= (L2_ACCESS_APX); /* APX-bit, R-? */
1093
1094    /*
1095     * Enforce XN if necessary.
1096     */
1097    if (!(prot & VM_PROT_EXECUTE))
1098        pte |= L2_NX_BIT;       /* XN-bit, R?X */
1099
1100    return pte;
1101}
1102
1103/**
1104 * phys_attribute_clear and friends. These suck.
1105 */
1106void phys_attribute_clear(ppnum_t pn, int bits)
1107{
1108    int pai;
1109    pv_rooted_entry_t pv_h;
1110
1111    assert(pn != vm_page_fictitious_addr);
1112
1113    pv_h = pai_to_pvh(pn);
1114    pv_h->flags &= ~bits;
1115
1116    return;
1117}
1118
1119int phys_attribute_test(ppnum_t pn, int bits)
1120{
1121    int pai;
1122    pv_rooted_entry_t pv_h;
1123
1124    assert(pn != vm_page_fictitious_addr);
1125
1126    pv_h = pai_to_pvh(pn);
1127    if ((pv_h->flags & bits) == (unsigned int)bits)
1128        return bits;
1129
1130    return (pv_h->flags & bits);
1131}
1132
1133void phys_attribute_set(ppnum_t pn, int bits)
1134{
1135    int pai;
1136    pv_rooted_entry_t pv_h;
1137
1138    assert(pn != vm_page_fictitious_addr);
1139
1140    pv_h = pai_to_pvh(pn);
1141    pv_h->flags |= bits;
1142
1143    return;
1144}
1145
1146/**
1147 * pmap_adjust_unnest_parameters
1148 *
1149 * Invoked by the Mach VM to determine the platform specific unnest region. This
1150 * is not used on ARM platforms.
1151 */
1152boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t * s, vm_map_offset_t * e)
1153{
1154    return FALSE;
1155}
1156
1157/**
1158 * pmap_attributes
1159 *
1160 * Set/Get special memory attributes; Set/Get is not implemented.
1161 */
1162kern_return_t pmap_attribute(pmap_t pmap, vm_offset_t address, vm_size_t size, vm_machine_attribute_t atte, vm_machine_attribute_val_t * attrp)
1163{
1164    return KERN_INVALID_ADDRESS;
1165}
1166
1167/**
1168 * pmap_attribute_cache_sync
1169 *
1170 * Flush appropriate cache based on page number sent.
1171 */
1172kern_return_t pmap_attribute_cache_sync(ppnum_t pn, vm_size_t size, vm_machine_attribute_t attr, vm_machine_attribute_val_t * attrp)
1173{
1174    Debugger("pmap_attribute_cache_sync");
1175    return KERN_SUCCESS;
1176}
1177
1178/**
1179 * pmap_get_cache_attributes
1180 */
1181unsigned int pmap_get_cache_attributes(ppnum_t pn) {
1182    /* If the pmap subsystem isn't up, just assume writethrough cache. */
1183    if(!pmap_initialized)
1184        return (ARM_CACHEBIT_WT_NWA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WT_NWA_BUFFERED));
1185
1186    /* If it's out of memory, assume it's not cacheable at all. */
1187    if(!pmap_valid_page(pn))
1188        return 0;
1189
1190    assert(pn != vm_page_fictitious_addr);
1191    pv_rooted_entry_t pv_h = pai_to_pvh(pn);
1192    assert(pv_h);
1193
1194    unsigned int attr = pv_h->flags;
1195    unsigned int template = 0;
1196
1197    if (attr & PMAP_OSPTE_TYPE_NOCACHE)
1198        /* No cache, strongly ordered memory. */
1199        template |= 0;
1200    else
1201        /* Assume writethrough, no write allocate for now. */
1202        template |= (ARM_CACHEBIT_WT_NWA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WT_NWA_BUFFERED));
1203
1204    return template;
1205}
1206
1207
1208/**
1209 * pmap_cache_attributes
1210 */
1211unsigned int pmap_cache_attributes(ppnum_t pn)
1212{
1213    if (!pmap_get_cache_attributes(pn) & ARM_L2_C_BIT)
1214        return (VM_WIMG_IO);
1215    else
1216        return (VM_WIMG_COPYBACK);
1217}
1218
1219/**
1220 * pmap_clear_noencrypt
1221 */
1222void pmap_clear_noencrypt(ppnum_t pn)
1223{
1224    if (!pmap_initialized)
1225        return;
1226    phys_attribute_clear(pn, PMAP_OSPTE_TYPE_NOENCRYPT);
1227}
1228
1229/**
1230 * pmap_is_noencrypt
1231 */
1232boolean_t pmap_is_noencrypt(ppnum_t pn)
1233{
1234    if (!pmap_initialized)
1235        return FALSE;
1236    return (phys_attribute_test(pn, PMAP_OSPTE_TYPE_NOENCRYPT));
1237}
1238
1239/**
1240 * pmap_set_noencrypt
1241 */
1242void pmap_set_noencrypt(ppnum_t pn)
1243{
1244    if (!pmap_initialized)
1245        return;
1246    phys_attribute_set(pn, PMAP_OSPTE_TYPE_NOENCRYPT);
1247}
1248
1249
1250/**
1251 * pmap_flush_tlbs
1252 */
1253void
1254pmap_flush_tlbs(pmap_t  pmap, vm_map_offset_t startv, vm_map_offset_t endv)
1255{
1256    unsigned int    cpu;
1257    unsigned int    cpu_bit;
1258    unsigned int    my_cpu = cpu_number();
1259    pmap_paddr_t    ttb = pmap->pm_l1_phys;
1260    boolean_t   flush_self = FALSE;
1261    boolean_t   pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
1262
1263    assert((processor_avail_count < 2) ||
1264           (ml_get_interrupts_enabled() && get_preemption_level() != 0));
1265
1266    if (pmap_asid_ncpus) {
1267        pmap_asid_invalidate_all_cpus(pmap);
1268        __asm__ volatile("":::"memory");
1269    }
1270
1271    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
1272        if (!cpu_datap(cpu)->cpu_running)
1273            continue;
1274        uint32_t ttbr_pmap = armreg_ttbr_read() & 0xFFFFFF00;
1275
1276        /* Current pmap is active, flush it. */
1277        if ((ttb == ttbr_pmap) ||
1278            (pmap_is_shared)) {
1279            if (cpu == my_cpu) {
1280                flush_self = TRUE;
1281                continue;
1282            }
1283
1284            /* xxx broadcast IPI to all other CPUs to flush */
1285        }
1286    }
1287
1288    /*
1289     * Flush local tlb if required.
1290     * Do this now to overlap with other processors responding.
1291     */
1292    if (flush_self) {
1293        if (pmap_asid_ncpus) {
1294            pmap_asid_validate_cpu(pmap, my_cpu);
1295            if (pmap_is_shared)
1296                arm_tlb_flushID();
1297            else
1298                arm_tlb_flushID_ASID(pmap->pm_asid & 0xFF);
1299        }
1300        else
1301            arm_tlb_flushID();
1302    }
1303
1304    if (__improbable((pmap == kernel_pmap) && (flush_self != TRUE))) {
1305        panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE");
1306    }
1307}
1308
1309/*
1310 * Update cache attributes for all extant managed mappings.
1311 * Assumes PV for this page is locked, and that the page
1312 * is managed.
1313 */
1314
1315static uint32_t cacheability_mask = ~((ARM_L2_TEX_011 << 2) | ARM_L2_4KB_TEX(ARM_L2_TEX_111));
1316
1317void
1318pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
1319    pv_rooted_entry_t pv_h, pv_e;
1320    pv_hashed_entry_t pvh_e, nexth;
1321    vm_map_offset_t vaddr;
1322    pmap_t  pmap;
1323    pt_entry_t  *ptep;
1324
1325    pv_h = pai_to_pvh(pn);
1326    /*
1327     * TODO: translate the PHYS_* bits to PTE bits, while they're
1328     * currently identical, they may not remain so
1329     * Potential optimization (here and in page_protect),
1330     * parallel shootdowns, check for redundant
1331     * attribute modifications.
1332     */
1333
1334    /*
1335     * Alter attributes on all mappings
1336     */
1337    if (pv_h->pmap != PMAP_NULL) {
1338        pv_e = pv_h;
1339        pvh_e = (pv_hashed_entry_t)pv_e;
1340
1341        do {
1342            pmap = pv_e->pmap;
1343            vaddr = pv_e->va;
1344            ptep = (pt_entry_t *)pmap_pte(pmap, vaddr);
1345
1346            if (ptep == 0)
1347                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%x kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
1348
1349            nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
1350
1351            /*
1352             * Update PTE.
1353             */
1354            pt_entry_t* cpte = (pt_entry_t*)ptep;
1355            *cpte &= cacheability_mask;
1356            *cpte |= attributes;
1357            pmap_flush_tlbs(pmap, vaddr, vaddr + PAGE_SIZE);
1358
1359            pvh_e = nexth;
1360        } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
1361    }
1362}
1363
1364
1365/**
1366 * pmap_set_cache_attributes
1367 *
1368 * Set the specified cache attributes.
1369 */
1370void pmap_set_cache_attributes(ppnum_t pn, unsigned int cacheattr)
1371{
1372    unsigned int current, template = 0;
1373    int pai;
1374
1375    if (cacheattr & VM_MEM_NOT_CACHEABLE) {
1376        /*
1377         * Template of 0 is non-cacheable, strongly ordered memory.
1378         */
1379        template &= cacheability_mask;
1380    } else {
1381        /*
1382         * Writethrough.
1383         */
1384        if(cacheattr == VM_WIMG_WTHRU)
1385            template |= (ARM_CACHEBIT_WT_NWA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WT_NWA_BUFFERED));
1386        /*
1387         * Writecombine/copyback = writeback.
1388         */
1389        else if(cacheattr == VM_WIMG_WCOMB || cacheattr == VM_WIMG_COPYBACK)
1390            template |= (ARM_CACHEBIT_WB_WA_BUFFERED << 2) | (ARM_L2_4KB_TEX(ARM_L2_TEX_100 | ARM_CACHEBIT_WB_WA_BUFFERED));
1391    }
1392
1393    /*
1394     * On MP systems, interrupts must be enabled.
1395     */
1396    if (processor_avail_count > 1 && !ml_get_interrupts_enabled())
1397        panic("interrupts must be enabled for pmap_set_cache_attributes");
1398
1399    assert((pn != vm_page_fictitious_addr) && (pn != vm_page_guard_addr));
1400
1401    LOCK_PVH(pai);
1402    pmap_update_cache_attributes_locked(pn, template);
1403
1404    if(cacheattr & VM_MEM_NOT_CACHEABLE)
1405        phys_attribute_set(pn, PMAP_OSPTE_TYPE_NOCACHE);
1406    else
1407        phys_attribute_clear(pn, PMAP_OSPTE_TYPE_NOCACHE);
1408
1409    UNLOCK_PVH(pai);
1410
1411    return;
1412}
1413
1414/**
1415 * compute_pmap_gc_throttle
1416 *
1417 * Unused.
1418 */
1419void compute_pmap_gc_throttle(void *arg __unused)
1420{
1421    return;
1422}
1423
1424/**
1425 * pmap_change_wiring
1426 *
1427 * Specify pageability.
1428 */
1429void pmap_change_wiring(pmap_t map, vm_map_offset_t va, boolean_t wired)
1430{
1431    pt_entry_t *pte;
1432    uint32_t pa;
1433
1434    /*
1435     * Lock the pmap.
1436     */
1437    PMAP_LOCK(map);
1438
1439    if ((pte = (pt_entry_t *)pmap_pte(map, va)) == (pt_entry_t *) 0)
1440        panic("pmap_change_wiring: pte missing");
1441
1442    /*
1443     * Use FVTP to get the physical PPN. This will not work with the old
1444     * pmap_extract.
1445     */
1446    PMAP_UNLOCK(map);
1447    pa = pmap_extract(map, va);
1448    PMAP_LOCK(map);
1449    assert(pa);
1450
1451    if (wired && phys_attribute_test(pa >> PAGE_SHIFT, PMAP_OSPTE_TYPE_WIRED)) {
1452        /*
1453         * We are wiring down the mapping.
1454         */
1455        pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
1456        OSAddAtomic(+1, &map->pm_stats.wired_count);
1457        phys_attribute_set(pa >> PAGE_SHIFT, PMAP_OSPTE_TYPE_WIRED);
1458    } else {
1459        /*
1460         * Unwiring the mapping.
1461         */
1462        assert(map->pm_stats.wired_count >= 1);
1463        OSAddAtomic(-1, &map->pm_stats.wired_count);
1464        phys_attribute_clear(pa >> PAGE_SHIFT, PMAP_OSPTE_TYPE_WIRED);
1465        pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
1466    }
1467
1468    /*
1469     * Done, unlock the map.
1470     */
1471    PMAP_UNLOCK(map);
1472    return;
1473}
1474
1475/**
1476 * pmap_tte
1477 */
1478vm_offset_t pmap_tte(pmap_t pmap, vm_offset_t virt)
1479{
1480    uint32_t tte_offset_begin;
1481    tte_offset_begin = pmap->pm_l1_virt;
1482    if ((tte_offset_begin + L1_SIZE) < addr_to_tte(pmap->pm_l1_virt, virt))
1483        panic("Translation table entry extends past L1 size (base: 0x%08X)", tte_offset_begin);
1484    return addr_to_tte(pmap->pm_l1_virt, virt);
1485}
1486
1487/**
1488 * pmap_pte
1489 */
1490vm_offset_t pmap_pte(pmap_t pmap, vm_offset_t virt)
1491{
1492    uint32_t *tte_offset = (uint32_t *) pmap_tte(pmap, virt);
1493    uint32_t tte, pte, *ptep;
1494
1495    /*
1496     * Get the translation-table entry.
1497     */
1498    assert(tte_offset);
1499    tte = *tte_offset;
1500
1501    /*
1502     * If the requested PTE entry is required is indeed the commonpage and
1503     * we are not the kernel pmap, quit.
1504     *
1505     * This is because the TTBCR is set to 4kB, and all higher page table
1506     * address accesses will go to the kernel.
1507     */
1508    if (pmap != kernel_pmap && virt >= _COMM_PAGE_BASE_ADDRESS)
1509        return 0;
1510
1511    /*
1512     * Verify it's not a section mapping.
1513     */
1514    if ((tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_SECTION) {
1515        panic("Translation table entry is a section mapping (tte %x ttep %p ttebv %x)!\n", tte, tte_offset, pmap->pm_l1_virt);
1516    }
1517
1518    /*
1519     * Clean the TTE bits off, get the address.
1520     */
1521    pte = L1_PTE_ADDR(tte);
1522    if (!pte)
1523        return 0;
1524
1525    /*
1526     * Return the virtual mapped PTE.
1527     */
1528    ptep = (uint32_t *) ((phys_to_virt(pte) + pte_offset(virt)));
1529
1530    return (vm_offset_t)(ptep);
1531}
1532
1533void mapping_free_prime(void)
1534{
1535    int i;
1536    pv_hashed_entry_t pvh_e;
1537    pv_hashed_entry_t pvh_eh;
1538    pv_hashed_entry_t pvh_et;
1539    int pv_cnt;
1540
1541    pv_cnt = 0;
1542    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1543    for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
1544        pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1545
1546        pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1547        pvh_eh = pvh_e;
1548
1549        if (pvh_et == PV_HASHED_ENTRY_NULL)
1550            pvh_et = pvh_e;
1551        pv_cnt++;
1552    }
1553    PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1554
1555    pv_cnt = 0;
1556    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1557    for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
1558        pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1559
1560        pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1561        pvh_eh = pvh_e;
1562
1563        if (pvh_et == PV_HASHED_ENTRY_NULL)
1564            pvh_et = pvh_e;
1565        pv_cnt++;
1566    }
1567    PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1568
1569}
1570
1571void mapping_adjust(void)
1572{
1573    pv_hashed_entry_t pvh_e;
1574    pv_hashed_entry_t pvh_eh;
1575    pv_hashed_entry_t pvh_et;
1576    int pv_cnt;
1577    int i;
1578
1579    if (mapping_adjust_call == NULL) {
1580        pmap_pagetable_corruption_log_setup();
1581        thread_call_setup(&mapping_adjust_call_data, (thread_call_func_t) mapping_adjust, (thread_call_param_t) NULL);
1582        mapping_adjust_call = &mapping_adjust_call_data;
1583    }
1584
1585    pv_cnt = 0;
1586    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1587    if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
1588        for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
1589            pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1590
1591            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1592            pvh_eh = pvh_e;
1593
1594            if (pvh_et == PV_HASHED_ENTRY_NULL)
1595                pvh_et = pvh_e;
1596            pv_cnt++;
1597        }
1598        PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1599    }
1600
1601    pv_cnt = 0;
1602    pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
1603    if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
1604        for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
1605            pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
1606
1607            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1608            pvh_eh = pvh_e;
1609
1610            if (pvh_et == PV_HASHED_ENTRY_NULL)
1611                pvh_et = pvh_e;
1612            pv_cnt++;
1613        }
1614        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
1615    }
1616    mappingrecurse = 0;
1617}
1618
1619/*
1620 * pmap_map
1621 *
1622 * Map specified virtual address range to a physical one.
1623 */
1624vm_offset_t pmap_map(vm_offset_t virt, vm_map_offset_t start_addr, vm_map_offset_t end_addr, vm_prot_t prot, unsigned int flags)
1625{
1626    int ps;
1627
1628    ps = PAGE_SIZE;
1629    while (start_addr < end_addr) {
        pmap_enter(kernel_pmap, (vm_map_offset_t) virt, (start_addr >> PAGE_SHIFT), prot, VM_PROT_NONE, flags, TRUE);
1631        virt += ps;
1632        start_addr += ps;
1633    }
1634    return (virt);
1635}
1636
1637/**
1638 * pmap_next_page_hi
1639 *
1640 * Allocate physical pages.
1641 */
1642boolean_t pmap_next_page_hi(ppnum_t * pnum)
1643{
1644    return pmap_next_page(pnum);
1645}
1646
1647/**
1648 * pmap_zero_page
1649 *
1650 * Zero a physical page.
1651 */
1652void pmap_zero_page(ppnum_t p)
1653{
1654    assert(p != vm_page_fictitious_addr);
1655
1656    /*
1657     * Make sure the page is valid.
1658     */
1659    if (((p << PAGE_SHIFT) < avail_start) || ((p << PAGE_SHIFT) > avail_end))
1660        panic("pmap_zero_page: zeroing a non-managed page, ppnum %d", p);
1661
1662    bzero((void *)phys_to_virt(p << PAGE_SHIFT), PAGE_SIZE);
1663}
1664
1665/**
1666 * pmap_clear_refmod
1667 *
1668 * Clears the referenced and modified bits as specified by the mask
1669 * of the specified physical page.
1670 */
1671void pmap_clear_refmod(ppnum_t pn, unsigned int mask)
1672{
1673    phys_attribute_clear(pn, mask);
1674}
1675
1676/**
1677 * io_map
1678 *
1679 * Maps an IO region and returns its virtual address.
1680 */
1681vm_offset_t io_map(vm_offset_t phys_addr, vm_size_t size, unsigned int flags)
1682{
1683    vm_offset_t start;
1684
1685    if (kernel_map == VM_MAP_NULL) {
1686        /*
1687         * VM is not initialized.  Grab memory.
1688         */
1689        start = virt_begin;
1690        virt_begin += round_page(size);
1691
1692        (void) pmap_map_bd(start, phys_addr, phys_addr + round_page(size), VM_PROT_READ | VM_PROT_WRITE, flags);
1693    } else {
1694        (void) kmem_alloc_pageable(kernel_map, &start, round_page(size));
1695        (void) pmap_map(start, phys_addr, phys_addr + round_page(size), VM_PROT_READ | VM_PROT_WRITE, flags);
1696    }
1697
1698    return (start);
1699}
1700
1701vm_offset_t io_map_spec(vm_map_offset_t phys_addr, vm_size_t size, unsigned int flags)
1702{
1703    return (io_map(phys_addr, size, flags));
1704}
1705
1706/**
1707 * pmap_next_page
1708 *
1709 * Allocate physical pages.
1710 */
1711boolean_t pmap_next_page(ppnum_t * addrp)
1712{
1713    if (first_avail >= avail_end) {
1714        kprintf("pmap_next_page: ran out of possible pages, last page was 0x%08x", first_avail);
1715        return FALSE;
1716    }
1717
1718    *addrp = pa_index(first_avail);
1719
1720    /*
1721     * We lost a page.
1722     */
1723    first_avail += PAGE_SIZE;
1724    avail_remaining--;
1725    return TRUE;
1726}
1727
1728/**
1729 * pmap_virtual_space
1730 *
1731 * Get virtual space parameters.
1732 */
1733void pmap_virtual_space(vm_offset_t * startp, vm_offset_t * endp)
1734{
1735    *startp = virt_begin;
1736    *endp = virt_end;
1737    kprintf("pmap_virtual_space: VM region 0x%08x - 0x%08x\n", virt_begin, virt_end);
1738}
1739
1740/**
1741 * pmap_free_pages
1742 *
1743 * Return free page count.
1744 */
1745unsigned int pmap_free_pages(void)
1746{
1747    return avail_remaining;
1748}
1749
1750/**
1751 * pmap_map_bd
1752 *
1753 * Enters a physical mapping. (Before the VM subsystem is up.)
1754 */
1755boolean_t pmap_map_bd(vm_offset_t virt, vm_map_offset_t start, vm_map_offset_t end, vm_prot_t prot, unsigned int flags)
1756{
1757    spl_t spl;
1758
1759    /*
1760     * Verify the start and end are page aligned.
1761     */
1762    assert(!(start & PAGE_MASK));
1763    assert(!(end & PAGE_MASK));
1764
1765    /*
1766     * Disable interrupts and start mapping pages
1767     */
1768    SPLVM(spl);
1769
1770    /*
1771     * Write the PTEs to memory.
1772     */
1773    uint32_t ptep = (uint32_t) (pmap_pte(kernel_pmap, virt));
1774    if (!ptep)
1775        panic("pmap_map_bd: Invalid kernel address");
1776
1777    /*
1778     * Map the pages.
1779     */
1780    l2_map_linear_range_no_cache(virt_to_phys(ptep), start, end);
1781
1782    /*
1783     * Return.
1784     */
1785    SPLX(spl);
1786
1787    return TRUE;
1788}
1789
1790/**
1791 * pmap_pageable
1792 */
1793void pmap_pageable(__unused pmap_t pmap, __unused vm_map_offset_t start, __unused vm_map_offset_t end, __unused boolean_t pageable)
1794{
1795    return;
1796}
1797
1798/**
1799 * pmap_set_modify
1800 *
1801 * Set the modify bit on the specified physical page.
1802 */
1803void pmap_set_modify(ppnum_t pn)
1804{
1805    phys_attribute_set(pn, PMAP_OSPTE_TYPE_MODIFIED);
1806}
1807
1808/**
1809 * pmap_clear_modify
1810 *
1811 * Clear the modify bits on the specified physical page.
1812 */
1813void pmap_clear_modify(ppnum_t pn)
1814{
1815    phys_attribute_clear(pn, PMAP_OSPTE_TYPE_MODIFIED);
1816}
1817
1818/**
1819 * pmap_clear_reference
1820 *
1821 * Clear the reference bit on the specified physical page.
1822 */
1823void pmap_clear_reference(ppnum_t pn)
1824{
1825    phys_attribute_clear(pn, PMAP_OSPTE_TYPE_REFERENCED);
1826}
1827
1828/**
1829 * pmap_set_reference
1830 *
1831 * Set the reference bit on the specified physical page.
1832 */
1833void pmap_set_reference(ppnum_t pn)
1834{
1835    phys_attribute_set(pn, PMAP_OSPTE_TYPE_REFERENCED);
1836}
1837
1838/**
1839 * pmap_valid_page
1840 *
1841 * Is the page inside the managed zone?
1842 */
1843boolean_t pmap_valid_page(ppnum_t p)
1844{
1845    return (((p << PAGE_SHIFT) > avail_start)
1846            && ((p << PAGE_SHIFT) < avail_end));
1847}
1848
1849/**
1850 * pmap_verify_free
1851 *
1852 * Verify that the page has no mappings.
1853 */
1854boolean_t pmap_verify_free(vm_offset_t phys)
1855{
1856    pv_rooted_entry_t pv_h;
1857    int pai;
1858    boolean_t result;
1859
1860    assert(phys != vm_page_fictitious_addr);
1861    if (!pmap_initialized)
1862        return (TRUE);
1863
1864    if (!pmap_valid_page(phys))
1865        return (FALSE);
1866
1867    pv_h = pai_to_pvh(phys);
1868    result = (pv_h->pmap == PMAP_NULL);
1869
1870    return (result);
1871}
1872
1873/**
1874 * pmap_sync_page_data_phys
1875 *
1876 * Invalidates all of the instruction cache on a physical page and
1877 * pushes any dirty data from the data cache for the same physical page
1878 */
1879void pmap_sync_page_data_phys(__unused ppnum_t pa)
1880{
1881    Debugger("pmap_sync_page_data_phys");
1882    return;
1883}
1884
1885/**
1886 * pmap_sync_page_attributes_phys(ppnum_t pa)
1887 *
1888 * Write back and invalidate all cachelines on a physical page.
1889 */
1890void pmap_sync_page_attributes_phys(ppnum_t pa)
1891{
1892    Debugger("pmap_sync_page_attributes_phys");
1893    return;
1894}
1895
1896/*
1897 * Statistics routines
1898 */
1899int pmap_resident_max(pmap_t pmap)
1900{
1901    return ((pmap)->pm_stats.resident_max);
1902}
1903
1904int pmap_resident_count(pmap_t pmap)
1905{
1906    return ((pmap)->pm_stats.resident_count);
1907}
1908
1909/**
1910 * pmap_disable_NX
1911 *
1912 * Disable NX on a specified pmap.
1913 */
1914void pmap_disable_NX(pmap_t pmap)
1915{
1916    panic("pmap_disable_NX not implemented\n");
1917}
1918
1919extern void ovbcopy(void *from, void *to, vm_size_t len); /* TODO: Put this in a better place. */
1920
1921/**
 * pmap_copy_page
1923 *
1924 * pmap_copy_page copies the specified (machine independent)
1925 * page from physical address src to physical address dst.
1926 */
1927void pmap_copy_page(ppnum_t src, ppnum_t dst)
1928{
1929    ovbcopy((void *)phys_to_virt(src << PAGE_SHIFT), (void *)phys_to_virt(dst << PAGE_SHIFT), PAGE_SIZE);
1930}
1931
1932/**
1933 * pmap_copy_part_page
1934 *
1935 * Copies the specified (machine independent) pages.
1936 */
1937void pmap_copy_part_page(ppnum_t src, vm_offset_t src_offset, ppnum_t dst, vm_offset_t dst_offset, vm_size_t len)
1938{
1939    assert((((dst << PAGE_SHIFT) & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE);
1940    assert((((src << PAGE_SHIFT) & PAGE_MASK) + src_offset + len) <= PAGE_SIZE);
1941
    ovbcopy((void *)(phys_to_virt(src << PAGE_SHIFT) + src_offset), (void *)(phys_to_virt(dst << PAGE_SHIFT) + dst_offset), len);
1943}
1944
1945/**
1946 * pmap_common_init
1947 *
1948 * Initialize common elements of pmaps.
1949 */
1950void pmap_common_init(pmap_t pmap)
1951{
1952    usimple_lock_init(&pmap->lock, 0);
1953    if (pmap->ledger)
1954        ledger_reference(pmap->ledger);
1955    pmap->pm_refcnt = 1;
1956    pmap->pm_nx = 0;
1957    pmap->pm_shared = FALSE;
1958    pmap->pm_stats.resident_count = 0;
1959    pmap->pm_stats.wired_count = 0;
1960}
1961
1962/**
1963 * pmap_static_init
1964 *
1965 * Initialize the basic kernel pmap.
1966 */
1967void pmap_static_init(void)
1968{
1969    kdb_printf("pmap_static_init: Bootstrapping pmap\n");
1970    kernel_pmap->ledger = NULL;
1971    kernel_pmap->pm_asid = 0;
1972    kernel_pmap->pm_l1_size = 0x4000;   /* Cover 4*1024 TTEs */
1973    pmap_common_init(kernel_pmap);
1974    return;
1975}
1976
1977/**
1978 * pmap_is_modified
1979 *
1980 * Return whether or not the specified physical page is modified
1981 * by any physical maps.
1982 */
1983boolean_t pmap_is_modified(vm_offset_t phys)
1984{
1985    return (phys_attribute_test(phys, PMAP_OSPTE_TYPE_MODIFIED));
1986}
1987
1988/**
1989 * pmap_is_referenced
1990 *
1991 * Return whether or not the specified physical page is referenced
1992 * by any physical maps.
1993 */
1994boolean_t pmap_is_referenced(vm_offset_t phys)
1995{
1996    return (phys_attribute_test(phys, PMAP_OSPTE_TYPE_REFERENCED));
1997}
1998
1999/**
2000 * pmap_list_resident_pages
2001 */
2002int pmap_list_resident_pages(pmap_t pmap, vm_offset_t * listp, int space)
2003{
2004    return 0;
2005}
2006
2007/**
2008 * pmap_find_phys
2009 *
2010 * pmap_find_phys returns the (4K) physical page number containing a
2011 * given virtual address in a given pmap.
2012 */
2013ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va)
2014{
2015    spl_t spl;
2016    uint32_t ptep, pte, ppn;
2017
2018    /*
2019     * Raise priority level.
2020     */
2021    disable_preemption();
2022
2023#if 1
2024    /*
2025     * Get the PTE.
2026     */
2027    ptep = (uint32_t) pmap_pte(pmap, (vm_offset_t) va);
2028    if (!ptep) {
2029        ppn = 0;
2030        goto out;
2031    }
2032    pte = (*(uint32_t *) (ptep));
2033
2034    /*
2035     * Make sure it's a PTE.
2036     */
2037    if (!((pte) & ARM_PTE_DESCRIPTOR_4K)) {
2038        ppn = 0;
2039        goto out;
2040    }
2041
2042    ppn = pa_index(pte & L2_ADDR_MASK);
2043#else
2044    uint32_t virt = (va & L2_ADDR_MASK), par;
2045    boolean_t is_priv = (pmap == kernel_pmap) ? TRUE : FALSE;
2046
2047    /*
2048     * TTBCR split means that commonpage is at 0x40000000, in kernel_pmap.
2049     */
2050    if (virt == _COMM_PAGE_BASE_ADDRESS) {
2051        ppn = 0;
2052        goto out;
2053    }
2054
2055    /*
2056     * Fast VirtToPhys involves using the virtual address trnalsation
2057     * register as present in Cortex-A and ARM11 MPCore systems.
2058     *
2059     * Privileged reads are only done on the kernel PMAP versus user
2060     * pmaps getting user read/write state.
2061     *
2062     * The entire process should take much shorter compared to the
2063     * older pmap_extract, which fully walked the page tables. You can
2064     * still use the current behaviour however, by messing with
2065     * the MASTER files.
2066     *
2067     * I swear, I need more stupid sleep.
2068     */
2069
2070    /*
     * Write the VA into the VA-to-PA translation operation register.
2072     */
2073    if (is_priv)
2074        armreg_va2pa_pr_ns_write(virt);
2075    else
2076        armreg_va2pa_ur_ns_write(virt);
2077
2078    /*
2079     * Wait for the instruction transaction to complete.
2080     */
2081    __asm__ __volatile__("xisb sy");
2082
2083    /*
2084     * See if the translation aborted, log any translation errors.
2085     */
2086    par = armreg_par_read();
2087
2088    /*
2089     * Successful translation, we're done.
2090     */
2091    if (!(par & 1)) {
2092        uint32_t pa = par & L2_ADDR_MASK;
2093        ppn = pa_index(pa);
2094    } else {
2095        ppn = 0;
2096    }
2097#endif
2098 out:
2099    /*
2100     * Return.
2101     */
2102    enable_preemption();
2103    return ppn;
2104}
2105
2106/**
2107 * pmap_find_phys_fvtp
2108 *
 * pmap_find_phys_fvtp returns the (4K) physical page number containing a
 * given virtual address in a given pmap. It is used for KDP purposes
 * only.
2112 */
2113ppnum_t pmap_find_phys_fvtp(pmap_t pmap, addr64_t va)
2114{
2115#ifdef _ARM_ARCH_7
    uint32_t ppn;
    uint32_t virt = (va & L2_ADDR_MASK), par;
    boolean_t is_priv = (pmap == kernel_pmap) ? TRUE : FALSE;

    /*
     * Block preemption for the duration of the translation; it is re-enabled
     * at the 'out' label below.
     */
    disable_preemption();
2119
2120    /*
2121     * TTBCR split means that commonpage is at 0x40000000, in kernel_pmap.
2122     */
2123    if (virt == _COMM_PAGE_BASE_ADDRESS) {
2124        ppn = 0;
2125        goto out;
2126    }
2127
2128    /*
2129     * Fast VirtToPhys involves using the virtual address trnalsation
2130     * register as present in Cortex-A and ARM11 MPCore systems.
2131     *
2132     * Privileged reads are only done on the kernel PMAP versus user
2133     * pmaps getting user read/write state.
2134     *
2135     * The entire process should take much shorter compared to the
2136     * older pmap_extract, which fully walked the page tables. You can
2137     * still use the current behaviour however, by messing with
2138     * the MASTER files.
2139     *
2140     * I swear, I need more stupid sleep.
2141     */
2142
2143    /*
     * Write the VA into the VA-to-PA translation operation register.
2145     */
2146    if (is_priv)
2147        armreg_va2pa_pr_ns_write(virt);
2148    else
2149        armreg_va2pa_ur_ns_write(virt);
2150
2151    /*
2152     * Wait for the instruction transaction to complete.
2153     */
2154    __asm__ __volatile__("isb sy");
2155
2156    /*
2157     * See if the translation aborted, log any translation errors.
2158     */
2159    par = armreg_par_read();
2160
2161    /*
2162     * Successful translation, we're done.
2163     */
2164    if (!(par & 1)) {
2165        uint32_t pa = par & L2_ADDR_MASK;
2166        ppn = pa_index(pa);
2167    } else {
2168        ppn = 0;
2169    }
2170 out:
2171    /*
2172     * Return.
2173     */
2174    enable_preemption();
2175    return ppn;
2176#else
2177    return 0;
2178#endif
2179}
2180
2181/**
2182 * pmap_switch
2183 *
2184 * Switch the current user pmap to a new one.
2185 */
2186void pmap_switch(pmap_t new_pmap)
2187{
2188    spl_t spl;
2189
2190    /*
2191     * Raise priority level.
2192     */
2193    SPLVM(spl);
2194
2195    /*
2196     * Make sure it's not the kernel pmap.
2197     */
2198    if (new_pmap == kernel_pmap)
2199        goto switch_return;
2200
2201    /*
2202     * Switch it if needed.
2203     */
2204    if (current_cpu_datap()->user_pmap == new_pmap) {
2205        goto switch_return;
2206    } else {
2207        if (pmap_asid_ncpus) {
2208            pmap_asid_activate(new_pmap, cpu_number());
2209        }
2210        current_cpu_datap()->user_pmap = new_pmap;
2211        arm_set_context_id(new_pmap->pm_asid & 0xFF);
2212        arm_context_switch(new_pmap->pm_l1_phys);
2213    }
2214
2215    /*
2216     * Done.
2217     */
2218 switch_return:
2219    SPLX(spl);
2220    return;
2221}
2222
2223/**
2224 * pmap_map_block
2225 *
2226 * Map a (possibly) autogenned block
2227 */
2228void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, __unused unsigned int flags)
2229{
2230    uint32_t page;
2231    for (page = 0; page < size; page++) {
2232        pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
2233        va += PAGE_SIZE;
2234        pa++;
2235    }
2236}
2237
2238/**
2239 * pmap_asid_init
2240 */
2241static inline void pmap_asid_init(void)
2242{
2243    pm_asid_bitmap[0] = (2 << KERNEL_ASID_PID) - 1;
2244}
2245
2246/**
2247 * pmap_asid_alloc_fast
2248 *
 * Allocate an ASID for the specified pmap. Each pmap has its own
 * individual ASID.
2251 */
2252#define __arraycount(__x) (sizeof(__x) / sizeof(__x[0]))
2253static inline void pmap_asid_alloc_fast(pmap_t map)
2254{
2255    /*
2256     * The pmap specified cannot be the kernel map, it already has its
2257     * own ASID allocated to it.
2258     */
2259    assert(map != kernel_pmap);
2260    assert(map->pm_asid == 0);
2261    assert(pm_asids_free > 0);
2262    assert(pm_asid_hint <= pm_asid_max);
2263
2264    /*
2265     * Let's see if the hinted ASID is free. If not, search for a new one.
2266     */
2267    if (TLBINFO_ASID_INUSE_P(pm_asid_bitmap, pm_asid_hint)) {
2268        const size_t words = __arraycount(pm_asid_bitmap);
2269        const size_t nbpw = 8 * sizeof(pm_asid_bitmap[0]);
2270        for (size_t i = 0; i < pm_asid_hint / nbpw; i++) {
2271            assert(pm_asid_bitmap[i] == 0);
2272        }
2273        for (size_t i = pm_asid_hint / nbpw;; i++) {
2274            assert(i < words);
2275            /*
2276             * ffs wants to find the first bit set while we want
2277             * to find the first bit cleared.
2278             */
2279            u_long bits = ~pm_asid_bitmap[i];
2280            if (bits) {
2281                u_int n = 0;
2282                if ((bits & 0xffffffff) == 0) {
2283                    bits = (bits >> 31) >> 1;
2284                    assert(bits);
2285                    n += 32;
2286                }
2287                n += ffs(bits) - 1;
2288                assert(n < nbpw);
2289                pm_asid_hint = n + i * nbpw;
2290                break;
2291            }
2292        }
2293        assert(pm_asid_hint > KERNEL_ASID_PID);
2294        assert(TLBINFO_ASID_INUSE_P(pm_asid_bitmap, pm_asid_hint - 1));
2295        assert(!TLBINFO_ASID_INUSE_P(pm_asid_bitmap, pm_asid_hint));
2296    }
2297
2298    /*
2299     * The hint contains our next ASID so take it and advance the hint.
2300     * Mark it as used and insert the pai into the list of active asids.
2301     * There is also one less asid free in this TLB.
2302     */
2303    map->pm_asid = pm_asid_hint++;
2304    TLBINFO_ASID_MARK_USED(pm_asid_bitmap, map->pm_asid);
2305    pm_asids_free--;
2306
2307#if 1
2308    kprintf("[pmap_asid_alloc_fast] ASIDs free: %d ASIDs, ASID subsystem allocated id %u for map %p!\n", pm_asids_free, map->pm_asid, map);
2309#endif
2310
2311    return;
2312}
2313
2314/**
2315 * pmap_asid_reset
2316 */
2317static inline void pmap_asid_reset(pmap_t map)
2318{
2319    /*
2320     * We must have an ASID.
2321     */
2322    assert(map->pm_asid > KERNEL_ASID_PID);
2323
2324    /*
2325     * Note that we don't mark the ASID as not in use in the TLB's ASID
2326     * bitmap (thus it can't be allocated until the ASID space is exhausted
2327     * and therefore reinitialized).  We don't want to flush the TLB for
2328     * entries belonging to this ASID so we will let natural TLB entry
2329     * replacement flush them out of the TLB.  Any new entries for this
2330     * pmap will need a new ASID allocated.
2331     */
2332    map->pm_asid = 0;
2333
2334    return;
2335}
2336
2337extern long __stack_chk_guard[];
2338
2339/**
2340 * pmap_bootstrap
2341 *
2342 * Bootstrap the pmap subsystem.
2343 */
2344void pmap_bootstrap(__unused uint64_t msize, vm_offset_t * __first_avail, __unused unsigned int kmapsize)
2345{
2346    /*
2347     * Set the first virtual address we can use.
2348     */
2349    virt_begin = *__first_avail;
2350
2351    /*
2352     * Make sure we don't go to the ARM Vector Table.
2353     */
2354    virt_end = vm_last_addr = 0xFFFFEFFF;
2355
2356    /*
2357     * Set the available page amount.
2358     */
2359    avail_remaining = (avail_end - first_avail) >> PAGE_SHIFT;
2360    vm_first_phys = first_avail;
2361    avail_start = first_avail;
2362
2363    kprintf("pmap_bootstrap: physical region 0x%08x - 0x%08x\n", first_avail, avail_end);
2364
2365    /*
2366     * Set NPVhash defaults.
2367     */
2368    if (PE_parse_boot_argn("npvhash", &npvhash, sizeof(npvhash))) {
2369        if (0 != ((npvhash + 1) & npvhash)) {
2370            kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n", npvhash, NPVHASH);
2371            npvhash = NPVHASH;
2372        }
2373    } else {
2374        npvhash = NPVHASH;
2375    }
2376    printf("npvhash=%d\n", npvhash);
2377
2378    /*
2379     * ASID initialization.
2380     */
2381    pmap_asid_initialize_kernel(kernel_pmap);
2382
2383    /*
2384     * Initialize kernel pmap.
2385     */
2386    pmap_static_init();
2387}
2388
2389/**
2390 * pmap_reference
2391 *
2392 * Increment reference count of the specified pmap.
2393 */
2394void pmap_reference(pmap_t pmap)
2395{
2396    /*
2397     * Bump the count.
2398     */
2399    if (pmap != PMAP_NULL)
2400        (void) hw_atomic_add((volatile uint32_t *)&pmap->pm_refcnt, 1);
2401}
2402
2403/**
2404 * pmap_get_refmod
2405 *
2406 * Returns the referenced and modified bits of the specified
2407 * physical page.
2408 */
2409unsigned int pmap_get_refmod(ppnum_t pn)
2410{
2411    int refmod;
2412    unsigned int retval = 0;
2413
2414    refmod = phys_attribute_test(pn, PMAP_OSPTE_TYPE_MODIFIED | PMAP_OSPTE_TYPE_REFERENCED);
2415
2416    if (refmod & PMAP_OSPTE_TYPE_MODIFIED)
2417        retval |= VM_MEM_MODIFIED;
2418    if (refmod & PMAP_OSPTE_TYPE_REFERENCED)
2419        retval |= VM_MEM_REFERENCED;
2420
2421    return (retval);
2422}
2423
2424/**
2425 * pmap_enter
2426 *
2427 * Enter pages into a physical map.
2428 */
2429void pmap_enter(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, vm_prot_t fault_type, unsigned int flags, boolean_t wired)
2430{
2431    pmap_enter_options(pmap, va, pa, prot, fault_type, flags, wired, 0);
2432}
2433
2434/**
2435 * pmap_grab_page
2436 *
2437 * Get a page from the global pmap object.
2438 */
2439vm_page_t pmap_grab_page(pmap_t pmap)
2440{
2441    vm_page_t page;
2442    uint32_t ctr;
2443    assert(pmap_initialized && kernel_map && pmap->pm_obj);
2444
2445    /*
2446     * Grab pages from the global VM object.
2447     */
2448    while ((page = vm_page_grab()) == VM_PAGE_NULL)
2449        VM_PAGE_WAIT();
2450
2451    /*
2452     * Lock the global object to prevent interruptions.
2453     */
2454    vm_object_lock(pmap->pm_obj);
2455    assert((page->phys_page << PAGE_SHIFT) > gPhysBase);
2456    ctr = (page->phys_page) - (gPhysBase >> PAGE_SHIFT);
2457    bzero((void *)phys_to_virt(page->phys_page << PAGE_SHIFT), PAGE_SIZE);
2458    vm_page_insert(page, pmap->pm_obj, ctr);
2459
2460    /*
2461     * Wire our new page.
2462     */
2463    vm_page_lockspin_queues();
2464    vm_page_wire(page);
2465    vm_page_unlock_queues();
2466
2467    /*
2468     * Done.
2469     */
2470    vm_object_unlock(pmap->pm_obj);
2471
2472    /*
2473     * Set noencrypt bits.
2474     */
2475    pmap_set_noencrypt(page->phys_page);
2476
2477    /*
2478     * Increment inuse ptepages.
2479     */
2480    OSAddAtomic(1, &inuse_ptepages_count);
2481    OSAddAtomic(1, &alloc_ptepages_count);
2482
2483    return page;
2484}
2485
2486/**
2487 * pmap_destroy_page
2488 *
2489 * Free a page from the internal VM object.
2490 */
2491void pmap_destroy_page(ppnum_t pa)
2492{
2493    vm_page_t m;
2494
2495    vm_object_lock(pmap_object);
2496
2497    m = vm_page_lookup(pmap_object, pa);
    if (m == VM_PAGE_NULL) {
        /*
         * No page to free; drop the object lock before returning.
         */
        vm_object_unlock(pmap_object);
        return;
    }

    vm_object_unlock(pmap_object);
2502
2503    VM_PAGE_FREE(m);
2504    kprintf("Freed page for PA %x\n", pa << PAGE_SHIFT);
2505
2506    /*
2507     * Remove one.
2508     */
2509    OSAddAtomic(-1, &inuse_ptepages_count);
2510
2511    return;
2512}
2513
2514/**
2515 * pmap_create_sharedpage
2516 *
2517 * Create the system common page.
2518 */
2519void pmap_create_sharedpage(void)
2520{
2521    /*
2522     * Grab a page...
2523     */
2524    commpage = pmap_grab_page(kernel_pmap);
2525    assert(commpage);
2526
2527    /*
2528     * And map it.
2529     */
2530    pmap_enter(kernel_pmap, (vm_map_offset_t) _COMM_PAGE_BASE_ADDRESS, commpage->phys_page, VM_PROT_READ | VM_PROT_WRITE, 0, FALSE, TRUE);
2531
2532    /*
2533     * Memset it.
2534     */
2535    memset((void *) _COMM_PAGE_BASE_ADDRESS, 0x00, PAGE_SIZE);
2536    return;
2537}
2538
2539/**
2540 * pmap_extract
2541 *
2542 * Get the physical address for a virtual one.
2543 */
2544vm_offset_t pmap_extract(pmap_t pmap, vm_offset_t virt)
2545{
2546#if defined(_ARM_ARCH_6)
2547    spl_t spl;
2548    vm_offset_t ppn = 0;
2549    uint32_t tte, *ttep = pmap_tte(pmap, virt);
2550
2551    /*
2552     * Block off all interruptions. Nothing may interrupt the extraction process
2553     * as the page tables may be changed by another callee to pmap_enter or such.
2554     */
2555
2556    PMAP_LOCK(pmap);
2557    if (!ttep)
2558        goto extract_out;
2559
2560    /*
2561     * Look at the TTE and see what type of mapping it is.
2562     */
2563    tte = *ttep;
2564
2565    /*
2566     * Verify it's not a section mapping.
2567     */
2568    if ((tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_SECTION) {
2569        /*
2570         * Clean the lower bits off.
2571         */
2572        ppn = (tte & L1_SECT_ADDR_MASK);
2573
2574        /*
2575         * Now add the lower bits back from the VA.
2576         */
2577        ppn |= (virt & ~(L1_SECT_ADDR_MASK));
2578
2579        /*
2580         * Done. Address extraction successful.
2581         */
2582        goto extract_out;
2583    } else if ((tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_PAGE_TABLE) {
2584        uint32_t pte, *ptep;
2585
2586        /*
2587         * Clean the TTE bits off, get the address of the L1 entry.
2588         */
2589        pte = L1_PTE_ADDR(tte);
2590        if (!pte)
2591            goto extract_out;
2592
2593        /*
2594         * Return the virtually mapped PTE.
2595         */
2596        ptep = (uint32_t *) ((phys_to_virt(pte) + pte_offset(virt)));
2597
2598        /*
2599         * Make sure it's not a large page. They're not supported yet, but they will
2600         * be at some point.
2601         */
2602        if (((*ptep & ARM_PAGE_MASK_VALUE) == ARM_PTE_DESCRIPTOR_64K))
2603            panic("pmap_extract: 64kb pages not supported yet");
2604
2605        /*
2606         * Clean the PTE bits off the address.
2607         */
2608        ppn = (*ptep) & L2_ADDR_MASK;
2609
2610        /*
2611         * Now, add the lower bits back from the VA.
2612         */
2613        ppn |= (virt & ~(L2_ADDR_MASK));
2614
2615        /*
2616         * Done. Extraction successful.
2617         */
2618        goto extract_out;
2619    } else {
2620        kprintf("pmap_extract: invalid tte (ttep %x tte %x)\n", ttep, tte);
2621    }
2622
2623 extract_out:
2624
2625    /*
2626     * Return.
2627     */
2628    PMAP_UNLOCK(pmap);
2629    return ppn;
2630#elif defined(_ARM_ARCH_7)
2631    uint32_t va = (virt & L2_ADDR_MASK), par;
2632    boolean_t is_priv = (pmap == kernel_pmap) ? TRUE : FALSE;
2633
2634    /*
2635     * Fast VirtToPhys involves using the virtual address trnalsation
2636     * register as present in Cortex-A and ARM11 MPCore systems.
2637     *
2638     * Privileged reads are only done on the kernel PMAP versus user
2639     * pmaps getting user read/write state.
2640     *
2641     * The entire process should take much shorter compared to the
2642     * older pmap_extract, which fully walked the page tables. You can
2643     * still use the current behaviour however, by messing with
2644     * the MASTER files.
2645     *
2646     * I swear, I need more stupid sleep.
2647     */
2648
2649    /*
     * Write the VA into the VA-to-PA translation operation register.
2651     */
2652    if (is_priv)
2653        armreg_va2pa_pr_ns_write(va);
2654    else
2655        armreg_va2pa_ur_ns_write(va);
2656
2657    /*
2658     * Wait for the instruction transaction to complete.
2659     */
2660    __asm__ __volatile__("isb sy");
2661
2662    /*
2663     * See if the translation aborted, log any translation errors.
2664     */
2665    par = armreg_par_read();
2666
2667    /*
2668     * Successful translation, we're done.
2669     */
2670    if (!(par & 1)) {
2671        uint32_t pa = par & L2_ADDR_MASK;
2672        pa |= (virt & ~(L2_ADDR_MASK));
2673        return pa;
2674    } else {
2675        /*
2676         * Log translation fault.
2677         */
2678        kprintf("pmap_extract: fast extraction failed, par 0x%x\n", par);
2679    }
2680
2681    return 0;
2682#else
2683#error Unsupported subarchitecture
2684#endif
2685}
2686
2687/**
2688 * pmap_expand_ttb
2689 *
2690 * Expand and reorganize the current translation-table as to fit a new size.
2691 */
2692void pmap_expand_ttb(pmap_t map, vm_offset_t expansion_size)
2693{
2694    /*
2695     * If the requested expansion size is less than or greater, we have nothing to do.
2696     */
2697    if (expansion_size <= map->pm_l1_size)
2698        return;
2699
2700    /*
2701     * Do not expand past maximum size.
2702     */
2703    if (expansion_size > 0x4000)
2704        panic("pmap_expand_ttb: attempting to expand past maximum address of %x, map %p, expansion %x\n", 0x4000, map, expansion_size);
2705
2706    switch (expansion_size) {
2707    case 0x1000:
2708        panic("pmap_expand_ttb: attempting to expand an already-expanded pmap?");
2709    case 0x2000 ... 0x3000:{
2710            kern_return_t ret;
2711            vm_page_t pages;
2712
2713            /*
2714             * Allocate a contiguous segment of memory for the new L1 mapping table. (including one guard)
2715             */
2716            ret = cpm_allocate(expansion_size, &pages, 0, ((expansion_size / map->pm_l1_size) - 1), FALSE, KMA_LOMEM);
2717            assert(ret == KERN_SUCCESS);
2718
2719            /*
2720             * We got the new contiguous block.
2721             */
2722            bzero((void *)phys_to_virt(pages->phys_page << PAGE_SHIFT), expansion_size);
2723
2724            /*
2725             * Copy the old entries to the new area.
2726             */
2727            bcopy((void *) map->pm_l1_virt, (void *) phys_to_virt(pages->phys_page << PAGE_SHIFT), map->pm_l1_size);
2728#if 1
2729            kprintf("pmap_expand_ttb: 0x%x => 0x%x\n", map->pm_l1_virt, phys_to_virt(pages->phys_page << PAGE_SHIFT));
2730#endif
2731
2732            /*
2733             * Deallocate the old L1.
2734             */
2735            pmap_deallocate_l1(map);
2736
2737            /*
2738             * Set the new TTB base.
2739             */
2740            map->pm_l1_virt = phys_to_virt(pages->phys_page << PAGE_SHIFT);
2741            map->pm_l1_phys = pages->phys_page << PAGE_SHIFT;
2742            map->pm_l1_size = expansion_size;
2743
2744            OSAddAtomic((expansion_size >> PAGE_SHIFT), &inuse_ptepages_count);
2745            OSAddAtomic((expansion_size >> PAGE_SHIFT), &alloc_ptepages_count);
2746
2747            /*
2748             * Switch into the new TTB if it needs to be used.
2749             */
2750            if (map == current_cpu_datap()->user_pmap) {
2751                arm_context_switch(map->pm_l1_phys);
2752            }
2753
2754            return;
2755        }
2756    default:
2757        panic("pmap_expand_ttb: invalid expansion size %x\n", expansion_size);
2758    }
2759
2760    return;
2761}
2762
2763/**
2764 * pmap_expand
2765 *
2766 * Expand the address space of the current physical map.
2767 */
2768void pmap_expand(pmap_t map, vm_offset_t v)
2769{
2770    vm_offset_t *tte = (vm_offset_t *) pmap_tte(map, v);
2771    vm_page_t page = pmap_grab_page(map);
2772    spl_t spl;
2773
2774    /*
2775     * High priority. We do not want any interruptions.
2776     */
2777    PMAP_LOCK(map);
2778
2779    if (map != kernel_pmap) {
2780        /*
2781         * First, if we have a size below 0x1000, we can't be sure about expanding.
2782         */
2783        if (map->pm_l1_size < 0x1000) {
2784            panic("pmap_expand: this pmap has a really weird size: %d bytes", map->pm_l1_size);
2785        }
2786
2787        /*
2788         * See if we can make it grow.
2789         */
2790        uint32_t expansion_size = ((tte_offset(v)) & ~(PAGE_SIZE - 1)) + PAGE_SIZE;
2791        pmap_expand_ttb(map, expansion_size);
2792
2793        /*
2794         * Refetch the TTE, since the pmap base may have changed.
2795         */
2796        tte = (vm_offset_t *) pmap_tte(map, v);
2797
2798#if 0
2799        /*
2800         * Do not extend past the commpage.
2801         */
2802        if (map->pm_l1_size == 0x1000) {
2803            if (v >= _COMM_PAGE_BASE_ADDRESS) {
2804                panic("attempting to expand pmap past maximum address of %x\n", _COMM_PAGE_BASE_ADDRESS);
2805            }
2806        }
2807#endif
2808
2809        /*
2810         * L1 section mappings may not be expanded any further.
2811         */
2812        if ((*tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_SECTION)
2813            panic("cannot expand current map into L1 sections");
2814    }
2815
2816    /*
2817     * Overwrite the old L1 mapping in this region with a fresh L1 descriptor.
2818     */
2819    *tte = ((page->phys_page << PAGE_SHIFT) & L1_PTE_ADDR_MASK) | L1_TYPE_PTE;
2820
2821    /*
2822     * Flush the TLBs since we updated the page tables.
2823     */
2824    pmap_flush_tlbs(map, v, v + PAGE_SIZE);
2825    PMAP_UNLOCK(map);
2826    return;
2827}
2828
2829/**
2830 * pmap_enter_options
2831 *
2832 * Create a translation entry for a PA->VA mappings with additional options.
2833 * Called from vm_fault.
2834 */
2835kern_return_t pmap_enter_options(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, vm_prot_t fault_type, unsigned int flags, boolean_t wired, unsigned int options)
2836{
2837    spl_t spl;
2838    pt_entry_t pte;
2839    register pv_rooted_entry_t pv_h;
2840    pv_hashed_entry_t pvh_e;
2841    pv_hashed_entry_t pvh_new;
2842    pv_hashed_entry_t *hashp;
2843    int pvhash_idx;
2844    uint32_t pv_cnt;
2845    boolean_t old_pvh_locked = FALSE;
2846
2847    /*
2848     * Verify the address isn't fictitious.
2849     */
2850    assert(pa != vm_page_fictitious_addr);
2851
2852    /*
2853     * Only low addresses are supported for user pmaps.
2854     */
2855    if (va > _COMM_PAGE_BASE_ADDRESS && pmap != kernel_pmap) {
2856        kprintf("pmap_enter_options: low address 0x%08X is invalid for pmap %p\n", va, pmap);
2857        return KERN_INVALID_ARGUMENT;
2858    }
2859
2860    pvh_new = PV_HASHED_ENTRY_NULL;
2861
2862 Retry:
2863    pvh_e = PV_HASHED_ENTRY_NULL;
2864
2865    /*
     * Lock the pmap. We do not want any interruptions or any unauthorized
     * page table modification.
2868     */
2869    PMAP_LOCK(pmap);
2870
2871    /*
     * Expand the pmap to include the new PTE if necessary to accommodate the new VA
     * we are entering.
2874     */
2875    while ((pte = (pt_entry_t)pmap_pte(pmap, va)) == 0) {
2876        PMAP_UNLOCK(pmap);
2877        pmap_expand(pmap, va);
2878        PMAP_LOCK(pmap);
2879    }
2880
2881    /*
2882     * If the old page already has a mapping, the caller might be changing protection flags.
2883     */
2884    uint32_t old_pte = (*(uint32_t *) pte);
2885
2886    /*
2887     * If it's a managed page, lock the pv entry right now.
2888     */
2889    if((old_pte & L2_ADDR_MASK) != 0) {
2890        uint32_t pai = pa_index(old_pte & L2_ADDR_MASK);
2891        LOCK_PVH(pai);
2892        old_pvh_locked = TRUE;
2893        old_pte = (*(uint32_t *) pte);
2894        if(0 == old_pte) {
2895            UNLOCK_PVH(pai);
2896            old_pvh_locked = FALSE;
2897        }
2898    }
2899
2900    if ((old_pte & L2_ADDR_MASK) == (pa << PAGE_SHIFT)) {
2901        /*
2902         * !!! IMPLEMENT 'pmap_vm_prot_to_page_flags' !!!
2903         * XXX protection is not implemented right now, all pages are 'RWX'.
2904         */
2905
2906        uint32_t template_pte = ((pa << PAGE_SHIFT) & L2_ADDR_MASK) | L2_SMALL_PAGE;
2907        template_pte |= pmap_vm_prot_to_page_flags(pmap, prot, wired, 0);
2908
2909        if (va == _COMM_PAGE_BASE_ADDRESS)
2910            template_pte |= L2_ACCESS_USER;
2911
2912        /*
2913         * Add cacheability attributes.
2914         */
2915        template_pte |= pmap_get_cache_attributes(pa);
2916
2917        if (wired) {
2918            if (!phys_attribute_test(pa, PMAP_OSPTE_TYPE_WIRED)) {
2919                OSAddAtomic(+1, &pmap->pm_stats.wired_count);
2920                phys_attribute_set(pa, PMAP_OSPTE_TYPE_WIRED);
2921                pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
2922            } else {
2923                assert(pmap->pm_stats.wired_count >= 1);
2924                OSAddAtomic(-1, &pmap->pm_stats.wired_count);
2925                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
2926            }
2927        }
2928
2929        *(uint32_t *) pte = template_pte;
2930
2931        /*
2932         * The work here is done, the PTE will now have new permissions. Flush the TLBs for the
2933         * specific VA and then exit.
2934         */
2935        if(old_pvh_locked) {
2936            UNLOCK_PVH(pa);
2937            old_pvh_locked = FALSE;
2938        }
2939
2940        goto enter_options_done;
2941    }
2942
2943    /*
2944     * This is a new mapping, add it to the pv_head_table if pmap is initialized. This is so
2945     * we can correctly manage our entries.
2946     */
2947    if (pmap_initialized) {
2948        ppnum_t pai;
2949
2950        /*
         * If the PTE currently maps a different physical page, remove that old mapping first.
2952         */
2953        if ((old_pte & L2_ADDR_MASK) != 0) {
2954            pai = pa_index((old_pte & L2_ADDR_MASK));
2955            pv_h = pai_to_pvh(pai);
2956
2957            *(uint32_t *) pte = 0;
2958
2959            if (!pmap_valid_page(pa))
2960                goto EnterPte;
2961
2962            /*
2963             * Set statistics and credit/debit internal pmap ledgers
2964             */
2965            {
2966                pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
2967                assert(pmap->pm_stats.resident_count >= 1);
2968                OSAddAtomic(-1, &pmap->pm_stats.resident_count);
2969            }
2970
2971            if (phys_attribute_test(pa, PMAP_OSPTE_TYPE_WIRED)) {
2972                assert(pmap->pm_stats.wired_count >= 1);
2973                OSAddAtomic(-1, &pmap->pm_stats.wired_count);
2974                phys_attribute_clear(pa, PMAP_OSPTE_TYPE_WIRED);
2975            }
2976
2977            if (pv_h->pmap == PMAP_NULL) {
2978                panic("pmap_enter_options: null pv_list\n");
2979            }
2980            pvh_e = pmap_pv_remove(pmap, va, (ppnum_t *) & pai, 0);
2981
2982            /*
2983             * Unlock the old pvh since it's gone now
2984             */
2985            if(old_pvh_locked) {
2986                UNLOCK_PVH(pai);
2987                old_pvh_locked = FALSE;
2988            }
2989        }
2990
2991        pai = pa;
2992        pv_h = pai_to_pvh(pai);
2993
2994        if (!pmap_valid_page(pa))
2995            goto EnterPte;
2996
2997#if 0
2998        /*
2999         * Check to see if it exists, if it does, then make it null. The code later
3000         * will treat a null mapping as a new one and will enter it anyway.
3001         */
3002        if ((pv_h->pv_pmap == pmap) && (pv_h->pv_address_va == va)) {
3003            pv_entry_t cur;
3004            cur = pv_h->pv_next;
3005            if (cur != (pv_entry_t) 0) {
3006                *pv_h = *cur;
3007                pv_e = cur;
3008            } else {
3009                pv_h->pv_pmap = PMAP_NULL;
3010            }
3011        }
3012#endif
3013        /*
3014         *  Step 2) Enter the mapping in the PV list for this
3015         *  physical page.
3016         */
3017        LOCK_PVH(pai);
3018
3019        /*
3020         * This is definitely a new mapping.
3021         */
3022        if (pv_h->pmap == PMAP_NULL) {
3023            pv_h->va = va;
3024            pv_h->pmap = pmap;
3025            queue_init(&pv_h->qlink);
3026            if (wired)
3027                phys_attribute_set(pa, PMAP_OSPTE_TYPE_WIRED);
3028        } else {
3029            /*
3030             *  Add new pv_hashed_entry after header.
3031             */
3032            if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
3033                pvh_e = pvh_new;
3034                pvh_new = PV_HASHED_ENTRY_NULL;
3035            } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
3036                PV_HASHED_ALLOC(pvh_e);
3037                if (PV_HASHED_ENTRY_NULL == pvh_e) {
3038                    /*
3039                     * the pv list is empty. if we are on
3040                     * the kernel pmap we'll use one of
3041                     * the special private kernel pv_e's,
3042                     * else, we need to unlock
3043                     * everything, zalloc a pv_e, and
3044                     * restart bringing in the pv_e with
3045                     * us.
3046                     */
3047                    if (kernel_pmap == pmap) {
3048                        PV_HASHED_KERN_ALLOC(pvh_e);
3049                    } else {
3050                        UNLOCK_PVH(pai);
3051                        PMAP_UNLOCK(pmap);
3052                        pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
3053                        goto Retry;
3054                    }
3055                }
3056            }
3057
3058            if (PV_HASHED_ENTRY_NULL == pvh_e)
3059                panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
3060            pvh_e->va = va;
3061            pvh_e->pmap = pmap;
3062            pvh_e->ppn = pa;
3063
3064            pv_hash_add(pvh_e, pv_h);
3065
3066            /*
3067             *  Remember that we used the pvlist entry.
3068             */
3069            pvh_e = PV_HASHED_ENTRY_NULL;
3070        }
3071#if 0
3072        kprintf("pmap_enter: pai %d pa %d (%x) va %x pv_h %p pmap %p pv_h->pmap %p pv_h->pv_address_va %x\n", pai, pa, pa << PAGE_SHIFT, va, pv_h, pmap, pv_h->pv_pmap, pv_h->pv_address_va);
3073#endif
3074    }
3075 EnterPte:
3076
3077    /*
3078     * Enter and count the mapping.
3079     */
3080    pmap->pm_stats.resident_count++;
3081    pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
3082
3083    if (wired) {
3084        pmap->pm_stats.wired_count++;
        pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
3086    }
3087
3088    /*
3089     * Set VM protections
3090     */
3091    uint32_t template_pte = ((pa << PAGE_SHIFT) & L2_ADDR_MASK) | L2_SMALL_PAGE;
3092    template_pte |= pmap_vm_prot_to_page_flags(pmap, prot, wired, 0);
3093
3094    /*
3095     * Hack for commpage, how is this to be done?
3096     */
3097    if (va == _COMM_PAGE_BASE_ADDRESS)
3098        template_pte |= L2_ACCESS_USER;
3099
3100    /*
3101     * Add cacheability attributes.
3102     */
3103    template_pte |= pmap_get_cache_attributes(pa);
3104
3105    *(uint32_t *) pte = template_pte;
3106
3107    /*
3108     * Unlock the pv. (if it is managed by us)
3109     */
3110    if(pmap_initialized && pmap_valid_page(pa)) {
3111        UNLOCK_PVH(pa);
3112    }
3113
3114 enter_options_done:
3115    /*
3116     * Done, now invalidate the TLB for a single page.
3117     */
3118    pmap_flush_tlbs(pmap, va, va + PAGE_SIZE);
3119
3120    if (pvh_e != PV_HASHED_ENTRY_NULL) {
3121        PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
3122    }
3123    if (pvh_new != PV_HASHED_ENTRY_NULL) {
3124        PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
3125    }
3126
3127    /*
3128     * The operation has completed successfully.
3129     */
3130    PMAP_UNLOCK(pmap);
3131
3132    return KERN_SUCCESS;
3133}
3134
3135extern vm_offset_t sdata, edata;
3136extern vm_offset_t sconstdata, econstdata;
3137extern boolean_t doconstro_override;
3138
3139/**
3140 * pmap_init
3141 *
3142 * Stage 2 initialization of the pmap subsystem.
3143 */
3144void pmap_init(void)
3145{
3146    vm_offset_t pv_root;
3147    vm_size_t s;
3148    spl_t spl;
3149    int i;
3150
3151    kprintf("pmap_init: %d physical pages in memory, kernel pmap at %p\n", (mem_size / PAGE_SIZE), kernel_pmap);
3152
3153    /*
3154     * Allocate the core PV structure. The pv_head_table contains trunk entries
3155     * for every physical page that exists in the system.
3156     */
3157    s = (mem_size / PAGE_SIZE) * sizeof(pv_entry);
3158    if (kernel_memory_allocate(kernel_map, &pv_root, s, 0, KMA_KOBJECT | KMA_PERMANENT) != KERN_SUCCESS)
3159        panic("pmap_init: failed to allocate pv table!");
3160
3161    /*
3162     * Okay. Zero out the PV head table.
3163     */
3164    pv_head_table = (pv_entry_t) pv_root;
3165    kprintf("pmap_init: pv_head_table at %p\n", pv_head_table);
3166    bzero((void *) pv_head_table, s);
3167
3168    /*
3169     * Initialize the Zones for object allocation.
3170     */
3171    pmap_zone = zinit((sizeof(struct pmap)), 400 * (sizeof(struct pmap)), 4096, "pmap_pmap");
3172
3173    /*
3174     * Expandable zone. (pv_entry zone)
3175     */
3176    pve_zone = zinit((sizeof(struct __pv_entry__)), 10000 * (sizeof(struct __pv_entry__)), 4096, "pmap_pve");
3177
3178    /*
3179     * Allocate memory for the pv_head_hash_table.
3180     */
3181    s = (vm_size_t) (sizeof(struct pv_rooted_entry) * (mem_size / PAGE_SIZE)
3182                     + (sizeof(struct pv_hashed_entry_t *) * (npvhash + 1))
3183                     + pv_lock_table_size((mem_size / PAGE_SIZE))
3184                     + pv_hash_lock_table_size((npvhash + 1))
3185                     + (mem_size / PAGE_SIZE));
3186    if (kernel_memory_allocate(kernel_map, &pv_root, s, 0, KMA_KOBJECT | KMA_PERMANENT) != KERN_SUCCESS)
3187        panic("pmap_init: failed to allocate pv hash table!");
3188
3189    /*
3190     * Initialize the core objects.
3191     */
3192    uint32_t npages = (mem_size / PAGE_SIZE);
3193    pv_head_hash_table = (pv_rooted_entry_t) pv_root;
    pv_root = (vm_offset_t) (pv_head_hash_table + npages);
3195
3196    pv_hash_table = (pv_hashed_entry_t *) pv_root;
3197    pv_root = (vm_offset_t) (pv_hash_table + (npvhash + 1));
3198
3199    pv_lock_table = (char *) pv_root;
3200    pv_root = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
3201    bzero(pv_lock_table, pv_lock_table_size(npages));
3202
3203    pv_hash_lock_table = (char *) pv_root;
3204    pv_root = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash + 1)));
3205    bzero(pv_hash_lock_table, pv_hash_lock_table_size((npvhash + 1)));
3206
3207    bzero((void *) pv_head_hash_table, s);
3208    kprintf("pmap_init: pv_head_hash_table at %p\n", pv_head_hash_table);
3209
3210    /*
3211     * PVHash Zone
3212     */
3213    pv_hashed_list_zone = zinit(sizeof(struct pv_hashed_entry), 10000 * sizeof(struct pv_hashed_entry), 4096, "pv_list");   /* XXX */
3214
3215    /*
3216     * Initialize the free list lock. (unused right now.)
3217     */
3218    simple_lock_init(&kernel_pmap->lock, 0);
3219    simple_lock_init(&pv_free_list_lock, 0);
3220    simple_lock_init(&pv_hashed_free_list_lock, 0);
3221    simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
3222    simple_lock_init(&pv_hash_table_lock, 0);
3223
3224    /*
3225     * Remap kernel as RO only.
3226     */
3227    uint32_t ro_kern = 1;
3228    if (PE_parse_boot_argn("kernel_read_only", &ro_kern, sizeof(ro_kern))) {
3229        ro_kern = 0;
3230    }
3231    SPLVM(spl);
3232
3233    kprintf("Kernel ASLR slide: 0x%08x, virtual base: 0x%08x\n", vm_kernel_slide, gVirtBase);
3234
3235    /*
3236     * Rewrite the kernel PTEs.
3237     */
3238    if (ro_kern) {
3239        vm_offset_t kva;
3240        pt_entry_t *ptep;
3241
3242        kprintf("Kernel text %x-%x to be write-protected\n", vm_kernel_stext, vm_kernel_etext);
3243
3244        /*
3245         * Add APX-bit to reduce protections to R-X.
3246         */
3247        for (kva = vm_kernel_stext; kva < vm_kernel_etext; kva += PAGE_SIZE) {
3248            ptep = (pt_entry_t *)pmap_pte(kernel_pmap, (vm_map_offset_t) kva);
3249            if (ptep)
3250                *ptep |= L2_ACCESS_APX;
3251        }
3252    }
3253
3254    /*
3255     * Set const to R-- only too.
3256     */
3257    boolean_t doconstro = TRUE;
3258
3259    (void) PE_parse_boot_argn("dataconstro", &doconstro, sizeof(doconstro));
3260
3261    if ((sconstdata | econstdata) & PAGE_MASK) {
3262        kprintf("Const DATA misaligned 0x%lx 0x%lx\n", sconstdata, econstdata);
3263        if ((sconstdata & PAGE_MASK) || (doconstro_override == FALSE))
3264            doconstro = FALSE;
3265    }
3266
3267    if ((sconstdata > edata) || (sconstdata < sdata)
3268        || ((econstdata - sconstdata) >= (edata - sdata))) {
3269        kprintf("Const DATA incorrect size 0x%lx 0x%lx 0x%lx 0x%lx\n", sconstdata, econstdata, sdata, edata);
3270        doconstro = FALSE;
3271    }
3272
3273    if (doconstro)
3274        kprintf("Marking const DATA read-only\n");
3275
3276    vm_offset_t dva;
3277    for (dva = sdata; dva < edata; dva += PAGE_SIZE) {
3278        pt_entry_t *pte, dpte;
3279        pte = (pt_entry_t *)pmap_pte(kernel_pmap, dva);
3280        assert(pte);
3281
3282        /*
3283         * Make sure the PTE is valid.
3284         */
3285        dpte = *pte;
3286        assert(dpte & ARM_PTE_DESCRIPTOR_4K);
3287        if (!(dpte & ARM_PTE_DESCRIPTOR_4K)) {
3288            kprintf("Missing data mapping 0x%x 0x%x 0x%x\n", dva, sdata, edata);
3289            continue;
3290        }
3291
3292        /*
3293         * Enforce NX and RO as necessary.
3294         */
3295        dpte |= L2_NX_BIT;
3296        if (doconstro && (dva >= sconstdata) && (dva < econstdata)) {
3297            dpte |= L2_ACCESS_APX;
3298        }
3299        *pte = dpte;
3300    }
3301
3302    /*
3303     * Just flush the entire TLB since we messed with quite a lot of mappings.
3304     */
3305    pmap_flush_tlbs(kernel_pmap, 0, 0xFFFFFFFF);
3306
3307    SPLX(spl);
3308
3309    /*
3310     * Set up the core VM object.
3311     */
3312    pmap_object = &pmap_object_store;
3313    _vm_object_allocate(mem_size, &pmap_object_store);
3314    kernel_pmap->pm_obj = pmap_object;
3315
3316#ifdef _ARM_ARCH_7
3317    /*
3318     * Initialize ASID subsystem properly.
3319     */
3320    pmap_asid_configure();
3321#endif
3322
3323    /*
3324     * Done initializing.
3325     */
3326    pmap_initialized = TRUE;
3327
3328    return;
3329}
3330
3331/**
3332 * pmap_remove_range
3333 *
3334 * Remove a range of hardware page-table entries. (This function does not support section mappings.)
3335 */
3336void pmap_remove_range(pmap_t pmap, vm_map_offset_t start_vaddr, pt_entry_t * spte, pt_entry_t * epte, boolean_t is_sect)
3337{
3338    pt_entry_t *cpte = spte;
3339    vm_map_offset_t vaddr = start_vaddr;
3340    vm_size_t our_page_size = (is_sect) ? (_1MB) : PAGE_SIZE;
3341    int num_removed = 0, num_unwired = 0;
3342    pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
3343    pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
3344    pv_hashed_entry_t pvh_e;
3345    int pvh_cnt = 0;
3346    int pvhash_idx;
3347    uint32_t pv_cnt;
3348
3349    /*
3350     * Make sure the Cpte/Epte are within sane boundaries. (256 entries, one L2 area size.)
3351     */
3352    if (((vm_offset_t) epte - (vm_offset_t) cpte) > L2_SIZE)
3353        panic("pmap_remove_range: attempting to remove more ptes than 256!\n");
3354
3355    for (cpte = spte, vaddr = start_vaddr; cpte < epte; cpte++, vaddr += our_page_size) {
3356        /*
3357         * Start nuking the range.
3358         */
3359        pt_entry_t *p = cpte;
3360
3361        /*
3362         * Get the index for the PV table.
3363         */
3364        ppnum_t pai = pa_index(*cpte & L2_ADDR_MASK);
3365        if (pai == 0) {
3366            continue;
3367        }
3368
3369        /*
3370         * If it isn't a managed page, don't update the pv_table.
3371         */
3372        if (!pmap_valid_page(pai))
3373            continue;
3374
3375        num_removed++;
3376        if (phys_attribute_test(pai, PMAP_OSPTE_TYPE_WIRED)) {
3377            phys_attribute_clear(pai, PMAP_OSPTE_TYPE_WIRED);
3378            num_unwired++;
3379        }
3380
3381        /*
3382         * Nuke the page table entry.
3383         */
3384        *cpte = 0;
3385
3386        /*
3387         * Continue onwards if pmap isn't up yet.. (keep nuking pages!)
3388         */
3389        if (!pmap_initialized)
3390            continue;
3391
3392        LOCK_PVH(pai);
3393        /*
3394         *  Remove the mapping from the pvlist for
3395         *  this physical page.
3396         */
3397        {
3398            pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) & pai, cpte);
3399            UNLOCK_PVH(pai);
3400            if (pvh_e != PV_HASHED_ENTRY_NULL) {
3401                pvh_e->qlink.next = (queue_entry_t) pvh_eh;
3402                pvh_eh = pvh_e;
3403
3404                if (pvh_et == PV_HASHED_ENTRY_NULL) {
3405                    pvh_et = pvh_e;
3406                }
3407                pvh_cnt++;
3408            }
3409        }                       /* removing mappings for this phy page */
3410    }
3411
3412    if (pvh_eh != PV_HASHED_ENTRY_NULL) {
3413        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
3414    }
3415
    /*
     * Invalidate the TLB entries covering the range we just removed.
     */
3419    pmap_flush_tlbs(pmap, start_vaddr, vaddr);
3420
    /*
     * Sanity-check the counters before updating the statistics and ledgers.
     */
3424    if (pmap->pm_stats.resident_count < num_removed)
3425        panic("pmap_remove_range: resident_count");
3426    pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * PAGE_SIZE);
3427    assert(pmap->pm_stats.resident_count >= num_removed);
3428    OSAddAtomic(-num_removed, &pmap->pm_stats.resident_count);
3429
3430    if (pmap->pm_stats.wired_count < num_unwired)
3431        panic("pmap_remove_range: wired_count");
3432    assert(pmap->pm_stats.wired_count >= num_unwired);
3433    OSAddAtomic(-num_unwired, &pmap->pm_stats.wired_count);
3434    pmap_ledger_debit(pmap, task_ledgers.wired_mem, num_unwired * PAGE_SIZE);
3435
3436    return;
3437}
3438
3439/**
3440 * pmap_remove
3441 *
3442 * Remove the given range of addresses from the specified map.
3443 */
3444void pmap_remove(pmap_t map, vm_offset_t sva, vm_offset_t eva)
3445{
3446    spl_t spl;
3447    pt_entry_t *tte;
3448    vm_offset_t *spte, *epte, lva = sva;
3449
3450    /*
3451     * Verify the pages are page aligned.
3452     */
3453    assert(!(sva & PAGE_MASK));
3454    assert(!(eva & PAGE_MASK));
3455
    /*
     * Lock the pmap so nothing else can touch it during the removal.
     */
3459    PMAP_LOCK(map);
3460
    /*
     * Walk the range one 1MB L1 section at a time.
     * XXX: this walk is known to be fragile.
     */
3464    while (sva < eva) {
3465        lva = (sva + _1MB) & ~((_1MB) - 1);
3466        if (lva > eva)
3467            lva = eva;
3468        tte = (pt_entry_t *)pmap_tte(map, sva);
3469        assert(tte);
3470        if (tte && ((*tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_PAGE_TABLE)) {
3471            pt_entry_t *spte_begin;
3472            spte_begin = (pt_entry_t *) (phys_to_virt(L1_PTE_ADDR(*tte)));
3473            spte = (vm_offset_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(sva));
3474            epte = (vm_offset_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(lva));
3475
            /*
             * If sva and lva fall in different 1MB sections (lva is the next
             * section boundary), extend epte across the intervening coarse tables.
             */
3479            if ((sva >> L1SHIFT) != (lva >> L1SHIFT)) {
3480                int mb_off = (lva >> L1SHIFT) - (sva >> L1SHIFT);
3481                epte = (vm_offset_t *)((vm_offset_t) spte_begin + (0x400 * mb_off) + (vm_offset_t) pte_offset(lva));
3482            }
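
            /*
             * Worked example of the adjustment above (a sketch, assuming
             * pte_offset() yields a byte offset into a 0x400-byte coarse
             * table): with sva = 0x400000 and eva beyond the next section,
             * lva becomes 0x500000, so mb_off is 1 and pte_offset(lva) is 0;
             * epte then lands at spte_begin + 0x400, one full 256-entry L2
             * table past spte. Without the adjustment epte would collapse
             * back onto spte_begin and the range would be empty.
             */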
3483
3484            assert(epte >= spte);
3485
3486            /*
3487             * Make sure the range isn't bogus.
3488             */
3489            if (((vm_offset_t) epte - (vm_offset_t) spte) > L2_SIZE) {
3490                panic("pmap_remove: attempting to remove bogus PTE range");
3491            }
3492
3493            pmap_remove_range(map, sva, spte, epte, FALSE);
3494        }
3495        sva = lva;
3496    }
3497
3498    /*
3499     * Flush TLBs since we modified page table entries.
3500     */
3501    pmap_flush_tlbs(map, sva, eva);
3502
3503    /*
3504     * Return.
3505     */
3506    PMAP_UNLOCK(map);
3507    return;
3508}
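
/*
 * Illustrative usage (a sketch with placeholder names, not called anywhere in
 * this file): tearing down a page-aligned window of a map would look roughly
 * like
 *
 *     pmap_remove(map, trunc_page(start), round_page(end));
 *
 * Both bounds must be page aligned, as the asserts above enforce;
 * trunc_page()/round_page() are the usual Mach helpers for that.
 */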
3509
3510/**
3511 * pmap_create
3512 *
3513 * Create a pmap.
3514 */
3515pmap_t pmap_create(ledger_t ledger, vm_map_size_t size, __unused boolean_t is_64bit)
3516{
3517    pmap_t our_pmap;
3518    vm_page_t new_l1;
3519
    /*
     * Some necessary prerequisites.
     */
3523    if (!pmap_initialized || size || !kernel_task)
3524        return PMAP_NULL;
3525
3526    /*
3527     * Zalloc a new one.
3528     */
3529    our_pmap = (pmap_t) zalloc(pmap_zone);
3530    if (!our_pmap) {
3531        panic("pmap_create: allocating the new pmap failed");
3532    }
3533    our_pmap->pm_refcnt = 1;
3534    our_pmap->ledger = ledger;
3535    our_pmap->pm_asid = 0;
3536    pmap_common_init(our_pmap);
3537
3538#ifdef _NOTYET_
3539    pmap_asid_alloc_fast(our_pmap);
3540#endif
3541
3542    /*
3543     * Create the pmap VM object.
3544     */
3545    if (NULL == (our_pmap->pm_obj = vm_object_allocate((vm_object_size_t) (4096 * PAGE_SIZE))))
3546        panic("pmap_create: pm_obj null");
3547
3548    if (pmap_asid_ncpus)
3549        pmap_asid_initialize(our_pmap);
3550
3551    /*
3552     * Grab a new page and set the new L1 region.
3553     */
3554    new_l1 = pmap_grab_page(our_pmap);
3555    our_pmap->pm_l1_phys = new_l1->phys_page << PAGE_SHIFT;
3556    our_pmap->pm_l1_virt = phys_to_virt(new_l1->phys_page << PAGE_SHIFT);
3557    bzero((void *)phys_to_virt(new_l1->phys_page << PAGE_SHIFT), PAGE_SIZE);
3558
3559    /*
3560     * New pmaps have 4096 bytes of TTB area.
3561     */
3562    our_pmap->pm_l1_size = PAGE_SIZE;
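
    /*
     * (A 4 KB first-level table holds 1024 word-sized entries, each covering
     * 1MB, so a one-page L1 maps 1 GB of virtual address space; mapping more
     * than that means growing the TTB area beyond one page, as the > 0x1000
     * case in pmap_deallocate_l1 anticipates.)
     */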
3563
3564    /*
3565     * Done.
3566     */
3567    return our_pmap;
3568}
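
/*
 * Illustrative usage (sketch only, placeholder names): a new user pmap is
 * created with a zero size argument, since any non-zero size is rejected
 * above:
 *
 *     pmap_t p = pmap_create(ledger, 0, FALSE);
 *     if (p == PMAP_NULL)
 *         panic("out of pmaps");
 *
 * The returned pmap starts with a single page of L1 (pm_l1_size == PAGE_SIZE).
 */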
3569
3570/**
3571 * pmap_page_protect
3572 *
3573 * Lower the protections on a set of mappings.
3574 */
3575void pmap_page_protect(ppnum_t pn, vm_prot_t prot)
3576{
3577    boolean_t remove;
3578    spl_t spl;
3579    pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
3580    pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
3581    pv_hashed_entry_t nexth;
3582    int pvh_cnt = 0;
3583    int pvhash_idx;
3584    pv_rooted_entry_t pv_h;
3585    pv_rooted_entry_t pv_e;
3586    pv_hashed_entry_t pvh_e;
3587    register pmap_t pmap;
3588    pt_entry_t *pte;
3589
3590    /*
3591     * Verify it's not a fictitious page.
3592     */
3593    assert(pn != vm_page_fictitious_addr);
3594
3595    /*
3596     * Verify said page is managed by us.
3597     */
3598    assert(pmap_initialized);
3599    if (!pmap_valid_page(pn)) {
3600        return;
3601    }
3602
3603    /*
3604     * Determine the new protection.
3605     */
3606    switch (prot) {
3607    case VM_PROT_READ:
3608    case VM_PROT_READ | VM_PROT_EXECUTE:
3609        remove = FALSE;
3610        break;
3611    case VM_PROT_ALL:
3612        return;                 /* nothing to do */
3613    default:
3614        remove = TRUE;
3615        break;
3616    }
3617
    /*
     * Find the PV head for this page and lock it.
     */
3621    pv_h = pai_to_pvh(pn);
3622    LOCK_PVH(pn);
3623
3624    /*
3625     * Walk down PV list, changing or removing all mappings.
3626     */
3627    if (pv_h->pmap != PMAP_NULL) {
3628
3629        pv_e = pv_h;
3630        pvh_e = (pv_hashed_entry_t) pv_e;   /* cheat */
3631
3632        do {
3633            register vm_map_offset_t vaddr;
3634            pmap = pv_e->pmap;
3635
3636            vaddr = pv_e->va;
3637            pte = (pt_entry_t *)pmap_pte(pmap, vaddr);
3638
3639            if (0 == pte) {
3640                panic("pmap_page_protect(): null PTE pmap=%p pn=0x%x vaddr=0x%08x shadow=0x%08x\n", pmap, pn, vaddr, pv_e->flags);
3641            }
3642            nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);  /* if there is one */
3643
3644            /*
3645             * Remove the mapping if new protection is NONE
3646             * or if write-protecting a kernel mapping.
3647             */
3648            if (remove || pmap == kernel_pmap) {
3649                /*
3650                 * Remove the mapping, collecting any modify bits.
3651                 */
3652                *(pt_entry_t *) pte = 0;
3653                pmap_flush_tlbs(pmap, vaddr, vaddr + PAGE_SIZE);
3654                phys_attribute_clear(pn, PMAP_OSPTE_TYPE_REFERENCED | PMAP_OSPTE_TYPE_MODIFIED);
3655                if (pmap->pm_stats.resident_count < 1)
3656                    panic("pmap_page_protect: resident_count");
3657                assert(pmap->pm_stats.resident_count >= 1);
3658                OSAddAtomic(-1, (SInt32 *) & pmap->pm_stats.resident_count);
3659                pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
3660
3661                /*
3662                 * Deal with the pv_rooted_entry.
3663                 */
3664
3665                if (pv_e == pv_h) {
3666                    /*
3667                     * Fix up head later.
3668                     */
3669                    pv_h->pmap = PMAP_NULL;
3670                } else {
3671                    /*
3672                     * Delete this entry.
3673                     */
3674                    pv_hash_remove(pvh_e);
3675                    pvh_e->qlink.next = (queue_entry_t) pvh_eh;
3676                    pvh_eh = pvh_e;
3677
3678                    if (pvh_et == PV_HASHED_ENTRY_NULL)
3679                        pvh_et = pvh_e;
3680                    pvh_cnt++;
3681                }
3682            } else {
3683                /*
3684                 * Write-protect.
3685                 */
3686                *(pt_entry_t *) pte |= (L2_ACCESS_APX);
3687                pmap_flush_tlbs(pmap, vaddr, vaddr + PAGE_SIZE);
3688            }
3689
3690            pvh_e = nexth;
3691        } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
3692
3693        /*
3694         * If pv_head mapping was removed, fix it up.
3695         */
3696
3697        if (pv_h->pmap == PMAP_NULL) {
3698            pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
3699
3700            if (pvh_e != (pv_hashed_entry_t) pv_h) {
3701                pv_hash_remove(pvh_e);
3702                pv_h->pmap = pvh_e->pmap;
3703                pv_h->va = pvh_e->va;
3704                pvh_e->qlink.next = (queue_entry_t) pvh_eh;
3705                pvh_eh = pvh_e;
3706
3707                if (pvh_et == PV_HASHED_ENTRY_NULL)
3708                    pvh_et = pvh_e;
3709                pvh_cnt++;
3710            }
3711        }
3712    }
3713    if (pvh_eh != PV_HASHED_ENTRY_NULL) {
3714        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
3715    }
3716    UNLOCK_PVH(pn);
3717}
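
/*
 * Illustrative usage (sketch only, pn is a placeholder), following the switch
 * above: for a managed physical page pn,
 *
 *     pmap_page_protect(pn, VM_PROT_READ);    // write-protect user mappings
 *     pmap_page_protect(pn, VM_PROT_NONE);    // remove every mapping
 *
 * Note that even VM_PROT_READ removes (rather than write-protects) kernel
 * mappings, per the "remove || pmap == kernel_pmap" test above.
 */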
3718
3719/**
3720 * pmap_deallocate_l1
3721 *
 * Deallocate the pmap's L1 translation table.
3723 */
3724void pmap_deallocate_l1(pmap_t pmap)
3725{
3726    uint32_t ttb_base = pmap->pm_l1_phys;
3727    vm_page_t m;
3728
3729    /*
3730     * If the pmap is expanded past 0x1000, we must use cpm_deallocate.
3731     */
3732    if (pmap->pm_l1_size > 0x1000) {
3733        /*
3734         * xxx todo
3735         */
3736        return;
3737    }
3738
3739    /*
3740     * Lock the VM object.
3741     */
3742    vm_object_lock(pmap->pm_obj);
3743
3744    /*
3745     * Look up the page.
3746     */
3747    m = vm_page_lookup(pmap->pm_obj, (vm_object_offset_t) ((ttb_base >> PAGE_SHIFT) - (gPhysBase >> PAGE_SHIFT)));
3748    assert(m);
3749
3750    /*
3751     * Got it, now free it.
3752     */
3753    VM_PAGE_FREE(m);
3754
3755    /*
3756     * Done.
3757     */
3758    vm_object_unlock(pmap->pm_obj);
3759
    /*
     * One fewer page table page is now in use.
     */
3763    OSAddAtomic(-1, &inuse_ptepages_count);
3764
    /*
     * Done; invalidation of the entire pmap should follow.
     */
3768    return;
3769}
3770
3771/**
3772 * pmap_destroy
3773 *
3774 * Destroy the current physical map.
3775 */
3776void pmap_destroy(pmap_t pmap)
3777{
3778    spl_t spl;
3779    int refcnt, i;
3780
3781    /*
3782     * Some necessary prerequisites.
3783     */
3784    assert(pmap_initialized);
3785
3786    /*
3787     * NEVER EVER EVER DESTROY THE KERNEL PMAP
3788     */
3789    if (pmap == kernel_pmap)
3790        panic("pmap_destroy: attempting to destroy kernel_pmap");
3791
3792    PMAP_LOCK(pmap);
3793
3794    /*
3795     * Okay, decrease the reference count.
3796     */
3797    refcnt = --pmap->pm_refcnt;
3798    if (refcnt == 0) {
3799        pmap_flush_tlbs(pmap, 0, 0xFFFFFFFF);
3800        if (pmap_asid_ncpus)
3801            pmap_destroy_asid_sync(pmap);
3802    }
3803
3804    /*
3805     * Unlock the pmap system.
3806     */
3807    PMAP_UNLOCK(pmap);
3808
    /*
     * If the pmap still has outstanding references, don't destroy it.
     */
3812    if (refcnt != 0) {
3813        return;
3814    }
3815
    /*
     * Free the objects associated with the pmap first.
     */
3819    pmap_deallocate_l1(pmap);
3820    ledger_dereference(pmap->ledger);
3821
3822    /*
3823     * Free the 'expanded' pages.
3824     */
3825    OSAddAtomic(-pmap->pm_obj->resident_page_count, &inuse_ptepages_count);
3826    PMAP_ZINFO_PFREE(pmap, pmap->pm_obj->resident_page_count * PAGE_SIZE);
3827    vm_object_deallocate(pmap->pm_obj);
3828
3829    /*
3830     * Free the actual pmap.
3831     */
3832    zfree(pmap_zone, pmap);
3833
3834    /*
3835     * Done.
3836     */
3837    return;
3838}
3839
/**
 * pmap_protect
 *
 * Lower the protections on the range [sva, eva) of the specified map to prot.
 */
3845void pmap_protect(pmap_t map, vm_map_offset_t sva, vm_map_offset_t eva, vm_prot_t prot)
3846{
3847    register pt_entry_t *tte;
3848    register pt_entry_t *spte, *epte;
3849    vm_map_offset_t lva;
3850    vm_map_offset_t orig_sva;
3851    boolean_t set_NX;
3852    int num_found = 0;
3853
3854    /*
3855     * Verify the start and end are page aligned.
3856     */
3857    assert(!(sva & PAGE_MASK));
3858    assert(!(eva & PAGE_MASK));
3859
    /*
     * A null pmap is a no-op; a protection of VM_PROT_NONE means the
     * mappings are removed outright.
     */
3863    if (map == PMAP_NULL)
3864        return;
3865
3866    if (prot == VM_PROT_NONE) {
3867        pmap_remove(map, sva, eva);
3868        return;
3869    }
3870
3871    /*
3872     * Enforce NX if necessary.
3873     */
3874    if ((prot & VM_PROT_EXECUTE) || !nx_enabled)
3875        set_NX = FALSE;
3876    else
3877        set_NX = TRUE;
3878
3879    /*
3880     * Lock the pmap and set the protections on the PTEs.
3881     */
3882    PMAP_LOCK(map);
3883
    /*
     * Walk the range one 1MB L1 section at a time.
     * XXX: this walk is known to be fragile.
     */
3887    orig_sva = sva;
3888    while (sva < eva) {
3889        lva = (sva + _1MB) & ~((_1MB) - 1);
3890        if (lva > eva)
3891            lva = eva;
3892        tte = (pt_entry_t *)pmap_tte(map, sva);
3893        assert(tte);
3894        if (tte && ((*tte & ARM_PAGE_MASK_VALUE) == ARM_PAGE_PAGE_TABLE)) {
3895            pt_entry_t *spte_begin;
3896            spte_begin = (pt_entry_t *) (phys_to_virt(L1_PTE_ADDR(*tte)));
3897            spte = (pt_entry_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(sva));
3898            epte = (pt_entry_t *)((vm_offset_t) spte_begin + (vm_offset_t) pte_offset(lva));
3899
            /*
             * If sva and lva fall in different 1MB sections (lva is the next
             * section boundary), extend epte across the intervening coarse tables.
             */
3903            if ((sva >> L1SHIFT) != (lva >> L1SHIFT)) {
3904                int mb_off = (lva >> L1SHIFT) - (sva >> L1SHIFT);
3905                epte = (pt_entry_t *)((vm_offset_t) spte_begin + (0x400 * mb_off) + (vm_offset_t) pte_offset(lva));
3906            }
3907
3908            assert(epte >= spte);
3909
3910            /*
3911             * Make sure the range isn't bogus.
3912             */
            if (((vm_offset_t) epte - (vm_offset_t) spte) > L2_SIZE)
                panic("pmap_protect: attempting to protect bogus PTE range");
3915
3916            while (spte < epte) {
3917                if (*spte & ARM_PTE_DESCRIPTOR_4K) {
3918                    assert(*spte & ARM_PTE_DESCRIPTOR_4K);
3919
3920                    /*
3921                     * Make the PTE RO if necessary.
3922                     */
3923                    if (prot & VM_PROT_WRITE)
3924                        *spte &= ~(L2_ACCESS_APX);
3925                    else
3926                        *spte |= L2_ACCESS_APX;
3927
3928                    /*
3929                     * Enforce NX bit.
3930                     */
3931                    if (set_NX)
3932                        *spte |= L2_NX_BIT;
3933                    else
3934                        *spte &= ~(L2_NX_BIT);
3935                    num_found++;
3936                }
3937                spte++;
3938            }
3939        }
3940        sva = lva;
3941    }
3942
    /*
     * Flush the TLBs for the entire range we touched, then unlock.
     */
    pmap_flush_tlbs(map, orig_sva, eva);
3947    PMAP_UNLOCK(map);
3948
3949    return;
3950}
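
/*
 * Illustrative usage (sketch only, placeholder names): making a page-aligned
 * range read-only (and non-executable when nx_enabled is set):
 *
 *     pmap_protect(map, sva, eva, VM_PROT_READ);
 *
 * Passing VM_PROT_NONE instead falls through to pmap_remove(), as handled at
 * the top of the function.
 */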
3951
3952/**
3953 * pmap_nest
3954 *
3955 * Nest a pmap with new mappings into a master pmap.
3956 */
3957kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size)
3958{
3959    int copied;
3960    unsigned int i;
3961    vm_offset_t *tte, *ntte;
3962    vm_map_offset_t nvaddr, vaddr;
3963
    /*
     * Announce ourselves. We are nesting one pmap inside another.
     */
3967    kprintf("pmap_nest: %p[0x%08llx] => %p[0x%08llx], %d tte entries\n", subord, va_start, grand, nstart, size >> L1SHIFT);
3968
3969    /*
3970     * Sanity checks.
3971     */
3972    if (size == 0) {
3973        panic("pmap_nest: size is invalid - %016llX\n", size);
3974    }
3975
3976    if (va_start != nstart)
3977        panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
3978
3979    /*
3980     * Start the copy operations.
3981     */
3982    PMAP_LOCK(subord);
3983
    /*
     * Mark the subordinate pmap as shared.
     */
3987    uint32_t num_sect = size >> L1SHIFT;
3988    subord->pm_shared = TRUE;
3989    nvaddr = (vm_map_offset_t) nstart;
3990
3991    /*
3992     * Expand the subordinate pmap to fit.
3993     */
3994    for (i = 0; i < num_sect; i++) {
3995        /*
3996         * Fetch the TTE and expand the pmap if there is not one.
3997         */
3998        ntte = (vm_offset_t *)pmap_tte(subord, nvaddr);
3999
4000        while (ntte == 0 || ((*ntte & ARM_PAGE_MASK_VALUE) != ARM_PAGE_PAGE_TABLE)) {
4001            PMAP_UNLOCK(subord);
4002            pmap_expand(subord, nvaddr);
4003            PMAP_LOCK(subord);
4004            ntte = (vm_offset_t *)pmap_tte(subord, nvaddr);
4005        }
4006
4007        /*
4008         * Increase virtual address by granularity of one TTE entry.
4009         */
4010        nvaddr += (_1MB);
4011    }
4012    PMAP_UNLOCK(subord);
4013
    /*
     * The initial expansion of the subordinate pmap is done; copy its new
     * TTEs into the grand (master) pmap.
     */
4018    PMAP_LOCK(grand);
4019    vaddr = (vm_map_offset_t) va_start;
4020    for (i = 0; i < num_sect; i++) {
4021        pt_entry_t target;
4022
4023        /*
4024         * Get the initial TTE from the subordinate map and verify it.
4025         */
4026        ntte = (vm_offset_t *)pmap_tte(subord, vaddr);
4027        if (ntte == 0)
4028            panic("pmap_nest: no ntte, subord %p nstart 0x%llx", subord, nstart);
4029        target = *ntte;
4030
4031        nstart += (_1MB);
4032
4033        /*
4034         * Now, get the TTE address from the Grand map.
4035         */
4036        tte = (vm_offset_t *)pmap_tte(grand, vaddr);
4037        if (tte == 0)
4038            panic("pmap_nest: no tte, grand %p vaddr 0x%x", grand, vaddr);
4039
4040        /*
4041         * Store the TTE.
4042         */
4043        *tte = target;
4044        vaddr += (_1MB);
4045    }
4046    PMAP_UNLOCK(grand);
4047
    /*
     * Out. Flush the TLBs covering the nested range.
     */
4051    pmap_flush_tlbs(grand, va_start, va_start + size);
4052
4053    return KERN_SUCCESS;
4054}
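
/*
 * Illustrative usage (sketch only; task_pmap, shared_pmap, base and size are
 * placeholder names): nesting a shared submap into a task pmap at the same
 * virtual window in both maps. va_start must equal nstart, and size should be
 * a whole number of 1MB sections:
 *
 *     kern_return_t kr = pmap_nest(task_pmap, shared_pmap, base, base, size);
 *     assert(kr == KERN_SUCCESS);
 */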
4055
4056/**
4057 * pmap_unnest
4058 *
4059 * Remove a nested pmap.
4060 */
4061kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
4062{
4063    vm_offset_t *tte;
4064    unsigned int i, num_sect;
4065    addr64_t vstart, vend;
4066    spl_t spl;
4067
    /*
     * Verify the address and size are properly aligned.
     */
4071    if ((size & (pmap_nesting_size_min - 1)) || (vaddr & (pmap_nesting_size_min - 1))) {
4072        panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned addresses\n", grand, vaddr, size);
4073    }
4074
4075    /*
4076     * Align everything to a 1MB boundary. (TTE granularity)
4077     */
4078    vstart = vaddr & ~((_1MB) - 1);
4079    vend = (vaddr + size + (_1MB) - 1) & ~((_1MB) - 1);
4080    size = (vend - vstart);
4081
4082    /*
4083     * Lock the pmaps to prevent use.
4084     */
4085    PMAP_LOCK(grand);
4086
4087    num_sect = size >> L1SHIFT;
4088    vaddr = vstart;
4089    for (i = 0; i < num_sect; i++) {
4090        tte = (vm_offset_t *)pmap_tte(grand, (vm_map_offset_t) vaddr);
4091        if (tte == 0)
4092            panic("pmap_unnest: no tte, grand %p vaddr 0x%llx\n", grand, vaddr);
4093        *tte = 0;
4094        vaddr += (_1MB);
4095    }
4096
    /*
     * The unnesting is complete; flush the TLBs for the range we cleared.
     */
    pmap_flush_tlbs(grand, vstart, vend);
4101
4102    PMAP_UNLOCK(grand);
4103
4104    return KERN_SUCCESS;
4105}
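
/*
 * Illustrative usage (sketch only, placeholder names): undoing the nesting
 * above. vaddr and size are rounded out to 1MB boundaries internally, so
 * passing the same base and size that were given to pmap_nest() is enough:
 *
 *     pmap_unnest(task_pmap, base, size);
 */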
4106
4107/**
4108 * pmap_disconnect
4109 *
 * Remove all mappings of a page and return its reference/modify bits.
4111 */
4112unsigned int pmap_disconnect(ppnum_t pa)
4113{
4114    /*
4115     * Disconnect the page.
4116     */
4117    pmap_page_protect(pa, 0);
4118    return pmap_get_refmod(pa);
4119}
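
/*
 * Illustrative usage (sketch only, pn and refmod are placeholders; this
 * assumes the usual VM_MEM_REFERENCED / VM_MEM_MODIFIED bits returned by
 * pmap_get_refmod()):
 *
 *     unsigned int refmod = pmap_disconnect(pn);
 *     if (refmod & VM_MEM_MODIFIED) {
 *         // the page was dirtied while it was mapped
 *     }
 */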
4120
4121/*
4122 * kern_return_t
4123 * pmap_add_physical_memory(vm_offset_t spa, vm_offset_t epa,
4124 *                          boolean_t available, unsigned int attr)
4125 *
4126 *  THIS IS NOT SUPPORTED
4127 */
4128kern_return_t pmap_add_physical_memory(__unused vm_offset_t spa, __unused vm_offset_t epa, __unused boolean_t available, __unused unsigned int attr)
4129{
4130    panic("Forget it! You can't map no more memory, you greedy puke!\n");
4131    return KERN_SUCCESS;
4132}
4133
4134/**
4135 * pmap_zero_part_page
4136 *
 * Zero part of the specified (machine independent) page.
4138 */
4139void pmap_zero_part_page(ppnum_t src, vm_offset_t src_offset, vm_offset_t len)
4140{
4141    assert(src != vm_page_fictitious_addr);
4142    assert((((src << PAGE_SHIFT) & PAGE_MASK) + src_offset + len) <= PAGE_SIZE);
4143    bzero((void *)(phys_to_virt(src << PAGE_SHIFT) + src_offset), len);
4144}
4145
4146/**
4147 * pmap_copy_part_lpage
4148 *
4149 * Copy part of a virtually addressed page
4150 * to a physically addressed page.
4151 */
4152void pmap_copy_part_lpage(vm_offset_t src, vm_offset_t dst, vm_offset_t dst_offset, vm_size_t len)
4153{
4154    panic("pmap_copy_part_lpage");
4155}
4156
4157/**
4158 * pmap_copy_part_rpage
4159 *
4160 * Copy part of a physically addressed page
4161 * to a virtually addressed page.
4162 */
4163void pmap_copy_part_rpage(vm_offset_t src, vm_offset_t src_offset, vm_offset_t dst, vm_size_t len)
4164{
4165    panic("pmap_copy_part_rpage");
4166}
4167
4168/**
4169 * pmap_copy
4170 *
4171 * Unused.
4172 */
4173void pmap_copy(pmap_t dst, pmap_t src, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
4174{
4175    return;
4176}
4177
4178/**
4179 * coredumpok
4180 *
4181 * Unused.
4182 */
4183boolean_t coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
4184{
4185    return TRUE;
4186}
4187
4188/*
4189 * These functions are used for bookkeeping.
4190 */
4191void pt_fake_zone_init(int zone_index)
4192{
4193    pt_fake_zone_index = zone_index;
4194}
4195
4196void pt_fake_zone_info(int *count, vm_size_t * cur_size, vm_size_t * max_size, vm_size_t * elem_size, vm_size_t * alloc_size, uint64_t * sum_size, int *collectable, int *exhaustable, int *caller_acct)
4197{
4198    *count = inuse_ptepages_count;
4199    *cur_size = PAGE_SIZE * inuse_ptepages_count;
4200    *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
4201    *elem_size = PAGE_SIZE;
4202    *alloc_size = PAGE_SIZE;
4203    *sum_size = alloc_ptepages_count * PAGE_SIZE;
4204
4205    *collectable = 1;
4206    *exhaustable = 0;
4207    *caller_acct = 1;
4208}
4209