vmm_mem.c revision 221828
1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/param.h> 33221828Sgrehan#include <sys/lock.h> 34221828Sgrehan#include <sys/mutex.h> 35221828Sgrehan#include <sys/linker.h> 36221828Sgrehan#include <sys/systm.h> 37221828Sgrehan#include <sys/malloc.h> 38221828Sgrehan#include <sys/kernel.h> 39221828Sgrehan 40221828Sgrehan#include <vm/vm.h> 41221828Sgrehan#include <vm/pmap.h> 42221828Sgrehan 43221828Sgrehan#include <machine/md_var.h> 44221828Sgrehan#include <machine/metadata.h> 45221828Sgrehan#include <machine/pc/bios.h> 46221828Sgrehan#include <machine/vmparam.h> 47221828Sgrehan#include <machine/pmap.h> 48221828Sgrehan 49221828Sgrehan#include "vmm_util.h" 50221828Sgrehan#include "vmm_mem.h" 51221828Sgrehan 52221828Sgrehanstatic MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory"); 53221828Sgrehan 54221828Sgrehan#define MB (1024 * 1024) 55221828Sgrehan#define GB (1024 * MB) 56221828Sgrehan 57221828Sgrehan#define VMM_MEM_MAXSEGS 64 58221828Sgrehan 59221828Sgrehan/* protected by vmm_mem_mtx */ 60221828Sgrehanstatic struct { 61221828Sgrehan vm_paddr_t base; 62221828Sgrehan vm_size_t length; 63221828Sgrehan} vmm_mem_avail[VMM_MEM_MAXSEGS]; 64221828Sgrehan 65221828Sgrehanstatic int vmm_mem_nsegs; 66221828Sgrehan 67221828Sgrehanstatic vm_paddr_t maxaddr; 68221828Sgrehan 69221828Sgrehanstatic struct mtx vmm_mem_mtx; 70221828Sgrehan 71221828Sgrehan/* 72221828Sgrehan * Steal any memory that was deliberately hidden from FreeBSD either by 73221828Sgrehan * the use of MAXMEM kernel config option or the hw.physmem loader tunable. 74221828Sgrehan */ 75221828Sgrehanstatic int 76221828Sgrehanvmm_mem_steal_memory(void) 77221828Sgrehan{ 78221828Sgrehan int nsegs; 79221828Sgrehan caddr_t kmdp; 80221828Sgrehan uint32_t smapsize; 81221828Sgrehan uint64_t base, length; 82221828Sgrehan struct bios_smap *smapbase, *smap, *smapend; 83221828Sgrehan 84221828Sgrehan /* 85221828Sgrehan * Borrowed from hammer_time() and getmemsize() in machdep.c 86221828Sgrehan */ 87221828Sgrehan kmdp = preload_search_by_type("elf kernel"); 88221828Sgrehan if (kmdp == NULL) 89221828Sgrehan kmdp = preload_search_by_type("elf64 kernel"); 90221828Sgrehan 91221828Sgrehan smapbase = (struct bios_smap *)preload_search_info(kmdp, 92221828Sgrehan MODINFO_METADATA | MODINFOMD_SMAP); 93221828Sgrehan if (smapbase == NULL) 94221828Sgrehan panic("No BIOS smap info from loader!"); 95221828Sgrehan 96221828Sgrehan smapsize = *((uint32_t *)smapbase - 1); 97221828Sgrehan smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); 98221828Sgrehan 99221828Sgrehan nsegs = 0; 100221828Sgrehan for (smap = smapbase; smap < smapend; smap++) { 101221828Sgrehan /* 102221828Sgrehan * XXX 103221828Sgrehan * Assuming non-overlapping, monotonically increasing 104221828Sgrehan * memory segments. 105221828Sgrehan */ 106221828Sgrehan if (smap->type != SMAP_TYPE_MEMORY) 107221828Sgrehan continue; 108221828Sgrehan if (smap->length == 0) 109221828Sgrehan break; 110221828Sgrehan 111221828Sgrehan base = roundup(smap->base, NBPDR); 112221828Sgrehan length = rounddown(smap->length, NBPDR); 113221828Sgrehan 114221828Sgrehan /* Skip this segment if FreeBSD is using all of it. */ 115221828Sgrehan if (base + length <= ptoa(Maxmem)) 116221828Sgrehan continue; 117221828Sgrehan 118221828Sgrehan /* 119221828Sgrehan * If FreeBSD is using part of this segment then adjust 120221828Sgrehan * 'base' and 'length' accordingly. 121221828Sgrehan */ 122221828Sgrehan if (base < ptoa(Maxmem)) { 123221828Sgrehan uint64_t used; 124221828Sgrehan used = roundup(ptoa(Maxmem), NBPDR) - base; 125221828Sgrehan base += used; 126221828Sgrehan length -= used; 127221828Sgrehan } 128221828Sgrehan 129221828Sgrehan if (length == 0) 130221828Sgrehan continue; 131221828Sgrehan 132221828Sgrehan vmm_mem_avail[nsegs].base = base; 133221828Sgrehan vmm_mem_avail[nsegs].length = length; 134221828Sgrehan 135221828Sgrehan if (base + length > maxaddr) 136221828Sgrehan maxaddr = base + length; 137221828Sgrehan 138221828Sgrehan if (0 && bootverbose) { 139221828Sgrehan printf("vmm_mem_populate: index %d, base 0x%0lx, " 140221828Sgrehan "length %ld\n", 141221828Sgrehan nsegs, vmm_mem_avail[nsegs].base, 142221828Sgrehan vmm_mem_avail[nsegs].length); 143221828Sgrehan } 144221828Sgrehan 145221828Sgrehan nsegs++; 146221828Sgrehan if (nsegs >= VMM_MEM_MAXSEGS) { 147221828Sgrehan printf("vmm_mem_populate: maximum number of vmm memory " 148221828Sgrehan "segments reached!\n"); 149221828Sgrehan return (ENOSPC); 150221828Sgrehan } 151221828Sgrehan } 152221828Sgrehan 153221828Sgrehan vmm_mem_nsegs = nsegs; 154221828Sgrehan 155221828Sgrehan return (0); 156221828Sgrehan} 157221828Sgrehan 158221828Sgrehanstatic void 159221828Sgrehanvmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end) 160221828Sgrehan{ 161221828Sgrehan vm_paddr_t addr, remaining; 162221828Sgrehan int pdpi, pdi, superpage_size; 163221828Sgrehan pml4_entry_t *pml4p; 164221828Sgrehan pdp_entry_t *pdp; 165221828Sgrehan pd_entry_t *pd; 166221828Sgrehan uint64_t page_attr_bits; 167221828Sgrehan 168221828Sgrehan if (end >= NBPML4) 169221828Sgrehan panic("Cannot map memory beyond %ldGB", NBPML4 / GB); 170221828Sgrehan 171221828Sgrehan /* XXX FreeBSD 8.1 does not use 1G superpages in the direct map */ 172221828Sgrehan if (0 && vmm_supports_1G_pages()) 173221828Sgrehan superpage_size = NBPDP; 174221828Sgrehan else 175221828Sgrehan superpage_size = NBPDR; 176221828Sgrehan 177221828Sgrehan /* 178221828Sgrehan * Get the page directory pointer page that contains the direct 179221828Sgrehan * map address mappings. 180221828Sgrehan */ 181221828Sgrehan pml4p = kernel_pmap->pm_pml4; 182221828Sgrehan pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK); 183221828Sgrehan 184221828Sgrehan page_attr_bits = PG_RW | PG_V | PG_PS | PG_G; 185221828Sgrehan addr = start; 186221828Sgrehan while (addr < end) { 187221828Sgrehan remaining = end - addr; 188221828Sgrehan pdpi = addr / NBPDP; 189221828Sgrehan if (superpage_size == NBPDP && 190221828Sgrehan remaining >= NBPDP && 191221828Sgrehan addr % NBPDP == 0) { 192221828Sgrehan /* 193221828Sgrehan * If there isn't a mapping for this address then 194221828Sgrehan * create one but if there is one already make sure 195221828Sgrehan * it matches what we expect it to be. 196221828Sgrehan */ 197221828Sgrehan if (pdp[pdpi] == 0) { 198221828Sgrehan pdp[pdpi] = addr | page_attr_bits; 199221828Sgrehan if (0 && bootverbose) { 200221828Sgrehan printf("vmm_mem_populate: mapping " 201221828Sgrehan "0x%lx with 1GB page at " 202221828Sgrehan "pdpi %d\n", addr, pdpi); 203221828Sgrehan } 204221828Sgrehan } else { 205221828Sgrehan pdp_entry_t pdpe = pdp[pdpi]; 206221828Sgrehan if ((pdpe & ~PAGE_MASK) != addr || 207221828Sgrehan (pdpe & page_attr_bits) != page_attr_bits) { 208221828Sgrehan panic("An invalid mapping 0x%016lx " 209221828Sgrehan "already exists for 0x%016lx\n", 210221828Sgrehan pdpe, addr); 211221828Sgrehan } 212221828Sgrehan } 213221828Sgrehan addr += NBPDP; 214221828Sgrehan } else { 215221828Sgrehan if (remaining < NBPDR) { 216221828Sgrehan panic("vmm_mem_populate: remaining (%ld) must " 217221828Sgrehan "be greater than NBPDR (%d)\n", 218221828Sgrehan remaining, NBPDR); 219221828Sgrehan } 220221828Sgrehan if (pdp[pdpi] == 0) { 221221828Sgrehan /* 222221828Sgrehan * XXX we lose this memory forever because 223221828Sgrehan * we do not keep track of the virtual address 224221828Sgrehan * that would be required to free this page. 225221828Sgrehan */ 226221828Sgrehan pd = malloc(PAGE_SIZE, M_VMM_MEM, 227221828Sgrehan M_WAITOK | M_ZERO); 228221828Sgrehan if ((uintptr_t)pd & PAGE_MASK) { 229221828Sgrehan panic("vmm_mem_populate: page directory" 230221828Sgrehan "page not aligned on %d " 231221828Sgrehan "boundary\n", PAGE_SIZE); 232221828Sgrehan } 233221828Sgrehan pdp[pdpi] = vtophys(pd); 234221828Sgrehan pdp[pdpi] |= PG_RW | PG_V | PG_U; 235221828Sgrehan if (0 && bootverbose) { 236221828Sgrehan printf("Creating page directory " 237221828Sgrehan "at pdp index %d for 0x%016lx\n", 238221828Sgrehan pdpi, addr); 239221828Sgrehan } 240221828Sgrehan } 241221828Sgrehan pdi = (addr % NBPDP) / NBPDR; 242221828Sgrehan pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK); 243221828Sgrehan 244221828Sgrehan /* 245221828Sgrehan * Create a new mapping if one doesn't already exist 246221828Sgrehan * or validate it if it does. 247221828Sgrehan */ 248221828Sgrehan if (pd[pdi] == 0) { 249221828Sgrehan pd[pdi] = addr | page_attr_bits; 250221828Sgrehan if (0 && bootverbose) { 251221828Sgrehan printf("vmm_mem_populate: mapping " 252221828Sgrehan "0x%lx with 2MB page at " 253221828Sgrehan "pdpi %d, pdi %d\n", 254221828Sgrehan addr, pdpi, pdi); 255221828Sgrehan } 256221828Sgrehan } else { 257221828Sgrehan pd_entry_t pde = pd[pdi]; 258221828Sgrehan if ((pde & ~PAGE_MASK) != addr || 259221828Sgrehan (pde & page_attr_bits) != page_attr_bits) { 260221828Sgrehan panic("An invalid mapping 0x%016lx " 261221828Sgrehan "already exists for 0x%016lx\n", 262221828Sgrehan pde, addr); 263221828Sgrehan } 264221828Sgrehan } 265221828Sgrehan addr += NBPDR; 266221828Sgrehan } 267221828Sgrehan } 268221828Sgrehan} 269221828Sgrehan 270221828Sgrehanstatic int 271221828Sgrehanvmm_mem_populate(void) 272221828Sgrehan{ 273221828Sgrehan int seg, error; 274221828Sgrehan vm_paddr_t start, end; 275221828Sgrehan 276221828Sgrehan /* populate the vmm_mem_avail[] array */ 277221828Sgrehan error = vmm_mem_steal_memory(); 278221828Sgrehan if (error) 279221828Sgrehan return (error); 280221828Sgrehan 281221828Sgrehan /* 282221828Sgrehan * Now map the memory that was hidden from FreeBSD in 283221828Sgrehan * the direct map VA space. 284221828Sgrehan */ 285221828Sgrehan for (seg = 0; seg < vmm_mem_nsegs; seg++) { 286221828Sgrehan start = vmm_mem_avail[seg].base; 287221828Sgrehan end = start + vmm_mem_avail[seg].length; 288221828Sgrehan if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) { 289221828Sgrehan panic("start (0x%016lx) and end (0x%016lx) must be " 290221828Sgrehan "aligned on a %dMB boundary\n", 291221828Sgrehan start, end, NBPDR / MB); 292221828Sgrehan } 293221828Sgrehan vmm_mem_direct_map(start, end); 294221828Sgrehan } 295221828Sgrehan 296221828Sgrehan return (0); 297221828Sgrehan} 298221828Sgrehan 299221828Sgrehanint 300221828Sgrehanvmm_mem_init(void) 301221828Sgrehan{ 302221828Sgrehan int error; 303221828Sgrehan 304221828Sgrehan mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF); 305221828Sgrehan 306221828Sgrehan error = vmm_mem_populate(); 307221828Sgrehan if (error) 308221828Sgrehan return (error); 309221828Sgrehan 310221828Sgrehan return (0); 311221828Sgrehan} 312221828Sgrehan 313221828Sgrehanvm_paddr_t 314221828Sgrehanvmm_mem_alloc(size_t size) 315221828Sgrehan{ 316221828Sgrehan int i; 317221828Sgrehan vm_paddr_t addr; 318221828Sgrehan 319221828Sgrehan if ((size & PDRMASK) != 0) { 320221828Sgrehan panic("vmm_mem_alloc: size 0x%0lx must be " 321221828Sgrehan "aligned on a 0x%0x boundary\n", size, NBPDR); 322221828Sgrehan } 323221828Sgrehan 324221828Sgrehan addr = 0; 325221828Sgrehan 326221828Sgrehan mtx_lock(&vmm_mem_mtx); 327221828Sgrehan for (i = 0; i < vmm_mem_nsegs; i++) { 328221828Sgrehan if (vmm_mem_avail[i].length >= size) { 329221828Sgrehan addr = vmm_mem_avail[i].base; 330221828Sgrehan vmm_mem_avail[i].base += size; 331221828Sgrehan vmm_mem_avail[i].length -= size; 332221828Sgrehan /* remove a zero length segment */ 333221828Sgrehan if (vmm_mem_avail[i].length == 0) { 334221828Sgrehan memmove(&vmm_mem_avail[i], 335221828Sgrehan &vmm_mem_avail[i + 1], 336221828Sgrehan (vmm_mem_nsegs - (i + 1)) * 337221828Sgrehan sizeof(vmm_mem_avail[0])); 338221828Sgrehan vmm_mem_nsegs--; 339221828Sgrehan } 340221828Sgrehan break; 341221828Sgrehan } 342221828Sgrehan } 343221828Sgrehan mtx_unlock(&vmm_mem_mtx); 344221828Sgrehan 345221828Sgrehan return (addr); 346221828Sgrehan} 347221828Sgrehan 348221828Sgrehanvoid 349221828Sgrehanvmm_mem_free(vm_paddr_t base, size_t length) 350221828Sgrehan{ 351221828Sgrehan int i; 352221828Sgrehan 353221828Sgrehan if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) { 354221828Sgrehan panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be " 355221828Sgrehan "aligned on a 0x%0x boundary\n", base, length, NBPDR); 356221828Sgrehan } 357221828Sgrehan 358221828Sgrehan mtx_lock(&vmm_mem_mtx); 359221828Sgrehan 360221828Sgrehan for (i = 0; i < vmm_mem_nsegs; i++) { 361221828Sgrehan if (vmm_mem_avail[i].base > base) 362221828Sgrehan break; 363221828Sgrehan } 364221828Sgrehan 365221828Sgrehan if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS) 366221828Sgrehan panic("vmm_mem_free: cannot free any more segments"); 367221828Sgrehan 368221828Sgrehan /* Create a new segment at index 'i' */ 369221828Sgrehan memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i], 370221828Sgrehan (vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0])); 371221828Sgrehan 372221828Sgrehan vmm_mem_avail[i].base = base; 373221828Sgrehan vmm_mem_avail[i].length = length; 374221828Sgrehan 375221828Sgrehan vmm_mem_nsegs++; 376221828Sgrehan 377221828Sgrehancoalesce_some_more: 378221828Sgrehan for (i = 0; i < vmm_mem_nsegs - 1; i++) { 379221828Sgrehan if (vmm_mem_avail[i].base + vmm_mem_avail[i].length == 380221828Sgrehan vmm_mem_avail[i + 1].base) { 381221828Sgrehan vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length; 382221828Sgrehan memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2], 383221828Sgrehan (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0])); 384221828Sgrehan vmm_mem_nsegs--; 385221828Sgrehan goto coalesce_some_more; 386221828Sgrehan } 387221828Sgrehan } 388221828Sgrehan 389221828Sgrehan mtx_unlock(&vmm_mem_mtx); 390221828Sgrehan} 391221828Sgrehan 392221828Sgrehanvm_paddr_t 393221828Sgrehanvmm_mem_maxaddr(void) 394221828Sgrehan{ 395221828Sgrehan 396221828Sgrehan return (maxaddr); 397221828Sgrehan} 398221828Sgrehan 399221828Sgrehanvoid 400221828Sgrehanvmm_mem_dump(void) 401221828Sgrehan{ 402221828Sgrehan int i; 403221828Sgrehan vm_paddr_t base; 404221828Sgrehan vm_size_t length; 405221828Sgrehan 406221828Sgrehan mtx_lock(&vmm_mem_mtx); 407221828Sgrehan for (i = 0; i < vmm_mem_nsegs; i++) { 408221828Sgrehan base = vmm_mem_avail[i].base; 409221828Sgrehan length = vmm_mem_avail[i].length; 410221828Sgrehan printf("%-4d0x%016lx 0x%016lx\n", i, base, base + length); 411221828Sgrehan } 412221828Sgrehan mtx_unlock(&vmm_mem_mtx); 413221828Sgrehan} 414