vmm_mem.c revision 221828
1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33221828Sgrehan#include <sys/lock.h>
34221828Sgrehan#include <sys/mutex.h>
35221828Sgrehan#include <sys/linker.h>
36221828Sgrehan#include <sys/systm.h>
37221828Sgrehan#include <sys/malloc.h>
38221828Sgrehan#include <sys/kernel.h>
39221828Sgrehan
40221828Sgrehan#include <vm/vm.h>
41221828Sgrehan#include <vm/pmap.h>
42221828Sgrehan
43221828Sgrehan#include <machine/md_var.h>
44221828Sgrehan#include <machine/metadata.h>
45221828Sgrehan#include <machine/pc/bios.h>
46221828Sgrehan#include <machine/vmparam.h>
47221828Sgrehan#include <machine/pmap.h>
48221828Sgrehan
49221828Sgrehan#include "vmm_util.h"
50221828Sgrehan#include "vmm_mem.h"
51221828Sgrehan
52221828Sgrehanstatic MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory");
53221828Sgrehan
54221828Sgrehan#define	MB		(1024 * 1024)
55221828Sgrehan#define	GB		(1024 * MB)
56221828Sgrehan
57221828Sgrehan#define	VMM_MEM_MAXSEGS	64
58221828Sgrehan
59221828Sgrehan/* protected by vmm_mem_mtx */
60221828Sgrehanstatic struct {
61221828Sgrehan	vm_paddr_t	base;
62221828Sgrehan	vm_size_t	length;
63221828Sgrehan} vmm_mem_avail[VMM_MEM_MAXSEGS];
64221828Sgrehan
65221828Sgrehanstatic int vmm_mem_nsegs;
66221828Sgrehan
67221828Sgrehanstatic vm_paddr_t maxaddr;
68221828Sgrehan
69221828Sgrehanstatic struct mtx vmm_mem_mtx;
70221828Sgrehan
71221828Sgrehan/*
72221828Sgrehan * Steal any memory that was deliberately hidden from FreeBSD either by
73221828Sgrehan * the use of MAXMEM kernel config option or the hw.physmem loader tunable.
74221828Sgrehan */
75221828Sgrehanstatic int
76221828Sgrehanvmm_mem_steal_memory(void)
77221828Sgrehan{
78221828Sgrehan	int nsegs;
79221828Sgrehan	caddr_t kmdp;
80221828Sgrehan	uint32_t smapsize;
81221828Sgrehan	uint64_t base, length;
82221828Sgrehan	struct bios_smap *smapbase, *smap, *smapend;
83221828Sgrehan
84221828Sgrehan	/*
85221828Sgrehan	 * Borrowed from hammer_time() and getmemsize() in machdep.c
86221828Sgrehan	 */
87221828Sgrehan	kmdp = preload_search_by_type("elf kernel");
88221828Sgrehan	if (kmdp == NULL)
89221828Sgrehan		kmdp = preload_search_by_type("elf64 kernel");
90221828Sgrehan
91221828Sgrehan	smapbase = (struct bios_smap *)preload_search_info(kmdp,
92221828Sgrehan		MODINFO_METADATA | MODINFOMD_SMAP);
93221828Sgrehan	if (smapbase == NULL)
94221828Sgrehan		panic("No BIOS smap info from loader!");
95221828Sgrehan
96221828Sgrehan	smapsize = *((uint32_t *)smapbase - 1);
97221828Sgrehan	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);
98221828Sgrehan
99221828Sgrehan	nsegs = 0;
100221828Sgrehan	for (smap = smapbase; smap < smapend; smap++) {
101221828Sgrehan		/*
102221828Sgrehan		 * XXX
103221828Sgrehan		 * Assuming non-overlapping, monotonically increasing
104221828Sgrehan		 * memory segments.
105221828Sgrehan		 */
106221828Sgrehan		if (smap->type != SMAP_TYPE_MEMORY)
107221828Sgrehan			continue;
108221828Sgrehan		if (smap->length == 0)
109221828Sgrehan			break;
110221828Sgrehan
111221828Sgrehan		base = roundup(smap->base, NBPDR);
112221828Sgrehan		length = rounddown(smap->length, NBPDR);
113221828Sgrehan
114221828Sgrehan		/* Skip this segment if FreeBSD is using all of it. */
115221828Sgrehan		if (base + length <= ptoa(Maxmem))
116221828Sgrehan			continue;
117221828Sgrehan
118221828Sgrehan		/*
119221828Sgrehan		 * If FreeBSD is using part of this segment then adjust
120221828Sgrehan		 * 'base' and 'length' accordingly.
121221828Sgrehan		 */
122221828Sgrehan		if (base < ptoa(Maxmem)) {
123221828Sgrehan			uint64_t used;
124221828Sgrehan			used = roundup(ptoa(Maxmem), NBPDR) - base;
125221828Sgrehan			base += used;
126221828Sgrehan			length -= used;
127221828Sgrehan		}
128221828Sgrehan
129221828Sgrehan		if (length == 0)
130221828Sgrehan			continue;
131221828Sgrehan
132221828Sgrehan		vmm_mem_avail[nsegs].base = base;
133221828Sgrehan		vmm_mem_avail[nsegs].length = length;
134221828Sgrehan
135221828Sgrehan		if (base + length > maxaddr)
136221828Sgrehan			maxaddr = base + length;
137221828Sgrehan
138221828Sgrehan		if (0 && bootverbose) {
139221828Sgrehan			printf("vmm_mem_populate: index %d, base 0x%0lx, "
140221828Sgrehan			       "length %ld\n",
141221828Sgrehan			       nsegs, vmm_mem_avail[nsegs].base,
142221828Sgrehan			       vmm_mem_avail[nsegs].length);
143221828Sgrehan		}
144221828Sgrehan
145221828Sgrehan		nsegs++;
146221828Sgrehan		if (nsegs >= VMM_MEM_MAXSEGS) {
147221828Sgrehan			printf("vmm_mem_populate: maximum number of vmm memory "
148221828Sgrehan			       "segments reached!\n");
149221828Sgrehan			return (ENOSPC);
150221828Sgrehan		}
151221828Sgrehan	}
152221828Sgrehan
153221828Sgrehan	vmm_mem_nsegs = nsegs;
154221828Sgrehan
155221828Sgrehan	return (0);
156221828Sgrehan}
157221828Sgrehan
158221828Sgrehanstatic void
159221828Sgrehanvmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end)
160221828Sgrehan{
161221828Sgrehan	vm_paddr_t addr, remaining;
162221828Sgrehan	int pdpi, pdi, superpage_size;
163221828Sgrehan	pml4_entry_t *pml4p;
164221828Sgrehan	pdp_entry_t *pdp;
165221828Sgrehan	pd_entry_t *pd;
166221828Sgrehan	uint64_t page_attr_bits;
167221828Sgrehan
168221828Sgrehan	if (end >= NBPML4)
169221828Sgrehan		panic("Cannot map memory beyond %ldGB", NBPML4 / GB);
170221828Sgrehan
171221828Sgrehan	/* XXX FreeBSD 8.1 does not use 1G superpages in the direct map */
172221828Sgrehan	if (0 && vmm_supports_1G_pages())
173221828Sgrehan		superpage_size = NBPDP;
174221828Sgrehan	else
175221828Sgrehan		superpage_size = NBPDR;
176221828Sgrehan
177221828Sgrehan	/*
178221828Sgrehan	 * Get the page directory pointer page that contains the direct
179221828Sgrehan	 * map address mappings.
180221828Sgrehan	 */
181221828Sgrehan	pml4p = kernel_pmap->pm_pml4;
182221828Sgrehan	pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK);
183221828Sgrehan
184221828Sgrehan	page_attr_bits = PG_RW | PG_V | PG_PS | PG_G;
185221828Sgrehan	addr = start;
186221828Sgrehan	while (addr < end) {
187221828Sgrehan		remaining = end - addr;
188221828Sgrehan		pdpi = addr / NBPDP;
189221828Sgrehan		if (superpage_size == NBPDP &&
190221828Sgrehan		    remaining >= NBPDP &&
191221828Sgrehan		    addr % NBPDP == 0) {
192221828Sgrehan			/*
193221828Sgrehan			 * If there isn't a mapping for this address then
194221828Sgrehan			 * create one but if there is one already make sure
195221828Sgrehan			 * it matches what we expect it to be.
196221828Sgrehan			 */
197221828Sgrehan			if (pdp[pdpi] == 0) {
198221828Sgrehan				pdp[pdpi] = addr | page_attr_bits;
199221828Sgrehan				if (0 && bootverbose) {
200221828Sgrehan					printf("vmm_mem_populate: mapping "
201221828Sgrehan					       "0x%lx with 1GB page at "
202221828Sgrehan					       "pdpi %d\n", addr, pdpi);
203221828Sgrehan				}
204221828Sgrehan			} else {
205221828Sgrehan				pdp_entry_t pdpe = pdp[pdpi];
206221828Sgrehan				if ((pdpe & ~PAGE_MASK) != addr ||
207221828Sgrehan				    (pdpe & page_attr_bits) != page_attr_bits) {
208221828Sgrehan					panic("An invalid mapping 0x%016lx "
209221828Sgrehan					      "already exists for 0x%016lx\n",
210221828Sgrehan					      pdpe, addr);
211221828Sgrehan				}
212221828Sgrehan			}
213221828Sgrehan			addr += NBPDP;
214221828Sgrehan		} else {
215221828Sgrehan			if (remaining < NBPDR) {
216221828Sgrehan				panic("vmm_mem_populate: remaining (%ld) must "
217221828Sgrehan				      "be greater than NBPDR (%d)\n",
218221828Sgrehan				      remaining, NBPDR);
219221828Sgrehan			}
220221828Sgrehan			if (pdp[pdpi] == 0) {
221221828Sgrehan				/*
222221828Sgrehan				 * XXX we lose this memory forever because
223221828Sgrehan				 * we do not keep track of the virtual address
224221828Sgrehan				 * that would be required to free this page.
225221828Sgrehan				 */
226221828Sgrehan				pd = malloc(PAGE_SIZE, M_VMM_MEM,
227221828Sgrehan					    M_WAITOK | M_ZERO);
228221828Sgrehan				if ((uintptr_t)pd & PAGE_MASK) {
229221828Sgrehan					panic("vmm_mem_populate: page directory"
230221828Sgrehan					      "page not aligned on %d "
231221828Sgrehan					      "boundary\n", PAGE_SIZE);
232221828Sgrehan				}
233221828Sgrehan				pdp[pdpi] = vtophys(pd);
234221828Sgrehan				pdp[pdpi] |= PG_RW | PG_V | PG_U;
235221828Sgrehan				if (0 && bootverbose) {
236221828Sgrehan					printf("Creating page directory "
237221828Sgrehan					       "at pdp index %d for 0x%016lx\n",
238221828Sgrehan					       pdpi, addr);
239221828Sgrehan				}
240221828Sgrehan			}
241221828Sgrehan			pdi = (addr % NBPDP) / NBPDR;
242221828Sgrehan			pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK);
243221828Sgrehan
244221828Sgrehan			/*
245221828Sgrehan			 * Create a new mapping if one doesn't already exist
246221828Sgrehan			 * or validate it if it does.
247221828Sgrehan			 */
248221828Sgrehan			if (pd[pdi] == 0) {
249221828Sgrehan				pd[pdi] = addr | page_attr_bits;
250221828Sgrehan				if (0 && bootverbose) {
251221828Sgrehan					printf("vmm_mem_populate: mapping "
252221828Sgrehan					       "0x%lx with 2MB page at "
253221828Sgrehan					       "pdpi %d, pdi %d\n",
254221828Sgrehan					       addr, pdpi, pdi);
255221828Sgrehan				}
256221828Sgrehan			} else {
257221828Sgrehan				pd_entry_t pde = pd[pdi];
258221828Sgrehan				if ((pde & ~PAGE_MASK) != addr ||
259221828Sgrehan				    (pde & page_attr_bits) != page_attr_bits) {
260221828Sgrehan					panic("An invalid mapping 0x%016lx "
261221828Sgrehan					      "already exists for 0x%016lx\n",
262221828Sgrehan					      pde, addr);
263221828Sgrehan				}
264221828Sgrehan			}
265221828Sgrehan			addr += NBPDR;
266221828Sgrehan		}
267221828Sgrehan	}
268221828Sgrehan}
269221828Sgrehan
270221828Sgrehanstatic int
271221828Sgrehanvmm_mem_populate(void)
272221828Sgrehan{
273221828Sgrehan	int seg, error;
274221828Sgrehan	vm_paddr_t start, end;
275221828Sgrehan
276221828Sgrehan	/* populate the vmm_mem_avail[] array */
277221828Sgrehan	error = vmm_mem_steal_memory();
278221828Sgrehan	if (error)
279221828Sgrehan		return (error);
280221828Sgrehan
281221828Sgrehan	/*
282221828Sgrehan	 * Now map the memory that was hidden from FreeBSD in
283221828Sgrehan	 * the direct map VA space.
284221828Sgrehan	 */
285221828Sgrehan	for (seg = 0; seg < vmm_mem_nsegs; seg++) {
286221828Sgrehan		start = vmm_mem_avail[seg].base;
287221828Sgrehan		end = start + vmm_mem_avail[seg].length;
288221828Sgrehan		if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) {
289221828Sgrehan			panic("start (0x%016lx) and end (0x%016lx) must be "
290221828Sgrehan			      "aligned on a %dMB boundary\n",
291221828Sgrehan			      start, end, NBPDR / MB);
292221828Sgrehan		}
293221828Sgrehan		vmm_mem_direct_map(start, end);
294221828Sgrehan	}
295221828Sgrehan
296221828Sgrehan	return (0);
297221828Sgrehan}
298221828Sgrehan
299221828Sgrehanint
300221828Sgrehanvmm_mem_init(void)
301221828Sgrehan{
302221828Sgrehan	int error;
303221828Sgrehan
304221828Sgrehan	mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF);
305221828Sgrehan
306221828Sgrehan	error = vmm_mem_populate();
307221828Sgrehan	if (error)
308221828Sgrehan		return (error);
309221828Sgrehan
310221828Sgrehan	return (0);
311221828Sgrehan}
312221828Sgrehan
313221828Sgrehanvm_paddr_t
314221828Sgrehanvmm_mem_alloc(size_t size)
315221828Sgrehan{
316221828Sgrehan	int i;
317221828Sgrehan	vm_paddr_t addr;
318221828Sgrehan
319221828Sgrehan	if ((size & PDRMASK) != 0) {
320221828Sgrehan		panic("vmm_mem_alloc: size 0x%0lx must be "
321221828Sgrehan		      "aligned on a 0x%0x boundary\n", size, NBPDR);
322221828Sgrehan	}
323221828Sgrehan
324221828Sgrehan	addr = 0;
325221828Sgrehan
326221828Sgrehan	mtx_lock(&vmm_mem_mtx);
327221828Sgrehan	for (i = 0; i < vmm_mem_nsegs; i++) {
328221828Sgrehan		if (vmm_mem_avail[i].length >= size) {
329221828Sgrehan			addr = vmm_mem_avail[i].base;
330221828Sgrehan			vmm_mem_avail[i].base += size;
331221828Sgrehan			vmm_mem_avail[i].length -= size;
332221828Sgrehan			/* remove a zero length segment */
333221828Sgrehan			if (vmm_mem_avail[i].length == 0) {
334221828Sgrehan				memmove(&vmm_mem_avail[i],
335221828Sgrehan					&vmm_mem_avail[i + 1],
336221828Sgrehan					(vmm_mem_nsegs - (i + 1)) *
337221828Sgrehan					 sizeof(vmm_mem_avail[0]));
338221828Sgrehan				vmm_mem_nsegs--;
339221828Sgrehan			}
340221828Sgrehan			break;
341221828Sgrehan		}
342221828Sgrehan	}
343221828Sgrehan	mtx_unlock(&vmm_mem_mtx);
344221828Sgrehan
345221828Sgrehan	return (addr);
346221828Sgrehan}
347221828Sgrehan
348221828Sgrehanvoid
349221828Sgrehanvmm_mem_free(vm_paddr_t base, size_t length)
350221828Sgrehan{
351221828Sgrehan	int i;
352221828Sgrehan
353221828Sgrehan	if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) {
354221828Sgrehan		panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be "
355221828Sgrehan		      "aligned on a 0x%0x boundary\n", base, length, NBPDR);
356221828Sgrehan	}
357221828Sgrehan
358221828Sgrehan	mtx_lock(&vmm_mem_mtx);
359221828Sgrehan
360221828Sgrehan	for (i = 0; i < vmm_mem_nsegs; i++) {
361221828Sgrehan		if (vmm_mem_avail[i].base > base)
362221828Sgrehan			break;
363221828Sgrehan	}
364221828Sgrehan
365221828Sgrehan	if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS)
366221828Sgrehan		panic("vmm_mem_free: cannot free any more segments");
367221828Sgrehan
368221828Sgrehan	/* Create a new segment at index 'i' */
369221828Sgrehan	memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i],
370221828Sgrehan		(vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0]));
371221828Sgrehan
372221828Sgrehan	vmm_mem_avail[i].base = base;
373221828Sgrehan	vmm_mem_avail[i].length = length;
374221828Sgrehan
375221828Sgrehan	vmm_mem_nsegs++;
376221828Sgrehan
377221828Sgrehancoalesce_some_more:
378221828Sgrehan	for (i = 0; i < vmm_mem_nsegs - 1; i++) {
379221828Sgrehan		if (vmm_mem_avail[i].base + vmm_mem_avail[i].length ==
380221828Sgrehan		    vmm_mem_avail[i + 1].base) {
381221828Sgrehan			vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length;
382221828Sgrehan			memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2],
383221828Sgrehan			  (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0]));
384221828Sgrehan			vmm_mem_nsegs--;
385221828Sgrehan			goto coalesce_some_more;
386221828Sgrehan		}
387221828Sgrehan	}
388221828Sgrehan
389221828Sgrehan	mtx_unlock(&vmm_mem_mtx);
390221828Sgrehan}
391221828Sgrehan
392221828Sgrehanvm_paddr_t
393221828Sgrehanvmm_mem_maxaddr(void)
394221828Sgrehan{
395221828Sgrehan
396221828Sgrehan	return (maxaddr);
397221828Sgrehan}
398221828Sgrehan
399221828Sgrehanvoid
400221828Sgrehanvmm_mem_dump(void)
401221828Sgrehan{
402221828Sgrehan	int i;
403221828Sgrehan	vm_paddr_t base;
404221828Sgrehan	vm_size_t length;
405221828Sgrehan
406221828Sgrehan	mtx_lock(&vmm_mem_mtx);
407221828Sgrehan	for (i = 0; i < vmm_mem_nsegs; i++) {
408221828Sgrehan		base = vmm_mem_avail[i].base;
409221828Sgrehan		length = vmm_mem_avail[i].length;
410221828Sgrehan		printf("%-4d0x%016lx    0x%016lx\n", i, base, base + length);
411221828Sgrehan	}
412221828Sgrehan	mtx_unlock(&vmm_mem_mtx);
413221828Sgrehan}
414