1/*-
2 * Copyright (c) 2013 The FreeBSD Foundation
3 *
4 * This software was developed by Benno Rice under sponsorship from
5 * the FreeBSD Foundation.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/param.h>
29
30#include <stand.h>
31#include <bootstrap.h>
32
33#include <efi.h>
34#include <efilib.h>
35
36#include "loader_efi.h"
37
/* Size helpers: M(x) is x mebibytes expressed in bytes. */
#define	M(x)	((x) * 1024 * 1024)
/*
 * G(x) is x gibibytes expressed in bytes.  The leading 1ULL makes the
 * whole product unsigned long long, so G(4) does not overflow int.
 */
#define	G(x)	(1ULL * (x) * 1024 * 1024 * 1024)
40
41#if defined(__amd64__)
42#include <machine/cpufunc.h>
43#include <machine/specialreg.h>
44#include <machine/vmparam.h>
45
/*
 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
 *
 * The constants below are the CPUID leaves, the interface signature and
 * the feature bits that running_on_hyperv() checks.
 */
/* Leaf whose regs[0] is compared against CPUID_LEAF_HV_LIMITS and whose
 * regs[1..3] carry the 12-byte hypervisor vendor string. */
#define CPUID_LEAF_HV_MAXLEAF		0x40000000
/* Leaf whose regs[0] must equal CPUID_HV_IFACE_HYPERV. */
#define CPUID_LEAF_HV_INTERFACE		0x40000001
/* Leaf whose regs[0] holds the CPUID_HV_MSR_* feature bits. */
#define CPUID_LEAF_HV_FEATURES		0x40000003
/* Minimum value accepted for the maximum hypervisor leaf. */
#define CPUID_LEAF_HV_LIMITS		0x40000005
#define CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
#define CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
/* Feature bit required together with CPUID_HV_MSR_TIME_REFCNT. */
#define CPUID_HV_MSR_HYPERCALL		0x0020
57
58static int
59running_on_hyperv(void)
60{
61	char hv_vendor[16];
62	uint32_t regs[4];
63
64	do_cpuid(1, regs);
65	if ((regs[2] & CPUID2_HV) == 0)
66		return (0);
67
68	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
69	if (regs[0] < CPUID_LEAF_HV_LIMITS)
70		return (0);
71
72	((uint32_t *)&hv_vendor)[0] = regs[1];
73	((uint32_t *)&hv_vendor)[1] = regs[2];
74	((uint32_t *)&hv_vendor)[2] = regs[3];
75	hv_vendor[12] = '\0';
76	if (strcmp(hv_vendor, "Microsoft Hv") != 0)
77		return (0);
78
79	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
80	if (regs[0] != CPUID_HV_IFACE_HYPERV)
81		return (0);
82
83	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
84	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
85		return (0);
86	if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
87		return (0);
88
89	return (1);
90}
91
92static void
93efi_verify_staging_size(unsigned long *nr_pages)
94{
95	UINTN sz;
96	EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
97	EFI_PHYSICAL_ADDRESS start, end;
98	UINTN key, dsz;
99	UINT32 dver;
100	EFI_STATUS status;
101	int i, ndesc;
102	unsigned long available_pages = 0;
103
104	sz = 0;
105
106	for (;;) {
107		status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
108		if (!EFI_ERROR(status))
109			break;
110
111		if (status != EFI_BUFFER_TOO_SMALL) {
112			printf("Can't read memory map: %lu\n",
113			    EFI_ERROR_CODE(status));
114			goto out;
115		}
116
117		free(map);
118
119		/* Allocate 10 descriptors more than the size reported,
120		 * to allow for any fragmentation caused by calling
121		 * malloc */
122		map = malloc(sz + (10 * dsz));
123		if (map == NULL) {
124			printf("Unable to allocate memory\n");
125			goto out;
126		}
127	}
128
129	ndesc = sz / dsz;
130	for (i = 0, p = map; i < ndesc;
131	     i++, p = NextMemoryDescriptor(p, dsz)) {
132		start = p->PhysicalStart;
133		end = start + p->NumberOfPages * EFI_PAGE_SIZE;
134
135		if (KERNLOAD < start || KERNLOAD >= end)
136			continue;
137
138		available_pages = p->NumberOfPages -
139			((KERNLOAD - start) >> EFI_PAGE_SHIFT);
140		break;
141	}
142
143	if (available_pages == 0) {
144		printf("Can't find valid memory map for staging area!\n");
145		goto out;
146	}
147
148	i++;
149	p = NextMemoryDescriptor(p, dsz);
150
151	for ( ; i < ndesc;
152	     i++, p = NextMemoryDescriptor(p, dsz)) {
153		if (p->Type != EfiConventionalMemory &&
154		    p->Type != EfiLoaderData)
155			break;
156
157		if (p->PhysicalStart != end)
158			break;
159
160		end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;
161
162		available_pages += p->NumberOfPages;
163	}
164
165	if (*nr_pages > available_pages) {
166		printf("Staging area's size is reduced: %ld -> %ld!\n",
167		    *nr_pages, available_pages);
168		*nr_pages = available_pages;
169	}
170out:
171	free(map);
172}
173#endif /* __amd64__ */
174
/*
 * Default staging area size.  The unit is MiB: efi_copy_init() multiplies
 * the value by M(1) before converting it to pages.
 */
#if defined(__arm__)
#define	DEFAULT_EFI_STAGING_SIZE	32
#else
#define	DEFAULT_EFI_STAGING_SIZE	64
#endif
/* EFI_STAGING_SIZE may be defined externally; otherwise use the default. */
#ifndef EFI_STAGING_SIZE
#define	EFI_STAGING_SIZE	DEFAULT_EFI_STAGING_SIZE
#endif

/*
 * Non-zero on the platforms where the start of the staging area is
 * rounded to a 2MiB boundary (see efi_copy_init()).
 */
#if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
    defined(__riscv)
#define	EFI_STAGING_2M_ALIGN	1
#else
#define	EFI_STAGING_2M_ALIGN	0
#endif

/*
 * Initial number of bytes kept free past the end of the loaded data;
 * efi_check_space() explains why slop is needed.
 */
#if defined(__amd64__)
#define	EFI_STAGING_SLOP	M(8)
#else
#define	EFI_STAGING_SLOP	0
#endif

/* Current slop in bytes; reported by the "staging_slop" command. */
static u_long staging_slop = EFI_STAGING_SLOP;

/*
 * staging_base: address returned by AllocatePages for the staging area.
 * staging:      address where staged data starts (staging_base, possibly
 *               rounded up to 2MiB).
 * staging_end:  first address past the end of the allocation.
 */
EFI_PHYSICAL_ADDRESS	staging, staging_end, staging_base;
/*
 * stage_offset is added to a load address to obtain the matching address
 * inside the staging area.  It is established by the first efi_copyin()
 * or efi_readin() call, which also sets stage_offset_set.
 */
bool			stage_offset_set = false;
ssize_t			stage_offset;
202
203static void
204efi_copy_free(void)
205{
206	BS->FreePages(staging_base, (staging_end - staging_base) /
207	    EFI_PAGE_SIZE);
208	stage_offset_set = false;
209	stage_offset = 0;
210}
211
212#ifdef __amd64__
213int copy_staging = COPY_STAGING_AUTO;
214
215static int
216command_copy_staging(int argc, char *argv[])
217{
218	static const char *const mode[3] = {
219		[COPY_STAGING_ENABLE] = "enable",
220		[COPY_STAGING_DISABLE] = "disable",
221		[COPY_STAGING_AUTO] = "auto",
222	};
223	int prev, res;
224
225	res = CMD_OK;
226	if (argc > 2) {
227		res = CMD_ERROR;
228	} else if (argc == 2) {
229		prev = copy_staging;
230		if (strcmp(argv[1], "enable") == 0)
231			copy_staging = COPY_STAGING_ENABLE;
232		else if (strcmp(argv[1], "disable") == 0)
233			copy_staging = COPY_STAGING_DISABLE;
234		else if (strcmp(argv[1], "auto") == 0)
235			copy_staging = COPY_STAGING_AUTO;
236		else {
237			printf("usage: copy_staging enable|disable|auto\n");
238			res = CMD_ERROR;
239		}
240		if (res == CMD_OK && prev != copy_staging) {
241			printf("changed copy_staging, unloading kernel\n");
242			unload();
243			efi_copy_free();
244			efi_copy_init();
245		}
246	} else {
247		printf("copy staging: %s\n", mode[copy_staging]);
248	}
249	return (res);
250}
251COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
252#endif
253
254static int
255command_staging_slop(int argc, char *argv[])
256{
257	char *endp;
258	u_long new, prev;
259	int res;
260
261	res = CMD_OK;
262	if (argc > 2) {
263		res = CMD_ERROR;
264	} else if (argc == 2) {
265		new = strtoul(argv[1], &endp, 0);
266		if (*endp != '\0') {
267			printf("invalid slop value\n");
268			res = CMD_ERROR;
269		}
270		if (res == CMD_OK && staging_slop != new) {
271			printf("changed slop, unloading kernel\n");
272			unload();
273			efi_copy_free();
274			efi_copy_init();
275		}
276	} else {
277		printf("staging slop %#lx\n", staging_slop);
278	}
279	return (res);
280}
281COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
282    command_staging_slop);
283
284#if defined(__amd64__)
285/*
286 * The staging area must reside in the first 1GB or 4GB physical
287 * memory: see elf64_exec() in
288 * boot/efi/loader/arch/amd64/elf64_freebsd.c.
289 */
290static EFI_PHYSICAL_ADDRESS
291get_staging_max(void)
292{
293	EFI_PHYSICAL_ADDRESS res;
294
295	res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
296	return (res);
297}
298#define	EFI_ALLOC_METHOD	AllocateMaxAddress
299#else
300#define	EFI_ALLOC_METHOD	AllocateAnyPages
301#endif
302
303int
304efi_copy_init(void)
305{
306	EFI_STATUS	status;
307	unsigned long nr_pages;
308	vm_offset_t ess;
309
310	ess = EFI_STAGING_SIZE;
311	if (ess < DEFAULT_EFI_STAGING_SIZE)
312		ess = DEFAULT_EFI_STAGING_SIZE;
313	nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);
314
315#if defined(__amd64__)
316	/*
317	 * We'll decrease nr_pages, if it's too big. Currently we only
318	 * apply this to FreeBSD VM running on Hyper-V. Why? Please see
319	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
320	 */
321	if (running_on_hyperv())
322		efi_verify_staging_size(&nr_pages);
323
324	staging = get_staging_max();
325#endif
326	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
327	    nr_pages, &staging);
328	if (EFI_ERROR(status)) {
329		printf("failed to allocate staging area: %lu\n",
330		    EFI_ERROR_CODE(status));
331		return (status);
332	}
333	staging_base = staging;
334	staging_end = staging + nr_pages * EFI_PAGE_SIZE;
335
336#if EFI_STAGING_2M_ALIGN
337	/*
338	 * Round the kernel load address to a 2MiB value. This is needed
339	 * because the kernel builds a page table based on where it has
340	 * been loaded in physical address space. As the kernel will use
341	 * either a 1MiB or 2MiB page for this we need to make sure it
342	 * is correctly aligned for both cases.
343	 */
344	staging = roundup2(staging, M(2));
345#endif
346
347	return (0);
348}
349
/*
 * Make sure the staging area extends to at least `end' plus staging_slop.
 *
 * `end' is an address inside the staging area (callers pass
 * dest + stage_offset + len).  If the current allocation is too small,
 * three strategies are tried in order:
 *   1. allocate the missing pages directly after staging_end;
 *   2. allocate pages directly before staging_base and slide the staged
 *      data down into them;
 *   3. allocate a completely new, larger area, copy the staged data over
 *      and free the old one.
 * Strategies 2 and 3 move the data, so stage_offset and staging are
 * adjusted to keep address translation valid.
 *
 * Returns true if enough space is available afterwards, false if every
 * allocation attempt failed.  Panics if growth would be required after
 * boot services have been exited.
 */
static bool
efi_check_space(vm_offset_t end)
{
	EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
	EFI_STATUS status;
	unsigned long nr_pages;

	end = roundup2(end, EFI_PAGE_SIZE);

	/* There is already enough space */
	if (end + staging_slop <= staging_end)
		return (true);

	/*
	 * Without boot services no pages can be allocated any more; the
	 * request is still fine as long as it fits without the slop.
	 */
	if (!boot_services_active) {
		if (end <= staging_end)
			return (true);
		panic("efi_check_space: cannot expand staging area "
		    "after boot services were exited\n");
	}

	/*
	 * Add slop at the end:
	 * 1. amd64 kernel expects to do some very early allocations
	 *    by carving out memory after kernend.  Slop guarantees
	 *    that it does not overwrite anything useful.
	 * 2. It seems that initial calculation of the staging size
	 *    could be somewhat smaller than actually copying in after
	 *    boot services are exited.  Slop avoids calling
	 *    BS->AllocatePages() when it cannot work.
	 */
	end += staging_slop;

	/* Number of pages missing past the current end of the allocation. */
	nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
#if defined(__amd64__)
	/*
	 * amd64 needs all memory to be allocated under the 1G or 4G boundary.
	 */
	if (end > get_staging_max())
		goto before_staging;
#endif

	/* Try to allocate more space after the previous allocation */
	addr = staging_end;
	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
		return (true);
	}

before_staging:
	/* Try allocating space before the previous allocation */
	/* Not possible if the subtraction below would wrap around. */
	if (staging < nr_pages * EFI_PAGE_SIZE)
		goto expand;
	addr = staging - nr_pages * EFI_PAGE_SIZE;
#if EFI_STAGING_2M_ALIGN
	/* See efi_copy_init for why this is needed */
	addr = rounddown2(addr, M(2));
#endif
	/* Only the gap between addr and the existing allocation is new. */
	nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		staging_base = addr;
		/* Source and destination may overlap, hence memmove. */
		memmove((void *)(uintptr_t)staging_base,
		    (void *)(uintptr_t)staging, staging_end - staging);
		stage_offset -= staging - staging_base;
		staging = staging_base;
		return (true);
	}

expand:
	/* Last resort: a fresh area large enough for everything up to end. */
	nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
#if EFI_STAGING_2M_ALIGN
	/* Extra 2MiB so the start can be rounded up without losing space. */
	nr_pages += M(2) / EFI_PAGE_SIZE;
#endif
#if defined(__amd64__)
	new_base = get_staging_max();
#endif
	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
	    nr_pages, &new_base);
	if (!EFI_ERROR(status)) {
#if EFI_STAGING_2M_ALIGN
		new_staging = roundup2(new_base, M(2));
#else
		new_staging = new_base;
#endif
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		memcpy((void *)(uintptr_t)new_staging,
		    (void *)(uintptr_t)staging, staging_end - staging);
		BS->FreePages(staging_base, (staging_end - staging_base) /
		    EFI_PAGE_SIZE);
		stage_offset -= staging - new_staging;
		staging = new_staging;
		staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
		staging_base = new_base;
		return (true);
	}

	printf("efi_check_space: Unable to expand staging area\n");
	return (false);
}
459
460void *
461efi_translate(vm_offset_t ptr)
462{
463
464	return ((void *)(ptr + stage_offset));
465}
466
467ssize_t
468efi_copyin(const void *src, vm_offset_t dest, const size_t len)
469{
470
471	if (!stage_offset_set) {
472		stage_offset = (vm_offset_t)staging - dest;
473		stage_offset_set = true;
474	}
475
476	/* XXX: Callers do not check for failure. */
477	if (!efi_check_space(dest + stage_offset + len)) {
478		errno = ENOMEM;
479		return (-1);
480	}
481	bcopy(src, (void *)(dest + stage_offset), len);
482	return (len);
483}
484
485ssize_t
486efi_copyout(const vm_offset_t src, void *dest, const size_t len)
487{
488
489	/* XXX: Callers do not check for failure. */
490	if (src + stage_offset + len > staging_end) {
491		errno = ENOMEM;
492		return (-1);
493	}
494	bcopy((void *)(src + stage_offset), dest, len);
495	return (len);
496}
497
498ssize_t
499efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
500{
501
502	if (!stage_offset_set) {
503		stage_offset = (vm_offset_t)staging - dest;
504		stage_offset_set = true;
505	}
506
507	if (!efi_check_space(dest + stage_offset + len)) {
508		errno = ENOMEM;
509		return (-1);
510	}
511	return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
512}
513
514void
515efi_copy_finish(void)
516{
517	uint64_t	*src, *dst, *last;
518
519	src = (uint64_t *)(uintptr_t)staging;
520	dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
521	last = (uint64_t *)(uintptr_t)staging_end;
522
523	while (src < last)
524		*dst++ = *src++;
525}
526
/*
 * Counterpart of efi_copy_finish() that intentionally does nothing and
 * leaves the staged data where it is.
 */
void
efi_copy_finish_nop(void)
{
}
531