1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2021 Beckhoff Automation GmbH & Co. KG
5 * Author: Corvin K��hne <c.koehne@beckhoff.com>
6 */
7
8#include <sys/types.h>
9#include <sys/queue.h>
10
11#include <machine/vmm.h>
12
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
19
20#include "debug.h"
21#include "e820.h"
22#include "qemu_fwcfg.h"
23
24/*
25 * E820 always uses 64 bit entries. Emulation code will use vm_paddr_t since it
26 * works on physical addresses. If vm_paddr_t is larger than uint64_t E820 can't
27 * hold all possible physical addresses and we can get into trouble.
28 */
29static_assert(sizeof(vm_paddr_t) <= sizeof(uint64_t),
30    "Unable to represent physical memory by E820 table");
31
/* File name of the E820 table in the qemu fwcfg interface. */
#define E820_FWCFG_FILE_NAME "etc/e820"

/* Size units; all of them evaluate to unsigned long. */
#define KB (1024UL)
#define MB (KB * 1024)
#define GB (MB * 1024)
37
/*
 * Fixed E820 memory holes. Both ranges exclude their end address:
 *   [ 0xA0000,  0xC0000) VGA
 *   [ 0xC0000, 0x100000) ROM
 */
#define E820_VGA_MEM_BASE 0x000A0000
#define E820_VGA_MEM_END 0x000C0000
#define E820_ROM_MEM_BASE 0x000C0000
#define E820_ROM_MEM_END 0x00100000
47
48struct e820_element {
49	TAILQ_ENTRY(e820_element) chain;
50	uint64_t base;
51	uint64_t end;
52	enum e820_memory_type type;
53};
54static TAILQ_HEAD(e820_table, e820_element) e820_table = TAILQ_HEAD_INITIALIZER(
55    e820_table);
56
57static struct e820_element *
58e820_element_alloc(uint64_t base, uint64_t end, enum e820_memory_type type)
59{
60	struct e820_element *element;
61
62	element = calloc(1, sizeof(*element));
63	if (element == NULL) {
64		return (NULL);
65	}
66
67	element->base = base;
68	element->end = end;
69	element->type = type;
70
71	return (element);
72}
73
74static const char *
75e820_get_type_name(const enum e820_memory_type type)
76{
77	switch (type) {
78	case E820_TYPE_MEMORY:
79		return ("RAM");
80	case E820_TYPE_RESERVED:
81		return ("Reserved");
82	case E820_TYPE_ACPI:
83		return ("ACPI");
84	case E820_TYPE_NVS:
85		return ("NVS");
86	default:
87		return ("Unknown");
88	}
89}
90
91void
92e820_dump_table(void)
93{
94	struct e820_element *element;
95	uint64_t i;
96
97	EPRINTLN("E820 map:");
98
99	i = 0;
100	TAILQ_FOREACH(element, &e820_table, chain) {
101		EPRINTLN("  (%4lu) [%16lx, %16lx] %s", i,
102		    element->base, element->end,
103		    e820_get_type_name(element->type));
104
105		++i;
106	}
107}
108
109static struct qemu_fwcfg_item *
110e820_get_fwcfg_item(void)
111{
112	struct qemu_fwcfg_item *fwcfg_item;
113	struct e820_element *element;
114	struct e820_entry *entries;
115	int count, i;
116
117	count = 0;
118	TAILQ_FOREACH(element, &e820_table, chain) {
119		++count;
120	}
121	if (count == 0) {
122		warnx("%s: E820 table empty", __func__);
123		return (NULL);
124	}
125
126	fwcfg_item = calloc(1, sizeof(struct qemu_fwcfg_item));
127	if (fwcfg_item == NULL) {
128		return (NULL);
129	}
130
131	fwcfg_item->size = count * sizeof(struct e820_entry);
132	fwcfg_item->data = calloc(count, sizeof(struct e820_entry));
133	if (fwcfg_item->data == NULL) {
134		free(fwcfg_item);
135		return (NULL);
136	}
137
138	i = 0;
139	entries = (struct e820_entry *)fwcfg_item->data;
140	TAILQ_FOREACH(element, &e820_table, chain) {
141		struct e820_entry *entry = &entries[i];
142
143		entry->base = element->base;
144		entry->length = element->end - element->base;
145		entry->type = element->type;
146
147		++i;
148	}
149
150	return (fwcfg_item);
151}
152
153static int
154e820_add_entry(const uint64_t base, const uint64_t end,
155    const enum e820_memory_type type)
156{
157	struct e820_element *new_element;
158	struct e820_element *element;
159	struct e820_element *ram_element;
160
161	assert(end >= base);
162
163	new_element = e820_element_alloc(base, end, type);
164	if (new_element == NULL) {
165		return (ENOMEM);
166	}
167
168	/*
169	 * E820 table should always be sorted in ascending order. Therefore,
170	 * search for a range whose end is larger than the base parameter.
171	 */
172	TAILQ_FOREACH(element, &e820_table, chain) {
173		if (element->end > base) {
174			break;
175		}
176	}
177
178	/*
179	 * System memory requires special handling.
180	 */
181	if (type == E820_TYPE_MEMORY) {
182		/*
183		 * base is larger than of any existing element. Add new system
184		 * memory at the end of the table.
185		 */
186		if (element == NULL) {
187			TAILQ_INSERT_TAIL(&e820_table, new_element, chain);
188			return (0);
189		}
190
191		/*
192		 * System memory shouldn't overlap with any existing element.
193		 */
194		assert(end >= element->base);
195
196		TAILQ_INSERT_BEFORE(element, new_element, chain);
197
198		return (0);
199	}
200
201	/*
202	 * If some one tries to allocate a specific address, it could happen, that
203	 * this address is not allocatable. Therefore, do some checks. If the
204	 * address is not allocatable, don't panic. The user may have a fallback and
205	 * tries to allocate another address. This is true for the GVT-d emulation
206	 * which tries to reuse the host address of the graphics stolen memory and
207	 * falls back to allocating the highest address below 4 GB.
208	 */
209	if (element == NULL || element->type != E820_TYPE_MEMORY ||
210	    (base < element->base || end > element->end))
211		return (ENOMEM);
212
213	if (base == element->base) {
214		/*
215		 * New element at system memory base boundary. Add new
216		 * element before current and adjust the base of the old
217		 * element.
218		 *
219		 * Old table:
220		 * 	[ 0x1000, 0x4000] RAM		<-- element
221		 * New table:
222		 * 	[ 0x1000, 0x2000] Reserved
223		 * 	[ 0x2000, 0x4000] RAM		<-- element
224		 */
225		TAILQ_INSERT_BEFORE(element, new_element, chain);
226		element->base = end;
227	} else if (end == element->end) {
228		/*
229		 * New element at system memory end boundary. Add new
230		 * element after current and adjust the end of the
231		 * current element.
232		 *
233		 * Old table:
234		 * 	[ 0x1000, 0x4000] RAM		<-- element
235		 * New table:
236		 * 	[ 0x1000, 0x3000] RAM		<-- element
237		 * 	[ 0x3000, 0x4000] Reserved
238		 */
239		TAILQ_INSERT_AFTER(&e820_table, element, new_element, chain);
240		element->end = base;
241	} else {
242		/*
243		 * New element inside system memory entry. Split it by
244		 * adding a system memory element and the new element
245		 * before current.
246		 *
247		 * Old table:
248		 * 	[ 0x1000, 0x4000] RAM		<-- element
249		 * New table:
250		 * 	[ 0x1000, 0x2000] RAM
251		 * 	[ 0x2000, 0x3000] Reserved
252		 * 	[ 0x3000, 0x4000] RAM		<-- element
253		 */
254		ram_element = e820_element_alloc(element->base, base,
255		    E820_TYPE_MEMORY);
256		if (ram_element == NULL) {
257			return (ENOMEM);
258		}
259		TAILQ_INSERT_BEFORE(element, ram_element, chain);
260		TAILQ_INSERT_BEFORE(element, new_element, chain);
261		element->base = end;
262	}
263
264	return (0);
265}
266
267static int
268e820_add_memory_hole(const uint64_t base, const uint64_t end)
269{
270	struct e820_element *element;
271	struct e820_element *ram_element;
272
273	assert(end >= base);
274
275	/*
276	 * E820 table should be always sorted in ascending order. Therefore,
277	 * search for an element which end is larger than the base parameter.
278	 */
279	TAILQ_FOREACH(element, &e820_table, chain) {
280		if (element->end > base) {
281			break;
282		}
283	}
284
285	if (element == NULL || end <= element->base) {
286		/* Nothing to do. Hole already exists */
287		return (0);
288	}
289
290	/* Memory holes are only allowed in system memory */
291	assert(element->type == E820_TYPE_MEMORY);
292
293	if (base == element->base) {
294		/*
295		 * New hole at system memory base boundary.
296		 *
297		 * Old table:
298		 * 	[ 0x1000, 0x4000] RAM
299		 * New table:
300		 * 	[ 0x2000, 0x4000] RAM
301		 */
302		element->base = end;
303	} else if (end == element->end) {
304		/*
305		 * New hole at system memory end boundary.
306		 *
307		 * Old table:
308		 * 	[ 0x1000, 0x4000] RAM
309		 * New table:
310		 * 	[ 0x1000, 0x3000] RAM
311		 */
312		element->end = base;
313	} else {
314		/*
315		 * New hole inside system memory entry. Split the system memory.
316		 *
317		 * Old table:
318		 * 	[ 0x1000, 0x4000] RAM		<-- element
319		 * New table:
320		 * 	[ 0x1000, 0x2000] RAM
321		 * 	[ 0x3000, 0x4000] RAM		<-- element
322		 */
323		ram_element = e820_element_alloc(element->base, base,
324		    E820_TYPE_MEMORY);
325		if (ram_element == NULL) {
326			return (ENOMEM);
327		}
328		TAILQ_INSERT_BEFORE(element, ram_element, chain);
329		element->base = end;
330	}
331
332	return (0);
333}
334
335static uint64_t
336e820_alloc_highest(const uint64_t max_address, const uint64_t length,
337    const uint64_t alignment, const enum e820_memory_type type)
338{
339	struct e820_element *element;
340
341	TAILQ_FOREACH_REVERSE(element, &e820_table, e820_table, chain) {
342		uint64_t address, base, end;
343
344		end = MIN(max_address, element->end);
345		base = roundup2(element->base, alignment);
346
347		/*
348		 * If end - length == 0, we would allocate memory at address 0. This
349		 * address is mostly unusable and we should avoid allocating it.
350		 * Therefore, search for another block in that case.
351		 */
352		if (element->type != E820_TYPE_MEMORY || end < base ||
353		    end - base < length || end - length == 0) {
354			continue;
355		}
356
357		address = rounddown2(end - length, alignment);
358
359		if (e820_add_entry(address, address + length, type) != 0) {
360			return (0);
361		}
362
363		return (address);
364	}
365
366	return (0);
367}
368
369static uint64_t
370e820_alloc_lowest(const uint64_t min_address, const uint64_t length,
371    const uint64_t alignment, const enum e820_memory_type type)
372{
373	struct e820_element *element;
374
375	TAILQ_FOREACH(element, &e820_table, chain) {
376		uint64_t base, end;
377
378		end = element->end;
379		base = MAX(min_address, roundup2(element->base, alignment));
380
381		/*
382		 * If base == 0, we would allocate memory at address 0. This
383		 * address is mostly unusable and we should avoid allocating it.
384		 * Therefore, search for another block in that case.
385		 */
386		if (element->type != E820_TYPE_MEMORY || end < base ||
387		    end - base < length || base == 0) {
388			continue;
389		}
390
391		if (e820_add_entry(base, base + length, type) != 0) {
392			return (0);
393		}
394
395		return (base);
396	}
397
398	return (0);
399}
400
401uint64_t
402e820_alloc(const uint64_t address, const uint64_t length,
403    const uint64_t alignment, const enum e820_memory_type type,
404    const enum e820_allocation_strategy strategy)
405{
406	assert(powerof2(alignment));
407	assert((address & (alignment - 1)) == 0);
408
409	switch (strategy) {
410	case E820_ALLOCATE_ANY:
411		/*
412		 * Allocate any address. Therefore, ignore the address parameter
413		 * and reuse the code path for allocating the lowest address.
414		 */
415		return (e820_alloc_lowest(0, length, alignment, type));
416	case E820_ALLOCATE_LOWEST:
417		return (e820_alloc_lowest(address, length, alignment, type));
418	case E820_ALLOCATE_HIGHEST:
419		return (e820_alloc_highest(address, length, alignment, type));
420	case E820_ALLOCATE_SPECIFIC:
421		if (e820_add_entry(address, address + length, type) != 0) {
422			return (0);
423		}
424
425		return (address);
426	}
427
428	return (0);
429}
430
431int
432e820_init(struct vmctx *const ctx)
433{
434	uint64_t lowmem_size, highmem_size;
435	int error;
436
437	TAILQ_INIT(&e820_table);
438
439	lowmem_size = vm_get_lowmem_size(ctx);
440	error = e820_add_entry(0, lowmem_size, E820_TYPE_MEMORY);
441	if (error) {
442		warnx("%s: Could not add lowmem", __func__);
443		return (error);
444	}
445
446	highmem_size = vm_get_highmem_size(ctx);
447	if (highmem_size != 0) {
448		error = e820_add_entry(4 * GB, 4 * GB + highmem_size,
449		    E820_TYPE_MEMORY);
450		if (error) {
451			warnx("%s: Could not add highmem", __func__);
452			return (error);
453		}
454	}
455
456	error = e820_add_memory_hole(E820_VGA_MEM_BASE, E820_VGA_MEM_END);
457	if (error) {
458		warnx("%s: Could not add VGA memory", __func__);
459		return (error);
460	}
461
462	error = e820_add_memory_hole(E820_ROM_MEM_BASE, E820_ROM_MEM_END);
463	if (error) {
464		warnx("%s: Could not add ROM area", __func__);
465		return (error);
466	}
467
468	return (0);
469}
470
471int
472e820_finalize(void)
473{
474	struct qemu_fwcfg_item *e820_fwcfg_item;
475	int error;
476
477	e820_fwcfg_item = e820_get_fwcfg_item();
478	if (e820_fwcfg_item == NULL) {
479		warnx("invalid e820 table");
480		return (ENOMEM);
481	}
482	error = qemu_fwcfg_add_file("etc/e820",
483	    e820_fwcfg_item->size, e820_fwcfg_item->data);
484	if (error != 0) {
485		warnx("could not add qemu fwcfg etc/e820");
486		free(e820_fwcfg_item->data);
487		free(e820_fwcfg_item);
488		return (error);
489	}
490	free(e820_fwcfg_item);
491
492	return (0);
493}
494