1/*-
2 * Copyright (c) 2014 Andrew Turner
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include "opt_acpi.h"
29#include "opt_kstack_pages.h"
30#include "opt_platform.h"
31#include "opt_ddb.h"
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/asan.h>
36#include <sys/buf.h>
37#include <sys/bus.h>
38#include <sys/cons.h>
39#include <sys/cpu.h>
40#include <sys/csan.h>
41#include <sys/devmap.h>
42#include <sys/efi.h>
43#include <sys/exec.h>
44#include <sys/imgact.h>
45#include <sys/kdb.h>
46#include <sys/kernel.h>
47#include <sys/ktr.h>
48#include <sys/limits.h>
49#include <sys/linker.h>
50#include <sys/msan.h>
51#include <sys/msgbuf.h>
52#include <sys/pcpu.h>
53#include <sys/physmem.h>
54#include <sys/proc.h>
55#include <sys/ptrace.h>
56#include <sys/reboot.h>
57#include <sys/reg.h>
58#include <sys/rwlock.h>
59#include <sys/sched.h>
60#include <sys/signalvar.h>
61#include <sys/syscallsubr.h>
62#include <sys/sysent.h>
63#include <sys/sysproto.h>
64#include <sys/ucontext.h>
65#include <sys/vdso.h>
66#include <sys/vmmeter.h>
67
68#include <vm/vm.h>
69#include <vm/vm_param.h>
70#include <vm/vm_kern.h>
71#include <vm/vm_object.h>
72#include <vm/vm_page.h>
73#include <vm/vm_phys.h>
74#include <vm/pmap.h>
75#include <vm/vm_map.h>
76#include <vm/vm_pager.h>
77
78#include <machine/armreg.h>
79#include <machine/cpu.h>
80#include <machine/debug_monitor.h>
81#include <machine/hypervisor.h>
82#include <machine/kdb.h>
83#include <machine/machdep.h>
84#include <machine/metadata.h>
85#include <machine/md_var.h>
86#include <machine/pcb.h>
87#include <machine/undefined.h>
88#include <machine/vmparam.h>
89
90#ifdef VFP
91#include <machine/vfp.h>
92#endif
93
94#ifdef DEV_ACPI
95#include <contrib/dev/acpica/include/acpi.h>
96#include <machine/acpica_machdep.h>
97#endif
98
99#ifdef FDT
100#include <dev/fdt/fdt_common.h>
101#include <dev/ofw/openfirm.h>
102#endif
103
104#include <dev/smbios/smbios.h>
105
/*
 * Compile-time checks on the struct pcb layout. The exact size and field
 * offsets are relied upon elsewhere (NOTE(review): presumably by assembly
 * offset consumers — confirm), so any change must be deliberate.
 */
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
    "pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
    "pcb_fpustate changed offset");

/* Which firmware interface (ACPI or FDT) enumerates devices; see bus_probe(). */
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;

/*
 * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we
 * could relocate this, but will need to keep the same virtual address as
 * it's referenced by the EARLY_COUNTER macro.
 */
struct pcpu pcpu0;

#if defined(PERTHREAD_SSP)
/*
 * The boot SSP canary. Will be replaced with a per-thread canary when
 * scheduling has started.
 */
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif

/* Boot-time trapframe for thread0; installed in init_proc0(). */
static struct trapframe proc0_tf;

int early_boot = 1;		/* Cleared near the end of initarm(). */
int cold = 1;
static int boot_el;		/* Exception level the kernel was entered at. */
static uint64_t hcr_el2;	/* HCR_EL2 value passed in by the boot code. */

struct kva_md_info kmi;

int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
int has_pan;			/* Non-zero if ID_AA64MMFR1 reports PAN support. */

#if defined(SOCDEV_PA)
/*
 * This is the virtual address used to access SOCDEV_PA. As it's set before
 * .bss is cleared we need to ensure it's preserved. To do this use
 * __read_mostly as it's only ever set once but read in the putc functions.
 */
uintptr_t socdev_va __read_mostly;
#endif

/*
 * Physical address of the EFI System Table. Stashed from the metadata hints
 * passed into the kernel and used by the EFI code to call runtime services.
 */
vm_paddr_t efi_systbl_phys;
static struct efi_map_header *efihdr;	/* EFI memory map from loader metadata. */

/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);

/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;

/* Optional ACPI APEI NMI handler hook; NULL until registered. */
int (*apei_nmi)(void);
165
#if defined(PERTHREAD_SSP_WARNING)
/*
 * Warn that PERTHREAD_SSP was requested but the compiler cannot support
 * it. Printed twice (very early and very late in boot) so it is hard
 * to miss in the boot output.
 */
static void
print_ssp_warning(void *data __unused)
{
	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
175
176static void
177pan_setup(void)
178{
179	uint64_t id_aa64mfr1;
180
181	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
182	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
183		has_pan = 1;
184}
185
/*
 * Enable Privileged Access Never if pan_setup() detected hardware
 * support. Must run after pan_setup().
 */
void
pan_enable(void)
{

	/*
	 * The LLVM integrated assembler doesn't understand the PAN
	 * PSTATE field. Because of this we need to manually create
	 * the instruction in an asm block. This is equivalent to:
	 * msr pan, #1
	 *
	 * This sets the PAN bit, stopping the kernel from accessing
	 * memory when userspace can also access it unless the kernel
	 * uses the userspace load/store instructions.
	 */
	if (has_pan) {
		/* Clear the SPAN (Set PAN) bit in SCTLR_EL1 first. */
		WRITE_SPECIALREG(sctlr_el1,
		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
	}
}
206
207bool
208has_hyp(void)
209{
210
211	/*
212	 * XXX The E2H check is wrong, but it's close enough for now.  Needs to
213	 * be re-evaluated once we're running regularly in EL2.
214	 */
215	return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0);
216}
217
218bool
219in_vhe(void)
220{
221	/* If we are currently in EL2 then must be in VHE */
222	return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
223	    CURRENTEL_EL_EL2);
224}
225
/*
 * SI_SUB_CPU startup hook: report physical memory statistics, install
 * the undefined-instruction handlers and CPU errata workarounds, and
 * initialise the kernel submaps and the buffer cache.
 */
static void
cpu_startup(void *dummy)
{
	vm_paddr_t size;
	int i;

	printf("real memory  = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
	    ptoa((uintmax_t)realmem) / 1024 / 1024);

	if (bootverbose) {
		printf("Physical memory chunk(s):\n");
		/* phys_avail[] is a zero-terminated list of (start, end) pairs. */
		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
			size = phys_avail[i + 1] - phys_avail[i];
			printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
			    (uintmax_t)phys_avail[i],
			    (uintmax_t)phys_avail[i + 1] - 1,
			    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
		}
	}

	printf("avail memory = %ju (%ju MB)\n",
	    ptoa((uintmax_t)vm_free_count()),
	    ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);

	undef_init();
	install_cpu_errata();

	vm_ksubmap_init(&kmi);
	bufinit();
	vm_pager_bufferinit();
}

SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
259
/* Run the late ifunc relocation pass at SI_SUB_CPU. */
static void
late_ifunc_resolve(void *dummy __unused)
{
	link_elf_late_ireloc();
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
266
/*
 * Machine-dependent hook for waking an idle CPU. No extra action is
 * needed here, so report 0 (nothing done) for every CPU.
 */
int
cpu_idle_wakeup(int cpu)
{
	return (0);
}
273
/*
 * Idle the current CPU. If !busy the idle tick is stopped first and
 * restarted on wakeup. The CPU sleeps in WFI only when no thread is
 * runnable; the DSB orders prior memory accesses before sleeping.
 */
void
cpu_idle(int busy)
{

	spinlock_enter();
	if (!busy)
		cpu_idleclock();
	if (!sched_runnable())
		__asm __volatile(
		    "dsb sy \n"
		    "wfi    \n");
	if (!busy)
		cpu_activeclock();
	spinlock_exit();
}
289
/*
 * Final halt: disable interrupts and spin in WFI forever.
 * Never returns.
 */
void
cpu_halt(void)
{

	/* We should have shutdown by now, if not enter a low power sleep */
	intr_disable();
	for (;;)
		__asm __volatile("wfi");
}
300
301/*
302 * Flush the D-cache for non-DMA I/O so that the I-cache can
303 * be made coherent later.
304 */
305void
306cpu_flush_dcache(void *ptr, size_t len)
307{
308
309	/* ARM64TODO TBD */
310}
311
312/* Get current clock frequency for the given CPU ID. */
313int
314cpu_est_clockrate(int cpu_id, uint64_t *rate)
315{
316	struct pcpu *pc;
317
318	pc = pcpu_find(cpu_id);
319	if (pc == NULL || rate == NULL)
320		return (EINVAL);
321
322	if (pc->pc_clock == 0)
323		return (EOPNOTSUPP);
324
325	*rate = pc->pc_clock;
326	return (0);
327}
328
/*
 * Machine-dependent per-CPU data initialisation. Sets sentinel values
 * (NOTE(review): presumably overwritten later during CPU enumeration —
 * confirm against the MP startup code).
 */
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{

	pcpu->pc_acpi_id = 0xffffffff;
	pcpu->pc_mpidr = UINT64_MAX;
}
336
/*
 * Machine-dependent spinlock entry. On the outermost acquisition,
 * interrupts are disabled and the previous DAIF state is saved so
 * spinlock_exit() can restore it; nested calls just bump the count.
 * The statement order matters: interrupts must be off before the
 * count and saved state are updated.
 */
void
spinlock_enter(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	if (td->td_md.md_spinlock_count == 0) {
		daif = intr_disable();
		td->td_md.md_spinlock_count = 1;
		td->td_md.md_saved_daif = daif;
		critical_enter();
	} else
		td->td_md.md_spinlock_count++;
}
352
/*
 * Machine-dependent spinlock exit: the counterpart of spinlock_enter().
 * Only the outermost release leaves the critical section and restores
 * the DAIF state saved at the matching outermost enter.
 */
void
spinlock_exit(void)
{
	struct thread *td;
	register_t daif;

	td = curthread;
	/* Read the saved state before dropping the count. */
	daif = td->td_md.md_saved_daif;
	td->td_md.md_spinlock_count--;
	if (td->td_md.md_spinlock_count == 0) {
		critical_exit();
		intr_restore(daif);
	}
}
367
368/*
369 * Construct a PCB from a trapframe. This is called from kdb_trap() where
370 * we want to start a backtrace from the function that caused us to enter
371 * the debugger. We have the context in the trapframe, but base the trace
372 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
373 * enough for a backtrace.
374 */
375void
376makectx(struct trapframe *tf, struct pcb *pcb)
377{
378	int i;
379
380	/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
381	for (i = 0; i < nitems(pcb->pcb_x); i++) {
382		if (i == PCB_LR)
383			pcb->pcb_x[i] = tf->tf_elr;
384		else
385			pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
386	}
387
388	pcb->pcb_sp = tf->tf_sp;
389}
390
/*
 * Initialise proc0/thread0: link them together, attach the boot-time
 * kernel stack supplied by the early boot code, carve thread0's PCB
 * from the top of that stack and publish it as the current PCB.
 */
static void
init_proc0(vm_offset_t kstack)
{
	struct pcpu *pcpup;

	pcpup = cpuid_to_pcpu[0];
	MPASS(pcpup != NULL);

	proc_linkup0(&proc0, &thread0);
	thread0.td_kstack = kstack;
	thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
	thread0.td_md.md_canary = boot_canary;
#endif
	/* The PCB sits at the very top of the kernel stack. */
	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags = 0;
	thread0.td_pcb->pcb_fpflags = 0;
	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;	/* Sentinel: no valid VFP CPU. */
	thread0.td_frame = &proc0_tf;
	ptrauth_thread0(&thread0);
	pcpup->pc_curpcb = thread0.td_pcb;

	/*
	 * Unmask SError exceptions. They are used to signal a RAS failure,
	 * or other hardware error.
	 */
	serror_enable();
}
421
422/*
423 * Get an address to be used to write to kernel data that may be mapped
424 * read-only, e.g. to patch kernel code.
425 */
426bool
427arm64_get_writable_addr(void *addr, void **out)
428{
429	vm_paddr_t pa;
430
431	/* Check if the page is writable */
432	if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
433		*out = addr;
434		return (true);
435	}
436
437	/*
438	 * Find the physical address of the given page.
439	 */
440	if (!pmap_klookup((vm_offset_t)addr, &pa)) {
441		return (false);
442	}
443
444	/*
445	 * If it is within the DMAP region and is writable use that.
446	 */
447	if (PHYS_IN_DMAP_RANGE(pa)) {
448		addr = (void *)PHYS_TO_DMAP(pa);
449		if (PAR_SUCCESS(arm64_address_translate_s1e1w(
450		    (vm_offset_t)addr))) {
451			*out = addr;
452			return (true);
453		}
454	}
455
456	return (false);
457}
458
/* Callback type invoked for each EFI memory map descriptor. */
typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp);

/*
 * Walk every descriptor in the loader-provided EFI memory map and invoke
 * cb(descriptor, argp) on each. Descriptors are stepped by the
 * firmware-reported descriptor_size, which may be larger than
 * sizeof(struct efi_md).
 */
static void
foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	/* The descriptor array starts at the next 16-byte boundary. */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	/* Guard against a corrupt header before dividing. */
	if (efihdr->descriptor_size == 0)
		return;
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		cb(p, argp);
	}
}
484
485/*
486 * Handle the EFI memory map list.
487 *
488 * We will make two passes at this, the first (exclude == false) to populate
489 * physmem with valid physical memory ranges from recognized map entry types.
490 * In the second pass we will exclude memory ranges from physmem which must not
491 * be used for general allocations, either because they are used by runtime
492 * firmware or otherwise reserved.
493 *
494 * Adding the runtime-reserved memory ranges to physmem and excluding them
495 * later ensures that they are included in the DMAP, but excluded from
496 * phys_avail[].
497 *
498 * Entry types not explicitly listed here are ignored and not mapped.
499 */
500static void
501handle_efi_map_entry(struct efi_md *p, void *argp)
502{
503	bool exclude = *(bool *)argp;
504
505	switch (p->md_type) {
506	case EFI_MD_TYPE_RECLAIM:
507		/*
508		 * The recomended location for ACPI tables. Map into the
509		 * DMAP so we can access them from userspace via /dev/mem.
510		 */
511	case EFI_MD_TYPE_RT_CODE:
512		/*
513		 * Some UEFI implementations put the system table in the
514		 * runtime code section. Include it in the DMAP, but will
515		 * be excluded from phys_avail.
516		 */
517	case EFI_MD_TYPE_RT_DATA:
518		/*
519		 * Runtime data will be excluded after the DMAP
520		 * region is created to stop it from being added
521		 * to phys_avail.
522		 */
523		if (exclude) {
524			physmem_exclude_region(p->md_phys,
525			    p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC);
526			break;
527		}
528		/* FALLTHROUGH */
529	case EFI_MD_TYPE_CODE:
530	case EFI_MD_TYPE_DATA:
531	case EFI_MD_TYPE_BS_CODE:
532	case EFI_MD_TYPE_BS_DATA:
533	case EFI_MD_TYPE_FREE:
534		/*
535		 * We're allowed to use any entry with these types.
536		 */
537		if (!exclude)
538			physmem_hardware_region(p->md_phys,
539			    p->md_pages * EFI_PAGE_SIZE);
540		break;
541	default:
542		/* Other types shall not be handled by physmem. */
543		break;
544	}
545}
546
547static void
548add_efi_map_entries(struct efi_map_header *efihdr)
549{
550	bool exclude = false;
551	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
552}
553
554static void
555exclude_efi_map_entries(struct efi_map_header *efihdr)
556{
557	bool exclude = true;
558	foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude);
559}
560
561static void
562print_efi_map_entry(struct efi_md *p, void *argp __unused)
563{
564	const char *type;
565	static const char *types[] = {
566		"Reserved",
567		"LoaderCode",
568		"LoaderData",
569		"BootServicesCode",
570		"BootServicesData",
571		"RuntimeServicesCode",
572		"RuntimeServicesData",
573		"ConventionalMemory",
574		"UnusableMemory",
575		"ACPIReclaimMemory",
576		"ACPIMemoryNVS",
577		"MemoryMappedIO",
578		"MemoryMappedIOPortSpace",
579		"PalCode",
580		"PersistentMemory"
581	};
582
583	if (p->md_type < nitems(types))
584		type = types[p->md_type];
585	else
586		type = "<INVALID>";
587	printf("%23s %012lx %012lx %08lx ", type, p->md_phys,
588	    p->md_virt, p->md_pages);
589	if (p->md_attr & EFI_MD_ATTR_UC)
590		printf("UC ");
591	if (p->md_attr & EFI_MD_ATTR_WC)
592		printf("WC ");
593	if (p->md_attr & EFI_MD_ATTR_WT)
594		printf("WT ");
595	if (p->md_attr & EFI_MD_ATTR_WB)
596		printf("WB ");
597	if (p->md_attr & EFI_MD_ATTR_UCE)
598		printf("UCE ");
599	if (p->md_attr & EFI_MD_ATTR_WP)
600		printf("WP ");
601	if (p->md_attr & EFI_MD_ATTR_RP)
602		printf("RP ");
603	if (p->md_attr & EFI_MD_ATTR_XP)
604		printf("XP ");
605	if (p->md_attr & EFI_MD_ATTR_NV)
606		printf("NV ");
607	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
608		printf("MORE_RELIABLE ");
609	if (p->md_attr & EFI_MD_ATTR_RO)
610		printf("RO ");
611	if (p->md_attr & EFI_MD_ATTR_RT)
612		printf("RUNTIME");
613	printf("\n");
614}
615
/* Print the whole EFI memory map, one formatted line per descriptor. */
static void
print_efi_map_entries(struct efi_map_header *efihdr)
{

	printf("%23s %12s %12s %8s %4s\n",
	    "Type", "Physical", "Virtual", "#Pages", "Attr");
	foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL);
}
624
625/*
626 * Map the passed in VA in EFI space to a void * using the efi memory table to
627 * find the PA and return it in the DMAP, if it exists. We're used between the
628 * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
629 * tables We assume that either the entry you are mapping fits within its page,
630 * or if it spills to the next page, that's contiguous in PA and in the DMAP.
631 * All observed tables obey the first part of this precondition.
632 */
633struct early_map_data
634{
635	vm_offset_t va;
636	vm_offset_t pa;
637};
638
639static void
640efi_early_map_entry(struct efi_md *p, void *argp)
641{
642	struct early_map_data *emdp = argp;
643	vm_offset_t s, e;
644
645	if (emdp->pa != 0)
646		return;
647	if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
648		return;
649	s = p->md_virt;
650	e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
651	if (emdp->va < s  || emdp->va >= e)
652		return;
653	emdp->pa = p->md_phys + (emdp->va - p->md_virt);
654}
655
656static void *
657efi_early_map(vm_offset_t va)
658{
659	struct early_map_data emd = { .va = va };
660
661	foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd);
662	if (emd.pa == 0)
663		return NULL;
664	return (void *)PHYS_TO_DMAP(emd.pa);
665}
666
667
668/*
669 * When booted via kboot, the prior kernel will pass in reserved memory areas in
670 * a EFI config table. We need to find that table and walk through it excluding
671 * the memory ranges in it. btw, this is called too early for the printf to do
672 * anything since msgbufp isn't initialized, let alone a console...
673 */
674static void
675exclude_efi_memreserve(vm_offset_t efi_systbl_phys)
676{
677	struct efi_systbl *systbl;
678	struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
679
680	systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
681	if (systbl == NULL) {
682		printf("can't map systbl\n");
683		return;
684	}
685	if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
686		printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
687		return;
688	}
689
690	/*
691	 * We don't yet have the pmap system booted enough to create a pmap for
692	 * the efi firmware's preferred address space from the GetMemoryMap()
693	 * table. The st_cfgtbl is a VA in this space, so we need to do the
694	 * mapping ourselves to a kernel VA with efi_early_map. We assume that
695	 * the cfgtbl entries don't span a page. Other pointers are PAs, as
696	 * noted below.
697	 */
698	if (systbl->st_cfgtbl == 0)	/* Failsafe st_entries should == 0 in this case */
699		return;
700	for (int i = 0; i < systbl->st_entries; i++) {
701		struct efi_cfgtbl *cfgtbl;
702		struct linux_efi_memreserve *mr;
703
704		cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
705		if (cfgtbl == NULL)
706			panic("Can't map the config table entry %d\n", i);
707		if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0)
708			continue;
709
710		/*
711		 * cfgtbl points are either VA or PA, depending on the GUID of
712		 * the table. memreserve GUID pointers are PA and not converted
713		 * after a SetVirtualAddressMap(). The list's mr_next pointer
714		 * is also a PA.
715		 */
716		mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
717			(vm_offset_t)cfgtbl->ct_data);
718		while (true) {
719			for (int j = 0; j < mr->mr_count; j++) {
720				struct linux_efi_memreserve_entry *mre;
721
722				mre = &mr->mr_entry[j];
723				physmem_exclude_region(mre->mre_base, mre->mre_size,
724				    EXFLAG_NODUMP | EXFLAG_NOALLOC);
725			}
726			if (mr->mr_next == 0)
727				break;
728			mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
729		};
730	}
731
732}
733
#ifdef FDT
/*
 * Locate the device tree blob — from loader metadata, or the statically
 * embedded copy when FDT_DTB_STATIC is configured — and initialise the
 * OFW/FDT layer with it, then parse kernel arguments from the DTB.
 */
static void
try_load_dtb(caddr_t kmdp)
{
	vm_offset_t dtbp;

	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
	/*
	 * In case the device tree blob was not retrieved (from metadata) try
	 * to use the statically embedded one.
	 */
	if (dtbp == 0)
		dtbp = (vm_offset_t)&fdt_static_dtb;
#endif

	if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
		printf("ERROR loading DTB\n");
#endif
		return;
	}

	if (OF_install(OFW_FDT, 0) == FALSE)
		panic("Cannot install FDT");

	if (OF_init((void *)dtbp) != 0)
		panic("OF_init failed with the found device tree");

	parse_fdt_bootargs();
}
#endif
766
/*
 * Decide whether devices are enumerated via FDT or ACPI. The
 * kern.cfg.order environment variable (a comma-separated preference
 * list containing "acpi" and/or "fdt") is honoured when set; otherwise
 * FDT is preferred when a DTB is present, falling back to ACPI.
 *
 * Returns false when the user supplied an order that could not be
 * satisfied; the caller panics after the console is up (see initarm()).
 */
static bool
bus_probe(void)
{
	bool has_acpi, has_fdt;
	char *order, *env;

	has_acpi = has_fdt = false;

#ifdef FDT
	has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
	has_acpi = (AcpiOsGetRootPointer() != 0);
#endif

	env = kern_getenv("kern.cfg.order");
	if (env != NULL) {
		order = env;
		while (order != NULL) {
			/* Match a leading "acpi" token when ACPI is available. */
			if (has_acpi &&
			    strncmp(order, "acpi", 4) == 0 &&
			    (order[4] == ',' || order[4] == '\0')) {
				arm64_bus_method = ARM64_BUS_ACPI;
				break;
			}
			/* Match a leading "fdt" token when a DTB is available. */
			if (has_fdt &&
			    strncmp(order, "fdt", 3) == 0 &&
			    (order[3] == ',' || order[3] == '\0')) {
				arm64_bus_method = ARM64_BUS_FDT;
				break;
			}
			order = strchr(order, ',');
			if (order != NULL)
				order++;	/* Skip comma */
		}
		freeenv(env);

		/* If we set the bus method it is valid */
		if (arm64_bus_method != ARM64_BUS_NONE)
			return (true);
	}
	/* If no order or an invalid order was set use the default */
	if (arm64_bus_method == ARM64_BUS_NONE) {
		if (has_fdt)
			arm64_bus_method = ARM64_BUS_FDT;
		else if (has_acpi)
			arm64_bus_method = ARM64_BUS_ACPI;
	}

	/*
	 * If no option was set the default is valid, otherwise we are
	 * setting one to get cninit() working, then calling panic to tell
	 * the user about the invalid bus setup.
	 */
	return (env == NULL);
}
823
/*
 * Probe the cache geometry and the "dc zva" (zero by VA) block size.
 * When dc zva is usable, switch the pagezero implementation to the
 * cache-assisted variant.
 */
static void
cache_setup(void)
{
	int dczva_line_shift;
	uint32_t dczid_el0;

	identify_cache(READ_SPECIALREG(ctr_el0));

	dczid_el0 = READ_SPECIALREG(dczid_el0);

	/* Check if dc zva is not prohibited */
	if (dczid_el0 & DCZID_DZP)
		dczva_line_size = 0;
	else {
		/* DCZID_EL0.BS is log2 of the block size in 4-byte words. */
		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
		dczva_line_size = sizeof(int) << dczva_line_shift;

		/* Change pagezero function */
		pagezero = pagezero_cache;
	}
}
846
/*
 * Choose the memory attribute (VM_MEMATTR_*) to use when mapping the
 * physical address pa, based on the EFI memory map when one was
 * provided. Without an EFI map everything is treated as write-back;
 * with one, addresses not covered by any descriptor — or covered by a
 * descriptor with no cacheability attribute — are mapped as device
 * memory.
 */
int
memory_mapping_mode(vm_paddr_t pa)
{
	struct efi_md *map, *p;
	size_t efisz;
	int ndesc, i;

	if (efihdr == NULL)
		return (VM_MEMATTR_WRITE_BACK);

	/*
	 * Memory map data provided by UEFI via the GetMemoryMap
	 * Boot Services API.
	 */
	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
	map = (struct efi_md *)((uint8_t *)efihdr + efisz);

	if (efihdr->descriptor_size == 0)
		return (VM_MEMATTR_WRITE_BACK);
	ndesc = efihdr->memory_size / efihdr->descriptor_size;

	for (i = 0, p = map; i < ndesc; i++,
	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
		/* Skip descriptors that do not cover pa. */
		if (pa < p->md_phys ||
		    pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
			continue;
		if (p->md_type == EFI_MD_TYPE_IOMEM ||
		    p->md_type == EFI_MD_TYPE_IOPORT)
			return (VM_MEMATTR_DEVICE);
		else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
		    p->md_type == EFI_MD_TYPE_RECLAIM)
			return (VM_MEMATTR_WRITE_BACK);
		else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
			return (VM_MEMATTR_WRITE_THROUGH);
		else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
			return (VM_MEMATTR_WRITE_COMBINING);
		break;
	}

	return (VM_MEMATTR_DEVICE);
}
888
/*
 * Machine-dependent early initialisation for arm64. Runs on the boot
 * CPU before mi_startup(): sets up per-CPU data, parses loader/FDT
 * metadata, registers physical memory with physmem, bootstraps the
 * pmap and console, and prepares proc0/thread0 so the machine-
 * independent kernel can take over.
 */
void
initarm(struct arm64_bootparams *abp)
{
	struct efi_fb *efifb;
	struct pcpu *pcpup;
	char *env;
#ifdef FDT
	struct mem_region mem_regions[FDT_MEM_REGIONS];
	int mem_regions_sz;
	phandle_t root;
	char dts_version[255];
#endif
	vm_offset_t lastaddr;
	caddr_t kmdp;
	bool valid;

	TSRAW(&thread0, TS_ENTER, __func__, NULL);

	/* Record the boot EL and HCR_EL2 for has_hyp(). */
	boot_el = abp->boot_el;
	hcr_el2 = abp->hcr_el2;

	/* Parse loader or FDT boot parameters. Determine last used address. */
	lastaddr = parse_boot_param(abp);

	/* Find the kernel address */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	identify_cpu(0);
	identify_hypervisor_smbios();

	update_special_regs(0);

	/* Set the pcpu data, this is needed by pmap_bootstrap */
	pcpup = &pcpu0;
	pcpu_init(pcpup, 0, sizeof(struct pcpu));

	/*
	 * Set the pcpu pointer with a backup in tpidr_el1 to be
	 * loaded when entering the kernel from userland.
	 */
	__asm __volatile(
	    "mov x18, %0 \n"
	    "msr tpidr_el1, %0" :: "r"(pcpup));

	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
	PCPU_SET(curthread, &thread0);
	PCPU_SET(midr, get_midr());

	link_elf_ireloc(kmdp);
#ifdef FDT
	try_load_dtb(kmdp);
#endif

	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);

	/* Load the physical memory ranges */
	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
	if (efihdr != NULL)
		add_efi_map_entries(efihdr);
#ifdef FDT
	else {
		/* Grab physical memory regions information from device tree. */
		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
		    NULL) != 0)
			panic("Cannot get physical memory regions");
		physmem_hardware_regions(mem_regions, mem_regions_sz);
	}
	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
		physmem_exclude_regions(mem_regions, mem_regions_sz,
		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
#endif

	/* Exclude the EFI framebuffer from our view of physical memory. */
	efifb = (struct efi_fb *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_EFI_FB);
	if (efifb != NULL)
		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
		    EXFLAG_NOALLOC);

	/* Do basic tuning, hz etc */
	init_param1();

	cache_setup();
	pan_setup();

	/* Bootstrap enough of pmap  to enter the kernel proper */
	pmap_bootstrap(lastaddr - KERNBASE);
	/* Exclude entries needed in the DMAP region, but not phys_avail */
	if (efihdr != NULL)
		exclude_efi_map_entries(efihdr);
	/*  Do the same for reserve entries in the EFI MEMRESERVE table */
	if (efi_systbl_phys != 0)
		exclude_efi_memreserve(efi_systbl_phys);

	/*
	 * We carefully bootstrap the sanitizer map after we've excluded
	 * absolutely everything else that could impact phys_avail.  There's not
	 * always enough room for the initial shadow map after the kernel, so
	 * we'll end up searching for segments that we can safely use.  Those
	 * segments also get excluded from phys_avail.
	 */
#if defined(KASAN) || defined(KMSAN)
	pmap_bootstrap_san();
#endif

	physmem_init_kernel_globals();

	devmap_bootstrap();

	valid = bus_probe();

	cninit();
	set_ttbr0(abp->kern_ttbr0);
	cpu_tlb_flushID();

	/* Now the console works, report an invalid kern.cfg.order setting. */
	if (!valid)
		panic("Invalid bus configuration: %s",
		    kern_getenv("kern.cfg.order"));

	/*
	 * Check if pointer authentication is available on this system, and
	 * if so enable its use. This needs to be called before init_proc0
	 * as that will configure the thread0 pointer authentication keys.
	 */
	ptrauth_init();

	/*
	 * Dump the boot metadata. We have to wait for cninit() since console
	 * output is required. If it's grossly incorrect the kernel will never
	 * make it this far.
	 */
	if (getenv_is_true("debug.dump_modinfo_at_boot"))
		preload_dump();

	init_proc0(abp->kern_stack);
	msgbufinit(msgbufp, msgbufsize);
	mutex_init();
	init_param2(physmem);

	dbg_init();
	kdb_init();
#ifdef KDB
	if ((boothowto & RB_KDB) != 0)
		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
	pan_enable();

	kcsan_cpu_init(0);
	kasan_init();
	kmsan_init();

	env = kern_getenv("kernelname");
	if (env != NULL)
		strlcpy(kernelname, env, sizeof(kernelname));

#ifdef FDT
	/* Warn when the installed DTB does not match the expected version. */
	if (arm64_bus_method == ARM64_BUS_FDT) {
		root = OF_finddevice("/");
		if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
			if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
				printf("WARNING: DTB version is %s while kernel expects %s, "
				    "please update the DTB in the ESP\n",
				    dts_version,
				    LINUX_DTS_VERSION);
		} else {
			printf("WARNING: Cannot find freebsd,dts-version property, "
			    "cannot check DTB compliance\n");
		}
	}
#endif

	if (boothowto & RB_VERBOSE) {
		if (efihdr != NULL)
			print_efi_map_entries(efihdr);
		physmem_print_tables();
	}

	early_boot = 0;

	if (bootverbose && kstack_pages != KSTACK_PAGES)
		printf("kern.kstack_pages = %d ignored for thread0\n",
		    kstack_pages);

	TSEXIT();
}
1077
/*
 * Initialise the self-hosted debug support: unlock the debug registers
 * by clearing the OS lock, then set up the debug monitor state.
 */
void
dbg_init(void)
{

	/* Clear OS lock */
	WRITE_SPECIALREG(oslar_el1, 0);

	/* This permits DDB to use debug registers for watchpoints. */
	dbg_monitor_init();

	/* TODO: Eventually will need to initialize debug registers here. */
}
1090
#ifdef DDB
#include <ddb/ddb.h>

/* DDB "show specialregs": dump a useful set of arm64 system registers. */
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define	PRINT_REG(reg)	\
    db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))

	PRINT_REG(actlr_el1);
	PRINT_REG(afsr0_el1);
	PRINT_REG(afsr1_el1);
	PRINT_REG(aidr_el1);
	PRINT_REG(amair_el1);
	PRINT_REG(ccsidr_el1);
	PRINT_REG(clidr_el1);
	PRINT_REG(contextidr_el1);
	PRINT_REG(cpacr_el1);
	PRINT_REG(csselr_el1);
	PRINT_REG(ctr_el0);
	PRINT_REG(currentel);
	PRINT_REG(daif);
	PRINT_REG(dczid_el0);
	PRINT_REG(elr_el1);
	PRINT_REG(esr_el1);
	PRINT_REG(far_el1);
#if 0
	/* ARM64TODO: Enable VFP before reading floating-point registers */
	PRINT_REG(fpcr);
	PRINT_REG(fpsr);
#endif
	PRINT_REG(id_aa64afr0_el1);
	PRINT_REG(id_aa64afr1_el1);
	PRINT_REG(id_aa64dfr0_el1);
	PRINT_REG(id_aa64dfr1_el1);
	PRINT_REG(id_aa64isar0_el1);
	PRINT_REG(id_aa64isar1_el1);
	PRINT_REG(id_aa64pfr0_el1);
	PRINT_REG(id_aa64pfr1_el1);
	PRINT_REG(id_afr0_el1);
	PRINT_REG(id_dfr0_el1);
	PRINT_REG(id_isar0_el1);
	PRINT_REG(id_isar1_el1);
	PRINT_REG(id_isar2_el1);
	PRINT_REG(id_isar3_el1);
	PRINT_REG(id_isar4_el1);
	PRINT_REG(id_isar5_el1);
	PRINT_REG(id_mmfr0_el1);
	PRINT_REG(id_mmfr1_el1);
	PRINT_REG(id_mmfr2_el1);
	PRINT_REG(id_mmfr3_el1);
#if 0
	/* Missing from llvm */
	PRINT_REG(id_mmfr4_el1);
#endif
	PRINT_REG(id_pfr0_el1);
	PRINT_REG(id_pfr1_el1);
	PRINT_REG(isr_el1);
	PRINT_REG(mair_el1);
	PRINT_REG(midr_el1);
	PRINT_REG(mpidr_el1);
	PRINT_REG(mvfr0_el1);
	PRINT_REG(mvfr1_el1);
	PRINT_REG(mvfr2_el1);
	PRINT_REG(revidr_el1);
	PRINT_REG(sctlr_el1);
	PRINT_REG(sp_el0);
	PRINT_REG(spsel);
	PRINT_REG(spsr_el1);
	PRINT_REG(tcr_el1);
	PRINT_REG(tpidr_el0);
	PRINT_REG(tpidr_el1);
	PRINT_REG(tpidrro_el0);
	PRINT_REG(ttbr0_el1);
	PRINT_REG(ttbr1_el1);
	PRINT_REG(vbar_el1);
#undef PRINT_REG
}
1168
/*
 * DDB "show vtop <virt_addr>": run the hardware address translation
 * (AT) instructions for the given VA and print the resulting PAR
 * values for EL1/EL0 read and write accesses.
 */
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
	uint64_t phys;

	if (have_addr) {
		phys = arm64_address_translate_s1e1r(addr);
		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e1w(addr);
		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0r(addr);
		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
		phys = arm64_address_translate_s1e0w(addr);
		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
	} else
		db_printf("show vtop <virt_addr>\n");
}
1185#endif
1186