1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1998-2000 Doug Rabson
5 * Copyright (c) 2004 Peter Wemm
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include "opt_ddb.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/fcntl.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/linker.h>
39#include <sys/mutex.h>
40#include <sys/mount.h>
41#include <sys/namei.h>
42#include <sys/proc.h>
43#include <sys/rwlock.h>
44#include <sys/sysctl.h>
45#include <sys/vnode.h>
46
47#include <machine/elf.h>
48
49#include <net/vnet.h>
50
51#include <security/mac/mac_framework.h>
52
53#include <vm/vm.h>
54#include <vm/vm_param.h>
55#include <vm/pmap.h>
56#include <vm/vm_extern.h>
57#include <vm/vm_kern.h>
58#include <vm/vm_map.h>
59#include <vm/vm_object.h>
60#include <vm/vm_page.h>
61#include <vm/vm_pager.h>
62
63#include <sys/link_elf.h>
64
65#ifdef DDB_CTF
66#include <contrib/zlib/zlib.h>
67#endif
68
69#include "linker_if.h"
70
71typedef struct {
72	void		*addr;
73	Elf_Off		size;
74	int		flags;	/* Section flags. */
75	int		sec;	/* Original section number. */
76	char		*name;
77} Elf_progent;
78
79typedef struct {
80	Elf_Rel		*rel;
81	int		nrel;
82	int		sec;
83} Elf_relent;
84
85typedef struct {
86	Elf_Rela	*rela;
87	int		nrela;
88	int		sec;
89} Elf_relaent;
90
91typedef struct elf_file {
92	struct linker_file lf;		/* Common fields */
93
94	int		preloaded;
95	caddr_t		address;	/* Relocation address */
96	vm_object_t	object;		/* VM object to hold file pages */
97	Elf_Shdr	*e_shdr;
98
99	Elf_progent	*progtab;
100	u_int		nprogtab;
101
102	Elf_relaent	*relatab;
103	u_int		nrelatab;
104
105	Elf_relent	*reltab;
106	int		nreltab;
107
108	Elf_Sym		*ddbsymtab;	/* The symbol table we are using */
109	long		ddbsymcnt;	/* Number of symbols */
110	caddr_t		ddbstrtab;	/* String table */
111	long		ddbstrcnt;	/* number of bytes in string table */
112
113	caddr_t		shstrtab;	/* Section name string table */
114	long		shstrcnt;	/* number of bytes in string table */
115
116	caddr_t		ctftab;		/* CTF table */
117	long		ctfcnt;		/* number of bytes in CTF table */
118	caddr_t		ctfoff;		/* CTF offset table */
119	caddr_t		typoff;		/* Type offset table */
120	long		typlen;		/* Number of type entries. */
121
122} *elf_file_t;
123
124#include <kern/kern_ctf.c>
125
126static int	link_elf_link_preload(linker_class_t cls,
127		    const char *, linker_file_t *);
128static int	link_elf_link_preload_finish(linker_file_t);
129static int	link_elf_load_file(linker_class_t, const char *, linker_file_t *);
130static int	link_elf_lookup_symbol(linker_file_t, const char *,
131		    c_linker_sym_t *);
132static int	link_elf_lookup_debug_symbol(linker_file_t, const char *,
133		    c_linker_sym_t *);
134static int	link_elf_lookup_debug_symbol_ctf(linker_file_t lf,
135		    const char *name, c_linker_sym_t *sym, linker_ctf_t *lc);
136static int	link_elf_symbol_values(linker_file_t, c_linker_sym_t,
137		    linker_symval_t *);
138static int	link_elf_debug_symbol_values(linker_file_t, c_linker_sym_t,
139		    linker_symval_t *);
140static int	link_elf_search_symbol(linker_file_t, caddr_t value,
141		    c_linker_sym_t *sym, long *diffp);
142
143static void	link_elf_unload_file(linker_file_t);
144static int	link_elf_lookup_set(linker_file_t, const char *,
145		    void ***, void ***, int *);
146static int	link_elf_each_function_name(linker_file_t,
147		    int (*)(const char *, void *), void *);
148static int	link_elf_each_function_nameval(linker_file_t,
149				linker_function_nameval_callback_t,
150				void *);
151static int	link_elf_reloc_local(linker_file_t, bool);
152static long	link_elf_symtab_get(linker_file_t, const Elf_Sym **);
153static long	link_elf_strtab_get(linker_file_t, caddr_t *);
154#ifdef VIMAGE
155static void	link_elf_propagate_vnets(linker_file_t);
156#endif
157
158static int	elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps,
159		    Elf_Addr *);
160
161static kobj_method_t link_elf_methods[] = {
162	KOBJMETHOD(linker_lookup_symbol,	link_elf_lookup_symbol),
163	KOBJMETHOD(linker_lookup_debug_symbol,	link_elf_lookup_debug_symbol),
164	KOBJMETHOD(linker_lookup_debug_symbol_ctf, link_elf_lookup_debug_symbol_ctf),
165	KOBJMETHOD(linker_symbol_values,	link_elf_symbol_values),
166	KOBJMETHOD(linker_debug_symbol_values,	link_elf_debug_symbol_values),
167	KOBJMETHOD(linker_search_symbol,	link_elf_search_symbol),
168	KOBJMETHOD(linker_unload,		link_elf_unload_file),
169	KOBJMETHOD(linker_load_file,		link_elf_load_file),
170	KOBJMETHOD(linker_link_preload,		link_elf_link_preload),
171	KOBJMETHOD(linker_link_preload_finish,	link_elf_link_preload_finish),
172	KOBJMETHOD(linker_lookup_set,		link_elf_lookup_set),
173	KOBJMETHOD(linker_each_function_name,	link_elf_each_function_name),
174	KOBJMETHOD(linker_each_function_nameval, link_elf_each_function_nameval),
175	KOBJMETHOD(linker_ctf_get,		link_elf_ctf_get),
176	KOBJMETHOD(linker_ctf_lookup_typename,  link_elf_ctf_lookup_typename),
177	KOBJMETHOD(linker_symtab_get, 		link_elf_symtab_get),
178	KOBJMETHOD(linker_strtab_get, 		link_elf_strtab_get),
179#ifdef VIMAGE
180	KOBJMETHOD(linker_propagate_vnets,	link_elf_propagate_vnets),
181#endif
182	KOBJMETHOD_END
183};
184
185static struct linker_class link_elf_class = {
186#if ELF_TARG_CLASS == ELFCLASS32
187	"elf32_obj",
188#else
189	"elf64_obj",
190#endif
191	link_elf_methods, sizeof(struct elf_file)
192};
193
194static bool link_elf_obj_leak_locals = true;
195SYSCTL_BOOL(_debug, OID_AUTO, link_elf_obj_leak_locals,
196    CTLFLAG_RWTUN, &link_elf_obj_leak_locals, 0,
197    "Allow local symbols to participate in global module symbol resolution");
198
199static int	relocate_file(elf_file_t ef);
200static void	elf_obj_cleanup_globals_cache(elf_file_t);
201
/*
 * Print a "kldload:" diagnostic.  The file name is included in the message
 * only when the caller supplies one.
 */
static void
link_elf_error(const char *filename, const char *s)
{
	if (filename != NULL) {
		printf("kldload: %s: %s\n", filename, s);
		return;
	}
	printf("kldload: %s\n", s);
}
210
211static void
212link_elf_init(void *arg)
213{
214
215	linker_add_class(&link_elf_class);
216}
217SYSINIT(link_elf_obj, SI_SUB_KLD, SI_ORDER_SECOND, link_elf_init, NULL);
218
219static void
220link_elf_protect_range(elf_file_t ef, vm_offset_t start, vm_offset_t end,
221    vm_prot_t prot)
222{
223	int error __unused;
224
225	KASSERT(start <= end && start >= (vm_offset_t)ef->address &&
226	    end <= round_page((vm_offset_t)ef->address + ef->lf.size),
227	    ("link_elf_protect_range: invalid range %#jx-%#jx",
228	    (uintmax_t)start, (uintmax_t)end));
229
230	if (start == end)
231		return;
232	if (ef->preloaded) {
233#ifdef __amd64__
234		error = pmap_change_prot(start, end - start, prot);
235		KASSERT(error == 0,
236		    ("link_elf_protect_range: pmap_change_prot() returned %d",
237		    error));
238#endif
239		return;
240	}
241	error = vm_map_protect(kernel_map, start, end, prot, 0,
242	    VM_MAP_PROTECT_SET_PROT);
243	KASSERT(error == KERN_SUCCESS,
244	    ("link_elf_protect_range: vm_map_protect() returned %d", error));
245}
246
247/*
248 * Restrict permissions on linker file memory based on section flags.
249 * Sections need not be page-aligned, so overlap within a page is possible.
250 */
251static void
252link_elf_protect(elf_file_t ef)
253{
254	vm_offset_t end, segend, segstart, start;
255	vm_prot_t gapprot, prot, segprot;
256	int i;
257
258	/*
259	 * If the file was preloaded, the last page may contain other preloaded
260	 * data which may need to be writeable.  ELF files are always
261	 * page-aligned, but other preloaded data, such as entropy or CPU
262	 * microcode may be loaded with a smaller alignment.
263	 */
264	gapprot = ef->preloaded ? VM_PROT_RW : VM_PROT_READ;
265
266	start = end = (vm_offset_t)ef->address;
267	prot = VM_PROT_READ;
268	for (i = 0; i < ef->nprogtab; i++) {
269		/*
270		 * VNET and DPCPU sections have their memory allocated by their
271		 * respective subsystems.
272		 */
273		if (ef->progtab[i].name != NULL && (
274#ifdef VIMAGE
275		    strcmp(ef->progtab[i].name, VNET_SETNAME) == 0 ||
276#endif
277		    strcmp(ef->progtab[i].name, DPCPU_SETNAME) == 0))
278			continue;
279
280		segstart = trunc_page((vm_offset_t)ef->progtab[i].addr);
281		segend = round_page((vm_offset_t)ef->progtab[i].addr +
282		    ef->progtab[i].size);
283		segprot = VM_PROT_READ;
284		if ((ef->progtab[i].flags & SHF_WRITE) != 0)
285			segprot |= VM_PROT_WRITE;
286		if ((ef->progtab[i].flags & SHF_EXECINSTR) != 0)
287			segprot |= VM_PROT_EXECUTE;
288
289		if (end <= segstart) {
290			/*
291			 * Case 1: there is no overlap between the previous
292			 * segment and this one.  Apply protections to the
293			 * previous segment, and protect the gap between the
294			 * previous and current segments, if any.
295			 */
296			link_elf_protect_range(ef, start, end, prot);
297			link_elf_protect_range(ef, end, segstart, gapprot);
298
299			start = segstart;
300			end = segend;
301			prot = segprot;
302		} else if (start < segstart && end == segend) {
303			/*
304			 * Case 2: the current segment is a subrange of the
305			 * previous segment.  Apply protections to the
306			 * non-overlapping portion of the previous segment.
307			 */
308			link_elf_protect_range(ef, start, segstart, prot);
309
310			start = segstart;
311			prot |= segprot;
312		} else if (end < segend) {
313			/*
314			 * Case 3: there is partial overlap between the previous
315			 * and current segments.  Apply protections to the
316			 * non-overlapping portion of the previous segment, and
317			 * then the overlap, which must use the union of the two
318			 * segments' protections.
319			 */
320			link_elf_protect_range(ef, start, segstart, prot);
321			link_elf_protect_range(ef, segstart, end,
322			    prot | segprot);
323			start = end;
324			end = segend;
325			prot = segprot;
326		} else {
327			/*
328			 * Case 4: the two segments reside in the same page.
329			 */
330			prot |= segprot;
331		}
332	}
333
334	/*
335	 * Fix up the last unprotected segment and trailing data.
336	 */
337	link_elf_protect_range(ef, start, end, prot);
338	link_elf_protect_range(ef, end,
339	    round_page((vm_offset_t)ef->address + ef->lf.size), gapprot);
340}
341
342static int
343link_elf_link_preload(linker_class_t cls, const char *filename,
344    linker_file_t *result)
345{
346	Elf_Ehdr *hdr;
347	Elf_Shdr *shdr;
348	Elf_Sym *es;
349	void *modptr, *baseptr, *sizeptr;
350	char *type;
351	elf_file_t ef;
352	linker_file_t lf;
353	Elf_Addr off;
354	int error, i, j, pb, ra, rl, shstrindex, symstrindex, symtabindex;
355
356	/* Look to see if we have the file preloaded */
357	modptr = preload_search_by_name(filename);
358	if (modptr == NULL)
359		return ENOENT;
360
361	type = (char *)preload_search_info(modptr, MODINFO_TYPE);
362	baseptr = preload_search_info(modptr, MODINFO_ADDR);
363	sizeptr = preload_search_info(modptr, MODINFO_SIZE);
364	hdr = (Elf_Ehdr *)preload_search_info(modptr, MODINFO_METADATA |
365	    MODINFOMD_ELFHDR);
366	shdr = (Elf_Shdr *)preload_search_info(modptr, MODINFO_METADATA |
367	    MODINFOMD_SHDR);
368	if (type == NULL || (strcmp(type, "elf" __XSTRING(__ELF_WORD_SIZE)
369	    " obj module") != 0 &&
370	    strcmp(type, "elf obj module") != 0)) {
371		return (EFTYPE);
372	}
373	if (baseptr == NULL || sizeptr == NULL || hdr == NULL ||
374	    shdr == NULL)
375		return (EINVAL);
376
377	lf = linker_make_file(filename, &link_elf_class);
378	if (lf == NULL)
379		return (ENOMEM);
380
381	ef = (elf_file_t)lf;
382	ef->preloaded = 1;
383	ef->address = *(caddr_t *)baseptr;
384	lf->address = *(caddr_t *)baseptr;
385	lf->size = *(size_t *)sizeptr;
386
387	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
388	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
389	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
390	    hdr->e_version != EV_CURRENT ||
391	    hdr->e_type != ET_REL ||
392	    hdr->e_machine != ELF_TARG_MACH) {
393		error = EFTYPE;
394		goto out;
395	}
396	ef->e_shdr = shdr;
397
398	/* Scan the section header for information and table sizing. */
399	symtabindex = -1;
400	symstrindex = -1;
401	for (i = 0; i < hdr->e_shnum; i++) {
402		switch (shdr[i].sh_type) {
403		case SHT_PROGBITS:
404		case SHT_NOBITS:
405#ifdef __amd64__
406		case SHT_X86_64_UNWIND:
407#endif
408		case SHT_INIT_ARRAY:
409		case SHT_FINI_ARRAY:
410			/* Ignore sections not loaded by the loader. */
411			if (shdr[i].sh_addr == 0)
412				break;
413			ef->nprogtab++;
414			break;
415		case SHT_SYMTAB:
416			symtabindex = i;
417			symstrindex = shdr[i].sh_link;
418			break;
419		case SHT_REL:
420			/*
421			 * Ignore relocation tables for sections not
422			 * loaded by the loader.
423			 */
424			if (shdr[shdr[i].sh_info].sh_addr == 0)
425				break;
426			ef->nreltab++;
427			break;
428		case SHT_RELA:
429			if (shdr[shdr[i].sh_info].sh_addr == 0)
430				break;
431			ef->nrelatab++;
432			break;
433		}
434	}
435
436	shstrindex = hdr->e_shstrndx;
437	if (ef->nprogtab == 0 || symstrindex < 0 ||
438	    symstrindex >= hdr->e_shnum ||
439	    shdr[symstrindex].sh_type != SHT_STRTAB || shstrindex == 0 ||
440	    shstrindex >= hdr->e_shnum ||
441	    shdr[shstrindex].sh_type != SHT_STRTAB) {
442		printf("%s: bad/missing section headers\n", filename);
443		error = ENOEXEC;
444		goto out;
445	}
446
447	/* Allocate space for tracking the load chunks */
448	if (ef->nprogtab != 0)
449		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
450		    M_LINKER, M_WAITOK | M_ZERO);
451	if (ef->nreltab != 0)
452		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
453		    M_LINKER, M_WAITOK | M_ZERO);
454	if (ef->nrelatab != 0)
455		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
456		    M_LINKER, M_WAITOK | M_ZERO);
457	if ((ef->nprogtab != 0 && ef->progtab == NULL) ||
458	    (ef->nreltab != 0 && ef->reltab == NULL) ||
459	    (ef->nrelatab != 0 && ef->relatab == NULL)) {
460		error = ENOMEM;
461		goto out;
462	}
463
464	/* XXX, relocate the sh_addr fields saved by the loader. */
465	off = 0;
466	for (i = 0; i < hdr->e_shnum; i++) {
467		if (shdr[i].sh_addr != 0 && (off == 0 || shdr[i].sh_addr < off))
468			off = shdr[i].sh_addr;
469	}
470	for (i = 0; i < hdr->e_shnum; i++) {
471		if (shdr[i].sh_addr != 0)
472			shdr[i].sh_addr = shdr[i].sh_addr - off +
473			    (Elf_Addr)ef->address;
474	}
475
476	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
477	ef->ddbsymtab = (Elf_Sym *)shdr[symtabindex].sh_addr;
478	ef->ddbstrcnt = shdr[symstrindex].sh_size;
479	ef->ddbstrtab = (char *)shdr[symstrindex].sh_addr;
480	ef->shstrcnt = shdr[shstrindex].sh_size;
481	ef->shstrtab = (char *)shdr[shstrindex].sh_addr;
482
483	/* Now fill out progtab and the relocation tables. */
484	pb = 0;
485	rl = 0;
486	ra = 0;
487	for (i = 0; i < hdr->e_shnum; i++) {
488		switch (shdr[i].sh_type) {
489		case SHT_PROGBITS:
490		case SHT_NOBITS:
491#ifdef __amd64__
492		case SHT_X86_64_UNWIND:
493#endif
494		case SHT_INIT_ARRAY:
495		case SHT_FINI_ARRAY:
496			if (shdr[i].sh_addr == 0)
497				break;
498			ef->progtab[pb].addr = (void *)shdr[i].sh_addr;
499			if (shdr[i].sh_type == SHT_PROGBITS)
500				ef->progtab[pb].name = "<<PROGBITS>>";
501#ifdef __amd64__
502			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
503				ef->progtab[pb].name = "<<UNWIND>>";
504#endif
505			else if (shdr[i].sh_type == SHT_INIT_ARRAY)
506				ef->progtab[pb].name = "<<INIT_ARRAY>>";
507			else if (shdr[i].sh_type == SHT_FINI_ARRAY)
508				ef->progtab[pb].name = "<<FINI_ARRAY>>";
509			else
510				ef->progtab[pb].name = "<<NOBITS>>";
511			ef->progtab[pb].size = shdr[i].sh_size;
512			ef->progtab[pb].flags = shdr[i].sh_flags;
513			ef->progtab[pb].sec = i;
514			if (ef->shstrtab && shdr[i].sh_name != 0)
515				ef->progtab[pb].name =
516				    ef->shstrtab + shdr[i].sh_name;
517			if (ef->progtab[pb].name != NULL &&
518			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
519				void *dpcpu;
520
521				dpcpu = dpcpu_alloc(shdr[i].sh_size);
522				if (dpcpu == NULL) {
523					printf("%s: pcpu module space is out "
524					    "of space; cannot allocate %#jx "
525					    "for %s\n", __func__,
526					    (uintmax_t)shdr[i].sh_size,
527					    filename);
528					error = ENOSPC;
529					goto out;
530				}
531				memcpy(dpcpu, ef->progtab[pb].addr,
532				    ef->progtab[pb].size);
533				dpcpu_copy(dpcpu, shdr[i].sh_size);
534				ef->progtab[pb].addr = dpcpu;
535#ifdef VIMAGE
536			} else if (ef->progtab[pb].name != NULL &&
537			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
538				void *vnet_data;
539
540				vnet_data = vnet_data_alloc(shdr[i].sh_size);
541				if (vnet_data == NULL) {
542					printf("%s: vnet module space is out "
543					    "of space; cannot allocate %#jx "
544					    "for %s\n", __func__,
545					    (uintmax_t)shdr[i].sh_size,
546					    filename);
547					error = ENOSPC;
548					goto out;
549				}
550				memcpy(vnet_data, ef->progtab[pb].addr,
551				    ef->progtab[pb].size);
552				ef->progtab[pb].addr = vnet_data;
553				vnet_save_init(ef->progtab[pb].addr,
554				    ef->progtab[pb].size);
555#endif
556			} else if ((ef->progtab[pb].name != NULL &&
557			    strcmp(ef->progtab[pb].name, ".ctors") == 0) ||
558			    shdr[i].sh_type == SHT_INIT_ARRAY) {
559				if (lf->ctors_addr != 0) {
560					printf(
561				    "%s: multiple ctor sections in %s\n",
562					    __func__, filename);
563				} else {
564					lf->ctors_addr = ef->progtab[pb].addr;
565					lf->ctors_size = shdr[i].sh_size;
566				}
567			} else if ((ef->progtab[pb].name != NULL &&
568			    strcmp(ef->progtab[pb].name, ".dtors") == 0) ||
569			    shdr[i].sh_type == SHT_FINI_ARRAY) {
570				if (lf->dtors_addr != 0) {
571					printf(
572				    "%s: multiple dtor sections in %s\n",
573					    __func__, filename);
574				} else {
575					lf->dtors_addr = ef->progtab[pb].addr;
576					lf->dtors_size = shdr[i].sh_size;
577				}
578			}
579
580			/* Update all symbol values with the offset. */
581			for (j = 0; j < ef->ddbsymcnt; j++) {
582				es = &ef->ddbsymtab[j];
583				if (es->st_shndx != i)
584					continue;
585				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
586			}
587			pb++;
588			break;
589		case SHT_REL:
590			if (shdr[shdr[i].sh_info].sh_addr == 0)
591				break;
592			ef->reltab[rl].rel = (Elf_Rel *)shdr[i].sh_addr;
593			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
594			ef->reltab[rl].sec = shdr[i].sh_info;
595			rl++;
596			break;
597		case SHT_RELA:
598			if (shdr[shdr[i].sh_info].sh_addr == 0)
599				break;
600			ef->relatab[ra].rela = (Elf_Rela *)shdr[i].sh_addr;
601			ef->relatab[ra].nrela =
602			    shdr[i].sh_size / sizeof(Elf_Rela);
603			ef->relatab[ra].sec = shdr[i].sh_info;
604			ra++;
605			break;
606		}
607	}
608	if (pb != ef->nprogtab) {
609		printf("%s: lost progbits\n", filename);
610		error = ENOEXEC;
611		goto out;
612	}
613	if (rl != ef->nreltab) {
614		printf("%s: lost reltab\n", filename);
615		error = ENOEXEC;
616		goto out;
617	}
618	if (ra != ef->nrelatab) {
619		printf("%s: lost relatab\n", filename);
620		error = ENOEXEC;
621		goto out;
622	}
623
624	/*
625	 * The file needs to be writeable and executable while applying
626	 * relocations.  Mapping protections are applied once relocation
627	 * processing is complete.
628	 */
629	link_elf_protect_range(ef, (vm_offset_t)ef->address,
630	    round_page((vm_offset_t)ef->address + ef->lf.size), VM_PROT_ALL);
631
632	/* Local intra-module relocations */
633	error = link_elf_reloc_local(lf, false);
634	if (error != 0)
635		goto out;
636	*result = lf;
637	return (0);
638
639out:
640	/* preload not done this way */
641	linker_file_unload(lf, LINKER_UNLOAD_FORCE);
642	return (error);
643}
644
/*
 * Call every non-NULL entry of a table of void (*)(void) function pointers.
 * 'addr' is the start of the table and 'size' its length in bytes; a NULL
 * address or zero size means there is nothing to run.
 */
static void
link_elf_invoke_cbs(caddr_t addr, size_t size)
{
	void (**cb)(void);
	void (**last)(void);

	if (addr == NULL || size == 0)
		return;

	cb = (void *)addr;
	/* Trailing bytes that do not make up a whole pointer are ignored. */
	last = cb + size / sizeof(*cb);
	for (; cb < last; cb++) {
		if (*cb != NULL)
			(**cb)();
	}
}
660
661static void
662link_elf_invoke_ctors(linker_file_t lf)
663{
664	KASSERT(lf->ctors_invoked == LF_NONE,
665	    ("%s: file %s ctor state %d",
666	    __func__, lf->filename, lf->ctors_invoked));
667
668	link_elf_invoke_cbs(lf->ctors_addr, lf->ctors_size);
669	lf->ctors_invoked = LF_CTORS;
670}
671
672static void
673link_elf_invoke_dtors(linker_file_t lf)
674{
675	KASSERT(lf->ctors_invoked != LF_DTORS,
676	    ("%s: file %s ctor state %d",
677	    __func__, lf->filename, lf->ctors_invoked));
678
679	if (lf->ctors_invoked == LF_CTORS) {
680		link_elf_invoke_cbs(lf->dtors_addr, lf->dtors_size);
681		lf->ctors_invoked = LF_DTORS;
682	}
683}
684
685static int
686link_elf_link_preload_finish(linker_file_t lf)
687{
688	elf_file_t ef;
689	int error;
690
691	ef = (elf_file_t)lf;
692	error = relocate_file(ef);
693	if (error)
694		return (error);
695
696	/* Notify MD code that a module is being loaded. */
697	error = elf_cpu_load_file(lf);
698	if (error)
699		return (error);
700
701#if defined(__i386__) || defined(__amd64__)
702	/* Now ifuncs. */
703	error = link_elf_reloc_local(lf, true);
704	if (error != 0)
705		return (error);
706#endif
707
708	/* Apply protections now that relocation processing is complete. */
709	link_elf_protect(ef);
710
711	link_elf_invoke_ctors(lf);
712	return (0);
713}
714
715static int
716link_elf_load_file(linker_class_t cls, const char *filename,
717    linker_file_t *result)
718{
719	struct nameidata *nd;
720	struct thread *td = curthread;	/* XXX */
721	Elf_Ehdr *hdr;
722	Elf_Shdr *shdr;
723	Elf_Sym *es;
724	int nbytes, i, j;
725	vm_offset_t mapbase;
726	size_t mapsize;
727	int error = 0;
728	ssize_t resid;
729	int flags;
730	elf_file_t ef;
731	linker_file_t lf;
732	int symtabindex;
733	int symstrindex;
734	int shstrindex;
735	int nsym;
736	int pb, rl, ra;
737	int alignmask;
738
739	shdr = NULL;
740	lf = NULL;
741	mapsize = 0;
742	hdr = NULL;
743
744	nd = malloc(sizeof(struct nameidata), M_TEMP, M_WAITOK);
745	NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, filename);
746	flags = FREAD;
747	error = vn_open(nd, &flags, 0, NULL);
748	if (error) {
749		free(nd, M_TEMP);
750		return error;
751	}
752	NDFREE_PNBUF(nd);
753	if (nd->ni_vp->v_type != VREG) {
754		error = ENOEXEC;
755		goto out;
756	}
757#ifdef MAC
758	error = mac_kld_check_load(td->td_ucred, nd->ni_vp);
759	if (error) {
760		goto out;
761	}
762#endif
763
764	/* Read the elf header from the file. */
765	hdr = malloc(sizeof(*hdr), M_LINKER, M_WAITOK);
766	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)hdr, sizeof(*hdr), 0,
767	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
768	    &resid, td);
769	if (error)
770		goto out;
771	if (resid != 0){
772		error = ENOEXEC;
773		goto out;
774	}
775
776	if (!IS_ELF(*hdr)) {
777		error = ENOEXEC;
778		goto out;
779	}
780
781	if (hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS
782	    || hdr->e_ident[EI_DATA] != ELF_TARG_DATA) {
783		link_elf_error(filename, "Unsupported file layout");
784		error = ENOEXEC;
785		goto out;
786	}
787	if (hdr->e_ident[EI_VERSION] != EV_CURRENT
788	    || hdr->e_version != EV_CURRENT) {
789		link_elf_error(filename, "Unsupported file version");
790		error = ENOEXEC;
791		goto out;
792	}
793	if (hdr->e_type != ET_REL) {
794		error = ENOSYS;
795		goto out;
796	}
797	if (hdr->e_machine != ELF_TARG_MACH) {
798		link_elf_error(filename, "Unsupported machine");
799		error = ENOEXEC;
800		goto out;
801	}
802
803	lf = linker_make_file(filename, &link_elf_class);
804	if (!lf) {
805		error = ENOMEM;
806		goto out;
807	}
808	ef = (elf_file_t) lf;
809	ef->nprogtab = 0;
810	ef->e_shdr = 0;
811	ef->nreltab = 0;
812	ef->nrelatab = 0;
813
814	/* Allocate and read in the section header */
815	nbytes = hdr->e_shnum * hdr->e_shentsize;
816	if (nbytes == 0 || hdr->e_shoff == 0 ||
817	    hdr->e_shentsize != sizeof(Elf_Shdr)) {
818		error = ENOEXEC;
819		goto out;
820	}
821	shdr = malloc(nbytes, M_LINKER, M_WAITOK);
822	ef->e_shdr = shdr;
823	error = vn_rdwr(UIO_READ, nd->ni_vp, (caddr_t)shdr, nbytes,
824	    hdr->e_shoff, UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
825	    NOCRED, &resid, td);
826	if (error)
827		goto out;
828	if (resid) {
829		error = ENOEXEC;
830		goto out;
831	}
832
833	/* Scan the section header for information and table sizing. */
834	nsym = 0;
835	symtabindex = -1;
836	symstrindex = -1;
837	for (i = 0; i < hdr->e_shnum; i++) {
838		if (shdr[i].sh_size == 0)
839			continue;
840		switch (shdr[i].sh_type) {
841		case SHT_PROGBITS:
842		case SHT_NOBITS:
843#ifdef __amd64__
844		case SHT_X86_64_UNWIND:
845#endif
846		case SHT_INIT_ARRAY:
847		case SHT_FINI_ARRAY:
848			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
849				break;
850			ef->nprogtab++;
851			break;
852		case SHT_SYMTAB:
853			nsym++;
854			symtabindex = i;
855			symstrindex = shdr[i].sh_link;
856			break;
857		case SHT_REL:
858			/*
859			 * Ignore relocation tables for unallocated
860			 * sections.
861			 */
862			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
863				break;
864			ef->nreltab++;
865			break;
866		case SHT_RELA:
867			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
868				break;
869			ef->nrelatab++;
870			break;
871		case SHT_STRTAB:
872			break;
873		}
874	}
875	if (ef->nprogtab == 0) {
876		link_elf_error(filename, "file has no contents");
877		error = ENOEXEC;
878		goto out;
879	}
880	if (nsym != 1) {
881		/* Only allow one symbol table for now */
882		link_elf_error(filename,
883		    "file must have exactly one symbol table");
884		error = ENOEXEC;
885		goto out;
886	}
887	if (symstrindex < 0 || symstrindex > hdr->e_shnum ||
888	    shdr[symstrindex].sh_type != SHT_STRTAB) {
889		link_elf_error(filename, "file has invalid symbol strings");
890		error = ENOEXEC;
891		goto out;
892	}
893
894	/* Allocate space for tracking the load chunks */
895	if (ef->nprogtab != 0)
896		ef->progtab = malloc(ef->nprogtab * sizeof(*ef->progtab),
897		    M_LINKER, M_WAITOK | M_ZERO);
898	if (ef->nreltab != 0)
899		ef->reltab = malloc(ef->nreltab * sizeof(*ef->reltab),
900		    M_LINKER, M_WAITOK | M_ZERO);
901	if (ef->nrelatab != 0)
902		ef->relatab = malloc(ef->nrelatab * sizeof(*ef->relatab),
903		    M_LINKER, M_WAITOK | M_ZERO);
904
905	if (symtabindex == -1) {
906		link_elf_error(filename, "lost symbol table index");
907		error = ENOEXEC;
908		goto out;
909	}
910	/* Allocate space for and load the symbol table */
911	ef->ddbsymcnt = shdr[symtabindex].sh_size / sizeof(Elf_Sym);
912	ef->ddbsymtab = malloc(shdr[symtabindex].sh_size, M_LINKER, M_WAITOK);
913	error = vn_rdwr(UIO_READ, nd->ni_vp, (void *)ef->ddbsymtab,
914	    shdr[symtabindex].sh_size, shdr[symtabindex].sh_offset,
915	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
916	    &resid, td);
917	if (error)
918		goto out;
919	if (resid != 0){
920		error = EINVAL;
921		goto out;
922	}
923
924	/* Allocate space for and load the symbol strings */
925	ef->ddbstrcnt = shdr[symstrindex].sh_size;
926	ef->ddbstrtab = malloc(shdr[symstrindex].sh_size, M_LINKER, M_WAITOK);
927	error = vn_rdwr(UIO_READ, nd->ni_vp, ef->ddbstrtab,
928	    shdr[symstrindex].sh_size, shdr[symstrindex].sh_offset,
929	    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
930	    &resid, td);
931	if (error)
932		goto out;
933	if (resid != 0){
934		error = EINVAL;
935		goto out;
936	}
937
938	/* Do we have a string table for the section names?  */
939	shstrindex = -1;
940	if (hdr->e_shstrndx != 0 &&
941	    shdr[hdr->e_shstrndx].sh_type == SHT_STRTAB) {
942		shstrindex = hdr->e_shstrndx;
943		ef->shstrcnt = shdr[shstrindex].sh_size;
944		ef->shstrtab = malloc(shdr[shstrindex].sh_size, M_LINKER,
945		    M_WAITOK);
946		error = vn_rdwr(UIO_READ, nd->ni_vp, ef->shstrtab,
947		    shdr[shstrindex].sh_size, shdr[shstrindex].sh_offset,
948		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
949		    &resid, td);
950		if (error)
951			goto out;
952		if (resid != 0){
953			error = EINVAL;
954			goto out;
955		}
956	}
957
958	/* Size up code/data(progbits) and bss(nobits). */
959	alignmask = 0;
960	for (i = 0; i < hdr->e_shnum; i++) {
961		if (shdr[i].sh_size == 0)
962			continue;
963		switch (shdr[i].sh_type) {
964		case SHT_PROGBITS:
965		case SHT_NOBITS:
966#ifdef __amd64__
967		case SHT_X86_64_UNWIND:
968#endif
969		case SHT_INIT_ARRAY:
970		case SHT_FINI_ARRAY:
971			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
972				break;
973			alignmask = shdr[i].sh_addralign - 1;
974			mapsize += alignmask;
975			mapsize &= ~alignmask;
976			mapsize += shdr[i].sh_size;
977			break;
978		}
979	}
980
981	/*
982	 * We know how much space we need for the text/data/bss/etc.
983	 * This stuff needs to be in a single chunk so that profiling etc
984	 * can get the bounds and gdb can associate offsets with modules
985	 */
986	ef->object = vm_pager_allocate(OBJT_PHYS, NULL, round_page(mapsize),
987	    VM_PROT_ALL, 0, thread0.td_ucred);
988	if (ef->object == NULL) {
989		error = ENOMEM;
990		goto out;
991	}
992#if VM_NRESERVLEVEL > 0
993	vm_object_color(ef->object, 0);
994#endif
995
996	/*
997	 * In order to satisfy amd64's architectural requirements on the
998	 * location of code and data in the kernel's address space, request a
999	 * mapping that is above the kernel.
1000	 *
1001	 * Protections will be restricted once relocations are applied.
1002	 */
1003#ifdef __amd64__
1004	mapbase = KERNBASE;
1005#else
1006	mapbase = VM_MIN_KERNEL_ADDRESS;
1007#endif
1008	error = vm_map_find(kernel_map, ef->object, 0, &mapbase,
1009	    round_page(mapsize), 0, VMFS_OPTIMAL_SPACE, VM_PROT_ALL,
1010	    VM_PROT_ALL, 0);
1011	if (error != KERN_SUCCESS) {
1012		vm_object_deallocate(ef->object);
1013		ef->object = NULL;
1014		error = ENOMEM;
1015		goto out;
1016	}
1017
1018	/* Wire the pages */
1019	error = vm_map_wire(kernel_map, mapbase,
1020	    mapbase + round_page(mapsize),
1021	    VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
1022	if (error != KERN_SUCCESS) {
1023		error = ENOMEM;
1024		goto out;
1025	}
1026
1027	/* Inform the kld system about the situation */
1028	lf->address = ef->address = (caddr_t)mapbase;
1029	lf->size = mapsize;
1030
1031	/*
1032	 * Now load code/data(progbits), zero bss(nobits), allocate space for
1033	 * and load relocs
1034	 */
1035	pb = 0;
1036	rl = 0;
1037	ra = 0;
1038	alignmask = 0;
1039	for (i = 0; i < hdr->e_shnum; i++) {
1040		if (shdr[i].sh_size == 0)
1041			continue;
1042		switch (shdr[i].sh_type) {
1043		case SHT_PROGBITS:
1044		case SHT_NOBITS:
1045#ifdef __amd64__
1046		case SHT_X86_64_UNWIND:
1047#endif
1048		case SHT_INIT_ARRAY:
1049		case SHT_FINI_ARRAY:
1050			if ((shdr[i].sh_flags & SHF_ALLOC) == 0)
1051				break;
1052			alignmask = shdr[i].sh_addralign - 1;
1053			mapbase += alignmask;
1054			mapbase &= ~alignmask;
1055			if (ef->shstrtab != NULL && shdr[i].sh_name != 0) {
1056				ef->progtab[pb].name =
1057				    ef->shstrtab + shdr[i].sh_name;
1058				if (!strcmp(ef->progtab[pb].name, ".ctors") ||
1059				    shdr[i].sh_type == SHT_INIT_ARRAY) {
1060					if (lf->ctors_addr != 0) {
1061						printf(
1062				    "%s: multiple ctor sections in %s\n",
1063						    __func__, filename);
1064					} else {
1065						lf->ctors_addr =
1066						    (caddr_t)mapbase;
1067						lf->ctors_size =
1068						    shdr[i].sh_size;
1069					}
1070				} else if (!strcmp(ef->progtab[pb].name,
1071				    ".dtors") ||
1072				    shdr[i].sh_type == SHT_FINI_ARRAY) {
1073					if (lf->dtors_addr != 0) {
1074						printf(
1075				    "%s: multiple dtor sections in %s\n",
1076						    __func__, filename);
1077					} else {
1078						lf->dtors_addr =
1079						    (caddr_t)mapbase;
1080						lf->dtors_size =
1081						    shdr[i].sh_size;
1082					}
1083				}
1084			} else if (shdr[i].sh_type == SHT_PROGBITS)
1085				ef->progtab[pb].name = "<<PROGBITS>>";
1086#ifdef __amd64__
1087			else if (shdr[i].sh_type == SHT_X86_64_UNWIND)
1088				ef->progtab[pb].name = "<<UNWIND>>";
1089#endif
1090			else
1091				ef->progtab[pb].name = "<<NOBITS>>";
1092			if (ef->progtab[pb].name != NULL &&
1093			    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME)) {
1094				ef->progtab[pb].addr =
1095				    dpcpu_alloc(shdr[i].sh_size);
1096				if (ef->progtab[pb].addr == NULL) {
1097					printf("%s: pcpu module space is out "
1098					    "of space; cannot allocate %#jx "
1099					    "for %s\n", __func__,
1100					    (uintmax_t)shdr[i].sh_size,
1101					    filename);
1102				}
1103			}
1104#ifdef VIMAGE
1105			else if (ef->progtab[pb].name != NULL &&
1106			    !strcmp(ef->progtab[pb].name, VNET_SETNAME)) {
1107				ef->progtab[pb].addr =
1108				    vnet_data_alloc(shdr[i].sh_size);
1109				if (ef->progtab[pb].addr == NULL) {
1110					printf("%s: vnet module space is out "
1111					    "of space; cannot allocate %#jx "
1112					    "for %s\n", __func__,
1113					    (uintmax_t)shdr[i].sh_size,
1114					    filename);
1115				}
1116			}
1117#endif
1118			else
1119				ef->progtab[pb].addr =
1120				    (void *)(uintptr_t)mapbase;
1121			if (ef->progtab[pb].addr == NULL) {
1122				error = ENOSPC;
1123				goto out;
1124			}
1125			ef->progtab[pb].size = shdr[i].sh_size;
1126			ef->progtab[pb].flags = shdr[i].sh_flags;
1127			ef->progtab[pb].sec = i;
1128			if (shdr[i].sh_type == SHT_PROGBITS
1129#ifdef __amd64__
1130			    || shdr[i].sh_type == SHT_X86_64_UNWIND
1131#endif
1132			    ) {
1133				error = vn_rdwr(UIO_READ, nd->ni_vp,
1134				    ef->progtab[pb].addr,
1135				    shdr[i].sh_size, shdr[i].sh_offset,
1136				    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
1137				    NOCRED, &resid, td);
1138				if (error)
1139					goto out;
1140				if (resid != 0){
1141					error = EINVAL;
1142					goto out;
1143				}
1144				/* Initialize the per-cpu area. */
1145				if (ef->progtab[pb].addr != (void *)mapbase &&
1146				    !strcmp(ef->progtab[pb].name, DPCPU_SETNAME))
1147					dpcpu_copy(ef->progtab[pb].addr,
1148					    shdr[i].sh_size);
1149			} else
1150				bzero(ef->progtab[pb].addr, shdr[i].sh_size);
1151
1152#ifdef VIMAGE
1153			if (ef->progtab[pb].addr != (void *)mapbase &&
1154			    strcmp(ef->progtab[pb].name, VNET_SETNAME) == 0)
1155				vnet_save_init(ef->progtab[pb].addr,
1156				    ef->progtab[pb].size);
1157#endif
1158			/* Update all symbol values with the offset. */
1159			for (j = 0; j < ef->ddbsymcnt; j++) {
1160				es = &ef->ddbsymtab[j];
1161				if (es->st_shndx != i)
1162					continue;
1163				es->st_value += (Elf_Addr)ef->progtab[pb].addr;
1164			}
1165			mapbase += shdr[i].sh_size;
1166			pb++;
1167			break;
1168		case SHT_REL:
1169			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1170				break;
1171			ef->reltab[rl].rel = malloc(shdr[i].sh_size, M_LINKER,
1172			    M_WAITOK);
1173			ef->reltab[rl].nrel = shdr[i].sh_size / sizeof(Elf_Rel);
1174			ef->reltab[rl].sec = shdr[i].sh_info;
1175			error = vn_rdwr(UIO_READ, nd->ni_vp,
1176			    (void *)ef->reltab[rl].rel,
1177			    shdr[i].sh_size, shdr[i].sh_offset,
1178			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1179			    &resid, td);
1180			if (error)
1181				goto out;
1182			if (resid != 0){
1183				error = EINVAL;
1184				goto out;
1185			}
1186			rl++;
1187			break;
1188		case SHT_RELA:
1189			if ((shdr[shdr[i].sh_info].sh_flags & SHF_ALLOC) == 0)
1190				break;
1191			ef->relatab[ra].rela = malloc(shdr[i].sh_size, M_LINKER,
1192			    M_WAITOK);
1193			ef->relatab[ra].nrela =
1194			    shdr[i].sh_size / sizeof(Elf_Rela);
1195			ef->relatab[ra].sec = shdr[i].sh_info;
1196			error = vn_rdwr(UIO_READ, nd->ni_vp,
1197			    (void *)ef->relatab[ra].rela,
1198			    shdr[i].sh_size, shdr[i].sh_offset,
1199			    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, NOCRED,
1200			    &resid, td);
1201			if (error)
1202				goto out;
1203			if (resid != 0){
1204				error = EINVAL;
1205				goto out;
1206			}
1207			ra++;
1208			break;
1209		}
1210	}
1211	if (pb != ef->nprogtab) {
1212		link_elf_error(filename, "lost progbits");
1213		error = ENOEXEC;
1214		goto out;
1215	}
1216	if (rl != ef->nreltab) {
1217		link_elf_error(filename, "lost reltab");
1218		error = ENOEXEC;
1219		goto out;
1220	}
1221	if (ra != ef->nrelatab) {
1222		link_elf_error(filename, "lost relatab");
1223		error = ENOEXEC;
1224		goto out;
1225	}
1226	if (mapbase != (vm_offset_t)ef->address + mapsize) {
1227		printf(
1228		    "%s: mapbase 0x%lx != address %p + mapsize 0x%lx (0x%lx)\n",
1229		    filename != NULL ? filename : "<none>",
1230		    (u_long)mapbase, ef->address, (u_long)mapsize,
1231		    (u_long)(vm_offset_t)ef->address + mapsize);
1232		error = ENOMEM;
1233		goto out;
1234	}
1235
1236	/* Local intra-module relocations */
1237	error = link_elf_reloc_local(lf, false);
1238	if (error != 0)
1239		goto out;
1240
1241	/* Pull in dependencies */
1242	VOP_UNLOCK(nd->ni_vp);
1243	error = linker_load_dependencies(lf);
1244	vn_lock(nd->ni_vp, LK_EXCLUSIVE | LK_RETRY);
1245	if (error)
1246		goto out;
1247
1248	/* External relocations */
1249	error = relocate_file(ef);
1250	if (error)
1251		goto out;
1252
1253	/* Notify MD code that a module is being loaded. */
1254	error = elf_cpu_load_file(lf);
1255	if (error)
1256		goto out;
1257
1258#if defined(__i386__) || defined(__amd64__)
1259	/* Now ifuncs. */
1260	error = link_elf_reloc_local(lf, true);
1261	if (error != 0)
1262		goto out;
1263#endif
1264
1265	link_elf_protect(ef);
1266	link_elf_invoke_ctors(lf);
1267	*result = lf;
1268
1269out:
1270	VOP_UNLOCK(nd->ni_vp);
1271	vn_close(nd->ni_vp, FREAD, td->td_ucred, td);
1272	free(nd, M_TEMP);
1273	if (error && lf)
1274		linker_file_unload(lf, LINKER_UNLOAD_FORCE);
1275	free(hdr, M_LINKER);
1276
1277	return error;
1278}
1279
1280static void
1281link_elf_unload_file(linker_file_t file)
1282{
1283	elf_file_t ef = (elf_file_t) file;
1284	u_int i;
1285
1286	link_elf_invoke_dtors(file);
1287
1288	/* Notify MD code that a module is being unloaded. */
1289	elf_cpu_unload_file(file);
1290
1291	if (ef->progtab) {
1292		for (i = 0; i < ef->nprogtab; i++) {
1293			if (ef->progtab[i].size == 0)
1294				continue;
1295			if (ef->progtab[i].name == NULL)
1296				continue;
1297			if (!strcmp(ef->progtab[i].name, DPCPU_SETNAME))
1298				dpcpu_free(ef->progtab[i].addr,
1299				    ef->progtab[i].size);
1300#ifdef VIMAGE
1301			else if (!strcmp(ef->progtab[i].name, VNET_SETNAME))
1302				vnet_data_free(ef->progtab[i].addr,
1303				    ef->progtab[i].size);
1304#endif
1305		}
1306	}
1307	if (ef->preloaded) {
1308		free(ef->reltab, M_LINKER);
1309		free(ef->relatab, M_LINKER);
1310		free(ef->progtab, M_LINKER);
1311		free(ef->ctftab, M_LINKER);
1312		free(ef->ctfoff, M_LINKER);
1313		free(ef->typoff, M_LINKER);
1314		if (file->pathname != NULL)
1315			preload_delete_name(file->pathname);
1316		return;
1317	}
1318
1319	for (i = 0; i < ef->nreltab; i++)
1320		free(ef->reltab[i].rel, M_LINKER);
1321	for (i = 0; i < ef->nrelatab; i++)
1322		free(ef->relatab[i].rela, M_LINKER);
1323	free(ef->reltab, M_LINKER);
1324	free(ef->relatab, M_LINKER);
1325	free(ef->progtab, M_LINKER);
1326
1327	if (ef->object != NULL)
1328		vm_map_remove(kernel_map, (vm_offset_t)ef->address,
1329		    (vm_offset_t)ef->address + ptoa(ef->object->size));
1330	free(ef->e_shdr, M_LINKER);
1331	free(ef->ddbsymtab, M_LINKER);
1332	free(ef->ddbstrtab, M_LINKER);
1333	free(ef->shstrtab, M_LINKER);
1334	free(ef->ctftab, M_LINKER);
1335	free(ef->ctfoff, M_LINKER);
1336	free(ef->typoff, M_LINKER);
1337}
1338
1339static const char *
1340symbol_name(elf_file_t ef, Elf_Size r_info)
1341{
1342	const Elf_Sym *ref;
1343
1344	if (ELF_R_SYM(r_info)) {
1345		ref = ef->ddbsymtab + ELF_R_SYM(r_info);
1346		return ef->ddbstrtab + ref->st_name;
1347	} else
1348		return NULL;
1349}
1350
1351static Elf_Addr
1352findbase(elf_file_t ef, int sec)
1353{
1354	int i;
1355	Elf_Addr base = 0;
1356
1357	for (i = 0; i < ef->nprogtab; i++) {
1358		if (sec == ef->progtab[i].sec) {
1359			base = (Elf_Addr)ef->progtab[i].addr;
1360			break;
1361		}
1362	}
1363	return base;
1364}
1365
1366static int
1367relocate_file1(elf_file_t ef, bool ifuncs)
1368{
1369	const Elf_Rel *rellim;
1370	const Elf_Rel *rel;
1371	const Elf_Rela *relalim;
1372	const Elf_Rela *rela;
1373	const char *symname;
1374	const Elf_Sym *sym;
1375	int i;
1376	Elf_Size symidx;
1377	Elf_Addr base;
1378
1379	/* Perform relocations without addend if there are any: */
1380	for (i = 0; i < ef->nreltab; i++) {
1381		rel = ef->reltab[i].rel;
1382		if (rel == NULL) {
1383			link_elf_error(ef->lf.filename, "lost a reltab!");
1384			return (ENOEXEC);
1385		}
1386		rellim = rel + ef->reltab[i].nrel;
1387		base = findbase(ef, ef->reltab[i].sec);
1388		if (base == 0) {
1389			link_elf_error(ef->lf.filename, "lost base for reltab");
1390			return (ENOEXEC);
1391		}
1392		for ( ; rel < rellim; rel++) {
1393			symidx = ELF_R_SYM(rel->r_info);
1394			if (symidx >= ef->ddbsymcnt)
1395				continue;
1396			sym = ef->ddbsymtab + symidx;
1397			/* Local relocs are already done */
1398			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1399				continue;
1400			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1401			    elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1402				continue;
1403			if (elf_reloc(&ef->lf, base, rel, ELF_RELOC_REL,
1404			    elf_obj_lookup)) {
1405				symname = symbol_name(ef, rel->r_info);
1406				printf("link_elf_obj: symbol %s undefined\n",
1407				    symname);
1408				return (ENOENT);
1409			}
1410		}
1411	}
1412
1413	/* Perform relocations with addend if there are any: */
1414	for (i = 0; i < ef->nrelatab; i++) {
1415		rela = ef->relatab[i].rela;
1416		if (rela == NULL) {
1417			link_elf_error(ef->lf.filename, "lost a relatab!");
1418			return (ENOEXEC);
1419		}
1420		relalim = rela + ef->relatab[i].nrela;
1421		base = findbase(ef, ef->relatab[i].sec);
1422		if (base == 0) {
1423			link_elf_error(ef->lf.filename,
1424			    "lost base for relatab");
1425			return (ENOEXEC);
1426		}
1427		for ( ; rela < relalim; rela++) {
1428			symidx = ELF_R_SYM(rela->r_info);
1429			if (symidx >= ef->ddbsymcnt)
1430				continue;
1431			sym = ef->ddbsymtab + symidx;
1432			/* Local relocs are already done */
1433			if (ELF_ST_BIND(sym->st_info) == STB_LOCAL)
1434				continue;
1435			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1436			    elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1437				continue;
1438			if (elf_reloc(&ef->lf, base, rela, ELF_RELOC_RELA,
1439			    elf_obj_lookup)) {
1440				symname = symbol_name(ef, rela->r_info);
1441				printf("link_elf_obj: symbol %s undefined\n",
1442				    symname);
1443				return (ENOENT);
1444			}
1445		}
1446	}
1447
1448	/*
1449	 * Only clean SHN_FBSD_CACHED for successful return.  If we
1450	 * modified symbol table for the object but found an
1451	 * unresolved symbol, there is no reason to roll back.
1452	 */
1453	elf_obj_cleanup_globals_cache(ef);
1454
1455	return (0);
1456}
1457
1458static int
1459relocate_file(elf_file_t ef)
1460{
1461	int error;
1462
1463	error = relocate_file1(ef, false);
1464	if (error == 0)
1465		error = relocate_file1(ef, true);
1466	return (error);
1467}
1468
1469static int
1470link_elf_lookup_symbol1(linker_file_t lf, const char *name, c_linker_sym_t *sym,
1471    bool see_local)
1472{
1473	elf_file_t ef = (elf_file_t)lf;
1474	const Elf_Sym *symp;
1475	const char *strp;
1476	int i;
1477
1478	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1479		strp = ef->ddbstrtab + symp->st_name;
1480		if (symp->st_shndx != SHN_UNDEF && strcmp(name, strp) == 0) {
1481			if (see_local ||
1482			    ELF_ST_BIND(symp->st_info) == STB_GLOBAL) {
1483				*sym = (c_linker_sym_t) symp;
1484				return (0);
1485			}
1486			return (ENOENT);
1487		}
1488	}
1489	return (ENOENT);
1490}
1491
1492static int
1493link_elf_lookup_symbol(linker_file_t lf, const char *name, c_linker_sym_t *sym)
1494{
1495	return (link_elf_lookup_symbol1(lf, name, sym,
1496	    link_elf_obj_leak_locals));
1497}
1498
1499static int
1500link_elf_lookup_debug_symbol(linker_file_t lf, const char *name,
1501    c_linker_sym_t *sym)
1502{
1503	return (link_elf_lookup_symbol1(lf, name, sym, true));
1504}
1505
1506static int
1507link_elf_lookup_debug_symbol_ctf(linker_file_t lf, const char *name,
1508    c_linker_sym_t *sym, linker_ctf_t *lc)
1509{
1510	if (link_elf_lookup_debug_symbol(lf, name, sym))
1511		return (ENOENT);
1512
1513	return (link_elf_ctf_get_ddb(lf, lc));
1514}
1515
1516static int
1517link_elf_symbol_values1(linker_file_t lf, c_linker_sym_t sym,
1518    linker_symval_t *symval, bool see_local)
1519{
1520	elf_file_t ef;
1521	const Elf_Sym *es;
1522	caddr_t val;
1523
1524	ef = (elf_file_t) lf;
1525	es = (const Elf_Sym*) sym;
1526	val = (caddr_t)es->st_value;
1527	if (es >= ef->ddbsymtab && es < (ef->ddbsymtab + ef->ddbsymcnt)) {
1528		if (!see_local && ELF_ST_BIND(es->st_info) == STB_LOCAL)
1529			return (ENOENT);
1530		symval->name = ef->ddbstrtab + es->st_name;
1531		val = (caddr_t)es->st_value;
1532		if (ELF_ST_TYPE(es->st_info) == STT_GNU_IFUNC)
1533			val = ((caddr_t (*)(void))val)();
1534		symval->value = val;
1535		symval->size = es->st_size;
1536		return (0);
1537	}
1538	return (ENOENT);
1539}
1540
1541static int
1542link_elf_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1543    linker_symval_t *symval)
1544{
1545	return (link_elf_symbol_values1(lf, sym, symval,
1546	    link_elf_obj_leak_locals));
1547}
1548
1549static int
1550link_elf_debug_symbol_values(linker_file_t lf, c_linker_sym_t sym,
1551    linker_symval_t *symval)
1552{
1553	return (link_elf_symbol_values1(lf, sym, symval, true));
1554}
1555
1556static int
1557link_elf_search_symbol(linker_file_t lf, caddr_t value,
1558    c_linker_sym_t *sym, long *diffp)
1559{
1560	elf_file_t ef = (elf_file_t)lf;
1561	u_long off = (uintptr_t)(void *)value;
1562	u_long diff = off;
1563	u_long st_value;
1564	const Elf_Sym *es;
1565	const Elf_Sym *best = NULL;
1566	int i;
1567
1568	for (i = 0, es = ef->ddbsymtab; i < ef->ddbsymcnt; i++, es++) {
1569		if (es->st_name == 0)
1570			continue;
1571		st_value = es->st_value;
1572		if (off >= st_value) {
1573			if (off - st_value < diff) {
1574				diff = off - st_value;
1575				best = es;
1576				if (diff == 0)
1577					break;
1578			} else if (off - st_value == diff) {
1579				best = es;
1580			}
1581		}
1582	}
1583	if (best == NULL)
1584		*diffp = off;
1585	else
1586		*diffp = diff;
1587	*sym = (c_linker_sym_t) best;
1588
1589	return (0);
1590}
1591
1592/*
1593 * Look up a linker set on an ELF system.
1594 */
1595static int
1596link_elf_lookup_set(linker_file_t lf, const char *name,
1597    void ***startp, void ***stopp, int *countp)
1598{
1599	elf_file_t ef = (elf_file_t)lf;
1600	void **start, **stop;
1601	int i, count;
1602
1603	/* Relative to section number */
1604	for (i = 0; i < ef->nprogtab; i++) {
1605		if ((strncmp(ef->progtab[i].name, "set_", 4) == 0) &&
1606		    strcmp(ef->progtab[i].name + 4, name) == 0) {
1607			start  = (void **)ef->progtab[i].addr;
1608			stop = (void **)((char *)ef->progtab[i].addr +
1609			    ef->progtab[i].size);
1610			count = stop - start;
1611			if (startp)
1612				*startp = start;
1613			if (stopp)
1614				*stopp = stop;
1615			if (countp)
1616				*countp = count;
1617			return (0);
1618		}
1619	}
1620	return (ESRCH);
1621}
1622
1623static int
1624link_elf_each_function_name(linker_file_t file,
1625    int (*callback)(const char *, void *), void *opaque)
1626{
1627	elf_file_t ef = (elf_file_t)file;
1628	const Elf_Sym *symp;
1629	int i, error;
1630
1631	/* Exhaustive search */
1632	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1633		if (symp->st_value != 0 &&
1634		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1635		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1636			error = callback(ef->ddbstrtab + symp->st_name, opaque);
1637			if (error)
1638				return (error);
1639		}
1640	}
1641	return (0);
1642}
1643
1644static int
1645link_elf_each_function_nameval(linker_file_t file,
1646    linker_function_nameval_callback_t callback, void *opaque)
1647{
1648	linker_symval_t symval;
1649	elf_file_t ef = (elf_file_t)file;
1650	const Elf_Sym *symp;
1651	int i, error;
1652
1653	/* Exhaustive search */
1654	for (i = 0, symp = ef->ddbsymtab; i < ef->ddbsymcnt; i++, symp++) {
1655		if (symp->st_value != 0 &&
1656		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC ||
1657		    ELF_ST_TYPE(symp->st_info) == STT_GNU_IFUNC)) {
1658			error = link_elf_debug_symbol_values(file,
1659			    (c_linker_sym_t)symp, &symval);
1660			if (error == 0)
1661				error = callback(file, i, &symval, opaque);
1662			if (error != 0)
1663				return (error);
1664		}
1665	}
1666	return (0);
1667}
1668
1669static void
1670elf_obj_cleanup_globals_cache(elf_file_t ef)
1671{
1672	Elf_Sym *sym;
1673	Elf_Size i;
1674
1675	for (i = 0; i < ef->ddbsymcnt; i++) {
1676		sym = ef->ddbsymtab + i;
1677		if (sym->st_shndx == SHN_FBSD_CACHED) {
1678			sym->st_shndx = SHN_UNDEF;
1679			sym->st_value = 0;
1680		}
1681	}
1682}
1683
1684/*
1685 * Symbol lookup function that can be used when the symbol index is known (ie
1686 * in relocations). It uses the symbol index instead of doing a fully fledged
1687 * hash table based lookup when such is valid. For example for local symbols.
1688 * This is not only more efficient, it's also more correct. It's not always
1689 * the case that the symbol can be found through the hash table.
1690 */
1691static int
1692elf_obj_lookup(linker_file_t lf, Elf_Size symidx, int deps, Elf_Addr *res)
1693{
1694	elf_file_t ef = (elf_file_t)lf;
1695	Elf_Sym *sym;
1696	const char *symbol;
1697	Elf_Addr res1;
1698
1699	/* Don't even try to lookup the symbol if the index is bogus. */
1700	if (symidx >= ef->ddbsymcnt) {
1701		*res = 0;
1702		return (EINVAL);
1703	}
1704
1705	sym = ef->ddbsymtab + symidx;
1706
1707	/* Quick answer if there is a definition included. */
1708	if (sym->st_shndx != SHN_UNDEF) {
1709		res1 = (Elf_Addr)sym->st_value;
1710		if (ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC)
1711			res1 = ((Elf_Addr (*)(void))res1)();
1712		*res = res1;
1713		return (0);
1714	}
1715
1716	/* If we get here, then it is undefined and needs a lookup. */
1717	switch (ELF_ST_BIND(sym->st_info)) {
1718	case STB_LOCAL:
1719		/* Local, but undefined? huh? */
1720		*res = 0;
1721		return (EINVAL);
1722
1723	case STB_GLOBAL:
1724	case STB_WEAK:
1725		/* Relative to Data or Function name */
1726		symbol = ef->ddbstrtab + sym->st_name;
1727
1728		/* Force a lookup failure if the symbol name is bogus. */
1729		if (*symbol == 0) {
1730			*res = 0;
1731			return (EINVAL);
1732		}
1733		res1 = (Elf_Addr)linker_file_lookup_symbol(lf, symbol, deps);
1734
1735		/*
1736		 * Cache global lookups during module relocation. The failure
1737		 * case is particularly expensive for callers, who must scan
1738		 * through the entire globals table doing strcmp(). Cache to
1739		 * avoid doing such work repeatedly.
1740		 *
1741		 * After relocation is complete, undefined globals will be
1742		 * restored to SHN_UNDEF in elf_obj_cleanup_globals_cache(),
1743		 * above.
1744		 */
1745		if (res1 != 0) {
1746			sym->st_shndx = SHN_FBSD_CACHED;
1747			sym->st_value = res1;
1748			*res = res1;
1749			return (0);
1750		} else if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
1751			sym->st_value = 0;
1752			*res = 0;
1753			return (0);
1754		}
1755		return (EINVAL);
1756
1757	default:
1758		return (EINVAL);
1759	}
1760}
1761
1762static void
1763link_elf_fix_link_set(elf_file_t ef)
1764{
1765	static const char startn[] = "__start_";
1766	static const char stopn[] = "__stop_";
1767	Elf_Sym *sym;
1768	const char *sym_name, *linkset_name;
1769	Elf_Addr startp, stopp;
1770	Elf_Size symidx;
1771	int start, i;
1772
1773	startp = stopp = 0;
1774	for (symidx = 1 /* zero entry is special */;
1775		symidx < ef->ddbsymcnt; symidx++) {
1776		sym = ef->ddbsymtab + symidx;
1777		if (sym->st_shndx != SHN_UNDEF)
1778			continue;
1779
1780		sym_name = ef->ddbstrtab + sym->st_name;
1781		if (strncmp(sym_name, startn, sizeof(startn) - 1) == 0) {
1782			start = 1;
1783			linkset_name = sym_name + sizeof(startn) - 1;
1784		}
1785		else if (strncmp(sym_name, stopn, sizeof(stopn) - 1) == 0) {
1786			start = 0;
1787			linkset_name = sym_name + sizeof(stopn) - 1;
1788		}
1789		else
1790			continue;
1791
1792		for (i = 0; i < ef->nprogtab; i++) {
1793			if (strcmp(ef->progtab[i].name, linkset_name) == 0) {
1794				startp = (Elf_Addr)ef->progtab[i].addr;
1795				stopp = (Elf_Addr)(startp + ef->progtab[i].size);
1796				break;
1797			}
1798		}
1799		if (i == ef->nprogtab)
1800			continue;
1801
1802		sym->st_value = start ? startp : stopp;
1803		sym->st_shndx = i;
1804	}
1805}
1806
1807static int
1808link_elf_reloc_local(linker_file_t lf, bool ifuncs)
1809{
1810	elf_file_t ef = (elf_file_t)lf;
1811	const Elf_Rel *rellim;
1812	const Elf_Rel *rel;
1813	const Elf_Rela *relalim;
1814	const Elf_Rela *rela;
1815	const Elf_Sym *sym;
1816	Elf_Addr base;
1817	int i;
1818	Elf_Size symidx;
1819
1820	link_elf_fix_link_set(ef);
1821
1822	/* Perform relocations without addend if there are any: */
1823	for (i = 0; i < ef->nreltab; i++) {
1824		rel = ef->reltab[i].rel;
1825		if (rel == NULL) {
1826			link_elf_error(ef->lf.filename, "lost a reltab");
1827			return (ENOEXEC);
1828		}
1829		rellim = rel + ef->reltab[i].nrel;
1830		base = findbase(ef, ef->reltab[i].sec);
1831		if (base == 0) {
1832			link_elf_error(ef->lf.filename, "lost base for reltab");
1833			return (ENOEXEC);
1834		}
1835		for ( ; rel < rellim; rel++) {
1836			symidx = ELF_R_SYM(rel->r_info);
1837			if (symidx >= ef->ddbsymcnt)
1838				continue;
1839			sym = ef->ddbsymtab + symidx;
1840			/* Only do local relocs */
1841			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1842				continue;
1843			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1844			    elf_is_ifunc_reloc(rel->r_info)) != ifuncs)
1845				continue;
1846			if (elf_reloc_local(lf, base, rel, ELF_RELOC_REL,
1847			    elf_obj_lookup) != 0)
1848				return (ENOEXEC);
1849		}
1850	}
1851
1852	/* Perform relocations with addend if there are any: */
1853	for (i = 0; i < ef->nrelatab; i++) {
1854		rela = ef->relatab[i].rela;
1855		if (rela == NULL) {
1856			link_elf_error(ef->lf.filename, "lost a relatab!");
1857			return (ENOEXEC);
1858		}
1859		relalim = rela + ef->relatab[i].nrela;
1860		base = findbase(ef, ef->relatab[i].sec);
1861		if (base == 0) {
1862			link_elf_error(ef->lf.filename, "lost base for reltab");
1863			return (ENOEXEC);
1864		}
1865		for ( ; rela < relalim; rela++) {
1866			symidx = ELF_R_SYM(rela->r_info);
1867			if (symidx >= ef->ddbsymcnt)
1868				continue;
1869			sym = ef->ddbsymtab + symidx;
1870			/* Only do local relocs */
1871			if (ELF_ST_BIND(sym->st_info) != STB_LOCAL)
1872				continue;
1873			if ((ELF_ST_TYPE(sym->st_info) == STT_GNU_IFUNC ||
1874			    elf_is_ifunc_reloc(rela->r_info)) != ifuncs)
1875				continue;
1876			if (elf_reloc_local(lf, base, rela, ELF_RELOC_RELA,
1877			    elf_obj_lookup) != 0)
1878				return (ENOEXEC);
1879		}
1880	}
1881	return (0);
1882}
1883
1884static long
1885link_elf_symtab_get(linker_file_t lf, const Elf_Sym **symtab)
1886{
1887	elf_file_t ef = (elf_file_t)lf;
1888
1889	*symtab = ef->ddbsymtab;
1890	if (*symtab == NULL)
1891		return (0);
1892	return (ef->ddbsymcnt);
1893}
1894
1895static long
1896link_elf_strtab_get(linker_file_t lf, caddr_t *strtab)
1897{
1898	elf_file_t ef = (elf_file_t)lf;
1899
1900	*strtab = ef->ddbstrtab;
1901	if (*strtab == NULL)
1902		return (0);
1903	return (ef->ddbstrcnt);
1904}
1905
1906#ifdef VIMAGE
1907static void
1908link_elf_propagate_vnets(linker_file_t lf)
1909{
1910	elf_file_t ef = (elf_file_t) lf;
1911
1912	if (ef->progtab) {
1913		for (int i = 0; i < ef->nprogtab; i++) {
1914			if (ef->progtab[i].size == 0)
1915				continue;
1916			if (ef->progtab[i].name == NULL)
1917				continue;
1918			if (strcmp(ef->progtab[i].name, VNET_SETNAME) == 0) {
1919				vnet_data_copy(ef->progtab[i].addr,
1920				    ef->progtab[i].size);
1921				break;
1922			}
1923		}
1924	}
1925}
1926#endif
1927