1/*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 2000, Boris Popov
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *    This product includes software developed by Boris Popov.
18 * 4. Neither the name of the author nor the names of any co-contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/param.h>
36
37#include <err.h>
38#include <errno.h>
39#include <gelf.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43
44#include "ef.h"
45
46#define	MAXSEGS 16
47struct ef_file {
48	char		*ef_name;
49	struct elf_file *ef_efile;
50	GElf_Phdr	*ef_ph;
51	void		*ef_fpage;		/* First block of the file */
52	int		ef_fplen;		/* length of first block */
53	GElf_Hashelt	ef_nbuckets;
54	GElf_Hashelt	ef_nchains;
55	GElf_Hashelt	*ef_buckets;
56	GElf_Hashelt	*ef_chains;
57	GElf_Hashelt	*ef_hashtab;
58	caddr_t		ef_strtab;
59	long		ef_strsz;
60	GElf_Sym	*ef_symtab;
61	int		ef_nsegs;
62	GElf_Phdr	*ef_segs[MAXSEGS];
63	int		ef_verbose;
64	GElf_Rel	*ef_rel;		/* relocation table */
65	long		ef_relsz;		/* number of entries */
66	GElf_Rela	*ef_rela;		/* relocation table */
67	long		ef_relasz;		/* number of entries */
68};
69
70static void	ef_print_phdr(GElf_Phdr *);
71static GElf_Off	ef_get_offset(elf_file_t, GElf_Addr);
72
73static void	ef_close(elf_file_t ef);
74
75static int	ef_seg_read_rel(elf_file_t ef, GElf_Addr address, size_t len,
76		    void *dest);
77static int	ef_seg_read_string(elf_file_t ef, GElf_Addr address, size_t len,
78		    char *dest);
79
80static GElf_Addr ef_symaddr(elf_file_t ef, GElf_Size symidx);
81static int	ef_lookup_set(elf_file_t ef, const char *name,
82		    GElf_Addr *startp, GElf_Addr *stopp, long *countp);
83static int	ef_lookup_symbol(elf_file_t ef, const char *name,
84		    GElf_Sym **sym);
85
86static struct elf_file_ops ef_file_ops = {
87	.close			= ef_close,
88	.seg_read_rel		= ef_seg_read_rel,
89	.seg_read_string	= ef_seg_read_string,
90	.symaddr		= ef_symaddr,
91	.lookup_set		= ef_lookup_set,
92};
93
94static void
95ef_print_phdr(GElf_Phdr *phdr)
96{
97
98	if ((phdr->p_flags & PF_W) == 0) {
99		printf("text=0x%jx ", (uintmax_t)phdr->p_filesz);
100	} else {
101		printf("data=0x%jx", (uintmax_t)phdr->p_filesz);
102		if (phdr->p_filesz < phdr->p_memsz)
103			printf("+0x%jx",
104			    (uintmax_t)(phdr->p_memsz - phdr->p_filesz));
105		printf(" ");
106	}
107}
108
109static GElf_Off
110ef_get_offset(elf_file_t ef, GElf_Addr addr)
111{
112	GElf_Phdr *ph;
113	int i;
114
115	for (i = 0; i < ef->ef_nsegs; i++) {
116		ph = ef->ef_segs[i];
117		if (addr >= ph->p_vaddr && addr < ph->p_vaddr + ph->p_memsz) {
118			return (ph->p_offset + (addr - ph->p_vaddr));
119		}
120	}
121	return (0);
122}
123
124/*
125 * next two functions copied from link_elf.c
126 */
127static int
128ef_lookup_symbol(elf_file_t ef, const char *name, GElf_Sym **sym)
129{
130	unsigned long hash, symnum;
131	GElf_Sym *symp;
132	char *strp;
133
134	/* First, search hashed global symbols */
135	hash = elf_hash(name);
136	symnum = ef->ef_buckets[hash % ef->ef_nbuckets];
137
138	while (symnum != STN_UNDEF) {
139		if (symnum >= ef->ef_nchains) {
140			warnx("ef_lookup_symbol: file %s have corrupted symbol table\n",
141			    ef->ef_name);
142			return (ENOENT);
143		}
144
145		symp = ef->ef_symtab + symnum;
146		if (symp->st_name == 0) {
147			warnx("ef_lookup_symbol: file %s have corrupted symbol table\n",
148			    ef->ef_name);
149			return (ENOENT);
150		}
151
152		strp = ef->ef_strtab + symp->st_name;
153
154		if (strcmp(name, strp) == 0) {
155			if (symp->st_shndx != SHN_UNDEF ||
156			    (symp->st_value != 0 &&
157				GELF_ST_TYPE(symp->st_info) == STT_FUNC)) {
158				*sym = symp;
159				return (0);
160			} else
161				return (ENOENT);
162		}
163
164		symnum = ef->ef_chains[symnum];
165	}
166
167	return (ENOENT);
168}
169
170static int
171ef_lookup_set(elf_file_t ef, const char *name, GElf_Addr *startp,
172    GElf_Addr *stopp, long *countp)
173{
174	GElf_Sym *sym;
175	char *setsym;
176	int error, len;
177
178	len = strlen(name) + sizeof("__start_set_"); /* sizeof includes \0 */
179	setsym = malloc(len);
180	if (setsym == NULL)
181		return (errno);
182
183	/* get address of first entry */
184	snprintf(setsym, len, "%s%s", "__start_set_", name);
185	error = ef_lookup_symbol(ef, setsym, &sym);
186	if (error != 0)
187		goto out;
188	*startp = sym->st_value;
189
190	/* get address of last entry */
191	snprintf(setsym, len, "%s%s", "__stop_set_", name);
192	error = ef_lookup_symbol(ef, setsym, &sym);
193	if (error != 0)
194		goto out;
195	*stopp = sym->st_value;
196
197	/* and the number of entries */
198	*countp = (*stopp - *startp) / elf_pointer_size(ef->ef_efile);
199
200out:
201	free(setsym);
202	return (error);
203}
204
205static GElf_Addr
206ef_symaddr(elf_file_t ef, GElf_Size symidx)
207{
208	const GElf_Sym *sym;
209
210	if (symidx >= ef->ef_nchains)
211		return (0);
212	sym = ef->ef_symtab + symidx;
213
214	if (GELF_ST_BIND(sym->st_info) == STB_LOCAL &&
215	    sym->st_shndx != SHN_UNDEF && sym->st_value != 0)
216		return (sym->st_value);
217	return (0);
218}
219
220static int
221ef_parse_dynamic(elf_file_t ef, const GElf_Phdr *phdyn)
222{
223	GElf_Shdr *shdr;
224	GElf_Dyn *dyn, *dp;
225	size_t i, ndyn, nshdr, nsym;
226	int error;
227	GElf_Off hash_off, sym_off, str_off;
228	GElf_Off rel_off;
229	GElf_Off rela_off;
230	int rel_sz;
231	int rela_sz;
232	int dynamic_idx;
233
234	/*
235	 * The kernel linker parses the PT_DYNAMIC segment to find
236	 * various important tables.  The gelf API of libelf is
237	 * section-oriented and requires extracting data from sections
238	 * instead of segments (program headers).  As a result,
239	 * iterate over section headers to read various tables after
240	 * parsing values from PT_DYNAMIC.
241	 */
242	error = elf_read_shdrs(ef->ef_efile, &nshdr, &shdr);
243	if (error != 0)
244		return (EFTYPE);
245	dyn = NULL;
246
247	/* Find section for .dynamic. */
248	dynamic_idx = -1;
249	for (i = 0; i < nshdr; i++) {
250		if (shdr[i].sh_type == SHT_DYNAMIC) {
251			/*
252			 * PowerPC kernels contain additional sections
253			 * beyond .dynamic in PT_DYNAMIC due to a linker
254			 * script bug.  Permit a section with a smaller
255			 * size as a workaround.
256			 */
257			if (shdr[i].sh_offset != phdyn->p_offset ||
258			    ((elf_machine(ef->ef_efile) == EM_PPC ||
259			    elf_machine(ef->ef_efile) == EM_PPC64) ?
260			    shdr[i].sh_size > phdyn->p_filesz :
261			    shdr[i].sh_size != phdyn->p_filesz)) {
262				warnx(".dynamic section doesn't match phdr");
263				error = EFTYPE;
264				goto out;
265			}
266			if (dynamic_idx != -1) {
267				warnx("multiple SHT_DYNAMIC sections");
268				error = EFTYPE;
269				goto out;
270			}
271			dynamic_idx = i;
272		}
273	}
274
275	error = elf_read_dynamic(ef->ef_efile, dynamic_idx, &ndyn, &dyn);
276	if (error != 0)
277		goto out;
278
279	hash_off = rel_off = rela_off = sym_off = str_off = 0;
280	rel_sz = rela_sz = 0;
281	for (i = 0; i < ndyn; i++) {
282		dp = &dyn[i];
283		if (dp->d_tag == DT_NULL)
284			break;
285
286		switch (dp->d_tag) {
287		case DT_HASH:
288			if (hash_off != 0)
289				warnx("second DT_HASH entry ignored");
290			else
291				hash_off = ef_get_offset(ef, dp->d_un.d_ptr);
292			break;
293		case DT_STRTAB:
294			if (str_off != 0)
295				warnx("second DT_STRTAB entry ignored");
296			else
297				str_off = ef_get_offset(ef, dp->d_un.d_ptr);
298			break;
299		case DT_SYMTAB:
300			if (sym_off != 0)
301				warnx("second DT_SYMTAB entry ignored");
302			else
303				sym_off = ef_get_offset(ef, dp->d_un.d_ptr);
304			break;
305		case DT_SYMENT:
306			if (dp->d_un.d_val != elf_object_size(ef->ef_efile,
307			    ELF_T_SYM)) {
308				error = EFTYPE;
309				goto out;
310			}
311			break;
312		case DT_REL:
313			if (rel_off != 0)
314				warnx("second DT_REL entry ignored");
315			else
316				rel_off = ef_get_offset(ef, dp->d_un.d_ptr);
317			break;
318		case DT_RELSZ:
319			if (rel_sz != 0)
320				warnx("second DT_RELSZ entry ignored");
321			else
322				rel_sz = dp->d_un.d_val;
323			break;
324		case DT_RELENT:
325			if (dp->d_un.d_val != elf_object_size(ef->ef_efile,
326			    ELF_T_REL)) {
327				error = EFTYPE;
328				goto out;
329			}
330			break;
331		case DT_RELA:
332			if (rela_off != 0)
333				warnx("second DT_RELA entry ignored");
334			else
335				rela_off = ef_get_offset(ef, dp->d_un.d_ptr);
336			break;
337		case DT_RELASZ:
338			if (rela_sz != 0)
339				warnx("second DT_RELSZ entry ignored");
340			else
341				rela_sz = dp->d_un.d_val;
342			break;
343		case DT_RELAENT:
344			if (dp->d_un.d_val != elf_object_size(ef->ef_efile,
345			    ELF_T_RELA)) {
346				error = EFTYPE;
347				goto out;
348			}
349			break;
350		}
351	}
352	if (hash_off == 0) {
353		warnx("%s: no .hash section found\n", ef->ef_name);
354		error = EFTYPE;
355		goto out;
356	}
357	if (sym_off == 0) {
358		warnx("%s: no .dynsym section found\n", ef->ef_name);
359		error = EFTYPE;
360		goto out;
361	}
362	if (str_off == 0) {
363		warnx("%s: no .dynstr section found\n", ef->ef_name);
364		error = EFTYPE;
365		goto out;
366	}
367	if (rel_off == 0 && rela_off == 0) {
368		warnx("%s: no ELF relocation table found\n", ef->ef_name);
369		error = EFTYPE;
370		goto out;
371	}
372
373	nsym = 0;
374	for (i = 0; i < nshdr; i++) {
375		switch (shdr[i].sh_type) {
376		case SHT_HASH:
377			if (shdr[i].sh_offset != hash_off) {
378				warnx("%s: ignoring SHT_HASH at different offset from DT_HASH",
379				    ef->ef_name);
380				break;
381			}
382
383			/*
384			 * libelf(3) mentions ELF_T_HASH, but it is
385			 * not defined.
386			 */
387			if (shdr[i].sh_size < sizeof(*ef->ef_hashtab) * 2) {
388				warnx("hash section too small");
389				error = EFTYPE;
390				goto out;
391			}
392			error = elf_read_data(ef->ef_efile, ELF_T_WORD,
393			    shdr[i].sh_offset, shdr[i].sh_size,
394			    (void **)&ef->ef_hashtab);
395			if (error != 0) {
396				warnc(error, "can't read hash table");
397				goto out;
398			}
399			ef->ef_nbuckets = ef->ef_hashtab[0];
400			ef->ef_nchains = ef->ef_hashtab[1];
401			if ((2 + ef->ef_nbuckets + ef->ef_nchains) *
402			    sizeof(*ef->ef_hashtab) != shdr[i].sh_size) {
403				warnx("inconsistent hash section size");
404				error = EFTYPE;
405				goto out;
406			}
407
408			ef->ef_buckets = ef->ef_hashtab + 2;
409			ef->ef_chains = ef->ef_buckets + ef->ef_nbuckets;
410			break;
411		case SHT_DYNSYM:
412			if (shdr[i].sh_offset != sym_off) {
413				warnx("%s: ignoring SHT_DYNSYM at different offset from DT_SYMTAB",
414				    ef->ef_name);
415				break;
416			}
417			error = elf_read_symbols(ef->ef_efile, i, &nsym,
418			    &ef->ef_symtab);
419			if (error != 0) {
420				if (ef->ef_verbose)
421					warnx("%s: can't load .dynsym section (0x%jx)",
422					    ef->ef_name, (uintmax_t)sym_off);
423				goto out;
424			}
425			break;
426		case SHT_STRTAB:
427			if (shdr[i].sh_offset != str_off)
428				break;
429			error = elf_read_string_table(ef->ef_efile,
430			    &shdr[i], &ef->ef_strsz, &ef->ef_strtab);
431			if (error != 0) {
432				warnx("can't load .dynstr section");
433				error = EIO;
434				goto out;
435			}
436			break;
437		case SHT_REL:
438			if (shdr[i].sh_offset != rel_off)
439				break;
440			if (shdr[i].sh_size != rel_sz) {
441				warnx("%s: size mismatch for DT_REL section",
442				    ef->ef_name);
443				error = EFTYPE;
444				goto out;
445			}
446			error = elf_read_rel(ef->ef_efile, i, &ef->ef_relsz,
447			    &ef->ef_rel);
448			if (error != 0) {
449				warnx("%s: cannot load DT_REL section",
450				    ef->ef_name);
451				goto out;
452			}
453			break;
454		case SHT_RELA:
455			if (shdr[i].sh_offset != rela_off)
456				break;
457			if (shdr[i].sh_size != rela_sz) {
458				warnx("%s: size mismatch for DT_RELA section",
459				    ef->ef_name);
460				error = EFTYPE;
461				goto out;
462			}
463			error = elf_read_rela(ef->ef_efile, i, &ef->ef_relasz,
464			    &ef->ef_rela);
465			if (error != 0) {
466				warnx("%s: cannot load DT_RELA section",
467				    ef->ef_name);
468				goto out;
469			}
470			break;
471		}
472	}
473
474	if (ef->ef_hashtab == NULL) {
475		warnx("%s: did not find a symbol hash table", ef->ef_name);
476		error = EFTYPE;
477		goto out;
478	}
479	if (ef->ef_symtab == NULL) {
480		warnx("%s: did not find a dynamic symbol table", ef->ef_name);
481		error = EFTYPE;
482		goto out;
483	}
484	if (nsym != ef->ef_nchains) {
485		warnx("%s: symbol count mismatch", ef->ef_name);
486		error = EFTYPE;
487		goto out;
488	}
489	if (ef->ef_strtab == NULL) {
490		warnx("%s: did not find a dynamic string table", ef->ef_name);
491		error = EFTYPE;
492		goto out;
493	}
494	if (rel_off != 0 && ef->ef_rel == NULL) {
495		warnx("%s: did not find a DT_REL relocation table",
496		    ef->ef_name);
497		error = EFTYPE;
498		goto out;
499	}
500	if (rela_off != 0 && ef->ef_rela == NULL) {
501		warnx("%s: did not find a DT_RELA relocation table",
502		    ef->ef_name);
503		error = EFTYPE;
504		goto out;
505	}
506
507	error = 0;
508out:
509	free(dyn);
510	free(shdr);
511	return (error);
512}
513
514static int
515ef_seg_read_rel(elf_file_t ef, GElf_Addr address, size_t len, void *dest)
516{
517	GElf_Off ofs;
518	const GElf_Rela *a;
519	const GElf_Rel *r;
520	int error;
521
522	ofs = ef_get_offset(ef, address);
523	if (ofs == 0) {
524		if (ef->ef_verbose)
525			warnx("ef_seg_read_rel(%s): bad address (%jx)",
526			    ef->ef_name, (uintmax_t)address);
527		return (EFAULT);
528	}
529	error = elf_read_raw_data(ef->ef_efile, ofs, dest, len);
530	if (error != 0)
531		return (error);
532
533	for (r = ef->ef_rel; r < &ef->ef_rel[ef->ef_relsz]; r++) {
534		error = elf_reloc(ef->ef_efile, r, ELF_T_REL, 0, address,
535		    len, dest);
536		if (error != 0)
537			return (error);
538	}
539	for (a = ef->ef_rela; a < &ef->ef_rela[ef->ef_relasz]; a++) {
540		error = elf_reloc(ef->ef_efile, a, ELF_T_RELA, 0, address,
541		    len, dest);
542		if (error != 0)
543			return (error);
544	}
545	return (0);
546}
547
548static int
549ef_seg_read_string(elf_file_t ef, GElf_Addr address, size_t len, char *dest)
550{
551	GElf_Off ofs;
552
553	ofs = ef_get_offset(ef, address);
554	if (ofs == 0) {
555		if (ef->ef_verbose)
556			warnx("ef_seg_read_string(%s): bad offset (%jx:%ju)",
557			    ef->ef_name, (uintmax_t)address, (uintmax_t)ofs);
558		return (EFAULT);
559	}
560
561	return (elf_read_raw_string(ef->ef_efile, ofs, dest, len));
562}
563
564int
565ef_open(struct elf_file *efile, int verbose)
566{
567	elf_file_t ef;
568	GElf_Ehdr *hdr;
569	size_t i, nphdr, nsegs;
570	int error;
571	GElf_Phdr *phdr, *phdyn;
572
573	hdr = &efile->ef_hdr;
574	if (hdr->e_phnum == 0 ||
575	    hdr->e_phentsize != elf_object_size(efile, ELF_T_PHDR) ||
576	    hdr->e_shnum == 0 || hdr->e_shoff == 0 ||
577	    hdr->e_shentsize != elf_object_size(efile, ELF_T_SHDR))
578		return (EFTYPE);
579
580	ef = malloc(sizeof(*ef));
581	if (ef == NULL)
582		return (errno);
583
584	efile->ef_ef = ef;
585	efile->ef_ops = &ef_file_ops;
586
587	bzero(ef, sizeof(*ef));
588	ef->ef_verbose = verbose;
589	ef->ef_name = strdup(efile->ef_filename);
590	ef->ef_efile = efile;
591
592	error = elf_read_phdrs(efile, &nphdr, &ef->ef_ph);
593	if (error != 0) {
594		phdr = NULL;
595		goto out;
596	}
597
598	error = EFTYPE;
599	nsegs = 0;
600	phdyn = NULL;
601	phdr = ef->ef_ph;
602	for (i = 0; i < nphdr; i++, phdr++) {
603		if (verbose > 1)
604			ef_print_phdr(phdr);
605		switch (phdr->p_type) {
606		case PT_LOAD:
607			if (nsegs < MAXSEGS)
608				ef->ef_segs[nsegs] = phdr;
609			nsegs++;
610			break;
611		case PT_PHDR:
612			break;
613		case PT_DYNAMIC:
614			phdyn = phdr;
615			break;
616		}
617	}
618	if (verbose > 1)
619		printf("\n");
620	if (phdyn == NULL) {
621		warnx("Skipping %s: not dynamically-linked",
622		    ef->ef_name);
623		goto out;
624	}
625
626	if (nsegs > MAXSEGS) {
627		warnx("%s: too many segments", ef->ef_name);
628		goto out;
629	}
630	ef->ef_nsegs = nsegs;
631
632	error = ef_parse_dynamic(ef, phdyn);
633out:
634	if (error != 0)
635		ef_close(ef);
636	return (error);
637}
638
639static void
640ef_close(elf_file_t ef)
641{
642	free(ef->ef_rela);
643	free(ef->ef_rel);
644	free(ef->ef_strtab);
645	free(ef->ef_symtab);
646	free(ef->ef_hashtab);
647	free(ef->ef_ph);
648	if (ef->ef_name)
649		free(ef->ef_name);
650	ef->ef_efile->ef_ops = NULL;
651	ef->ef_efile->ef_ef = NULL;
652	free(ef);
653}
654