1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2016-2017 Mark Johnston <markj@FreeBSD.org>
5 * Copyright (c) 2010 The FreeBSD Foundation
6 * Copyright (c) 2008 John Birrell (jb@freebsd.org)
7 * All rights reserved.
8 *
9 * Portions of this software were developed by Rui Paulo under sponsorship
10 * from the FreeBSD Foundation.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/types.h>
35#ifndef NO_CTF
36#include <sys/ctf.h>
37#include <sys/ctf_api.h>
38#endif
39#include <sys/user.h>
40
41#include <assert.h>
42#include <err.h>
43#include <fcntl.h>
44#include <libgen.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <unistd.h>
49#ifndef NO_CTF
50#include <libctf.h>
51#endif
52#include <libutil.h>
53
54#include <zlib.h>
55#include "_libproc.h"
56
/* System-wide directory that open_object() searches for debug files. */
#define	PATH_DEBUG_DIR	"/usr/lib/debug"

/*
 * When NO_CTF is defined the CTF headers are not included, so supply an
 * incomplete type for the ctf_file_t pointer returned by proc_name2ctf().
 */
#ifdef NO_CTF
typedef struct ctf_file ctf_file_t;
#endif

/*
 * Prototype for the C++ demangler called by demangle().  It is declared by
 * hand here rather than pulled in through a header.
 */
#ifndef NO_CXA_DEMANGLE
extern char *__cxa_demangle(const char *, char *, size_t *, int *);
#endif /* NO_CXA_DEMANGLE */
66
67static int
68crc32_file(int fd, uint32_t *crc)
69{
70	char buf[MAXPHYS];
71	ssize_t nr;
72
73	*crc = crc32(0L, Z_NULL, 0);
74	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
75		*crc = crc32(*crc, (char *)buf, nr);
76	}
77	return (!!nr);
78}
79
/*
 * Copy a printable form of symbol into buf, truncating to len bytes.
 *
 * Unless NO_CXA_DEMANGLE is defined, names that begin with "_Z" followed by
 * at least one more character are passed to __cxa_demangle().  Every other
 * name, and any name the demangler rejects, is copied unchanged.
 */
static void
demangle(const char *symbol, char *buf, size_t len)
{
#ifndef NO_CXA_DEMANGLE
	if (symbol[0] == '_' && symbol[1] == 'Z' && symbol[2] != '\0') {
		char *pretty;

		pretty = __cxa_demangle(symbol, NULL, NULL, NULL);
		if (pretty != NULL) {
			strlcpy(buf, pretty, len);
			/* The demangled string was allocated for us. */
			free(pretty);
			return;
		}
	}
#endif /* NO_CXA_DEMANGLE */
	strlcpy(buf, symbol, len);
}
98
/*
 * Context passed to symvalcmp() through qsort_r(): the ELF descriptor and the
 * symbol table whose index array is being sorted.
 */
struct symsort_thunk {
	Elf *e;			/* descriptor used to resolve symbol names */
	struct symtab *symtab;	/* table providing symbol data and stridx */
};
103
104static int
105symvalcmp(const void *a1, const void *a2, void *_thunk)
106{
107	GElf_Sym sym1, sym2;
108	struct symsort_thunk *thunk;
109	const char *s1, *s2;
110	u_int i1, i2;
111	int bind1, bind2;
112
113	i1 = *(const u_int *)a1;
114	i2 = *(const u_int *)a2;
115	thunk = _thunk;
116
117	(void)gelf_getsym(thunk->symtab->data, i1, &sym1);
118	(void)gelf_getsym(thunk->symtab->data, i2, &sym2);
119
120	if (sym1.st_value != sym2.st_value)
121		return (sym1.st_value < sym2.st_value ? -1 : 1);
122
123	/* Prefer non-local symbols. */
124	bind1 = GELF_ST_BIND(sym1.st_info);
125	bind2 = GELF_ST_BIND(sym2.st_info);
126	if (bind1 != bind2) {
127		if (bind1 == STB_LOCAL && bind2 != STB_LOCAL)
128			return (-1);
129		if (bind1 != STB_LOCAL && bind2 == STB_LOCAL)
130			return (1);
131	}
132
133	s1 = elf_strptr(thunk->e, thunk->symtab->stridx, sym1.st_name);
134	s2 = elf_strptr(thunk->e, thunk->symtab->stridx, sym2.st_name);
135	if (s1 != NULL && s2 != NULL) {
136		/* Prefer symbols without a leading '$'. */
137		if (*s1 == '$')
138			return (-1);
139		if (*s2 == '$')
140			return (1);
141
142		/* Prefer symbols with fewer leading underscores. */
143		for (; *s1 == '_' && *s2 == '_'; s1++, s2++)
144			;
145		if (*s1 == '_')
146			return (-1);
147		if (*s2 == '_')
148			return (1);
149	}
150
151	return (0);
152}
153
154static int
155load_symtab(Elf *e, struct symtab *symtab, u_long sh_type)
156{
157	GElf_Ehdr ehdr;
158	GElf_Shdr shdr;
159	struct symsort_thunk thunk;
160	Elf_Scn *scn;
161	u_int nsyms;
162
163	if (gelf_getehdr(e, &ehdr) == NULL)
164		return (-1);
165
166	scn = NULL;
167	while ((scn = elf_nextscn(e, scn)) != NULL) {
168		(void)gelf_getshdr(scn, &shdr);
169		if (shdr.sh_type == sh_type)
170			break;
171	}
172	if (scn == NULL)
173		return (-1);
174
175	nsyms = shdr.sh_size / shdr.sh_entsize;
176	if (nsyms > (1 << 20))
177		return (-1);
178
179	if ((symtab->data = elf_getdata(scn, NULL)) == NULL)
180		return (-1);
181
182	symtab->index = calloc(nsyms, sizeof(u_int));
183	if (symtab->index == NULL)
184		return (-1);
185	for (u_int i = 0; i < nsyms; i++)
186		symtab->index[i] = i;
187	symtab->nsyms = nsyms;
188	symtab->stridx = shdr.sh_link;
189
190	thunk.e = e;
191	thunk.symtab = symtab;
192	qsort_r(symtab->index, nsyms, sizeof(u_int), symvalcmp, &thunk);
193
194	return (0);
195}
196
197static void
198load_symtabs(struct file_info *file)
199{
200
201	file->symtab.nsyms = file->dynsymtab.nsyms = 0;
202	(void)load_symtab(file->elf, &file->symtab, SHT_SYMTAB);
203	(void)load_symtab(file->elf, &file->dynsymtab, SHT_DYNSYM);
204}
205
206static int
207open_debug_file(char *path, const char *debugfile, uint32_t crc)
208{
209	size_t n;
210	uint32_t compcrc;
211	int fd;
212
213	fd = -1;
214	if ((n = strlcat(path, "/", PATH_MAX)) >= PATH_MAX)
215		return (fd);
216	if (strlcat(path, debugfile, PATH_MAX) >= PATH_MAX)
217		goto out;
218	if ((fd = open(path, O_RDONLY | O_CLOEXEC)) < 0)
219		goto out;
220	if (crc32_file(fd, &compcrc) != 0 || crc != compcrc) {
221		DPRINTFX("ERROR: CRC32 mismatch for %s", path);
222		(void)close(fd);
223		fd = -1;
224	}
225out:
226	path[n] = '\0';
227	return (fd);
228}
229
/*
 * Obtain an ELF descriptor for the specified mapped object. If a GNU debuglink
 * section is present and a debug file with a matching CRC32 can be found, a
 * descriptor for that debug file is used instead of the object itself.
 *
 * On success the ELF descriptor and its file descriptor are stored in
 * mapping->file, the ELF header of the mapped object (not of the debug file)
 * is left in mapping->file->ehdr, and the symbol tables are loaded.
 *
 * Returns 0 on success or if the object was already loaded.  Returns -1 if the
 * mapping has no name, the object cannot be opened or parsed, or libelf cannot
 * open the debug file that was selected.
 */
static int
open_object(struct map_info *mapping)
{
	char path[PATH_MAX];
	GElf_Shdr shdr;
	Elf *e, *e2;
	Elf_Data *data;
	Elf_Scn *scn;
	struct file_info *file;
	prmap_t *map;
	const char *debugfile, *scnname;
	size_t ndx;
	uint32_t crc;
	int fd, fd2;

	if (mapping->map.pr_mapname[0] == '\0')
		return (-1); /* anonymous object */
	if (mapping->file->elf != NULL)
		return (0); /* already loaded */

	file = mapping->file;
	map = &mapping->map;
	if ((fd = open(map->pr_mapname, O_RDONLY | O_CLOEXEC)) < 0) {
		DPRINTF("ERROR: open %s failed", map->pr_mapname);
		return (-1);
	}
	if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
		DPRINTFX("ERROR: elf_begin() failed: %s", elf_errmsg(-1));
		goto err;
	}
	if (gelf_getehdr(e, &file->ehdr) != &file->ehdr) {
		DPRINTFX("ERROR: elf_getehdr() failed: %s", elf_errmsg(-1));
		goto err;
	}

	/* Look for a SHT_PROGBITS section named ".gnu_debuglink". */
	scn = NULL;
	while ((scn = elf_nextscn(e, scn)) != NULL) {
		if (gelf_getshdr(scn, &shdr) != &shdr) {
			DPRINTFX("ERROR: gelf_getshdr failed: %s",
			    elf_errmsg(-1));
			goto err;
		}
		if (shdr.sh_type != SHT_PROGBITS)
			continue;
		if (elf_getshdrstrndx(e, &ndx) != 0) {
			DPRINTFX("ERROR: elf_getshdrstrndx failed: %s",
			    elf_errmsg(-1));
			goto err;
		}
		/* A section whose name cannot be resolved is skipped. */
		if ((scnname = elf_strptr(e, ndx, shdr.sh_name)) == NULL)
			continue;

		if (strcmp(scnname, ".gnu_debuglink") == 0)
			break;
	}
	/* No debuglink section: use the object's own symbols. */
	if (scn == NULL)
		goto internal;

	if ((data = elf_getdata(scn, NULL)) == NULL) {
		DPRINTFX("ERROR: elf_getdata failed: %s", elf_errmsg(-1));
		goto err;
	}

	/*
	 * The data contains a null-terminated file name followed by a 4-byte
	 * CRC.  A section that does not have that shape is not treated as an
	 * error; the object itself is used instead.
	 */
	if (data->d_size < sizeof(crc) + 1) {
		DPRINTFX("ERROR: debuglink section is too small (%zd bytes)",
		    (ssize_t)data->d_size);
		goto internal;
	}
	/* The name's terminator must lie before the trailing CRC bytes. */
	if (strnlen(data->d_buf, data->d_size) >= data->d_size - sizeof(crc)) {
		DPRINTFX("ERROR: no null-terminator in gnu_debuglink section");
		goto internal;
	}

	debugfile = data->d_buf;
	/* The CRC occupies the last sizeof(crc) bytes of the section. */
	memcpy(&crc, (char *)data->d_buf + data->d_size - sizeof(crc),
	    sizeof(crc));

	/*
	 * Search for the debug file using the algorithm described in the gdb
	 * documentation:
	 * - look in the directory containing the object,
	 * - look in the subdirectory ".debug" of the directory containing the
	 *   object,
	 * - look in the global debug directories (currently /usr/lib/debug).
	 *
	 * The return value of dirname() is discarded and path is used
	 * afterwards, so this relies on dirname() rewriting its argument in
	 * place.
	 */
	(void)strlcpy(path, map->pr_mapname, sizeof(path));
	(void)dirname(path);

	if ((fd2 = open_debug_file(path, debugfile, crc)) >= 0)
		goto external;

	if (strlcat(path, "/.debug", sizeof(path)) < sizeof(path) &&
	    (fd2 = open_debug_file(path, debugfile, crc)) >= 0)
		goto external;

	/* Global directory: PATH_DEBUG_DIR followed by the object's path. */
	(void)snprintf(path, sizeof(path), PATH_DEBUG_DIR);
	if (strlcat(path, map->pr_mapname, sizeof(path)) < sizeof(path)) {
		(void)dirname(path);
		if ((fd2 = open_debug_file(path, debugfile, crc)) >= 0)
			goto external;
	}

internal:
	/* We didn't find a debug file, just return the object's descriptor. */
	file->elf = e;
	file->fd = fd;
	load_symtabs(file);
	return (0);

external:
	/* A debug file passed the CRC check; switch over to it. */
	if ((e2 = elf_begin(fd2, ELF_C_READ, NULL)) == NULL) {
		DPRINTFX("ERROR: elf_begin failed: %s", elf_errmsg(-1));
		(void)close(fd2);
		goto err;
	}
	/* The object's own descriptors are no longer needed. */
	(void)elf_end(e);
	(void)close(fd);
	file->elf = e2;
	file->fd = fd2;
	load_symtabs(file);
	return (0);

err:
	/* e is NULL when elf_begin() on the object itself failed. */
	if (e != NULL)
		(void)elf_end(e);
	(void)close(fd);
	return (-1);
}
367
368char *
369proc_objname(struct proc_handle *p, uintptr_t addr, char *objname,
370    size_t objnamesz)
371{
372	prmap_t *map;
373	size_t i;
374
375	if (p->nmappings == 0)
376		if (proc_rdagent(p) == NULL)
377			return (NULL);
378	for (i = 0; i < p->nmappings; i++) {
379		map = &p->mappings[i].map;
380		if (addr >= map->pr_vaddr &&
381		    addr < map->pr_vaddr + map->pr_size) {
382			strlcpy(objname, map->pr_mapname, objnamesz);
383			return (objname);
384		}
385	}
386	return (NULL);
387}
388
389int
390proc_iter_objs(struct proc_handle *p, proc_map_f *func, void *cd)
391{
392	char last[MAXPATHLEN], path[MAXPATHLEN], *base;
393	prmap_t *map;
394	size_t i;
395	int error;
396
397	if (p->nmappings == 0)
398		if (proc_rdagent(p) == NULL)
399			return (-1);
400
401	error = 0;
402	memset(last, 0, sizeof(last));
403	for (i = 0; i < p->nmappings; i++) {
404		map = &p->mappings[i].map;
405		strlcpy(path, map->pr_mapname, sizeof(path));
406		base = basename(path);
407		/*
408		 * We shouldn't call the callback twice with the same object.
409		 * To do that we are assuming the fact that if there are
410		 * repeated object names (i.e. different mappings for the
411		 * same object) they occur next to each other.
412		 */
413		if (strcmp(base, last) == 0)
414			continue;
415		if ((error = (*func)(cd, map, base)) != 0)
416			break;
417		strlcpy(last, path, sizeof(last));
418	}
419	return (error);
420}
421
422static struct map_info *
423_proc_addr2map(struct proc_handle *p, uintptr_t addr)
424{
425	struct map_info *mapping;
426	size_t i;
427
428	if (p->nmappings == 0)
429		if (proc_rdagent(p) == NULL)
430			return (NULL);
431	for (i = 0; i < p->nmappings; i++) {
432		mapping = &p->mappings[i];
433		if (addr >= mapping->map.pr_vaddr &&
434		    addr < mapping->map.pr_vaddr + mapping->map.pr_size)
435			return (mapping);
436	}
437	return (NULL);
438}
439
440prmap_t *
441proc_addr2map(struct proc_handle *p, uintptr_t addr)
442{
443
444	return (&_proc_addr2map(p, addr)->map);
445}
446
447/*
448 * Look up the symbol at addr using a binary search, returning a copy of the
449 * symbol and its name.
450 */
451static int
452lookup_symbol_by_addr(Elf *e, struct symtab *symtab, uintptr_t addr,
453    const char **namep, GElf_Sym *symp)
454{
455	GElf_Sym sym;
456	Elf_Data *data;
457	const char *s;
458	u_int i, min, max, mid;
459
460	if (symtab->nsyms == 0)
461		return (ENOENT);
462
463	data = symtab->data;
464	min = 0;
465	max = symtab->nsyms - 1;
466
467	while (min <= max) {
468		mid = (max + min) / 2;
469		(void)gelf_getsym(data, symtab->index[mid], &sym);
470		if (addr >= sym.st_value && addr < sym.st_value + sym.st_size)
471			break;
472
473		if (addr < sym.st_value)
474			max = mid - 1;
475		else
476			min = mid + 1;
477	}
478	if (min > max)
479		return (ENOENT);
480
481	/*
482	 * Advance until we find the matching symbol with largest index.
483	 */
484	for (i = mid; i < symtab->nsyms; i++) {
485		(void)gelf_getsym(data, symtab->index[i], &sym);
486		if (addr < sym.st_value || addr >= sym.st_value + sym.st_size)
487			break;
488	}
489	(void)gelf_getsym(data, symtab->index[i - 1], symp);
490	s = elf_strptr(e, symtab->stridx, symp->st_name);
491	if (s != NULL && namep != NULL)
492		*namep = s;
493	return (0);
494}
495
496int
497proc_addr2sym(struct proc_handle *p, uintptr_t addr, char *name,
498    size_t namesz, GElf_Sym *symcopy)
499{
500	struct file_info *file;
501	struct map_info *mapping;
502	const char *s;
503	uintptr_t off;
504	int error;
505
506	if ((mapping = _proc_addr2map(p, addr)) == NULL) {
507		DPRINTFX("ERROR: proc_addr2map failed to resolve 0x%jx", (uintmax_t)addr);
508		return (-1);
509	}
510	if (open_object(mapping) != 0) {
511		DPRINTFX("ERROR: failed to open object %s",
512		    mapping->map.pr_mapname);
513		return (-1);
514	}
515
516	file = mapping->file;
517	off = file->ehdr.e_type == ET_DYN ?
518	    mapping->map.pr_vaddr - mapping->map.pr_offset : 0;
519	if (addr < off)
520		return (ENOENT);
521	addr -= off;
522
523	error = lookup_symbol_by_addr(file->elf, &file->dynsymtab, addr, &s,
524	    symcopy);
525	if (error == ENOENT)
526		error = lookup_symbol_by_addr(file->elf, &file->symtab, addr,
527		    &s, symcopy);
528	if (error == 0) {
529		symcopy->st_value += off;
530		demangle(s, name, namesz);
531	}
532	return (error);
533}
534
535static struct map_info *
536_proc_name2map(struct proc_handle *p, const char *name)
537{
538	char path[MAXPATHLEN], *base;
539	struct map_info *mapping;
540	size_t i, len;
541
542	if ((len = strlen(name)) == 0)
543		return (NULL);
544	if (p->nmappings == 0)
545		if (proc_rdagent(p) == NULL)
546			return (NULL);
547	for (i = 0; i < p->nmappings; i++) {
548		mapping = &p->mappings[i];
549		(void)strlcpy(path, mapping->map.pr_mapname, sizeof(path));
550		base = basename(path);
551		if (strcmp(base, name) == 0)
552			return (mapping);
553	}
554	/* If we didn't find a match, try matching prefixes of the basename. */
555	for (i = 0; i < p->nmappings; i++) {
556		mapping = &p->mappings[i];
557		strlcpy(path, mapping->map.pr_mapname, sizeof(path));
558		base = basename(path);
559		if (strncmp(base, name, len) == 0)
560			return (mapping);
561	}
562	if (strcmp(name, "a.out") == 0)
563		return (_proc_addr2map(p,
564		    p->mappings[p->exec_map].map.pr_vaddr));
565	return (NULL);
566}
567
568prmap_t *
569proc_name2map(struct proc_handle *p, const char *name)
570{
571
572	return (&_proc_name2map(p, name)->map);
573}
574
575/*
576 * Look up the symbol with the given name and return a copy of it.
577 */
578static int
579lookup_symbol_by_name(Elf *elf, struct symtab *symtab, const char *symbol,
580    GElf_Sym *symcopy, prsyminfo_t *si)
581{
582	GElf_Sym sym;
583	Elf_Data *data;
584	char *s;
585	int i;
586
587	if (symtab->nsyms == 0)
588		return (ENOENT);
589	data = symtab->data;
590	for (i = 0; gelf_getsym(data, i, &sym) != NULL; i++) {
591		s = elf_strptr(elf, symtab->stridx, sym.st_name);
592		if (s != NULL && strcmp(s, symbol) == 0) {
593			memcpy(symcopy, &sym, sizeof(*symcopy));
594			if (si != NULL)
595				si->prs_id = i;
596			return (0);
597		}
598	}
599	return (ENOENT);
600}
601
602int
603proc_name2sym(struct proc_handle *p, const char *object, const char *symbol,
604    GElf_Sym *symcopy, prsyminfo_t *si)
605{
606	struct file_info *file;
607	struct map_info *mapping;
608	uintptr_t off;
609	int error;
610
611	if ((mapping = _proc_name2map(p, object)) == NULL) {
612		DPRINTFX("ERROR: proc_name2map failed to resolve %s", object);
613		return (-1);
614	}
615	if (open_object(mapping) != 0) {
616		DPRINTFX("ERROR: failed to open object %s",
617		    mapping->map.pr_mapname);
618		return (-1);
619	}
620
621	file = mapping->file;
622	off = file->ehdr.e_type == ET_DYN ?
623	    mapping->map.pr_vaddr - mapping->map.pr_offset : 0;
624
625	error = lookup_symbol_by_name(file->elf, &file->dynsymtab, symbol,
626	    symcopy, si);
627	if (error == ENOENT)
628		error = lookup_symbol_by_name(file->elf, &file->symtab, symbol,
629		    symcopy, si);
630	if (error == 0)
631		symcopy->st_value += off;
632	return (error);
633}
634
635ctf_file_t *
636proc_name2ctf(struct proc_handle *p, const char *name)
637{
638#ifndef NO_CTF
639	ctf_file_t *ctf;
640	prmap_t *map;
641	int error;
642
643	if ((map = proc_name2map(p, name)) == NULL)
644		return (NULL);
645
646	ctf = ctf_open(map->pr_mapname, &error);
647	return (ctf);
648#else
649	(void)p;
650	(void)name;
651	return (NULL);
652#endif
653}
654
655int
656proc_iter_symbyaddr(struct proc_handle *p, const char *object, int which,
657    int mask, proc_sym_f *func, void *cd)
658{
659	GElf_Sym sym;
660	struct file_info *file;
661	struct map_info *mapping;
662	struct symtab *symtab;
663	const char *s;
664	int error, i;
665
666	if ((mapping = _proc_name2map(p, object)) == NULL) {
667		DPRINTFX("ERROR: proc_name2map failed to resolve %s", object);
668		return (-1);
669	}
670	if (open_object(mapping) != 0) {
671		DPRINTFX("ERROR: failed to open object %s",
672		    mapping->map.pr_mapname);
673		return (-1);
674	}
675
676	file = mapping->file;
677	symtab = which == PR_SYMTAB ? &file->symtab : &file->dynsymtab;
678	if (symtab->nsyms == 0)
679		return (-1);
680
681	error = 0;
682	for (i = 0; gelf_getsym(symtab->data, i, &sym) != NULL; i++) {
683		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL &&
684		    (mask & BIND_LOCAL) == 0)
685			continue;
686		if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL &&
687		    (mask & BIND_GLOBAL) == 0)
688			continue;
689		if (GELF_ST_BIND(sym.st_info) == STB_WEAK &&
690		    (mask & BIND_WEAK) == 0)
691			continue;
692		if (GELF_ST_TYPE(sym.st_info) == STT_NOTYPE &&
693		    (mask & TYPE_NOTYPE) == 0)
694			continue;
695		if (GELF_ST_TYPE(sym.st_info) == STT_OBJECT &&
696		    (mask & TYPE_OBJECT) == 0)
697			continue;
698		if (GELF_ST_TYPE(sym.st_info) == STT_FUNC &&
699		    (mask & TYPE_FUNC) == 0)
700			continue;
701		if (GELF_ST_TYPE(sym.st_info) == STT_SECTION &&
702		    (mask & TYPE_SECTION) == 0)
703			continue;
704		if (GELF_ST_TYPE(sym.st_info) == STT_FILE &&
705		    (mask & TYPE_FILE) == 0)
706			continue;
707		s = elf_strptr(file->elf, symtab->stridx, sym.st_name);
708		if (file->ehdr.e_type == ET_DYN)
709			sym.st_value += mapping->map.pr_vaddr;
710		if ((error = (*func)(cd, &sym, s)) != 0)
711			break;
712	}
713	return (error);
714}
715