1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1992, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software developed by the Computer Systems
8 * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
9 * BG 91-66 and contributed to Berkeley.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36
37#include <sys/param.h>
38#include <sys/fnv_hash.h>
39
40#define	_WANT_VNET
41
42#include <sys/user.h>
43#include <sys/linker.h>
44#include <sys/pcpu.h>
45#include <sys/stat.h>
46#include <sys/sysctl.h>
47#include <sys/mman.h>
48
49#include <stdbool.h>
50#include <net/vnet.h>
51
52#include <fcntl.h>
53#include <kvm.h>
54#include <limits.h>
55#include <paths.h>
56#include <stdint.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <unistd.h>
61
62#include "kvm_private.h"
63
64SET_DECLARE(kvm_arch, struct kvm_arch);
65
66static char _kd_is_null[] = "";
67
68char *
69kvm_geterr(kvm_t *kd)
70{
71
72	if (kd == NULL)
73		return (_kd_is_null);
74	return (kd->errbuf);
75}
76
77static int
78_kvm_read_kernel_ehdr(kvm_t *kd)
79{
80	Elf *elf;
81
82	if (elf_version(EV_CURRENT) == EV_NONE) {
83		_kvm_err(kd, kd->program, "Unsupported libelf");
84		return (-1);
85	}
86	elf = elf_begin(kd->nlfd, ELF_C_READ, NULL);
87	if (elf == NULL) {
88		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
89		return (-1);
90	}
91	if (elf_kind(elf) != ELF_K_ELF) {
92		_kvm_err(kd, kd->program, "kernel is not an ELF file");
93		return (-1);
94	}
95	if (gelf_getehdr(elf, &kd->nlehdr) == NULL) {
96		_kvm_err(kd, kd->program, "%s", elf_errmsg(0));
97		elf_end(elf);
98		return (-1);
99	}
100	elf_end(elf);
101
102	switch (kd->nlehdr.e_ident[EI_DATA]) {
103	case ELFDATA2LSB:
104	case ELFDATA2MSB:
105		return (0);
106	default:
107		_kvm_err(kd, kd->program,
108		    "unsupported ELF data encoding for kernel");
109		return (-1);
110	}
111}
112
113static kvm_t *
114_kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout)
115{
116	struct kvm_arch **parch;
117	struct stat st;
118
119	kd->vmfd = -1;
120	kd->pmfd = -1;
121	kd->nlfd = -1;
122	kd->vmst = NULL;
123	kd->procbase = NULL;
124	kd->argspc = NULL;
125	kd->argv = NULL;
126
127	if (uf == NULL)
128		uf = getbootfile();
129	else if (strlen(uf) >= MAXPATHLEN) {
130		_kvm_err(kd, kd->program, "exec file name too long");
131		goto failed;
132	}
133	if (flag & ~O_RDWR) {
134		_kvm_err(kd, kd->program, "bad flags arg");
135		goto failed;
136	}
137	if (mf == NULL)
138		mf = _PATH_MEM;
139
140	if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
141		_kvm_syserr(kd, kd->program, "%s", mf);
142		goto failed;
143	}
144	if (fstat(kd->pmfd, &st) < 0) {
145		_kvm_syserr(kd, kd->program, "%s", mf);
146		goto failed;
147	}
148	if (S_ISREG(st.st_mode) && st.st_size <= 0) {
149		errno = EINVAL;
150		_kvm_syserr(kd, kd->program, "empty file");
151		goto failed;
152	}
153	if (S_ISCHR(st.st_mode)) {
154		/*
155		 * If this is a character special device, then check that
156		 * it's /dev/mem.  If so, open kmem too.  (Maybe we should
157		 * make it work for either /dev/mem or /dev/kmem -- in either
158		 * case you're working with a live kernel.)
159		 */
160		if (strcmp(mf, _PATH_DEVNULL) == 0) {
161			kd->vmfd = open(_PATH_DEVNULL, O_RDONLY | O_CLOEXEC);
162			return (kd);
163		} else if (strcmp(mf, _PATH_MEM) == 0) {
164			if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC)) <
165			    0) {
166				_kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
167				goto failed;
168			}
169			return (kd);
170		}
171	}
172
173	/*
174	 * This is either a crash dump or a remote live system with its physical
175	 * memory fully accessible via a special device.
176	 * Open the namelist fd and determine the architecture.
177	 */
178	if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
179		_kvm_syserr(kd, kd->program, "%s", uf);
180		goto failed;
181	}
182	if (_kvm_read_kernel_ehdr(kd) < 0)
183		goto failed;
184	if (strncmp(mf, _PATH_FWMEM, strlen(_PATH_FWMEM)) == 0 ||
185	    strncmp(mf, _PATH_DEVVMM, strlen(_PATH_DEVVMM)) == 0) {
186		kd->rawdump = 1;
187		kd->writable = 1;
188	}
189	SET_FOREACH(parch, kvm_arch) {
190		if ((*parch)->ka_probe(kd)) {
191			kd->arch = *parch;
192			break;
193		}
194	}
195	if (kd->arch == NULL) {
196		_kvm_err(kd, kd->program, "unsupported architecture");
197		goto failed;
198	}
199
200	/*
201	 * Non-native kernels require a symbol resolver.
202	 */
203	if (!kd->arch->ka_native(kd) && kd->resolve_symbol == NULL) {
204		_kvm_err(kd, kd->program,
205		    "non-native kernel requires a symbol resolver");
206		goto failed;
207	}
208
209	/*
210	 * Initialize the virtual address translation machinery.
211	 */
212	if (kd->arch->ka_initvtop(kd) < 0)
213		goto failed;
214	return (kd);
215failed:
216	/*
217	 * Copy out the error if doing sane error semantics.
218	 */
219	if (errout != NULL)
220		strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
221	(void)kvm_close(kd);
222	return (NULL);
223}
224
225kvm_t *
226kvm_openfiles(const char *uf, const char *mf, const char *sf __unused, int flag,
227    char *errout)
228{
229	kvm_t *kd;
230
231	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
232		if (errout != NULL)
233			(void)strlcpy(errout, strerror(errno),
234			    _POSIX2_LINE_MAX);
235		return (NULL);
236	}
237	return (_kvm_open(kd, uf, mf, flag, errout));
238}
239
240kvm_t *
241kvm_open(const char *uf, const char *mf, const char *sf __unused, int flag,
242    const char *errstr)
243{
244	kvm_t *kd;
245
246	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
247		if (errstr != NULL)
248			(void)fprintf(stderr, "%s: %s\n",
249				      errstr, strerror(errno));
250		return (NULL);
251	}
252	kd->program = errstr;
253	return (_kvm_open(kd, uf, mf, flag, NULL));
254}
255
256kvm_t *
257kvm_open2(const char *uf, const char *mf, int flag, char *errout,
258    int (*resolver)(const char *, kvaddr_t *))
259{
260	kvm_t *kd;
261
262	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
263		if (errout != NULL)
264			(void)strlcpy(errout, strerror(errno),
265			    _POSIX2_LINE_MAX);
266		return (NULL);
267	}
268	kd->resolve_symbol = resolver;
269	return (_kvm_open(kd, uf, mf, flag, errout));
270}
271
272int
273kvm_close(kvm_t *kd)
274{
275	int error = 0;
276
277	if (kd == NULL) {
278		errno = EINVAL;
279		return (-1);
280	}
281	if (kd->vmst != NULL)
282		kd->arch->ka_freevtop(kd);
283	if (kd->pmfd >= 0)
284		error |= close(kd->pmfd);
285	if (kd->vmfd >= 0)
286		error |= close(kd->vmfd);
287	if (kd->nlfd >= 0)
288		error |= close(kd->nlfd);
289	if (kd->procbase != 0)
290		free((void *)kd->procbase);
291	if (kd->argbuf != 0)
292		free((void *) kd->argbuf);
293	if (kd->argspc != 0)
294		free((void *) kd->argspc);
295	if (kd->argv != 0)
296		free((void *)kd->argv);
297	if (kd->dpcpu_initialized != 0)
298		free(kd->dpcpu_off);
299	if (kd->pt_map != NULL)
300		free(kd->pt_map);
301	if (kd->page_map != NULL)
302		free(kd->page_map);
303	if (kd->sparse_map != MAP_FAILED && kd->sparse_map != NULL)
304		munmap(kd->sparse_map, kd->pt_sparse_size);
305	free((void *)kd);
306
307	return (error);
308}
309
310int
311kvm_nlist2(kvm_t *kd, struct kvm_nlist *nl)
312{
313
314	/*
315	 * If called via the public interface, permit initialization of
316	 * further virtualized modules on demand.
317	 */
318	return (_kvm_nlist(kd, nl, 1));
319}
320
321int
322kvm_nlist(kvm_t *kd, struct nlist *nl)
323{
324	struct kvm_nlist *kl;
325	int count, i, nfail;
326
327	/*
328	 * Avoid reporting truncated addresses by failing for non-native
329	 * cores.
330	 */
331	if (!kvm_native(kd)) {
332		_kvm_err(kd, kd->program, "kvm_nlist of non-native vmcore");
333		return (-1);
334	}
335
336	for (count = 0; nl[count].n_name != NULL && nl[count].n_name[0] != '\0';
337	     count++)
338		;
339	if (count == 0)
340		return (0);
341	kl = calloc(count + 1, sizeof(*kl));
342	if (kl == NULL) {
343		_kvm_err(kd, kd->program, "cannot allocate memory");
344		return (-1);
345	}
346	for (i = 0; i < count; i++)
347		kl[i].n_name = nl[i].n_name;
348	nfail = kvm_nlist2(kd, kl);
349	for (i = 0; i < count; i++) {
350		nl[i].n_type = kl[i].n_type;
351		nl[i].n_other = 0;
352		nl[i].n_desc = 0;
353		nl[i].n_value = kl[i].n_value;
354	}
355	free(kl);
356	return (nfail);
357}
358
359ssize_t
360kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len)
361{
362
363	return (kvm_read2(kd, kva, buf, len));
364}
365
366ssize_t
367kvm_read2(kvm_t *kd, kvaddr_t kva, void *buf, size_t len)
368{
369	int cc;
370	ssize_t cr;
371	off_t pa;
372	char *cp;
373
374	if (ISALIVE(kd)) {
375		/*
376		 * We're using /dev/kmem.  Just read straight from the
377		 * device and let the active kernel do the address translation.
378		 */
379		errno = 0;
380		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
381			_kvm_err(kd, 0, "invalid address (0x%jx)",
382			    (uintmax_t)kva);
383			return (-1);
384		}
385		cr = read(kd->vmfd, buf, len);
386		if (cr < 0) {
387			_kvm_syserr(kd, 0, "kvm_read");
388			return (-1);
389		} else if (cr < (ssize_t)len)
390			_kvm_err(kd, kd->program, "short read");
391		return (cr);
392	}
393
394	cp = buf;
395	while (len > 0) {
396		cc = kd->arch->ka_kvatop(kd, kva, &pa);
397		if (cc == 0)
398			return (-1);
399		if (cc > (ssize_t)len)
400			cc = len;
401		errno = 0;
402		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
403			_kvm_syserr(kd, 0, _PATH_MEM);
404			break;
405		}
406		cr = read(kd->pmfd, cp, cc);
407		if (cr < 0) {
408			_kvm_syserr(kd, kd->program, "kvm_read");
409			break;
410		}
411		/*
412		 * If ka_kvatop returns a bogus value or our core file is
413		 * truncated, we might wind up seeking beyond the end of the
414		 * core file in which case the read will return 0 (EOF).
415		 */
416		if (cr == 0)
417			break;
418		cp += cr;
419		kva += cr;
420		len -= cr;
421	}
422
423	return (cp - (char *)buf);
424}
425
426ssize_t
427kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len)
428{
429	int cc;
430	ssize_t cw;
431	off_t pa;
432	const char *cp;
433
434	if (!ISALIVE(kd) && !kd->writable) {
435		_kvm_err(kd, kd->program,
436		    "kvm_write not implemented for dead kernels");
437		return (-1);
438	}
439
440	if (ISALIVE(kd)) {
441		/*
442		 * Just like kvm_read, only we write.
443		 */
444		errno = 0;
445		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
446			_kvm_err(kd, 0, "invalid address (%lx)", kva);
447			return (-1);
448		}
449		cc = write(kd->vmfd, buf, len);
450		if (cc < 0) {
451			_kvm_syserr(kd, 0, "kvm_write");
452			return (-1);
453		} else if ((size_t)cc < len)
454			_kvm_err(kd, kd->program, "short write");
455		return (cc);
456	}
457
458	cp = buf;
459	while (len > 0) {
460		cc = kd->arch->ka_kvatop(kd, kva, &pa);
461		if (cc == 0)
462			return (-1);
463		if (cc > (ssize_t)len)
464			cc = len;
465		errno = 0;
466		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
467			_kvm_syserr(kd, 0, _PATH_MEM);
468			break;
469		}
470		cw = write(kd->pmfd, cp, cc);
471		if (cw < 0) {
472			_kvm_syserr(kd, kd->program, "kvm_write");
473			break;
474		}
475		/*
476		 * If ka_kvatop returns a bogus value or our core file is
477		 * truncated, we might wind up seeking beyond the end of the
478		 * core file in which case the read will return 0 (EOF).
479		 */
480		if (cw == 0)
481			break;
482		cp += cw;
483		kva += cw;
484		len -= cw;
485	}
486
487	return (cp - (const char *)buf);
488}
489
490int
491kvm_native(kvm_t *kd)
492{
493
494	if (ISALIVE(kd))
495		return (1);
496	return (kd->arch->ka_native(kd));
497}
498
499int
500kvm_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *closure)
501{
502
503	if (kd->arch->ka_walk_pages == NULL)
504		return (0);
505
506	return (kd->arch->ka_walk_pages(kd, cb, closure));
507}
508
509kssize_t
510kvm_kerndisp(kvm_t *kd)
511{
512	unsigned long kernbase, rel_kernbase;
513	size_t kernbase_len = sizeof(kernbase);
514	size_t rel_kernbase_len = sizeof(rel_kernbase);
515
516	if (ISALIVE(kd)) {
517		if (sysctlbyname("kern.base_address", &kernbase,
518		    &kernbase_len, NULL, 0) == -1) {
519			_kvm_syserr(kd, kd->program,
520				"failed to get kernel base address");
521			return (0);
522		}
523		if (sysctlbyname("kern.relbase_address", &rel_kernbase,
524		    &rel_kernbase_len, NULL, 0) == -1) {
525			_kvm_syserr(kd, kd->program,
526				"failed to get relocated kernel base address");
527			return (0);
528		}
529		return (rel_kernbase - kernbase);
530	}
531
532	if (kd->arch->ka_kerndisp == NULL)
533		return (0);
534
535	return (kd->arch->ka_kerndisp(kd));
536}
537