kvm.c revision 316040
11539Srgrimes/*-
21539Srgrimes * Copyright (c) 1989, 1992, 1993
31539Srgrimes *	The Regents of the University of California.  All rights reserved.
41539Srgrimes *
51539Srgrimes * This code is derived from software developed by the Computer Systems
61539Srgrimes * Engineering group at Lawrence Berkeley Laboratory under DARPA contract
71539Srgrimes * BG 91-66 and contributed to Berkeley.
81539Srgrimes *
91539Srgrimes * Redistribution and use in source and binary forms, with or without
101539Srgrimes * modification, are permitted provided that the following conditions
111539Srgrimes * are met:
121539Srgrimes * 1. Redistributions of source code must retain the above copyright
131539Srgrimes *    notice, this list of conditions and the following disclaimer.
141539Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151539Srgrimes *    notice, this list of conditions and the following disclaimer in the
161539Srgrimes *    documentation and/or other materials provided with the distribution.
171539Srgrimes * 4. Neither the name of the University nor the names of its contributors
181539Srgrimes *    may be used to endorse or promote products derived from this software
191539Srgrimes *    without specific prior written permission.
201539Srgrimes *
211539Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221539Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231539Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241539Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251539Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261539Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271539Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281539Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291539Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301539Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311539Srgrimes * SUCH DAMAGE.
321539Srgrimes */
331539Srgrimes
341539Srgrimes#include <sys/cdefs.h>
351539Srgrimes__FBSDID("$FreeBSD: stable/10/lib/libkvm/kvm.c 316040 2017-03-27 18:23:45Z ngie $");
361539Srgrimes
371539Srgrimes#if defined(LIBC_SCCS) && !defined(lint)
381539Srgrimes#if 0
3990644Simpstatic char sccsid[] = "@(#)kvm.c	8.2 (Berkeley) 2/13/94";
401539Srgrimes#endif
411539Srgrimes#endif /* LIBC_SCCS and not lint */
421539Srgrimes
431539Srgrimes#include <sys/param.h>
441539Srgrimes
451539Srgrimes#define	_WANT_VNET
461539Srgrimes
471539Srgrimes#include <sys/user.h>
482552Sgpalmer#include <sys/proc.h>
491539Srgrimes#include <sys/ioctl.h>
502552Sgpalmer#include <sys/stat.h>
511539Srgrimes#include <sys/sysctl.h>
522552Sgpalmer#include <sys/linker.h>
531539Srgrimes#include <sys/pcpu.h>
541539Srgrimes
552552Sgpalmer#include <net/vnet.h>
561539Srgrimes
572552Sgpalmer#include <vm/vm.h>
581539Srgrimes#include <vm/vm_param.h>
591539Srgrimes
601539Srgrimes#include <machine/vmparam.h>
611539Srgrimes
621539Srgrimes#include <ctype.h>
631539Srgrimes#include <fcntl.h>
642915Swollman#include <kvm.h>
6515267Swpaul#include <limits.h>
661539Srgrimes#include <nlist.h>
671539Srgrimes#include <paths.h>
681539Srgrimes#include <stdio.h>
691539Srgrimes#include <stdlib.h>
701539Srgrimes#include <string.h>
711539Srgrimes#include <strings.h>
721539Srgrimes#include <unistd.h>
731539Srgrimes
741539Srgrimes#include "kvm_private.h"
7542780Sdes
7642780Sdes/* from src/lib/libc/gen/nlist.c */
771539Srgrimesint __fdnlist(int, struct nlist *);
781539Srgrimes
791539Srgrimeschar *
801539Srgrimeskvm_geterr(kvm_t *kd)
811539Srgrimes{
821539Srgrimes	return (kd->errbuf);
832915Swollman}
841539Srgrimes
851539Srgrimes#include <stdarg.h>
862915Swollman
872915Swollman/*
882915Swollman * Report an error using printf style arguments.  "program" is kd->program
892915Swollman * on hard errors, and 0 on soft errors, so that under sun error emulation,
902915Swollman * only hard errors are printed out (otherwise, programs like gdb will
912915Swollman * generate tons of error messages when trying to access bogus pointers).
922915Swollman */
932915Swollmanvoid
942915Swollman_kvm_err(kvm_t *kd, const char *program, const char *fmt, ...)
952915Swollman{
962915Swollman	va_list ap;
972915Swollman
982915Swollman	va_start(ap, fmt);
991539Srgrimes	if (program != NULL) {
1001539Srgrimes		(void)fprintf(stderr, "%s: ", program);
1011539Srgrimes		(void)vfprintf(stderr, fmt, ap);
1021539Srgrimes		(void)fputc('\n', stderr);
1031539Srgrimes	} else
1041539Srgrimes		(void)vsnprintf(kd->errbuf,
1051539Srgrimes		    sizeof(kd->errbuf), fmt, ap);
1061539Srgrimes
10732962Ssteve	va_end(ap);
1081539Srgrimes}
10990644Simp
1101539Srgrimesvoid
1111539Srgrimes_kvm_syserr(kvm_t *kd, const char *program, const char *fmt, ...)
1121539Srgrimes{
1131539Srgrimes	va_list ap;
114	int n;
115
116	va_start(ap, fmt);
117	if (program != NULL) {
118		(void)fprintf(stderr, "%s: ", program);
119		(void)vfprintf(stderr, fmt, ap);
120		(void)fprintf(stderr, ": %s\n", strerror(errno));
121	} else {
122		char *cp = kd->errbuf;
123
124		(void)vsnprintf(cp, sizeof(kd->errbuf), fmt, ap);
125		n = strlen(cp);
126		(void)snprintf(&cp[n], sizeof(kd->errbuf) - n, ": %s",
127		    strerror(errno));
128	}
129	va_end(ap);
130}
131
132void *
133_kvm_malloc(kvm_t *kd, size_t n)
134{
135	void *p;
136
137	if ((p = calloc(n, sizeof(char))) == NULL)
138		_kvm_err(kd, kd->program, "can't allocate %zu bytes: %s",
139			 n, strerror(errno));
140	return (p);
141}
142
143static kvm_t *
144_kvm_open(kvm_t *kd, const char *uf, const char *mf, int flag, char *errout)
145{
146	struct stat st;
147
148	kd->vmfd = -1;
149	kd->pmfd = -1;
150	kd->nlfd = -1;
151	kd->vmst = 0;
152	kd->procbase = 0;
153	kd->argspc = 0;
154	kd->argv = 0;
155
156	if (uf == 0)
157		uf = getbootfile();
158	else if (strlen(uf) >= MAXPATHLEN) {
159		_kvm_err(kd, kd->program, "exec file name too long");
160		goto failed;
161	}
162	if (flag & ~O_RDWR) {
163		_kvm_err(kd, kd->program, "bad flags arg");
164		goto failed;
165	}
166	if (mf == 0)
167		mf = _PATH_MEM;
168
169	if ((kd->pmfd = open(mf, flag | O_CLOEXEC, 0)) < 0) {
170		_kvm_syserr(kd, kd->program, "%s", mf);
171		goto failed;
172	}
173	if (fstat(kd->pmfd, &st) < 0) {
174		_kvm_syserr(kd, kd->program, "%s", mf);
175		goto failed;
176	}
177	if (S_ISREG(st.st_mode) && st.st_size <= 0) {
178		errno = EINVAL;
179		_kvm_syserr(kd, kd->program, "empty file");
180		goto failed;
181	}
182	if (S_ISCHR(st.st_mode)) {
183		/*
184		 * If this is a character special device, then check that
185		 * it's /dev/mem.  If so, open kmem too.  (Maybe we should
186		 * make it work for either /dev/mem or /dev/kmem -- in either
187		 * case you're working with a live kernel.)
188		 */
189		if (strcmp(mf, _PATH_DEVNULL) == 0) {
190			kd->vmfd = open(_PATH_DEVNULL, O_RDONLY | O_CLOEXEC);
191			return (kd);
192		} else if (strcmp(mf, _PATH_MEM) == 0) {
193			if ((kd->vmfd = open(_PATH_KMEM, flag | O_CLOEXEC)) <
194			    0) {
195				_kvm_syserr(kd, kd->program, "%s", _PATH_KMEM);
196				goto failed;
197			}
198			return (kd);
199		}
200	}
201
202	/*
203	 * This is either a crash dump or a remote live system with its physical
204	 * memory fully accessible via a special device.
205	 * Initialize the virtual address translation machinery,
206	 * but first setup the namelist fd.
207	 */
208	if ((kd->nlfd = open(uf, O_RDONLY | O_CLOEXEC, 0)) < 0) {
209		_kvm_syserr(kd, kd->program, "%s", uf);
210		goto failed;
211	}
212	if (strncmp(mf, _PATH_FWMEM, strlen(_PATH_FWMEM)) == 0 ||
213	    strncmp(mf, _PATH_DEVVMM, strlen(_PATH_DEVVMM)) == 0) {
214		kd->rawdump = 1;
215		kd->writable = 1;
216	}
217	if (_kvm_initvtop(kd) < 0)
218		goto failed;
219	return (kd);
220failed:
221	/*
222	 * Copy out the error if doing sane error semantics.
223	 */
224	if (errout != 0)
225		strlcpy(errout, kd->errbuf, _POSIX2_LINE_MAX);
226	(void)kvm_close(kd);
227	return (NULL);
228}
229
230kvm_t *
231kvm_openfiles(const char *uf, const char *mf, const char *sf __unused, int flag,
232    char *errout)
233{
234	kvm_t *kd;
235
236	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
237		(void)strlcpy(errout, strerror(errno), _POSIX2_LINE_MAX);
238		return (NULL);
239	}
240	kd->program = 0;
241	return (_kvm_open(kd, uf, mf, flag, errout));
242}
243
244kvm_t *
245kvm_open(const char *uf, const char *mf, const char *sf __unused, int flag,
246    const char *errstr)
247{
248	kvm_t *kd;
249
250	if ((kd = calloc(1, sizeof(*kd))) == NULL) {
251		if (errstr != NULL)
252			(void)fprintf(stderr, "%s: %s\n",
253				      errstr, strerror(errno));
254		return (NULL);
255	}
256	kd->program = errstr;
257	return (_kvm_open(kd, uf, mf, flag, NULL));
258}
259
260int
261kvm_close(kvm_t *kd)
262{
263	int error = 0;
264
265	if (kd->pmfd >= 0)
266		error |= close(kd->pmfd);
267	if (kd->vmfd >= 0)
268		error |= close(kd->vmfd);
269	if (kd->nlfd >= 0)
270		error |= close(kd->nlfd);
271	if (kd->vmst)
272		_kvm_freevtop(kd);
273	if (kd->procbase != 0)
274		free((void *)kd->procbase);
275	if (kd->argbuf != 0)
276		free((void *) kd->argbuf);
277	if (kd->argspc != 0)
278		free((void *) kd->argspc);
279	if (kd->argv != 0)
280		free((void *)kd->argv);
281	free((void *)kd);
282
283	return (error);
284}
285
286/*
287 * Walk the list of unresolved symbols, generate a new list and prefix the
288 * symbol names, try again, and merge back what we could resolve.
289 */
290static int
291kvm_fdnlist_prefix(kvm_t *kd, struct nlist *nl, int missing, const char *prefix,
292    uintptr_t (*validate_fn)(kvm_t *, uintptr_t))
293{
294	struct nlist *n, *np, *p;
295	char *cp, *ce;
296	const char *ccp;
297	size_t len;
298	int slen, unresolved;
299
300	/*
301	 * Calculate the space we need to malloc for nlist and names.
302	 * We are going to store the name twice for later lookups: once
303	 * with the prefix and once the unmodified name delmited by \0.
304	 */
305	len = 0;
306	unresolved = 0;
307	for (p = nl; p->n_name && p->n_name[0]; ++p) {
308		if (p->n_type != N_UNDF)
309			continue;
310		len += sizeof(struct nlist) + strlen(prefix) +
311		    2 * (strlen(p->n_name) + 1);
312		unresolved++;
313	}
314	if (unresolved == 0)
315		return (unresolved);
316	/* Add space for the terminating nlist entry. */
317	len += sizeof(struct nlist);
318	unresolved++;
319
320	/* Alloc one chunk for (nlist, [names]) and setup pointers. */
321	n = np = malloc(len);
322	bzero(n, len);
323	if (n == NULL)
324		return (missing);
325	cp = ce = (char *)np;
326	cp += unresolved * sizeof(struct nlist);
327	ce += len;
328
329	/* Generate shortened nlist with special prefix. */
330	unresolved = 0;
331	for (p = nl; p->n_name && p->n_name[0]; ++p) {
332		if (p->n_type != N_UNDF)
333			continue;
334		bcopy(p, np, sizeof(struct nlist));
335		/* Save the new\0orig. name so we can later match it again. */
336		slen = snprintf(cp, ce - cp, "%s%s%c%s", prefix,
337		    (prefix[0] != '\0' && p->n_name[0] == '_') ?
338			(p->n_name + 1) : p->n_name, '\0', p->n_name);
339		if (slen < 0 || slen >= ce - cp)
340			continue;
341		np->n_name = cp;
342		cp += slen + 1;
343		np++;
344		unresolved++;
345	}
346
347	/* Do lookup on the reduced list. */
348	np = n;
349	unresolved = __fdnlist(kd->nlfd, np);
350
351	/* Check if we could resolve further symbols and update the list. */
352	if (unresolved >= 0 && unresolved < missing) {
353		/* Find the first freshly resolved entry. */
354		for (; np->n_name && np->n_name[0]; np++)
355			if (np->n_type != N_UNDF)
356				break;
357		/*
358		 * The lists are both in the same order,
359		 * so we can walk them in parallel.
360		 */
361		for (p = nl; np->n_name && np->n_name[0] &&
362		    p->n_name && p->n_name[0]; ++p) {
363			if (p->n_type != N_UNDF)
364				continue;
365			/* Skip expanded name and compare to orig. one. */
366			ccp = np->n_name + strlen(np->n_name) + 1;
367			if (strcmp(ccp, p->n_name) != 0)
368				continue;
369			/* Update nlist with new, translated results. */
370			p->n_type = np->n_type;
371			p->n_other = np->n_other;
372			p->n_desc = np->n_desc;
373			if (validate_fn)
374				p->n_value = (*validate_fn)(kd, np->n_value);
375			else
376				p->n_value = np->n_value;
377			missing--;
378			/* Find next freshly resolved entry. */
379			for (np++; np->n_name && np->n_name[0]; np++)
380				if (np->n_type != N_UNDF)
381					break;
382		}
383	}
384	/* We could assert missing = unresolved here. */
385
386	free(n);
387	return (unresolved);
388}
389
390int
391_kvm_nlist(kvm_t *kd, struct nlist *nl, int initialize)
392{
393	struct nlist *p;
394	int nvalid;
395	struct kld_sym_lookup lookup;
396	int error;
397	const char *prefix = "";
398	char symname[1024]; /* XXX-BZ symbol name length limit? */
399	int tried_vnet, tried_dpcpu;
400
401	/*
402	 * If we can't use the kld symbol lookup, revert to the
403	 * slow library call.
404	 */
405	if (!ISALIVE(kd)) {
406		error = __fdnlist(kd->nlfd, nl);
407		if (error <= 0)			/* Hard error or success. */
408			return (error);
409
410		if (_kvm_vnet_initialized(kd, initialize))
411			error = kvm_fdnlist_prefix(kd, nl, error,
412			    VNET_SYMPREFIX, _kvm_vnet_validaddr);
413
414		if (error > 0 && _kvm_dpcpu_initialized(kd, initialize))
415			error = kvm_fdnlist_prefix(kd, nl, error,
416			    DPCPU_SYMPREFIX, _kvm_dpcpu_validaddr);
417
418		return (error);
419	}
420
421	/*
422	 * We can use the kld lookup syscall.  Go through each nlist entry
423	 * and look it up with a kldsym(2) syscall.
424	 */
425	nvalid = 0;
426	tried_vnet = 0;
427	tried_dpcpu = 0;
428again:
429	for (p = nl; p->n_name && p->n_name[0]; ++p) {
430		if (p->n_type != N_UNDF)
431			continue;
432
433		lookup.version = sizeof(lookup);
434		lookup.symvalue = 0;
435		lookup.symsize = 0;
436
437		error = snprintf(symname, sizeof(symname), "%s%s", prefix,
438		    (prefix[0] != '\0' && p->n_name[0] == '_') ?
439			(p->n_name + 1) : p->n_name);
440		if (error < 0 || error >= (int)sizeof(symname))
441			continue;
442		lookup.symname = symname;
443		if (lookup.symname[0] == '_')
444			lookup.symname++;
445
446		if (kldsym(0, KLDSYM_LOOKUP, &lookup) != -1) {
447			p->n_type = N_TEXT;
448			p->n_other = 0;
449			p->n_desc = 0;
450			if (_kvm_vnet_initialized(kd, initialize) &&
451			    strcmp(prefix, VNET_SYMPREFIX) == 0)
452				p->n_value =
453				    _kvm_vnet_validaddr(kd, lookup.symvalue);
454			else if (_kvm_dpcpu_initialized(kd, initialize) &&
455			    strcmp(prefix, DPCPU_SYMPREFIX) == 0)
456				p->n_value =
457				    _kvm_dpcpu_validaddr(kd, lookup.symvalue);
458			else
459				p->n_value = lookup.symvalue;
460			++nvalid;
461			/* lookup.symsize */
462		}
463	}
464
465	/*
466	 * Check the number of entries that weren't found. If they exist,
467	 * try again with a prefix for virtualized or DPCPU symbol names.
468	 */
469	error = ((p - nl) - nvalid);
470	if (error && _kvm_vnet_initialized(kd, initialize) && !tried_vnet) {
471		tried_vnet = 1;
472		prefix = VNET_SYMPREFIX;
473		goto again;
474	}
475	if (error && _kvm_dpcpu_initialized(kd, initialize) && !tried_dpcpu) {
476		tried_dpcpu = 1;
477		prefix = DPCPU_SYMPREFIX;
478		goto again;
479	}
480
481	/*
482	 * Return the number of entries that weren't found. If they exist,
483	 * also fill internal error buffer.
484	 */
485	error = ((p - nl) - nvalid);
486	if (error)
487		_kvm_syserr(kd, kd->program, "kvm_nlist");
488	return (error);
489}
490
491int
492kvm_nlist(kvm_t *kd, struct nlist *nl)
493{
494
495	/*
496	 * If called via the public interface, permit intialization of
497	 * further virtualized modules on demand.
498	 */
499	return (_kvm_nlist(kd, nl, 1));
500}
501
502ssize_t
503kvm_read(kvm_t *kd, u_long kva, void *buf, size_t len)
504{
505	int cc;
506	ssize_t cr;
507	off_t pa;
508	char *cp;
509
510	if (ISALIVE(kd)) {
511		/*
512		 * We're using /dev/kmem.  Just read straight from the
513		 * device and let the active kernel do the address translation.
514		 */
515		errno = 0;
516		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
517			_kvm_err(kd, 0, "invalid address (%lx)", kva);
518			return (-1);
519		}
520		cr = read(kd->vmfd, buf, len);
521		if (cr < 0) {
522			_kvm_syserr(kd, 0, "kvm_read");
523			return (-1);
524		} else if (cr < (ssize_t)len)
525			_kvm_err(kd, kd->program, "short read");
526		return (cr);
527	}
528
529	cp = buf;
530	while (len > 0) {
531		cc = _kvm_kvatop(kd, kva, &pa);
532		if (cc == 0)
533			return (-1);
534		if (cc > (ssize_t)len)
535			cc = len;
536		errno = 0;
537		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
538			_kvm_syserr(kd, 0, _PATH_MEM);
539			break;
540		}
541		cr = read(kd->pmfd, cp, cc);
542		if (cr < 0) {
543			_kvm_syserr(kd, kd->program, "kvm_read");
544			break;
545		}
546		/*
547		 * If kvm_kvatop returns a bogus value or our core file is
548		 * truncated, we might wind up seeking beyond the end of the
549		 * core file in which case the read will return 0 (EOF).
550		 */
551		if (cr == 0)
552			break;
553		cp += cr;
554		kva += cr;
555		len -= cr;
556	}
557
558	return (cp - (char *)buf);
559}
560
561ssize_t
562kvm_write(kvm_t *kd, u_long kva, const void *buf, size_t len)
563{
564	int cc;
565	ssize_t cw;
566	off_t pa;
567	const char *cp;
568
569	if (!ISALIVE(kd) && !kd->writable) {
570		_kvm_err(kd, kd->program,
571		    "kvm_write not implemented for dead kernels");
572		return (-1);
573	}
574
575	if (ISALIVE(kd)) {
576		/*
577		 * Just like kvm_read, only we write.
578		 */
579		errno = 0;
580		if (lseek(kd->vmfd, (off_t)kva, 0) == -1 && errno != 0) {
581			_kvm_err(kd, 0, "invalid address (%lx)", kva);
582			return (-1);
583		}
584		cc = write(kd->vmfd, buf, len);
585		if (cc < 0) {
586			_kvm_syserr(kd, 0, "kvm_write");
587			return (-1);
588		} else if ((size_t)cc < len)
589			_kvm_err(kd, kd->program, "short write");
590		return (cc);
591	}
592
593	cp = buf;
594	while (len > 0) {
595		cc = _kvm_kvatop(kd, kva, &pa);
596		if (cc == 0)
597			return (-1);
598		if (cc > (ssize_t)len)
599			cc = len;
600		errno = 0;
601		if (lseek(kd->pmfd, pa, 0) == -1 && errno != 0) {
602			_kvm_syserr(kd, 0, _PATH_MEM);
603			break;
604		}
605		cw = write(kd->pmfd, cp, cc);
606		if (cw < 0) {
607			_kvm_syserr(kd, kd->program, "kvm_write");
608			break;
609		}
610		/*
611		 * If ka_kvatop returns a bogus value or our core file is
612		 * truncated, we might wind up seeking beyond the end of the
613		 * core file in which case the read will return 0 (EOF).
614		 */
615		if (cw == 0)
616			break;
617		cp += cw;
618		kva += cw;
619		len -= cw;
620	}
621
622	return (cp - (char *)buf);
623}
624