bhyveload.c revision 323739
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 323739 2017-09-19 08:19:20Z avg $
27 */
28
29/*-
30 * Copyright (c) 2011 Google, Inc.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 323739 2017-09-19 08:19:20Z avg $
55 */
56
57#include <sys/cdefs.h>
58__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 323739 2017-09-19 08:19:20Z avg $");
59
60#include <sys/ioctl.h>
61#include <sys/stat.h>
62#include <sys/disk.h>
63#include <sys/queue.h>
64
65#include <machine/specialreg.h>
66#include <machine/vmm.h>
67
68#include <dirent.h>
69#include <dlfcn.h>
70#include <errno.h>
71#include <err.h>
72#include <fcntl.h>
73#include <getopt.h>
74#include <libgen.h>
75#include <limits.h>
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <sysexits.h>
80#include <termios.h>
81#include <unistd.h>
82
83#include <vmmapi.h>
84
85#include "userboot.h"
86
/* Byte-count multipliers. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)

/*
 * Identifier handed to the vm_set_*() and vm_setup_*() calls below;
 * presumably the vcpu id of the boot processor -- confirm against vmmapi.
 */
#define	BSP	0

/* Capacity of the disk_fd[] table. */
#define	NDISKS	32

/* Host directory prefix used by cb_open(); set from the -h option. */
static char *host_base;

/* Console terminal state: the modified copy and the saved original. */
static struct termios term;
static struct termios oldterm;

/* Descriptors of the disk images opened so far, and how many there are. */
static int disk_fd[NDISKS];
static int ndisks;

/* Console input and output descriptors. */
static int consin_fd;
static int consout_fd;

static char *vmname;
static char *progname;
static struct vmctx *ctx;

/* Values recorded by the register callbacks and consumed by cb_exec(). */
static uint64_t gdtbase;
static uint64_t cr3;
static uint64_t rsp;

/* Declared early because the register callbacks call it on failure. */
static void cb_exit(void *arg, int v);
105
106/*
107 * Console i/o callbacks
108 */
109
110static void
111cb_putc(void *arg, int ch)
112{
113	char c = ch;
114
115	(void) write(consout_fd, &c, 1);
116}
117
118static int
119cb_getc(void *arg)
120{
121	char c;
122
123	if (read(consin_fd, &c, 1) == 1)
124		return (c);
125	return (-1);
126}
127
128static int
129cb_poll(void *arg)
130{
131	int n;
132
133	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
134		return (n > 0);
135	return (0);
136}
137
138/*
139 * Host filesystem i/o callbacks
140 */
141
/*
 * Handle returned by cb_open() and consumed by the other host filesystem
 * callbacks.
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is valid, else cf_u.fd */
	size_t cf_size;		/* size field filled in by cb_open() */
	struct stat cf_stat;	/* stat(2) result taken when opening */
	union {
		int fd;		/* regular file descriptor */
		DIR *dir;	/* directory stream */
	} cf_u;
};
151
152static int
153cb_open(void *arg, const char *filename, void **hp)
154{
155	struct stat st;
156	struct cb_file *cf;
157	char path[PATH_MAX];
158
159	if (!host_base)
160		return (ENOENT);
161
162	strlcpy(path, host_base, PATH_MAX);
163	if (path[strlen(path) - 1] == '/')
164		path[strlen(path) - 1] = 0;
165	strlcat(path, filename, PATH_MAX);
166	cf = malloc(sizeof(struct cb_file));
167	if (stat(path, &cf->cf_stat) < 0) {
168		free(cf);
169		return (errno);
170	}
171
172	cf->cf_size = st.st_size;
173	if (S_ISDIR(cf->cf_stat.st_mode)) {
174		cf->cf_isdir = 1;
175		cf->cf_u.dir = opendir(path);
176		if (!cf->cf_u.dir)
177			goto out;
178		*hp = cf;
179		return (0);
180	}
181	if (S_ISREG(cf->cf_stat.st_mode)) {
182		cf->cf_isdir = 0;
183		cf->cf_u.fd = open(path, O_RDONLY);
184		if (cf->cf_u.fd < 0)
185			goto out;
186		*hp = cf;
187		return (0);
188	}
189
190out:
191	free(cf);
192	return (EINVAL);
193}
194
195static int
196cb_close(void *arg, void *h)
197{
198	struct cb_file *cf = h;
199
200	if (cf->cf_isdir)
201		closedir(cf->cf_u.dir);
202	else
203		close(cf->cf_u.fd);
204	free(cf);
205
206	return (0);
207}
208
209static int
210cb_isdir(void *arg, void *h)
211{
212	struct cb_file *cf = h;
213
214	return (cf->cf_isdir);
215}
216
217static int
218cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
219{
220	struct cb_file *cf = h;
221	ssize_t sz;
222
223	if (cf->cf_isdir)
224		return (EINVAL);
225	sz = read(cf->cf_u.fd, buf, size);
226	if (sz < 0)
227		return (EINVAL);
228	*resid = size - sz;
229	return (0);
230}
231
232static int
233cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
234	   size_t *namelen_return, char *name)
235{
236	struct cb_file *cf = h;
237	struct dirent *dp;
238
239	if (!cf->cf_isdir)
240		return (EINVAL);
241
242	dp = readdir(cf->cf_u.dir);
243	if (!dp)
244		return (ENOENT);
245
246	/*
247	 * Note: d_namlen is in the range 0..255 and therefore less
248	 * than PATH_MAX so we don't need to test before copying.
249	 */
250	*fileno_return = dp->d_fileno;
251	*type_return = dp->d_type;
252	*namelen_return = dp->d_namlen;
253	memcpy(name, dp->d_name, dp->d_namlen);
254	name[dp->d_namlen] = 0;
255
256	return (0);
257}
258
259static int
260cb_seek(void *arg, void *h, uint64_t offset, int whence)
261{
262	struct cb_file *cf = h;
263
264	if (cf->cf_isdir)
265		return (EINVAL);
266	if (lseek(cf->cf_u.fd, offset, whence) < 0)
267		return (errno);
268	return (0);
269}
270
271static int
272cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
273{
274	struct cb_file *cf = h;
275
276	*mode = cf->cf_stat.st_mode;
277	*uid = cf->cf_stat.st_uid;
278	*gid = cf->cf_stat.st_gid;
279	*size = cf->cf_stat.st_size;
280	return (0);
281}
282
283/*
284 * Disk image i/o callbacks
285 */
286
287static int
288cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
289	    size_t *resid)
290{
291	ssize_t n;
292
293	if (unit < 0 || unit >= ndisks )
294		return (EIO);
295	n = pread(disk_fd[unit], to, size, from);
296	if (n < 0)
297		return (errno);
298	*resid = size - n;
299	return (0);
300}
301
302static int
303cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
304{
305	struct stat sb;
306
307	if (unit < 0 || unit >= ndisks)
308		return (EBADF);
309
310	switch (cmd) {
311	case DIOCGSECTORSIZE:
312		*(u_int *)data = 512;
313		break;
314	case DIOCGMEDIASIZE:
315		if (fstat(disk_fd[unit], &sb) != 0)
316			return (ENOTTY);
317		if (S_ISCHR(sb.st_mode) &&
318		    ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0)
319				return (ENOTTY);
320		*(off_t *)data = sb.st_size;
321		break;
322	default:
323		return (ENOTTY);
324	}
325
326	return (0);
327}
328
329/*
330 * Guest virtual machine i/o callbacks
331 */
332static int
333cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
334{
335	char *ptr;
336
337	to &= 0x7fffffff;
338
339	ptr = vm_map_gpa(ctx, to, size);
340	if (ptr == NULL)
341		return (EFAULT);
342
343	memcpy(ptr, from, size);
344	return (0);
345}
346
347static int
348cb_copyout(void *arg, uint64_t from, void *to, size_t size)
349{
350	char *ptr;
351
352	from &= 0x7fffffff;
353
354	ptr = vm_map_gpa(ctx, from, size);
355	if (ptr == NULL)
356		return (EFAULT);
357
358	memcpy(to, ptr, size);
359	return (0);
360}
361
362static void
363cb_setreg(void *arg, int r, uint64_t v)
364{
365	int error;
366	enum vm_reg_name vmreg;
367
368	vmreg = VM_REG_LAST;
369
370	switch (r) {
371	case 4:
372		vmreg = VM_REG_GUEST_RSP;
373		rsp = v;
374		break;
375	default:
376		break;
377	}
378
379	if (vmreg == VM_REG_LAST) {
380		printf("test_setreg(%d): not implemented\n", r);
381		cb_exit(NULL, USERBOOT_EXIT_QUIT);
382	}
383
384	error = vm_set_register(ctx, BSP, vmreg, v);
385	if (error) {
386		perror("vm_set_register");
387		cb_exit(NULL, USERBOOT_EXIT_QUIT);
388	}
389}
390
391static void
392cb_setmsr(void *arg, int r, uint64_t v)
393{
394	int error;
395	enum vm_reg_name vmreg;
396
397	vmreg = VM_REG_LAST;
398
399	switch (r) {
400	case MSR_EFER:
401		vmreg = VM_REG_GUEST_EFER;
402		break;
403	default:
404		break;
405	}
406
407	if (vmreg == VM_REG_LAST) {
408		printf("test_setmsr(%d): not implemented\n", r);
409		cb_exit(NULL, USERBOOT_EXIT_QUIT);
410	}
411
412	error = vm_set_register(ctx, BSP, vmreg, v);
413	if (error) {
414		perror("vm_set_msr");
415		cb_exit(NULL, USERBOOT_EXIT_QUIT);
416	}
417}
418
419static void
420cb_setcr(void *arg, int r, uint64_t v)
421{
422	int error;
423	enum vm_reg_name vmreg;
424
425	vmreg = VM_REG_LAST;
426
427	switch (r) {
428	case 0:
429		vmreg = VM_REG_GUEST_CR0;
430		break;
431	case 3:
432		vmreg = VM_REG_GUEST_CR3;
433		cr3 = v;
434		break;
435	case 4:
436		vmreg = VM_REG_GUEST_CR4;
437		break;
438	default:
439		break;
440	}
441
442	if (vmreg == VM_REG_LAST) {
443		printf("test_setcr(%d): not implemented\n", r);
444		cb_exit(NULL, USERBOOT_EXIT_QUIT);
445	}
446
447	error = vm_set_register(ctx, BSP, vmreg, v);
448	if (error) {
449		perror("vm_set_cr");
450		cb_exit(NULL, USERBOOT_EXIT_QUIT);
451	}
452}
453
454static void
455cb_setgdt(void *arg, uint64_t base, size_t size)
456{
457	int error;
458
459	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
460	if (error != 0) {
461		perror("vm_set_desc(gdt)");
462		cb_exit(NULL, USERBOOT_EXIT_QUIT);
463	}
464
465	gdtbase = base;
466}
467
468static void
469cb_exec(void *arg, uint64_t rip)
470{
471	int error;
472
473	if (cr3 == 0)
474		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
475		    rsp);
476	else
477		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
478		    rsp);
479	if (error) {
480		perror("vm_setup_freebsd_registers");
481		cb_exit(NULL, USERBOOT_EXIT_QUIT);
482	}
483
484	cb_exit(NULL, 0);
485}
486
487/*
488 * Misc
489 */
490
/*
 * Suspend execution for usec microseconds using usleep(3).
 */
static void
cb_delay(void *arg, int usec)
{

	(void) usleep(usec);
}
497
498static void
499cb_exit(void *arg, int v)
500{
501
502	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
503	exit(v);
504}
505
506static void
507cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
508{
509
510	*ret_lowmem = vm_get_lowmem_size(ctx);
511	*ret_highmem = vm_get_highmem_size(ctx);
512}
513
/*
 * One loader environment setting.  Entries are kept on a singly-linked
 * list; the string is referenced, not copied.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;	/* list linkage */
};

/* Head of the environment list; starts out empty. */
static SLIST_HEAD(envhead, env) envhead = SLIST_HEAD_INITIALIZER(envhead);
520
521static void
522addenv(const char *str)
523{
524	struct env *env;
525
526	env = malloc(sizeof(struct env));
527	env->str = str;
528	SLIST_INSERT_HEAD(&envhead, env, next);
529}
530
531static const char *
532cb_getenv(void *arg, int num)
533{
534	int i;
535	struct env *env;
536
537	i = 0;
538	SLIST_FOREACH(env, &envhead, next) {
539		if (i == num)
540			return (env->str);
541		i++;
542	}
543
544	return (NULL);
545}
546
547static struct loader_callbacks cb = {
548	.getc = cb_getc,
549	.putc = cb_putc,
550	.poll = cb_poll,
551
552	.open = cb_open,
553	.close = cb_close,
554	.isdir = cb_isdir,
555	.read = cb_read,
556	.readdir = cb_readdir,
557	.seek = cb_seek,
558	.stat = cb_stat,
559
560	.diskread = cb_diskread,
561	.diskioctl = cb_diskioctl,
562
563	.copyin = cb_copyin,
564	.copyout = cb_copyout,
565	.setreg = cb_setreg,
566	.setmsr = cb_setmsr,
567	.setcr = cb_setcr,
568	.setgdt = cb_setgdt,
569	.exec = cb_exec,
570
571	.delay = cb_delay,
572	.exit = cb_exit,
573	.getmem = cb_getmem,
574
575	.getenv = cb_getenv,
576};
577
578static int
579altcons_open(char *path)
580{
581	struct stat sb;
582	int err;
583	int fd;
584
585	/*
586	 * Allow stdio to be passed in so that the same string
587	 * can be used for the bhyveload console and bhyve com-port
588	 * parameters
589	 */
590	if (!strcmp(path, "stdio"))
591		return (0);
592
593	err = stat(path, &sb);
594	if (err == 0) {
595		if (!S_ISCHR(sb.st_mode))
596			err = ENOTSUP;
597		else {
598			fd = open(path, O_RDWR | O_NONBLOCK);
599			if (fd < 0)
600				err = errno;
601			else
602				consin_fd = consout_fd = fd;
603		}
604	}
605
606	return (err);
607}
608
609static int
610disk_open(char *path)
611{
612	int err, fd;
613
614	if (ndisks >= NDISKS)
615		return (ERANGE);
616
617	err = 0;
618	fd = open(path, O_RDONLY);
619
620	if (fd > 0) {
621		disk_fd[ndisks] = fd;
622		ndisks++;
623	} else
624		err = errno;
625
626	return (err);
627}
628
629static void
630usage(void)
631{
632
633	fprintf(stderr,
634	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
635	    "       %*s [-h <host-path>] [-m mem-size] <vmname>\n",
636	    progname,
637	    (int)strlen(progname), "");
638	exit(1);
639}
640
641int
642main(int argc, char** argv)
643{
644	char *loader;
645	void *h;
646	void (*func)(struct loader_callbacks *, void *, int, int);
647	uint64_t mem_size;
648	int opt, error, need_reinit, memflags;
649
650	progname = basename(argv[0]);
651
652	loader = NULL;
653
654	memflags = 0;
655	mem_size = 256 * MB;
656
657	consin_fd = STDIN_FILENO;
658	consout_fd = STDOUT_FILENO;
659
660	while ((opt = getopt(argc, argv, "Sc:d:e:h:l:m:")) != -1) {
661		switch (opt) {
662		case 'c':
663			error = altcons_open(optarg);
664			if (error != 0)
665				errx(EX_USAGE, "Could not open '%s'", optarg);
666			break;
667
668		case 'd':
669			error = disk_open(optarg);
670			if (error != 0)
671				errx(EX_USAGE, "Could not open '%s'", optarg);
672			break;
673
674		case 'e':
675			addenv(optarg);
676			break;
677
678		case 'h':
679			host_base = optarg;
680			break;
681
682		case 'l':
683			if (loader != NULL)
684				errx(EX_USAGE, "-l can only be given once");
685			loader = strdup(optarg);
686			if (loader == NULL)
687				err(EX_OSERR, "malloc");
688			break;
689
690		case 'm':
691			error = vm_parse_memsize(optarg, &mem_size);
692			if (error != 0)
693				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
694			break;
695		case 'S':
696			memflags |= VM_MEM_F_WIRED;
697			break;
698		case '?':
699			usage();
700		}
701	}
702
703	argc -= optind;
704	argv += optind;
705
706	if (argc != 1)
707		usage();
708
709	vmname = argv[0];
710
711	need_reinit = 0;
712	error = vm_create(vmname);
713	if (error) {
714		if (errno != EEXIST) {
715			perror("vm_create");
716			exit(1);
717		}
718		need_reinit = 1;
719	}
720
721	ctx = vm_open(vmname);
722	if (ctx == NULL) {
723		perror("vm_open");
724		exit(1);
725	}
726
727	if (need_reinit) {
728		error = vm_reinit(ctx);
729		if (error) {
730			perror("vm_reinit");
731			exit(1);
732		}
733	}
734
735	vm_set_memflags(ctx, memflags);
736	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
737	if (error) {
738		perror("vm_setup_memory");
739		exit(1);
740	}
741
742	if (loader == NULL) {
743		loader = strdup("/boot/userboot.so");
744		if (loader == NULL)
745			err(EX_OSERR, "malloc");
746	}
747	h = dlopen(loader, RTLD_LOCAL);
748	if (!h) {
749		printf("%s\n", dlerror());
750		free(loader);
751		return (1);
752	}
753	func = dlsym(h, "loader_main");
754	if (!func) {
755		printf("%s\n", dlerror());
756		free(loader);
757		return (1);
758	}
759
760	tcgetattr(consout_fd, &term);
761	oldterm = term;
762	cfmakeraw(&term);
763	term.c_cflag |= CLOCAL;
764
765	tcsetattr(consout_fd, TCSAFLUSH, &term);
766
767	addenv("smbios.bios.vendor=BHYVE");
768	addenv("boot_serial=1");
769
770	func(&cb, NULL, USERBOOT_VERSION_3, ndisks);
771
772	free(loader);
773	return (0);
774}
775