1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29/*-
30 * Copyright (c) 2011 Google, Inc.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 */
54
55#include <sys/cdefs.h>
56#include <sys/ioctl.h>
57#include <sys/stat.h>
58#include <sys/disk.h>
59#include <sys/queue.h>
60
61#include <machine/specialreg.h>
62#include <machine/vmm.h>
63
64#include <assert.h>
65#include <dirent.h>
66#include <dlfcn.h>
67#include <errno.h>
68#include <err.h>
69#include <fcntl.h>
70#include <getopt.h>
71#include <libgen.h>
72#include <limits.h>
73#include <setjmp.h>
74#include <stdio.h>
75#include <stdlib.h>
76#include <string.h>
77#include <sysexits.h>
78#include <termios.h>
79#include <unistd.h>
80
81#include <capsicum_helpers.h>
82#include <vmmapi.h>
83
84#include "userboot.h"
85
/* Size units, expressed in bytes. */
#define	MB	(1024UL * 1024UL)
#define	GB	(1024UL * MB)

/* Index of the bootstrap processor; the only vCPU main() opens. */
#define	BSP	0

/* Capacity of the disk_fd[] table. */
#define	NDISKS	32
91
/*
 * Values passed to longjmp(3) when the running loader has to be abandoned
 * and main() re-entered.  main() currently only tests for a non-zero
 * setjmp(3) return and does not tell the two apart.
 */
enum {
	/* Zero is reserved: it is what the direct setjmp(3) call returns. */
	JMP_SWAPLOADER = 1,
	JMP_REBOOT = 2,
};
101
102static struct termios term, oldterm;
103static int disk_fd[NDISKS];
104static int ndisks;
105static int consin_fd, consout_fd;
106static int hostbase_fd = -1;
107
108static void *loader_hdl;
109static char *loader;
110static int explicit_loader_fd = -1;
111static jmp_buf jb;
112
113static char *vmname, *progname;
114static struct vmctx *ctx;
115static struct vcpu *vcpu;
116
117static uint64_t gdtbase, cr3, rsp;
118
119static void cb_exit(void *arg, int v);
120
121/*
122 * Console i/o callbacks
123 */
124
125static void
126cb_putc(void *arg __unused, int ch)
127{
128	char c = ch;
129
130	(void) write(consout_fd, &c, 1);
131}
132
133static int
134cb_getc(void *arg __unused)
135{
136	char c;
137
138	if (read(consin_fd, &c, 1) == 1)
139		return (c);
140	return (-1);
141}
142
143static int
144cb_poll(void *arg __unused)
145{
146	int n;
147
148	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
149		return (n > 0);
150	return (0);
151}
152
153/*
154 * Host filesystem i/o callbacks
155 */
156
/*
 * Handle created by cb_open() for a regular file or directory and released
 * by cb_close().
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is valid; zero: cf_u.fd */
	size_t cf_size;		/* copy of cf_stat.st_size made by cb_open() */
	struct stat cf_stat;	/* fstatat(2) result captured by cb_open() */
	union {
		int fd;		/* descriptor of a regular file */
		DIR *dir;	/* directory stream from fdopendir(3) */
	} cf_u;
};
166
167static int
168cb_open(void *arg __unused, const char *filename, void **hp)
169{
170	struct cb_file *cf;
171	struct stat sb;
172	int fd, flags;
173
174	cf = NULL;
175	fd = -1;
176	flags = O_RDONLY | O_RESOLVE_BENEATH;
177	if (hostbase_fd == -1)
178		return (ENOENT);
179
180	/* Absolute paths are relative to our hostbase, chop off leading /. */
181	if (filename[0] == '/')
182		filename++;
183
184	/* Lookup of /, use . instead. */
185	if (filename[0] == '\0')
186		filename = ".";
187
188	if (fstatat(hostbase_fd, filename, &sb, AT_RESOLVE_BENEATH) < 0)
189		return (errno);
190
191	if (!S_ISDIR(sb.st_mode) && !S_ISREG(sb.st_mode))
192		return (EINVAL);
193
194	if (S_ISDIR(sb.st_mode))
195		flags |= O_DIRECTORY;
196
197	/* May be opening the root dir */
198	fd = openat(hostbase_fd, filename, flags);
199	if (fd < 0)
200		return (errno);
201
202	cf = malloc(sizeof(struct cb_file));
203	if (cf == NULL) {
204		close(fd);
205		return (ENOMEM);
206	}
207
208	cf->cf_stat = sb;
209	cf->cf_size = cf->cf_stat.st_size;
210
211	if (S_ISDIR(cf->cf_stat.st_mode)) {
212		cf->cf_isdir = 1;
213		cf->cf_u.dir = fdopendir(fd);
214		if (cf->cf_u.dir == NULL) {
215			close(fd);
216			free(cf);
217			return (ENOMEM);
218		}
219	} else {
220		assert(S_ISREG(cf->cf_stat.st_mode));
221		cf->cf_isdir = 0;
222		cf->cf_u.fd = fd;
223	}
224	*hp = cf;
225	return (0);
226}
227
228static int
229cb_close(void *arg __unused, void *h)
230{
231	struct cb_file *cf = h;
232
233	if (cf->cf_isdir)
234		closedir(cf->cf_u.dir);
235	else
236		close(cf->cf_u.fd);
237	free(cf);
238
239	return (0);
240}
241
242static int
243cb_isdir(void *arg __unused, void *h)
244{
245	struct cb_file *cf = h;
246
247	return (cf->cf_isdir);
248}
249
250static int
251cb_read(void *arg __unused, void *h, void *buf, size_t size, size_t *resid)
252{
253	struct cb_file *cf = h;
254	ssize_t sz;
255
256	if (cf->cf_isdir)
257		return (EINVAL);
258	sz = read(cf->cf_u.fd, buf, size);
259	if (sz < 0)
260		return (EINVAL);
261	*resid = size - sz;
262	return (0);
263}
264
265static int
266cb_readdir(void *arg __unused, void *h, uint32_t *fileno_return,
267    uint8_t *type_return, size_t *namelen_return, char *name)
268{
269	struct cb_file *cf = h;
270	struct dirent *dp;
271
272	if (!cf->cf_isdir)
273		return (EINVAL);
274
275	dp = readdir(cf->cf_u.dir);
276	if (!dp)
277		return (ENOENT);
278
279	/*
280	 * Note: d_namlen is in the range 0..255 and therefore less
281	 * than PATH_MAX so we don't need to test before copying.
282	 */
283	*fileno_return = dp->d_fileno;
284	*type_return = dp->d_type;
285	*namelen_return = dp->d_namlen;
286	memcpy(name, dp->d_name, dp->d_namlen);
287	name[dp->d_namlen] = 0;
288
289	return (0);
290}
291
292static int
293cb_seek(void *arg __unused, void *h, uint64_t offset, int whence)
294{
295	struct cb_file *cf = h;
296
297	if (cf->cf_isdir)
298		return (EINVAL);
299	if (lseek(cf->cf_u.fd, offset, whence) < 0)
300		return (errno);
301	return (0);
302}
303
304static int
305cb_stat(void *arg __unused, void *h, struct stat *sbp)
306{
307	struct cb_file *cf = h;
308
309	memset(sbp, 0, sizeof(struct stat));
310	sbp->st_mode = cf->cf_stat.st_mode;
311	sbp->st_uid = cf->cf_stat.st_uid;
312	sbp->st_gid = cf->cf_stat.st_gid;
313	sbp->st_size = cf->cf_stat.st_size;
314	sbp->st_mtime = cf->cf_stat.st_mtime;
315	sbp->st_dev = cf->cf_stat.st_dev;
316	sbp->st_ino = cf->cf_stat.st_ino;
317
318	return (0);
319}
320
321/*
322 * Disk image i/o callbacks
323 */
324
325static int
326cb_diskread(void *arg __unused, int unit, uint64_t from, void *to, size_t size,
327    size_t *resid)
328{
329	ssize_t n;
330
331	if (unit < 0 || unit >= ndisks)
332		return (EIO);
333	n = pread(disk_fd[unit], to, size, from);
334	if (n < 0)
335		return (errno);
336	*resid = size - n;
337	return (0);
338}
339
340static int
341cb_diskwrite(void *arg __unused, int unit, uint64_t offset, void *src,
342    size_t size, size_t *resid)
343{
344	ssize_t n;
345
346	if (unit < 0 || unit >= ndisks)
347		return (EIO);
348	n = pwrite(disk_fd[unit], src, size, offset);
349	if (n < 0)
350		return (errno);
351	*resid = size - n;
352	return (0);
353}
354
355static int
356cb_diskioctl(void *arg __unused, int unit, u_long cmd, void *data)
357{
358	struct stat sb;
359
360	if (unit < 0 || unit >= ndisks)
361		return (EBADF);
362
363	switch (cmd) {
364	case DIOCGSECTORSIZE:
365		*(u_int *)data = 512;
366		break;
367	case DIOCGMEDIASIZE:
368		if (fstat(disk_fd[unit], &sb) != 0)
369			return (ENOTTY);
370		if (S_ISCHR(sb.st_mode) &&
371		    ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0)
372				return (ENOTTY);
373		*(off_t *)data = sb.st_size;
374		break;
375	default:
376		return (ENOTTY);
377	}
378
379	return (0);
380}
381
382/*
383 * Guest virtual machine i/o callbacks
384 */
385static int
386cb_copyin(void *arg __unused, const void *from, uint64_t to, size_t size)
387{
388	char *ptr;
389
390	to &= 0x7fffffff;
391
392	ptr = vm_map_gpa(ctx, to, size);
393	if (ptr == NULL)
394		return (EFAULT);
395
396	memcpy(ptr, from, size);
397	return (0);
398}
399
400static int
401cb_copyout(void *arg __unused, uint64_t from, void *to, size_t size)
402{
403	char *ptr;
404
405	from &= 0x7fffffff;
406
407	ptr = vm_map_gpa(ctx, from, size);
408	if (ptr == NULL)
409		return (EFAULT);
410
411	memcpy(to, ptr, size);
412	return (0);
413}
414
415static void
416cb_setreg(void *arg __unused, int r, uint64_t v)
417{
418	int error;
419	enum vm_reg_name vmreg;
420
421	vmreg = VM_REG_LAST;
422
423	switch (r) {
424	case 4:
425		vmreg = VM_REG_GUEST_RSP;
426		rsp = v;
427		break;
428	default:
429		break;
430	}
431
432	if (vmreg == VM_REG_LAST) {
433		printf("test_setreg(%d): not implemented\n", r);
434		cb_exit(NULL, USERBOOT_EXIT_QUIT);
435	}
436
437	error = vm_set_register(vcpu, vmreg, v);
438	if (error) {
439		perror("vm_set_register");
440		cb_exit(NULL, USERBOOT_EXIT_QUIT);
441	}
442}
443
444static void
445cb_setmsr(void *arg __unused, int r, uint64_t v)
446{
447	int error;
448	enum vm_reg_name vmreg;
449
450	vmreg = VM_REG_LAST;
451
452	switch (r) {
453	case MSR_EFER:
454		vmreg = VM_REG_GUEST_EFER;
455		break;
456	default:
457		break;
458	}
459
460	if (vmreg == VM_REG_LAST) {
461		printf("test_setmsr(%d): not implemented\n", r);
462		cb_exit(NULL, USERBOOT_EXIT_QUIT);
463	}
464
465	error = vm_set_register(vcpu, vmreg, v);
466	if (error) {
467		perror("vm_set_msr");
468		cb_exit(NULL, USERBOOT_EXIT_QUIT);
469	}
470}
471
472static void
473cb_setcr(void *arg __unused, int r, uint64_t v)
474{
475	int error;
476	enum vm_reg_name vmreg;
477
478	vmreg = VM_REG_LAST;
479
480	switch (r) {
481	case 0:
482		vmreg = VM_REG_GUEST_CR0;
483		break;
484	case 3:
485		vmreg = VM_REG_GUEST_CR3;
486		cr3 = v;
487		break;
488	case 4:
489		vmreg = VM_REG_GUEST_CR4;
490		break;
491	default:
492		break;
493	}
494
495	if (vmreg == VM_REG_LAST) {
496		printf("test_setcr(%d): not implemented\n", r);
497		cb_exit(NULL, USERBOOT_EXIT_QUIT);
498	}
499
500	error = vm_set_register(vcpu, vmreg, v);
501	if (error) {
502		perror("vm_set_cr");
503		cb_exit(NULL, USERBOOT_EXIT_QUIT);
504	}
505}
506
507static void
508cb_setgdt(void *arg __unused, uint64_t base, size_t size)
509{
510	int error;
511
512	error = vm_set_desc(vcpu, VM_REG_GUEST_GDTR, base, size - 1, 0);
513	if (error != 0) {
514		perror("vm_set_desc(gdt)");
515		cb_exit(NULL, USERBOOT_EXIT_QUIT);
516	}
517
518	gdtbase = base;
519}
520
521static void
522cb_exec(void *arg __unused, uint64_t rip)
523{
524	int error;
525
526	if (cr3 == 0)
527		error = vm_setup_freebsd_registers_i386(vcpu, rip, gdtbase,
528		    rsp);
529	else
530		error = vm_setup_freebsd_registers(vcpu, rip, cr3, gdtbase,
531		    rsp);
532	if (error) {
533		perror("vm_setup_freebsd_registers");
534		cb_exit(NULL, USERBOOT_EXIT_QUIT);
535	}
536
537	cb_exit(NULL, 0);
538}
539
540/*
541 * Misc
542 */
543
544static void
545cb_delay(void *arg __unused, int usec)
546{
547
548	usleep(usec);
549}
550
551static void
552cb_exit(void *arg __unused, int v)
553{
554
555	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
556	if (v == USERBOOT_EXIT_REBOOT)
557		longjmp(jb, JMP_REBOOT);
558	exit(v);
559}
560
561static void
562cb_getmem(void *arg __unused, uint64_t *ret_lowmem, uint64_t *ret_highmem)
563{
564
565	*ret_lowmem = vm_get_lowmem_size(ctx);
566	*ret_highmem = vm_get_highmem_size(ctx);
567}
568
/*
 * Environment strings handed to the loader through cb_getenv().  addenv()
 * pushes each new entry onto the head of the list, so index 0 refers to the
 * most recently added string.
 */
struct env {
	char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

/* Head of the list of environment strings. */
static SLIST_HEAD(envhead, env) envhead;
575
576static void
577addenv(const char *str)
578{
579	struct env *env;
580
581	env = malloc(sizeof(struct env));
582	if (env == NULL)
583		err(EX_OSERR, "malloc");
584	env->str = strdup(str);
585	if (env->str == NULL)
586		err(EX_OSERR, "strdup");
587	SLIST_INSERT_HEAD(&envhead, env, next);
588}
589
590static char *
591cb_getenv(void *arg __unused, int num)
592{
593	int i;
594	struct env *env;
595
596	i = 0;
597	SLIST_FOREACH(env, &envhead, next) {
598		if (i == num)
599			return (env->str);
600		i++;
601	}
602
603	return (NULL);
604}
605
606static int
607cb_vm_set_register(void *arg __unused, int vcpuid, int reg, uint64_t val)
608{
609
610	assert(vcpuid == BSP);
611	return (vm_set_register(vcpu, reg, val));
612}
613
614static int
615cb_vm_set_desc(void *arg __unused, int vcpuid, int reg, uint64_t base,
616    u_int limit, u_int access)
617{
618
619	assert(vcpuid == BSP);
620	return (vm_set_desc(vcpu, reg, base, limit, access));
621}
622
623static void
624cb_swap_interpreter(void *arg __unused, const char *interp_req)
625{
626
627	/*
628	 * If the user specified a loader but we detected a mismatch, we should
629	 * not try to pivot to a different loader on them.
630	 */
631	free(loader);
632	if (explicit_loader_fd != -1) {
633		perror("requested loader interpreter does not match guest userboot");
634		cb_exit(NULL, 1);
635	}
636	if (interp_req == NULL || *interp_req == '\0') {
637		perror("guest failed to request an interpreter");
638		cb_exit(NULL, 1);
639	}
640
641	if (asprintf(&loader, "userboot_%s.so", interp_req) == -1)
642		err(EX_OSERR, "malloc");
643	longjmp(jb, JMP_SWAPLOADER);
644}
645
/*
 * Callback table that main() passes to the loader's loader_main() entry
 * point.
 */
static struct loader_callbacks cb = {
	/* Console i/o */
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	/* Host filesystem i/o */
	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	/* Disk image i/o */
	.diskread = cb_diskread,
	.diskwrite = cb_diskwrite,
	.diskioctl = cb_diskioctl,

	/* Guest memory and register setup */
	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	/* Misc */
	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	.getenv = cb_getenv,

	/* Version 4 additions */
	.vm_set_register = cb_vm_set_register,
	.vm_set_desc = cb_vm_set_desc,

	/* Version 5 additions */
	.swap_interpreter = cb_swap_interpreter,
};
684
685static int
686altcons_open(char *path)
687{
688	struct stat sb;
689	int err;
690	int fd;
691
692	/*
693	 * Allow stdio to be passed in so that the same string
694	 * can be used for the bhyveload console and bhyve com-port
695	 * parameters
696	 */
697	if (!strcmp(path, "stdio"))
698		return (0);
699
700	err = stat(path, &sb);
701	if (err == 0) {
702		if (!S_ISCHR(sb.st_mode))
703			err = ENOTSUP;
704		else {
705			fd = open(path, O_RDWR | O_NONBLOCK);
706			if (fd < 0)
707				err = errno;
708			else
709				consin_fd = consout_fd = fd;
710		}
711	}
712
713	return (err);
714}
715
716static int
717disk_open(char *path)
718{
719	int fd;
720
721	if (ndisks >= NDISKS)
722		return (ERANGE);
723
724	fd = open(path, O_RDWR);
725	if (fd < 0)
726		return (errno);
727
728	disk_fd[ndisks] = fd;
729	ndisks++;
730
731	return (0);
732}
733
734static void
735usage(void)
736{
737
738	fprintf(stderr,
739	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
740	    "       %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n",
741	    progname,
742	    (int)strlen(progname), "");
743	exit(1);
744}
745
746static void
747hostbase_open(const char *base)
748{
749	cap_rights_t rights;
750
751	if (hostbase_fd != -1)
752		close(hostbase_fd);
753	hostbase_fd = open(base, O_DIRECTORY | O_PATH);
754	if (hostbase_fd == -1)
755		err(EX_OSERR, "open");
756
757	if (caph_rights_limit(hostbase_fd, cap_rights_init(&rights, CAP_FSTATAT,
758	    CAP_LOOKUP, CAP_PREAD)) < 0)
759		err(EX_OSERR, "caph_rights_limit");
760}
761
762static void
763loader_open(int bootfd)
764{
765	int fd;
766
767	if (loader == NULL) {
768		loader = strdup("userboot.so");
769		if (loader == NULL)
770			err(EX_OSERR, "malloc");
771	}
772
773	assert(bootfd >= 0 || explicit_loader_fd >= 0);
774	if (explicit_loader_fd >= 0)
775		fd = explicit_loader_fd;
776	else
777		fd = openat(bootfd, loader, O_RDONLY | O_RESOLVE_BENEATH);
778	if (fd == -1)
779		err(EX_OSERR, "openat");
780
781	loader_hdl = fdlopen(fd, RTLD_LOCAL);
782	if (!loader_hdl)
783		errx(EX_OSERR, "dlopen: %s", dlerror());
784	if (fd != explicit_loader_fd)
785		close(fd);
786}
787
788int
789main(int argc, char** argv)
790{
791	void (*func)(struct loader_callbacks *, void *, int, int);
792	uint64_t mem_size;
793	int bootfd, opt, error, memflags, need_reinit;
794
795	bootfd = -1;
796	progname = basename(argv[0]);
797
798	memflags = 0;
799	mem_size = 256 * MB;
800
801	consin_fd = STDIN_FILENO;
802	consout_fd = STDOUT_FILENO;
803
804	while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
805		switch (opt) {
806		case 'c':
807			error = altcons_open(optarg);
808			if (error != 0)
809				errx(EX_USAGE, "Could not open '%s'", optarg);
810			break;
811
812		case 'd':
813			error = disk_open(optarg);
814			if (error != 0)
815				errx(EX_USAGE, "Could not open '%s'", optarg);
816			break;
817
818		case 'e':
819			addenv(optarg);
820			break;
821
822		case 'h':
823			hostbase_open(optarg);
824			break;
825
826		case 'l':
827			if (loader != NULL)
828				errx(EX_USAGE, "-l can only be given once");
829			loader = strdup(optarg);
830			if (loader == NULL)
831				err(EX_OSERR, "malloc");
832			explicit_loader_fd = open(loader, O_RDONLY);
833			if (explicit_loader_fd == -1)
834				err(EX_OSERR, "%s", loader);
835			break;
836
837		case 'm':
838			error = vm_parse_memsize(optarg, &mem_size);
839			if (error != 0)
840				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
841			break;
842		case 'C':
843			memflags |= VM_MEM_F_INCORE;
844			break;
845		case 'S':
846			memflags |= VM_MEM_F_WIRED;
847			break;
848		case '?':
849			usage();
850		}
851	}
852
853	argc -= optind;
854	argv += optind;
855
856	if (argc != 1)
857		usage();
858
859	vmname = argv[0];
860
861	need_reinit = 0;
862	error = vm_create(vmname);
863	if (error) {
864		if (errno != EEXIST)
865			err(1, "vm_create");
866		need_reinit = 1;
867	}
868
869	ctx = vm_open(vmname);
870	if (ctx == NULL)
871		err(1, "vm_open");
872
873	/*
874	 * If we weren't given an explicit loader to use, we need to support the
875	 * guest requesting a different one.
876	 */
877	if (explicit_loader_fd == -1) {
878		cap_rights_t rights;
879
880		bootfd = open("/boot", O_DIRECTORY | O_PATH);
881		if (bootfd == -1)
882			err(1, "open");
883
884		/*
885		 * bootfd will be used to do a lookup of our loader and do an
886		 * fdlopen(3) on the loader; thus, we need mmap(2) in addition
887		 * to the more usual lookup rights.
888		 */
889		if (caph_rights_limit(bootfd, cap_rights_init(&rights,
890		    CAP_FSTATAT, CAP_LOOKUP, CAP_MMAP_RX, CAP_PREAD)) < 0)
891			err(1, "caph_rights_limit");
892	}
893
894	vcpu = vm_vcpu_open(ctx, BSP);
895
896	caph_cache_catpages();
897	if (caph_enter() < 0)
898		err(1, "caph_enter");
899
900	/*
901	 * setjmp in the case the guest wants to swap out interpreter,
902	 * cb_swap_interpreter will swap out loader as appropriate and set
903	 * need_reinit so that we end up in a clean state once again.
904	 */
905	if (setjmp(jb) != 0) {
906		dlclose(loader_hdl);
907		loader_hdl = NULL;
908
909		need_reinit = 1;
910	}
911
912	if (need_reinit) {
913		error = vm_reinit(ctx);
914		if (error)
915			err(1, "vm_reinit");
916	}
917
918	vm_set_memflags(ctx, memflags);
919	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
920	if (error)
921		err(1, "vm_setup_memory");
922
923	loader_open(bootfd);
924	func = dlsym(loader_hdl, "loader_main");
925	if (!func)
926		errx(1, "dlsym: %s", dlerror());
927
928	tcgetattr(consout_fd, &term);
929	oldterm = term;
930	cfmakeraw(&term);
931	term.c_cflag |= CLOCAL;
932
933	tcsetattr(consout_fd, TCSAFLUSH, &term);
934
935	addenv("smbios.bios.vendor=BHYVE");
936	addenv("boot_serial=1");
937
938	func(&cb, NULL, USERBOOT_VERSION_5, ndisks);
939
940	free(loader);
941	return (0);
942}
943