bhyveload.c revision 259301
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 259301 2013-12-13 06:59:18Z grehan $
27 */
28
29/*-
30 * Copyright (c) 2011 Google, Inc.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 259301 2013-12-13 06:59:18Z grehan $
55 */
56
57#include <sys/cdefs.h>
58__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 259301 2013-12-13 06:59:18Z grehan $");
59
60#include <sys/ioctl.h>
61#include <sys/stat.h>
62#include <sys/disk.h>
63#include <sys/queue.h>
64
65#include <machine/specialreg.h>
66#include <machine/vmm.h>
67
68#include <dirent.h>
69#include <dlfcn.h>
70#include <errno.h>
71#include <err.h>
72#include <fcntl.h>
73#include <getopt.h>
74#include <libgen.h>
75#include <limits.h>
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <sysexits.h>
80#include <termios.h>
81#include <unistd.h>
82
83#include <vmmapi.h>
84
85#include "userboot.h"
86
/*
 * Size multipliers (type unsigned long) and the vcpu number passed to the
 * vm_*() register calls in this file.
 */
#define	MB	(1024UL * 1024UL)
#define	GB	(1024UL * 1024UL * 1024UL)
#define	BSP	0
90
/* Host directory prefix used by cb_open(); NULL makes cb_open() fail. */
static char *host_base;

/* Console terminal settings: the raw mode in use and the saved original. */
static struct termios term;
static struct termios oldterm;

/* Descriptor of the disk image, or -1 when no image is open. */
static int disk_fd = -1;

/* Descriptors the console callbacks read from and write to. */
static int consin_fd;
static int consout_fd;

static char *vmname;
static char *progname;

/* Handle of the virtual machine being loaded. */
static struct vmctx *ctx;

/* Values recorded by the set* callbacks and consumed by cb_exec(). */
static uint64_t gdtbase;
static uint64_t cr3;
static uint64_t rsp;

static void cb_exit(void *arg, int v);
102
103/*
104 * Console i/o callbacks
105 */
106
107static void
108cb_putc(void *arg, int ch)
109{
110	char c = ch;
111
112	(void) write(consout_fd, &c, 1);
113}
114
115static int
116cb_getc(void *arg)
117{
118	char c;
119
120	if (read(consin_fd, &c, 1) == 1)
121		return (c);
122	return (-1);
123}
124
125static int
126cb_poll(void *arg)
127{
128	int n;
129
130	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
131		return (n > 0);
132	return (0);
133}
134
135/*
136 * Host filesystem i/o callbacks
137 */
138
/*
 * Handle for a host file or directory.  cb_open() allocates and fills one
 * in; the other host filesystem callbacks receive it as their 'h' argument
 * and cb_close() releases it.
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is in use, zero: cf_u.fd */
	size_t cf_size;		/* size field filled in by cb_open() */
	struct stat cf_stat;	/* stat(2) result captured by cb_open() */
	union {
		int fd;		/* descriptor from open(2), regular files */
		DIR *dir;	/* stream from opendir(3), directories */
	} cf_u;
};
148
149static int
150cb_open(void *arg, const char *filename, void **hp)
151{
152	struct stat st;
153	struct cb_file *cf;
154	char path[PATH_MAX];
155
156	if (!host_base)
157		return (ENOENT);
158
159	strlcpy(path, host_base, PATH_MAX);
160	if (path[strlen(path) - 1] == '/')
161		path[strlen(path) - 1] = 0;
162	strlcat(path, filename, PATH_MAX);
163	cf = malloc(sizeof(struct cb_file));
164	if (stat(path, &cf->cf_stat) < 0) {
165		free(cf);
166		return (errno);
167	}
168
169	cf->cf_size = st.st_size;
170	if (S_ISDIR(cf->cf_stat.st_mode)) {
171		cf->cf_isdir = 1;
172		cf->cf_u.dir = opendir(path);
173		if (!cf->cf_u.dir)
174			goto out;
175		*hp = cf;
176		return (0);
177	}
178	if (S_ISREG(cf->cf_stat.st_mode)) {
179		cf->cf_isdir = 0;
180		cf->cf_u.fd = open(path, O_RDONLY);
181		if (cf->cf_u.fd < 0)
182			goto out;
183		*hp = cf;
184		return (0);
185	}
186
187out:
188	free(cf);
189	return (EINVAL);
190}
191
192static int
193cb_close(void *arg, void *h)
194{
195	struct cb_file *cf = h;
196
197	if (cf->cf_isdir)
198		closedir(cf->cf_u.dir);
199	else
200		close(cf->cf_u.fd);
201	free(cf);
202
203	return (0);
204}
205
206static int
207cb_isdir(void *arg, void *h)
208{
209	struct cb_file *cf = h;
210
211	return (cf->cf_isdir);
212}
213
214static int
215cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
216{
217	struct cb_file *cf = h;
218	ssize_t sz;
219
220	if (cf->cf_isdir)
221		return (EINVAL);
222	sz = read(cf->cf_u.fd, buf, size);
223	if (sz < 0)
224		return (EINVAL);
225	*resid = size - sz;
226	return (0);
227}
228
229static int
230cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
231	   size_t *namelen_return, char *name)
232{
233	struct cb_file *cf = h;
234	struct dirent *dp;
235
236	if (!cf->cf_isdir)
237		return (EINVAL);
238
239	dp = readdir(cf->cf_u.dir);
240	if (!dp)
241		return (ENOENT);
242
243	/*
244	 * Note: d_namlen is in the range 0..255 and therefore less
245	 * than PATH_MAX so we don't need to test before copying.
246	 */
247	*fileno_return = dp->d_fileno;
248	*type_return = dp->d_type;
249	*namelen_return = dp->d_namlen;
250	memcpy(name, dp->d_name, dp->d_namlen);
251	name[dp->d_namlen] = 0;
252
253	return (0);
254}
255
256static int
257cb_seek(void *arg, void *h, uint64_t offset, int whence)
258{
259	struct cb_file *cf = h;
260
261	if (cf->cf_isdir)
262		return (EINVAL);
263	if (lseek(cf->cf_u.fd, offset, whence) < 0)
264		return (errno);
265	return (0);
266}
267
268static int
269cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
270{
271	struct cb_file *cf = h;
272
273	*mode = cf->cf_stat.st_mode;
274	*uid = cf->cf_stat.st_uid;
275	*gid = cf->cf_stat.st_gid;
276	*size = cf->cf_stat.st_size;
277	return (0);
278}
279
280/*
281 * Disk image i/o callbacks
282 */
283
284static int
285cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
286	    size_t *resid)
287{
288	ssize_t n;
289
290	if (unit != 0 || disk_fd == -1)
291		return (EIO);
292	n = pread(disk_fd, to, size, from);
293	if (n < 0)
294		return (errno);
295	*resid = size - n;
296	return (0);
297}
298
299static int
300cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
301{
302	struct stat sb;
303
304	if (unit != 0 || disk_fd == -1)
305		return (EBADF);
306
307	switch (cmd) {
308	case DIOCGSECTORSIZE:
309		*(u_int *)data = 512;
310		break;
311	case DIOCGMEDIASIZE:
312		if (fstat(disk_fd, &sb) == 0)
313			*(off_t *)data = sb.st_size;
314		else
315			return (ENOTTY);
316		break;
317	default:
318		return (ENOTTY);
319	}
320
321	return (0);
322}
323
324/*
325 * Guest virtual machine i/o callbacks
326 */
327static int
328cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
329{
330	char *ptr;
331
332	to &= 0x7fffffff;
333
334	ptr = vm_map_gpa(ctx, to, size);
335	if (ptr == NULL)
336		return (EFAULT);
337
338	memcpy(ptr, from, size);
339	return (0);
340}
341
342static int
343cb_copyout(void *arg, uint64_t from, void *to, size_t size)
344{
345	char *ptr;
346
347	from &= 0x7fffffff;
348
349	ptr = vm_map_gpa(ctx, from, size);
350	if (ptr == NULL)
351		return (EFAULT);
352
353	memcpy(to, ptr, size);
354	return (0);
355}
356
357static void
358cb_setreg(void *arg, int r, uint64_t v)
359{
360	int error;
361	enum vm_reg_name vmreg;
362
363	vmreg = VM_REG_LAST;
364
365	switch (r) {
366	case 4:
367		vmreg = VM_REG_GUEST_RSP;
368		rsp = v;
369		break;
370	default:
371		break;
372	}
373
374	if (vmreg == VM_REG_LAST) {
375		printf("test_setreg(%d): not implemented\n", r);
376		cb_exit(NULL, USERBOOT_EXIT_QUIT);
377	}
378
379	error = vm_set_register(ctx, BSP, vmreg, v);
380	if (error) {
381		perror("vm_set_register");
382		cb_exit(NULL, USERBOOT_EXIT_QUIT);
383	}
384}
385
386static void
387cb_setmsr(void *arg, int r, uint64_t v)
388{
389	int error;
390	enum vm_reg_name vmreg;
391
392	vmreg = VM_REG_LAST;
393
394	switch (r) {
395	case MSR_EFER:
396		vmreg = VM_REG_GUEST_EFER;
397		break;
398	default:
399		break;
400	}
401
402	if (vmreg == VM_REG_LAST) {
403		printf("test_setmsr(%d): not implemented\n", r);
404		cb_exit(NULL, USERBOOT_EXIT_QUIT);
405	}
406
407	error = vm_set_register(ctx, BSP, vmreg, v);
408	if (error) {
409		perror("vm_set_msr");
410		cb_exit(NULL, USERBOOT_EXIT_QUIT);
411	}
412}
413
414static void
415cb_setcr(void *arg, int r, uint64_t v)
416{
417	int error;
418	enum vm_reg_name vmreg;
419
420	vmreg = VM_REG_LAST;
421
422	switch (r) {
423	case 0:
424		vmreg = VM_REG_GUEST_CR0;
425		break;
426	case 3:
427		vmreg = VM_REG_GUEST_CR3;
428		cr3 = v;
429		break;
430	case 4:
431		vmreg = VM_REG_GUEST_CR4;
432		break;
433	default:
434		break;
435	}
436
437	if (vmreg == VM_REG_LAST) {
438		printf("test_setcr(%d): not implemented\n", r);
439		cb_exit(NULL, USERBOOT_EXIT_QUIT);
440	}
441
442	error = vm_set_register(ctx, BSP, vmreg, v);
443	if (error) {
444		perror("vm_set_cr");
445		cb_exit(NULL, USERBOOT_EXIT_QUIT);
446	}
447}
448
449static void
450cb_setgdt(void *arg, uint64_t base, size_t size)
451{
452	int error;
453
454	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
455	if (error != 0) {
456		perror("vm_set_desc(gdt)");
457		cb_exit(NULL, USERBOOT_EXIT_QUIT);
458	}
459
460	gdtbase = base;
461}
462
463static void
464cb_exec(void *arg, uint64_t rip)
465{
466	int error;
467
468	error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, rsp);
469	if (error) {
470		perror("vm_setup_freebsd_registers");
471		cb_exit(NULL, USERBOOT_EXIT_QUIT);
472	}
473
474	cb_exit(NULL, 0);
475}
476
477/*
478 * Misc
479 */
480
/* Suspend execution for 'usec' microseconds using usleep(3). */
static void
cb_delay(void *arg, int usec)
{

	(void) usleep(usec);
}
487
488static void
489cb_exit(void *arg, int v)
490{
491
492	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
493	exit(v);
494}
495
496static void
497cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
498{
499
500	vm_get_memory_seg(ctx, 0, ret_lowmem, NULL);
501	vm_get_memory_seg(ctx, 4 * GB, ret_highmem, NULL);
502}
503
/*
 * One loader environment setting.  Entries are created by addenv() and
 * looked up by position through cb_getenv().
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;	/* linkage on envhead */
};

/* List of all settings; addenv() inserts new entries at the head. */
static SLIST_HEAD(envhead, env) envhead;
510
511static void
512addenv(const char *str)
513{
514	struct env *env;
515
516	env = malloc(sizeof(struct env));
517	env->str = str;
518	SLIST_INSERT_HEAD(&envhead, env, next);
519}
520
521static const char *
522cb_getenv(void *arg, int num)
523{
524	int i;
525	struct env *env;
526
527	i = 0;
528	SLIST_FOREACH(env, &envhead, next) {
529		if (i == num)
530			return (env->str);
531		i++;
532	}
533
534	return (NULL);
535}
536
/*
 * Callback table whose address main() passes to the loader entry point.
 * Each member points at the matching cb_*() function in this file.
 */
static struct loader_callbacks cb = {
	/* Console i/o */
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	/* Host filesystem i/o */
	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	/* Disk image i/o */
	.diskread = cb_diskread,
	.diskioctl = cb_diskioctl,

	/* Guest memory and register setup */
	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	/* Misc */
	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	/* Loader environment */
	.getenv = cb_getenv,
};
567
568static int
569altcons_open(char *path)
570{
571	struct stat sb;
572	int err;
573	int fd;
574
575	/*
576	 * Allow stdio to be passed in so that the same string
577	 * can be used for the bhyveload console and bhyve com-port
578	 * parameters
579	 */
580	if (!strcmp(path, "stdio"))
581		return (0);
582
583	err = stat(path, &sb);
584	if (err == 0) {
585		if (!S_ISCHR(sb.st_mode))
586			err = ENOTSUP;
587		else {
588			fd = open(path, O_RDWR | O_NONBLOCK);
589			if (fd < 0)
590				err = errno;
591			else
592				consin_fd = consout_fd = fd;
593		}
594	}
595
596	return (err);
597}
598
599static void
600usage(void)
601{
602
603	fprintf(stderr,
604	    "usage: %s [-m mem-size] [-d <disk-path>] [-h <host-path>]\n"
605	    "       %*s [-e <name=value>] [-c <console-device>] <vmname>\n",
606	    progname,
607	    (int)strlen(progname), "");
608	exit(1);
609}
610
611int
612main(int argc, char** argv)
613{
614	void *h;
615	void (*func)(struct loader_callbacks *, void *, int, int);
616	uint64_t mem_size;
617	int opt, error;
618	char *disk_image;
619
620	progname = basename(argv[0]);
621
622	mem_size = 256 * MB;
623	disk_image = NULL;
624
625	consin_fd = STDIN_FILENO;
626	consout_fd = STDOUT_FILENO;
627
628	while ((opt = getopt(argc, argv, "c:d:e:h:m:")) != -1) {
629		switch (opt) {
630		case 'c':
631			error = altcons_open(optarg);
632			if (error != 0)
633				errx(EX_USAGE, "Could not open '%s'", optarg);
634			break;
635		case 'd':
636			disk_image = optarg;
637			break;
638
639		case 'e':
640			addenv(optarg);
641			break;
642
643		case 'h':
644			host_base = optarg;
645			break;
646
647		case 'm':
648			error = vm_parse_memsize(optarg, &mem_size);
649			if (error != 0)
650				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
651			break;
652		case '?':
653			usage();
654		}
655	}
656
657	argc -= optind;
658	argv += optind;
659
660	if (argc != 1)
661		usage();
662
663	vmname = argv[0];
664
665	error = vm_create(vmname);
666	if (error != 0 && errno != EEXIST) {
667		perror("vm_create");
668		exit(1);
669
670	}
671
672	ctx = vm_open(vmname);
673	if (ctx == NULL) {
674		perror("vm_open");
675		exit(1);
676	}
677
678	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
679	if (error) {
680		perror("vm_setup_memory");
681		exit(1);
682	}
683
684	tcgetattr(consout_fd, &term);
685	oldterm = term;
686	cfmakeraw(&term);
687	term.c_cflag |= CLOCAL;
688
689	tcsetattr(consout_fd, TCSAFLUSH, &term);
690
691	h = dlopen("/boot/userboot.so", RTLD_LOCAL);
692	if (!h) {
693		printf("%s\n", dlerror());
694		return (1);
695	}
696	func = dlsym(h, "loader_main");
697	if (!func) {
698		printf("%s\n", dlerror());
699		return (1);
700	}
701
702	if (disk_image) {
703		disk_fd = open(disk_image, O_RDONLY);
704	}
705
706	addenv("smbios.bios.vendor=BHYVE");
707	addenv("boot_serial=1");
708
709	func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0);
710}
711