1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29/*-
30 * Copyright (c) 2011 Google, Inc.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * $FreeBSD$
55 */
56
57#include <sys/cdefs.h>
58__FBSDID("$FreeBSD$");
59
60#include <sys/ioctl.h>
61#include <sys/stat.h>
62#include <sys/disk.h>
63#include <sys/queue.h>
64
65#include <machine/specialreg.h>
66#include <machine/vmm.h>
67
68#include <dirent.h>
69#include <dlfcn.h>
70#include <errno.h>
71#include <err.h>
72#include <fcntl.h>
73#include <getopt.h>
74#include <libgen.h>
75#include <limits.h>
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <sysexits.h>
80#include <termios.h>
81#include <unistd.h>
82
83#include <vmmapi.h>
84
85#include "userboot.h"
86
/* Byte-count multipliers; main() uses MB for the default guest memory size. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)
/* vcpu id passed to every vm_set_register()/vm_set_desc() call in this file. */
#define	BSP	0

/* Capacity of disk_fd[]. */
#define	NDISKS	32

/* Directory prefix for cb_open(); set by -h, NULL when no host path was given. */
static char *host_base;
/* term: raw-mode settings applied in main(); oldterm: saved copy restored by cb_exit(). */
static struct termios term, oldterm;
/* Descriptors stored by disk_open(); the first ndisks slots are in use. */
static int disk_fd[NDISKS];
static int ndisks;
/* Console descriptors: stdin/stdout by default, replaced by altcons_open(). */
static int consin_fd, consout_fd;

/* vmname: the positional argument; progname: basename(argv[0]) for usage(). */
static char *vmname, *progname;
/* VM handle returned by vm_open() in main(). */
static struct vmctx *ctx;

/* Values recorded by cb_setgdt(), cb_setcr() and cb_setreg(); consumed by cb_exec(). */
static uint64_t gdtbase, cr3, rsp;

/* Declared early because the register callbacks call it before its definition. */
static void cb_exit(void *arg, int v);
105
106/*
107 * Console i/o callbacks
108 */
109
110static void
111cb_putc(void *arg, int ch)
112{
113	char c = ch;
114
115	(void) write(consout_fd, &c, 1);
116}
117
118static int
119cb_getc(void *arg)
120{
121	char c;
122
123	if (read(consin_fd, &c, 1) == 1)
124		return (c);
125	return (-1);
126}
127
128static int
129cb_poll(void *arg)
130{
131	int n;
132
133	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
134		return (n > 0);
135	return (0);
136}
137
138/*
139 * Host filesystem i/o callbacks
140 */
141
/*
 * Handle produced by cb_open() for one host file or directory and
 * released by cb_close().
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is valid; zero: cf_u.fd is */
	size_t cf_size;		/* assigned in cb_open(); not read by the other callbacks here */
	struct stat cf_stat;	/* stat(2) result captured by cb_open() */
	union {
		int fd;		/* regular file, opened O_RDONLY */
		DIR *dir;	/* directory stream from opendir(3) */
	} cf_u;
};
151
152static int
153cb_open(void *arg, const char *filename, void **hp)
154{
155	struct stat st;
156	struct cb_file *cf;
157	char path[PATH_MAX];
158
159	if (!host_base)
160		return (ENOENT);
161
162	strlcpy(path, host_base, PATH_MAX);
163	if (path[strlen(path) - 1] == '/')
164		path[strlen(path) - 1] = 0;
165	strlcat(path, filename, PATH_MAX);
166	cf = malloc(sizeof(struct cb_file));
167	if (stat(path, &cf->cf_stat) < 0) {
168		free(cf);
169		return (errno);
170	}
171
172	cf->cf_size = st.st_size;
173	if (S_ISDIR(cf->cf_stat.st_mode)) {
174		cf->cf_isdir = 1;
175		cf->cf_u.dir = opendir(path);
176		if (!cf->cf_u.dir)
177			goto out;
178		*hp = cf;
179		return (0);
180	}
181	if (S_ISREG(cf->cf_stat.st_mode)) {
182		cf->cf_isdir = 0;
183		cf->cf_u.fd = open(path, O_RDONLY);
184		if (cf->cf_u.fd < 0)
185			goto out;
186		*hp = cf;
187		return (0);
188	}
189
190out:
191	free(cf);
192	return (EINVAL);
193}
194
195static int
196cb_close(void *arg, void *h)
197{
198	struct cb_file *cf = h;
199
200	if (cf->cf_isdir)
201		closedir(cf->cf_u.dir);
202	else
203		close(cf->cf_u.fd);
204	free(cf);
205
206	return (0);
207}
208
209static int
210cb_isdir(void *arg, void *h)
211{
212	struct cb_file *cf = h;
213
214	return (cf->cf_isdir);
215}
216
217static int
218cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
219{
220	struct cb_file *cf = h;
221	ssize_t sz;
222
223	if (cf->cf_isdir)
224		return (EINVAL);
225	sz = read(cf->cf_u.fd, buf, size);
226	if (sz < 0)
227		return (EINVAL);
228	*resid = size - sz;
229	return (0);
230}
231
232static int
233cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
234	   size_t *namelen_return, char *name)
235{
236	struct cb_file *cf = h;
237	struct dirent *dp;
238
239	if (!cf->cf_isdir)
240		return (EINVAL);
241
242	dp = readdir(cf->cf_u.dir);
243	if (!dp)
244		return (ENOENT);
245
246	/*
247	 * Note: d_namlen is in the range 0..255 and therefore less
248	 * than PATH_MAX so we don't need to test before copying.
249	 */
250	*fileno_return = dp->d_fileno;
251	*type_return = dp->d_type;
252	*namelen_return = dp->d_namlen;
253	memcpy(name, dp->d_name, dp->d_namlen);
254	name[dp->d_namlen] = 0;
255
256	return (0);
257}
258
259static int
260cb_seek(void *arg, void *h, uint64_t offset, int whence)
261{
262	struct cb_file *cf = h;
263
264	if (cf->cf_isdir)
265		return (EINVAL);
266	if (lseek(cf->cf_u.fd, offset, whence) < 0)
267		return (errno);
268	return (0);
269}
270
271static int
272cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
273{
274	struct cb_file *cf = h;
275
276	*mode = cf->cf_stat.st_mode;
277	*uid = cf->cf_stat.st_uid;
278	*gid = cf->cf_stat.st_gid;
279	*size = cf->cf_stat.st_size;
280	return (0);
281}
282
283/*
284 * Disk image i/o callbacks
285 */
286
287static int
288cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
289	    size_t *resid)
290{
291	ssize_t n;
292
293	if (unit < 0 || unit >= ndisks )
294		return (EIO);
295	n = pread(disk_fd[unit], to, size, from);
296	if (n < 0)
297		return (errno);
298	*resid = size - n;
299	return (0);
300}
301
302static int
303cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
304{
305	struct stat sb;
306
307	if (unit < 0 || unit >= ndisks)
308		return (EBADF);
309
310	switch (cmd) {
311	case DIOCGSECTORSIZE:
312		*(u_int *)data = 512;
313		break;
314	case DIOCGMEDIASIZE:
315		if (fstat(disk_fd[unit], &sb) == 0)
316			*(off_t *)data = sb.st_size;
317		else
318			return (ENOTTY);
319		break;
320	default:
321		return (ENOTTY);
322	}
323
324	return (0);
325}
326
327/*
328 * Guest virtual machine i/o callbacks
329 */
330static int
331cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
332{
333	char *ptr;
334
335	to &= 0x7fffffff;
336
337	ptr = vm_map_gpa(ctx, to, size);
338	if (ptr == NULL)
339		return (EFAULT);
340
341	memcpy(ptr, from, size);
342	return (0);
343}
344
345static int
346cb_copyout(void *arg, uint64_t from, void *to, size_t size)
347{
348	char *ptr;
349
350	from &= 0x7fffffff;
351
352	ptr = vm_map_gpa(ctx, from, size);
353	if (ptr == NULL)
354		return (EFAULT);
355
356	memcpy(to, ptr, size);
357	return (0);
358}
359
360static void
361cb_setreg(void *arg, int r, uint64_t v)
362{
363	int error;
364	enum vm_reg_name vmreg;
365
366	vmreg = VM_REG_LAST;
367
368	switch (r) {
369	case 4:
370		vmreg = VM_REG_GUEST_RSP;
371		rsp = v;
372		break;
373	default:
374		break;
375	}
376
377	if (vmreg == VM_REG_LAST) {
378		printf("test_setreg(%d): not implemented\n", r);
379		cb_exit(NULL, USERBOOT_EXIT_QUIT);
380	}
381
382	error = vm_set_register(ctx, BSP, vmreg, v);
383	if (error) {
384		perror("vm_set_register");
385		cb_exit(NULL, USERBOOT_EXIT_QUIT);
386	}
387}
388
389static void
390cb_setmsr(void *arg, int r, uint64_t v)
391{
392	int error;
393	enum vm_reg_name vmreg;
394
395	vmreg = VM_REG_LAST;
396
397	switch (r) {
398	case MSR_EFER:
399		vmreg = VM_REG_GUEST_EFER;
400		break;
401	default:
402		break;
403	}
404
405	if (vmreg == VM_REG_LAST) {
406		printf("test_setmsr(%d): not implemented\n", r);
407		cb_exit(NULL, USERBOOT_EXIT_QUIT);
408	}
409
410	error = vm_set_register(ctx, BSP, vmreg, v);
411	if (error) {
412		perror("vm_set_msr");
413		cb_exit(NULL, USERBOOT_EXIT_QUIT);
414	}
415}
416
417static void
418cb_setcr(void *arg, int r, uint64_t v)
419{
420	int error;
421	enum vm_reg_name vmreg;
422
423	vmreg = VM_REG_LAST;
424
425	switch (r) {
426	case 0:
427		vmreg = VM_REG_GUEST_CR0;
428		break;
429	case 3:
430		vmreg = VM_REG_GUEST_CR3;
431		cr3 = v;
432		break;
433	case 4:
434		vmreg = VM_REG_GUEST_CR4;
435		break;
436	default:
437		break;
438	}
439
440	if (vmreg == VM_REG_LAST) {
441		printf("test_setcr(%d): not implemented\n", r);
442		cb_exit(NULL, USERBOOT_EXIT_QUIT);
443	}
444
445	error = vm_set_register(ctx, BSP, vmreg, v);
446	if (error) {
447		perror("vm_set_cr");
448		cb_exit(NULL, USERBOOT_EXIT_QUIT);
449	}
450}
451
452static void
453cb_setgdt(void *arg, uint64_t base, size_t size)
454{
455	int error;
456
457	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
458	if (error != 0) {
459		perror("vm_set_desc(gdt)");
460		cb_exit(NULL, USERBOOT_EXIT_QUIT);
461	}
462
463	gdtbase = base;
464}
465
466static void
467cb_exec(void *arg, uint64_t rip)
468{
469	int error;
470
471	if (cr3 == 0)
472		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
473		    rsp);
474	else
475		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
476		    rsp);
477	if (error) {
478		perror("vm_setup_freebsd_registers");
479		cb_exit(NULL, USERBOOT_EXIT_QUIT);
480	}
481
482	cb_exit(NULL, 0);
483}
484
485/*
486 * Misc
487 */
488
/*
 * Pause for usec microseconds via usleep(3); its result is ignored.
 */
static void
cb_delay(void *arg, int usec)
{

	(void) usleep(usec);
}
495
496static void
497cb_exit(void *arg, int v)
498{
499
500	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
501	exit(v);
502}
503
504static void
505cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
506{
507
508	*ret_lowmem = vm_get_lowmem_size(ctx);
509	*ret_highmem = vm_get_highmem_size(ctx);
510}
511
/*
 * One loader environment string, kept on a singly-linked list.  The
 * string pointer is stored as given by addenv(); it is not copied.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

/* List head: addenv() inserts at the front, cb_getenv() walks it by index. */
static SLIST_HEAD(envhead, env) envhead;
518
519static void
520addenv(const char *str)
521{
522	struct env *env;
523
524	env = malloc(sizeof(struct env));
525	env->str = str;
526	SLIST_INSERT_HEAD(&envhead, env, next);
527}
528
529static const char *
530cb_getenv(void *arg, int num)
531{
532	int i;
533	struct env *env;
534
535	i = 0;
536	SLIST_FOREACH(env, &envhead, next) {
537		if (i == num)
538			return (env->str);
539		i++;
540	}
541
542	return (NULL);
543}
544
/*
 * Callback table handed to loader_main() by main().  The groups below
 * follow the order of the callback sections in this file: console,
 * host filesystem, disk images, guest VM access, and miscellaneous.
 */
static struct loader_callbacks cb = {
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	.diskread = cb_diskread,
	.diskioctl = cb_diskioctl,

	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	.getenv = cb_getenv,
};
575
576static int
577altcons_open(char *path)
578{
579	struct stat sb;
580	int err;
581	int fd;
582
583	/*
584	 * Allow stdio to be passed in so that the same string
585	 * can be used for the bhyveload console and bhyve com-port
586	 * parameters
587	 */
588	if (!strcmp(path, "stdio"))
589		return (0);
590
591	err = stat(path, &sb);
592	if (err == 0) {
593		if (!S_ISCHR(sb.st_mode))
594			err = ENOTSUP;
595		else {
596			fd = open(path, O_RDWR | O_NONBLOCK);
597			if (fd < 0)
598				err = errno;
599			else
600				consin_fd = consout_fd = fd;
601		}
602	}
603
604	return (err);
605}
606
607static int
608disk_open(char *path)
609{
610	int err, fd;
611
612	if (ndisks > NDISKS)
613		return (ERANGE);
614
615	err = 0;
616	fd = open(path, O_RDONLY);
617
618	if (fd > 0) {
619		disk_fd[ndisks] = fd;
620		ndisks++;
621	} else
622		err = errno;
623
624	return (err);
625}
626
627static void
628usage(void)
629{
630
631	fprintf(stderr,
632	    "usage: %s [-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
633	    "       %*s [-h <host-path>] [-m mem-size] <vmname>\n",
634	    progname,
635	    (int)strlen(progname), "");
636	exit(1);
637}
638
639int
640main(int argc, char** argv)
641{
642	void *h;
643	void (*func)(struct loader_callbacks *, void *, int, int);
644	uint64_t mem_size;
645	int opt, error, need_reinit;
646
647	progname = basename(argv[0]);
648
649	mem_size = 256 * MB;
650
651	consin_fd = STDIN_FILENO;
652	consout_fd = STDOUT_FILENO;
653
654	while ((opt = getopt(argc, argv, "c:d:e:h:m:")) != -1) {
655		switch (opt) {
656		case 'c':
657			error = altcons_open(optarg);
658			if (error != 0)
659				errx(EX_USAGE, "Could not open '%s'", optarg);
660			break;
661
662		case 'd':
663			error = disk_open(optarg);
664			if (error != 0)
665				errx(EX_USAGE, "Could not open '%s'", optarg);
666			break;
667
668		case 'e':
669			addenv(optarg);
670			break;
671
672		case 'h':
673			host_base = optarg;
674			break;
675
676		case 'm':
677			error = vm_parse_memsize(optarg, &mem_size);
678			if (error != 0)
679				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
680			break;
681		case '?':
682			usage();
683		}
684	}
685
686	argc -= optind;
687	argv += optind;
688
689	if (argc != 1)
690		usage();
691
692	vmname = argv[0];
693
694	need_reinit = 0;
695	error = vm_create(vmname);
696	if (error) {
697		if (errno != EEXIST) {
698			perror("vm_create");
699			exit(1);
700		}
701		need_reinit = 1;
702	}
703
704	ctx = vm_open(vmname);
705	if (ctx == NULL) {
706		perror("vm_open");
707		exit(1);
708	}
709
710	if (need_reinit) {
711		error = vm_reinit(ctx);
712		if (error) {
713			perror("vm_reinit");
714			exit(1);
715		}
716	}
717
718	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
719	if (error) {
720		perror("vm_setup_memory");
721		exit(1);
722	}
723
724	tcgetattr(consout_fd, &term);
725	oldterm = term;
726	cfmakeraw(&term);
727	term.c_cflag |= CLOCAL;
728
729	tcsetattr(consout_fd, TCSAFLUSH, &term);
730
731	h = dlopen("/boot/userboot.so", RTLD_LOCAL);
732	if (!h) {
733		printf("%s\n", dlerror());
734		return (1);
735	}
736	func = dlsym(h, "loader_main");
737	if (!func) {
738		printf("%s\n", dlerror());
739		return (1);
740	}
741
742	addenv("smbios.bios.vendor=BHYVE");
743	addenv("boot_serial=1");
744
745	func(&cb, NULL, USERBOOT_VERSION_3, ndisks);
746}
747