1221828Sgrehan/*- 2221828Sgrehan * Copyright (c) 2011 NetApp, Inc. 3221828Sgrehan * All rights reserved. 4221828Sgrehan * 5221828Sgrehan * Redistribution and use in source and binary forms, with or without 6221828Sgrehan * modification, are permitted provided that the following conditions 7221828Sgrehan * are met: 8221828Sgrehan * 1. Redistributions of source code must retain the above copyright 9221828Sgrehan * notice, this list of conditions and the following disclaimer. 10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright 11221828Sgrehan * notice, this list of conditions and the following disclaimer in the 12221828Sgrehan * documentation and/or other materials provided with the distribution. 13221828Sgrehan * 14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17221828Sgrehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24221828Sgrehan * SUCH DAMAGE. 25221828Sgrehan * 26221828Sgrehan * $FreeBSD$ 27221828Sgrehan */ 28221828Sgrehan 29221828Sgrehan#include <sys/cdefs.h> 30221828Sgrehan__FBSDID("$FreeBSD$"); 31221828Sgrehan 32221828Sgrehan#include <sys/types.h> 33221828Sgrehan#include <sys/mman.h> 34221828Sgrehan#include <sys/time.h> 35221828Sgrehan 36262350Sjhb#include <machine/atomic.h> 37221828Sgrehan#include <machine/segments.h> 38221828Sgrehan 39221828Sgrehan#include <stdio.h> 40221828Sgrehan#include <stdlib.h> 41257396Sneel#include <string.h> 42256176Sneel#include <err.h> 43221828Sgrehan#include <libgen.h> 44221828Sgrehan#include <unistd.h> 45221828Sgrehan#include <assert.h> 46221828Sgrehan#include <errno.h> 47221828Sgrehan#include <pthread.h> 48242404Sgrehan#include <pthread_np.h> 49256176Sneel#include <sysexits.h> 50295124Sgrehan#include <stdbool.h> 51221828Sgrehan 52221828Sgrehan#include <machine/vmm.h> 53221828Sgrehan#include <vmmapi.h> 54221828Sgrehan 55244167Sgrehan#include "bhyverun.h" 56243327Sgrehan#include "acpi.h" 57221828Sgrehan#include "inout.h" 58221828Sgrehan#include "dbgport.h" 59295124Sgrehan#include "fwctl.h" 60267393Sjhb#include "ioapic.h" 61241744Sgrehan#include "mem.h" 62221828Sgrehan#include "mevent.h" 63242131Sgrehan#include "mptbl.h" 64221828Sgrehan#include "pci_emul.h" 65268972Sjhb#include "pci_irq.h" 66257396Sneel#include "pci_lpc.h" 67267450Sjhb#include "smbiostbl.h" 68221828Sgrehan#include "xmsr.h" 69240912Sneel#include "spinup_ap.h" 70253181Sgrehan#include "rtc.h" 71221828Sgrehan 72221828Sgrehan#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ 73221828Sgrehan 74221828Sgrehan#define MB (1024UL * 1024) 75221828Sgrehan#define GB (1024UL * MB) 76221828Sgrehan 77221828Sgrehantypedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); 78270159Sgrehanextern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); 79221828Sgrehan 80221828Sgrehanchar *vmname; 81221828Sgrehan 82221828Sgrehanint guest_ncpus; 83267450Sjhbchar *guest_uuid_str; 84221828Sgrehan 85267447Sjhbstatic int guest_vmexit_on_hlt, guest_vmexit_on_pause; 86256755Sgrehanstatic int virtio_msix = 1; 87267447Sjhbstatic int x2apic_mode = 0; /* default is xAPIC */ 88221828Sgrehan 89222105Sgrehanstatic int strictio; 90264273Sjhbstatic int strictmsr = 1; 91222105Sgrehan 92243327Sgrehanstatic int acpi; 93243327Sgrehan 94221828Sgrehanstatic char *progname; 95221828Sgrehanstatic const int BSP = 0; 96221828Sgrehan 97268894Sjhbstatic cpuset_t cpumask; 98221828Sgrehan 99221828Sgrehanstatic void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); 100221828Sgrehan 101270159Sgrehanstatic struct vm_exit vmexit[VM_MAXCPU]; 102221828Sgrehan 103256062Sgrehanstruct bhyvestats { 104221828Sgrehan uint64_t vmexit_bogus; 105284900Sneel uint64_t vmexit_reqidle; 106221828Sgrehan uint64_t vmexit_hlt; 107221828Sgrehan uint64_t vmexit_pause; 108221828Sgrehan uint64_t vmexit_mtrap; 109256072Sneel uint64_t vmexit_inst_emul; 110221828Sgrehan uint64_t cpu_switch_rotate; 111221828Sgrehan uint64_t cpu_switch_direct; 112221828Sgrehan} stats; 113221828Sgrehan 114221828Sgrehanstruct mt_vmm_info { 115221828Sgrehan pthread_t mt_thr; 116221828Sgrehan struct vmctx *mt_ctx; 117221828Sgrehan int mt_vcpu; 118221828Sgrehan} mt_vmm_info[VM_MAXCPU]; 119221828Sgrehan 120268894Sjhbstatic cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; 121268894Sjhb 122221828Sgrehanstatic void 123221828Sgrehanusage(int code) 124221828Sgrehan{ 125221828Sgrehan 126221828Sgrehan fprintf(stderr, 127295124Sgrehan "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" 128270159Sgrehan " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" 129267447Sjhb " -a: local apic is in xAPIC mode (deprecated)\n" 130270159Sgrehan " -A: create ACPI tables\n" 131221828Sgrehan " -c: # cpus (default 1)\n" 132268953Sjhb " -C: include guest memory in core file\n" 133257396Sneel " -e: exit on unhandled I/O access\n" 134270159Sgrehan " -g: gdb port\n" 135221828Sgrehan " -h: help\n" 136270159Sgrehan " -H: vmexit from the guest on hlt\n" 137257396Sneel " -l: LPC device configuration\n" 138264273Sjhb " -m: memory size in MB\n" 139270159Sgrehan " -p: pin 'vcpu' to 'hostcpu'\n" 140270159Sgrehan " -P: vmexit from the guest on pause\n" 141270159Sgrehan " -s: <slot,driver,configinfo> PCI slot config\n" 142295124Sgrehan " -S: guest memory cannot be swapped\n" 143284894Sneel " -u: RTC keeps UTC time\n" 144270159Sgrehan " -U: uuid\n" 145267447Sjhb " -w: ignore unimplemented MSRs\n" 146270159Sgrehan " -W: force virtio to use single-vector MSI\n" 147267450Sjhb " -x: local apic is in x2APIC mode\n" 148270159Sgrehan " -Y: disable MPtable generation\n", 149257396Sneel progname, (int)strlen(progname), ""); 150256062Sgrehan 151221828Sgrehan exit(code); 152221828Sgrehan} 153221828Sgrehan 154268894Sjhbstatic int 155268894Sjhbpincpu_parse(const char *opt) 156268894Sjhb{ 157268894Sjhb int vcpu, pcpu; 158268894Sjhb 159268894Sjhb if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 160268894Sjhb fprintf(stderr, "invalid format: %s\n", opt); 161268894Sjhb return (-1); 162268894Sjhb } 163268894Sjhb 164268894Sjhb if (vcpu < 0 || vcpu >= VM_MAXCPU) { 165268894Sjhb fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", 166268894Sjhb vcpu, VM_MAXCPU - 1); 167268894Sjhb return (-1); 168268894Sjhb } 169268894Sjhb 170268894Sjhb if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 171268894Sjhb fprintf(stderr, "hostcpu '%d' outside valid range from " 172268894Sjhb "0 to %d\n", pcpu, CPU_SETSIZE - 1); 173268894Sjhb return (-1); 174268894Sjhb } 175268894Sjhb 176268894Sjhb if (vcpumap[vcpu] == NULL) { 177268894Sjhb if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { 178268894Sjhb perror("malloc"); 179268894Sjhb return (-1); 180268894Sjhb } 181268894Sjhb CPU_ZERO(vcpumap[vcpu]); 182268894Sjhb } 183268894Sjhb CPU_SET(pcpu, vcpumap[vcpu]); 184268894Sjhb return (0); 185268894Sjhb} 186268894Sjhb 187270159Sgrehanvoid 188270159Sgrehanvm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, 189270159Sgrehan int errcode) 190270159Sgrehan{ 191270159Sgrehan struct vmctx *ctx; 192284894Sneel int error, restart_instruction; 193270159Sgrehan 194270159Sgrehan ctx = arg; 195284894Sneel restart_instruction = 1; 196284894Sneel 197284894Sneel error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, 198284894Sneel restart_instruction); 199270159Sgrehan assert(error == 0); 200270159Sgrehan} 201270159Sgrehan 202221828Sgrehanvoid * 203248477Sneelpaddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 204221828Sgrehan{ 205221828Sgrehan 206248477Sneel return (vm_map_gpa(ctx, gaddr, len)); 207221828Sgrehan} 208221828Sgrehan 209221828Sgrehanint 210221828Sgrehanfbsdrun_vmexit_on_pause(void) 211221828Sgrehan{ 212221828Sgrehan 213221828Sgrehan return (guest_vmexit_on_pause); 214221828Sgrehan} 215221828Sgrehan 216221828Sgrehanint 217221828Sgrehanfbsdrun_vmexit_on_hlt(void) 218221828Sgrehan{ 219221828Sgrehan 220221828Sgrehan return (guest_vmexit_on_hlt); 221221828Sgrehan} 222221828Sgrehan 223256755Sgrehanint 224256755Sgrehanfbsdrun_virtio_msix(void) 225256755Sgrehan{ 226256755Sgrehan 227256755Sgrehan return (virtio_msix); 228256755Sgrehan} 229256755Sgrehan 230221942Sjhbstatic void * 231221828Sgrehanfbsdrun_start_thread(void *param) 232221828Sgrehan{ 233242404Sgrehan char tname[MAXCOMLEN + 1]; 234242404Sgrehan struct mt_vmm_info *mtp; 235221828Sgrehan int vcpu; 236221828Sgrehan 237242404Sgrehan mtp = param; 238221828Sgrehan vcpu = mtp->mt_vcpu; 239242404Sgrehan 240259301Sgrehan snprintf(tname, sizeof(tname), "vcpu %d", vcpu); 241242404Sgrehan pthread_set_name_np(mtp->mt_thr, tname); 242242404Sgrehan 243221828Sgrehan vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); 244221828Sgrehan 245221828Sgrehan /* not reached */ 246221828Sgrehan exit(1); 247221828Sgrehan return (NULL); 248221828Sgrehan} 249221828Sgrehan 250221828Sgrehanvoid 251268894Sjhbfbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) 252221828Sgrehan{ 253221828Sgrehan int error; 254221828Sgrehan 255268894Sjhb assert(fromcpu == BSP); 256221828Sgrehan 257270070Sgrehan /* 258270070Sgrehan * The 'newcpu' must be activated in the context of 'fromcpu'. If 259270070Sgrehan * vm_activate_cpu() is delayed until newcpu's pthread starts running 260270070Sgrehan * then vmm.ko is out-of-sync with bhyve and this can create a race 261270070Sgrehan * with vm_suspend(). 262270070Sgrehan */ 263270070Sgrehan error = vm_activate_cpu(ctx, newcpu); 264290386Sngie if (error != 0) 265290386Sngie err(EX_OSERR, "could not activate CPU %d", newcpu); 266270070Sgrehan 267268894Sjhb CPU_SET_ATOMIC(newcpu, &cpumask); 268221828Sgrehan 269221828Sgrehan /* 270221828Sgrehan * Set up the vmexit struct to allow execution to start 271221828Sgrehan * at the given RIP 272221828Sgrehan */ 273268894Sjhb vmexit[newcpu].rip = rip; 274268894Sjhb vmexit[newcpu].inst_length = 0; 275221828Sgrehan 276268894Sjhb mt_vmm_info[newcpu].mt_ctx = ctx; 277268894Sjhb mt_vmm_info[newcpu].mt_vcpu = newcpu; 278256072Sneel 279268894Sjhb error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, 280268894Sjhb fbsdrun_start_thread, &mt_vmm_info[newcpu]); 281256072Sneel assert(error == 0); 282221828Sgrehan} 283221828Sgrehan 284221828Sgrehanstatic int 285262350Sjhbfbsdrun_deletecpu(struct vmctx *ctx, int vcpu) 286262350Sjhb{ 287262350Sjhb 288268894Sjhb if (!CPU_ISSET(vcpu, &cpumask)) { 289268894Sjhb fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 290262350Sjhb exit(1); 291262350Sjhb } 292262350Sjhb 293268894Sjhb CPU_CLR_ATOMIC(vcpu, &cpumask); 294268894Sjhb return (CPU_EMPTY(&cpumask)); 295262350Sjhb} 296262350Sjhb 297262350Sjhbstatic int 298221828Sgrehanvmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, 299221828Sgrehan uint32_t eax) 300221828Sgrehan{ 301256062Sgrehan#if BHYVE_DEBUG 302256062Sgrehan /* 303256062Sgrehan * put guest-driven debug here 304256062Sgrehan */ 305221828Sgrehan#endif 306221828Sgrehan return (VMEXIT_CONTINUE); 307221828Sgrehan} 308221828Sgrehan 309221828Sgrehanstatic int 310221828Sgrehanvmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 311221828Sgrehan{ 312221828Sgrehan int error; 313293412Saraujo int bytes, port, in, out; 314221828Sgrehan int vcpu; 315221828Sgrehan 316221828Sgrehan vcpu = *pvcpu; 317221828Sgrehan 318221828Sgrehan port = vme->u.inout.port; 319221828Sgrehan bytes = vme->u.inout.bytes; 320221828Sgrehan in = vme->u.inout.in; 321221828Sgrehan out = !in; 322221828Sgrehan 323221828Sgrehan /* Extra-special case of host notifications */ 324268976Sjhb if (out && port == GUEST_NIO_PORT) { 325268976Sjhb error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); 326268976Sjhb return (error); 327268976Sjhb } 328221828Sgrehan 329268976Sjhb error = emulate_inout(ctx, vcpu, vme, strictio); 330270159Sgrehan if (error) { 331284899Sneel fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 332284899Sneel in ? "in" : "out", 333284899Sneel bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), 334284899Sneel port, vmexit->rip); 335270159Sgrehan return (VMEXIT_ABORT); 336270159Sgrehan } else { 337221828Sgrehan return (VMEXIT_CONTINUE); 338221828Sgrehan } 339221828Sgrehan} 340221828Sgrehan 341221828Sgrehanstatic int 342221828Sgrehanvmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 343221828Sgrehan{ 344264273Sjhb uint64_t val; 345264273Sjhb uint32_t eax, edx; 346264273Sjhb int error; 347264273Sjhb 348264273Sjhb val = 0; 349264273Sjhb error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); 350264273Sjhb if (error != 0) { 351264273Sjhb fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 352264273Sjhb vme->u.msr.code, *pvcpu); 353267427Sjhb if (strictmsr) { 354270159Sgrehan vm_inject_gp(ctx, *pvcpu); 355284894Sneel return (VMEXIT_CONTINUE); 356267427Sjhb } 357264273Sjhb } 358264273Sjhb 359264273Sjhb eax = val; 360264273Sjhb error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); 361264273Sjhb assert(error == 0); 362264273Sjhb 363264273Sjhb edx = val >> 32; 364264273Sjhb error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); 365264273Sjhb assert(error == 0); 366264273Sjhb 367264273Sjhb return (VMEXIT_CONTINUE); 368221828Sgrehan} 369221828Sgrehan 370221828Sgrehanstatic int 371221828Sgrehanvmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 372221828Sgrehan{ 373264273Sjhb int error; 374221828Sgrehan 375264273Sjhb error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); 376264273Sjhb if (error != 0) { 377264273Sjhb fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 378264273Sjhb vme->u.msr.code, vme->u.msr.wval, *pvcpu); 379267427Sjhb if (strictmsr) { 380270159Sgrehan vm_inject_gp(ctx, *pvcpu); 381284894Sneel return (VMEXIT_CONTINUE); 382267427Sjhb } 383264273Sjhb } 384264273Sjhb return (VMEXIT_CONTINUE); 385221828Sgrehan} 386221828Sgrehan 387221828Sgrehanstatic int 388240912Sneelvmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) 389240912Sneel{ 390240912Sneel 391302705Sngie (void)spinup_ap(ctx, *pvcpu, 392302705Sngie vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); 393240912Sneel 394302705Sngie return (VMEXIT_CONTINUE); 395240912Sneel} 396240912Sneel 397270159Sgrehan#define DEBUG_EPT_MISCONFIG 398270159Sgrehan#ifdef DEBUG_EPT_MISCONFIG 399270159Sgrehan#define EXIT_REASON_EPT_MISCONFIG 49 400270159Sgrehan#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 401270159Sgrehan#define VMCS_IDENT(x) ((x) | 0x80000000) 402270159Sgrehan 403270159Sgrehanstatic uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 404270159Sgrehanstatic int ept_misconfig_ptenum; 405270159Sgrehan#endif 406270159Sgrehan 407240912Sneelstatic int 408221828Sgrehanvmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 409221828Sgrehan{ 410221828Sgrehan 411242385Sgrehan fprintf(stderr, "vm exit[%d]\n", *pvcpu); 412242385Sgrehan fprintf(stderr, "\treason\t\tVMX\n"); 413242385Sgrehan fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 414242385Sgrehan fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 415264619Sjhb fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); 416242385Sgrehan fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); 417242385Sgrehan fprintf(stderr, "\tqualification\t0x%016lx\n", 418242385Sgrehan vmexit->u.vmx.exit_qualification); 419264619Sjhb fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); 420264619Sjhb fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); 421270159Sgrehan#ifdef DEBUG_EPT_MISCONFIG 422270159Sgrehan if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 423270159Sgrehan vm_get_register(ctx, *pvcpu, 424270159Sgrehan VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 425270159Sgrehan &ept_misconfig_gpa); 426270159Sgrehan vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 427270159Sgrehan &ept_misconfig_ptenum); 428270159Sgrehan fprintf(stderr, "\tEPT misconfiguration:\n"); 429270159Sgrehan fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 430270159Sgrehan fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 431270159Sgrehan ept_misconfig_ptenum, ept_misconfig_pte[0], 432270159Sgrehan ept_misconfig_pte[1], ept_misconfig_pte[2], 433270159Sgrehan ept_misconfig_pte[3]); 434270159Sgrehan } 435270159Sgrehan#endif /* DEBUG_EPT_MISCONFIG */ 436221828Sgrehan return (VMEXIT_ABORT); 437221828Sgrehan} 438221828Sgrehan 439221828Sgrehanstatic int 440276403Sneelvmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 441276403Sneel{ 442276403Sneel 443276403Sneel fprintf(stderr, "vm exit[%d]\n", *pvcpu); 444276403Sneel fprintf(stderr, "\treason\t\tSVM\n"); 445276403Sneel fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); 446276403Sneel fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); 447276403Sneel fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); 448276403Sneel fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); 449276403Sneel fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); 450276403Sneel return (VMEXIT_ABORT); 451276403Sneel} 452276403Sneel 453276403Sneelstatic int 454221828Sgrehanvmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 455221828Sgrehan{ 456256062Sgrehan 457284894Sneel assert(vmexit->inst_length == 0); 458284894Sneel 459221828Sgrehan stats.vmexit_bogus++; 460221828Sgrehan 461284894Sneel return (VMEXIT_CONTINUE); 462221828Sgrehan} 463221828Sgrehan 464221828Sgrehanstatic int 465284900Sneelvmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 466284900Sneel{ 467284900Sneel 468284900Sneel assert(vmexit->inst_length == 0); 469284900Sneel 470284900Sneel stats.vmexit_reqidle++; 471284900Sneel 472284900Sneel return (VMEXIT_CONTINUE); 473284900Sneel} 474284900Sneel 475284900Sneelstatic int 476221828Sgrehanvmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 477221828Sgrehan{ 478256062Sgrehan 479221828Sgrehan stats.vmexit_hlt++; 480256062Sgrehan 481256062Sgrehan /* 482256062Sgrehan * Just continue execution with the next instruction. We use 483256062Sgrehan * the HLT VM exit as a way to be friendly with the host 484256062Sgrehan * scheduler. 485256062Sgrehan */ 486256062Sgrehan return (VMEXIT_CONTINUE); 487221828Sgrehan} 488221828Sgrehan 489221828Sgrehanstatic int 490221828Sgrehanvmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 491221828Sgrehan{ 492256062Sgrehan 493221828Sgrehan stats.vmexit_pause++; 494221828Sgrehan 495256062Sgrehan return (VMEXIT_CONTINUE); 496221828Sgrehan} 497221828Sgrehan 498221828Sgrehanstatic int 499221828Sgrehanvmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 500221828Sgrehan{ 501256062Sgrehan 502284894Sneel assert(vmexit->inst_length == 0); 503284894Sneel 504221828Sgrehan stats.vmexit_mtrap++; 505221828Sgrehan 506284894Sneel return (VMEXIT_CONTINUE); 507221828Sgrehan} 508221828Sgrehan 509234761Sgrehanstatic int 510256072Sneelvmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 511234761Sgrehan{ 512284899Sneel int err, i; 513284899Sneel struct vie *vie; 514284899Sneel 515256072Sneel stats.vmexit_inst_emul++; 516234761Sgrehan 517284899Sneel vie = &vmexit->u.inst_emul.vie; 518256072Sneel err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, 519284899Sneel vie, &vmexit->u.inst_emul.paging); 520241744Sgrehan 521241744Sgrehan if (err) { 522284899Sneel if (err == ESRCH) { 523242385Sgrehan fprintf(stderr, "Unhandled memory access to 0x%lx\n", 524256072Sneel vmexit->u.inst_emul.gpa); 525241744Sgrehan } 526241744Sgrehan 527284899Sneel fprintf(stderr, "Failed to emulate instruction ["); 528284899Sneel for (i = 0; i < vie->num_valid; i++) { 529284899Sneel fprintf(stderr, "0x%02x%s", vie->inst[i], 530284899Sneel i != (vie->num_valid - 1) ? " " : ""); 531284899Sneel } 532284899Sneel fprintf(stderr, "] at 0x%lx\n", vmexit->rip); 533234761Sgrehan return (VMEXIT_ABORT); 534234761Sgrehan } 535234761Sgrehan 536234761Sgrehan return (VMEXIT_CONTINUE); 537234761Sgrehan} 538234761Sgrehan 539268935Sjhbstatic pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 540268935Sjhbstatic pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 541268935Sjhb 542268935Sjhbstatic int 543268935Sjhbvmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 544268935Sjhb{ 545268935Sjhb enum vm_suspend_how how; 546268935Sjhb 547268935Sjhb how = vmexit->u.suspended.how; 548268935Sjhb 549268935Sjhb fbsdrun_deletecpu(ctx, *pvcpu); 550268935Sjhb 551268935Sjhb if (*pvcpu != BSP) { 552268935Sjhb pthread_mutex_lock(&resetcpu_mtx); 553268935Sjhb pthread_cond_signal(&resetcpu_cond); 554268935Sjhb pthread_mutex_unlock(&resetcpu_mtx); 555268935Sjhb pthread_exit(NULL); 556268935Sjhb } 557268935Sjhb 558268935Sjhb pthread_mutex_lock(&resetcpu_mtx); 559268935Sjhb while (!CPU_EMPTY(&cpumask)) { 560268935Sjhb pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 561268935Sjhb } 562268935Sjhb pthread_mutex_unlock(&resetcpu_mtx); 563268935Sjhb 564268935Sjhb switch (how) { 565268935Sjhb case VM_SUSPEND_RESET: 566268935Sjhb exit(0); 567268935Sjhb case VM_SUSPEND_POWEROFF: 568268935Sjhb exit(1); 569268935Sjhb case VM_SUSPEND_HALT: 570268935Sjhb exit(2); 571270159Sgrehan case VM_SUSPEND_TRIPLEFAULT: 572270159Sgrehan exit(3); 573268935Sjhb default: 574268935Sjhb fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 575268935Sjhb exit(100); 576268935Sjhb } 577268935Sjhb return (0); /* NOTREACHED */ 578268935Sjhb} 579268935Sjhb 580221828Sgrehanstatic vmexit_handler_t handler[VM_EXITCODE_MAX] = { 581234761Sgrehan [VM_EXITCODE_INOUT] = vmexit_inout, 582268976Sjhb [VM_EXITCODE_INOUT_STR] = vmexit_inout, 583234761Sgrehan [VM_EXITCODE_VMX] = vmexit_vmx, 584276403Sneel [VM_EXITCODE_SVM] = vmexit_svm, 585234761Sgrehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 586284900Sneel [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 587234761Sgrehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 588234761Sgrehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 589234761Sgrehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 590256072Sneel [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 591240912Sneel [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, 592270159Sgrehan [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 593270159Sgrehan [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 594221828Sgrehan}; 595221828Sgrehan 596221828Sgrehanstatic void 597284894Sneelvm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) 598221828Sgrehan{ 599293412Saraujo int error, rc; 600253452Sgrehan enum vm_exitcode exitcode; 601270070Sgrehan cpuset_t active_cpus; 602221828Sgrehan 603268894Sjhb if (vcpumap[vcpu] != NULL) { 604246686Sneel error = pthread_setaffinity_np(pthread_self(), 605268894Sjhb sizeof(cpuset_t), vcpumap[vcpu]); 606221828Sgrehan assert(error == 0); 607221828Sgrehan } 608221828Sgrehan 609270070Sgrehan error = vm_active_cpus(ctx, &active_cpus); 610270070Sgrehan assert(CPU_ISSET(vcpu, &active_cpus)); 611270070Sgrehan 612284894Sneel error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); 613284894Sneel assert(error == 0); 614284894Sneel 615221828Sgrehan while (1) { 616284894Sneel error = vm_run(ctx, vcpu, &vmexit[vcpu]); 617266393Sjhb if (error != 0) 618266393Sjhb break; 619221828Sgrehan 620253452Sgrehan exitcode = vmexit[vcpu].exitcode; 621253452Sgrehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 622253452Sgrehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 623253452Sgrehan exitcode); 624253452Sgrehan exit(1); 625253452Sgrehan } 626253452Sgrehan 627253452Sgrehan rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); 628253452Sgrehan 629221828Sgrehan switch (rc) { 630221828Sgrehan case VMEXIT_CONTINUE: 631221828Sgrehan break; 632268953Sjhb case VMEXIT_ABORT: 633268953Sjhb abort(); 634221828Sgrehan default: 635221828Sgrehan exit(1); 636221828Sgrehan } 637221828Sgrehan } 638221828Sgrehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 639221828Sgrehan} 640221828Sgrehan 641245020Sneelstatic int 642245020Sneelnum_vcpus_allowed(struct vmctx *ctx) 643245020Sneel{ 644245020Sneel int tmp, error; 645221828Sgrehan 646245020Sneel error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); 647245020Sneel 648245020Sneel /* 649245020Sneel * The guest is allowed to spinup more than one processor only if the 650245020Sneel * UNRESTRICTED_GUEST capability is available. 651245020Sneel */ 652245020Sneel if (error == 0) 653245020Sneel return (VM_MAXCPU); 654245020Sneel else 655245020Sneel return (1); 656245020Sneel} 657245020Sneel 658256869Sneelvoid 659256869Sneelfbsdrun_set_capabilities(struct vmctx *ctx, int cpu) 660256869Sneel{ 661256869Sneel int err, tmp; 662256869Sneel 663256869Sneel if (fbsdrun_vmexit_on_hlt()) { 664256869Sneel err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); 665256869Sneel if (err < 0) { 666256869Sneel fprintf(stderr, "VM exit on HLT not supported\n"); 667256869Sneel exit(1); 668256869Sneel } 669256869Sneel vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); 670256869Sneel if (cpu == BSP) 671256869Sneel handler[VM_EXITCODE_HLT] = vmexit_hlt; 672256869Sneel } 673256869Sneel 674256869Sneel if (fbsdrun_vmexit_on_pause()) { 675256869Sneel /* 676256869Sneel * pause exit support required for this mode 677256869Sneel */ 678256869Sneel err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); 679256869Sneel if (err < 0) { 680256869Sneel fprintf(stderr, 681256869Sneel "SMP mux requested, no pause support\n"); 682256869Sneel exit(1); 683256869Sneel } 684256869Sneel vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); 685256869Sneel if (cpu == BSP) 686256869Sneel handler[VM_EXITCODE_PAUSE] = vmexit_pause; 687256869Sneel } 688256869Sneel 689267447Sjhb if (x2apic_mode) 690267447Sjhb err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); 691267447Sjhb else 692256869Sneel err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); 693256869Sneel 694256869Sneel if (err) { 695256869Sneel fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 696256869Sneel exit(1); 697256869Sneel } 698256869Sneel 699256869Sneel vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); 700256869Sneel} 701256869Sneel 702295124Sgrehanstatic struct vmctx * 703295124Sgrehando_open(const char *vmname) 704295124Sgrehan{ 705295124Sgrehan struct vmctx *ctx; 706295124Sgrehan int error; 707295124Sgrehan bool reinit, romboot; 708295124Sgrehan 709295124Sgrehan reinit = romboot = false; 710295124Sgrehan 711295124Sgrehan if (lpc_bootrom()) 712295124Sgrehan romboot = true; 713295124Sgrehan 714295124Sgrehan error = vm_create(vmname); 715295124Sgrehan if (error) { 716295124Sgrehan if (errno == EEXIST) { 717295124Sgrehan if (romboot) { 718295124Sgrehan reinit = true; 719295124Sgrehan } else { 720295124Sgrehan /* 721295124Sgrehan * The virtual machine has been setup by the 722295124Sgrehan * userspace bootloader. 723295124Sgrehan */ 724295124Sgrehan } 725295124Sgrehan } else { 726295124Sgrehan perror("vm_create"); 727295124Sgrehan exit(1); 728295124Sgrehan } 729295124Sgrehan } else { 730295124Sgrehan if (!romboot) { 731295124Sgrehan /* 732295124Sgrehan * If the virtual machine was just created then a 733295124Sgrehan * bootrom must be configured to boot it. 734295124Sgrehan */ 735295124Sgrehan fprintf(stderr, "virtual machine cannot be booted\n"); 736295124Sgrehan exit(1); 737295124Sgrehan } 738295124Sgrehan } 739295124Sgrehan 740295124Sgrehan ctx = vm_open(vmname); 741295124Sgrehan if (ctx == NULL) { 742295124Sgrehan perror("vm_open"); 743295124Sgrehan exit(1); 744295124Sgrehan } 745295124Sgrehan 746295124Sgrehan if (reinit) { 747295124Sgrehan error = vm_reinit(ctx); 748295124Sgrehan if (error) { 749295124Sgrehan perror("vm_reinit"); 750295124Sgrehan exit(1); 751295124Sgrehan } 752295124Sgrehan } 753295124Sgrehan return (ctx); 754295124Sgrehan} 755295124Sgrehan 756221828Sgrehanint 757221828Sgrehanmain(int argc, char *argv[]) 758221828Sgrehan{ 759259301Sgrehan int c, error, gdb_port, err, bvmcons; 760295124Sgrehan int max_vcpus, mptgen, memflags; 761284894Sneel int rtc_localtime; 762221828Sgrehan struct vmctx *ctx; 763221828Sgrehan uint64_t rip; 764248477Sneel size_t memsize; 765295124Sgrehan char *optstr; 766221828Sgrehan 767242192Sneel bvmcons = 0; 768221828Sgrehan progname = basename(argv[0]); 769256156Sneel gdb_port = 0; 770221828Sgrehan guest_ncpus = 1; 771248477Sneel memsize = 256 * MB; 772268887Sjhb mptgen = 1; 773284894Sneel rtc_localtime = 1; 774295124Sgrehan memflags = 0; 775221828Sgrehan 776295124Sgrehan optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; 777295124Sgrehan while ((c = getopt(argc, argv, optstr)) != -1) { 778221828Sgrehan switch (c) { 779240943Sneel case 'a': 780267447Sjhb x2apic_mode = 0; 781240943Sneel break; 782243327Sgrehan case 'A': 783243327Sgrehan acpi = 1; 784243327Sgrehan break; 785242192Sneel case 'b': 786242192Sneel bvmcons = 1; 787242192Sneel break; 788221828Sgrehan case 'p': 789268894Sjhb if (pincpu_parse(optarg) != 0) { 790268894Sjhb errx(EX_USAGE, "invalid vcpu pinning " 791268894Sjhb "configuration '%s'", optarg); 792268894Sjhb } 793221828Sgrehan break; 794221828Sgrehan case 'c': 795221828Sgrehan guest_ncpus = atoi(optarg); 796221828Sgrehan break; 797268953Sjhb case 'C': 798295124Sgrehan memflags |= VM_MEM_F_INCORE; 799268953Sjhb break; 800221828Sgrehan case 'g': 801221828Sgrehan gdb_port = atoi(optarg); 802221828Sgrehan break; 803257396Sneel case 'l': 804257396Sneel if (lpc_device_parse(optarg) != 0) { 805257396Sneel errx(EX_USAGE, "invalid lpc device " 806257396Sneel "configuration '%s'", optarg); 807257396Sneel } 808257396Sneel break; 809221828Sgrehan case 's': 810267341Sjhb if (pci_parse_slot(optarg) != 0) 811249916Sneel exit(1); 812249916Sneel else 813249916Sneel break; 814295124Sgrehan case 'S': 815295124Sgrehan memflags |= VM_MEM_F_WIRED; 816295124Sgrehan break; 817221828Sgrehan case 'm': 818256176Sneel error = vm_parse_memsize(optarg, &memsize); 819256176Sneel if (error) 820256176Sneel errx(EX_USAGE, "invalid memsize '%s'", optarg); 821221828Sgrehan break; 822221828Sgrehan case 'H': 823221828Sgrehan guest_vmexit_on_hlt = 1; 824221828Sgrehan break; 825239043Sneel case 'I': 826259301Sgrehan /* 827259301Sgrehan * The "-I" option was used to add an ioapic to the 828259301Sgrehan * virtual machine. 829259301Sgrehan * 830259301Sgrehan * An ioapic is now provided unconditionally for each 831259301Sgrehan * virtual machine and this option is now deprecated. 832259301Sgrehan */ 833239043Sneel break; 834221828Sgrehan case 'P': 835221828Sgrehan guest_vmexit_on_pause = 1; 836221828Sgrehan break; 837222105Sgrehan case 'e': 838222105Sgrehan strictio = 1; 839222105Sgrehan break; 840284894Sneel case 'u': 841284894Sneel rtc_localtime = 0; 842284894Sneel break; 843267450Sjhb case 'U': 844267450Sjhb guest_uuid_str = optarg; 845267450Sjhb break; 846264273Sjhb case 'w': 847264273Sjhb strictmsr = 0; 848264273Sjhb break; 849256755Sgrehan case 'W': 850256755Sgrehan virtio_msix = 0; 851256755Sgrehan break; 852267447Sjhb case 'x': 853267447Sjhb x2apic_mode = 1; 854267447Sjhb break; 855268887Sjhb case 'Y': 856268887Sjhb mptgen = 0; 857268887Sjhb break; 858221828Sgrehan case 'h': 859221828Sgrehan usage(0); 860221828Sgrehan default: 861221828Sgrehan usage(1); 862221828Sgrehan } 863221828Sgrehan } 864221828Sgrehan argc -= optind; 865221828Sgrehan argv += optind; 866221828Sgrehan 867221828Sgrehan if (argc != 1) 868221828Sgrehan usage(1); 869221828Sgrehan 870221828Sgrehan vmname = argv[0]; 871295124Sgrehan ctx = do_open(vmname); 872221828Sgrehan 873284899Sneel if (guest_ncpus < 1) { 874284899Sneel fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 875284899Sneel exit(1); 876284899Sneel } 877284899Sneel 878245020Sneel max_vcpus = num_vcpus_allowed(ctx); 879245020Sneel if (guest_ncpus > max_vcpus) { 880245020Sneel fprintf(stderr, "%d vCPUs requested but only %d available\n", 881245020Sneel guest_ncpus, max_vcpus); 882245020Sneel exit(1); 883245020Sneel } 884245020Sneel 885256869Sneel fbsdrun_set_capabilities(ctx, BSP); 886221828Sgrehan 887295124Sgrehan vm_set_memflags(ctx, memflags); 888248477Sneel err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 889248477Sneel if (err) { 890295124Sgrehan fprintf(stderr, "Unable to setup memory (%d)\n", errno); 891248477Sneel exit(1); 892221828Sgrehan } 893221828Sgrehan 894276349Sneel error = init_msr(); 895276349Sneel if (error) { 896276349Sneel fprintf(stderr, "init_msr error %d", error); 897276349Sneel exit(1); 898276349Sneel } 899276349Sneel 900249343Sneel init_mem(); 901221828Sgrehan init_inout(); 902268972Sjhb pci_irq_init(ctx); 903267393Sjhb ioapic_init(ctx); 904252682Sgrehan 905284894Sneel rtc_init(ctx, rtc_localtime); 906268972Sjhb sci_init(ctx); 907253181Sgrehan 908252682Sgrehan /* 909252682Sgrehan * Exit if a device emulation finds an error in it's initilization 910252682Sgrehan */ 911252682Sgrehan if (init_pci(ctx) != 0) 912252682Sgrehan exit(1); 913252682Sgrehan 914221828Sgrehan if (gdb_port != 0) 915221828Sgrehan init_dbgport(gdb_port); 916221828Sgrehan 917242192Sneel if (bvmcons) 918242192Sneel init_bvmcons(); 919242192Sneel 920295124Sgrehan if (lpc_bootrom()) { 921295124Sgrehan if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { 922295124Sgrehan fprintf(stderr, "ROM boot failed: unrestricted guest " 923295124Sgrehan "capability not available\n"); 924295124Sgrehan exit(1); 925295124Sgrehan } 926295124Sgrehan error = vcpu_reset(ctx, BSP); 927295124Sgrehan assert(error == 0); 928295124Sgrehan } 929295124Sgrehan 930221828Sgrehan error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); 931221828Sgrehan assert(error == 0); 932221828Sgrehan 933221828Sgrehan /* 934221828Sgrehan * build the guest tables, MP etc. 935221828Sgrehan */ 936268887Sjhb if (mptgen) { 937268887Sjhb error = mptable_build(ctx, guest_ncpus); 938268887Sjhb if (error) 939268887Sjhb exit(1); 940268887Sjhb } 941221828Sgrehan 942267450Sjhb error = smbios_build(ctx); 943267450Sjhb assert(error == 0); 944267450Sjhb 945243327Sgrehan if (acpi) { 946259301Sgrehan error = acpi_build(ctx, guest_ncpus); 947243327Sgrehan assert(error == 0); 948243327Sgrehan } 949243327Sgrehan 950295124Sgrehan if (lpc_bootrom()) 951295124Sgrehan fwctl_init(); 952295124Sgrehan 953221828Sgrehan /* 954259301Sgrehan * Change the proc title to include the VM name. 955259301Sgrehan */ 956259301Sgrehan setproctitle("%s", vmname); 957259301Sgrehan 958259301Sgrehan /* 959221828Sgrehan * Add CPU 0 960221828Sgrehan */ 961268894Sjhb fbsdrun_addcpu(ctx, BSP, BSP, rip); 962221828Sgrehan 963221828Sgrehan /* 964221828Sgrehan * Head off to the main event dispatch loop 965221828Sgrehan */ 966221828Sgrehan mevent_dispatch(); 967221828Sgrehan 968221828Sgrehan exit(1); 969221828Sgrehan} 970