vmx.c revision 264619
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: stable/10/sys/amd64/vmm/intel/vmx.c 264619 2014-04-17 18:00:07Z jhb $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/intel/vmx.c 264619 2014-04-17 18:00:07Z jhb $"); 31 32#include <sys/param.h> 33#include <sys/systm.h> 34#include <sys/smp.h> 35#include <sys/kernel.h> 36#include <sys/malloc.h> 37#include <sys/pcpu.h> 38#include <sys/proc.h> 39#include <sys/sysctl.h> 40 41#include <vm/vm.h> 42#include <vm/pmap.h> 43 44#include <machine/psl.h> 45#include <machine/cpufunc.h> 46#include <machine/md_var.h> 47#include <machine/segments.h> 48#include <machine/specialreg.h> 49#include <machine/vmparam.h> 50 51#include <machine/vmm.h> 52#include "vmm_host.h" 53#include "vmm_lapic.h" 54#include "vmm_msr.h" 55#include "vmm_ktr.h" 56#include "vmm_stat.h" 57 58#include "vmx_msr.h" 59#include "ept.h" 60#include "vmx_cpufunc.h" 61#include "vmx.h" 62#include "x86.h" 63#include "vmx_controls.h" 64 65#define PINBASED_CTLS_ONE_SETTING \ 66 (PINBASED_EXTINT_EXITING | \ 67 PINBASED_NMI_EXITING | \ 68 PINBASED_VIRTUAL_NMI) 69#define PINBASED_CTLS_ZERO_SETTING 0 70 71#define PROCBASED_CTLS_WINDOW_SETTING \ 72 (PROCBASED_INT_WINDOW_EXITING | \ 73 PROCBASED_NMI_WINDOW_EXITING) 74 75#define PROCBASED_CTLS_ONE_SETTING \ 76 (PROCBASED_SECONDARY_CONTROLS | \ 77 PROCBASED_IO_EXITING | \ 78 PROCBASED_MSR_BITMAPS | \ 79 PROCBASED_CTLS_WINDOW_SETTING) 80#define PROCBASED_CTLS_ZERO_SETTING \ 81 (PROCBASED_CR3_LOAD_EXITING | \ 82 PROCBASED_CR3_STORE_EXITING | \ 83 PROCBASED_IO_BITMAPS) 84 85#define PROCBASED_CTLS2_ONE_SETTING PROCBASED2_ENABLE_EPT 86#define PROCBASED_CTLS2_ZERO_SETTING 0 87 88#define VM_EXIT_CTLS_ONE_SETTING_NO_PAT \ 89 (VM_EXIT_HOST_LMA | \ 90 VM_EXIT_SAVE_EFER | \ 91 VM_EXIT_LOAD_EFER) 92 93#define VM_EXIT_CTLS_ONE_SETTING \ 94 (VM_EXIT_CTLS_ONE_SETTING_NO_PAT | \ 95 VM_EXIT_SAVE_PAT | \ 96 VM_EXIT_LOAD_PAT) 97#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS 98 99#define VM_ENTRY_CTLS_ONE_SETTING_NO_PAT VM_ENTRY_LOAD_EFER 100 101#define VM_ENTRY_CTLS_ONE_SETTING \ 102 (VM_ENTRY_CTLS_ONE_SETTING_NO_PAT | \ 103 VM_ENTRY_LOAD_PAT) 104#define VM_ENTRY_CTLS_ZERO_SETTING \ 105 (VM_ENTRY_LOAD_DEBUG_CONTROLS | \ 106 VM_ENTRY_INTO_SMM | \ 107 VM_ENTRY_DEACTIVATE_DUAL_MONITOR) 108 109#define guest_msr_rw(vmx, msr) \ 110 msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW) 111 112#define HANDLED 1 113#define UNHANDLED 0 114 115MALLOC_DEFINE(M_VMX, "vmx", "vmx"); 116 117SYSCTL_DECL(_hw_vmm); 118SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL); 119 120int vmxon_enabled[MAXCPU]; 121static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); 122 123static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; 124static uint32_t exit_ctls, entry_ctls; 125 126static uint64_t cr0_ones_mask, cr0_zeros_mask; 127SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_ones_mask, CTLFLAG_RD, 128 &cr0_ones_mask, 0, NULL); 129SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr0_zeros_mask, CTLFLAG_RD, 130 &cr0_zeros_mask, 0, NULL); 131 132static uint64_t cr4_ones_mask, cr4_zeros_mask; 133SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_ones_mask, CTLFLAG_RD, 134 &cr4_ones_mask, 0, NULL); 135SYSCTL_ULONG(_hw_vmm_vmx, OID_AUTO, cr4_zeros_mask, CTLFLAG_RD, 136 &cr4_zeros_mask, 0, NULL); 137 138static int vmx_no_patmsr; 139 140static int vmx_initialized; 141SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD, 142 &vmx_initialized, 0, "Intel VMX initialized"); 143 144/* 145 * Virtual NMI blocking conditions. 146 * 147 * Some processor implementations also require NMI to be blocked if 148 * the STI_BLOCKING bit is set. It is possible to detect this at runtime 149 * based on the (exit_reason,exit_qual) tuple being set to 150 * (EXIT_REASON_INVAL_VMCS, EXIT_QUAL_NMI_WHILE_STI_BLOCKING). 151 * 152 * We take the easy way out and also include STI_BLOCKING as one of the 153 * gating items for vNMI injection. 154 */ 155static uint64_t nmi_blocking_bits = VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING | 156 VMCS_INTERRUPTIBILITY_NMI_BLOCKING | 157 VMCS_INTERRUPTIBILITY_STI_BLOCKING; 158 159/* 160 * Optional capabilities 161 */ 162static int cap_halt_exit; 163static int cap_pause_exit; 164static int cap_unrestricted_guest; 165static int cap_monitor_trap; 166static int cap_invpcid; 167 168static struct unrhdr *vpid_unr; 169static u_int vpid_alloc_failed; 170SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, 171 &vpid_alloc_failed, 0, NULL); 172 173#ifdef KTR 174static const char * 175exit_reason_to_str(int reason) 176{ 177 static char reasonbuf[32]; 178 179 switch (reason) { 180 case EXIT_REASON_EXCEPTION: 181 return "exception"; 182 case EXIT_REASON_EXT_INTR: 183 return "extint"; 184 case EXIT_REASON_TRIPLE_FAULT: 185 return "triplefault"; 186 case EXIT_REASON_INIT: 187 return "init"; 188 case EXIT_REASON_SIPI: 189 return "sipi"; 190 case EXIT_REASON_IO_SMI: 191 return "iosmi"; 192 case EXIT_REASON_SMI: 193 return "smi"; 194 case EXIT_REASON_INTR_WINDOW: 195 return "intrwindow"; 196 case EXIT_REASON_NMI_WINDOW: 197 return "nmiwindow"; 198 case EXIT_REASON_TASK_SWITCH: 199 return "taskswitch"; 200 case EXIT_REASON_CPUID: 201 return "cpuid"; 202 case EXIT_REASON_GETSEC: 203 return "getsec"; 204 case EXIT_REASON_HLT: 205 return "hlt"; 206 case EXIT_REASON_INVD: 207 return "invd"; 208 case EXIT_REASON_INVLPG: 209 return "invlpg"; 210 case EXIT_REASON_RDPMC: 211 return "rdpmc"; 212 case EXIT_REASON_RDTSC: 213 return "rdtsc"; 214 case EXIT_REASON_RSM: 215 return "rsm"; 216 case EXIT_REASON_VMCALL: 217 return "vmcall"; 218 case EXIT_REASON_VMCLEAR: 219 return "vmclear"; 220 case EXIT_REASON_VMLAUNCH: 221 return "vmlaunch"; 222 case EXIT_REASON_VMPTRLD: 223 return "vmptrld"; 224 case EXIT_REASON_VMPTRST: 225 return "vmptrst"; 226 case EXIT_REASON_VMREAD: 227 return "vmread"; 228 case EXIT_REASON_VMRESUME: 229 return "vmresume"; 230 case EXIT_REASON_VMWRITE: 231 return "vmwrite"; 232 case EXIT_REASON_VMXOFF: 233 return "vmxoff"; 234 case EXIT_REASON_VMXON: 235 return "vmxon"; 236 case EXIT_REASON_CR_ACCESS: 237 return "craccess"; 238 case EXIT_REASON_DR_ACCESS: 239 return "draccess"; 240 case EXIT_REASON_INOUT: 241 return "inout"; 242 case EXIT_REASON_RDMSR: 243 return "rdmsr"; 244 case EXIT_REASON_WRMSR: 245 return "wrmsr"; 246 case EXIT_REASON_INVAL_VMCS: 247 return "invalvmcs"; 248 case EXIT_REASON_INVAL_MSR: 249 return "invalmsr"; 250 case EXIT_REASON_MWAIT: 251 return "mwait"; 252 case EXIT_REASON_MTF: 253 return "mtf"; 254 case EXIT_REASON_MONITOR: 255 return "monitor"; 256 case EXIT_REASON_PAUSE: 257 return "pause"; 258 case EXIT_REASON_MCE: 259 return "mce"; 260 case EXIT_REASON_TPR: 261 return "tpr"; 262 case EXIT_REASON_APIC: 263 return "apic"; 264 case EXIT_REASON_GDTR_IDTR: 265 return "gdtridtr"; 266 case EXIT_REASON_LDTR_TR: 267 return "ldtrtr"; 268 case EXIT_REASON_EPT_FAULT: 269 return "eptfault"; 270 case EXIT_REASON_EPT_MISCONFIG: 271 return "eptmisconfig"; 272 case EXIT_REASON_INVEPT: 273 return "invept"; 274 case EXIT_REASON_RDTSCP: 275 return "rdtscp"; 276 case EXIT_REASON_VMX_PREEMPT: 277 return "vmxpreempt"; 278 case EXIT_REASON_INVVPID: 279 return "invvpid"; 280 case EXIT_REASON_WBINVD: 281 return "wbinvd"; 282 case EXIT_REASON_XSETBV: 283 return "xsetbv"; 284 default: 285 snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason); 286 return (reasonbuf); 287 } 288} 289#endif /* KTR */ 290 291u_long 292vmx_fix_cr0(u_long cr0) 293{ 294 295 return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); 296} 297 298u_long 299vmx_fix_cr4(u_long cr4) 300{ 301 302 return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); 303} 304 305static void 306vpid_free(int vpid) 307{ 308 if (vpid < 0 || vpid > 0xffff) 309 panic("vpid_free: invalid vpid %d", vpid); 310 311 /* 312 * VPIDs [0,VM_MAXCPU] are special and are not allocated from 313 * the unit number allocator. 314 */ 315 316 if (vpid > VM_MAXCPU) 317 free_unr(vpid_unr, vpid); 318} 319 320static void 321vpid_alloc(uint16_t *vpid, int num) 322{ 323 int i, x; 324 325 if (num <= 0 || num > VM_MAXCPU) 326 panic("invalid number of vpids requested: %d", num); 327 328 /* 329 * If the "enable vpid" execution control is not enabled then the 330 * VPID is required to be 0 for all vcpus. 331 */ 332 if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) { 333 for (i = 0; i < num; i++) 334 vpid[i] = 0; 335 return; 336 } 337 338 /* 339 * Allocate a unique VPID for each vcpu from the unit number allocator. 340 */ 341 for (i = 0; i < num; i++) { 342 x = alloc_unr(vpid_unr); 343 if (x == -1) 344 break; 345 else 346 vpid[i] = x; 347 } 348 349 if (i < num) { 350 atomic_add_int(&vpid_alloc_failed, 1); 351 352 /* 353 * If the unit number allocator does not have enough unique 354 * VPIDs then we need to allocate from the [1,VM_MAXCPU] range. 355 * 356 * These VPIDs are not be unique across VMs but this does not 357 * affect correctness because the combined mappings are also 358 * tagged with the EP4TA which is unique for each VM. 359 * 360 * It is still sub-optimal because the invvpid will invalidate 361 * combined mappings for a particular VPID across all EP4TAs. 362 */ 363 while (i-- > 0) 364 vpid_free(vpid[i]); 365 366 for (i = 0; i < num; i++) 367 vpid[i] = i + 1; 368 } 369} 370 371static void 372vpid_init(void) 373{ 374 /* 375 * VPID 0 is required when the "enable VPID" execution control is 376 * disabled. 377 * 378 * VPIDs [1,VM_MAXCPU] are used as the "overflow namespace" when the 379 * unit number allocator does not have sufficient unique VPIDs to 380 * satisfy the allocation. 381 * 382 * The remaining VPIDs are managed by the unit number allocator. 383 */ 384 vpid_unr = new_unrhdr(VM_MAXCPU + 1, 0xffff, NULL); 385} 386 387static void 388msr_save_area_init(struct msr_entry *g_area, int *g_count) 389{ 390 int cnt; 391 392 static struct msr_entry guest_msrs[] = { 393 { MSR_KGSBASE, 0, 0 }, 394 }; 395 396 cnt = sizeof(guest_msrs) / sizeof(guest_msrs[0]); 397 if (cnt > GUEST_MSR_MAX_ENTRIES) 398 panic("guest msr save area overrun"); 399 bcopy(guest_msrs, g_area, sizeof(guest_msrs)); 400 *g_count = cnt; 401} 402 403static void 404vmx_disable(void *arg __unused) 405{ 406 struct invvpid_desc invvpid_desc = { 0 }; 407 struct invept_desc invept_desc = { 0 }; 408 409 if (vmxon_enabled[curcpu]) { 410 /* 411 * See sections 25.3.3.3 and 25.3.3.4 in Intel Vol 3b. 412 * 413 * VMXON or VMXOFF are not required to invalidate any TLB 414 * caching structures. This prevents potential retention of 415 * cached information in the TLB between distinct VMX episodes. 416 */ 417 invvpid(INVVPID_TYPE_ALL_CONTEXTS, invvpid_desc); 418 invept(INVEPT_TYPE_ALL_CONTEXTS, invept_desc); 419 vmxoff(); 420 } 421 load_cr4(rcr4() & ~CR4_VMXE); 422} 423 424static int 425vmx_cleanup(void) 426{ 427 428 if (vpid_unr != NULL) { 429 delete_unrhdr(vpid_unr); 430 vpid_unr = NULL; 431 } 432 433 smp_rendezvous(NULL, vmx_disable, NULL, NULL); 434 435 return (0); 436} 437 438static void 439vmx_enable(void *arg __unused) 440{ 441 int error; 442 443 load_cr4(rcr4() | CR4_VMXE); 444 445 *(uint32_t *)vmxon_region[curcpu] = vmx_revision(); 446 error = vmxon(vmxon_region[curcpu]); 447 if (error == 0) 448 vmxon_enabled[curcpu] = 1; 449} 450 451static void 452vmx_restore(void) 453{ 454 455 if (vmxon_enabled[curcpu]) 456 vmxon(vmxon_region[curcpu]); 457} 458 459static int 460vmx_init(void) 461{ 462 int error; 463 uint64_t fixed0, fixed1, feature_control; 464 uint32_t tmp; 465 466 /* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */ 467 if (!(cpu_feature2 & CPUID2_VMX)) { 468 printf("vmx_init: processor does not support VMX operation\n"); 469 return (ENXIO); 470 } 471 472 /* 473 * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits 474 * are set (bits 0 and 2 respectively). 475 */ 476 feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); 477 if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 || 478 (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) { 479 printf("vmx_init: VMX operation disabled by BIOS\n"); 480 return (ENXIO); 481 } 482 483 /* Check support for primary processor-based VM-execution controls */ 484 error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 485 MSR_VMX_TRUE_PROCBASED_CTLS, 486 PROCBASED_CTLS_ONE_SETTING, 487 PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls); 488 if (error) { 489 printf("vmx_init: processor does not support desired primary " 490 "processor-based controls\n"); 491 return (error); 492 } 493 494 /* Clear the processor-based ctl bits that are set on demand */ 495 procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING; 496 497 /* Check support for secondary processor-based VM-execution controls */ 498 error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 499 MSR_VMX_PROCBASED_CTLS2, 500 PROCBASED_CTLS2_ONE_SETTING, 501 PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2); 502 if (error) { 503 printf("vmx_init: processor does not support desired secondary " 504 "processor-based controls\n"); 505 return (error); 506 } 507 508 /* Check support for VPID */ 509 error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, 510 PROCBASED2_ENABLE_VPID, 0, &tmp); 511 if (error == 0) 512 procbased_ctls2 |= PROCBASED2_ENABLE_VPID; 513 514 /* Check support for pin-based VM-execution controls */ 515 error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 516 MSR_VMX_TRUE_PINBASED_CTLS, 517 PINBASED_CTLS_ONE_SETTING, 518 PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls); 519 if (error) { 520 printf("vmx_init: processor does not support desired " 521 "pin-based controls\n"); 522 return (error); 523 } 524 525 /* Check support for VM-exit controls */ 526 error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS, 527 VM_EXIT_CTLS_ONE_SETTING, 528 VM_EXIT_CTLS_ZERO_SETTING, 529 &exit_ctls); 530 if (error) { 531 /* Try again without the PAT MSR bits */ 532 error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, 533 MSR_VMX_TRUE_EXIT_CTLS, 534 VM_EXIT_CTLS_ONE_SETTING_NO_PAT, 535 VM_EXIT_CTLS_ZERO_SETTING, 536 &exit_ctls); 537 if (error) { 538 printf("vmx_init: processor does not support desired " 539 "exit controls\n"); 540 return (error); 541 } else { 542 if (bootverbose) 543 printf("vmm: PAT MSR access not supported\n"); 544 guest_msr_valid(MSR_PAT); 545 vmx_no_patmsr = 1; 546 } 547 } 548 549 /* Check support for VM-entry controls */ 550 if (!vmx_no_patmsr) { 551 error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, 552 MSR_VMX_TRUE_ENTRY_CTLS, 553 VM_ENTRY_CTLS_ONE_SETTING, 554 VM_ENTRY_CTLS_ZERO_SETTING, 555 &entry_ctls); 556 } else { 557 error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, 558 MSR_VMX_TRUE_ENTRY_CTLS, 559 VM_ENTRY_CTLS_ONE_SETTING_NO_PAT, 560 VM_ENTRY_CTLS_ZERO_SETTING, 561 &entry_ctls); 562 } 563 564 if (error) { 565 printf("vmx_init: processor does not support desired " 566 "entry controls\n"); 567 return (error); 568 } 569 570 /* 571 * Check support for optional features by testing them 572 * as individual bits 573 */ 574 cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 575 MSR_VMX_TRUE_PROCBASED_CTLS, 576 PROCBASED_HLT_EXITING, 0, 577 &tmp) == 0); 578 579 cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 580 MSR_VMX_PROCBASED_CTLS, 581 PROCBASED_MTF, 0, 582 &tmp) == 0); 583 584 cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 585 MSR_VMX_TRUE_PROCBASED_CTLS, 586 PROCBASED_PAUSE_EXITING, 0, 587 &tmp) == 0); 588 589 cap_unrestricted_guest = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 590 MSR_VMX_PROCBASED_CTLS2, 591 PROCBASED2_UNRESTRICTED_GUEST, 0, 592 &tmp) == 0); 593 594 cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 595 MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, 596 &tmp) == 0); 597 598 599 /* Initialize EPT */ 600 error = ept_init(); 601 if (error) { 602 printf("vmx_init: ept initialization failed (%d)\n", error); 603 return (error); 604 } 605 606 /* 607 * Stash the cr0 and cr4 bits that must be fixed to 0 or 1 608 */ 609 fixed0 = rdmsr(MSR_VMX_CR0_FIXED0); 610 fixed1 = rdmsr(MSR_VMX_CR0_FIXED1); 611 cr0_ones_mask = fixed0 & fixed1; 612 cr0_zeros_mask = ~fixed0 & ~fixed1; 613 614 /* 615 * CR0_PE and CR0_PG can be set to zero in VMX non-root operation 616 * if unrestricted guest execution is allowed. 617 */ 618 if (cap_unrestricted_guest) 619 cr0_ones_mask &= ~(CR0_PG | CR0_PE); 620 621 /* 622 * Do not allow the guest to set CR0_NW or CR0_CD. 623 */ 624 cr0_zeros_mask |= (CR0_NW | CR0_CD); 625 626 fixed0 = rdmsr(MSR_VMX_CR4_FIXED0); 627 fixed1 = rdmsr(MSR_VMX_CR4_FIXED1); 628 cr4_ones_mask = fixed0 & fixed1; 629 cr4_zeros_mask = ~fixed0 & ~fixed1; 630 631 vpid_init(); 632 633 /* enable VMX operation */ 634 smp_rendezvous(NULL, vmx_enable, NULL, NULL); 635 636 vmx_initialized = 1; 637 638 return (0); 639} 640 641static int 642vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) 643{ 644 int error, mask_ident, shadow_ident; 645 uint64_t mask_value; 646 647 if (which != 0 && which != 4) 648 panic("vmx_setup_cr_shadow: unknown cr%d", which); 649 650 if (which == 0) { 651 mask_ident = VMCS_CR0_MASK; 652 mask_value = cr0_ones_mask | cr0_zeros_mask; 653 shadow_ident = VMCS_CR0_SHADOW; 654 } else { 655 mask_ident = VMCS_CR4_MASK; 656 mask_value = cr4_ones_mask | cr4_zeros_mask; 657 shadow_ident = VMCS_CR4_SHADOW; 658 } 659 660 error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value); 661 if (error) 662 return (error); 663 664 error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial); 665 if (error) 666 return (error); 667 668 return (0); 669} 670#define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init)) 671#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init)) 672 673static void * 674vmx_vminit(struct vm *vm, pmap_t pmap) 675{ 676 uint16_t vpid[VM_MAXCPU]; 677 int i, error, guest_msr_count; 678 struct vmx *vmx; 679 680 vmx = malloc(sizeof(struct vmx), M_VMX, M_WAITOK | M_ZERO); 681 if ((uintptr_t)vmx & PAGE_MASK) { 682 panic("malloc of struct vmx not aligned on %d byte boundary", 683 PAGE_SIZE); 684 } 685 vmx->vm = vm; 686 687 vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pml4)); 688 689 /* 690 * Clean up EPTP-tagged guest physical and combined mappings 691 * 692 * VMX transitions are not required to invalidate any guest physical 693 * mappings. So, it may be possible for stale guest physical mappings 694 * to be present in the processor TLBs. 695 * 696 * Combined mappings for this EP4TA are also invalidated for all VPIDs. 697 */ 698 ept_invalidate_mappings(vmx->eptp); 699 700 msr_bitmap_initialize(vmx->msr_bitmap); 701 702 /* 703 * It is safe to allow direct access to MSR_GSBASE and MSR_FSBASE. 704 * The guest FSBASE and GSBASE are saved and restored during 705 * vm-exit and vm-entry respectively. The host FSBASE and GSBASE are 706 * always restored from the vmcs host state area on vm-exit. 707 * 708 * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in 709 * how they are saved/restored so can be directly accessed by the 710 * guest. 711 * 712 * Guest KGSBASE is saved and restored in the guest MSR save area. 713 * Host KGSBASE is restored before returning to userland from the pcb. 714 * There will be a window of time when we are executing in the host 715 * kernel context with a value of KGSBASE from the guest. This is ok 716 * because the value of KGSBASE is inconsequential in kernel context. 717 * 718 * MSR_EFER is saved and restored in the guest VMCS area on a 719 * VM exit and entry respectively. It is also restored from the 720 * host VMCS area on a VM exit. 721 */ 722 if (guest_msr_rw(vmx, MSR_GSBASE) || 723 guest_msr_rw(vmx, MSR_FSBASE) || 724 guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) || 725 guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) || 726 guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) || 727 guest_msr_rw(vmx, MSR_KGSBASE) || 728 guest_msr_rw(vmx, MSR_EFER)) 729 panic("vmx_vminit: error setting guest msr access"); 730 731 /* 732 * MSR_PAT is saved and restored in the guest VMCS are on a VM exit 733 * and entry respectively. It is also restored from the host VMCS 734 * area on a VM exit. However, if running on a system with no 735 * MSR_PAT save/restore support, leave access disabled so accesses 736 * will be trapped. 737 */ 738 if (!vmx_no_patmsr && guest_msr_rw(vmx, MSR_PAT)) 739 panic("vmx_vminit: error setting guest pat msr access"); 740 741 vpid_alloc(vpid, VM_MAXCPU); 742 743 for (i = 0; i < VM_MAXCPU; i++) { 744 vmx->vmcs[i].identifier = vmx_revision(); 745 error = vmclear(&vmx->vmcs[i]); 746 if (error != 0) { 747 panic("vmx_vminit: vmclear error %d on vcpu %d\n", 748 error, i); 749 } 750 751 error = vmcs_set_defaults(&vmx->vmcs[i], 752 (u_long)vmx_exit_guest, 753 (u_long)&vmx->ctx[i], 754 vmx->eptp, 755 pinbased_ctls, 756 procbased_ctls, 757 procbased_ctls2, 758 exit_ctls, entry_ctls, 759 vtophys(vmx->msr_bitmap), 760 vpid[i]); 761 762 if (error != 0) 763 panic("vmx_vminit: vmcs_set_defaults error %d", error); 764 765 vmx->cap[i].set = 0; 766 vmx->cap[i].proc_ctls = procbased_ctls; 767 vmx->cap[i].proc_ctls2 = procbased_ctls2; 768 769 vmx->state[i].lastcpu = -1; 770 vmx->state[i].vpid = vpid[i]; 771 772 msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count); 773 774 error = vmcs_set_msr_save(&vmx->vmcs[i], 775 vtophys(vmx->guest_msrs[i]), 776 guest_msr_count); 777 if (error != 0) 778 panic("vmcs_set_msr_save error %d", error); 779 780 /* 781 * Set up the CR0/4 shadows, and init the read shadow 782 * to the power-on register value from the Intel Sys Arch. 783 * CR0 - 0x60000010 784 * CR4 - 0 785 */ 786 error = vmx_setup_cr0_shadow(&vmx->vmcs[i], 0x60000010); 787 if (error != 0) 788 panic("vmx_setup_cr0_shadow %d", error); 789 790 error = vmx_setup_cr4_shadow(&vmx->vmcs[i], 0); 791 if (error != 0) 792 panic("vmx_setup_cr4_shadow %d", error); 793 794 vmx->ctx[i].pmap = pmap; 795 vmx->ctx[i].eptp = vmx->eptp; 796 } 797 798 return (vmx); 799} 800 801static int 802vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) 803{ 804 int handled, func; 805 806 func = vmxctx->guest_rax; 807 808 handled = x86_emulate_cpuid(vm, vcpu, 809 (uint32_t*)(&vmxctx->guest_rax), 810 (uint32_t*)(&vmxctx->guest_rbx), 811 (uint32_t*)(&vmxctx->guest_rcx), 812 (uint32_t*)(&vmxctx->guest_rdx)); 813 return (handled); 814} 815 816static __inline void 817vmx_run_trace(struct vmx *vmx, int vcpu) 818{ 819#ifdef KTR 820 VCPU_CTR1(vmx->vm, vcpu, "Resume execution at %#lx", vmcs_guest_rip()); 821#endif 822} 823 824static __inline void 825vmx_exit_trace(struct vmx *vmx, int vcpu, uint64_t rip, uint32_t exit_reason, 826 int handled) 827{ 828#ifdef KTR 829 VCPU_CTR3(vmx->vm, vcpu, "%s %s vmexit at 0x%0lx", 830 handled ? "handled" : "unhandled", 831 exit_reason_to_str(exit_reason), rip); 832#endif 833} 834 835static __inline void 836vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) 837{ 838#ifdef KTR 839 VCPU_CTR1(vmx->vm, vcpu, "astpending vmexit at 0x%0lx", rip); 840#endif 841} 842 843static void 844vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu) 845{ 846 int lastcpu; 847 struct vmxstate *vmxstate; 848 struct invvpid_desc invvpid_desc = { 0 }; 849 850 vmxstate = &vmx->state[vcpu]; 851 lastcpu = vmxstate->lastcpu; 852 vmxstate->lastcpu = curcpu; 853 854 if (lastcpu == curcpu) 855 return; 856 857 vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); 858 859 vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); 860 vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); 861 vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); 862 863 /* 864 * If we are using VPIDs then invalidate all mappings tagged with 'vpid' 865 * 866 * We do this because this vcpu was executing on a different host 867 * cpu when it last ran. We do not track whether it invalidated 868 * mappings associated with its 'vpid' during that run. So we must 869 * assume that the mappings associated with 'vpid' on 'curcpu' are 870 * stale and invalidate them. 871 * 872 * Note that we incur this penalty only when the scheduler chooses to 873 * move the thread associated with this vcpu between host cpus. 874 * 875 * Note also that this will invalidate mappings tagged with 'vpid' 876 * for "all" EP4TAs. 877 */ 878 if (vmxstate->vpid != 0) { 879 invvpid_desc.vpid = vmxstate->vpid; 880 invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); 881 } 882} 883 884/* 885 * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set. 886 */ 887CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0); 888 889static void __inline 890vmx_set_int_window_exiting(struct vmx *vmx, int vcpu) 891{ 892 893 vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING; 894 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 895} 896 897static void __inline 898vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) 899{ 900 901 vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; 902 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 903} 904 905static void __inline 906vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu) 907{ 908 909 vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING; 910 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 911} 912 913static void __inline 914vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) 915{ 916 917 vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; 918 vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 919} 920 921static int 922vmx_inject_nmi(struct vmx *vmx, int vcpu) 923{ 924 uint64_t info, interruptibility; 925 926 /* Bail out if no NMI requested */ 927 if (!vm_nmi_pending(vmx->vm, vcpu)) 928 return (0); 929 930 interruptibility = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 931 if (interruptibility & nmi_blocking_bits) 932 goto nmiblocked; 933 934 /* 935 * Inject the virtual NMI. The vector must be the NMI IDT entry 936 * or the VMCS entry check will fail. 937 */ 938 info = VMCS_INTERRUPTION_INFO_NMI | VMCS_INTERRUPTION_INFO_VALID; 939 info |= IDT_NMI; 940 vmcs_write(VMCS_ENTRY_INTR_INFO, info); 941 942 VCPU_CTR0(vmx->vm, vcpu, "Injecting vNMI"); 943 944 /* Clear the request */ 945 vm_nmi_clear(vmx->vm, vcpu); 946 return (1); 947 948nmiblocked: 949 /* 950 * Set the NMI Window Exiting execution control so we can inject 951 * the virtual NMI as soon as blocking condition goes away. 952 */ 953 vmx_set_nmi_window_exiting(vmx, vcpu); 954 955 VCPU_CTR0(vmx->vm, vcpu, "Enabling NMI window exiting"); 956 return (1); 957} 958 959static void 960vmx_inject_interrupts(struct vmx *vmx, int vcpu) 961{ 962 int vector; 963 uint64_t info, rflags, interruptibility; 964 965 const int HWINTR_BLOCKED = VMCS_INTERRUPTIBILITY_STI_BLOCKING | 966 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING; 967 968 /* 969 * If there is already an interrupt pending then just return. 970 * 971 * This could happen if an interrupt was injected on a prior 972 * VM entry but the actual entry into guest mode was aborted 973 * because of a pending AST. 974 */ 975 info = vmcs_read(VMCS_ENTRY_INTR_INFO); 976 if (info & VMCS_INTERRUPTION_INFO_VALID) 977 return; 978 979 /* 980 * NMI injection has priority so deal with those first 981 */ 982 if (vmx_inject_nmi(vmx, vcpu)) 983 return; 984 985 /* Ask the local apic for a vector to inject */ 986 vector = lapic_pending_intr(vmx->vm, vcpu); 987 if (vector < 0) 988 return; 989 990 if (vector < 32 || vector > 255) 991 panic("vmx_inject_interrupts: invalid vector %d\n", vector); 992 993 /* Check RFLAGS.IF and the interruptibility state of the guest */ 994 rflags = vmcs_read(VMCS_GUEST_RFLAGS); 995 if ((rflags & PSL_I) == 0) 996 goto cantinject; 997 998 interruptibility = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 999 if (interruptibility & HWINTR_BLOCKED) 1000 goto cantinject; 1001 1002 /* Inject the interrupt */ 1003 info = VMCS_INTERRUPTION_INFO_HW_INTR | VMCS_INTERRUPTION_INFO_VALID; 1004 info |= vector; 1005 vmcs_write(VMCS_ENTRY_INTR_INFO, info); 1006 1007 /* Update the Local APIC ISR */ 1008 lapic_intr_accepted(vmx->vm, vcpu, vector); 1009 1010 VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector); 1011 1012 return; 1013 1014cantinject: 1015 /* 1016 * Set the Interrupt Window Exiting execution control so we can inject 1017 * the interrupt as soon as blocking condition goes away. 1018 */ 1019 vmx_set_int_window_exiting(vmx, vcpu); 1020 1021 VCPU_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting"); 1022} 1023 1024static int 1025vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual) 1026{ 1027 int cr, vmcs_guest_cr, vmcs_shadow_cr; 1028 uint64_t crval, regval, ones_mask, zeros_mask; 1029 const struct vmxctx *vmxctx; 1030 1031 /* We only handle mov to %cr0 or %cr4 at this time */ 1032 if ((exitqual & 0xf0) != 0x00) 1033 return (UNHANDLED); 1034 1035 cr = exitqual & 0xf; 1036 if (cr != 0 && cr != 4) 1037 return (UNHANDLED); 1038 1039 vmxctx = &vmx->ctx[vcpu]; 1040 1041 /* 1042 * We must use vmcs_write() directly here because vmcs_setreg() will 1043 * call vmclear(vmcs) as a side-effect which we certainly don't want. 1044 */ 1045 switch ((exitqual >> 8) & 0xf) { 1046 case 0: 1047 regval = vmxctx->guest_rax; 1048 break; 1049 case 1: 1050 regval = vmxctx->guest_rcx; 1051 break; 1052 case 2: 1053 regval = vmxctx->guest_rdx; 1054 break; 1055 case 3: 1056 regval = vmxctx->guest_rbx; 1057 break; 1058 case 4: 1059 regval = vmcs_read(VMCS_GUEST_RSP); 1060 break; 1061 case 5: 1062 regval = vmxctx->guest_rbp; 1063 break; 1064 case 6: 1065 regval = vmxctx->guest_rsi; 1066 break; 1067 case 7: 1068 regval = vmxctx->guest_rdi; 1069 break; 1070 case 8: 1071 regval = vmxctx->guest_r8; 1072 break; 1073 case 9: 1074 regval = vmxctx->guest_r9; 1075 break; 1076 case 10: 1077 regval = vmxctx->guest_r10; 1078 break; 1079 case 11: 1080 regval = vmxctx->guest_r11; 1081 break; 1082 case 12: 1083 regval = vmxctx->guest_r12; 1084 break; 1085 case 13: 1086 regval = vmxctx->guest_r13; 1087 break; 1088 case 14: 1089 regval = vmxctx->guest_r14; 1090 break; 1091 case 15: 1092 regval = vmxctx->guest_r15; 1093 break; 1094 } 1095 1096 if (cr == 0) { 1097 ones_mask = cr0_ones_mask; 1098 zeros_mask = cr0_zeros_mask; 1099 vmcs_guest_cr = VMCS_GUEST_CR0; 1100 vmcs_shadow_cr = VMCS_CR0_SHADOW; 1101 } else { 1102 ones_mask = cr4_ones_mask; 1103 zeros_mask = cr4_zeros_mask; 1104 vmcs_guest_cr = VMCS_GUEST_CR4; 1105 vmcs_shadow_cr = VMCS_CR4_SHADOW; 1106 } 1107 vmcs_write(vmcs_shadow_cr, regval); 1108 1109 crval = regval | ones_mask; 1110 crval &= ~zeros_mask; 1111 vmcs_write(vmcs_guest_cr, crval); 1112 1113 if (cr == 0 && regval & CR0_PG) { 1114 uint64_t efer, entry_ctls; 1115 1116 /* 1117 * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and 1118 * the "IA-32e mode guest" bit in VM-entry control must be 1119 * equal. 1120 */ 1121 efer = vmcs_read(VMCS_GUEST_IA32_EFER); 1122 if (efer & EFER_LME) { 1123 efer |= EFER_LMA; 1124 vmcs_write(VMCS_GUEST_IA32_EFER, efer); 1125 entry_ctls = vmcs_read(VMCS_ENTRY_CTLS); 1126 entry_ctls |= VM_ENTRY_GUEST_LMA; 1127 vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); 1128 } 1129 } 1130 1131 return (HANDLED); 1132} 1133 1134static int 1135ept_fault_type(uint64_t ept_qual) 1136{ 1137 int fault_type; 1138 1139 if (ept_qual & EPT_VIOLATION_DATA_WRITE) 1140 fault_type = VM_PROT_WRITE; 1141 else if (ept_qual & EPT_VIOLATION_INST_FETCH) 1142 fault_type = VM_PROT_EXECUTE; 1143 else 1144 fault_type= VM_PROT_READ; 1145 1146 return (fault_type); 1147} 1148 1149static boolean_t 1150ept_emulation_fault(uint64_t ept_qual) 1151{ 1152 int read, write; 1153 1154 /* EPT fault on an instruction fetch doesn't make sense here */ 1155 if (ept_qual & EPT_VIOLATION_INST_FETCH) 1156 return (FALSE); 1157 1158 /* EPT fault must be a read fault or a write fault */ 1159 read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0; 1160 write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0; 1161 if ((read | write) == 0) 1162 return (FALSE); 1163 1164 /* 1165 * The EPT violation must have been caused by accessing a 1166 * guest-physical address that is a translation of a guest-linear 1167 * address. 1168 */ 1169 if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 || 1170 (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) { 1171 return (FALSE); 1172 } 1173 1174 return (TRUE); 1175} 1176 1177static int 1178vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) 1179{ 1180 int error, handled; 1181 struct vmxctx *vmxctx; 1182 uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason; 1183 uint64_t qual, gpa; 1184 bool retu; 1185 1186 handled = 0; 1187 vmxctx = &vmx->ctx[vcpu]; 1188 1189 qual = vmexit->u.vmx.exit_qualification; 1190 reason = vmexit->u.vmx.exit_reason; 1191 vmexit->exitcode = VM_EXITCODE_BOGUS; 1192 1193 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1); 1194 1195 /* 1196 * VM exits that could be triggered during event injection on the 1197 * previous VM entry need to be handled specially by re-injecting 1198 * the event. 1199 * 1200 * See "Information for VM Exits During Event Delivery" in Intel SDM 1201 * for details. 1202 */ 1203 switch (reason) { 1204 case EXIT_REASON_EPT_FAULT: 1205 case EXIT_REASON_EPT_MISCONFIG: 1206 case EXIT_REASON_APIC: 1207 case EXIT_REASON_TASK_SWITCH: 1208 case EXIT_REASON_EXCEPTION: 1209 idtvec_info = vmcs_idt_vectoring_info(); 1210 if (idtvec_info & VMCS_IDT_VEC_VALID) { 1211 idtvec_info &= ~(1 << 12); /* clear undefined bit */ 1212 vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info); 1213 if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { 1214 idtvec_err = vmcs_idt_vectoring_err(); 1215 vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, 1216 idtvec_err); 1217 } 1218 vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); 1219 } 1220 default: 1221 break; 1222 } 1223 1224 switch (reason) { 1225 case EXIT_REASON_CR_ACCESS: 1226 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1); 1227 handled = vmx_emulate_cr_access(vmx, vcpu, qual); 1228 break; 1229 case EXIT_REASON_RDMSR: 1230 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); 1231 retu = false; 1232 ecx = vmxctx->guest_rcx; 1233 error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu); 1234 if (error) { 1235 vmexit->exitcode = VM_EXITCODE_RDMSR; 1236 vmexit->u.msr.code = ecx; 1237 } else if (!retu) { 1238 handled = 1; 1239 } else { 1240 /* Return to userspace with a valid exitcode */ 1241 KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, 1242 ("emulate_wrmsr retu with bogus exitcode")); 1243 } 1244 break; 1245 case EXIT_REASON_WRMSR: 1246 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1); 1247 retu = false; 1248 eax = vmxctx->guest_rax; 1249 ecx = vmxctx->guest_rcx; 1250 edx = vmxctx->guest_rdx; 1251 error = emulate_wrmsr(vmx->vm, vcpu, ecx, 1252 (uint64_t)edx << 32 | eax, &retu); 1253 if (error) { 1254 vmexit->exitcode = VM_EXITCODE_WRMSR; 1255 vmexit->u.msr.code = ecx; 1256 vmexit->u.msr.wval = (uint64_t)edx << 32 | eax; 1257 } else if (!retu) { 1258 handled = 1; 1259 } else { 1260 /* Return to userspace with a valid exitcode */ 1261 KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, 1262 ("emulate_wrmsr retu with bogus exitcode")); 1263 } 1264 break; 1265 case EXIT_REASON_HLT: 1266 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); 1267 vmexit->exitcode = VM_EXITCODE_HLT; 1268 vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS); 1269 break; 1270 case EXIT_REASON_MTF: 1271 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); 1272 vmexit->exitcode = VM_EXITCODE_MTRAP; 1273 break; 1274 case EXIT_REASON_PAUSE: 1275 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); 1276 vmexit->exitcode = VM_EXITCODE_PAUSE; 1277 break; 1278 case EXIT_REASON_INTR_WINDOW: 1279 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); 1280 vmx_clear_int_window_exiting(vmx, vcpu); 1281 VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); 1282 return (1); 1283 case EXIT_REASON_EXT_INTR: 1284 /* 1285 * External interrupts serve only to cause VM exits and allow 1286 * the host interrupt handler to run. 1287 * 1288 * If this external interrupt triggers a virtual interrupt 1289 * to a VM, then that state will be recorded by the 1290 * host interrupt handler in the VM's softc. We will inject 1291 * this virtual interrupt during the subsequent VM enter. 1292 */ 1293 1294 /* 1295 * This is special. We want to treat this as an 'handled' 1296 * VM-exit but not increment the instruction pointer. 1297 */ 1298 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1); 1299 return (1); 1300 case EXIT_REASON_NMI_WINDOW: 1301 /* Exit to allow the pending virtual NMI to be injected */ 1302 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1); 1303 vmx_clear_nmi_window_exiting(vmx, vcpu); 1304 VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); 1305 return (1); 1306 case EXIT_REASON_INOUT: 1307 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1); 1308 vmexit->exitcode = VM_EXITCODE_INOUT; 1309 vmexit->u.inout.bytes = (qual & 0x7) + 1; 1310 vmexit->u.inout.in = (qual & 0x8) ? 1 : 0; 1311 vmexit->u.inout.string = (qual & 0x10) ? 1 : 0; 1312 vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0; 1313 vmexit->u.inout.port = (uint16_t)(qual >> 16); 1314 vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax); 1315 break; 1316 case EXIT_REASON_CPUID: 1317 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1); 1318 handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); 1319 break; 1320 case EXIT_REASON_EPT_FAULT: 1321 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EPT_FAULT, 1); 1322 /* 1323 * If 'gpa' lies within the address space allocated to 1324 * memory then this must be a nested page fault otherwise 1325 * this must be an instruction that accesses MMIO space. 1326 */ 1327 gpa = vmcs_gpa(); 1328 if (vm_mem_allocated(vmx->vm, gpa)) { 1329 vmexit->exitcode = VM_EXITCODE_PAGING; 1330 vmexit->u.paging.gpa = gpa; 1331 vmexit->u.paging.fault_type = ept_fault_type(qual); 1332 } else if (ept_emulation_fault(qual)) { 1333 vmexit->exitcode = VM_EXITCODE_INST_EMUL; 1334 vmexit->u.inst_emul.gpa = gpa; 1335 vmexit->u.inst_emul.gla = vmcs_gla(); 1336 vmexit->u.inst_emul.cr3 = vmcs_guest_cr3(); 1337 } 1338 break; 1339 default: 1340 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); 1341 break; 1342 } 1343 1344 if (handled) { 1345 /* 1346 * It is possible that control is returned to userland 1347 * even though we were able to handle the VM exit in the 1348 * kernel. 1349 * 1350 * In such a case we want to make sure that the userland 1351 * restarts guest execution at the instruction *after* 1352 * the one we just processed. Therefore we update the 1353 * guest rip in the VMCS and in 'vmexit'. 1354 */ 1355 vmexit->rip += vmexit->inst_length; 1356 vmexit->inst_length = 0; 1357 vmcs_write(VMCS_GUEST_RIP, vmexit->rip); 1358 } else { 1359 if (vmexit->exitcode == VM_EXITCODE_BOGUS) { 1360 /* 1361 * If this VM exit was not claimed by anybody then 1362 * treat it as a generic VMX exit. 1363 */ 1364 vmexit->exitcode = VM_EXITCODE_VMX; 1365 vmexit->u.vmx.status = VM_SUCCESS; 1366 } else { 1367 /* 1368 * The exitcode and collateral have been populated. 1369 * The VM exit will be processed further in userland. 1370 */ 1371 } 1372 } 1373 return (handled); 1374} 1375 1376static __inline int 1377vmx_exit_astpending(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) 1378{ 1379 1380 vmexit->rip = vmcs_guest_rip(); 1381 vmexit->inst_length = 0; 1382 vmexit->exitcode = VM_EXITCODE_BOGUS; 1383 vmx_astpending_trace(vmx, vcpu, vmexit->rip); 1384 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_ASTPENDING, 1); 1385 1386 return (HANDLED); 1387} 1388 1389static __inline int 1390vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) 1391{ 1392 1393 KASSERT(vmxctx->inst_fail_status != VM_SUCCESS, 1394 ("vmx_exit_inst_error: invalid inst_fail_status %d", 1395 vmxctx->inst_fail_status)); 1396 1397 vmexit->inst_length = 0; 1398 vmexit->exitcode = VM_EXITCODE_VMX; 1399 vmexit->u.vmx.status = vmxctx->inst_fail_status; 1400 vmexit->u.vmx.inst_error = vmcs_instruction_error(); 1401 vmexit->u.vmx.exit_reason = ~0; 1402 vmexit->u.vmx.exit_qualification = ~0; 1403 1404 switch (rc) { 1405 case VMX_VMRESUME_ERROR: 1406 case VMX_VMLAUNCH_ERROR: 1407 case VMX_INVEPT_ERROR: 1408 vmexit->u.vmx.inst_type = rc; 1409 break; 1410 default: 1411 panic("vm_exit_inst_error: vmx_enter_guest returned %d", rc); 1412 } 1413 1414 return (UNHANDLED); 1415} 1416 1417static int 1418vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap) 1419{ 1420 int rc, handled, launched; 1421 struct vmx *vmx; 1422 struct vmxctx *vmxctx; 1423 struct vmcs *vmcs; 1424 struct vm_exit *vmexit; 1425 uint64_t rip; 1426 uint32_t exit_reason; 1427 1428 vmx = arg; 1429 vmcs = &vmx->vmcs[vcpu]; 1430 vmxctx = &vmx->ctx[vcpu]; 1431 vmexit = vm_exitinfo(vmx->vm, vcpu); 1432 launched = 0; 1433 1434 KASSERT(vmxctx->pmap == pmap, 1435 ("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap)); 1436 KASSERT(vmxctx->eptp == vmx->eptp, 1437 ("eptp %p different than ctx eptp %#lx", eptp, vmxctx->eptp)); 1438 1439 VMPTRLD(vmcs); 1440 1441 /* 1442 * XXX 1443 * We do this every time because we may setup the virtual machine 1444 * from a different process than the one that actually runs it. 1445 * 1446 * If the life of a virtual machine was spent entirely in the context 1447 * of a single process we could do this once in vmcs_set_defaults(). 1448 */ 1449 vmcs_write(VMCS_HOST_CR3, rcr3()); 1450 1451 vmcs_write(VMCS_GUEST_RIP, startrip); 1452 vmx_set_pcpu_defaults(vmx, vcpu); 1453 do { 1454 /* 1455 * Interrupts are disabled from this point on until the 1456 * guest starts executing. This is done for the following 1457 * reasons: 1458 * 1459 * If an AST is asserted on this thread after the check below, 1460 * then the IPI_AST notification will not be lost, because it 1461 * will cause a VM exit due to external interrupt as soon as 1462 * the guest state is loaded. 1463 * 1464 * A posted interrupt after 'vmx_inject_interrupts()' will 1465 * not be "lost" because it will be held pending in the host 1466 * APIC because interrupts are disabled. The pending interrupt 1467 * will be recognized as soon as the guest state is loaded. 1468 * 1469 * The same reasoning applies to the IPI generated by 1470 * pmap_invalidate_ept(). 1471 */ 1472 disable_intr(); 1473 if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) { 1474 enable_intr(); 1475 handled = vmx_exit_astpending(vmx, vcpu, vmexit); 1476 break; 1477 } 1478 1479 vmx_inject_interrupts(vmx, vcpu); 1480 vmx_run_trace(vmx, vcpu); 1481 rc = vmx_enter_guest(vmxctx, launched); 1482 1483 enable_intr(); 1484 1485 /* Collect some information for VM exit processing */ 1486 vmexit->rip = rip = vmcs_guest_rip(); 1487 vmexit->inst_length = vmexit_instruction_length(); 1488 vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason(); 1489 vmexit->u.vmx.exit_qualification = vmcs_exit_qualification(); 1490 1491 if (rc == VMX_GUEST_VMEXIT) { 1492 launched = 1; 1493 handled = vmx_exit_process(vmx, vcpu, vmexit); 1494 } else { 1495 handled = vmx_exit_inst_error(vmxctx, rc, vmexit); 1496 } 1497 1498 vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled); 1499 } while (handled); 1500 1501 /* 1502 * If a VM exit has been handled then the exitcode must be BOGUS 1503 * If a VM exit is not handled then the exitcode must not be BOGUS 1504 */ 1505 if ((handled && vmexit->exitcode != VM_EXITCODE_BOGUS) || 1506 (!handled && vmexit->exitcode == VM_EXITCODE_BOGUS)) { 1507 panic("Mismatch between handled (%d) and exitcode (%d)", 1508 handled, vmexit->exitcode); 1509 } 1510 1511 if (!handled) 1512 vmm_stat_incr(vmx->vm, vcpu, VMEXIT_USERSPACE, 1); 1513 1514 VCPU_CTR1(vmx->vm, vcpu, "returning from vmx_run: exitcode %d", 1515 vmexit->exitcode); 1516 1517 VMCLEAR(vmcs); 1518 return (0); 1519} 1520 1521static void 1522vmx_vmcleanup(void *arg) 1523{ 1524 int i, error; 1525 struct vmx *vmx = arg; 1526 1527 for (i = 0; i < VM_MAXCPU; i++) 1528 vpid_free(vmx->state[i].vpid); 1529 1530 /* 1531 * XXXSMP we also need to clear the VMCS active on the other vcpus. 1532 */ 1533 error = vmclear(&vmx->vmcs[0]); 1534 if (error != 0) 1535 panic("vmx_vmcleanup: vmclear error %d on vcpu 0", error); 1536 1537 free(vmx, M_VMX); 1538 1539 return; 1540} 1541 1542static register_t * 1543vmxctx_regptr(struct vmxctx *vmxctx, int reg) 1544{ 1545 1546 switch (reg) { 1547 case VM_REG_GUEST_RAX: 1548 return (&vmxctx->guest_rax); 1549 case VM_REG_GUEST_RBX: 1550 return (&vmxctx->guest_rbx); 1551 case VM_REG_GUEST_RCX: 1552 return (&vmxctx->guest_rcx); 1553 case VM_REG_GUEST_RDX: 1554 return (&vmxctx->guest_rdx); 1555 case VM_REG_GUEST_RSI: 1556 return (&vmxctx->guest_rsi); 1557 case VM_REG_GUEST_RDI: 1558 return (&vmxctx->guest_rdi); 1559 case VM_REG_GUEST_RBP: 1560 return (&vmxctx->guest_rbp); 1561 case VM_REG_GUEST_R8: 1562 return (&vmxctx->guest_r8); 1563 case VM_REG_GUEST_R9: 1564 return (&vmxctx->guest_r9); 1565 case VM_REG_GUEST_R10: 1566 return (&vmxctx->guest_r10); 1567 case VM_REG_GUEST_R11: 1568 return (&vmxctx->guest_r11); 1569 case VM_REG_GUEST_R12: 1570 return (&vmxctx->guest_r12); 1571 case VM_REG_GUEST_R13: 1572 return (&vmxctx->guest_r13); 1573 case VM_REG_GUEST_R14: 1574 return (&vmxctx->guest_r14); 1575 case VM_REG_GUEST_R15: 1576 return (&vmxctx->guest_r15); 1577 default: 1578 break; 1579 } 1580 return (NULL); 1581} 1582 1583static int 1584vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval) 1585{ 1586 register_t *regp; 1587 1588 if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { 1589 *retval = *regp; 1590 return (0); 1591 } else 1592 return (EINVAL); 1593} 1594 1595static int 1596vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val) 1597{ 1598 register_t *regp; 1599 1600 if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { 1601 *regp = val; 1602 return (0); 1603 } else 1604 return (EINVAL); 1605} 1606 1607static int 1608vmx_shadow_reg(int reg) 1609{ 1610 int shreg; 1611 1612 shreg = -1; 1613 1614 switch (reg) { 1615 case VM_REG_GUEST_CR0: 1616 shreg = VMCS_CR0_SHADOW; 1617 break; 1618 case VM_REG_GUEST_CR4: 1619 shreg = VMCS_CR4_SHADOW; 1620 break; 1621 default: 1622 break; 1623 } 1624 1625 return (shreg); 1626} 1627 1628static int 1629vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) 1630{ 1631 int running, hostcpu; 1632 struct vmx *vmx = arg; 1633 1634 running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 1635 if (running && hostcpu != curcpu) 1636 panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); 1637 1638 if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0) 1639 return (0); 1640 1641 return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval)); 1642} 1643 1644static int 1645vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) 1646{ 1647 int error, hostcpu, running, shadow; 1648 uint64_t ctls; 1649 struct vmx *vmx = arg; 1650 1651 running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 1652 if (running && hostcpu != curcpu) 1653 panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu); 1654 1655 if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0) 1656 return (0); 1657 1658 error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val); 1659 1660 if (error == 0) { 1661 /* 1662 * If the "load EFER" VM-entry control is 1 then the 1663 * value of EFER.LMA must be identical to "IA-32e mode guest" 1664 * bit in the VM-entry control. 1665 */ 1666 if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 && 1667 (reg == VM_REG_GUEST_EFER)) { 1668 vmcs_getreg(&vmx->vmcs[vcpu], running, 1669 VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls); 1670 if (val & EFER_LMA) 1671 ctls |= VM_ENTRY_GUEST_LMA; 1672 else 1673 ctls &= ~VM_ENTRY_GUEST_LMA; 1674 vmcs_setreg(&vmx->vmcs[vcpu], running, 1675 VMCS_IDENT(VMCS_ENTRY_CTLS), ctls); 1676 } 1677 1678 shadow = vmx_shadow_reg(reg); 1679 if (shadow > 0) { 1680 /* 1681 * Store the unmodified value in the shadow 1682 */ 1683 error = vmcs_setreg(&vmx->vmcs[vcpu], running, 1684 VMCS_IDENT(shadow), val); 1685 } 1686 } 1687 1688 return (error); 1689} 1690 1691static int 1692vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) 1693{ 1694 struct vmx *vmx = arg; 1695 1696 return (vmcs_getdesc(&vmx->vmcs[vcpu], reg, desc)); 1697} 1698 1699static int 1700vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) 1701{ 1702 struct vmx *vmx = arg; 1703 1704 return (vmcs_setdesc(&vmx->vmcs[vcpu], reg, desc)); 1705} 1706 1707static int 1708vmx_inject(void *arg, int vcpu, int type, int vector, uint32_t code, 1709 int code_valid) 1710{ 1711 int error; 1712 uint64_t info; 1713 struct vmx *vmx = arg; 1714 struct vmcs *vmcs = &vmx->vmcs[vcpu]; 1715 1716 static uint32_t type_map[VM_EVENT_MAX] = { 1717 0x1, /* VM_EVENT_NONE */ 1718 0x0, /* VM_HW_INTR */ 1719 0x2, /* VM_NMI */ 1720 0x3, /* VM_HW_EXCEPTION */ 1721 0x4, /* VM_SW_INTR */ 1722 0x5, /* VM_PRIV_SW_EXCEPTION */ 1723 0x6, /* VM_SW_EXCEPTION */ 1724 }; 1725 1726 /* 1727 * If there is already an exception pending to be delivered to the 1728 * vcpu then just return. 1729 */ 1730 error = vmcs_getreg(vmcs, 0, VMCS_IDENT(VMCS_ENTRY_INTR_INFO), &info); 1731 if (error) 1732 return (error); 1733 1734 if (info & VMCS_INTERRUPTION_INFO_VALID) 1735 return (EAGAIN); 1736 1737 info = vector | (type_map[type] << 8) | (code_valid ? 1 << 11 : 0); 1738 info |= VMCS_INTERRUPTION_INFO_VALID; 1739 error = vmcs_setreg(vmcs, 0, VMCS_IDENT(VMCS_ENTRY_INTR_INFO), info); 1740 if (error != 0) 1741 return (error); 1742 1743 if (code_valid) { 1744 error = vmcs_setreg(vmcs, 0, 1745 VMCS_IDENT(VMCS_ENTRY_EXCEPTION_ERROR), 1746 code); 1747 } 1748 return (error); 1749} 1750 1751static int 1752vmx_getcap(void *arg, int vcpu, int type, int *retval) 1753{ 1754 struct vmx *vmx = arg; 1755 int vcap; 1756 int ret; 1757 1758 ret = ENOENT; 1759 1760 vcap = vmx->cap[vcpu].set; 1761 1762 switch (type) { 1763 case VM_CAP_HALT_EXIT: 1764 if (cap_halt_exit) 1765 ret = 0; 1766 break; 1767 case VM_CAP_PAUSE_EXIT: 1768 if (cap_pause_exit) 1769 ret = 0; 1770 break; 1771 case VM_CAP_MTRAP_EXIT: 1772 if (cap_monitor_trap) 1773 ret = 0; 1774 break; 1775 case VM_CAP_UNRESTRICTED_GUEST: 1776 if (cap_unrestricted_guest) 1777 ret = 0; 1778 break; 1779 case VM_CAP_ENABLE_INVPCID: 1780 if (cap_invpcid) 1781 ret = 0; 1782 break; 1783 default: 1784 break; 1785 } 1786 1787 if (ret == 0) 1788 *retval = (vcap & (1 << type)) ? 1 : 0; 1789 1790 return (ret); 1791} 1792 1793static int 1794vmx_setcap(void *arg, int vcpu, int type, int val) 1795{ 1796 struct vmx *vmx = arg; 1797 struct vmcs *vmcs = &vmx->vmcs[vcpu]; 1798 uint32_t baseval; 1799 uint32_t *pptr; 1800 int error; 1801 int flag; 1802 int reg; 1803 int retval; 1804 1805 retval = ENOENT; 1806 pptr = NULL; 1807 1808 switch (type) { 1809 case VM_CAP_HALT_EXIT: 1810 if (cap_halt_exit) { 1811 retval = 0; 1812 pptr = &vmx->cap[vcpu].proc_ctls; 1813 baseval = *pptr; 1814 flag = PROCBASED_HLT_EXITING; 1815 reg = VMCS_PRI_PROC_BASED_CTLS; 1816 } 1817 break; 1818 case VM_CAP_MTRAP_EXIT: 1819 if (cap_monitor_trap) { 1820 retval = 0; 1821 pptr = &vmx->cap[vcpu].proc_ctls; 1822 baseval = *pptr; 1823 flag = PROCBASED_MTF; 1824 reg = VMCS_PRI_PROC_BASED_CTLS; 1825 } 1826 break; 1827 case VM_CAP_PAUSE_EXIT: 1828 if (cap_pause_exit) { 1829 retval = 0; 1830 pptr = &vmx->cap[vcpu].proc_ctls; 1831 baseval = *pptr; 1832 flag = PROCBASED_PAUSE_EXITING; 1833 reg = VMCS_PRI_PROC_BASED_CTLS; 1834 } 1835 break; 1836 case VM_CAP_UNRESTRICTED_GUEST: 1837 if (cap_unrestricted_guest) { 1838 retval = 0; 1839 pptr = &vmx->cap[vcpu].proc_ctls2; 1840 baseval = *pptr; 1841 flag = PROCBASED2_UNRESTRICTED_GUEST; 1842 reg = VMCS_SEC_PROC_BASED_CTLS; 1843 } 1844 break; 1845 case VM_CAP_ENABLE_INVPCID: 1846 if (cap_invpcid) { 1847 retval = 0; 1848 pptr = &vmx->cap[vcpu].proc_ctls2; 1849 baseval = *pptr; 1850 flag = PROCBASED2_ENABLE_INVPCID; 1851 reg = VMCS_SEC_PROC_BASED_CTLS; 1852 } 1853 break; 1854 default: 1855 break; 1856 } 1857 1858 if (retval == 0) { 1859 if (val) { 1860 baseval |= flag; 1861 } else { 1862 baseval &= ~flag; 1863 } 1864 VMPTRLD(vmcs); 1865 error = vmwrite(reg, baseval); 1866 VMCLEAR(vmcs); 1867 1868 if (error) { 1869 retval = error; 1870 } else { 1871 /* 1872 * Update optional stored flags, and record 1873 * setting 1874 */ 1875 if (pptr != NULL) { 1876 *pptr = baseval; 1877 } 1878 1879 if (val) { 1880 vmx->cap[vcpu].set |= (1 << type); 1881 } else { 1882 vmx->cap[vcpu].set &= ~(1 << type); 1883 } 1884 } 1885 } 1886 1887 return (retval); 1888} 1889 1890struct vmm_ops vmm_ops_intel = { 1891 vmx_init, 1892 vmx_cleanup, 1893 vmx_restore, 1894 vmx_vminit, 1895 vmx_run, 1896 vmx_vmcleanup, 1897 vmx_getreg, 1898 vmx_setreg, 1899 vmx_getdesc, 1900 vmx_setdesc, 1901 vmx_inject, 1902 vmx_getcap, 1903 vmx_setcap, 1904 ept_vmspace_alloc, 1905 ept_vmspace_free, 1906}; 1907