vmmapi.c revision 221828
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/types.h> 33#include <sys/sysctl.h> 34#include <sys/ioctl.h> 35#include <sys/mman.h> 36 37#include <machine/specialreg.h> 38 39#include <stdio.h> 40#include <stdlib.h> 41#include <assert.h> 42#include <string.h> 43#include <fcntl.h> 44#include <unistd.h> 45 46#include <machine/vmm.h> 47#include <machine/vmm_dev.h> 48 49#include "vmmapi.h" 50#include "mptable.h" 51 52#ifndef CR4_VMXE 53#define CR4_VMXE (1UL << 13) 54#endif 55 56#define BIOS_ROM_BASE (0xf0000) 57#define BIOS_ROM_SIZE (0x10000) 58 59struct vmctx { 60 int fd; 61 char *name; 62}; 63 64#define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) 65#define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) 66 67static int 68vm_device_open(const char *name) 69{ 70 int fd, len; 71 char *vmfile; 72 73 len = strlen("/dev/vmm/") + strlen(name) + 1; 74 vmfile = malloc(len); 75 assert(vmfile != NULL); 76 snprintf(vmfile, len, "/dev/vmm/%s", name); 77 78 /* Open the device file */ 79 fd = open(vmfile, O_RDWR, 0); 80 81 free(vmfile); 82 return (fd); 83} 84 85int 86vm_create(const char *name) 87{ 88 89 return (CREATE((char *)name)); 90} 91 92struct vmctx * 93vm_open(const char *name) 94{ 95 struct vmctx *vm; 96 97 vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); 98 assert(vm != NULL); 99 100 vm->fd = -1; 101 vm->name = (char *)(vm + 1); 102 strcpy(vm->name, name); 103 104 if ((vm->fd = vm_device_open(vm->name)) < 0) 105 goto err; 106 107 return (vm); 108err: 109 vm_destroy(vm); 110 return (NULL); 111} 112 113void 114vm_destroy(struct vmctx *vm) 115{ 116 assert(vm != NULL); 117 118 DESTROY(vm->name); 119 if (vm->fd >= 0) 120 close(vm->fd); 121 free(vm); 122} 123 124int 125vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, 126 vm_paddr_t *ret_hpa, size_t *ret_len) 127{ 128 int error; 129 struct vm_memory_segment seg; 130 131 bzero(&seg, sizeof(seg)); 132 seg.gpa = gpa; 133 error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); 134 *ret_hpa = seg.hpa; 135 *ret_len = seg.len; 136 return (error); 137} 138 139int 140vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr) 141{ 142 int error; 143 struct vm_memory_segment seg; 144 145 /* 146 * Create and optionally map 'len' bytes of memory at guest 147 * physical address 'gpa' 148 */ 149 bzero(&seg, sizeof(seg)); 150 seg.gpa = gpa; 151 seg.len = len; 152 error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); 153 if (error == 0 && mapaddr != NULL) { 154 *mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 155 ctx->fd, gpa); 156 } 157 return (error); 158} 159 160char * 161vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len) 162{ 163 164 /* Map 'len' bytes of memory at guest physical address 'gpa' */ 165 return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, 166 ctx->fd, gpa)); 167} 168 169int 170vm_set_desc(struct vmctx *ctx, int vcpu, int reg, 171 uint64_t base, uint32_t limit, uint32_t access) 172{ 173 int error; 174 struct vm_seg_desc vmsegdesc; 175 176 bzero(&vmsegdesc, sizeof(vmsegdesc)); 177 vmsegdesc.cpuid = vcpu; 178 vmsegdesc.regnum = reg; 179 vmsegdesc.desc.base = base; 180 vmsegdesc.desc.limit = limit; 181 vmsegdesc.desc.access = access; 182 183 error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); 184 return (error); 185} 186 187int 188vm_get_desc(struct vmctx *ctx, int vcpu, int reg, 189 uint64_t *base, uint32_t *limit, uint32_t *access) 190{ 191 int error; 192 struct vm_seg_desc vmsegdesc; 193 194 bzero(&vmsegdesc, sizeof(vmsegdesc)); 195 vmsegdesc.cpuid = vcpu; 196 vmsegdesc.regnum = reg; 197 198 error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); 199 if (error == 0) { 200 *base = vmsegdesc.desc.base; 201 *limit = vmsegdesc.desc.limit; 202 *access = vmsegdesc.desc.access; 203 } 204 return (error); 205} 206 207int 208vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) 209{ 210 int error; 211 struct vm_register vmreg; 212 213 bzero(&vmreg, sizeof(vmreg)); 214 vmreg.cpuid = vcpu; 215 vmreg.regnum = reg; 216 vmreg.regval = val; 217 218 error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); 219 return (error); 220} 221 222int 223vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) 224{ 225 int error; 226 struct vm_register vmreg; 227 228 bzero(&vmreg, sizeof(vmreg)); 229 vmreg.cpuid = vcpu; 230 vmreg.regnum = reg; 231 232 error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); 233 *ret_val = vmreg.regval; 234 return (error); 235} 236 237int 238vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid) 239{ 240 int error; 241 struct vm_pin vmpin; 242 243 bzero(&vmpin, sizeof(vmpin)); 244 vmpin.vm_cpuid = vcpu; 245 246 error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin); 247 *host_cpuid = vmpin.host_cpuid; 248 return (error); 249} 250 251int 252vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid) 253{ 254 int error; 255 struct vm_pin vmpin; 256 257 bzero(&vmpin, sizeof(vmpin)); 258 vmpin.vm_cpuid = vcpu; 259 vmpin.host_cpuid = host_cpuid; 260 261 error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin); 262 return (error); 263} 264 265int 266vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) 267{ 268 int error; 269 struct vm_run vmrun; 270 271 bzero(&vmrun, sizeof(vmrun)); 272 vmrun.cpuid = vcpu; 273 vmrun.rip = rip; 274 275 error = ioctl(ctx->fd, VM_RUN, &vmrun); 276 bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); 277 return (error); 278} 279 280static int 281vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type, 282 int vector, int error_code, int error_code_valid) 283{ 284 struct vm_event ev; 285 286 bzero(&ev, sizeof(ev)); 287 ev.cpuid = vcpu; 288 ev.type = type; 289 ev.vector = vector; 290 ev.error_code = error_code; 291 ev.error_code_valid = error_code_valid; 292 293 return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev)); 294} 295 296int 297vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, 298 int vector) 299{ 300 301 return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0)); 302} 303 304int 305vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, 306 int vector, int error_code) 307{ 308 309 return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1)); 310} 311 312int 313vm_build_tables(struct vmctx *ctxt, int ncpu, void *oemtbl, int oemtblsz) 314{ 315 316 return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu, 317 oemtbl, oemtblsz)); 318} 319 320int 321vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) 322{ 323 struct vm_lapic_irq vmirq; 324 325 bzero(&vmirq, sizeof(vmirq)); 326 vmirq.cpuid = vcpu; 327 vmirq.vector = vector; 328 329 return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); 330} 331 332int 333vm_inject_nmi(struct vmctx *ctx, int vcpu) 334{ 335 struct vm_nmi vmnmi; 336 337 bzero(&vmnmi, sizeof(vmnmi)); 338 vmnmi.cpuid = vcpu; 339 340 return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); 341} 342 343int 344vm_capability_name2type(const char *capname) 345{ 346 int i; 347 348 static struct { 349 const char *name; 350 int type; 351 } capstrmap[] = { 352 { "hlt_exit", VM_CAP_HALT_EXIT }, 353 { "mtrap_exit", VM_CAP_MTRAP_EXIT }, 354 { "pause_exit", VM_CAP_PAUSE_EXIT }, 355 { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, 356 { 0 } 357 }; 358 359 for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { 360 if (strcmp(capstrmap[i].name, capname) == 0) 361 return (capstrmap[i].type); 362 } 363 364 return (-1); 365} 366 367int 368vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, 369 int *retval) 370{ 371 int error; 372 struct vm_capability vmcap; 373 374 bzero(&vmcap, sizeof(vmcap)); 375 vmcap.cpuid = vcpu; 376 vmcap.captype = cap; 377 378 error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); 379 *retval = vmcap.capval; 380 return (error); 381} 382 383int 384vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) 385{ 386 struct vm_capability vmcap; 387 388 bzero(&vmcap, sizeof(vmcap)); 389 vmcap.cpuid = vcpu; 390 vmcap.captype = cap; 391 vmcap.capval = val; 392 393 return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); 394} 395 396int 397vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 398{ 399 struct vm_pptdev pptdev; 400 401 bzero(&pptdev, sizeof(pptdev)); 402 pptdev.bus = bus; 403 pptdev.slot = slot; 404 pptdev.func = func; 405 406 return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); 407} 408 409int 410vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) 411{ 412 struct vm_pptdev pptdev; 413 414 bzero(&pptdev, sizeof(pptdev)); 415 pptdev.bus = bus; 416 pptdev.slot = slot; 417 pptdev.func = func; 418 419 return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); 420} 421 422int 423vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, 424 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 425{ 426 struct vm_pptdev_mmio pptmmio; 427 428 bzero(&pptmmio, sizeof(pptmmio)); 429 pptmmio.bus = bus; 430 pptmmio.slot = slot; 431 pptmmio.func = func; 432 pptmmio.gpa = gpa; 433 pptmmio.len = len; 434 pptmmio.hpa = hpa; 435 436 return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); 437} 438 439int 440vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, 441 int destcpu, int vector, int numvec) 442{ 443 struct vm_pptdev_msi pptmsi; 444 445 bzero(&pptmsi, sizeof(pptmsi)); 446 pptmsi.vcpu = vcpu; 447 pptmsi.bus = bus; 448 pptmsi.slot = slot; 449 pptmsi.func = func; 450 pptmsi.destcpu = destcpu; 451 pptmsi.vector = vector; 452 pptmsi.numvec = numvec; 453 454 return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); 455} 456 457uint64_t * 458vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, 459 int *ret_entries) 460{ 461 int error; 462 463 static struct vm_stats vmstats; 464 465 vmstats.cpuid = vcpu; 466 467 error = ioctl(ctx->fd, VM_STATS, &vmstats); 468 if (error == 0) { 469 if (ret_entries) 470 *ret_entries = vmstats.num_entries; 471 if (ret_tv) 472 *ret_tv = vmstats.tv; 473 return (vmstats.statbuf); 474 } else 475 return (NULL); 476} 477 478const char * 479vm_get_stat_desc(struct vmctx *ctx, int index) 480{ 481 int error; 482 483 static struct vm_stat_desc statdesc; 484 485 statdesc.index = index; 486 if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) 487 return (statdesc.desc); 488 else 489 return (NULL); 490} 491 492/* 493 * From Intel Vol 3a: 494 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT 495 */ 496int 497vcpu_reset(struct vmctx *vmctx, int vcpu) 498{ 499 int error; 500 uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; 501 uint32_t desc_access, desc_limit; 502 uint16_t sel; 503 504 zero = 0; 505 506 rflags = 0x2; 507 error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); 508 if (error) 509 goto done; 510 511 rip = 0xfff0; 512 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) 513 goto done; 514 515 cr0 = CR0_NE; 516 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) 517 goto done; 518 519 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) 520 goto done; 521 522 cr4 = CR4_VMXE; 523 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) 524 goto done; 525 526 /* 527 * CS: present, r/w, accessed, 16-bit, byte granularity, usable 528 */ 529 desc_base = 0xffff0000; 530 desc_limit = 0xffff; 531 desc_access = 0x0093; 532 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, 533 desc_base, desc_limit, desc_access); 534 if (error) 535 goto done; 536 537 sel = 0xf000; 538 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) 539 goto done; 540 541 /* 542 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity 543 */ 544 desc_base = 0; 545 desc_limit = 0xffff; 546 desc_access = 0x0093; 547 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, 548 desc_base, desc_limit, desc_access); 549 if (error) 550 goto done; 551 552 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, 553 desc_base, desc_limit, desc_access); 554 if (error) 555 goto done; 556 557 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, 558 desc_base, desc_limit, desc_access); 559 if (error) 560 goto done; 561 562 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, 563 desc_base, desc_limit, desc_access); 564 if (error) 565 goto done; 566 567 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, 568 desc_base, desc_limit, desc_access); 569 if (error) 570 goto done; 571 572 sel = 0; 573 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) 574 goto done; 575 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) 576 goto done; 577 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) 578 goto done; 579 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) 580 goto done; 581 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) 582 goto done; 583 584 /* General purpose registers */ 585 rdx = 0xf00; 586 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) 587 goto done; 588 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) 589 goto done; 590 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) 591 goto done; 592 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) 593 goto done; 594 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) 595 goto done; 596 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) 597 goto done; 598 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) 599 goto done; 600 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) 601 goto done; 602 603 /* GDTR, IDTR */ 604 desc_base = 0; 605 desc_limit = 0xffff; 606 desc_access = 0; 607 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, 608 desc_base, desc_limit, desc_access); 609 if (error != 0) 610 goto done; 611 612 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, 613 desc_base, desc_limit, desc_access); 614 if (error != 0) 615 goto done; 616 617 /* TR */ 618 desc_base = 0; 619 desc_limit = 0xffff; 620 desc_access = 0x0000008b; 621 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); 622 if (error) 623 goto done; 624 625 sel = 0; 626 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) 627 goto done; 628 629 /* LDTR */ 630 desc_base = 0; 631 desc_limit = 0xffff; 632 desc_access = 0x00000082; 633 error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, 634 desc_limit, desc_access); 635 if (error) 636 goto done; 637 638 sel = 0; 639 if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) 640 goto done; 641 642 /* XXX cr2, debug registers */ 643 644 error = 0; 645done: 646 return (error); 647} 648