/* vmm_dev.c revision 331722 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/amd64/vmm/vmm_dev.c 331722 2018-03-29 02:50:57Z eadler $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/amd64/vmm/vmm_dev.c 331722 2018-03-29 02:50:57Z eadler $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

/*
 * Per-device-memory-segment state.  One of these is created for each
 * named (non-system) memory segment of a VM and is hung off the owning
 * vmmdev_softc's 'devmem' list.  The 'cdev' appears under /dev/vmm.io/.
 */
struct devmem_softc {
	int	segid;			/* memory segment id within the VM */
	char	*name;			/* segment name; freed with this softc */
	struct cdev *cdev;		/* /dev/vmm.io/<vm>.<name>; NULL once scheduled for destroy */
	struct vmmdev_softc *sc;	/* back pointer to owning VM softc */
	SLIST_ENTRY(devmem_softc) link;
};

/*
 * Per-VM state for the /dev/vmm/<name> control device.
 */
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;		/* NULL while being created or after destroy is scheduled */
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED		0x01	/* softc is on the global 'head' list */

/* All existing VM softcs, protected by vmmdev_mtx. */
static SLIST_HEAD(, vmmdev_softc) head;

static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

/*
 * Freeze a single vcpu so that its state (and the guest memory map) cannot
 * change underneath an ioctl.  Returns EINVAL for an out-of-range vcpu id,
 * otherwise the vcpu_set_state() error.
 */
static int
vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
{
	int error;

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
	return (error);
}

/*
 * Undo vcpu_lock_one().  The vcpu must currently be FROZEN; anything else
 * indicates a lock/unlock imbalance and is a panic.
 */
static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(sc->vm, vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
		    vcpu, state);
	}

	vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
}

/*
 * Freeze all vcpus.  On failure any vcpus already frozen are unfrozen
 * before returning, so the lock is all-or-nothing.
 */
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error, vcpu;

	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			break;
	}

	if (error) {
		/* Roll back the vcpus frozen so far. */
		while (--vcpu >= 0)
			vcpu_unlock_one(sc, vcpu);
	}

	return (error);
}

/* Undo vcpu_lock_all(). */
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	int vcpu;

	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
		vcpu_unlock_one(sc, vcpu);
}

/*
 * Look up a VM softc by name on the global list.  The caller is expected
 * to hold vmmdev_mtx (the assertion is compiled out, see below).
 * Returns NULL if no VM with that name exists.
 */
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	return (sc);
}

/*
 * Softc lookup via the cdev private pointer; si_drv1 is set only once the
 * device is fully created, so NULL means "not ready yet".
 */
static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}

/*
 * read(2)/write(2) handler for /dev/vmm/<name>: accesses guest physical
 * memory at uio_offset.  Reads from holes in the guest physical map return
 * zeroes; writes to holes fail with EFAULT.
 */
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	error = vcpu_lock_one(sc, VM_MAXCPU - 1);
	if (error)
		return (error);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vcpu_unlock_one(sc, VM_MAXCPU - 1);
	return (error);
}

/* get_memseg() copies up to SPECNAMELEN + 1 bytes into mseg->name. */
CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);

/*
 * VM_GET_MEMSEG handler: fill in the length and (for devmem segments) the
 * name of memory segment 'mseg->segid'.  System-memory segments get an
 * empty name.  Caller has a vcpu frozen so the segment list is stable.
 */
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
	} else {
		bzero(mseg->name, sizeof(mseg->name));
	}

	return (error);
}

/*
 * VM_ALLOC_MEMSEG handler: allocate a memory segment.  A segment with a
 * name is device memory and additionally gets a /dev/vmm.io cdev; the name
 * buffer's ownership passes to that cdev on success.  Caller holds all
 * vcpus frozen.
 */
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, SPECNAMELEN + 1, 0);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

/*
 * Main ioctl dispatcher for /dev/vmm/<name>.  Depending on the command it
 * first freezes either the target vcpu (state_changed == 1) or all vcpus
 * (state_changed == 2), performs the operation, then unfreezes.
 */
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error, vcpu, state_changed, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_run *vmrun;
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix *pptmsix;
	struct vm_nmi *vmnmi;
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_activate_cpu *vac;
	struct vm_cpuset *vm_cpuset;
	struct vm_intinfo *vmii;
	struct vm_rtc_time *rtctime;
	struct vm_rtc_data *rtcdata;
	struct vm_memmap *mm;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	error = 0;
	vcpu = -1;
	state_changed = 0;

	/*
	 * Some VMM ioctls can operate only on vcpus that are not running.
	 */
	switch (cmd) {
	case VM_RUN:
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_INJECT_EXCEPTION:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_PPTDEV_MSI:
	case VM_PPTDEV_MSIX:
	case VM_SET_X2APIC_STATE:
	case VM_GLA2GPA:
	case VM_ACTIVATE_CPU:
	case VM_SET_INTINFO:
	case VM_GET_INTINFO:
	case VM_RESTART_INSTRUCTION:
		/*
		 * XXX fragile, handle with care
		 * Assumes that the first field of the ioctl data is the vcpu.
		 */
		vcpu = *(int *)data;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	case VM_MAP_PPTDEV_MMIO:
	case VM_BIND_PPTDEV:
	case VM_UNBIND_PPTDEV:
	case VM_ALLOC_MEMSEG:
	case VM_MMAP_MEMSEG:
	case VM_REINIT:
		/*
		 * ioctls that operate on the entire virtual machine must
		 * prevent all vcpus from running.
		 */
		error = vcpu_lock_all(sc);
		if (error)
			goto done;
		state_changed = 2;
		break;

	case VM_GET_MEMSEG:
	case VM_MMAP_GETNEXT:
		/*
		 * Lock a vcpu to make sure that the memory map cannot be
		 * modified while it is being inspected.
		 */
		vcpu = VM_MAXCPU - 1;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	default:
		break;
	}

	switch(cmd) {
	case VM_RUN:
		vmrun = (struct vm_run *)data;
		error = vm_run(sc->vm, vmrun);
		break;
	case VM_SUSPEND:
		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index,
		    statdesc->desc, sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
		    &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
		    pptmsi->bus, pptmsi->slot, pptmsi->func,
		    pptmsi->addr, pptmsi->msg,
		    pptmsi->numvec);
		break;
	case VM_PPTDEV_MSIX:
		pptmsix = (struct vm_pptdev_msix *)data;
		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
		    pptmsix->bus, pptmsix->slot,
		    pptmsix->func, pptmsix->idx,
		    pptmsix->addr, pptmsix->msg,
		    pptmsix->vector_control);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
		    pptmmio->func, pptmmio->gpa, pptmmio->len,
		    pptmmio->hpa);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
		    pptdev->func);
		break;
	case VM_INJECT_EXCEPTION:
		vmexc = (struct vm_exception *)data;
		error = vm_inject_exception(sc->vm, vmexc->cpuid,
		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
		    vmexc->restart_instruction);
		break;
	case VM_INJECT_NMI:
		vmnmi = (struct vm_nmi *)data;
		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
		break;
	case VM_LAPIC_LOCAL_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
		    vmirq->vector);
		break;
	case VM_LAPIC_MSI:
		vmmsi = (struct vm_lapic_msi *)data;
		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
		break;
	case VM_IOAPIC_ASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_DEASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PULSE_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PINCOUNT:
		*(int *)data = vioapic_pincount(sc->vm);
		break;
	case VM_ISA_ASSERT_IRQ:
		/* An ISA irq may be routed to both the atpic and the ioapic. */
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_assert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_DEASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_deassert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_PULSE_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
		break;
	case VM_ISA_SET_IRQ_TRIGGER:
		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
		error = vatpic_set_irq_trigger(sc->vm,
		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
		break;
	case VM_MMAP_GETNEXT:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	case VM_MMAP_MEMSEG:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
		    &vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
		    vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
		    vmsegdesc->regnum,
		    &vmsegdesc->desc);
		break;
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(sc->vm, vmcap->cpuid,
		    vmcap->captype,
		    &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(sc->vm, vmcap->cpuid,
		    vmcap->captype,
		    vmcap->capval);
		break;
	case VM_SET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_set_x2apic_state(sc->vm,
		    x2apic->cpuid, x2apic->state);
		break;
	case VM_GET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_get_x2apic_state(sc->vm,
		    x2apic->cpuid, &x2apic->state);
		break;
	case VM_GET_GPA_PMAP:
		gpapte = (struct vm_gpa_pte *)data;
		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
		    gpapte->gpa, gpapte->pte, &gpapte->ptenum);
		error = 0;
		break;
	case VM_GET_HPET_CAPABILITIES:
		error = vhpet_getcap((struct vm_hpet_cap *)data);
		break;
	case VM_GLA2GPA: {
		/* The ioctl uses PROT_* bits directly as VM_PROT_* values. */
		CTASSERT(PROT_READ == VM_PROT_READ);
		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	}
	case VM_ACTIVATE_CPU:
		vac = (struct vm_activate_cpu *)data;
		error = vm_activate_cpu(sc->vm, vac->vcpuid);
		break;
	case VM_GET_CPUS:
		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	case VM_SET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
		break;
	case VM_GET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
		    &vmii->info2);
		break;
	case VM_RTC_WRITE:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
		    rtcdata->value);
		break;
	case VM_RTC_READ:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
		    &rtcdata->value);
		break;
	case VM_RTC_SETTIME:
		rtctime = (struct vm_rtc_time *)data;
		error = vrtc_set_time(sc->vm, rtctime->secs);
		break;
	case VM_RTC_GETTIME:
		error = 0;
		rtctime = (struct vm_rtc_time *)data;
		rtctime->secs = vrtc_get_time(sc->vm);
		break;
	case VM_RESTART_INSTRUCTION:
		error = vm_restart_instruction(sc->vm, vcpu);
		break;
	default:
		error = ENOTTY;
		break;
	}

	if (state_changed == 1)
		vcpu_unlock_one(sc, vcpu);
	else if (state_changed == 2)
		vcpu_unlock_all(sc);

done:
	/* Make sure that no handler returns a bogus value like ERESTART */
	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}

/*
 * d_mmap_single handler for /dev/vmm/<name>: translate an mmap(2) of a
 * guest-physical range into the backing VM object of the system-memory
 * segment that fully contains it.  Device memory segments must be mapped
 * through their own /dev/vmm.io cdev instead, so they return EINVAL here.
 */
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	error = vcpu_lock_one(sc, VM_MAXCPU - 1);
	if (error)
		return (error);

	/* Walk the memory map looking for a segment spanning [first, last). */
	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vcpu_unlock_one(sc, VM_MAXCPU - 1);
	return (error);
}

/*
 * Final teardown of a VM softc.  Runs either directly from the create path
 * on failure, or as the destroy_dev_sched_cb() callback once all device
 * threads have drained.  Frees the devmem softcs, the cdev (if still
 * present), the vm instance, and the softc itself.
 */
static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error;

	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		/* devmem_destroy() must already have cleared dsc->cdev. */
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

/*
 * hw.vmm.destroy sysctl handler: destroy the VM whose name is written to
 * the sysctl.  The actual teardown happens asynchronously via
 * destroy_dev_sched_cb() callbacks (devmem_destroy, vmmdev_destroy).
 */
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	int error;
	char buf[VM_MAX_NAMELEN];
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
	 * goes down to 0 so we should not do it again in the callback.
	 *
	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Schedule all cdevs to be destroyed:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
	 *   be destroyed and the callback will be invoked in a taskqueue
	 *   context.
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
	}
	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
    NULL, 0, sysctl_vmm_destroy, "A", NULL);

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

/*
 * hw.vmm.create sysctl handler: create a VM with the name written to the
 * sysctl and its /dev/vmm/<name> control device.  Returns EEXIST if a VM
 * with that name already exists (checked again after dropping the lock,
 * since vm_create() sleeps).
 */
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char buf[VM_MAX_NAMELEN];

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL)
		return (EEXIST);

	error = vm_create(buf, &vm);
	if (error != 0)
		return (error);

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		vmmdev_destroy(sc);
		return (EEXIST);
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		return (error);
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
    NULL, 0, sysctl_vmm_create, "A", NULL);

/* Module initialization: set up the global softc-list mutex. */
void
vmmdev_init(void)
{
	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}

/* Module unload check: EBUSY while any VM still exists. */
int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

/*
 * d_mmap_single handler for a /dev/vmm.io/<vm>.<name> devmem cdev: map a
 * range of the device memory segment.  The whole range [first, last) must
 * fall within the segment.
 */
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	/* Freeze a vcpu to get a read lock on the memory segment list. */
	error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
	if (error)
		return (error);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);

	if (seglen >= last) {
		vm_object_reference(*objp);
		return (0);
	} else {
		return (EINVAL);
	}
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

/*
 * Create the /dev/vmm.io/<vmname>.<devname> cdev for a device memory
 * segment.  On success the softc takes ownership of 'devname' (it is
 * freed by vmmdev_destroy()).  Returns ENODEV if the VM is concurrently
 * being created or destroyed.
 */
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

/*
 * destroy_dev_sched_cb() callback for a devmem cdev: mark the softc as
 * having no cdev so vmmdev_destroy() can free it.  The softc itself is
 * freed later by vmmdev_destroy(), which runs after all devmem callbacks.
 */
static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}