vmm_dev.c revision 331722
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/amd64/vmm/vmm_dev.c 331722 2018-03-29 02:50:57Z eadler $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/amd64/vmm/vmm_dev.c 331722 2018-03-29 02:50:57Z eadler $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"

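/*
 * State for a device memory segment that is exposed to userland as a
 * character device under /dev/vmm.io/.  'name' is owned by this structure
 * and is freed when the backing virtual machine is destroyed.
 */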
struct devmem_softc {
	int	segid;
	char	*name;
	struct cdev *cdev;
	struct vmmdev_softc *sc;
	SLIST_ENTRY(devmem_softc) link;
};

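/*
 * Per-VM state associated with the /dev/vmm/<name> character device.  The
 * softc is linked onto the global 'head' list once the VM name is known to
 * be unique (VSC_LINKED) and is torn down in vmmdev_destroy().
 */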
struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	SLIST_ENTRY(vmmdev_softc) link;
	SLIST_HEAD(, devmem_softc) devmem;
	int		flags;
};
#define	VSC_LINKED		0x01

static SLIST_HEAD(, vmmdev_softc) head;

static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);

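/*
 * The helpers below serialize device operations against running vcpus by
 * moving vcpus into the VCPU_FROZEN state for the duration of the operation
 * and back to VCPU_IDLE afterwards.
 */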
static int
vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
{
	int error;

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
	return (error);
}

static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
{
	enum vcpu_state state;

	state = vcpu_get_state(sc->vm, vcpu, NULL);
	if (state != VCPU_FROZEN) {
		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
		    vcpu, state);
	}

	vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
}

static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
	int error, vcpu;

	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			break;
	}

	if (error) {
		while (--vcpu >= 0)
			vcpu_unlock_one(sc, vcpu);
	}

	return (error);
}

static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
	int vcpu;

	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
		vcpu_unlock_one(sc, vcpu);
}

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}

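/*
 * read(2)/write(2) handler for /dev/vmm/<name>.  The file offset is
 * interpreted as a guest physical address; reads of unbacked regions of the
 * guest physical address space return zeroes rather than failing.
 *
 * Illustrative use from userland (hypothetical VM name):
 *
 *	dd if=/dev/vmm/testvm bs=4k count=1 | hexdump -C
 */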
static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	error = vcpu_lock_one(sc, VM_MAXCPU - 1);
	if (error)
		return (error);

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ)
				error = uiomove(__DECONST(void *, zero_region),
				    c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	vcpu_unlock_one(sc, VM_MAXCPU - 1);
	return (error);
}

CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);

static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
	struct devmem_softc *dsc;
	int error;
	bool sysmem;

	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
	if (error || mseg->len == 0)
		return (error);

	if (!sysmem) {
		SLIST_FOREACH(dsc, &sc->devmem, link) {
			if (dsc->segid == mseg->segid)
				break;
		}
		KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
		    __func__, mseg->segid));
		error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
	} else {
		bzero(mseg->name, sizeof(mseg->name));
	}

	return (error);
}

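/*
 * Allocate a memory segment for the VM.  A named segment is treated as
 * device memory and gets a matching cdev under /dev/vmm.io/; unnamed
 * segments are system memory.  On success the devmem cdev takes ownership
 * of the copied name.
 */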
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
	char *name;
	int error;
	bool sysmem;

	error = 0;
	name = NULL;
	sysmem = true;

	if (VM_MEMSEG_NAME(mseg)) {
		sysmem = false;
		name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
		error = copystr(mseg->name, name, SPECNAMELEN + 1, 0);
		if (error)
			goto done;
	}

	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
	if (error)
		goto done;

	if (VM_MEMSEG_NAME(mseg)) {
		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
		if (error)
			vm_free_memseg(sc->vm, mseg->segid);
		else
			name = NULL;	/* freed when 'cdev' is destroyed */
	}
done:
	free(name, M_VMMDEV);
	return (error);
}

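/*
 * ioctl handler for /dev/vmm/<name>.  Depending on the command, either a
 * single vcpu (state_changed == 1) or all vcpus (state_changed == 2) are
 * frozen around the operation so that vcpu and memory-map state cannot
 * change underneath it.
 *
 * Illustrative userland call (hypothetical names; the argument structures
 * are defined in <machine/vmm_dev.h>):
 *
 *	fd = open("/dev/vmm/testvm", O_RDWR);
 *	error = ioctl(fd, VM_RUN, &vmrun);
 */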
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
	     struct thread *td)
{
	int error, vcpu, state_changed, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_run *vmrun;
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix *pptmsix;
	struct vm_nmi *vmnmi;
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_activate_cpu *vac;
	struct vm_cpuset *vm_cpuset;
	struct vm_intinfo *vmii;
	struct vm_rtc_time *rtctime;
	struct vm_rtc_data *rtcdata;
	struct vm_memmap *mm;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	error = 0;
	vcpu = -1;
	state_changed = 0;

	/*
	 * Some VMM ioctls can operate only on vcpus that are not running.
	 */
	switch (cmd) {
	case VM_RUN:
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_INJECT_EXCEPTION:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_PPTDEV_MSI:
	case VM_PPTDEV_MSIX:
	case VM_SET_X2APIC_STATE:
	case VM_GLA2GPA:
	case VM_ACTIVATE_CPU:
	case VM_SET_INTINFO:
	case VM_GET_INTINFO:
	case VM_RESTART_INSTRUCTION:
		/*
		 * XXX fragile, handle with care
		 * Assumes that the first field of the ioctl data is the vcpu.
		 */
		vcpu = *(int *)data;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	case VM_MAP_PPTDEV_MMIO:
	case VM_BIND_PPTDEV:
	case VM_UNBIND_PPTDEV:
	case VM_ALLOC_MEMSEG:
	case VM_MMAP_MEMSEG:
	case VM_REINIT:
		/*
		 * ioctls that operate on the entire virtual machine must
		 * prevent all vcpus from running.
		 */
		error = vcpu_lock_all(sc);
		if (error)
			goto done;
		state_changed = 2;
		break;

	case VM_GET_MEMSEG:
	case VM_MMAP_GETNEXT:
		/*
		 * Lock a vcpu to make sure that the memory map cannot be
		 * modified while it is being inspected.
		 */
		vcpu = VM_MAXCPU - 1;
		error = vcpu_lock_one(sc, vcpu);
		if (error)
			goto done;
		state_changed = 1;
		break;

	default:
		break;
	}

	switch(cmd) {
	case VM_RUN:
		vmrun = (struct vm_run *)data;
		error = vm_run(sc->vm, vmrun);
		break;
	case VM_SUSPEND:
		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index,
					statdesc->desc, sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
				      &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
				      pptmsi->bus, pptmsi->slot, pptmsi->func,
				      pptmsi->addr, pptmsi->msg,
				      pptmsi->numvec);
		break;
	case VM_PPTDEV_MSIX:
		pptmsix = (struct vm_pptdev_msix *)data;
		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
				       pptmsix->bus, pptmsix->slot,
				       pptmsix->func, pptmsix->idx,
				       pptmsix->addr, pptmsix->msg,
				       pptmsix->vector_control);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
				     pptmmio->func, pptmmio->gpa, pptmmio->len,
				     pptmmio->hpa);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
					 pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
					   pptdev->func);
		break;
	case VM_INJECT_EXCEPTION:
		vmexc = (struct vm_exception *)data;
		error = vm_inject_exception(sc->vm, vmexc->cpuid,
		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
		    vmexc->restart_instruction);
		break;
	case VM_INJECT_NMI:
		vmnmi = (struct vm_nmi *)data;
		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
		break;
	case VM_LAPIC_LOCAL_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
		    vmirq->vector);
		break;
	case VM_LAPIC_MSI:
		vmmsi = (struct vm_lapic_msi *)data;
		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
		break;
	case VM_IOAPIC_ASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_DEASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PULSE_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PINCOUNT:
		*(int *)data = vioapic_pincount(sc->vm);
		break;
	case VM_ISA_ASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_assert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_DEASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_deassert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_PULSE_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
		break;
	case VM_ISA_SET_IRQ_TRIGGER:
		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
		error = vatpic_set_irq_trigger(sc->vm,
		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
		break;
	case VM_MMAP_GETNEXT:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
		break;
	case VM_MMAP_MEMSEG:
		mm = (struct vm_memmap *)data;
		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
		    mm->len, mm->prot, mm->flags);
		break;
	case VM_ALLOC_MEMSEG:
		error = alloc_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_MEMSEG:
		error = get_memseg(sc, (struct vm_memseg *)data);
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
					&vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
					vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
					vmsegdesc->regnum,
					&vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
					vmsegdesc->regnum,
					&vmsegdesc->desc);
		break;
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(sc->vm, vmcap->cpuid,
					  vmcap->captype,
					  &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(sc->vm, vmcap->cpuid,
					  vmcap->captype,
					  vmcap->capval);
		break;
	case VM_SET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_set_x2apic_state(sc->vm,
					    x2apic->cpuid, x2apic->state);
		break;
	case VM_GET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_get_x2apic_state(sc->vm,
					    x2apic->cpuid, &x2apic->state);
		break;
	case VM_GET_GPA_PMAP:
		gpapte = (struct vm_gpa_pte *)data;
		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
		error = 0;
		break;
	case VM_GET_HPET_CAPABILITIES:
		error = vhpet_getcap((struct vm_hpet_cap *)data);
		break;
	case VM_GLA2GPA: {
		CTASSERT(PROT_READ == VM_PROT_READ);
		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
		gg = (struct vm_gla2gpa *)data;
		error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa, &gg->fault);
		KASSERT(error == 0 || error == EFAULT,
		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
		break;
	}
	case VM_ACTIVATE_CPU:
		vac = (struct vm_activate_cpu *)data;
		error = vm_activate_cpu(sc->vm, vac->vcpuid);
		break;
	case VM_GET_CPUS:
		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	case VM_SET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
		break;
	case VM_GET_INTINFO:
		vmii = (struct vm_intinfo *)data;
		error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
		    &vmii->info2);
		break;
	case VM_RTC_WRITE:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
		    rtcdata->value);
		break;
	case VM_RTC_READ:
		rtcdata = (struct vm_rtc_data *)data;
		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
		    &rtcdata->value);
		break;
	case VM_RTC_SETTIME:
		rtctime = (struct vm_rtc_time *)data;
		error = vrtc_set_time(sc->vm, rtctime->secs);
		break;
	case VM_RTC_GETTIME:
		error = 0;
		rtctime = (struct vm_rtc_time *)data;
		rtctime->secs = vrtc_get_time(sc->vm);
		break;
	case VM_RESTART_INSTRUCTION:
		error = vm_restart_instruction(sc->vm, vcpu);
		break;
	default:
		error = ENOTTY;
		break;
	}

	if (state_changed == 1)
		vcpu_unlock_one(sc, vcpu);
	else if (state_changed == 2)
		vcpu_unlock_all(sc);

done:
	/* Make sure that no handler returns a bogus value like ERESTART */
	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}

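/*
 * mmap(2) handler for /dev/vmm/<name>.  The requested range must fall
 * entirely within a single system-memory mapping of the guest physical
 * address space; device memory segments must instead be mapped through
 * their own /dev/vmm.io/ cdev.
 */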
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
    struct vm_object **objp, int nprot)
{
	struct vmmdev_softc *sc;
	vm_paddr_t gpa;
	size_t len;
	vm_ooffset_t segoff, first, last;
	int error, found, segid;
	bool sysmem;

	first = *offset;
	last = first + mapsize;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL) {
		/* virtual machine is in the process of being created */
		return (EINVAL);
	}

	/*
	 * Get a read lock on the guest memory map by freezing any vcpu.
	 */
	error = vcpu_lock_one(sc, VM_MAXCPU - 1);
	if (error)
		return (error);

	gpa = 0;
	found = 0;
	while (!found) {
		error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
		    NULL, NULL);
		if (error)
			break;

		if (first >= gpa && last <= gpa + len)
			found = 1;
		else
			gpa += len;
	}

	if (found) {
		error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
		KASSERT(error == 0 && *objp != NULL,
		    ("%s: invalid memory segment %d", __func__, segid));
		if (sysmem) {
			vm_object_reference(*objp);
			*offset = segoff + (first - gpa);
		} else {
			error = EINVAL;
		}
	}
	vcpu_unlock_one(sc, VM_MAXCPU - 1);
	return (error);
}

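/*
 * Final teardown of a VM.  This runs as the destroy_dev_sched_cb() callback
 * in a taskqueue context once the last reference on the cdev is gone, so no
 * new ioctl, read/write or mmap operations can race with it.
 */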
static void
vmmdev_destroy(void *arg)
{
	struct vmmdev_softc *sc = arg;
	struct devmem_softc *dsc;
	int error;

	error = vcpu_lock_all(sc);
	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));

	while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
		SLIST_REMOVE_HEAD(&sc->devmem, link);
		free(dsc->name, M_VMMDEV);
		free(dsc, M_VMMDEV);
	}

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

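/*
 * Handler for the hw.vmm.destroy sysctl.  Writing a VM name schedules the
 * VM's cdevs for destruction; the heavy lifting happens later in
 * vmmdev_destroy() and devmem_destroy().
 */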
static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	int error;
	char buf[VM_MAX_NAMELEN];
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
	 * goes down to 0 so we should not do it again in the callback.
	 *
	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
	 * is scheduled for destruction.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Schedule all cdevs to be destroyed:
	 *
	 * - any new operations on the 'cdev' will return an error (ENXIO).
	 *
	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
	 *   be destroyed and the callback will be invoked in a taskqueue
	 *   context.
	 *
	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
	 */
	SLIST_FOREACH(dsc, &sc->devmem, link) {
		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
	}
	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
	    NULL, 0, sysctl_vmm_destroy, "A", NULL);

static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

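/*
 * Handler for the hw.vmm.create sysctl.  Writing a VM name creates the VM
 * and its /dev/vmm/<name> cdev.  For example (hypothetical VM name):
 *
 *	# sysctl hw.vmm.create=testvm
 *	# sysctl hw.vmm.destroy=testvm
 */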
static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char buf[VM_MAX_NAMELEN];

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL)
		return (EEXIST);

	error = vm_create(buf, &vm);
	if (error != 0)
		return (error);

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->vm = vm;
	SLIST_INIT(&sc->devmem);

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		vmmdev_destroy(sc);
		return (EEXIST);
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
			   UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		return (error);
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
	    NULL, 0, sysctl_vmm_create, "A", NULL);

void
vmmdev_init(void)
{
	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}

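/*
 * mmap(2) handler for a /dev/vmm.io/ devmem cdev.  The requested range must
 * lie entirely within the corresponding device memory segment.
 */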
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
    struct vm_object **objp, int nprot)
{
	struct devmem_softc *dsc;
	vm_ooffset_t first, last;
	size_t seglen;
	int error;
	bool sysmem;

	dsc = cdev->si_drv1;
	if (dsc == NULL) {
		/* 'cdev' has been created but is not ready for use */
		return (ENXIO);
	}

	first = *offset;
	last = *offset + len;
	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
		return (EINVAL);

	error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
	if (error)
		return (error);

	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
	KASSERT(error == 0 && !sysmem && *objp != NULL,
	    ("%s: invalid devmem segment %d", __func__, dsc->segid));

	vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);

	if (seglen >= last) {
		vm_object_reference(*objp);
		return (0);
	} else {
		return (EINVAL);
	}
}

static struct cdevsw devmemsw = {
	.d_name		= "devmem",
	.d_version	= D_VERSION,
	.d_mmap_single	= devmem_mmap_single,
};

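/*
 * Create the /dev/vmm.io/<vmname>.<devname> cdev for a device memory
 * segment.  On success the devmem softc takes ownership of 'devname'.
 */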
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
	struct devmem_softc *dsc;
	struct vmmdev_softc *sc;
	struct cdev *cdev;
	int error;

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
	if (error)
		return (error);

	dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(vmname);
	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
	if (sc->cdev == NULL) {
		/* virtual machine is being created or destroyed */
		mtx_unlock(&vmmdev_mtx);
		free(dsc, M_VMMDEV);
		destroy_dev_sched_cb(cdev, NULL, 0);
		return (ENODEV);
	}

	dsc->segid = segid;
	dsc->name = devname;
	dsc->cdev = cdev;
	dsc->sc = sc;
	SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
	mtx_unlock(&vmmdev_mtx);

	/* The 'cdev' is ready for use after 'si_drv1' is initialized */
	cdev->si_drv1 = dsc;
	return (0);
}

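/*
 * destroy_dev_sched_cb() callback for a devmem cdev; the softc itself is
 * freed later by vmmdev_destroy().
 */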
static void
devmem_destroy(void *arg)
{
	struct devmem_softc *dsc = arg;

	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
	dsc->cdev = NULL;
	dsc->sc = NULL;
}
984