/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm_dev.c 270071 2014-08-17 01:00:42Z grehan $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm_dev.c 270071 2014-08-17 01:00:42Z grehan $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/sysctl.h>
#include <sys/libkern.h>
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_mem.h"
#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"

struct vmmdev_softc {
	struct vm	*vm;		/* vm instance cookie */
	struct cdev	*cdev;
	SLIST_ENTRY(vmmdev_softc) link;
	int		flags;
};
#define	VSC_LINKED		0x01

static SLIST_HEAD(, vmmdev_softc) head;

static struct mtx vmmdev_mtx;

static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");

SYSCTL_DECL(_hw_vmm);

static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
	struct vmmdev_softc *sc;

#ifdef notyet	/* XXX kernel is not compiled with invariants */
	mtx_assert(&vmmdev_mtx, MA_OWNED);
#endif

	SLIST_FOREACH(sc, &head, link) {
		if (strcmp(name, vm_name(sc->vm)) == 0)
			break;
	}

	return (sc);
}

static struct vmmdev_softc *
vmmdev_lookup2(struct cdev *cdev)
{

	return (cdev->si_drv1);
}

static int
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
{
	int error, off, c, prot;
	vm_paddr_t gpa;
	void *hpa, *cookie;
	struct vmmdev_softc *sc;

	static char zerobuf[PAGE_SIZE];

	error = 0;
	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		error = ENXIO;

	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
	while (uio->uio_resid > 0 && error == 0) {
		gpa = uio->uio_offset;
		off = gpa & PAGE_MASK;
		c = min(uio->uio_resid, PAGE_SIZE - off);

		/*
		 * The VM has a hole in its physical memory map. If we want to
		 * use 'dd' to inspect memory beyond the hole we need to
		 * provide bogus data for memory that lies in the hole.
		 *
		 * Since this device does not support lseek(2), dd(1) will
		 * read(2) blocks of data to simulate the lseek(2).
		 */
		hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
		if (hpa == NULL) {
			if (uio->uio_rw == UIO_READ)
				error = uiomove(zerobuf, c, uio);
			else
				error = EFAULT;
		} else {
			error = uiomove(hpa, c, uio);
			vm_gpa_release(cookie);
		}
	}
	return (error);
}

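/*
 * Example (illustrative, not part of the upstream file): with the device
 * node created by sysctl_vmm_create() below, guest physical memory can be
 * inspected from userland via read(2) on the vmm device.  The VM name
 * "vm0" is only a placeholder.
 *
 *	dd if=/dev/vmm/vm0 bs=4k count=1 | hexdump -C
 */
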
static int
vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
	     struct thread *td)
{
	int error, vcpu, state_changed, size;
	cpuset_t *cpuset;
	struct vmmdev_softc *sc;
	struct vm_memory_segment *seg;
	struct vm_register *vmreg;
	struct vm_seg_desc *vmsegdesc;
	struct vm_run *vmrun;
	struct vm_exception *vmexc;
	struct vm_lapic_irq *vmirq;
	struct vm_lapic_msi *vmmsi;
	struct vm_ioapic_irq *ioapic_irq;
	struct vm_isa_irq *isa_irq;
	struct vm_isa_irq_trigger *isa_irq_trigger;
	struct vm_capability *vmcap;
	struct vm_pptdev *pptdev;
	struct vm_pptdev_mmio *pptmmio;
	struct vm_pptdev_msi *pptmsi;
	struct vm_pptdev_msix *pptmsix;
	struct vm_nmi *vmnmi;
	struct vm_stats *vmstats;
	struct vm_stat_desc *statdesc;
	struct vm_x2apic *x2apic;
	struct vm_gpa_pte *gpapte;
	struct vm_suspend *vmsuspend;
	struct vm_gla2gpa *gg;
	struct vm_activate_cpu *vac;
	struct vm_cpuset *vm_cpuset;

	sc = vmmdev_lookup2(cdev);
	if (sc == NULL)
		return (ENXIO);

	error = 0;
	vcpu = -1;
	state_changed = 0;

	/*
	 * Some VMM ioctls can operate only on vcpus that are not running.
	 */
	switch (cmd) {
	case VM_RUN:
	case VM_GET_REGISTER:
	case VM_SET_REGISTER:
	case VM_GET_SEGMENT_DESCRIPTOR:
	case VM_SET_SEGMENT_DESCRIPTOR:
	case VM_INJECT_EXCEPTION:
	case VM_GET_CAPABILITY:
	case VM_SET_CAPABILITY:
	case VM_PPTDEV_MSI:
	case VM_PPTDEV_MSIX:
	case VM_SET_X2APIC_STATE:
	case VM_GLA2GPA:
	case VM_ACTIVATE_CPU:
		/*
		 * XXX fragile, handle with care
		 * Assumes that the first field of the ioctl data is the vcpu.
		 */
		vcpu = *(int *)data;
		if (vcpu < 0 || vcpu >= VM_MAXCPU) {
			error = EINVAL;
			goto done;
		}

		error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
		if (error)
			goto done;

		state_changed = 1;
		break;

	case VM_MAP_PPTDEV_MMIO:
	case VM_BIND_PPTDEV:
	case VM_UNBIND_PPTDEV:
	case VM_MAP_MEMORY:
	case VM_REINIT:
		/*
		 * ioctls that operate on the entire virtual machine must
		 * prevent all vcpus from running.
		 */
		error = 0;
		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
			error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
			if (error)
				break;
		}

		if (error) {
			while (--vcpu >= 0)
				vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
			goto done;
		}

		state_changed = 2;
		break;

	default:
		break;
	}

	switch(cmd) {
	case VM_RUN:
		vmrun = (struct vm_run *)data;
		error = vm_run(sc->vm, vmrun);
		break;
	case VM_SUSPEND:
		vmsuspend = (struct vm_suspend *)data;
		error = vm_suspend(sc->vm, vmsuspend->how);
		break;
	case VM_REINIT:
		error = vm_reinit(sc->vm);
		break;
	case VM_STAT_DESC: {
		statdesc = (struct vm_stat_desc *)data;
		error = vmm_stat_desc_copy(statdesc->index,
					statdesc->desc, sizeof(statdesc->desc));
		break;
	}
	case VM_STATS: {
		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
		vmstats = (struct vm_stats *)data;
		getmicrotime(&vmstats->tv);
		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
				      &vmstats->num_entries, vmstats->statbuf);
		break;
	}
	case VM_PPTDEV_MSI:
		pptmsi = (struct vm_pptdev_msi *)data;
		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
				      pptmsi->bus, pptmsi->slot, pptmsi->func,
				      pptmsi->addr, pptmsi->msg,
				      pptmsi->numvec);
		break;
	case VM_PPTDEV_MSIX:
		pptmsix = (struct vm_pptdev_msix *)data;
		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
				       pptmsix->bus, pptmsix->slot,
				       pptmsix->func, pptmsix->idx,
				       pptmsix->addr, pptmsix->msg,
				       pptmsix->vector_control);
		break;
	case VM_MAP_PPTDEV_MMIO:
		pptmmio = (struct vm_pptdev_mmio *)data;
		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
				     pptmmio->func, pptmmio->gpa, pptmmio->len,
				     pptmmio->hpa);
		break;
	case VM_BIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
					 pptdev->func);
		break;
	case VM_UNBIND_PPTDEV:
		pptdev = (struct vm_pptdev *)data;
		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
					   pptdev->func);
		break;
	case VM_INJECT_EXCEPTION:
		vmexc = (struct vm_exception *)data;
		error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc);
		break;
	case VM_INJECT_NMI:
		vmnmi = (struct vm_nmi *)data;
		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
		break;
	case VM_LAPIC_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
		break;
	case VM_LAPIC_LOCAL_IRQ:
		vmirq = (struct vm_lapic_irq *)data;
		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
		    vmirq->vector);
		break;
	case VM_LAPIC_MSI:
		vmmsi = (struct vm_lapic_msi *)data;
		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
		break;
	case VM_IOAPIC_ASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_DEASSERT_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PULSE_IRQ:
		ioapic_irq = (struct vm_ioapic_irq *)data;
		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
		break;
	case VM_IOAPIC_PINCOUNT:
		*(int *)data = vioapic_pincount(sc->vm);
		break;
	case VM_ISA_ASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_assert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_DEASSERT_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_deassert_irq(sc->vm,
			    isa_irq->ioapic_irq);
		break;
	case VM_ISA_PULSE_IRQ:
		isa_irq = (struct vm_isa_irq *)data;
		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
		if (error == 0 && isa_irq->ioapic_irq != -1)
			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
		break;
	case VM_ISA_SET_IRQ_TRIGGER:
		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
		error = vatpic_set_irq_trigger(sc->vm,
		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
		break;
	case VM_MAP_MEMORY:
		seg = (struct vm_memory_segment *)data;
		error = vm_malloc(sc->vm, seg->gpa, seg->len);
		break;
	case VM_GET_MEMORY_SEG:
		seg = (struct vm_memory_segment *)data;
		seg->len = 0;
		(void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
		error = 0;
		break;
	case VM_GET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
					&vmreg->regval);
		break;
	case VM_SET_REGISTER:
		vmreg = (struct vm_register *)data;
		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
					vmreg->regval);
		break;
	case VM_SET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
					vmsegdesc->regnum,
					&vmsegdesc->desc);
		break;
	case VM_GET_SEGMENT_DESCRIPTOR:
		vmsegdesc = (struct vm_seg_desc *)data;
		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
					vmsegdesc->regnum,
					&vmsegdesc->desc);
		break;
	case VM_GET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_get_capability(sc->vm, vmcap->cpuid,
					  vmcap->captype,
					  &vmcap->capval);
		break;
	case VM_SET_CAPABILITY:
		vmcap = (struct vm_capability *)data;
		error = vm_set_capability(sc->vm, vmcap->cpuid,
					  vmcap->captype,
					  vmcap->capval);
		break;
	case VM_SET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_set_x2apic_state(sc->vm,
					    x2apic->cpuid, x2apic->state);
		break;
	case VM_GET_X2APIC_STATE:
		x2apic = (struct vm_x2apic *)data;
		error = vm_get_x2apic_state(sc->vm,
					    x2apic->cpuid, &x2apic->state);
		break;
	case VM_GET_GPA_PMAP:
		gpapte = (struct vm_gpa_pte *)data;
		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
		error = 0;
		break;
	case VM_GET_HPET_CAPABILITIES:
		error = vhpet_getcap((struct vm_hpet_cap *)data);
		break;
	case VM_GLA2GPA: {
		CTASSERT(PROT_READ == VM_PROT_READ);
		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
		gg = (struct vm_gla2gpa *)data;
		error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
		    gg->prot, &gg->gpa);
		KASSERT(error == 0 || error == 1 || error == -1,
		    ("%s: vmm_gla2gpa unknown error %d", __func__, error));
		if (error >= 0) {
			/*
			 * error = 0: the translation was successful
			 * error = 1: a fault was injected into the guest
			 */
			gg->fault = error;
			error = 0;
		} else {
			error = EFAULT;
		}
		break;
	}
	case VM_ACTIVATE_CPU:
		vac = (struct vm_activate_cpu *)data;
		error = vm_activate_cpu(sc->vm, vac->vcpuid);
		break;
	case VM_GET_CPUS:
		error = 0;
		vm_cpuset = (struct vm_cpuset *)data;
		size = vm_cpuset->cpusetsize;
		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
			error = ERANGE;
			break;
		}
		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
		if (vm_cpuset->which == VM_ACTIVE_CPUS)
			*cpuset = vm_active_cpus(sc->vm);
		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
			*cpuset = vm_suspended_cpus(sc->vm);
		else
			error = EINVAL;
		if (error == 0)
			error = copyout(cpuset, vm_cpuset->cpus, size);
		free(cpuset, M_TEMP);
		break;
	default:
		error = ENOTTY;
		break;
	}

	if (state_changed == 1) {
		vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
	} else if (state_changed == 2) {
		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
			vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
	}

done:
	/* Make sure that no handler returns a bogus value like ERESTART */
	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
	return (error);
}

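/*
 * Illustrative userland sketch (an assumption, not taken from this file):
 * the ioctls handled above are normally issued through libvmmapi, but a
 * raw call looks roughly like the following.  Structure layouts come from
 * <machine/vmm_dev.h>; "vm0" is a placeholder VM name, and the vcpu id is
 * the first field of the request as noted in the switch above.
 *
 *	int fd = open("/dev/vmm/vm0", O_RDWR);
 *	struct vm_run vmrun;
 *
 *	memset(&vmrun, 0, sizeof(vmrun));
 *	vmrun.cpuid = 0;
 *	if (ioctl(fd, VM_RUN, &vmrun) == 0) {
 *		// vmrun now describes why the vcpu stopped running
 *	}
 */
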
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
		   vm_size_t size, struct vm_object **object, int nprot)
{
	int error;
	struct vmmdev_softc *sc;

	sc = vmmdev_lookup2(cdev);
	if (sc != NULL && (nprot & PROT_EXEC) == 0)
		error = vm_get_memobj(sc->vm, *offset, size, offset, object);
	else
		error = EINVAL;

	return (error);
}

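/*
 * Illustrative sketch (an assumption): guest physical memory can also be
 * mapped into userland by mmap(2)ing the device node at a guest-physical
 * offset; executable mappings are rejected by vmmdev_mmap_single() above.
 * "fd" and "len" are placeholders for an open vmm descriptor and a mapping
 * length.
 *
 *	void *gpa0 = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	    fd, 0);
 */
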
static void
vmmdev_destroy(void *arg)
{

	struct vmmdev_softc *sc = arg;

	if (sc->cdev != NULL)
		destroy_dev(sc->cdev);

	if (sc->vm != NULL)
		vm_destroy(sc->vm);

	if ((sc->flags & VSC_LINKED) != 0) {
		mtx_lock(&vmmdev_mtx);
		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
		mtx_unlock(&vmmdev_mtx);
	}

	free(sc, M_VMMDEV);
}

static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
	int error;
	char buf[VM_MAX_NAMELEN];
	struct vmmdev_softc *sc;
	struct cdev *cdev;

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	if (sc == NULL || sc->cdev == NULL) {
		mtx_unlock(&vmmdev_mtx);
		return (EINVAL);
	}

	/*
	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
	 * goes down to 0 so we should not do it again in the callback.
	 */
	cdev = sc->cdev;
	sc->cdev = NULL;
	mtx_unlock(&vmmdev_mtx);

	/*
	 * Schedule the 'cdev' to be destroyed:
	 *
	 * - any new operations on this 'cdev' will return an error (ENXIO).
	 *
	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
	 *   be destroyed and the callback will be invoked in a taskqueue
	 *   context.
	 */
	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);

	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
	    NULL, 0, sysctl_vmm_destroy, "A", NULL);

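/*
 * Example (derived from the handler above): an existing VM is torn down by
 * writing its name to the sysctl, e.g.
 *
 *	sysctl hw.vmm.destroy=vm0
 */
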
static struct cdevsw vmmdevsw = {
	.d_name		= "vmmdev",
	.d_version	= D_VERSION,
	.d_ioctl	= vmmdev_ioctl,
	.d_mmap_single	= vmmdev_mmap_single,
	.d_read		= vmmdev_rw,
	.d_write	= vmmdev_rw,
};

static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct vm *vm;
	struct cdev *cdev;
	struct vmmdev_softc *sc, *sc2;
	char buf[VM_MAX_NAMELEN];

	strlcpy(buf, "beavis", sizeof(buf));
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		return (error);

	mtx_lock(&vmmdev_mtx);
	sc = vmmdev_lookup(buf);
	mtx_unlock(&vmmdev_mtx);
	if (sc != NULL)
		return (EEXIST);

	error = vm_create(buf, &vm);
	if (error != 0)
		return (error);

	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
	sc->vm = vm;

	/*
	 * Lookup the name again just in case somebody sneaked in when we
	 * dropped the lock.
	 */
	mtx_lock(&vmmdev_mtx);
	sc2 = vmmdev_lookup(buf);
	if (sc2 == NULL) {
		SLIST_INSERT_HEAD(&head, sc, link);
		sc->flags |= VSC_LINKED;
	}
	mtx_unlock(&vmmdev_mtx);

	if (sc2 != NULL) {
		vmmdev_destroy(sc);
		return (EEXIST);
	}

	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
			   UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
	if (error != 0) {
		vmmdev_destroy(sc);
		return (error);
	}

	mtx_lock(&vmmdev_mtx);
	sc->cdev = cdev;
	sc->cdev->si_drv1 = sc;
	mtx_unlock(&vmmdev_mtx);

	return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
	    NULL, 0, sysctl_vmm_create, "A", NULL);

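/*
 * Example (derived from the handler above): a VM instance and its
 * /dev/vmm/<name> device node are created by writing a name to the
 * sysctl, e.g.
 *
 *	sysctl hw.vmm.create=vm0
 */
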
void
vmmdev_init(void)
{
	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
}

int
vmmdev_cleanup(void)
{
	int error;

	if (SLIST_EMPTY(&head))
		error = 0;
	else
		error = EBUSY;

	return (error);
}