/* ppt.c revision 279470 */
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/sys/amd64/vmm/io/ppt.c 279470 2015-03-01 04:22:06Z rstone $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/io/ppt.c 279470 2015-03-01 04:22:06Z rstone $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/module.h>
37#include <sys/bus.h>
38#include <sys/pciio.h>
39#include <sys/rman.h>
40#include <sys/smp.h>
41#include <sys/sysctl.h>
42
43#include <dev/pci/pcivar.h>
44#include <dev/pci/pcireg.h>
45
46#include <machine/resource.h>
47
48#include <machine/vmm.h>
49#include <machine/vmm_dev.h>
50
51#include "vmm_lapic.h"
52#include "vmm_ktr.h"
53
54#include "iommu.h"
55#include "ppt.h"
56
/* XXX locking */

/* Capacity of the static pptdevs[] table below. */
#define	MAX_PPTDEVS	(sizeof(pptdevs) / sizeof(pptdevs[0]))
/* Upper bound on MSI vectors per passthrough device (fixed-size arrays). */
#define	MAX_MSIMSGS	32

/*
 * If the MSI-X table is located in the middle of a BAR then that MMIO
 * region gets split into two segments - one segment above the MSI-X table
 * and the other segment below the MSI-X table - with a hole in place of
 * the MSI-X table so accesses to it can be trapped and emulated.
 *
 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
 */
#define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)

/* Malloc type used for the dynamically sized per-device MSI-X arrays. */
MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
73
/* Argument handed to the pptintr() filter for each configured vector. */
struct pptintr_arg {				/* pptintr(pptintr_arg) */
	struct pptdev	*pptdev;	/* device the vector belongs to */
	uint64_t	addr;		/* guest-programmed MSI address */
	uint64_t	msg_data;	/* guest-programmed MSI data */
};

/*
 * Per-device passthrough state.  Slots are claimed in order by
 * ppt_attach() and never recycled while the driver is loaded.
 */
static struct pptdev {
	device_t	dev;
	struct vm	*vm;			/* owner of this device */
	/* Guest MMIO mappings established via ppt_map_mmio(). */
	struct vm_memory_segment mmio[MAX_MMIOSEGS];
	struct {
		int	num_msgs;		/* guest state */

		int	startrid;		/* host state */
		struct resource *res[MAX_MSIMSGS];
		void	*cookie[MAX_MSIMSGS];
		struct pptintr_arg arg[MAX_MSIMSGS];
	} msi;

	/* MSI-X state; arrays sized at first setup from pci_msix_count(). */
	struct {
		int num_msgs;
		int startrid;
		int msix_table_rid;
		struct resource *msix_table_res;
		struct resource **res;
		void **cookie;
		struct pptintr_arg *arg;
	} msix;
} pptdevs[64];
103
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");

/* Number of entries in use in pptdevs[]; exported read-only via sysctl. */
static int num_pptdevs;
SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
    "number of pci passthru devices");
110
111static int
112ppt_probe(device_t dev)
113{
114	int bus, slot, func;
115	struct pci_devinfo *dinfo;
116
117	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
118
119	bus = pci_get_bus(dev);
120	slot = pci_get_slot(dev);
121	func = pci_get_function(dev);
122
123	/*
124	 * To qualify as a pci passthrough device a device must:
125	 * - be allowed by administrator to be used in this role
126	 * - be an endpoint device
127	 */
128	if (vmm_is_pptdev(bus, slot, func) &&
129	    (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
130		return (0);
131	else
132		return (ENXIO);
133}
134
135static int
136ppt_attach(device_t dev)
137{
138	int n;
139
140	if (num_pptdevs >= MAX_PPTDEVS) {
141		printf("ppt_attach: maximum number of pci passthrough devices "
142		       "exceeded\n");
143		return (ENXIO);
144	}
145
146	n = num_pptdevs++;
147	pptdevs[n].dev = dev;
148
149	if (bootverbose)
150		device_printf(dev, "attached\n");
151
152	return (0);
153}
154
/* Device detach method; currently a no-op that always succeeds. */
static int
ppt_detach(device_t dev)
{
	/*
	 * XXX check whether there are any pci passthrough devices assigned
	 * to guests before we allow this driver to detach.
	 */

	return (0);
}
165
static device_method_t ppt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ppt_probe),
	DEVMETHOD(device_attach,	ppt_attach),
	DEVMETHOD(device_detach,	ppt_detach),
	{0, 0}
};

/* Register "ppt" as a driver on the pci bus (no per-instance softc). */
static devclass_t ppt_devclass;
DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
177
178static struct pptdev *
179ppt_find(int bus, int slot, int func)
180{
181	device_t dev;
182	int i, b, s, f;
183
184	for (i = 0; i < num_pptdevs; i++) {
185		dev = pptdevs[i].dev;
186		b = pci_get_bus(dev);
187		s = pci_get_slot(dev);
188		f = pci_get_function(dev);
189		if (bus == b && slot == s && func == f)
190			return (&pptdevs[i]);
191	}
192	return (NULL);
193}
194
195static void
196ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
197{
198	int i;
199	struct vm_memory_segment *seg;
200
201	for (i = 0; i < MAX_MMIOSEGS; i++) {
202		seg = &ppt->mmio[i];
203		if (seg->len == 0)
204			continue;
205		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
206		bzero(seg, sizeof(struct vm_memory_segment));
207	}
208}
209
210static void
211ppt_teardown_msi(struct pptdev *ppt)
212{
213	int i, rid;
214	void *cookie;
215	struct resource *res;
216
217	if (ppt->msi.num_msgs == 0)
218		return;
219
220	for (i = 0; i < ppt->msi.num_msgs; i++) {
221		rid = ppt->msi.startrid + i;
222		res = ppt->msi.res[i];
223		cookie = ppt->msi.cookie[i];
224
225		if (cookie != NULL)
226			bus_teardown_intr(ppt->dev, res, cookie);
227
228		if (res != NULL)
229			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
230
231		ppt->msi.res[i] = NULL;
232		ppt->msi.cookie[i] = NULL;
233	}
234
235	if (ppt->msi.startrid == 1)
236		pci_release_msi(ppt->dev);
237
238	ppt->msi.num_msgs = 0;
239}
240
241static void
242ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
243{
244	int rid;
245	struct resource *res;
246	void *cookie;
247
248	rid = ppt->msix.startrid + idx;
249	res = ppt->msix.res[idx];
250	cookie = ppt->msix.cookie[idx];
251
252	if (cookie != NULL)
253		bus_teardown_intr(ppt->dev, res, cookie);
254
255	if (res != NULL)
256		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
257
258	ppt->msix.res[idx] = NULL;
259	ppt->msix.cookie[idx] = NULL;
260}
261
262static void
263ppt_teardown_msix(struct pptdev *ppt)
264{
265	int i;
266
267	if (ppt->msix.num_msgs == 0)
268		return;
269
270	for (i = 0; i < ppt->msix.num_msgs; i++)
271		ppt_teardown_msix_intr(ppt, i);
272
273	if (ppt->msix.msix_table_res) {
274		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
275				     ppt->msix.msix_table_rid,
276				     ppt->msix.msix_table_res);
277		ppt->msix.msix_table_res = NULL;
278		ppt->msix.msix_table_rid = 0;
279	}
280
281	free(ppt->msix.res, M_PPTMSIX);
282	free(ppt->msix.cookie, M_PPTMSIX);
283	free(ppt->msix.arg, M_PPTMSIX);
284
285	pci_release_msi(ppt->dev);
286
287	ppt->msix.num_msgs = 0;
288}
289
290int
291ppt_avail_devices(void)
292{
293
294	return (num_pptdevs);
295}
296
297int
298ppt_assigned_devices(struct vm *vm)
299{
300	int i, num;
301
302	num = 0;
303	for (i = 0; i < num_pptdevs; i++) {
304		if (pptdevs[i].vm == vm)
305			num++;
306	}
307	return (num);
308}
309
310boolean_t
311ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
312{
313	int i, n;
314	struct pptdev *ppt;
315	struct vm_memory_segment *seg;
316
317	for (n = 0; n < num_pptdevs; n++) {
318		ppt = &pptdevs[n];
319		if (ppt->vm != vm)
320			continue;
321
322		for (i = 0; i < MAX_MMIOSEGS; i++) {
323			seg = &ppt->mmio[i];
324			if (seg->len == 0)
325				continue;
326			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
327				return (TRUE);
328		}
329	}
330
331	return (FALSE);
332}
333
334int
335ppt_assign_device(struct vm *vm, int bus, int slot, int func)
336{
337	struct pptdev *ppt;
338
339	ppt = ppt_find(bus, slot, func);
340	if (ppt != NULL) {
341		/*
342		 * If this device is owned by a different VM then we
343		 * cannot change its owner.
344		 */
345		if (ppt->vm != NULL && ppt->vm != vm)
346			return (EBUSY);
347
348		ppt->vm = vm;
349		iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
350		return (0);
351	}
352	return (ENOENT);
353}
354
355int
356ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
357{
358	struct pptdev *ppt;
359
360	ppt = ppt_find(bus, slot, func);
361	if (ppt != NULL) {
362		/*
363		 * If this device is not owned by this 'vm' then bail out.
364		 */
365		if (ppt->vm != vm)
366			return (EBUSY);
367		ppt_unmap_mmio(vm, ppt);
368		ppt_teardown_msi(ppt);
369		ppt_teardown_msix(ppt);
370		iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
371		ppt->vm = NULL;
372		return (0);
373	}
374	return (ENOENT);
375}
376
377int
378ppt_unassign_all(struct vm *vm)
379{
380	int i, bus, slot, func;
381	device_t dev;
382
383	for (i = 0; i < num_pptdevs; i++) {
384		if (pptdevs[i].vm == vm) {
385			dev = pptdevs[i].dev;
386			bus = pci_get_bus(dev);
387			slot = pci_get_slot(dev);
388			func = pci_get_function(dev);
389			vm_unassign_pptdev(vm, bus, slot, func);
390		}
391	}
392
393	return (0);
394}
395
396int
397ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
398	     vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
399{
400	int i, error;
401	struct vm_memory_segment *seg;
402	struct pptdev *ppt;
403
404	ppt = ppt_find(bus, slot, func);
405	if (ppt != NULL) {
406		if (ppt->vm != vm)
407			return (EBUSY);
408
409		for (i = 0; i < MAX_MMIOSEGS; i++) {
410			seg = &ppt->mmio[i];
411			if (seg->len == 0) {
412				error = vm_map_mmio(vm, gpa, len, hpa);
413				if (error == 0) {
414					seg->gpa = gpa;
415					seg->len = len;
416				}
417				return (error);
418			}
419		}
420		return (ENOSPC);
421	}
422	return (ENOENT);
423}
424
425static int
426pptintr(void *arg)
427{
428	struct pptdev *ppt;
429	struct pptintr_arg *pptarg;
430
431	pptarg = arg;
432	ppt = pptarg->pptdev;
433
434	if (ppt->vm != NULL)
435		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
436	else {
437		/*
438		 * XXX
439		 * This is not expected to happen - panic?
440		 */
441	}
442
443	/*
444	 * For legacy interrupts give other filters a chance in case
445	 * the interrupt was not generated by the passthrough device.
446	 */
447	if (ppt->msi.startrid == 0)
448		return (FILTER_STRAY);
449	else
450		return (FILTER_HANDLED);
451}
452
/*
 * Configure 'numvec' MSI vectors (or one shared legacy interrupt when
 * the device has no MSI capability) on the b/s/f passthrough device and
 * route them into 'vm' at guest address/data 'addr'/'msg'.  Any existing
 * MSI state is torn down first; numvec == 0 therefore just releases it.
 * Returns 0 on success or an errno on failure.
 */
int
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
	      uint64_t addr, uint64_t msg, int numvec)
{
	int i, rid, flags;
	int msi_count, startrid, error, tmp;
	struct pptdev *ppt;

	if (numvec < 0 || numvec > MAX_MSIMSGS)
		return (EINVAL);

	ppt = ppt_find(bus, slot, func);
	if (ppt == NULL)
		return (ENOENT);
	if (ppt->vm != vm)		/* Make sure we own this device */
		return (EBUSY);

	/* Free any allocated resources */
	ppt_teardown_msi(ppt);

	if (numvec == 0)		/* nothing more to do */
		return (0);

	flags = RF_ACTIVE;
	msi_count = pci_msi_count(ppt->dev);
	if (msi_count == 0) {
		startrid = 0;		/* legacy interrupt */
		msi_count = 1;
		flags |= RF_SHAREABLE;
	} else
		startrid = 1;		/* MSI */

	/*
	 * The device must be capable of supporting the number of vectors
	 * the guest wants to allocate.
	 */
	if (numvec > msi_count)
		return (EINVAL);

	/*
	 * Make sure that we can allocate all the MSI vectors that are needed
	 * by the guest.
	 */
	if (startrid == 1) {
		tmp = numvec;
		error = pci_alloc_msi(ppt->dev, &tmp);
		if (error)
			return (error);
		else if (tmp != numvec) {
			pci_release_msi(ppt->dev);
			return (ENOSPC);
		} else {
			/* success */
		}
	}

	ppt->msi.startrid = startrid;

	/*
	 * Allocate the irq resource and attach it to the interrupt handler.
	 */
	for (i = 0; i < numvec; i++) {
		/*
		 * num_msgs is bumped before each allocation so that a
		 * partial failure can be unwound by ppt_teardown_msi().
		 */
		ppt->msi.num_msgs = i + 1;
		ppt->msi.cookie[i] = NULL;

		rid = startrid + i;
		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
							 &rid, flags);
		if (ppt->msi.res[i] == NULL)
			break;

		ppt->msi.arg[i].pptdev = ppt;
		ppt->msi.arg[i].addr = addr;
		/* Consecutive vectors use consecutive MSI data values. */
		ppt->msi.arg[i].msg_data = msg + i;

		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
				       INTR_TYPE_NET | INTR_MPSAFE,
				       pptintr, NULL, &ppt->msi.arg[i],
				       &ppt->msi.cookie[i]);
		if (error != 0)
			break;
	}

	/* Early loop exit means a vector failed; unwind everything. */
	if (i < numvec) {
		ppt_teardown_msi(ppt);
		return (ENXIO);
	}

	return (0);
}
543
544int
545ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
546	       int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
547{
548	struct pptdev *ppt;
549	struct pci_devinfo *dinfo;
550	int numvec, alloced, rid, error;
551	size_t res_size, cookie_size, arg_size;
552
553	ppt = ppt_find(bus, slot, func);
554	if (ppt == NULL)
555		return (ENOENT);
556	if (ppt->vm != vm)		/* Make sure we own this device */
557		return (EBUSY);
558
559	dinfo = device_get_ivars(ppt->dev);
560	if (!dinfo)
561		return (ENXIO);
562
563	/*
564	 * First-time configuration:
565	 * 	Allocate the MSI-X table
566	 *	Allocate the IRQ resources
567	 *	Set up some variables in ppt->msix
568	 */
569	if (ppt->msix.num_msgs == 0) {
570		numvec = pci_msix_count(ppt->dev);
571		if (numvec <= 0)
572			return (EINVAL);
573
574		ppt->msix.startrid = 1;
575		ppt->msix.num_msgs = numvec;
576
577		res_size = numvec * sizeof(ppt->msix.res[0]);
578		cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
579		arg_size = numvec * sizeof(ppt->msix.arg[0]);
580
581		ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
582		ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
583					  M_WAITOK | M_ZERO);
584		ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
585
586		rid = dinfo->cfg.msix.msix_table_bar;
587		ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
588					       SYS_RES_MEMORY, &rid, RF_ACTIVE);
589
590		if (ppt->msix.msix_table_res == NULL) {
591			ppt_teardown_msix(ppt);
592			return (ENOSPC);
593		}
594		ppt->msix.msix_table_rid = rid;
595
596		alloced = numvec;
597		error = pci_alloc_msix(ppt->dev, &alloced);
598		if (error || alloced != numvec) {
599			ppt_teardown_msix(ppt);
600			return (error == 0 ? ENOSPC: error);
601		}
602	}
603
604	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
605		/* Tear down the IRQ if it's already set up */
606		ppt_teardown_msix_intr(ppt, idx);
607
608		/* Allocate the IRQ resource */
609		ppt->msix.cookie[idx] = NULL;
610		rid = ppt->msix.startrid + idx;
611		ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
612							    &rid, RF_ACTIVE);
613		if (ppt->msix.res[idx] == NULL)
614			return (ENXIO);
615
616		ppt->msix.arg[idx].pptdev = ppt;
617		ppt->msix.arg[idx].addr = addr;
618		ppt->msix.arg[idx].msg_data = msg;
619
620		/* Setup the MSI-X interrupt */
621		error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
622				       INTR_TYPE_NET | INTR_MPSAFE,
623				       pptintr, NULL, &ppt->msix.arg[idx],
624				       &ppt->msix.cookie[idx]);
625
626		if (error != 0) {
627			bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
628			bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
629			ppt->msix.cookie[idx] = NULL;
630			ppt->msix.res[idx] = NULL;
631			return (ENXIO);
632		}
633	} else {
634		/* Masked, tear it down if it's already been set up */
635		ppt_teardown_msix_intr(ppt, idx);
636	}
637
638	return (0);
639}
640