1221828Sgrehan/*-
2221828Sgrehan * Copyright (c) 2011 NetApp, Inc.
3221828Sgrehan * All rights reserved.
4221828Sgrehan *
5221828Sgrehan * Redistribution and use in source and binary forms, with or without
6221828Sgrehan * modification, are permitted provided that the following conditions
7221828Sgrehan * are met:
8221828Sgrehan * 1. Redistributions of source code must retain the above copyright
9221828Sgrehan *    notice, this list of conditions and the following disclaimer.
10221828Sgrehan * 2. Redistributions in binary form must reproduce the above copyright
11221828Sgrehan *    notice, this list of conditions and the following disclaimer in the
12221828Sgrehan *    documentation and/or other materials provided with the distribution.
13221828Sgrehan *
14221828Sgrehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15221828Sgrehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16221828Sgrehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17221828Sgrehan * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18221828Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19221828Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20221828Sgrehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21221828Sgrehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22221828Sgrehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23221828Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24221828Sgrehan * SUCH DAMAGE.
25221828Sgrehan *
26221828Sgrehan * $FreeBSD$
27221828Sgrehan */
28221828Sgrehan
29221828Sgrehan#include <sys/cdefs.h>
30221828Sgrehan__FBSDID("$FreeBSD$");
31221828Sgrehan
32221828Sgrehan#include <sys/param.h>
33221828Sgrehan#include <sys/types.h>
34221828Sgrehan#include <sys/pciio.h>
35221828Sgrehan#include <sys/ioctl.h>
36221828Sgrehan
37221828Sgrehan#include <dev/io/iodev.h>
38245749Sneel#include <dev/pci/pcireg.h>
39245749Sneel
40221828Sgrehan#include <machine/iodev.h>
41221828Sgrehan
42221828Sgrehan#include <stdio.h>
43221828Sgrehan#include <stdlib.h>
44221828Sgrehan#include <string.h>
45221828Sgrehan#include <errno.h>
46221828Sgrehan#include <fcntl.h>
47221828Sgrehan#include <unistd.h>
48221828Sgrehan
49221828Sgrehan#include <machine/vmm.h>
50221828Sgrehan#include <vmmapi.h>
51221828Sgrehan#include "pci_emul.h"
52241744Sgrehan#include "mem.h"
53221828Sgrehan
/* Device node used for the PCIOC* config-space and BAR ioctls. */
54221828Sgrehan#ifndef _PATH_DEVPCI
55221828Sgrehan#define	_PATH_DEVPCI	"/dev/pci"
56221828Sgrehan#endif
57221828Sgrehan
/* Device node used for the IODEV_PIO port I/O ioctl. */
58221828Sgrehan#ifndef	_PATH_DEVIO
59221828Sgrehan#define	_PATH_DEVIO	"/dev/io"
60221828Sgrehan#endif
61221828Sgrehan
/*
 * When defined, an MSI capability is crafted for devices that expose a
 * capability list but no MSI capability of their own.
 */
62221828Sgrehan#define	LEGACY_SUPPORT	1
63221828Sgrehan
/* Number of MSI-X table entries: the table-size field of 'ctrl' plus one. */
64245749Sneel#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
/* Length in bytes of the MSI-X capability structure in config space. */
65234761Sgrehan#define MSIX_CAPLEN 12
66234761Sgrehan
/*
 * Descriptors for _PATH_DEVPCI and _PATH_DEVIO; -1 until opened by
 * passthru_init().
 */
67221828Sgrehanstatic int pcifd = -1;
68221828Sgrehanstatic int iofd = -1;
69221828Sgrehan
/*
 * Per-device state for one passed-through PCI function.
 */
70221828Sgrehanstruct passthru_softc {
	/* Emulated device instance this state belongs to. */
71221828Sgrehan	struct pci_devinst *psc_pi;
	/* Cached type, size and address of each "real" BAR. */
72221828Sgrehan	struct pcibar psc_bar[PCI_BARMAX + 1];
	/*
	 * MSI capability: config-space offset (0 if none), cached message
	 * control value, and non-zero 'emulated' when the capability was
	 * crafted by LEGACY_SUPPORT rather than found on the device.
	 */
73221828Sgrehan	struct {
74221828Sgrehan		int		capoff;
75221828Sgrehan		int		msgctrl;
76221828Sgrehan		int		emulated;
77221828Sgrehan	} psc_msi;
	/* MSI-X capability: config-space offset (0 if none). */
78234761Sgrehan	struct {
79234761Sgrehan		int		capoff;
80234761Sgrehan	} psc_msix;
	/* Bus/device/function selector of the physical device. */
81221828Sgrehan	struct pcisel psc_sel;
82221828Sgrehan};
83221828Sgrehan
84221828Sgrehanstatic int
85221828Sgrehanmsi_caplen(int msgctrl)
86221828Sgrehan{
87221828Sgrehan	int len;
88221828Sgrehan
89221828Sgrehan	len = 10;		/* minimum length of msi capability */
90221828Sgrehan
91221828Sgrehan	if (msgctrl & PCIM_MSICTRL_64BIT)
92221828Sgrehan		len += 4;
93221828Sgrehan
94221828Sgrehan#if 0
95221828Sgrehan	/*
96221828Sgrehan	 * Ignore the 'mask' and 'pending' bits in the MSI capability.
97221828Sgrehan	 * We'll let the guest manipulate them directly.
98221828Sgrehan	 */
99221828Sgrehan	if (msgctrl & PCIM_MSICTRL_VECTOR)
100221828Sgrehan		len += 10;
101221828Sgrehan#endif
102221828Sgrehan
103221828Sgrehan	return (len);
104221828Sgrehan}
105221828Sgrehan
106221828Sgrehanstatic uint32_t
107221828Sgrehanread_config(const struct pcisel *sel, long reg, int width)
108221828Sgrehan{
109221828Sgrehan	struct pci_io pi;
110221828Sgrehan
111221828Sgrehan	bzero(&pi, sizeof(pi));
112221828Sgrehan	pi.pi_sel = *sel;
113221828Sgrehan	pi.pi_reg = reg;
114221828Sgrehan	pi.pi_width = width;
115221828Sgrehan
116221828Sgrehan	if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
117221828Sgrehan		return (0);				/* XXX */
118221828Sgrehan	else
119221828Sgrehan		return (pi.pi_data);
120221828Sgrehan}
121221828Sgrehan
122221828Sgrehanstatic void
123221828Sgrehanwrite_config(const struct pcisel *sel, long reg, int width, uint32_t data)
124221828Sgrehan{
125221828Sgrehan	struct pci_io pi;
126221828Sgrehan
127221828Sgrehan	bzero(&pi, sizeof(pi));
128221828Sgrehan	pi.pi_sel = *sel;
129221828Sgrehan	pi.pi_reg = reg;
130221828Sgrehan	pi.pi_width = width;
131221828Sgrehan	pi.pi_data = data;
132221828Sgrehan
133221828Sgrehan	(void)ioctl(pcifd, PCIOCWRITE, &pi);		/* XXX */
134221828Sgrehan}
135221828Sgrehan
136221828Sgrehan#ifdef LEGACY_SUPPORT
137221828Sgrehanstatic int
138221828Sgrehanpassthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
139221828Sgrehan{
140221828Sgrehan	int capoff, i;
141221828Sgrehan	struct msicap msicap;
142221828Sgrehan	u_char *capdata;
143221828Sgrehan
144221828Sgrehan	pci_populate_msicap(&msicap, msgnum, nextptr);
145221828Sgrehan
146221828Sgrehan	/*
147221828Sgrehan	 * XXX
148221828Sgrehan	 * Copy the msi capability structure in the last 16 bytes of the
149221828Sgrehan	 * config space. This is wrong because it could shadow something
150221828Sgrehan	 * useful to the device.
151221828Sgrehan	 */
152221828Sgrehan	capoff = 256 - roundup(sizeof(msicap), 4);
153221828Sgrehan	capdata = (u_char *)&msicap;
154221828Sgrehan	for (i = 0; i < sizeof(msicap); i++)
155221828Sgrehan		pci_set_cfgdata8(pi, capoff + i, capdata[i]);
156221828Sgrehan
157221828Sgrehan	return (capoff);
158221828Sgrehan}
159221828Sgrehan#endif	/* LEGACY_SUPPORT */
160221828Sgrehan
161221828Sgrehanstatic int
162221828Sgrehancfginitmsi(struct passthru_softc *sc)
163221828Sgrehan{
164245749Sneel	int i, ptr, capptr, cap, sts, caplen, table_size;
165221828Sgrehan	uint32_t u32;
166221828Sgrehan	struct pcisel sel;
167221828Sgrehan	struct pci_devinst *pi;
168234761Sgrehan	struct msixcap msixcap;
169234761Sgrehan	uint32_t *msixcap_ptr;
170221828Sgrehan
171221828Sgrehan	pi = sc->psc_pi;
172221828Sgrehan	sel = sc->psc_sel;
173221828Sgrehan
174221828Sgrehan	/*
175221828Sgrehan	 * Parse the capabilities and cache the location of the MSI
176234761Sgrehan	 * and MSI-X capabilities.
177221828Sgrehan	 */
178221828Sgrehan	sts = read_config(&sel, PCIR_STATUS, 2);
179221828Sgrehan	if (sts & PCIM_STATUS_CAPPRESENT) {
180221828Sgrehan		ptr = read_config(&sel, PCIR_CAP_PTR, 1);
181221828Sgrehan		while (ptr != 0 && ptr != 0xff) {
182221828Sgrehan			cap = read_config(&sel, ptr + PCICAP_ID, 1);
183221828Sgrehan			if (cap == PCIY_MSI) {
184221828Sgrehan				/*
185221828Sgrehan				 * Copy the MSI capability into the config
186221828Sgrehan				 * space of the emulated pci device
187221828Sgrehan				 */
188221828Sgrehan				sc->psc_msi.capoff = ptr;
189221828Sgrehan				sc->psc_msi.msgctrl = read_config(&sel,
190221828Sgrehan								  ptr + 2, 2);
191221828Sgrehan				sc->psc_msi.emulated = 0;
192221828Sgrehan				caplen = msi_caplen(sc->psc_msi.msgctrl);
193234761Sgrehan				capptr = ptr;
194221828Sgrehan				while (caplen > 0) {
195234761Sgrehan					u32 = read_config(&sel, capptr, 4);
196234761Sgrehan					pci_set_cfgdata32(pi, capptr, u32);
197221828Sgrehan					caplen -= 4;
198234761Sgrehan					capptr += 4;
199221828Sgrehan				}
200234761Sgrehan			} else if (cap == PCIY_MSIX) {
201234761Sgrehan				/*
202234761Sgrehan				 * Copy the MSI-X capability
203234761Sgrehan				 */
204234761Sgrehan				sc->psc_msix.capoff = ptr;
205234761Sgrehan				caplen = 12;
206234761Sgrehan				msixcap_ptr = (uint32_t*) &msixcap;
207234761Sgrehan				capptr = ptr;
208234761Sgrehan				while (caplen > 0) {
209234761Sgrehan					u32 = read_config(&sel, capptr, 4);
210234761Sgrehan					*msixcap_ptr = u32;
211234761Sgrehan					pci_set_cfgdata32(pi, capptr, u32);
212234761Sgrehan					caplen -= 4;
213234761Sgrehan					capptr += 4;
214234761Sgrehan					msixcap_ptr++;
215234761Sgrehan				}
216221828Sgrehan			}
217221828Sgrehan			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
218221828Sgrehan		}
219221828Sgrehan	}
220221828Sgrehan
221241744Sgrehan	if (sc->psc_msix.capoff != 0) {
222241744Sgrehan		pi->pi_msix.pba_bar =
223245749Sneel		    msixcap.pba_info & PCIM_MSIX_BIR_MASK;
224241744Sgrehan		pi->pi_msix.pba_offset =
225245749Sneel		    msixcap.pba_info & ~PCIM_MSIX_BIR_MASK;
226241744Sgrehan		pi->pi_msix.table_bar =
227245749Sneel		    msixcap.table_info & PCIM_MSIX_BIR_MASK;
228241744Sgrehan		pi->pi_msix.table_offset =
229245749Sneel		    msixcap.table_info & ~PCIM_MSIX_BIR_MASK;
230241744Sgrehan		pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
231268887Sjhb		pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count);
232245749Sneel
233245749Sneel		/* Allocate the emulated MSI-X table array */
234245749Sneel		table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
235268953Sjhb		pi->pi_msix.table = calloc(1, table_size);
236245749Sneel
237245749Sneel		/* Mask all table entries */
238245749Sneel		for (i = 0; i < pi->pi_msix.table_count; i++) {
239245749Sneel			pi->pi_msix.table[i].vector_control |=
240245749Sneel						PCIM_MSIX_VCTRL_MASK;
241245749Sneel		}
242241744Sgrehan	}
243234761Sgrehan
244221828Sgrehan#ifdef LEGACY_SUPPORT
245221828Sgrehan	/*
246221828Sgrehan	 * If the passthrough device does not support MSI then craft a
247221828Sgrehan	 * MSI capability for it. We link the new MSI capability at the
248221828Sgrehan	 * head of the list of capabilities.
249221828Sgrehan	 */
250221828Sgrehan	if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) {
251221828Sgrehan		int origptr, msiptr;
252221828Sgrehan		origptr = read_config(&sel, PCIR_CAP_PTR, 1);
253221828Sgrehan		msiptr = passthru_add_msicap(pi, 1, origptr);
254221828Sgrehan		sc->psc_msi.capoff = msiptr;
255221828Sgrehan		sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2);
256221828Sgrehan		sc->psc_msi.emulated = 1;
257221828Sgrehan		pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr);
258221828Sgrehan	}
259221828Sgrehan#endif
260221828Sgrehan
261234761Sgrehan	/* Make sure one of the capabilities is present */
262234761Sgrehan	if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
263221828Sgrehan		return (-1);
264221828Sgrehan	else
265221828Sgrehan		return (0);
266221828Sgrehan}
267221828Sgrehan
268241744Sgrehanstatic uint64_t
269241744Sgrehanmsix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
270234761Sgrehan{
271234761Sgrehan	struct pci_devinst *pi;
272241744Sgrehan	struct msix_table_entry *entry;
273234761Sgrehan	uint8_t *src8;
274234761Sgrehan	uint16_t *src16;
275234761Sgrehan	uint32_t *src32;
276234761Sgrehan	uint64_t *src64;
277241744Sgrehan	uint64_t data;
278241744Sgrehan	size_t entry_offset;
279241744Sgrehan	int index;
280234761Sgrehan
281234761Sgrehan	pi = sc->psc_pi;
282268887Sjhb	if (offset < pi->pi_msix.table_offset)
283268887Sjhb		return (-1);
284268887Sjhb
285248171Sneel	offset -= pi->pi_msix.table_offset;
286234761Sgrehan	index = offset / MSIX_TABLE_ENTRY_SIZE;
287245749Sneel	if (index >= pi->pi_msix.table_count)
288245749Sneel		return (-1);
289245749Sneel
290234761Sgrehan	entry = &pi->pi_msix.table[index];
291245749Sneel	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
292234761Sgrehan
293234761Sgrehan	switch(size) {
294234761Sgrehan	case 1:
295241744Sgrehan		src8 = (uint8_t *)((void *)entry + entry_offset);
296241744Sgrehan		data = *src8;
297234761Sgrehan		break;
298234761Sgrehan	case 2:
299241744Sgrehan		src16 = (uint16_t *)((void *)entry + entry_offset);
300241744Sgrehan		data = *src16;
301234761Sgrehan		break;
302234761Sgrehan	case 4:
303241744Sgrehan		src32 = (uint32_t *)((void *)entry + entry_offset);
304241744Sgrehan		data = *src32;
305234761Sgrehan		break;
306234761Sgrehan	case 8:
307241744Sgrehan		src64 = (uint64_t *)((void *)entry + entry_offset);
308241744Sgrehan		data = *src64;
309234761Sgrehan		break;
310234761Sgrehan	default:
311234761Sgrehan		return (-1);
312234761Sgrehan	}
313234761Sgrehan
314241744Sgrehan	return (data);
315234761Sgrehan}
316234761Sgrehan
317241744Sgrehanstatic void
318241744Sgrehanmsix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
319241744Sgrehan		 uint64_t offset, int size, uint64_t data)
320234761Sgrehan{
321234761Sgrehan	struct pci_devinst *pi;
322241744Sgrehan	struct msix_table_entry *entry;
323234761Sgrehan	uint32_t *dest;
324241744Sgrehan	size_t entry_offset;
325234761Sgrehan	uint32_t vector_control;
326241744Sgrehan	int error, index;
327234761Sgrehan
328234761Sgrehan	pi = sc->psc_pi;
329268887Sjhb	if (offset < pi->pi_msix.table_offset)
330268887Sjhb		return;
331268887Sjhb
332248171Sneel	offset -= pi->pi_msix.table_offset;
333234761Sgrehan	index = offset / MSIX_TABLE_ENTRY_SIZE;
334245749Sneel	if (index >= pi->pi_msix.table_count)
335245749Sneel		return;
336245749Sneel
337234761Sgrehan	entry = &pi->pi_msix.table[index];
338245749Sneel	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
339234761Sgrehan
340234761Sgrehan	/* Only 4 byte naturally-aligned writes are supported */
341241744Sgrehan	assert(size == 4);
342241744Sgrehan	assert(entry_offset % 4 == 0);
343241744Sgrehan
344241744Sgrehan	vector_control = entry->vector_control;
345241744Sgrehan	dest = (uint32_t *)((void *)entry + entry_offset);
346241744Sgrehan	*dest = data;
347241744Sgrehan	/* If MSI-X hasn't been enabled, do nothing */
348241744Sgrehan	if (pi->pi_msix.enabled) {
349241744Sgrehan		/* If the entry is masked, don't set it up */
350241744Sgrehan		if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
351241744Sgrehan		    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
352262350Sjhb			error = vm_setup_pptdev_msix(ctx, vcpu,
353262350Sjhb			    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
354262350Sjhb			    sc->psc_sel.pc_func, index, entry->addr,
355262350Sjhb			    entry->msg_data, entry->vector_control);
356234761Sgrehan		}
357234761Sgrehan	}
358234761Sgrehan}
359234761Sgrehan
360234761Sgrehanstatic int
361234761Sgrehaninit_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
362234761Sgrehan{
363246191Sneel	int b, s, f;
364246191Sneel	int error, idx;
365268887Sjhb	size_t len, remaining;
366268887Sjhb	uint32_t table_size, table_offset;
367268887Sjhb	uint32_t pba_size, pba_offset;
368234761Sgrehan	vm_paddr_t start;
369234761Sgrehan	struct pci_devinst *pi = sc->psc_pi;
370234761Sgrehan
371246190Sneel	assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0);
372246190Sneel
373246191Sneel	b = sc->psc_sel.pc_bus;
374246191Sneel	s = sc->psc_sel.pc_dev;
375246191Sneel	f = sc->psc_sel.pc_func;
376246191Sneel
377234761Sgrehan	/*
378234761Sgrehan	 * If the MSI-X table BAR maps memory intended for
379234761Sgrehan	 * other uses, it is at least assured that the table
380234761Sgrehan	 * either resides in its own page within the region,
381234761Sgrehan	 * or it resides in a page shared with only the PBA.
382234761Sgrehan	 */
383268887Sjhb	table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
384241744Sgrehan
385268887Sjhb	table_size = pi->pi_msix.table_offset - table_offset;
386268887Sjhb	table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
387246191Sneel	table_size = roundup2(table_size, 4096);
388246191Sneel
389268887Sjhb	if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) {
390268887Sjhb		pba_offset = pi->pi_msix.pba_offset;
391268887Sjhb		pba_size = pi->pi_msix.pba_size;
392268887Sjhb		if (pba_offset >= table_offset + table_size ||
393268887Sjhb		    table_offset >= pba_offset + pba_size) {
394268887Sjhb			/*
395268887Sjhb			 * The PBA can reside in the same BAR as the MSI-x
396268887Sjhb			 * tables as long as it does not overlap with any
397268887Sjhb			 * naturally aligned page occupied by the tables.
398268887Sjhb			 */
399268887Sjhb		} else {
400268887Sjhb			/* Need to also emulate the PBA, not supported yet */
401268887Sjhb			printf("Unsupported MSI-X configuration: %d/%d/%d\n",
402268887Sjhb		            b, s, f);
403268887Sjhb			return (-1);
404268887Sjhb		}
405268887Sjhb	}
406268887Sjhb
407234761Sgrehan	idx = pi->pi_msix.table_bar;
408246191Sneel	start = pi->pi_bar[idx].addr;
409246191Sneel	remaining = pi->pi_bar[idx].size;
410234761Sgrehan
411246191Sneel	/* Map everything before the MSI-X table */
412268887Sjhb	if (table_offset > 0) {
413268887Sjhb		len = table_offset;
414246191Sneel		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
415246191Sneel		if (error)
416246191Sneel			return (error);
417246191Sneel
418246191Sneel		base += len;
419246191Sneel		start += len;
420246191Sneel		remaining -= len;
421234761Sgrehan	}
422246191Sneel
423246191Sneel	/* Skip the MSI-X table */
424246191Sneel	base += table_size;
425246191Sneel	start += table_size;
426246191Sneel	remaining -= table_size;
427246191Sneel
428246191Sneel	/* Map everything beyond the end of the MSI-X table */
429246191Sneel	if (remaining > 0) {
430246191Sneel		len = remaining;
431246191Sneel		error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base);
432246191Sneel		if (error)
433246191Sneel			return (error);
434246191Sneel	}
435246191Sneel
436246191Sneel	return (0);
437234761Sgrehan}
438234761Sgrehan
439234761Sgrehanstatic int
440221828Sgrehancfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
441221828Sgrehan{
442221828Sgrehan	int i, error;
443221828Sgrehan	struct pci_devinst *pi;
444221828Sgrehan	struct pci_bar_io bar;
445221828Sgrehan	enum pcibar_type bartype;
446268887Sjhb	uint64_t base, size;
447221828Sgrehan
448221828Sgrehan	pi = sc->psc_pi;
449221828Sgrehan
450221828Sgrehan	/*
451221828Sgrehan	 * Initialize BAR registers
452221828Sgrehan	 */
453221828Sgrehan	for (i = 0; i <= PCI_BARMAX; i++) {
454221828Sgrehan		bzero(&bar, sizeof(bar));
455221828Sgrehan		bar.pbi_sel = sc->psc_sel;
456221828Sgrehan		bar.pbi_reg = PCIR_BAR(i);
457221828Sgrehan
458221828Sgrehan		if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
459221828Sgrehan			continue;
460221828Sgrehan
461221828Sgrehan		if (PCI_BAR_IO(bar.pbi_base)) {
462221828Sgrehan			bartype = PCIBAR_IO;
463221828Sgrehan			base = bar.pbi_base & PCIM_BAR_IO_BASE;
464221828Sgrehan		} else {
465221828Sgrehan			switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) {
466221828Sgrehan			case PCIM_BAR_MEM_64:
467221828Sgrehan				bartype = PCIBAR_MEM64;
468221828Sgrehan				break;
469221828Sgrehan			default:
470221828Sgrehan				bartype = PCIBAR_MEM32;
471221828Sgrehan				break;
472221828Sgrehan			}
473221828Sgrehan			base = bar.pbi_base & PCIM_BAR_MEM_BASE;
474221828Sgrehan		}
475268887Sjhb		size = bar.pbi_length;
476221828Sgrehan
477268887Sjhb		if (bartype != PCIBAR_IO) {
478268887Sjhb			if (((base | size) & PAGE_MASK) != 0) {
479268887Sjhb				printf("passthru device %d/%d/%d BAR %d: "
480268887Sjhb				    "base %#lx or size %#lx not page aligned\n",
481268887Sjhb				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
482268887Sjhb				    sc->psc_sel.pc_func, i, base, size);
483268887Sjhb				return (-1);
484268887Sjhb			}
485268887Sjhb		}
486268887Sjhb
487221828Sgrehan		/* Cache information about the "real" BAR */
488221828Sgrehan		sc->psc_bar[i].type = bartype;
489268887Sjhb		sc->psc_bar[i].size = size;
490221828Sgrehan		sc->psc_bar[i].addr = base;
491221828Sgrehan
492221828Sgrehan		/* Allocate the BAR in the guest I/O or MMIO space */
493268887Sjhb		error = pci_emul_alloc_pbar(pi, i, base, bartype, size);
494221828Sgrehan		if (error)
495221828Sgrehan			return (-1);
496221828Sgrehan
497234761Sgrehan		/* The MSI-X table needs special handling */
498246190Sneel		if (i == pci_msix_table_bar(pi)) {
499234761Sgrehan			error = init_msix_table(ctx, sc, base);
500234761Sgrehan			if (error)
501234761Sgrehan				return (-1);
502234761Sgrehan		} else if (bartype != PCIBAR_IO) {
503268887Sjhb			/* Map the physical BAR in the guest MMIO space */
504221828Sgrehan			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
505221828Sgrehan				sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
506221828Sgrehan				pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
507221828Sgrehan			if (error)
508221828Sgrehan				return (-1);
509221828Sgrehan		}
510221828Sgrehan
511221828Sgrehan		/*
512221828Sgrehan		 * 64-bit BAR takes up two slots so skip the next one.
513221828Sgrehan		 */
514221828Sgrehan		if (bartype == PCIBAR_MEM64) {
515221828Sgrehan			i++;
516221828Sgrehan			assert(i <= PCI_BARMAX);
517221828Sgrehan			sc->psc_bar[i].type = PCIBAR_MEMHI64;
518221828Sgrehan		}
519221828Sgrehan	}
520221828Sgrehan	return (0);
521221828Sgrehan}
522221828Sgrehan
523221828Sgrehanstatic int
524221828Sgrehancfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
525221828Sgrehan{
526221828Sgrehan	int error;
527221828Sgrehan	struct passthru_softc *sc;
528221828Sgrehan
529221828Sgrehan	error = 1;
530221828Sgrehan	sc = pi->pi_arg;
531221828Sgrehan
532221828Sgrehan	bzero(&sc->psc_sel, sizeof(struct pcisel));
533221828Sgrehan	sc->psc_sel.pc_bus = bus;
534221828Sgrehan	sc->psc_sel.pc_dev = slot;
535221828Sgrehan	sc->psc_sel.pc_func = func;
536221828Sgrehan
537234761Sgrehan	if (cfginitmsi(sc) != 0)
538234761Sgrehan		goto done;
539234761Sgrehan
540221828Sgrehan	if (cfginitbar(ctx, sc) != 0)
541221828Sgrehan		goto done;
542221828Sgrehan
543221828Sgrehan	error = 0;				/* success */
544221828Sgrehandone:
545221828Sgrehan	return (error);
546221828Sgrehan}
547221828Sgrehan
548221828Sgrehanstatic int
549221828Sgrehanpassthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
550221828Sgrehan{
551221828Sgrehan	int bus, slot, func, error;
552221828Sgrehan	struct passthru_softc *sc;
553221828Sgrehan
554221828Sgrehan	sc = NULL;
555221828Sgrehan	error = 1;
556221828Sgrehan
557221828Sgrehan	if (pcifd < 0) {
558221828Sgrehan		pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
559221828Sgrehan		if (pcifd < 0)
560221828Sgrehan			goto done;
561221828Sgrehan	}
562221828Sgrehan
563221828Sgrehan	if (iofd < 0) {
564221828Sgrehan		iofd = open(_PATH_DEVIO, O_RDWR, 0);
565221828Sgrehan		if (iofd < 0)
566221828Sgrehan			goto done;
567221828Sgrehan	}
568221828Sgrehan
569241744Sgrehan	if (opts == NULL ||
570241744Sgrehan	    sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
571221828Sgrehan		goto done;
572221828Sgrehan
573221828Sgrehan	if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
574221828Sgrehan		goto done;
575221828Sgrehan
576268953Sjhb	sc = calloc(1, sizeof(struct passthru_softc));
577221828Sgrehan
578221828Sgrehan	pi->pi_arg = sc;
579221828Sgrehan	sc->psc_pi = pi;
580221828Sgrehan
581221828Sgrehan	/* initialize config space */
582241744Sgrehan	if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
583221828Sgrehan		goto done;
584221828Sgrehan
585221828Sgrehan	error = 0;		/* success */
586221828Sgrehandone:
587221828Sgrehan	if (error) {
588221828Sgrehan		free(sc);
589221828Sgrehan		vm_unassign_pptdev(ctx, bus, slot, func);
590221828Sgrehan	}
591221828Sgrehan	return (error);
592221828Sgrehan}
593221828Sgrehan
594221828Sgrehanstatic int
595221828Sgrehanbar_access(int coff)
596221828Sgrehan{
597221828Sgrehan	if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1))
598221828Sgrehan		return (1);
599221828Sgrehan	else
600221828Sgrehan		return (0);
601221828Sgrehan}
602221828Sgrehan
603221828Sgrehanstatic int
604221828Sgrehanmsicap_access(struct passthru_softc *sc, int coff)
605221828Sgrehan{
606221828Sgrehan	int caplen;
607221828Sgrehan
608221828Sgrehan	if (sc->psc_msi.capoff == 0)
609221828Sgrehan		return (0);
610221828Sgrehan
611221828Sgrehan	caplen = msi_caplen(sc->psc_msi.msgctrl);
612221828Sgrehan
613221828Sgrehan	if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen)
614221828Sgrehan		return (1);
615221828Sgrehan	else
616221828Sgrehan		return (0);
617221828Sgrehan}
618221828Sgrehan
619234761Sgrehanstatic int
620234761Sgrehanmsixcap_access(struct passthru_softc *sc, int coff)
621234761Sgrehan{
622234761Sgrehan	if (sc->psc_msix.capoff == 0)
623234761Sgrehan		return (0);
624234761Sgrehan
625234761Sgrehan	return (coff >= sc->psc_msix.capoff &&
626234761Sgrehan	        coff < sc->psc_msix.capoff + MSIX_CAPLEN);
627234761Sgrehan}
628234761Sgrehan
629221828Sgrehanstatic int
630241744Sgrehanpassthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
631241744Sgrehan		 int coff, int bytes, uint32_t *rv)
632221828Sgrehan{
633221828Sgrehan	struct passthru_softc *sc;
634221828Sgrehan
635221828Sgrehan	sc = pi->pi_arg;
636221828Sgrehan
637221828Sgrehan	/*
638221828Sgrehan	 * PCI BARs and MSI capability is emulated.
639221828Sgrehan	 */
640221828Sgrehan	if (bar_access(coff) || msicap_access(sc, coff))
641221828Sgrehan		return (-1);
642221828Sgrehan
643221828Sgrehan#ifdef LEGACY_SUPPORT
644221828Sgrehan	/*
645221828Sgrehan	 * Emulate PCIR_CAP_PTR if this device does not support MSI capability
646221828Sgrehan	 * natively.
647221828Sgrehan	 */
648221828Sgrehan	if (sc->psc_msi.emulated) {
649221828Sgrehan		if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4)
650221828Sgrehan			return (-1);
651221828Sgrehan	}
652221828Sgrehan#endif
653221828Sgrehan
654221828Sgrehan	/* Everything else just read from the device's config space */
655221828Sgrehan	*rv = read_config(&sc->psc_sel, coff, bytes);
656221828Sgrehan
657221828Sgrehan	return (0);
658221828Sgrehan}
659221828Sgrehan
660221828Sgrehanstatic int
661241744Sgrehanpassthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
662241744Sgrehan		  int coff, int bytes, uint32_t val)
663221828Sgrehan{
664234761Sgrehan	int error, msix_table_entries, i;
665221828Sgrehan	struct passthru_softc *sc;
666221828Sgrehan
667221828Sgrehan	sc = pi->pi_arg;
668221828Sgrehan
669221828Sgrehan	/*
670221828Sgrehan	 * PCI BARs are emulated
671221828Sgrehan	 */
672221828Sgrehan	if (bar_access(coff))
673221828Sgrehan		return (-1);
674221828Sgrehan
675221828Sgrehan	/*
676221828Sgrehan	 * MSI capability is emulated
677221828Sgrehan	 */
678221828Sgrehan	if (msicap_access(sc, coff)) {
679221828Sgrehan		msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
680221828Sgrehan
681262350Sjhb		error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
682262350Sjhb			sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
683262350Sjhb			pi->pi_msi.addr, pi->pi_msi.msg_data,
684262350Sjhb			pi->pi_msi.maxmsgnum);
685221828Sgrehan		if (error != 0) {
686262350Sjhb			printf("vm_setup_pptdev_msi error %d\r\n", errno);
687221828Sgrehan			exit(1);
688221828Sgrehan		}
689221828Sgrehan		return (0);
690221828Sgrehan	}
691221828Sgrehan
692234761Sgrehan	if (msixcap_access(sc, coff)) {
693234761Sgrehan		msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
694234761Sgrehan		if (pi->pi_msix.enabled) {
695234761Sgrehan			msix_table_entries = pi->pi_msix.table_count;
696234761Sgrehan			for (i = 0; i < msix_table_entries; i++) {
697262350Sjhb				error = vm_setup_pptdev_msix(ctx, vcpu,
698262350Sjhb				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
699262350Sjhb				    sc->psc_sel.pc_func, i,
700262350Sjhb				    pi->pi_msix.table[i].addr,
701262350Sjhb				    pi->pi_msix.table[i].msg_data,
702262350Sjhb				    pi->pi_msix.table[i].vector_control);
703234761Sgrehan
704234761Sgrehan				if (error) {
705262350Sjhb					printf("vm_setup_pptdev_msix error "
706262350Sjhb					    "%d\r\n", errno);
707234761Sgrehan					exit(1);
708234761Sgrehan				}
709234761Sgrehan			}
710234761Sgrehan		}
711234761Sgrehan		return (0);
712234761Sgrehan	}
713234761Sgrehan
714221828Sgrehan#ifdef LEGACY_SUPPORT
715221828Sgrehan	/*
716221828Sgrehan	 * If this device does not support MSI natively then we cannot let
717221828Sgrehan	 * the guest disable legacy interrupts from the device. It is the
718221828Sgrehan	 * legacy interrupt that is triggering the virtual MSI to the guest.
719221828Sgrehan	 */
720221828Sgrehan	if (sc->psc_msi.emulated && pci_msi_enabled(pi)) {
721221828Sgrehan		if (coff == PCIR_COMMAND && bytes == 2)
722221828Sgrehan			val &= ~PCIM_CMD_INTxDIS;
723221828Sgrehan	}
724221828Sgrehan#endif
725221828Sgrehan
726221828Sgrehan	write_config(&sc->psc_sel, coff, bytes, val);
727221828Sgrehan
728221828Sgrehan	return (0);
729221828Sgrehan}
730221828Sgrehan
731221828Sgrehanstatic void
732241744Sgrehanpassthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
733241744Sgrehan	       uint64_t offset, int size, uint64_t value)
734221828Sgrehan{
735221828Sgrehan	struct passthru_softc *sc;
736221828Sgrehan	struct iodev_pio_req pio;
737221828Sgrehan
738221828Sgrehan	sc = pi->pi_arg;
739221828Sgrehan
740246190Sneel	if (baridx == pci_msix_table_bar(pi)) {
741241744Sgrehan		msix_table_write(ctx, vcpu, sc, offset, size, value);
742241744Sgrehan	} else {
743241744Sgrehan		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
744241744Sgrehan		bzero(&pio, sizeof(struct iodev_pio_req));
745241744Sgrehan		pio.access = IODEV_PIO_WRITE;
746241744Sgrehan		pio.port = sc->psc_bar[baridx].addr + offset;
747241744Sgrehan		pio.width = size;
748241744Sgrehan		pio.val = value;
749241744Sgrehan
750241744Sgrehan		(void)ioctl(iofd, IODEV_PIO, &pio);
751241744Sgrehan	}
752221828Sgrehan}
753221828Sgrehan
754241744Sgrehanstatic uint64_t
755241744Sgrehanpassthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
756241744Sgrehan	      uint64_t offset, int size)
757221828Sgrehan{
758221828Sgrehan	struct passthru_softc *sc;
759221828Sgrehan	struct iodev_pio_req pio;
760241744Sgrehan	uint64_t val;
761221828Sgrehan
762221828Sgrehan	sc = pi->pi_arg;
763221828Sgrehan
764246190Sneel	if (baridx == pci_msix_table_bar(pi)) {
765241744Sgrehan		val = msix_table_read(sc, offset, size);
766241744Sgrehan	} else {
767241744Sgrehan		assert(pi->pi_bar[baridx].type == PCIBAR_IO);
768241744Sgrehan		bzero(&pio, sizeof(struct iodev_pio_req));
769241744Sgrehan		pio.access = IODEV_PIO_READ;
770241744Sgrehan		pio.port = sc->psc_bar[baridx].addr + offset;
771241744Sgrehan		pio.width = size;
772241744Sgrehan		pio.val = 0;
773221828Sgrehan
774241744Sgrehan		(void)ioctl(iofd, IODEV_PIO, &pio);
775221828Sgrehan
776241744Sgrehan		val = pio.val;
777241744Sgrehan	}
778241744Sgrehan
779241744Sgrehan	return (val);
780221828Sgrehan}
781221828Sgrehan
/*
 * Device emulation descriptor for the "passthru" device: wires the init,
 * config-space and BAR access handlers defined in this file into the
 * pci_devemu callbacks, and registers the descriptor with PCI_EMUL_SET.
 */
782221828Sgrehanstruct pci_devemu passthru = {
783221828Sgrehan	.pe_emu		= "passthru",
784221828Sgrehan	.pe_init	= passthru_init,
785221828Sgrehan	.pe_cfgwrite	= passthru_cfgwrite,
786221828Sgrehan	.pe_cfgread	= passthru_cfgread,
787241744Sgrehan	.pe_barwrite 	= passthru_write,
788241744Sgrehan	.pe_barread    	= passthru_read,
789221828Sgrehan};
790221828SgrehanPCI_EMUL_SET(passthru);
791