/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/mman.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>
#include <sysexits.h>

#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>

#include "acpi.h"
#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#ifdef __amd64__
#include "amd64/inout.h"
#endif
#include "mem.h"
#include "pci_emul.h"
#ifdef __amd64__
#include "amd64/pci_lpc.h"
#include "pci_passthru.h"
#endif
#include "qemu_fwcfg.h"

#define CONF1_ADDR_PORT	   0x0cf8
#define CONF1_DATA_PORT	   0x0cfc

#define CONF1_ENABLE	   0x80000000ul

#define	MAXBUSES	(PCI_BUSMAX + 1)
#define MAXSLOTS	(PCI_SLOTMAX + 1)
#define	MAXFUNCS	(PCI_FUNCMAX + 1)

#define GB		(1024 * 1024 * 1024UL)

struct funcinfo {
	nvlist_t *fi_config;
	struct pci_devemu *fi_pde;
	struct pci_devinst *fi_devi;
};

struct intxinfo {
	int		ii_count;
	struct pci_irq	ii_irq;
};

struct slotinfo {
	struct intxinfo si_intpins[4];
	struct funcinfo si_funcs[MAXFUNCS];
};

struct businfo {
	uint16_t iobase, iolimit;		/* I/O window */
	uint32_t membase32, memlimit32;		/* mmio window below 4GB */
	uint64_t membase64, memlimit64;		/* mmio window above 4GB */
	struct slotinfo slotinfo[MAXSLOTS];
};

static struct businfo *pci_businfo[MAXBUSES];

SET_DECLARE(pci_devemu_set, struct pci_devemu);

static uint64_t pci_emul_iobase;
static uint8_t *pci_emul_rombase;
static uint64_t pci_emul_romoffset;
static uint8_t *pci_emul_romlim;
static uint64_t pci_emul_membase32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;

struct pci_bar_allocation {
	TAILQ_ENTRY(pci_bar_allocation) chain;
	struct pci_devinst *pdi;
	int idx;
	enum pcibar_type type;
	uint64_t size;
};

static TAILQ_HEAD(pci_bar_list, pci_bar_allocation) pci_bars =
    TAILQ_HEAD_INITIALIZER(pci_bars);

struct boot_device {
	TAILQ_ENTRY(boot_device) boot_device_chain;
	struct pci_devinst *pdi;
	int bootindex;
};
static TAILQ_HEAD(boot_list, boot_device) boot_devices =
    TAILQ_HEAD_INITIALIZER(boot_devices);

#if defined(__amd64__)
#define	PCI_EMUL_IOBASE		0x2000
#define	PCI_EMUL_IOLIMIT	0x10000
#define	PCI_EMUL_IOMASK		0xffff
/*
 * OVMF always uses 0xc0000000 as the base address for 32-bit PCI MMIO.
 * Don't change this address without changing it in OVMF.
 */
#define	PCI_EMUL_MEMBASE32	0xc0000000
#elif defined(__aarch64__)
#define	PCI_EMUL_IOBASE		0xdf000000UL
#define	PCI_EMUL_IOLIMIT	0xe0000000UL
#define	PCI_EMUL_MEMBASE32	0xa0000000UL
#else
#error Unsupported platform
#endif

#define	PCI_EMUL_ROMSIZE	0x10000000

#define	PCI_EMUL_ECFG_BASE	0xE0000000		    /* 3.5GB */
#define	PCI_EMUL_ECFG_SIZE	(MAXBUSES * 1024 * 1024)    /* 1MB per bus */
#ifdef __amd64__
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
#endif

#define	PCI_EMUL_MEMLIMIT32	PCI_EMUL_ECFG_BASE
#define PCI_EMUL_MEMSIZE64	(32*GB)

static void pci_lintr_route(struct pci_devinst *pi);
static void pci_lintr_update(struct pci_devinst *pi);

static struct pci_devemu *pci_emul_finddev(const char *name);
static void pci_cfgrw(int in, int bus, int slot, int func, int coff,
    int bytes, uint32_t *val);

static __inline void
CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes)
{

	if (bytes == 1)
		pci_set_cfgdata8(pi, coff, val);
	else if (bytes == 2)
		pci_set_cfgdata16(pi, coff, val);
	else
		pci_set_cfgdata32(pi, coff, val);
}

static __inline uint32_t
CFGREAD(struct pci_devinst *pi, int coff, int bytes)
{

	if (bytes == 1)
		return (pci_get_cfgdata8(pi, coff));
	else if (bytes == 2)
		return (pci_get_cfgdata16(pi, coff));
	else
		return (pci_get_cfgdata32(pi, coff));
}

static int
is_pcir_bar(int coff)
{
	return (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1));
}

static int
is_pcir_bios(int coff)
{
	return (coff >= PCIR_BIOS && coff < PCIR_BIOS + 4);
}

/*
 * I/O access
 */

/*
 * Slot options are in the form:
 *
 *  <bus>:<slot>:<func>,<emul>[,<config>]
 *  <slot>[:<func>],<emul>[,<config>]
 *
 *  slot is 0..31
 *  func is 0..7
 *  emul is a string describing the type of PCI device e.g. virtio-net
 *  config is an optional string, depending on the device, that can be
 *  used for configuration.
 *   Examples are:
 *     1,virtio-net,tap0
 *     3:0,dummy
 */
static void
pci_parse_slot_usage(char *aopt)
{

	EPRINTLN("Invalid PCI slot info field \"%s\"", aopt);
}

/*
 * Helper function to parse a list of comma-separated options where
 * each option is formatted as "name[=value]".  If no value is
 * provided, the option is treated as a boolean and is given a value
 * of true.
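 *
 * For example, a (hypothetical) option string such as "mtu=9000,tso"
 * yields the string pair mtu="9000" plus the boolean tso=true.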
 */
int
pci_parse_legacy_config(nvlist_t *nvl, const char *opt)
{
	char *config, *name, *tofree, *value;

	if (opt == NULL)
		return (0);

	config = tofree = strdup(opt);
	while ((name = strsep(&config, ",")) != NULL) {
		value = strchr(name, '=');
		if (value != NULL) {
			*value = '\0';
			value++;
			set_config_value_node(nvl, name, value);
		} else
			set_config_bool_node(nvl, name, true);
	}
	free(tofree);
	return (0);
}

/*
 * PCI device configuration is stored in MIBs that encode the device's
 * location:
 *
 * pci.<bus>.<slot>.<func>
 *
 * Where "bus", "slot", and "func" are all decimal values without
 * leading zeroes.  Each valid device must have a "device" node which
 * identifies the driver model of the device.
 *
 * Device backends can provide a parser for the "config" string.  If
 * a custom parser is not provided, pci_parse_legacy_config() is used
 * to parse the string.
 */
int
pci_parse_slot(char *opt)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct pci_devemu *pde;
	char *emul, *config, *str, *cp;
	int error, bnum, snum, fnum;
	nvlist_t *nvl;

	error = -1;
	str = strdup(opt);

	emul = config = NULL;
	if ((cp = strchr(str, ',')) != NULL) {
		*cp = '\0';
		emul = cp + 1;
		if ((cp = strchr(emul, ',')) != NULL) {
			*cp = '\0';
			config = cp + 1;
		}
	} else {
		pci_parse_slot_usage(opt);
		goto done;
	}

	/* <bus>:<slot>:<func> */
	if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) {
		bnum = 0;
		/* <slot>:<func> */
		if (sscanf(str, "%d:%d", &snum, &fnum) != 2) {
			fnum = 0;
			/* <slot> */
			if (sscanf(str, "%d", &snum) != 1) {
				snum = -1;
			}
		}
	}

	if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS ||
	    fnum < 0 || fnum >= MAXFUNCS) {
		pci_parse_slot_usage(opt);
		goto done;
	}

	pde = pci_emul_finddev(emul);
	if (pde == NULL) {
		EPRINTLN("pci slot %d:%d:%d: unknown device \"%s\"", bnum, snum,
		    fnum, emul);
		goto done;
	}

	snprintf(node_name, sizeof(node_name), "pci.%d.%d.%d", bnum, snum,
	    fnum);
	nvl = find_config_node(node_name);
	if (nvl != NULL) {
		EPRINTLN("pci slot %d:%d:%d already occupied!", bnum, snum,
		    fnum);
		goto done;
	}
	nvl = create_config_node(node_name);
	if (pde->pe_alias != NULL)
		set_config_value_node(nvl, "device", pde->pe_alias);
	else
		set_config_value_node(nvl, "device", pde->pe_emu);

	if (pde->pe_legacy_config != NULL)
		error = pde->pe_legacy_config(nvl, config);
	else
		error = pci_parse_legacy_config(nvl, config);
done:
	free(str);
	return (error);
}

void
pci_print_supported_devices(void)
{
	struct pci_devemu **pdpp, *pdp;

	SET_FOREACH(pdpp, pci_devemu_set) {
		pdp = *pdpp;
		printf("%s\n", pdp->pe_emu);
	}
}

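/*
 * Resolve the value of a config space ID register (vendor, device, ...)
 * from the device's "pcireg" configuration node.  A node value of "host"
 * mirrors the register of the backing host device (amd64 only); a hex
 * string such as "1af4" (hypothetical) supplies the value directly; an
 * absent node falls back to 'def'.
 */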
uint32_t
pci_config_read_reg(const struct pcisel *const host_sel, nvlist_t *nvl,
    const uint32_t reg, const uint8_t size, const uint32_t def)
{
	const char *config;
	const nvlist_t *pci_regs;

	assert(size == 1 || size == 2 || size == 4);

	pci_regs = find_relative_config_node(nvl, "pcireg");
	if (pci_regs == NULL) {
		return (def);
	}

	switch (reg) {
	case PCIR_DEVICE:
		config = get_config_value_node(pci_regs, "device");
		break;
	case PCIR_VENDOR:
		config = get_config_value_node(pci_regs, "vendor");
		break;
	case PCIR_REVID:
		config = get_config_value_node(pci_regs, "revid");
		break;
	case PCIR_SUBVEND_0:
		config = get_config_value_node(pci_regs, "subvendor");
		break;
	case PCIR_SUBDEV_0:
		config = get_config_value_node(pci_regs, "subdevice");
		break;
	default:
		return (-1);
	}

	if (config == NULL) {
		return (def);
	} else if (host_sel != NULL && strcmp(config, "host") == 0) {
#ifdef __amd64__
		return (pci_host_read_config(host_sel, reg, size));
#else
		errx(1, "cannot fetch host PCI configuration");
#endif
	} else {
		return (strtol(config, NULL, 16));
	}
}

static int
pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
{

	if (offset < pi->pi_msix.pba_offset)
		return (0);

	if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
		return (0);
	}

	return (1);
}

int
pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
		     uint64_t value)
{
	int msix_entry_offset;
	int tab_index;
	char *dest;

	/* support only 4 or 8 byte writes */
	if (size != 4 && size != 8)
		return (-1);

	/*
	 * Return if the table index is beyond what the device supports
	 */
	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
	if (tab_index >= pi->pi_msix.table_count)
		return (-1);

	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* support only aligned writes */
	if ((msix_entry_offset % size) != 0)
		return (-1);

	dest = (char *)(pi->pi_msix.table + tab_index);
	dest += msix_entry_offset;

	if (size == 4)
		*((uint32_t *)dest) = value;
	else
		*((uint64_t *)dest) = value;

	return (0);
}

uint64_t
pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
{
	char *dest;
	int msix_entry_offset;
	int tab_index;
	uint64_t retval = ~0;

	/*
	 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
	 * table but we also allow 1 byte access to accommodate reads from
	 * ddb.
	 */
	if (size != 1 && size != 4 && size != 8)
		return (retval);

	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;

	/* support only aligned reads */
	if ((msix_entry_offset % size) != 0) {
		return (retval);
	}

	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

	if (tab_index < pi->pi_msix.table_count) {
		/* valid MSI-X Table access */
		dest = (char *)(pi->pi_msix.table + tab_index);
		dest += msix_entry_offset;

		if (size == 1)
			retval = *((uint8_t *)dest);
		else if (size == 4)
			retval = *((uint32_t *)dest);
		else
			retval = *((uint64_t *)dest);
	} else if (pci_valid_pba_offset(pi, offset)) {
		/* return 0 for PBA access */
		retval = 0;
	}

	return (retval);
}

int
pci_msix_table_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.table_bar);
	else
		return (-1);
}

int
pci_msix_pba_bar(struct pci_devinst *pi)
{

	if (pi->pi_msix.table != NULL)
		return (pi->pi_msix.pba_bar);
	else
		return (-1);
}

#ifdef __amd64__
static int
pci_emul_io_handler(struct vmctx *ctx __unused, int in, int port,
    int bytes, uint32_t *eax, void *arg)
{
	struct pci_devinst *pdi = arg;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int i;

	assert(port >= 0);

	for (i = 0; i <= PCI_BARMAX; i++) {
		if (pdi->pi_bar[i].type == PCIBAR_IO &&
		    (uint64_t)port >= pdi->pi_bar[i].addr &&
		    (uint64_t)port + bytes <=
		    pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
			offset = port - pdi->pi_bar[i].addr;
			if (in)
				*eax = (*pe->pe_barread)(pdi, i,
							 offset, bytes);
			else
				(*pe->pe_barwrite)(pdi, i, offset,
						   bytes, *eax);
			return (0);
		}
	}
	return (-1);
}
#else
static int
pci_emul_iomem_handler(struct vcpu *vcpu __unused, int dir,
    uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
{
	struct pci_devinst *pdi = arg1;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int bidx = (int)arg2;

	assert(bidx <= PCI_BARMAX);
	assert(pdi->pi_bar[bidx].type == PCIBAR_IO);
	assert(addr >= pdi->pi_bar[bidx].addr &&
	       addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
	assert(size == 1 || size == 2 || size == 4);

	offset = addr - pdi->pi_bar[bidx].addr;
	if (dir == MEM_F_READ)
		*val = (*pe->pe_barread)(pdi, bidx, offset, size);
	else
		(*pe->pe_barwrite)(pdi, bidx, offset, size, *val);

	return (0);
}
#endif /* !__amd64__ */

static int
pci_emul_mem_handler(struct vcpu *vcpu __unused, int dir,
    uint64_t addr, int size, uint64_t *val, void *arg1, long arg2)
{
	struct pci_devinst *pdi = arg1;
	struct pci_devemu *pe = pdi->pi_d;
	uint64_t offset;
	int bidx = (int)arg2;

	assert(bidx <= PCI_BARMAX);
	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
	       pdi->pi_bar[bidx].type == PCIBAR_MEM64);
	assert(addr >= pdi->pi_bar[bidx].addr &&
	       addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);

	offset = addr - pdi->pi_bar[bidx].addr;

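	/*
	 * The pe_barread/pe_barwrite callbacks handle at most 4 bytes at a
	 * time, so an 8-byte access is split into two 32-bit halves.
	 */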
	if (dir == MEM_F_WRITE) {
		if (size == 8) {
			(*pe->pe_barwrite)(pdi, bidx, offset,
					   4, *val & 0xffffffff);
			(*pe->pe_barwrite)(pdi, bidx, offset + 4,
					   4, *val >> 32);
		} else {
			(*pe->pe_barwrite)(pdi, bidx, offset,
					   size, *val);
		}
	} else {
		if (size == 8) {
			*val = (*pe->pe_barread)(pdi, bidx,
						 offset, 4);
			*val |= (*pe->pe_barread)(pdi, bidx,
						  offset + 4, 4) << 32;
		} else {
			*val = (*pe->pe_barread)(pdi, bidx,
						 offset, size);
		}
	}

	return (0);
}

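/*
 * Carve 'size' bytes out of the window [*baseptr, limit).  The result is
 * naturally aligned to 'size'.  For example (hypothetical values): with
 * *baseptr = 0x2004 and size = 0x100, the allocation lands at 0x2100 and
 * the allocator advances to 0x2200.
 */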
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
			uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	base = roundup2(*baseptr, size);

	if (base + size <= limit) {
		*addr = base;
		*baseptr = base + size;
		return (0);
	} else
		return (-1);
}

/*
 * Register (or unregister) the MMIO or I/O region associated with the BAR
 * register 'idx' of an emulated pci device.
 */
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
	struct pci_devemu *pe;
	int error;
	enum pcibar_type type;

	pe = pi->pi_d;
	type = pi->pi_bar[idx].type;
	switch (type) {
	case PCIBAR_IO:
	{
#ifdef __amd64__
		struct inout_port iop;

		bzero(&iop, sizeof(struct inout_port));
		iop.name = pi->pi_name;
		iop.port = pi->pi_bar[idx].addr;
		iop.size = pi->pi_bar[idx].size;
		if (registration) {
			iop.flags = IOPORT_F_INOUT;
			iop.handler = pci_emul_io_handler;
			iop.arg = pi;
			error = register_inout(&iop);
		} else
			error = unregister_inout(&iop);
#else
		struct mem_range mr;

		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_iomem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
#endif
		break;
	}
	case PCIBAR_MEM32:
	case PCIBAR_MEM64:
	{
		struct mem_range mr;

		bzero(&mr, sizeof(struct mem_range));
		mr.name = pi->pi_name;
		mr.base = pi->pi_bar[idx].addr;
		mr.size = pi->pi_bar[idx].size;
		if (registration) {
			mr.flags = MEM_F_RW;
			mr.handler = pci_emul_mem_handler;
			mr.arg1 = pi;
			mr.arg2 = idx;
			error = register_mem(&mr);
		} else
			error = unregister_mem(&mr);
		break;
	}
	case PCIBAR_ROM:
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}
	assert(error == 0);

	if (pe->pe_baraddr != NULL)
		(*pe->pe_baraddr)(pi, idx, registration, pi->pi_bar[idx].addr);
}

static void
unregister_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 0);
}

static void
register_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 1);
}

/* Is the ROM enabled for the emulated pci device? */
static int
romen(struct pci_devinst *pi)
{
	return (pi->pi_bar[PCI_ROM_IDX].lobits & PCIM_BIOS_ENABLE) ==
	    PCIM_BIOS_ENABLE;
}

/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_PORTEN);
}

/* Are we decoding memory accesses for the emulated pci device? */
static int
memen(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);

	return (cmd & PCIM_CMD_MEMEN);
}

/*
 * Update the MMIO or I/O address that is decoded by the BAR register.
 *
 * If the pci device has enabled address space decoding, then intercept
 * the address range decoded by the BAR register.
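 *
 * A 64-bit BAR spans two registers: a write to the low half (PCIBAR_MEM64)
 * replaces the low 32 bits of the decoded address, while a write to the
 * high half (PCIBAR_MEMHI64) replaces the upper 32 bits.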
 */
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
	int decode;

	if (pi->pi_bar[idx].type == PCIBAR_IO)
		decode = porten(pi);
	else
		decode = memen(pi);

	if (decode)
		unregister_bar(pi, idx);

	switch (type) {
	case PCIBAR_IO:
	case PCIBAR_MEM32:
		pi->pi_bar[idx].addr = addr;
		break;
	case PCIBAR_MEM64:
		pi->pi_bar[idx].addr &= ~0xffffffffUL;
		pi->pi_bar[idx].addr |= addr;
		break;
	case PCIBAR_MEMHI64:
		pi->pi_bar[idx].addr &= 0xffffffff;
		pi->pi_bar[idx].addr |= addr;
		break;
	default:
		assert(0);
	}

	if (decode)
		register_bar(pi, idx);
}

int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
    uint64_t size)
{
	assert((type == PCIBAR_ROM) || (idx >= 0 && idx <= PCI_BARMAX));
	assert((type != PCIBAR_ROM) || (idx == PCI_ROM_IDX));

	if ((size & (size - 1)) != 0)
		size = 1UL << flsl(size);	/* round up to a power of 2 */

	/* Enforce minimum BAR sizes required by the PCI standard */
	if (type == PCIBAR_IO) {
		if (size < 4)
			size = 4;
	} else if (type == PCIBAR_ROM) {
		if (size < ~PCIM_BIOS_ADDR_MASK + 1)
			size = ~PCIM_BIOS_ADDR_MASK + 1;
	} else {
		if (size < 16)
			size = 16;
	}

	/*
	 * To reduce fragmentation of the MMIO space, we allocate the BARs by
	 * size.  Therefore, don't allocate the BAR yet.  We create a list of
	 * all BAR allocations, sorted by BAR size.  When all PCI devices are
	 * initialized, we will assign an address to the BARs.
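	 *
	 * Sorting largest-first pays off because each allocation is
	 * naturally aligned to its own size: e.g. placing a 16MB BAR before
	 * two 4KB BARs avoids the alignment padding a small-then-large
	 * layout would leave behind.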
	 */

	/* create a new list entry */
	struct pci_bar_allocation *const new_bar = malloc(sizeof(*new_bar));
	memset(new_bar, 0, sizeof(*new_bar));
	new_bar->pdi = pdi;
	new_bar->idx = idx;
	new_bar->type = type;
	new_bar->size = size;

	/*
	 * Search for the first BAR whose size is smaller than the size of
	 * our newly allocated BAR.
	 */
	struct pci_bar_allocation *bar = NULL;
	TAILQ_FOREACH(bar, &pci_bars, chain) {
		if (bar->size < size) {
			break;
		}
	}

	if (bar == NULL) {
		/*
		 * Either the list is empty or the new BAR is the smallest
		 * BAR in the list.  Append it to the end of our list.
		 */
		TAILQ_INSERT_TAIL(&pci_bars, new_bar, chain);
	} else {
		/*
		 * The found BAR is smaller than our new BAR.  For that
		 * reason, insert our new BAR before the found BAR.
		 */
		TAILQ_INSERT_BEFORE(bar, new_bar, chain);
	}


	/*
	 * pci_passthru devices synchronize their physical and virtual command
	 * register on init. For that reason, the virtual cmd reg should be
	 * updated as early as possible.
	 */
	uint16_t enbit = 0;
	switch (type) {
	case PCIBAR_IO:
		enbit = PCIM_CMD_PORTEN;
		break;
	case PCIBAR_MEM64:
	case PCIBAR_MEM32:
		enbit = PCIM_CMD_MEMEN;
		break;
	default:
		enbit = 0;
		break;
	}

	const uint16_t cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
	pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);

	return (0);
}

static int
pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
    const enum pcibar_type type, const uint64_t size)
{
	int error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;

	switch (type) {
	case PCIBAR_NONE:
		baseptr = NULL;
		addr = mask = lobits = 0;
		break;
	case PCIBAR_IO:
		baseptr = &pci_emul_iobase;
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB. Allow for this by allocating small requests under
		 * 4GB unless the allocation size is larger than some arbitrary
		 * number (128MB currently).
		 */
		if (size > 128 * 1024 * 1024) {
			baseptr = &pci_emul_membase64;
			limit = pci_emul_memlim64;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				 PCIM_BAR_MEM_PREFETCH;
		} else {
			baseptr = &pci_emul_membase32;
			limit = PCI_EMUL_MEMLIMIT32;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
		}
		break;
	case PCIBAR_MEM32:
		baseptr = &pci_emul_membase32;
		limit = PCI_EMUL_MEMLIMIT32;
		mask = PCIM_BAR_MEM_BASE;
		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
		break;
	case PCIBAR_ROM:
		/* do not claim memory for ROM. OVMF will do it for us. */
		baseptr = NULL;
		limit = 0;
		mask = PCIM_BIOS_ADDR_MASK;
		lobits = 0;
		break;
	default:
		printf("pci_emul_assign_bar: invalid bar type %d\n", type);
		assert(0);
	}

	if (baseptr != NULL) {
		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
		if (error != 0)
			return (error);
	} else {
		addr = 0;
	}

	pdi->pi_bar[idx].type = type;
	pdi->pi_bar[idx].addr = addr;
	pdi->pi_bar[idx].size = size;
	/*
	 * Passthru devices use the same lobits as the physical device they
	 * are backed by; they set this field themselves, in which case it
	 * must not be overwritten here.
	 */
	if (pdi->pi_bar[idx].lobits != 0) {
		lobits = pdi->pi_bar[idx].lobits;
	} else {
		pdi->pi_bar[idx].lobits = lobits;
	}

	/* Initialize the BAR register in config space */
	bar = (addr & mask) | lobits;
	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

	if (type == PCIBAR_MEM64) {
		assert(idx + 1 <= PCI_BARMAX);
		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
	}

	if (type != PCIBAR_ROM) {
		register_bar(pdi, idx);
	}

	return (0);
}

int
pci_emul_alloc_rom(struct pci_devinst *const pdi, const uint64_t size,
    void **const addr)
{
	/* allocate ROM space once on first call */
	if (pci_emul_rombase == NULL) {
		pci_emul_rombase = vm_create_devmem(pdi->pi_vmctx, VM_PCIROM,
		    "pcirom", PCI_EMUL_ROMSIZE);
		if (pci_emul_rombase == MAP_FAILED) {
			warnx("%s: failed to create rom segment", __func__);
			return (-1);
		}
		pci_emul_romlim = pci_emul_rombase + PCI_EMUL_ROMSIZE;
		pci_emul_romoffset = 0;
	}

	/* ROM size should be a power of 2 and at least 2 KB */
	const uint64_t rom_size = MAX(1UL << flsl(size),
	    ~PCIM_BIOS_ADDR_MASK + 1);

	/* check if ROM fits into ROM space */
	if (pci_emul_romoffset + rom_size > PCI_EMUL_ROMSIZE) {
		warnx("%s: no space left in rom segment:", __func__);
		warnx("%16lu bytes left",
		    PCI_EMUL_ROMSIZE - pci_emul_romoffset);
		warnx("%16lu bytes required by %d/%d/%d", rom_size, pdi->pi_bus,
		    pdi->pi_slot, pdi->pi_func);
		return (-1);
	}

	/* allocate ROM BAR */
	const int error = pci_emul_alloc_bar(pdi, PCI_ROM_IDX, PCIBAR_ROM,
	    rom_size);
	if (error)
		return (error);

	/* return address */
	*addr = pci_emul_rombase + pci_emul_romoffset;

	/* save offset into ROM Space */
	pdi->pi_romoffset = pci_emul_romoffset;

	/* increase offset for next ROM */
	pci_emul_romoffset += rom_size;

	return (0);
}

int
pci_emul_add_boot_device(struct pci_devinst *pi, int bootindex)
{
	struct boot_device *new_device, *device;

	/* don't permit a negative bootindex */
	if (bootindex < 0) {
		errx(4, "Invalid bootindex %d for %s", bootindex, pi->pi_name);
	}

	/* alloc new boot device */
	new_device = calloc(1, sizeof(struct boot_device));
	if (new_device == NULL) {
		return (ENOMEM);
	}
	new_device->pdi = pi;
	new_device->bootindex = bootindex;

	/* search for boot device with higher boot index */
	TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
		if (device->bootindex == bootindex) {
			errx(4,
			    "Could not set bootindex %d for %s. Bootindex already occupied by %s",
			    bootindex, pi->pi_name, device->pdi->pi_name);
		} else if (device->bootindex > bootindex) {
			break;
		}
	}

	/* add boot device to queue */
	if (device == NULL) {
		TAILQ_INSERT_TAIL(&boot_devices, new_device, boot_device_chain);
	} else {
		TAILQ_INSERT_BEFORE(device, new_device, boot_device_chain);
	}

	return (0);
}

#define	CAP_START_OFFSET	0x40
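/*
 * Append a capability to the device's capability chain.  Capabilities form
 * a singly linked list in config space starting at CAP_START_OFFSET: byte 0
 * of each entry holds the capability ID and byte 1 the offset of the next
 * entry (0 terminates the list).  E.g. if the first capability occupies 16
 * bytes, the second is placed at 0x50 and cfgdata[0x41] is set to 0x50.
 */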
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
	int i, capoff, reallen;
	uint16_t sts;

	assert(caplen > 0);

	reallen = roundup2(caplen, 4);		/* dword aligned */

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0)
		capoff = CAP_START_OFFSET;
	else
		capoff = pi->pi_capend + 1;

	/* Check if we have enough space */
	if (capoff + reallen > PCI_REGMAX + 1)
		return (-1);

	/* Set the previous capability pointer */
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
	} else
		pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff);

	/* Copy the capability */
	for (i = 0; i < caplen; i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	/* Set the next capability pointer */
	pci_set_cfgdata8(pi, capoff + 1, 0);

	pi->pi_prevcap = capoff;
	pi->pi_capend = capoff + reallen - 1;
	return (0);
}

static struct pci_devemu *
pci_emul_finddev(const char *name)
{
	struct pci_devemu **pdpp, *pdp;

	SET_FOREACH(pdpp, pci_devemu_set) {
		pdp = *pdpp;
		if (!strcmp(pdp->pe_emu, name)) {
			return (pdp);
		}
	}

	return (NULL);
}

static int
pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot,
    int func, struct funcinfo *fi)
{
	struct pci_devinst *pdi;
	int err;

	pdi = calloc(1, sizeof(struct pci_devinst));

	pdi->pi_vmctx = ctx;
	pdi->pi_bus = bus;
	pdi->pi_slot = slot;
	pdi->pi_func = func;
	pthread_mutex_init(&pdi->pi_lintr.lock, NULL);
	pdi->pi_lintr.pin = 0;
	pdi->pi_lintr.state = IDLE;
	pci_irq_init_irq(&pdi->pi_lintr.irq);
	pdi->pi_d = pde;
	snprintf(pdi->pi_name, PI_NAMESZ, "%s@pci.%d.%d.%d", pde->pe_emu, bus,
	    slot, func);

	/* Disable legacy interrupts */
	pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
	pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);

	pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_BUSMASTEREN);

	err = (*pde->pe_init)(pdi, fi->fi_config);
	if (err == 0)
		fi->fi_devi = pdi;
	else
		free(pdi);

	return (err);
}

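/*
 * Populate an MSI capability structure.  The Multiple Message Capable
 * field encodes the supported message count as log2(msgnum), so e.g.
 * msgnum = 8 is encoded as mmc = 3.
 */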
void
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
{
	int mmc;

	/* Number of msi messages must be a power of 2 between 1 and 32 */
	assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
	mmc = ffs(msgnum) - 1;

	bzero(msicap, sizeof(struct msicap));
	msicap->capid = PCIY_MSI;
	msicap->nextptr = nextptr;
	msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
}

int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
	struct msicap msicap;

	pci_populate_msicap(&msicap, msgnum, 0);

	return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}

static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
		     uint32_t msix_tab_size)
{

	assert(msix_tab_size % 4096 == 0);

	bzero(msixcap, sizeof(struct msixcap));
	msixcap->capid = PCIY_MSIX;

	/*
	 * Message Control Register, all fields set to
	 * zero except for the Table Size.
	 * Note: Table size N is encoded as N-1
	 */
	msixcap->msgctrl = msgnum - 1;

	/*
	 * MSI-X BAR setup:
	 * - MSI-X table starts at offset 0
	 * - PBA table starts at a 4K aligned offset after the MSI-X table
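	 *
	 * E.g. for msgnum = 5 the table occupies 5 * 16 = 80 bytes; the
	 * caller rounds that up to 4096, so the PBA begins at offset 0x1000
	 * of the same BAR.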
	 */
	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}

static void
pci_msix_table_init(struct pci_devinst *pi, int table_entries)
{
	int i, table_size;

	assert(table_entries > 0);
	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);

	table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
	pi->pi_msix.table = calloc(1, table_size);

	/* set mask bit of vector control register */
	for (i = 0; i < table_entries; i++)
		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
}

int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
	uint32_t tab_size;
	struct msixcap msixcap;

	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

	/* Align table size to nearest 4K */
	tab_size = roundup2(tab_size, 4096);

	pi->pi_msix.table_bar = barnum;
	pi->pi_msix.pba_bar   = barnum;
	pi->pi_msix.table_offset = 0;
	pi->pi_msix.table_count = msgnum;
	pi->pi_msix.pba_offset = tab_size;
	pi->pi_msix.pba_size = PBA_SIZE(msgnum);

	pci_msix_table_init(pi, msgnum);

	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size);

	/* allocate memory for MSI-X Table and PBA */
	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
				tab_size + pi->pi_msix.pba_size);

	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
					sizeof(msixcap)));
}

static void
msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		 int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask;
	int off;

	off = offset - capoff;
	/* Message Control Register */
	if (off == 2 && bytes == 2) {
		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;

		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
		pci_lintr_update(pi);
	}

	CFGWRITE(pi, offset, val, bytes);
}

static void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If the guest is writing to the message control register, make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;
	}
	CFGWRITE(pi, offset, val, bytes);

	msgctrl = pci_get_cfgdata16(pi, capoff + 2);
	addrlo = pci_get_cfgdata32(pi, capoff + 4);
	if (msgctrl & PCIM_MSICTRL_64BIT)
		msgdata = pci_get_cfgdata16(pi, capoff + 12);
	else
		msgdata = pci_get_cfgdata16(pi, capoff + 8);

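	/*
	 * Multiple Message Enable is log2-encoded in bits 6:4 of the message
	 * control register, so e.g. mme = 0x30 means the guest enabled
	 * 1 << 3 = 8 messages.
	 */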
	mme = msgctrl & PCIM_MSICTRL_MME_MASK;
	pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
	if (pi->pi_msi.enabled) {
		pi->pi_msi.addr = addrlo;
		pi->pi_msi.msg_data = msgdata;
		pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
	} else {
		pi->pi_msi.maxmsgnum = 0;
	}
	pci_lintr_update(pi);
}

static void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff __unused, int offset,
    int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}

#define	PCIECAP_VERSION	0x2
int
pci_emul_add_pciecap(struct pci_devinst *pi, int type)
{
	int err;
	struct pciecap pciecap;

	bzero(&pciecap, sizeof(pciecap));

	/*
	 * Use the integrated endpoint type for endpoints on a root complex bus.
	 *
	 * NB: bhyve currently only supports a single PCI bus that is the root
	 * complex bus, so all endpoints are integrated.
	 */
	if ((type == PCIEM_TYPE_ENDPOINT) && (pi->pi_bus == 0))
		type = PCIEM_TYPE_ROOT_INT_EP;

	pciecap.capid = PCIY_EXPRESS;
	pciecap.pcie_capabilities = PCIECAP_VERSION | type;
	if (type != PCIEM_TYPE_ROOT_INT_EP) {
		pciecap.link_capabilities = 0x411;	/* gen1, x1 */
		pciecap.link_status = 0x11;		/* gen1, x1 */
	}

	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
	return (err);
}

/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space. A capoff parameter of zero will force a search for the
 * offset and type.
 */
void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val,
    uint8_t capoff, int capid)
{
	uint8_t nextoff;

	/* Do not allow unaligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	if (capoff == 0) {
		/* Find the capability that we want to update */
		capoff = CAP_START_OFFSET;
		while (1) {
			nextoff = pci_get_cfgdata8(pi, capoff + 1);
			if (nextoff == 0)
				break;
			if (offset >= capoff && offset < nextoff)
				break;

			capoff = nextoff;
		}
		assert(offset >= capoff);
		capid = pci_get_cfgdata8(pi, capoff);
	}

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}

static int
pci_emul_iscap(struct pci_devinst *pi, int offset)
{
	uint16_t sts;

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
		if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend)
			return (1);
	}
	return (0);
}

static int
pci_emul_fallback_handler(struct vcpu *vcpu __unused, int dir,
    uint64_t addr __unused, int size __unused, uint64_t *val,
    void *arg1 __unused, long arg2 __unused)
{
	/*
	 * Ignore writes; return 0xff's for reads. The mem read code
	 * will take care of truncating to the correct size.
	 */
	if (dir == MEM_F_READ) {
		*val = 0xffffffffffffffff;
	}

	return (0);
}

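/*
 * ECAM-style decoding of an extended config access: bits 0-11 of the
 * offset select the register, bits 12-14 the function, bits 15-19 the
 * slot and bits 20-27 the bus.  For example, an access at base + 0x23008
 * targets bus 0, slot 4, function 3, register 0x8.
 */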
static int
pci_emul_ecfg_handler(struct vcpu *vcpu __unused, int dir, uint64_t addr,
    int bytes, uint64_t *val, void *arg1 __unused, long arg2 __unused)
{
	int bus, slot, func, coff, in;

	coff = addr & 0xfff;
	func = (addr >> 12) & 0x7;
	slot = (addr >> 15) & 0x1f;
	bus = (addr >> 20) & 0xff;
	in = (dir == MEM_F_READ);
	if (in)
		*val = ~0UL;
	pci_cfgrw(in, bus, slot, func, coff, bytes, (uint32_t *)val);
	return (0);
}

uint64_t
pci_ecfg_base(void)
{

	return (PCI_EMUL_ECFG_BASE);
}

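/*
 * Build the qemu_fwcfg "bootorder" file from the sorted boot device list.
 * Each entry is an OVMF device path; e.g. a device in slot 3, function 0
 * produces the line "/pci@i0cf8/pci@3,0".
 */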
static int
init_bootorder(void)
{
	struct boot_device *device;
	FILE *fp;
	char *bootorder;
	size_t bootorder_len;

	if (TAILQ_EMPTY(&boot_devices))
		return (0);

	fp = open_memstream(&bootorder, &bootorder_len);
	TAILQ_FOREACH(device, &boot_devices, boot_device_chain) {
		fprintf(fp, "/pci@i0cf8/pci@%d,%d\n",
		    device->pdi->pi_slot, device->pdi->pi_func);
	}
	fclose(fp);

	return (qemu_fwcfg_add_file("bootorder", bootorder_len, bootorder));
}

#define	BUSIO_ROUNDUP		32
#define	BUSMEM32_ROUNDUP	(1024 * 1024)
#define	BUSMEM64_ROUNDUP	(512 * 1024 * 1024)

int
init_pci(struct vmctx *ctx)
{
	char node_name[sizeof("pci.XXX.XX.X")];
	struct mem_range mr;
	struct pci_devemu *pde;
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;
	nvlist_t *nvl;
	const char *emul;
	size_t lowmem;
	int bus, slot, func;
	int error;

	if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32)
		errx(EX_OSERR, "Invalid lowmem limit");

	pci_emul_iobase = PCI_EMUL_IOBASE;
	pci_emul_membase32 = PCI_EMUL_MEMBASE32;

	pci_emul_membase64 = vm_get_highmem_base(ctx) +
	    vm_get_highmem_size(ctx);
	pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
	pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;

	TAILQ_INIT(&boot_devices);

	for (bus = 0; bus < MAXBUSES; bus++) {
		snprintf(node_name, sizeof(node_name), "pci.%d", bus);
		nvl = find_config_node(node_name);
		if (nvl == NULL)
			continue;
		pci_businfo[bus] = calloc(1, sizeof(struct businfo));
		bi = pci_businfo[bus];

		/*
		 * Keep track of the i/o and memory resources allocated to
		 * this bus.
		 */
		bi->iobase = pci_emul_iobase;
		bi->membase32 = pci_emul_membase32;
		bi->membase64 = pci_emul_membase64;

		/* first run: init devices */
		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				snprintf(node_name, sizeof(node_name),
				    "pci.%d.%d.%d", bus, slot, func);
				nvl = find_config_node(node_name);
				if (nvl == NULL)
					continue;

				fi->fi_config = nvl;
				emul = get_config_value_node(nvl, "device");
				if (emul == NULL) {
					EPRINTLN("pci slot %d:%d:%d: missing "
					    "\"device\" value", bus, slot, func);
					return (EINVAL);
				}
				pde = pci_emul_finddev(emul);
				if (pde == NULL) {
					EPRINTLN("pci slot %d:%d:%d: unknown "
					    "device \"%s\"", bus, slot, func,
					    emul);
					return (EINVAL);
				}
				if (pde->pe_alias != NULL) {
					EPRINTLN("pci slot %d:%d:%d: legacy "
					    "device \"%s\", use \"%s\" instead",
					    bus, slot, func, emul,
					    pde->pe_alias);
					return (EINVAL);
				}
				fi->fi_pde = pde;
				error = pci_emul_init(ctx, pde, bus, slot,
				    func, fi);
				if (error)
					return (error);
			}
		}

		/* second run: assign BARs and free list */
		struct pci_bar_allocation *bar;
		struct pci_bar_allocation *bar_tmp;
		TAILQ_FOREACH_SAFE(bar, &pci_bars, chain, bar_tmp) {
			pci_emul_assign_bar(bar->pdi, bar->idx, bar->type,
			    bar->size);
			free(bar);
		}
		TAILQ_INIT(&pci_bars);

		/*
		 * Add some slop to the I/O and memory resources decoded by
		 * this bus to give a guest some flexibility if it wants to
		 * reprogram the BARs.
		 */
		pci_emul_iobase += BUSIO_ROUNDUP;
		pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
		bi->iolimit = pci_emul_iobase;

		pci_emul_membase32 += BUSMEM32_ROUNDUP;
		pci_emul_membase32 = roundup2(pci_emul_membase32,
		    BUSMEM32_ROUNDUP);
		bi->memlimit32 = pci_emul_membase32;

		pci_emul_membase64 += BUSMEM64_ROUNDUP;
		pci_emul_membase64 = roundup2(pci_emul_membase64,
		    BUSMEM64_ROUNDUP);
		bi->memlimit64 = pci_emul_membase64;
	}

	/*
	 * PCI backends are initialized before routing INTx interrupts
	 * so that LPC devices are able to reserve ISA IRQs before
	 * routing PIRQ pins.
	 */
	for (bus = 0; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		for (slot = 0; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			for (func = 0; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_devi == NULL)
					continue;
				pci_lintr_route(fi->fi_devi);
			}
		}
	}
#ifdef __amd64__
	lpc_pirq_routed();
#endif

	if ((error = init_bootorder()) != 0) {
		warnx("%s: Unable to init bootorder", __func__);
		return (error);
	}

	/*
	 * The guest physical memory map looks like the following on amd64:
	 * [0,		    lowmem)		guest system memory
	 * [lowmem,	    0xC0000000)		memory hole (may be absent)
	 * [0xC0000000,     0xE0000000)		PCI hole (32-bit BAR allocation)
	 * [0xE0000000,	    0xF0000000)		PCI extended config window
	 * [0xF0000000,	    4GB)		LAPIC, IOAPIC, HPET, firmware
	 * [4GB,	    4GB + highmem)	guest system memory
	 * [roundup(4GB + highmem, 32GB), ...)	PCI 64-bit BAR allocation
	 *
	 * On arm64 the guest physical memory map looks like this:
	 * [0x0DF00000,	    0x10000000)		PCI I/O memory
	 * [0xA0000000,	    0xE0000000)		PCI 32-bit BAR allocation
	 * [0xE0000000,	    0xF0000000)		PCI extended config window
	 * [4GB,	    4GB + highmem)	guest system memory
	 * [roundup(4GB + highmem, 32GB), ...)	PCI 64-bit BAR allocation
	 *
	 * "lowmem" is guest memory below 0xC0000000.  amd64 guests provisioned
	 * with less than 3GB of RAM will have no memory above the 4GB boundary.
	 * System memory for arm64 guests is all above the 4GB boundary.
	 */

	/*
	 * Accesses to memory addresses that are not allocated to system
	 * memory or PCI devices return 0xff's.
	 */
	lowmem = vm_get_lowmem_size(ctx);
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI hole";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = lowmem;
	mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
	mr.handler = pci_emul_fallback_handler;
	error = register_mem_fallback(&mr);
	assert(error == 0);

	/* PCI extended config space */
	bzero(&mr, sizeof(struct mem_range));
	mr.name = "PCI ECFG";
	mr.flags = MEM_F_RW | MEM_F_IMMUTABLE;
	mr.base = PCI_EMUL_ECFG_BASE;
	mr.size = PCI_EMUL_ECFG_SIZE;
	mr.handler = pci_emul_ecfg_handler;
	error = register_mem(&mr);
	assert(error == 0);

	return (0);
}

#ifdef __amd64__
static void
pci_apic_prt_entry(int bus __unused, int slot, int pin, struct pci_irq *irq,
    void *arg __unused)
{

	dsdt_line("  Package ()");
	dsdt_line("  {");
	dsdt_line("    0x%X,", slot << 16 | 0xffff);
	dsdt_line("    0x%02X,", pin - 1);
	dsdt_line("    Zero,");
	dsdt_line("    0x%X", irq->ioapic_irq);
	dsdt_line("  },");
}

static void
pci_pirq_prt_entry(int bus __unused, int slot, int pin, struct pci_irq *irq,
    void *arg __unused)
{
	char *name;

	name = lpc_pirq_name(irq->pirq_pin);
	if (name == NULL)
		return;
	dsdt_line("  Package ()");
	dsdt_line("  {");
	dsdt_line("    0x%X,", slot << 16 | 0xffff);
	dsdt_line("    0x%02X,", pin - 1);
	dsdt_line("    %s,", name);
	dsdt_line("    0x00");
	dsdt_line("  },");
	free(name);
}
#endif

/*
 * A bhyve virtual machine has a flat PCI hierarchy with a root port
 * corresponding to each PCI bus.
 */
static void
pci_bus_write_dsdt(int bus)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	int func, slot;

	/*
	 * If there are no devices on this 'bus' then just return.
	 */
	if ((bi = pci_businfo[bus]) == NULL) {
		/*
		 * Bus 0 is special because it decodes the I/O ports used
		 * for PCI config space access even if there are no devices
		 * on it.
		 */
		if (bus != 0)
			return;
	}

	dsdt_line("  Device (PC%02X)", bus);
	dsdt_line("  {");
	dsdt_line("    Name (_HID, EisaId (\"PNP0A03\"))");

	dsdt_line("    Method (_BBN, 0, NotSerialized)");
	dsdt_line("    {");
	dsdt_line("        Return (0x%08X)", bus);
	dsdt_line("    }");
	dsdt_line("    Name (_CRS, ResourceTemplate ()");
	dsdt_line("    {");
	dsdt_line("      WordBusNumber (ResourceProducer, MinFixed, "
	    "MaxFixed, PosDecode,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bus);
	dsdt_line("        0x%04X,             // Range Maximum", bus);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x0001,             // Length");
	dsdt_line("        ,, )");

#ifdef __amd64__
	if (bus == 0) {
		dsdt_indent(3);
		dsdt_fixed_ioport(0xCF8, 8);
		dsdt_unindent(3);

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0000,             // Range Minimum");
		dsdt_line("        0x0CF7,             // Range Maximum");
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x0CF8,             // Length");
		dsdt_line("        ,, , TypeStatic)");

		dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
		    "PosDecode, EntireRange,");
		dsdt_line("        0x0000,             // Granularity");
		dsdt_line("        0x0D00,             // Range Minimum");
		dsdt_line("        0x%04X,             // Range Maximum",
		    PCI_EMUL_IOBASE - 1);
		dsdt_line("        0x0000,             // Translation Offset");
		dsdt_line("        0x%04X,             // Length",
		    PCI_EMUL_IOBASE - 0x0D00);
		dsdt_line("        ,, , TypeStatic)");

		if (bi == NULL) {
			dsdt_line("    })");
			goto done;
		}
	}
#endif
	assert(bi != NULL);

	/* i/o window */
	dsdt_line("      WordIO (ResourceProducer, MinFixed, MaxFixed, "
	    "PosDecode, EntireRange,");
	dsdt_line("        0x0000,             // Granularity");
	dsdt_line("        0x%04X,             // Range Minimum", bi->iobase);
	dsdt_line("        0x%04X,             // Range Maximum",
	    bi->iolimit - 1);
	dsdt_line("        0x0000,             // Translation Offset");
	dsdt_line("        0x%04X,             // Length",
	    bi->iolimit - bi->iobase);
	dsdt_line("        ,, , TypeStatic)");

	/* mmio window (32-bit) */
	dsdt_line("      DWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x00000000,         // Granularity");
	dsdt_line("        0x%08X,         // Range Minimum\n", bi->membase32);
	dsdt_line("        0x%08X,         // Range Maximum\n",
	    bi->memlimit32 - 1);
	dsdt_line("        0x00000000,         // Translation Offset");
	dsdt_line("        0x%08X,         // Length\n",
	    bi->memlimit32 - bi->membase32);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");

	/* mmio window (64-bit) */
	dsdt_line("      QWordMemory (ResourceProducer, PosDecode, "
	    "MinFixed, MaxFixed, NonCacheable, ReadWrite,");
	dsdt_line("        0x0000000000000000, // Granularity");
	dsdt_line("        0x%016lX, // Range Minimum\n", bi->membase64);
	dsdt_line("        0x%016lX, // Range Maximum\n",
	    bi->memlimit64 - 1);
	dsdt_line("        0x0000000000000000, // Translation Offset");
	dsdt_line("        0x%016lX, // Length\n",
	    bi->memlimit64 - bi->membase64);
	dsdt_line("        ,, , AddressRangeMemory, TypeStatic)");
	dsdt_line("    })");

#ifdef __amd64__
	if (pci_count_lintr(bus) != 0) {
		dsdt_indent(2);
		dsdt_line("Name (PPRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_pirq_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Name (APRT, Package ()");
		dsdt_line("{");
		pci_walk_lintr(bus, pci_apic_prt_entry, NULL);
		dsdt_line("})");
		dsdt_line("Method (_PRT, 0, NotSerialized)");
		dsdt_line("{");
		dsdt_line("  If (PICM)");
		dsdt_line("  {");
		dsdt_line("    Return (APRT)");
		dsdt_line("  }");
		dsdt_line("  Else");
		dsdt_line("  {");
		dsdt_line("    Return (PPRT)");
		dsdt_line("  }");
		dsdt_line("}");
		dsdt_unindent(2);
	}
#endif

	dsdt_indent(2);
	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (func = 0; func < MAXFUNCS; func++) {
			pi = si->si_funcs[func].fi_devi;
			if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL)
				pi->pi_d->pe_write_dsdt(pi);
		}
	}
	dsdt_unindent(2);
#ifdef __amd64__
done:
#endif
	dsdt_line("  }");
}

void
pci_write_dsdt(void)
{
	int bus;

	dsdt_indent(1);
	dsdt_line("Name (PICM, 0x00)");
	dsdt_line("Method (_PIC, 1, NotSerialized)");
	dsdt_line("{");
	dsdt_line("  Store (Arg0, PICM)");
	dsdt_line("}");
	dsdt_line("");
	dsdt_line("Scope (_SB)");
	dsdt_line("{");
	for (bus = 0; bus < MAXBUSES; bus++)
		pci_bus_write_dsdt(bus);
	dsdt_line("}");
	dsdt_unindent(1);
}

int
pci_bus_configured(int bus)
{
	assert(bus >= 0 && bus < MAXBUSES);
	return (pci_businfo[bus] != NULL);
}

int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}

int
pci_msi_maxmsgnum(struct pci_devinst *pi)
{
	if (pi->pi_msi.enabled)
		return (pi->pi_msi.maxmsgnum);
	else
		return (0);
}

int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}

void
pci_generate_msix(struct pci_devinst *pi, int index)
{
	struct msix_table_entry *mte;

	if (!pci_msix_enabled(pi))
		return;

	if (pi->pi_msix.function_mask)
		return;

	if (index >= pi->pi_msix.table_count)
		return;

	mte = &pi->pi_msix.table[index];
	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
		/* XXX Set PBA bit if interrupt is disabled */
		vm_raise_msi(pi->pi_vmctx, mte->addr, mte->msg_data,
		    pi->pi_bus, pi->pi_slot, pi->pi_func);
	}
}

void
pci_generate_msi(struct pci_devinst *pi, int index)
{

	if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
		vm_raise_msi(pi->pi_vmctx, pi->pi_msi.addr,
		    pi->pi_msi.msg_data + index,
		    pi->pi_bus, pi->pi_slot, pi->pi_func);
	}
}

static bool
pci_lintr_permitted(struct pci_devinst *pi)
{
	uint16_t cmd;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
	return (!(pi->pi_msi.enabled || pi->pi_msix.enabled ||
		(cmd & PCIM_CMD_INTxDIS)));
}

void
pci_lintr_request(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct slotinfo *si;
	int bestpin, bestcount, pin;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);

	/*
	 * Just allocate a pin from our slot.  The pin will be
	 * assigned IRQs later when interrupts are routed.
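	 *
	 * Pins are balanced by use count: e.g. if INTA# and INTB# already
	 * have one user each and INTC# has none, the next request is given
	 * INTC# (pin 3).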
1982	 */
1983	si = &bi->slotinfo[pi->pi_slot];
1984	bestpin = 0;
1985	bestcount = si->si_intpins[0].ii_count;
1986	for (pin = 1; pin < 4; pin++) {
1987		if (si->si_intpins[pin].ii_count < bestcount) {
1988			bestpin = pin;
1989			bestcount = si->si_intpins[pin].ii_count;
1990		}
1991	}
1992
1993	si->si_intpins[bestpin].ii_count++;
1994	pi->pi_lintr.pin = bestpin + 1;
1995	pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1);
1996}

static void
pci_lintr_route(struct pci_devinst *pi)
{
	struct businfo *bi;
	struct intxinfo *ii;
	struct pci_irq *irq;

	if (pi->pi_lintr.pin == 0)
		return;

	bi = pci_businfo[pi->pi_bus];
	assert(bi != NULL);
	ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1];
	irq = &ii->ii_irq;
	pci_irq_route(pi, irq);
	pi->pi_lintr.irq = *irq;
	pci_set_cfgdata8(pi, PCIR_INTLINE, pci_irq_intline(irq));
}

void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == IDLE) {
		if (pci_lintr_permitted(pi)) {
			pi->pi_lintr.state = ASSERTED;
			pci_irq_assert(pi);
		} else
			pi->pi_lintr.state = PENDING;
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr.pin > 0);

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED) {
		pi->pi_lintr.state = IDLE;
		pci_irq_deassert(pi);
	} else if (pi->pi_lintr.state == PENDING)
		pi->pi_lintr.state = IDLE;
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}
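
/*
 * Illustrative only: INTx is level-triggered, so a device model asserts the
 * line when work is pending and deasserts it once the guest driver has
 * acknowledged the condition, e.g.:
 *
 *	pci_lintr_assert(pi);		// raise the level
 *	...				// guest services the device
 *	pci_lintr_deassert(pi);		// lower it again
 *
 * If INTx is currently inhibited (MSI/MSI-X active or PCIM_CMD_INTxDIS set),
 * the assertion is recorded as PENDING and replayed by pci_lintr_update()
 * once it becomes permitted again.
 */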

static void
pci_lintr_update(struct pci_devinst *pi)
{

	pthread_mutex_lock(&pi->pi_lintr.lock);
	if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) {
		pci_irq_deassert(pi);
		pi->pi_lintr.state = PENDING;
	} else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) {
		pi->pi_lintr.state = ASSERTED;
		pci_irq_assert(pi);
	}
	pthread_mutex_unlock(&pi->pi_lintr.lock);
}

int
pci_count_lintr(int bus)
{
	int count, slot, pin;
	struct slotinfo *slotinfo;

	count = 0;
	if (pci_businfo[bus] != NULL) {
		for (slot = 0; slot < MAXSLOTS; slot++) {
			slotinfo = &pci_businfo[bus]->slotinfo[slot];
			for (pin = 0; pin < 4; pin++) {
				if (slotinfo->si_intpins[pin].ii_count != 0)
					count++;
			}
		}
	}
	return (count);
}

void
pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct intxinfo *ii;
	int slot, pin;

	if ((bi = pci_businfo[bus]) == NULL)
		return;

	for (slot = 0; slot < MAXSLOTS; slot++) {
		si = &bi->slotinfo[slot];
		for (pin = 0; pin < 4; pin++) {
			ii = &si->si_intpins[pin];
			if (ii->ii_count != 0)
				cb(bus, slot, pin + 1, &ii->ii_irq, arg);
		}
	}
}
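
/*
 * A minimal sketch of a pci_lintr_cb consumer (the callback name is
 * hypothetical); pci_walk_lintr() invokes the callback once per slot/pin
 * pair that has at least one device attached to it:
 *
 *	static void
 *	count_cb(int bus, int slot, int pin, struct pci_irq *irq, void *arg)
 *	{
 *		(*(int *)arg)++;
 *	}
 *
 *	int n = 0;
 *	pci_walk_lintr(0, count_cb, &n);	// n == pci_count_lintr(0)
 */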

/*
 * Return 1 if the emulated device in 'slot' is a multi-function device.
 * Return 0 otherwise.
 */
static int
pci_emul_is_mfdev(int bus, int slot)
{
	struct businfo *bi;
	struct slotinfo *si;
	int f, numfuncs;

	numfuncs = 0;
	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		for (f = 0; f < MAXFUNCS; f++) {
			if (si->si_funcs[f].fi_devi != NULL) {
				numfuncs++;
			}
		}
	}
	return (numfuncs > 1);
}

/*
 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
 * whether or not a multi-function device is being emulated in the PCI
 * 'slot'.
 */
static void
pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv)
{
	int mfdev;

	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
		mfdev = pci_emul_is_mfdev(bus, slot);
		switch (bytes) {
		case 1:
		case 2:
			*rv &= ~PCIM_MFDEV;
			if (mfdev) {
				*rv |= PCIM_MFDEV;
			}
			break;
		case 4:
			*rv &= ~(PCIM_MFDEV << 16);
			if (mfdev) {
				*rv |= (PCIM_MFDEV << 16);
			}
			break;
		}
	}
}
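
/*
 * Worked example: PCIR_HDRTYPE lives at config offset 0x0e.  A 1-byte or
 * 2-byte read starting at 0x0e covers it directly, so PCIM_MFDEV (0x80) is
 * patched in place.  A 4-byte read containing it must start at the aligned
 * offset 0x0c, which puts the header type in byte 2 of the result; hence
 * the same mask is applied shifted left by 16.
 */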

/*
 * Update device state in response to changes to the PCI command
 * register.
 */
void
pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old)
{
	int i;
	uint16_t changed, new;

	new = pci_get_cfgdata16(pi, PCIR_COMMAND);
	changed = old ^ new;

	/*
	 * If the MMIO or I/O address space decoding has changed then
	 * register/unregister all BARs that decode that address space.
	 */
	for (i = 0; i <= PCI_BARMAX_WITH_ROM; i++) {
		switch (pi->pi_bar[i].type) {
		case PCIBAR_NONE:
		case PCIBAR_MEMHI64:
			break;
		case PCIBAR_IO:
			/* I/O address space decoding changed? */
			if (changed & PCIM_CMD_PORTEN) {
				if (new & PCIM_CMD_PORTEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		case PCIBAR_ROM:
			/* Skip (un-)registering the ROM if it is disabled. */
			if (!romen(pi))
				break;
			/* fallthrough */
		case PCIBAR_MEM32:
		case PCIBAR_MEM64:
			/* MMIO address space decoding changed? */
			if (changed & PCIM_CMD_MEMEN) {
				if (new & PCIM_CMD_MEMEN)
					register_bar(pi, i);
				else
					unregister_bar(pi, i);
			}
			break;
		default:
			assert(0);
		}
	}

	/*
	 * If INTx has been unmasked and is pending, assert the
	 * interrupt.
	 */
	pci_lintr_update(pi);
}

static void
pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes)
{
	int rshift;
	uint32_t cmd, old, readonly;

	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */

	/*
	 * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3.
	 *
	 * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are
	 * 'write 1 to clear'. However these bits are not set to '1' by
	 * any device emulation so it is simpler to treat them as readonly.
	 */
	rshift = (coff & 0x3) * 8;
	readonly = 0xFFFFF880 >> rshift;

	old = CFGREAD(pi, coff, bytes);
	new &= ~readonly;
	new |= (old & readonly);
	CFGWRITE(pi, coff, new, bytes);			/* update config */

	pci_emul_cmd_changed(pi, cmd);
}
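
/*
 * Worked example for the mask above: a 2-byte guest write to PCIR_COMMAND
 * (coff 0x04) gives rshift = 0 and readonly = 0xFFFFF880.  In the low
 * 16 bits that leaves 0x077F writable, i.e. command bits 0-6 (the I/O,
 * memory and bus-master enables among them) plus bits 8-10 (SERR#, fast
 * back-to-back, INTx disable); everything else, including the entire status
 * register, keeps its previous value.
 */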

static void
pci_cfgrw(int in, int bus, int slot, int func, int coff, int bytes,
    uint32_t *valp)
{
	struct businfo *bi;
	struct slotinfo *si;
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int idx, needcfg;
	uint64_t addr, bar, mask;

	if ((bi = pci_businfo[bus]) != NULL) {
		si = &bi->slotinfo[slot];
		pi = si->si_funcs[func].fi_devi;
	} else
		pi = NULL;

	/*
	 * Just return if there is no device at this slot:func or if the
	 * guest is doing an unaligned access.
	 */
	if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) ||
	    (coff & (bytes - 1)) != 0) {
		if (in)
			*valp = 0xffffffff;
		return;
	}

	/*
	 * Ignore all writes beyond the standard config space and return all
	 * ones on reads.
	 */
	if (coff >= PCI_REGMAX + 1) {
		if (in) {
			*valp = 0xffffffff;
			/*
			 * Extended capabilities begin at offset 256 in config
			 * space. Absence of extended capabilities is signaled
			 * with all 0s in the extended capability header at
			 * offset 256.
			 */
			if (coff <= PCI_REGMAX + 4)
				*valp = 0x00000000;
		}
		return;
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(pi, coff, bytes, valp);
		} else {
			needcfg = 1;
		}

		if (needcfg)
			*valp = CFGREAD(pi, coff, bytes);

		pci_emul_hdrtype_fixup(bus, slot, coff, bytes, valp);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(pi, coff, bytes, *valp) == 0)
			return;

		/*
		 * Special handling for writes to the BAR and ROM registers
		 */
		if (is_pcir_bar(coff) || is_pcir_bios(coff)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return;

			if (is_pcir_bar(coff)) {
				idx = (coff - PCIR_BAR(0)) / 4;
			} else if (is_pcir_bios(coff)) {
				idx = PCI_ROM_IDX;
			} else {
				errx(4, "%s: invalid BAR offset %d", __func__,
				    coff);
			}

			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *valp & mask;
#if defined(PCI_EMUL_IOMASK)
				addr &= PCI_EMUL_IOMASK;
#endif
				bar = addr | pi->pi_bar[idx].lobits;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				addr = bar = *valp & mask;
				bar |= pi->pi_bar[idx].lobits;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*valp << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
							   PCIBAR_MEMHI64);
				}
				break;
			case PCIBAR_ROM:
				addr = bar = *valp & mask;
				if (memen(pi) && romen(pi)) {
					unregister_bar(pi, idx);
				}
				pi->pi_bar[idx].addr = addr;
				pi->pi_bar[idx].lobits = *valp &
				    PCIM_BIOS_ENABLE;
				/* romen could have changed its value */
				if (memen(pi) && romen(pi)) {
					register_bar(pi, idx);
				}
				bar |= pi->pi_bar[idx].lobits;
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *valp, 0, 0);
		} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
			pci_emul_cmdsts_write(pi, coff, *valp, bytes);
		} else {
			CFGWRITE(pi, coff, *valp, bytes);
		}
	}
}

#ifdef __amd64__
static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff;

static int
pci_emul_cfgaddr(struct vmctx *ctx __unused, int in,
    int port __unused, int bytes, uint32_t *eax, void *arg __unused)
{
	uint32_t x;

	if (bytes != 4) {
		if (in)
			*eax = (bytes == 2) ? 0xffff : 0xff;
		return (0);
	}

	if (in) {
		x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff;
		if (cfgenable)
			x |= CONF1_ENABLE;
		*eax = x;
	} else {
		x = *eax;
		cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE;
		cfgoff = (x & PCI_REGMAX) & ~0x03;
		cfgfunc = (x >> 8) & PCI_FUNCMAX;
		cfgslot = (x >> 11) & PCI_SLOTMAX;
		cfgbus = (x >> 16) & PCI_BUSMAX;
	}

	return (0);
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);

static int
pci_emul_cfgdata(struct vmctx *ctx __unused, int in, int port,
    int bytes, uint32_t *eax, void *arg __unused)
{
	int coff;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	coff = cfgoff + (port - CONF1_DATA_PORT);
	if (cfgenable) {
		pci_cfgrw(in, cfgbus, cfgslot, cfgfunc, coff, bytes, eax);
	} else {
		/* Ignore accesses to cfgdata if not enabled by cfgaddr */
		if (in)
			*eax = 0xffffffff;
	}
	return (0);
}

INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
#endif
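
/*
 * Worked example of the config mechanism #1 handlers above: to read the
 * vendor/device ID of bus 0, slot 3, function 0, a guest writes
 *
 *	0x80000000 | (0 << 16) | (3 << 11) | (0 << 8) | 0x00  =  0x80001800
 *
 * to port 0xcf8 (CONF1_ADDR_PORT) and then reads 4 bytes from port 0xcfc
 * (CONF1_DATA_PORT).  Sub-dword accesses read fewer bytes from one of ports
 * 0xcfc-0xcff; pci_emul_cfgdata() folds the port offset into 'coff'.
 */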

#ifdef BHYVE_SNAPSHOT
/*
 * Saves/restores PCI device emulated state. Returns 0 on success.
 */
static int
pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
{
	struct pci_devinst *pi;
	int i;
	int ret;

	pi = meta->dev_data;

	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);

	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);

	SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
			      meta, ret, done);

	for (i = 0; i < (int)nitems(pi->pi_bar); i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
	}

	/* Snapshot or restore the MSI-X table entries. */
	for (i = 0; i < pi->pi_msix.table_count; i++) {
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
				      meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
				      meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
				      meta, ret, done);
	}

done:
	return (ret);
}

int
pci_snapshot(struct vm_snapshot_meta *meta)
{
	struct pci_devemu *pde;
	struct pci_devinst *pdi;
	int ret;

	assert(meta->dev_name != NULL);

	pdi = meta->dev_data;
	pde = pdi->pi_d;

	if (pde->pe_snapshot == NULL)
		return (ENOTSUP);

	ret = pci_snapshot_pci_dev(meta);
	if (ret == 0)
		ret = (*pde->pe_snapshot)(meta);

	return (ret);
}

int
pci_pause(struct pci_devinst *pdi)
{
	struct pci_devemu *pde = pdi->pi_d;

	if (pde->pe_pause == NULL) {
		/* The pause/resume functionality is optional. */
		return (0);
	}

	return (*pde->pe_pause)(pdi);
}

int
pci_resume(struct pci_devinst *pdi)
{
	struct pci_devemu *pde = pdi->pi_d;

	if (pde->pe_resume == NULL) {
		/* The pause/resume functionality is optional. */
		return (0);
	}

	return (*pde->pe_resume)(pdi);
}
#endif

#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	8
#define DMEMSZ	4096
struct pci_emul_dsoftc {
	uint8_t   ioregs[DIOSZ];
	uint8_t	  memregs[2][DMEMSZ];
};

#define	PCI_EMUL_MSI_MSGS	 4
#define	PCI_EMUL_MSIX_MSGS	16

static int
pci_emul_dinit(struct pci_devinst *pi, nvlist_t *nvl __unused)
{
	int error;
	struct pci_emul_dsoftc *sc;

	sc = calloc(1, sizeof(struct pci_emul_dsoftc));

	pi->pi_arg = sc;

	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);

	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ);
	assert(error == 0);

	return (0);
}
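
/*
 * Illustrative only: because pe_emu is "dummy" (see pci_dummy below), this
 * test device can be instantiated from the bhyve command line like any other
 * emulation, e.g. "-s 5,dummy", assuming PCI_EMUL_TEST is defined at build
 * time.
 */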

static void
pci_emul_diow(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
    uint64_t value)
{
	int i;
	struct pci_emul_dsoftc *sc = pi->pi_arg;

	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("diow: iow too large, offset %ju size %d\n",
			       (uintmax_t)offset, size);
			return;
		}

		if (size == 1) {
			sc->ioregs[offset] = value & 0xff;
		} else if (size == 2) {
			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
		} else if (size == 4) {
			*(uint32_t *)&sc->ioregs[offset] = value;
		} else {
			printf("diow: iow unknown size %d\n", size);
		}

		/*
		 * Special magic value to generate an interrupt
		 */
		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
			pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));

		if (value == 0xabcdef) {
			for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
				pci_generate_msi(pi, i);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("diow: memw too large, offset %ju size %d\n",
			       (uintmax_t)offset, size);
			return;
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			sc->memregs[i][offset] = value;
		} else if (size == 2) {
			*(uint16_t *)&sc->memregs[i][offset] = value;
		} else if (size == 4) {
			*(uint32_t *)&sc->memregs[i][offset] = value;
		} else if (size == 8) {
			*(uint64_t *)&sc->memregs[i][offset] = value;
		} else {
			printf("diow: memw unknown size %d\n", size);
		}

		/*
		 * No interrupt-generating magic values are defined for the
		 * memory BARs.
		 */
	}

	if (baridx > 2 || baridx < 0) {
		printf("diow: unknown bar idx %d\n", baridx);
	}
}
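
/*
 * Illustrative only: from the guest side, the magic values above mean that
 * a 4-byte write to BAR0 offset 4 fires one MSI (selected by the value
 * modulo the message count), while writing the value 0xabcdef fires every
 * allocated MSI in turn, e.g. on x86:
 *
 *	outl(bar0_base + 4, 0xabcdef);	// bar0_base: hypothetical I/O base
 */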

static uint64_t
pci_emul_dior(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
{
	struct pci_emul_dsoftc *sc = pi->pi_arg;
	uint32_t value;
	int i;

	/* Initialize up front so an unknown size cannot return garbage. */
	value = 0;
	if (baridx == 0) {
		if (offset + size > DIOSZ) {
			printf("dior: ior too large, offset %ju size %d\n",
			       (uintmax_t)offset, size);
			return (0);
		}

		if (size == 1) {
			value = sc->ioregs[offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->ioregs[offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->ioregs[offset];
		} else {
			printf("dior: ior unknown size %d\n", size);
		}
	}

	if (baridx == 1 || baridx == 2) {
		if (offset + size > DMEMSZ) {
			printf("dior: memr too large, offset %ju size %d\n",
			       (uintmax_t)offset, size);
			return (0);
		}

		i = baridx - 1;		/* 'memregs' index */

		if (size == 1) {
			value = sc->memregs[i][offset];
		} else if (size == 2) {
			value = *(uint16_t *) &sc->memregs[i][offset];
		} else if (size == 4) {
			value = *(uint32_t *) &sc->memregs[i][offset];
		} else if (size == 8) {
			value = *(uint64_t *) &sc->memregs[i][offset];
		} else {
			printf("dior: memr unknown size %d\n", size);
		}
	}

	if (baridx > 2 || baridx < 0) {
		printf("dior: unknown bar idx %d\n", baridx);
		return (0);
	}

	return (value);
}

#ifdef BHYVE_SNAPSHOT
struct pci_devinst *
pci_next(const struct pci_devinst *cursor)
{
	unsigned bus = 0, slot = 0, func = 0;
	struct businfo *bi;
	struct slotinfo *si;
	struct funcinfo *fi;

	bus = cursor ? cursor->pi_bus : 0;
	slot = cursor ? cursor->pi_slot : 0;
	func = cursor ? (cursor->pi_func + 1) : 0;

	for (; bus < MAXBUSES; bus++) {
		if ((bi = pci_businfo[bus]) == NULL)
			continue;

		if (slot >= MAXSLOTS)
			slot = 0;

		for (; slot < MAXSLOTS; slot++) {
			si = &bi->slotinfo[slot];
			if (func >= MAXFUNCS)
				func = 0;
			for (; func < MAXFUNCS; func++) {
				fi = &si->si_funcs[func];
				if (fi->fi_devi == NULL)
					continue;

				return (fi->fi_devi);
			}
		}
	}

	return (NULL);
}
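
/*
 * Illustrative only: pci_next() is a cursor-style iterator; passing NULL
 * starts a scan and each subsequent call resumes after the previous device,
 * so a full walk over every emulated function looks like:
 *
 *	struct pci_devinst *pdi;
 *
 *	for (pdi = pci_next(NULL); pdi != NULL; pdi = pci_next(pdi))
 *		...use pdi...
 */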

static int
pci_emul_snapshot(struct vm_snapshot_meta *meta __unused)
{
	return (0);
}
#endif

static const struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot = pci_emul_snapshot,
#endif
};
PCI_EMUL_SET(pci_dummy);

#endif /* PCI_EMUL_TEST */