pci_emul.c revision 259301
1257752Semaste/*-
2257752Semaste * Copyright (c) 2011 NetApp, Inc.
3257752Semaste * All rights reserved.
4257752Semaste *
5257752Semaste * Redistribution and use in source and binary forms, with or without
6257752Semaste * modification, are permitted provided that the following conditions
7257752Semaste * are met:
8257752Semaste * 1. Redistributions of source code must retain the above copyright
9257752Semaste *    notice, this list of conditions and the following disclaimer.
10257752Semaste * 2. Redistributions in binary form must reproduce the above copyright
11257752Semaste *    notice, this list of conditions and the following disclaimer in the
12257752Semaste *    documentation and/or other materials provided with the distribution.
13257752Semaste *
14257752Semaste * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15257752Semaste * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16257752Semaste * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17257752Semaste * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18257752Semaste * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19257752Semaste * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20257752Semaste * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21257752Semaste * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22257752Semaste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23257752Semaste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24257752Semaste * SUCH DAMAGE.
25257752Semaste *
26257752Semaste * $FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 259301 2013-12-13 06:59:18Z grehan $
27257752Semaste */
28257752Semaste
29257752Semaste#include <sys/cdefs.h>
30257752Semaste__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyve/pci_emul.c 259301 2013-12-13 06:59:18Z grehan $");
31257752Semaste
32257752Semaste#include <sys/param.h>
33257752Semaste#include <sys/linker_set.h>
34257752Semaste#include <sys/errno.h>
35257752Semaste
36257752Semaste#include <ctype.h>
37257752Semaste#include <stdio.h>
38257752Semaste#include <stdlib.h>
39257752Semaste#include <string.h>
40257752Semaste#include <strings.h>
41257752Semaste#include <assert.h>
42257752Semaste#include <stdbool.h>
43257752Semaste
44257752Semaste#include <machine/vmm.h>
45257752Semaste#include <vmmapi.h>
46257752Semaste
47257752Semaste#include "bhyverun.h"
48257752Semaste#include "inout.h"
49257752Semaste#include "legacy_irq.h"
50257752Semaste#include "mem.h"
51257752Semaste#include "pci_emul.h"
52257752Semaste#include "ioapic.h"
53257752Semaste
54257752Semaste#define CONF1_ADDR_PORT    0x0cf8
55257752Semaste#define CONF1_DATA_PORT    0x0cfc
56257752Semaste
57257752Semaste#define CONF1_ENABLE	   0x80000000ul
58257752Semaste
59257752Semaste#define	CFGWRITE(pi,off,val,b)						\
60257752Semastedo {									\
61257752Semaste	if ((b) == 1) {							\
62257752Semaste		pci_set_cfgdata8((pi),(off),(val));			\
63257752Semaste	} else if ((b) == 2) {						\
64257752Semaste		pci_set_cfgdata16((pi),(off),(val));			\
65257752Semaste	} else {							\
66257752Semaste		pci_set_cfgdata32((pi),(off),(val));			\
67257752Semaste	}								\
68257752Semaste} while (0)
69257752Semaste
70257752Semaste#define MAXSLOTS	(PCI_SLOTMAX + 1)
71257752Semaste#define	MAXFUNCS	(PCI_FUNCMAX + 1)
72257752Semaste
73257752Semastestatic struct slotinfo {
74257752Semaste	char	*si_name;
75257752Semaste	char	*si_param;
76257752Semaste	struct pci_devinst *si_devi;
77257752Semaste	int	si_legacy;
78257752Semaste} pci_slotinfo[MAXSLOTS][MAXFUNCS];
79257752Semaste
80257752SemasteSET_DECLARE(pci_devemu_set, struct pci_devemu);
81257752Semaste
82257752Semastestatic uint64_t pci_emul_iobase;
83257752Semastestatic uint64_t pci_emul_membase32;
84257752Semastestatic uint64_t pci_emul_membase64;
85257752Semaste
86257752Semaste#define	PCI_EMUL_IOBASE		0x2000
87257752Semaste#define	PCI_EMUL_IOLIMIT	0x10000
88257752Semaste
89257752Semaste#define	PCI_EMUL_MEMLIMIT32	0xE0000000		/* 3.5GB */
90257752Semaste
91257752Semaste#define	PCI_EMUL_MEMBASE64	0xD000000000UL
92257752Semaste#define	PCI_EMUL_MEMLIMIT64	0xFD00000000UL
93257752Semaste
94257752Semastestatic struct pci_devemu *pci_emul_finddev(char *name);
95257752Semaste
96257752Semastestatic int pci_emul_devices;
97
98/*
99 * I/O access
100 */
101
102/*
103 * Slot options are in the form:
104 *
105 *  <slot>[:<func>],<emul>[,<config>]
106 *
107 *  slot is 0..31
108 *  func is 0..7
109 *  emul is a string describing the type of PCI device e.g. virtio-net
110 *  config is an optional string, depending on the device, that can be
111 *  used for configuration.
112 *   Examples are:
113 *     1,virtio-net,tap0
114 *     3:0,dummy
115 */
/*
 * Print a diagnostic when a PCI slot option string cannot be parsed.
 * 'aopt' is the original, unmodified option string from the command line.
 */
static void
pci_parse_slot_usage(char *aopt)
{

	fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt);
}
122
123int
124pci_parse_slot(char *opt, int legacy)
125{
126	char *slot, *func, *emul, *config;
127	char *str, *cpy;
128	int error, snum, fnum;
129
130	error = -1;
131	str = cpy = strdup(opt);
132
133        slot = strsep(&str, ",");
134        func = NULL;
135        if (strchr(slot, ':') != NULL) {
136		func = cpy;
137		(void) strsep(&func, ":");
138        }
139
140	emul = strsep(&str, ",");
141	config = str;
142
143	if (emul == NULL) {
144		pci_parse_slot_usage(opt);
145		goto done;
146	}
147
148	snum = atoi(slot);
149	fnum = func ? atoi(func) : 0;
150
151	if (snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) {
152		pci_parse_slot_usage(opt);
153		goto done;
154	}
155
156	if (pci_slotinfo[snum][fnum].si_name != NULL) {
157		fprintf(stderr, "pci slot %d:%d already occupied!\n",
158			snum, fnum);
159		goto done;
160	}
161
162	if (pci_emul_finddev(emul) == NULL) {
163		fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n",
164			snum, fnum, emul);
165		goto done;
166	}
167
168	error = 0;
169	pci_slotinfo[snum][fnum].si_name = emul;
170	pci_slotinfo[snum][fnum].si_param = config;
171	pci_slotinfo[snum][fnum].si_legacy = legacy;
172
173done:
174	if (error)
175		free(cpy);
176
177	return (error);
178}
179
180static int
181pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset)
182{
183
184	if (offset < pi->pi_msix.pba_offset)
185		return (0);
186
187	if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) {
188		return (0);
189	}
190
191	return (1);
192}
193
194int
195pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size,
196		     uint64_t value)
197{
198	int msix_entry_offset;
199	int tab_index;
200	char *dest;
201
202	/* support only 4 or 8 byte writes */
203	if (size != 4 && size != 8)
204		return (-1);
205
206	/*
207	 * Return if table index is beyond what device supports
208	 */
209	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
210	if (tab_index >= pi->pi_msix.table_count)
211		return (-1);
212
213	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
214
215	/* support only aligned writes */
216	if ((msix_entry_offset % size) != 0)
217		return (-1);
218
219	dest = (char *)(pi->pi_msix.table + tab_index);
220	dest += msix_entry_offset;
221
222	if (size == 4)
223		*((uint32_t *)dest) = value;
224	else
225		*((uint64_t *)dest) = value;
226
227	return (0);
228}
229
230uint64_t
231pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size)
232{
233	char *dest;
234	int msix_entry_offset;
235	int tab_index;
236	uint64_t retval = ~0;
237
238	/*
239	 * The PCI standard only allows 4 and 8 byte accesses to the MSI-X
240	 * table but we also allow 1 byte access to accomodate reads from
241	 * ddb.
242	 */
243	if (size != 1 && size != 4 && size != 8)
244		return (retval);
245
246	msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
247
248	/* support only aligned reads */
249	if ((msix_entry_offset % size) != 0) {
250		return (retval);
251	}
252
253	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;
254
255	if (tab_index < pi->pi_msix.table_count) {
256		/* valid MSI-X Table access */
257		dest = (char *)(pi->pi_msix.table + tab_index);
258		dest += msix_entry_offset;
259
260		if (size == 1)
261			retval = *((uint8_t *)dest);
262		else if (size == 4)
263			retval = *((uint32_t *)dest);
264		else
265			retval = *((uint64_t *)dest);
266	} else if (pci_valid_pba_offset(pi, offset)) {
267		/* return 0 for PBA access */
268		retval = 0;
269	}
270
271	return (retval);
272}
273
274int
275pci_msix_table_bar(struct pci_devinst *pi)
276{
277
278	if (pi->pi_msix.table != NULL)
279		return (pi->pi_msix.table_bar);
280	else
281		return (-1);
282}
283
284int
285pci_msix_pba_bar(struct pci_devinst *pi)
286{
287
288	if (pi->pi_msix.table != NULL)
289		return (pi->pi_msix.pba_bar);
290	else
291		return (-1);
292}
293
294static int
295pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
296		    uint32_t *eax, void *arg)
297{
298	struct pci_devinst *pdi = arg;
299	struct pci_devemu *pe = pdi->pi_d;
300	uint64_t offset;
301	int i;
302
303	for (i = 0; i <= PCI_BARMAX; i++) {
304		if (pdi->pi_bar[i].type == PCIBAR_IO &&
305		    port >= pdi->pi_bar[i].addr &&
306		    port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
307			offset = port - pdi->pi_bar[i].addr;
308			if (in)
309				*eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
310							 offset, bytes);
311			else
312				(*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
313						   bytes, *eax);
314			return (0);
315		}
316	}
317	return (-1);
318}
319
320static int
321pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
322		     int size, uint64_t *val, void *arg1, long arg2)
323{
324	struct pci_devinst *pdi = arg1;
325	struct pci_devemu *pe = pdi->pi_d;
326	uint64_t offset;
327	int bidx = (int) arg2;
328
329	assert(bidx <= PCI_BARMAX);
330	assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
331	       pdi->pi_bar[bidx].type == PCIBAR_MEM64);
332	assert(addr >= pdi->pi_bar[bidx].addr &&
333	       addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
334
335	offset = addr - pdi->pi_bar[bidx].addr;
336
337	if (dir == MEM_F_WRITE)
338		(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val);
339	else
340		*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size);
341
342	return (0);
343}
344
345
/*
 * Carve a naturally-aligned chunk of 'size' bytes out of the resource
 * window starting at *baseptr.  On success the allocated address is
 * stored in *addr, *baseptr is advanced past the allocation and 0 is
 * returned; -1 is returned when the window 'limit' would be exceeded.
 */
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
			uint64_t *addr)
{
	uint64_t base;

	assert((size & (size - 1)) == 0);	/* must be a power of 2 */

	/* Round the base up to the (power of 2) allocation size */
	base = (*baseptr + size - 1) & ~(size - 1);

	if (base + size > limit)
		return (-1);

	*addr = base;
	*baseptr = base + size;
	return (0);
}
363
/*
 * Allocate a BAR with no fixed host base address; thin wrapper around
 * pci_emul_alloc_pbar().
 */
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
		   uint64_t size)
{

	return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
}
371
372/*
373 * Register (or unregister) the MMIO or I/O region associated with the BAR
374 * register 'idx' of an emulated pci device.
375 */
376static void
377modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
378{
379	int error;
380	struct inout_port iop;
381	struct mem_range mr;
382
383	switch (pi->pi_bar[idx].type) {
384	case PCIBAR_IO:
385		bzero(&iop, sizeof(struct inout_port));
386		iop.name = pi->pi_name;
387		iop.port = pi->pi_bar[idx].addr;
388		iop.size = pi->pi_bar[idx].size;
389		if (registration) {
390			iop.flags = IOPORT_F_INOUT;
391			iop.handler = pci_emul_io_handler;
392			iop.arg = pi;
393			error = register_inout(&iop);
394		} else
395			error = unregister_inout(&iop);
396		break;
397	case PCIBAR_MEM32:
398	case PCIBAR_MEM64:
399		bzero(&mr, sizeof(struct mem_range));
400		mr.name = pi->pi_name;
401		mr.base = pi->pi_bar[idx].addr;
402		mr.size = pi->pi_bar[idx].size;
403		if (registration) {
404			mr.flags = MEM_F_RW;
405			mr.handler = pci_emul_mem_handler;
406			mr.arg1 = pi;
407			mr.arg2 = idx;
408			error = register_mem(&mr);
409		} else
410			error = unregister_mem(&mr);
411		break;
412	default:
413		error = EINVAL;
414		break;
415	}
416	assert(error == 0);
417}
418
/* Stop decoding the address range described by BAR register 'idx'. */
static void
unregister_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 0);
}
425
/* Start decoding the address range described by BAR register 'idx'. */
static void
register_bar(struct pci_devinst *pi, int idx)
{

	modify_bar_registration(pi, idx, 1);
}
432
433/* Are we decoding i/o port accesses for the emulated pci device? */
434static int
435porten(struct pci_devinst *pi)
436{
437	uint16_t cmd;
438
439	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
440
441	return (cmd & PCIM_CMD_PORTEN);
442}
443
444/* Are we decoding memory accesses for the emulated pci device? */
445static int
446memen(struct pci_devinst *pi)
447{
448	uint16_t cmd;
449
450	cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
451
452	return (cmd & PCIM_CMD_MEMEN);
453}
454
455/*
456 * Update the MMIO or I/O address that is decoded by the BAR register.
457 *
458 * If the pci device has enabled the address space decoding then intercept
459 * the address range decoded by the BAR register.
460 */
461static void
462update_bar_address(struct  pci_devinst *pi, uint64_t addr, int idx, int type)
463{
464	int decode;
465
466	if (pi->pi_bar[idx].type == PCIBAR_IO)
467		decode = porten(pi);
468	else
469		decode = memen(pi);
470
471	if (decode)
472		unregister_bar(pi, idx);
473
474	switch (type) {
475	case PCIBAR_IO:
476	case PCIBAR_MEM32:
477		pi->pi_bar[idx].addr = addr;
478		break;
479	case PCIBAR_MEM64:
480		pi->pi_bar[idx].addr &= ~0xffffffffUL;
481		pi->pi_bar[idx].addr |= addr;
482		break;
483	case PCIBAR_MEMHI64:
484		pi->pi_bar[idx].addr &= 0xffffffff;
485		pi->pi_bar[idx].addr |= addr;
486		break;
487	default:
488		assert(0);
489	}
490
491	if (decode)
492		register_bar(pi, idx);
493}
494
/*
 * Allocate address space for BAR register 'idx' of device 'pdi' and
 * initialize the BAR in config space.  'hostbase' is a fixed address
 * honoured for legacy i/o BARs and for the 4GB peer-peer DMA special
 * case; pass 0 to allocate from the emulation's own resource windows.
 *
 * Returns 0 on success or the error from pci_emul_alloc_resource().
 */
int
pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
		    enum pcibar_type type, uint64_t size)
{
	int error;
	uint64_t *baseptr, limit, addr, mask, lobits, bar;

	assert(idx >= 0 && idx <= PCI_BARMAX);

	if ((size & (size - 1)) != 0)
		size = 1UL << flsl(size);	/* round up to a power of 2 */

	/* Enforce minimum BAR sizes required by the PCI standard */
	if (type == PCIBAR_IO) {
		if (size < 4)
			size = 4;
	} else {
		if (size < 16)
			size = 16;
	}

	switch (type) {
	case PCIBAR_NONE:
		baseptr = NULL;
		addr = mask = lobits = 0;
		break;
	case PCIBAR_IO:
		/* Legacy devices may request a fixed host i/o port base */
		if (hostbase &&
		    pci_slotinfo[pdi->pi_slot][pdi->pi_func].si_legacy) {
			assert(hostbase < PCI_EMUL_IOBASE);
			baseptr = &hostbase;
		} else {
			baseptr = &pci_emul_iobase;
		}
		limit = PCI_EMUL_IOLIMIT;
		mask = PCIM_BAR_IO_BASE;
		lobits = PCIM_BAR_IO_SPACE;
		break;
	case PCIBAR_MEM64:
		/*
		 * XXX
		 * Some drivers do not work well if the 64-bit BAR is allocated
		 * above 4GB. Allow for this by allocating small requests under
		 * 4GB unless then allocation size is larger than some arbitrary
		 * number (32MB currently).
		 */
		if (size > 32 * 1024 * 1024) {
			/*
			 * XXX special case for device requiring peer-peer DMA
			 */
			if (size == 0x100000000UL)
				baseptr = &hostbase;
			else
				baseptr = &pci_emul_membase64;
			limit = PCI_EMUL_MEMLIMIT64;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				 PCIM_BAR_MEM_PREFETCH;
			break;
		} else {
			/* Small 64-bit BARs are placed below 4GB */
			baseptr = &pci_emul_membase32;
			limit = PCI_EMUL_MEMLIMIT32;
			mask = PCIM_BAR_MEM_BASE;
			lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
		}
		break;
	case PCIBAR_MEM32:
		baseptr = &pci_emul_membase32;
		limit = PCI_EMUL_MEMLIMIT32;
		mask = PCIM_BAR_MEM_BASE;
		lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
		break;
	default:
		printf("pci_emul_alloc_base: invalid bar type %d\n", type);
		assert(0);
	}

	if (baseptr != NULL) {
		error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
		if (error != 0)
			return (error);
	}

	pdi->pi_bar[idx].type = type;
	pdi->pi_bar[idx].addr = addr;
	pdi->pi_bar[idx].size = size;

	/* Initialize the BAR register in config space */
	bar = (addr & mask) | lobits;
	pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);

	if (type == PCIBAR_MEM64) {
		/* A 64-bit BAR consumes the next BAR register as well */
		assert(idx + 1 <= PCI_BARMAX);
		pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
		pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
	}

	register_bar(pdi, idx);

	return (0);
}
596
#define	CAP_START_OFFSET	0x40
/*
 * Append a capability structure 'capdata' of 'caplen' bytes to the
 * device's capability list.  The list is terminated by a reserved
 * (id PCIY_RESERVED) capability that serves as an end marker.
 * Returns 0 on success or -1 if config space would overflow.
 */
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
{
	int i, capoff, capid, reallen;
	uint16_t sts;

	static u_char endofcap[4] = {
		PCIY_RESERVED, 0, 0, 0
	};

	assert(caplen > 0 && capdata[0] != PCIY_RESERVED);

	reallen = roundup2(caplen, 4);		/* dword aligned */

	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
	if ((sts & PCIM_STATUS_CAPPRESENT) == 0) {
		/* First capability: start the list */
		capoff = CAP_START_OFFSET;
		pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff);
		pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT);
	} else {
		/* Walk the list until the end marker is found */
		capoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
		while (1) {
			assert((capoff & 0x3) == 0);
			capid = pci_get_cfgdata8(pi, capoff);
			if (capid == PCIY_RESERVED)
				break;
			capoff = pci_get_cfgdata8(pi, capoff + 1);
		}
	}

	/* Check if we have enough space */
	if (capoff + reallen + sizeof(endofcap) > PCI_REGMAX + 1)
		return (-1);

	/* Copy the capability */
	for (i = 0; i < caplen; i++)
		pci_set_cfgdata8(pi, capoff + i, capdata[i]);

	/* Set the next capability pointer */
	pci_set_cfgdata8(pi, capoff + 1, capoff + reallen);

	/* Copy of the reserved capability which serves as the end marker */
	for (i = 0; i < sizeof(endofcap); i++)
		pci_set_cfgdata8(pi, capoff + reallen + i, endofcap[i]);

	return (0);
}
645
646static struct pci_devemu *
647pci_emul_finddev(char *name)
648{
649	struct pci_devemu **pdpp, *pdp;
650
651	SET_FOREACH(pdpp, pci_devemu_set) {
652		pdp = *pdpp;
653		if (!strcmp(pdp->pe_emu, name)) {
654			return (pdp);
655		}
656	}
657
658	return (NULL);
659}
660
661static int
662pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func,
663	      char *params)
664{
665	struct pci_devinst *pdi;
666	int err;
667
668	pdi = malloc(sizeof(struct pci_devinst));
669	bzero(pdi, sizeof(*pdi));
670
671	pdi->pi_vmctx = ctx;
672	pdi->pi_bus = 0;
673	pdi->pi_slot = slot;
674	pdi->pi_func = func;
675	pdi->pi_lintr_pin = -1;
676	pdi->pi_d = pde;
677	snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot);
678
679	/* Disable legacy interrupts */
680	pci_set_cfgdata8(pdi, PCIR_INTLINE, 255);
681	pci_set_cfgdata8(pdi, PCIR_INTPIN, 0);
682
683	pci_set_cfgdata8(pdi, PCIR_COMMAND,
684		    PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN);
685
686	err = (*pde->pe_init)(ctx, pdi, params);
687	if (err != 0) {
688		free(pdi);
689	} else {
690		pci_emul_devices++;
691		pci_slotinfo[slot][func].si_devi = pdi;
692	}
693
694	return (err);
695}
696
/*
 * Fill in an MSI capability structure advertising 'msgnum' messages
 * (must be a power of 2 in [1, 32]) with 64-bit address support.
 * 'nextptr' is the config-space offset of the next capability.
 */
void
pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr)
{
	int mmc;

	CTASSERT(sizeof(struct msicap) == 14);

	/* Number of msi messages must be a power of 2 between 1 and 32 */
	assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32);
	mmc = ffs(msgnum) - 1;

	bzero(msicap, sizeof(struct msicap));
	msicap->capid = PCIY_MSI;
	msicap->nextptr = nextptr;
	/* Multiple Message Capable field is log2(msgnum), at bits 3:1 */
	msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1);
}
713
/*
 * Add an MSI capability advertising 'msgnum' messages to the device's
 * capability list.  Returns the result of pci_emul_add_capability().
 */
int
pci_emul_add_msicap(struct pci_devinst *pi, int msgnum)
{
	struct msicap msicap;

	pci_populate_msicap(&msicap, msgnum, 0);

	return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap)));
}
723
/*
 * Fill in an MSI-X capability structure for 'msgnum' vectors whose table
 * lives in BAR 'barnum'.  'msix_tab_size' is the 4K-aligned size of the
 * MSI-X table; the PBA is placed immediately after it in the same BAR.
 */
static void
pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum,
		     uint32_t msix_tab_size, int nextptr)
{
	CTASSERT(sizeof(struct msixcap) == 12);

	assert(msix_tab_size % 4096 == 0);

	bzero(msixcap, sizeof(struct msixcap));
	msixcap->capid = PCIY_MSIX;
	msixcap->nextptr = nextptr;

	/*
	 * Message Control Register, all fields set to
	 * zero except for the Table Size.
	 * Note: Table size N is encoded as N-1
	 */
	msixcap->msgctrl = msgnum - 1;

	/*
	 * MSI-X BAR setup:
	 * - MSI-X table start at offset 0
	 * - PBA table starts at a 4K aligned offset after the MSI-X table
	 */
	msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK;
	msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK);
}
751
752static void
753pci_msix_table_init(struct pci_devinst *pi, int table_entries)
754{
755	int i, table_size;
756
757	assert(table_entries > 0);
758	assert(table_entries <= MAX_MSIX_TABLE_ENTRIES);
759
760	table_size = table_entries * MSIX_TABLE_ENTRY_SIZE;
761	pi->pi_msix.table = malloc(table_size);
762	bzero(pi->pi_msix.table, table_size);
763
764	/* set mask bit of vector control register */
765	for (i = 0; i < table_entries; i++)
766		pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK;
767}
768
/*
 * Add an MSI-X capability with 'msgnum' vectors to the device and
 * allocate a 32-bit memory BAR 'barnum' that holds the MSI-X table
 * followed by the PBA.  Returns the result of pci_emul_add_capability().
 */
int
pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum)
{
	uint16_t pba_index;
	uint32_t tab_size;
	struct msixcap msixcap;

	assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES);
	assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0);

	tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE;

	/* Align table size to nearest 4K */
	tab_size = roundup2(tab_size, 4096);

	pi->pi_msix.table_bar = barnum;
	pi->pi_msix.pba_bar   = barnum;
	pi->pi_msix.table_offset = 0;
	pi->pi_msix.table_count = msgnum;
	pi->pi_msix.pba_offset = tab_size;

	/* calculate the MMIO size required for MSI-X PBA */
	pba_index = (msgnum - 1) / (PBA_TABLE_ENTRY_SIZE * 8);
	pi->pi_msix.pba_size = (pba_index + 1) * PBA_TABLE_ENTRY_SIZE;

	pci_msix_table_init(pi, msgnum);

	pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size, 0);

	/* allocate memory for MSI-X Table and PBA */
	pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32,
				tab_size + pi->pi_msix.pba_size);

	return (pci_emul_add_capability(pi, (u_char *)&msixcap,
					sizeof(msixcap)));
}
805
806void
807msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
808		 int bytes, uint32_t val)
809{
810	uint16_t msgctrl, rwmask;
811	int off, table_bar;
812
813	off = offset - capoff;
814	table_bar = pi->pi_msix.table_bar;
815	/* Message Control Register */
816	if (off == 2 && bytes == 2) {
817		rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK;
818		msgctrl = pci_get_cfgdata16(pi, offset);
819		msgctrl &= ~rwmask;
820		msgctrl |= val & rwmask;
821		val = msgctrl;
822
823		pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE;
824		pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK;
825	}
826
827	CFGWRITE(pi, offset, val, bytes);
828}
829
/*
 * Config-space write handler for the MSI capability.  Refreshes the
 * shadow MSI state (cpu, vector, message count) that pci_generate_msi()
 * consults, then commits the (masked) value to config space.
 */
void
msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		int bytes, uint32_t val)
{
	uint16_t msgctrl, rwmask, msgdata, mme;
	uint32_t addrlo;

	/*
	 * If guest is writing to the message control register make sure
	 * we do not overwrite read-only fields.
	 */
	if ((offset - capoff) == 2 && bytes == 2) {
		rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE;
		msgctrl = pci_get_cfgdata16(pi, offset);
		msgctrl &= ~rwmask;
		msgctrl |= val & rwmask;
		val = msgctrl;

		/* Message data follows either a 64-bit or 32-bit address */
		addrlo = pci_get_cfgdata32(pi, capoff + 4);
		if (msgctrl & PCIM_MSICTRL_64BIT)
			msgdata = pci_get_cfgdata16(pi, capoff + 12);
		else
			msgdata = pci_get_cfgdata16(pi, capoff + 8);

		/*
		 * XXX check delivery mode, destination mode etc
		 */
		mme = msgctrl & PCIM_MSICTRL_MME_MASK;
		pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
		if (pi->pi_msi.enabled) {
			/* Destination APIC id and vector from address/data */
			pi->pi_msi.cpu = (addrlo >> 12) & 0xff;
			pi->pi_msi.vector = msgdata & 0xff;
			/* MME field encodes 2^n enabled messages */
			pi->pi_msi.msgnum = 1 << (mme >> 4);
		} else {
			pi->pi_msi.cpu = 0;
			pi->pi_msi.vector = 0;
			pi->pi_msi.msgnum = 0;
		}
	}

	CFGWRITE(pi, offset, val, bytes);
}
872
/*
 * Config-space write handler for the PCI Express capability.
 * XXX read-only fields are not yet protected from guest writes.
 */
void
pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
		 int bytes, uint32_t val)
{

	/* XXX don't write to the readonly parts */
	CFGWRITE(pi, offset, val, bytes);
}
881
882#define	PCIECAP_VERSION	0x2
883int
884pci_emul_add_pciecap(struct pci_devinst *pi, int type)
885{
886	int err;
887	struct pciecap pciecap;
888
889	CTASSERT(sizeof(struct pciecap) == 60);
890
891	if (type != PCIEM_TYPE_ROOT_PORT)
892		return (-1);
893
894	bzero(&pciecap, sizeof(pciecap));
895
896	pciecap.capid = PCIY_EXPRESS;
897	pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT;
898	pciecap.link_capabilities = 0x411;	/* gen1, x1 */
899	pciecap.link_status = 0x11;		/* gen1, x1 */
900
901	err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap));
902	return (err);
903}
904
/*
 * This function assumes that 'coff' is in the capabilities region of the
 * config space.
 */
static void
pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val)
{
	int capid;
	uint8_t capoff, nextoff;

	/* Do not allow un-aligned writes */
	if ((offset & (bytes - 1)) != 0)
		return;

	/* Find the capability that we want to update */
	capoff = CAP_START_OFFSET;
	while (1) {
		capid = pci_get_cfgdata8(pi, capoff);
		if (capid == PCIY_RESERVED)
			break;

		nextoff = pci_get_cfgdata8(pi, capoff + 1);
		if (offset >= capoff && offset < nextoff)
			break;

		capoff = nextoff;
	}
	assert(offset >= capoff);

	/*
	 * Capability ID and Next Capability Pointer are readonly.
	 * However, some o/s's do 4-byte writes that include these.
	 * For this case, trim the write back to 2 bytes and adjust
	 * the data.
	 */
	if (offset == capoff || offset == capoff + 1) {
		if (offset == capoff && bytes == 4) {
			bytes = 2;
			offset += 2;
			val >>= 16;
		} else
			return;
	}

	/* Dispatch to the per-capability write handler */
	switch (capid) {
	case PCIY_MSI:
		msicap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_MSIX:
		msixcap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	case PCIY_EXPRESS:
		pciecap_cfgwrite(pi, capoff, offset, bytes, val);
		break;
	default:
		break;
	}
}
963
964static int
965pci_emul_iscap(struct pci_devinst *pi, int offset)
966{
967	int found;
968	uint16_t sts;
969	uint8_t capid, lastoff;
970
971	found = 0;
972	sts = pci_get_cfgdata16(pi, PCIR_STATUS);
973	if ((sts & PCIM_STATUS_CAPPRESENT) != 0) {
974		lastoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR);
975		while (1) {
976			assert((lastoff & 0x3) == 0);
977			capid = pci_get_cfgdata8(pi, lastoff);
978			if (capid == PCIY_RESERVED)
979				break;
980			lastoff = pci_get_cfgdata8(pi, lastoff + 1);
981		}
982		if (offset >= CAP_START_OFFSET && offset <= lastoff)
983			found = 1;
984	}
985	return (found);
986}
987
988static int
989pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
990			  int size, uint64_t *val, void *arg1, long arg2)
991{
992	/*
993	 * Ignore writes; return 0xff's for reads. The mem read code
994	 * will take care of truncating to the correct size.
995	 */
996	if (dir == MEM_F_READ) {
997		*val = 0xffffffffffffffff;
998	}
999
1000	return (0);
1001}
1002
/*
 * Initialize the PCI emulation: instantiate every device configured via
 * pci_parse_slot() and register a fallback handler covering the PCI hole
 * so that unclaimed accesses read as 0xff.  Returns 0 on success or the
 * first device init error.
 */
int
init_pci(struct vmctx *ctx)
{
	struct mem_range memp;
	struct pci_devemu *pde;
	struct slotinfo *si;
	size_t lowmem;
	int slot, func;
	int error;

	/* Reset the emulation's resource allocation windows */
	pci_emul_iobase = PCI_EMUL_IOBASE;
	pci_emul_membase32 = vm_get_lowmem_limit(ctx);
	pci_emul_membase64 = PCI_EMUL_MEMBASE64;

	for (slot = 0; slot < MAXSLOTS; slot++) {
		for (func = 0; func < MAXFUNCS; func++) {
			si = &pci_slotinfo[slot][func];
			if (si->si_name != NULL) {
				/* Was validated by pci_parse_slot() */
				pde = pci_emul_finddev(si->si_name);
				assert(pde != NULL);
				error = pci_emul_init(ctx, pde, slot, func,
					    si->si_param);
				if (error)
					return (error);
			}
		}
	}

	/*
	 * The guest physical memory map looks like the following:
	 * [0,		    lowmem)		guest system memory
	 * [lowmem,	    lowmem_limit)	memory hole (may be absent)
	 * [lowmem_limit,   4GB)		PCI hole (32-bit BAR allocation)
	 * [4GB,	    4GB + highmem)
	 *
	 * Accesses to memory addresses that are not allocated to system
	 * memory or PCI devices return 0xff's.
	 */
	error = vm_get_memory_seg(ctx, 0, &lowmem, NULL);
	assert(error == 0);

	memset(&memp, 0, sizeof(struct mem_range));
	memp.name = "PCI hole";
	memp.flags = MEM_F_RW;
	memp.base = lowmem;
	memp.size = (4ULL * 1024 * 1024 * 1024) - lowmem;
	memp.handler = pci_emul_fallback_handler;

	error = register_mem_fallback(&memp);
	assert(error == 0);

	return (0);
}
1056
/* Return non-zero if the guest has enabled MSI for this device. */
int
pci_msi_enabled(struct pci_devinst *pi)
{
	return (pi->pi_msi.enabled);
}
1062
1063int
1064pci_msi_msgnum(struct pci_devinst *pi)
1065{
1066	if (pi->pi_msi.enabled)
1067		return (pi->pi_msi.msgnum);
1068	else
1069		return (0);
1070}
1071
/*
 * MSI-X is considered active only when it is enabled and MSI is not;
 * a function must not use both mechanisms at once.
 */
int
pci_msix_enabled(struct pci_devinst *pi)
{

	return (pi->pi_msix.enabled && !pi->pi_msi.enabled);
}
1078
1079void
1080pci_generate_msix(struct pci_devinst *pi, int index)
1081{
1082	struct msix_table_entry *mte;
1083
1084	if (!pci_msix_enabled(pi))
1085		return;
1086
1087	if (pi->pi_msix.function_mask)
1088		return;
1089
1090	if (index >= pi->pi_msix.table_count)
1091		return;
1092
1093	mte = &pi->pi_msix.table[index];
1094	if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
1095		/* XXX Set PBA bit if interrupt is disabled */
1096		vm_lapic_irq(pi->pi_vmctx,
1097			     (mte->addr >> 12) & 0xff, mte->msg_data & 0xff);
1098	}
1099}
1100
1101void
1102pci_generate_msi(struct pci_devinst *pi, int msg)
1103{
1104
1105	if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) {
1106		vm_lapic_irq(pi->pi_vmctx,
1107			     pi->pi_msi.cpu,
1108			     pi->pi_msi.vector + msg);
1109	}
1110}
1111
/* Was this slot/function configured with the legacy flag? */
int
pci_is_legacy(struct pci_devinst *pi)
{

	return (pci_slotinfo[pi->pi_slot][pi->pi_func].si_legacy);
}
1118
/*
 * Allocate a legacy interrupt line for the device (legacy_irq_alloc()
 * interprets 'req' — presumably a requested IRQ number; confirm against
 * legacy_irq.c) and program INTLINE/INTPIN in config space.
 * Returns 0 on success, -1 if no IRQ could be allocated.
 */
int
pci_lintr_request(struct pci_devinst *pi, int req)
{
	int irq;

	irq = legacy_irq_alloc(req);
	if (irq < 0)
		return (-1);

	pi->pi_lintr_pin = irq;
	pci_set_cfgdata8(pi, PCIR_INTLINE, irq);
	pci_set_cfgdata8(pi, PCIR_INTPIN, 1);
	return (0);
}
1133
/* Assert the device's legacy interrupt pin via the emulated ioapic. */
void
pci_lintr_assert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr_pin >= 0);
	ioapic_assert_pin(pi->pi_vmctx, pi->pi_lintr_pin);
}
1141
/* Deassert the device's legacy interrupt pin via the emulated ioapic. */
void
pci_lintr_deassert(struct pci_devinst *pi)
{

	assert(pi->pi_lintr_pin >= 0);
	ioapic_deassert_pin(pi->pi_vmctx, pi->pi_lintr_pin);
}
1149
1150/*
1151 * Return 1 if the emulated device in 'slot' is a multi-function device.
1152 * Return 0 otherwise.
1153 */
1154static int
1155pci_emul_is_mfdev(int slot)
1156{
1157	int f, numfuncs;
1158
1159	numfuncs = 0;
1160	for (f = 0; f < MAXFUNCS; f++) {
1161		if (pci_slotinfo[slot][f].si_devi != NULL) {
1162			numfuncs++;
1163		}
1164	}
1165	return (numfuncs > 1);
1166}
1167
1168/*
1169 * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on
1170 * whether or not is a multi-function being emulated in the pci 'slot'.
1171 */
1172static void
1173pci_emul_hdrtype_fixup(int slot, int off, int bytes, uint32_t *rv)
1174{
1175	int mfdev;
1176
1177	if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) {
1178		mfdev = pci_emul_is_mfdev(slot);
1179		switch (bytes) {
1180		case 1:
1181		case 2:
1182			*rv &= ~PCIM_MFDEV;
1183			if (mfdev) {
1184				*rv |= PCIM_MFDEV;
1185			}
1186			break;
1187		case 4:
1188			*rv &= ~(PCIM_MFDEV << 16);
1189			if (mfdev) {
1190				*rv |= (PCIM_MFDEV << 16);
1191			}
1192			break;
1193		}
1194	}
1195}
1196
/*
 * Current CONFIG_ADDRESS selection (bus/slot/function and register
 * offset) applied to subsequent CONFIG_DATA accesses.
 */
static int cfgbus, cfgslot, cfgfunc, cfgoff;
1198
1199static int
1200pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
1201		 uint32_t *eax, void *arg)
1202{
1203	uint32_t x;
1204
1205	if (bytes != 4) {
1206		if (in)
1207			*eax = (bytes == 2) ? 0xffff : 0xff;
1208		return (0);
1209	}
1210
1211	if (in) {
1212		x = (cfgbus << 16) |
1213		    (cfgslot << 11) |
1214		    (cfgfunc << 8) |
1215		    cfgoff;
1216		*eax = x | CONF1_ENABLE;
1217	} else {
1218		x = *eax;
1219		cfgoff = x & PCI_REGMAX;
1220		cfgfunc = (x >> 8) & PCI_FUNCMAX;
1221		cfgslot = (x >> 11) & PCI_SLOTMAX;
1222		cfgbus = (x >> 16) & PCI_BUSMAX;
1223	}
1224
1225	return (0);
1226}
/* Register the handler on the standard config-address port (0xcf8) */
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr);
1228
/*
 * Return the subset of 'mask' bits whose value differs between 'old'
 * and 'new'.  A non-zero result means at least one masked bit toggled.
 */
static uint32_t
bits_changed(uint32_t old, uint32_t new, uint32_t mask)
{
	uint32_t diff;

	diff = old ^ new;
	return (diff & mask);
}
1235
1236static void
1237pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
1238{
1239	int i;
1240	uint16_t old;
1241
1242	/*
1243	 * The command register is at an offset of 4 bytes and thus the
1244	 * guest could write 1, 2 or 4 bytes starting at this offset.
1245	 */
1246
1247	old = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* stash old value */
1248	CFGWRITE(pi, PCIR_COMMAND, new, bytes);		/* update config */
1249	new = pci_get_cfgdata16(pi, PCIR_COMMAND);	/* get updated value */
1250
1251	/*
1252	 * If the MMIO or I/O address space decoding has changed then
1253	 * register/unregister all BARs that decode that address space.
1254	 */
1255	for (i = 0; i <= PCI_BARMAX; i++) {
1256		switch (pi->pi_bar[i].type) {
1257			case PCIBAR_NONE:
1258			case PCIBAR_MEMHI64:
1259				break;
1260			case PCIBAR_IO:
1261				/* I/O address space decoding changed? */
1262				if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
1263					if (porten(pi))
1264						register_bar(pi, i);
1265					else
1266						unregister_bar(pi, i);
1267				}
1268				break;
1269			case PCIBAR_MEM32:
1270			case PCIBAR_MEM64:
1271				/* MMIO address space decoding changed? */
1272				if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
1273					if (memen(pi))
1274						register_bar(pi, i);
1275					else
1276						unregister_bar(pi, i);
1277				}
1278				break;
1279			default:
1280				assert(0);
1281		}
1282	}
1283}
1284
/*
 * Emulate the CONFIG_DATA ports (0xcfc-0xcff) for the device currently
 * selected by CONFIG_ADDRESS.  Device emulations may override reads and
 * writes; otherwise the shadow config space is used directly.  Writes to
 * BAR registers, capability registers and the command register receive
 * special handling.
 */
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
		 uint32_t *eax, void *arg)
{
	struct pci_devinst *pi;
	struct pci_devemu *pe;
	int coff, idx, needcfg;
	uint64_t addr, bar, mask;

	assert(bytes == 1 || bytes == 2 || bytes == 4);

	/* Only bus 0 carries emulated devices; other buses appear empty */
	if (cfgbus == 0)
		pi = pci_slotinfo[cfgslot][cfgfunc].si_devi;
	else
		pi = NULL;

	/* Full register offset: selection plus byte lane within 0xcfc */
	coff = cfgoff + (port - CONF1_DATA_PORT);

#if 0
	printf("pcicfg-%s from 0x%0x of %d bytes (%d/%d/%d)\n\r",
		in ? "read" : "write", coff, bytes, cfgbus, cfgslot, cfgfunc);
#endif

	/*
	 * Just return if there is no device at this cfgslot:cfgfunc or
	 * if the guest is doing an un-aligned access
	 */
	if (pi == NULL || (coff & (bytes - 1)) != 0) {
		if (in)
			*eax = 0xffffffff;
		return (0);
	}

	pe = pi->pi_d;

	/*
	 * Config read
	 */
	if (in) {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgread != NULL) {
			needcfg = pe->pe_cfgread(ctx, vcpu, pi,
						    coff, bytes, eax);
		} else {
			needcfg = 1;
		}

		/* needcfg != 0: fall back to the shadow config space */
		if (needcfg) {
			if (bytes == 1)
				*eax = pci_get_cfgdata8(pi, coff);
			else if (bytes == 2)
				*eax = pci_get_cfgdata16(pi, coff);
			else
				*eax = pci_get_cfgdata32(pi, coff);
		}

		/* Keep the multi-function bit in HDRTYPE consistent */
		pci_emul_hdrtype_fixup(cfgslot, coff, bytes, eax);
	} else {
		/* Let the device emulation override the default handler */
		if (pe->pe_cfgwrite != NULL &&
		    (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0)
			return (0);

		/*
		 * Special handling for write to BAR registers
		 */
		if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) {
			/*
			 * Ignore writes to BAR registers that are not
			 * 4-byte aligned.
			 */
			if (bytes != 4 || (coff & 0x3) != 0)
				return (0);
			idx = (coff - PCIR_BAR(0)) / 4;
			/* BAR size is a power of two; mask keeps aligned bits */
			mask = ~(pi->pi_bar[idx].size - 1);
			switch (pi->pi_bar[idx].type) {
			case PCIBAR_NONE:
				pi->pi_bar[idx].addr = bar = 0;
				break;
			case PCIBAR_IO:
				addr = *eax & mask;
				/* I/O BARs decode only 16 bits of address */
				addr &= 0xffff;
				bar = addr | PCIM_BAR_IO_SPACE;
				/*
				 * Register the new BAR value for interception
				 */
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_IO);
				}
				break;
			case PCIBAR_MEM32:
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
				if (addr != pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_MEM32);
				}
				break;
			case PCIBAR_MEM64:
				/* Low half of a 64-bit BAR */
				addr = bar = *eax & mask;
				bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
				       PCIM_BAR_MEM_PREFETCH;
				if (addr != (uint32_t)pi->pi_bar[idx].addr) {
					update_bar_address(pi, addr, idx,
							   PCIBAR_MEM64);
				}
				break;
			case PCIBAR_MEMHI64:
				/* High half: size/addr live in slot idx - 1 */
				mask = ~(pi->pi_bar[idx - 1].size - 1);
				addr = ((uint64_t)*eax << 32) & mask;
				bar = addr >> 32;
				if (bar != pi->pi_bar[idx - 1].addr >> 32) {
					update_bar_address(pi, addr, idx - 1,
							   PCIBAR_MEMHI64);
				}
				break;
			default:
				assert(0);
			}
			pci_set_cfgdata32(pi, coff, bar);

		} else if (pci_emul_iscap(pi, coff)) {
			pci_emul_capwrite(pi, coff, bytes, *eax);
		} else if (coff == PCIR_COMMAND) {
			pci_emul_cmdwrite(pi, *eax, bytes);
		} else {
			CFGWRITE(pi, coff, *eax, bytes);
		}
	}

	return (0);
}
1418
/* Register all four byte lanes of the config-data window (0xcfc-0xcff) */
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
1423
1424/*
1425 * I/O ports to configure PCI IRQ routing. We ignore all writes to it.
1426 */
1427static int
1428pci_irq_port_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
1429		     uint32_t *eax, void *arg)
1430{
1431	assert(in == 0);
1432	return (0);
1433}
1434INOUT_PORT(pci_irq, 0xC00, IOPORT_F_OUT, pci_irq_port_handler);
1435INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler);
1436
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
 * Define a dummy test device
 */
#define DIOSZ	20	/* bytes backing the I/O BAR (BAR 0) */
#define DMEMSZ	4096	/* bytes backing the memory BAR (BAR 1) */
/* Per-instance register backing store for the dummy device */
struct pci_emul_dsoftc {
	uint8_t   ioregs[DIOSZ];
	uint8_t	  memregs[DMEMSZ];
};

/* MSI/MSI-X message counts advertised by the dummy device */
#define	PCI_EMUL_MSI_MSGS	 4
#define	PCI_EMUL_MSIX_MSGS	16
1451
1452static int
1453pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
1454{
1455	int error;
1456	struct pci_emul_dsoftc *sc;
1457
1458	sc = malloc(sizeof(struct pci_emul_dsoftc));
1459	memset(sc, 0, sizeof(struct pci_emul_dsoftc));
1460
1461	pi->pi_arg = sc;
1462
1463	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001);
1464	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
1465	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
1466
1467	error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
1468	assert(error == 0);
1469
1470	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
1471	assert(error == 0);
1472
1473	error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
1474	assert(error == 0);
1475
1476	return (0);
1477}
1478
1479static void
1480pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1481	      uint64_t offset, int size, uint64_t value)
1482{
1483	int i;
1484	struct pci_emul_dsoftc *sc = pi->pi_arg;
1485
1486	if (baridx == 0) {
1487		if (offset + size > DIOSZ) {
1488			printf("diow: iow too large, offset %ld size %d\n",
1489			       offset, size);
1490			return;
1491		}
1492
1493		if (size == 1) {
1494			sc->ioregs[offset] = value & 0xff;
1495		} else if (size == 2) {
1496			*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
1497		} else if (size == 4) {
1498			*(uint32_t *)&sc->ioregs[offset] = value;
1499		} else {
1500			printf("diow: iow unknown size %d\n", size);
1501		}
1502
1503		/*
1504		 * Special magic value to generate an interrupt
1505		 */
1506		if (offset == 4 && size == 4 && pci_msi_enabled(pi))
1507			pci_generate_msi(pi, value % pci_msi_msgnum(pi));
1508
1509		if (value == 0xabcdef) {
1510			for (i = 0; i < pci_msi_msgnum(pi); i++)
1511				pci_generate_msi(pi, i);
1512		}
1513	}
1514
1515	if (baridx == 1) {
1516		if (offset + size > DMEMSZ) {
1517			printf("diow: memw too large, offset %ld size %d\n",
1518			       offset, size);
1519			return;
1520		}
1521
1522		if (size == 1) {
1523			sc->memregs[offset] = value;
1524		} else if (size == 2) {
1525			*(uint16_t *)&sc->memregs[offset] = value;
1526		} else if (size == 4) {
1527			*(uint32_t *)&sc->memregs[offset] = value;
1528		} else if (size == 8) {
1529			*(uint64_t *)&sc->memregs[offset] = value;
1530		} else {
1531			printf("diow: memw unknown size %d\n", size);
1532		}
1533
1534		/*
1535		 * magic interrupt ??
1536		 */
1537	}
1538
1539	if (baridx > 1) {
1540		printf("diow: unknown bar idx %d\n", baridx);
1541	}
1542}
1543
1544static uint64_t
1545pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
1546	      uint64_t offset, int size)
1547{
1548	struct pci_emul_dsoftc *sc = pi->pi_arg;
1549	uint32_t value;
1550
1551	if (baridx == 0) {
1552		if (offset + size > DIOSZ) {
1553			printf("dior: ior too large, offset %ld size %d\n",
1554			       offset, size);
1555			return (0);
1556		}
1557
1558		if (size == 1) {
1559			value = sc->ioregs[offset];
1560		} else if (size == 2) {
1561			value = *(uint16_t *) &sc->ioregs[offset];
1562		} else if (size == 4) {
1563			value = *(uint32_t *) &sc->ioregs[offset];
1564		} else {
1565			printf("dior: ior unknown size %d\n", size);
1566		}
1567	}
1568
1569	if (baridx == 1) {
1570		if (offset + size > DMEMSZ) {
1571			printf("dior: memr too large, offset %ld size %d\n",
1572			       offset, size);
1573			return (0);
1574		}
1575
1576		if (size == 1) {
1577			value = sc->memregs[offset];
1578		} else if (size == 2) {
1579			value = *(uint16_t *) &sc->memregs[offset];
1580		} else if (size == 4) {
1581			value = *(uint32_t *) &sc->memregs[offset];
1582		} else if (size == 8) {
1583			value = *(uint64_t *) &sc->memregs[offset];
1584		} else {
1585			printf("dior: ior unknown size %d\n", size);
1586		}
1587	}
1588
1589
1590	if (baridx > 1) {
1591		printf("dior: unknown bar idx %d\n", baridx);
1592		return (0);
1593	}
1594
1595	return (value);
1596}
1597
/* Emulation entry points for the dummy test device */
struct pci_devemu pci_dummy = {
	.pe_emu = "dummy",
	.pe_init = pci_emul_dinit,
	.pe_barwrite = pci_emul_diow,
	.pe_barread = pci_emul_dior
};
PCI_EMUL_SET(pci_dummy);
1605
1606#endif /* PCI_EMUL_TEST */
1607