pci.c revision 330938
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/pci/pci.c 330938 2018-03-14 19:04:40Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/limits.h>
39#include <sys/linker.h>
40#include <sys/fcntl.h>
41#include <sys/conf.h>
42#include <sys/kernel.h>
43#include <sys/queue.h>
44#include <sys/sysctl.h>
45#include <sys/endian.h>
46
47#include <vm/vm.h>
48#include <vm/pmap.h>
49#include <vm/vm_extern.h>
50
51#include <sys/bus.h>
52#include <machine/bus.h>
53#include <sys/rman.h>
54#include <machine/resource.h>
55#include <machine/stdarg.h>
56
57#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
58#include <machine/intr_machdep.h>
59#endif
60
61#include <sys/pciio.h>
62#include <dev/pci/pcireg.h>
63#include <dev/pci/pcivar.h>
64#include <dev/pci/pci_private.h>
65
66#include <dev/usb/controller/xhcireg.h>
67#include <dev/usb/controller/ehcireg.h>
68#include <dev/usb/controller/ohcireg.h>
69#include <dev/usb/controller/uhcireg.h>
70
71#include "pcib_if.h"
72#include "pci_if.h"
73
/*
 * Evaluate to true when config-space register 'reg' is the expansion
 * ROM (BIOS) BAR for the header type recorded in 'cfg': type 0 devices
 * keep it at PCIR_BIOS, PCI-PCI bridges (type 1) at PCIR_BIOS_1.
 * Note: 'reg' is parenthesized for macro hygiene, matching 'cfg'.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
77
78static int		pci_has_quirk(uint32_t devid, int quirk);
79static pci_addr_t	pci_mapbase(uint64_t mapreg);
80static const char	*pci_maptype(uint64_t mapreg);
81static int		pci_mapsize(uint64_t testval);
82static int		pci_maprange(uint64_t mapreg);
83static pci_addr_t	pci_rombase(uint64_t mapreg);
84static int		pci_romsize(uint64_t testval);
85static void		pci_fixancient(pcicfgregs *cfg);
86static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
87
88static int		pci_porten(device_t dev);
89static int		pci_memen(device_t dev);
90static void		pci_assign_interrupt(device_t bus, device_t dev,
91			    int force_route);
92static int		pci_add_map(device_t bus, device_t dev, int reg,
93			    struct resource_list *rl, int force, int prefetch);
94static int		pci_probe(device_t dev);
95static int		pci_attach(device_t dev);
96#ifdef PCI_RES_BUS
97static int		pci_detach(device_t dev);
98#endif
99static void		pci_load_vendor_data(void);
100static int		pci_describe_parse_line(char **ptr, int *vendor,
101			    int *device, char **desc);
102static char		*pci_describe_device(device_t dev);
103static int		pci_modevent(module_t mod, int what, void *arg);
104static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
105			    pcicfgregs *cfg);
106static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
107static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
108			    int reg, uint32_t *data);
109#if 0
110static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
111			    int reg, uint32_t data);
112#endif
113static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
114static void		pci_mask_msix(device_t dev, u_int index);
115static void		pci_unmask_msix(device_t dev, u_int index);
116static int		pci_msi_blacklisted(void);
117static int		pci_msix_blacklisted(void);
118static void		pci_resume_msi(device_t dev);
119static void		pci_resume_msix(device_t dev);
120static int		pci_remap_intr_method(device_t bus, device_t dev,
121			    u_int irq);
122
123static uint16_t		pci_get_rid_method(device_t dev, device_t child);
124
/*
 * newbus method table for the PCI bus driver: device lifecycle hooks,
 * generic bus operations (resources, interrupts, child management),
 * and the PCI-specific kobj interface from pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	/* A real detach is only needed when we manage bus number resources. */
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_deleted,	pci_child_deleted),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_enable_msi,	pci_enable_msi_method),
	DEVMETHOD(pci_enable_msix,	pci_enable_msix_method),
	DEVMETHOD(pci_disable_msi,	pci_disable_msi_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_msix_pba_bar,	pci_msix_pba_bar_method),
	DEVMETHOD(pci_msix_table_bar,	pci_msix_table_bar_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
	DEVMETHOD(pci_child_added,	pci_child_added_method),

	DEVMETHOD_END
};
194
195DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
196
197static devclass_t pci_devclass;
198DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
199MODULE_VERSION(pci, 1);
200
201static char	*pci_vendordata;
202static size_t	pci_vendordata_size;
203
/*
 * One entry in the device quirk table below.  Matched against a device
 * by exact vendor/device ID in pci_has_quirk().
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;		/* quirk-specific argument (e.g. map register) */
	int	arg2;		/* quirk-specific argument (unused so far) */
};
216
/*
 * Table of known-broken devices and chipsets, consulted via
 * pci_has_quirk().  Terminated by an all-zero entry.
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
	 * a bug that MSI interrupt does not assert if PCIM_CMD_INTxDIS bit
	 * of the command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator */
};
296
297/* map register information */
298#define	PCI_MAPMEM	0x01	/* memory map */
299#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
300#define	PCI_MAPPORT	0x04	/* port map */
301
302struct devlist pci_devq;
303uint32_t pci_generation;
304uint32_t pci_numdevs = 0;
305static int pcie_chipset, pcix_chipset;
306
307/* sysctl vars */
308SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
309
310static int pci_enable_io_modes = 1;
311TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
312SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
313    &pci_enable_io_modes, 1,
314    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
315enable these bits correctly.  We'd like to do this all the time, but there\n\
316are some peripherals that this causes problems with.");
317
318static int pci_do_realloc_bars = 0;
319TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
320SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
321    &pci_do_realloc_bars, 0,
322    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
323
324static int pci_do_power_nodriver = 0;
325TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
326SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
327    &pci_do_power_nodriver, 0,
328  "Place a function into D3 state when no driver attaches to it.  0 means\n\
329disable.  1 means conservatively place devices into D3 state.  2 means\n\
330agressively place devices into D3 state.  3 means put absolutely everything\n\
331in D3 state.");
332
333int pci_do_power_resume = 1;
334TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
335SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
336    &pci_do_power_resume, 1,
337  "Transition from D3 -> D0 on resume.");
338
339int pci_do_power_suspend = 1;
340TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
341SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
342    &pci_do_power_suspend, 1,
343  "Transition from D0 -> D3 on suspend.");
344
345static int pci_do_msi = 1;
346TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
347SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
348    "Enable support for MSI interrupts");
349
350static int pci_do_msix = 1;
351TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
352SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
353    "Enable support for MSI-X interrupts");
354
355static int pci_msix_rewrite_table = 0;
356SYSCTL_INT(_hw_pci, OID_AUTO, msix_rewrite_table, CTLFLAG_RWTUN,
357    &pci_msix_rewrite_table, 0,
358    "Rewrite entire MSI-X table when updating MSI-X entries");
359
360static int pci_honor_msi_blacklist = 1;
361TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
362SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
363    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
364
365#if defined(__i386__) || defined(__amd64__)
366static int pci_usb_takeover = 1;
367#else
368static int pci_usb_takeover = 0;
369#endif
370TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
371SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
372    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
373Disable this if you depend on BIOS emulation of USB devices, that is\n\
374you use USB devices (like keyboard or mouse) but do not load USB drivers");
375
376static int pci_clear_bars;
377TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
378SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
379    "Ignore firmware-assigned resources for BARs.");
380
381#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
382static int pci_clear_buses;
383TUNABLE_INT("hw.pci.clear_buses", &pci_clear_buses);
384SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
385    "Ignore firmware-assigned bus numbers.");
386#endif
387
388static int pci_enable_ari = 1;
389TUNABLE_INT("hw.pci.enable_ari", &pci_enable_ari);
390SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
391    0, "Enable support for PCIe Alternative RID Interpretation");
392
393static int
394pci_has_quirk(uint32_t devid, int quirk)
395{
396	const struct pci_quirk *q;
397
398	for (q = &pci_quirks[0]; q->devid; q++) {
399		if (q->devid == devid && q->type == quirk)
400			return (1);
401	}
402	return (0);
403}
404
/*
 * Find a device_t by bus/slot/function in domain 0.  Convenience
 * wrapper around pci_find_dbsf(); returns NULL if no such device.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
413
414/* Find a device_t by domain/bus/slot/function */
415
416device_t
417pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
418{
419	struct pci_devinfo *dinfo;
420
421	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
422		if ((dinfo->cfg.domain == domain) &&
423		    (dinfo->cfg.bus == bus) &&
424		    (dinfo->cfg.slot == slot) &&
425		    (dinfo->cfg.func == func)) {
426			return (dinfo->cfg.dev);
427		}
428	}
429
430	return (NULL);
431}
432
433/* Find a device_t by vendor/device ID */
434
435device_t
436pci_find_device(uint16_t vendor, uint16_t device)
437{
438	struct pci_devinfo *dinfo;
439
440	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
441		if ((dinfo->cfg.vendor == vendor) &&
442		    (dinfo->cfg.device == device)) {
443			return (dinfo->cfg.dev);
444		}
445	}
446
447	return (NULL);
448}
449
450device_t
451pci_find_class(uint8_t class, uint8_t subclass)
452{
453	struct pci_devinfo *dinfo;
454
455	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
456		if (dinfo->cfg.baseclass == class &&
457		    dinfo->cfg.subclass == subclass) {
458			return (dinfo->cfg.dev);
459		}
460	}
461
462	return (NULL);
463}
464
/*
 * printf() wrapper that prefixes the message with the device's
 * "pciD:B:S:F: " location taken from 'cfg'.  Returns the total number
 * of characters printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
478
479/* return base address of memory or port map */
480
481static pci_addr_t
482pci_mapbase(uint64_t mapreg)
483{
484
485	if (PCI_BAR_MEM(mapreg))
486		return (mapreg & PCIM_BAR_MEM_BASE);
487	else
488		return (mapreg & PCIM_BAR_IO_BASE);
489}
490
491/* return map type of memory or port map */
492
493static const char *
494pci_maptype(uint64_t mapreg)
495{
496
497	if (PCI_BAR_IO(mapreg))
498		return ("I/O Port");
499	if (mapreg & PCIM_BAR_MEM_PREFETCH)
500		return ("Prefetchable Memory");
501	return ("Memory");
502}
503
504/* return log2 of map size decoded for memory or port map */
505
506static int
507pci_mapsize(uint64_t testval)
508{
509	int ln2size;
510
511	testval = pci_mapbase(testval);
512	ln2size = 0;
513	if (testval != 0) {
514		while ((testval & 1) == 0)
515		{
516			ln2size++;
517			testval >>= 1;
518		}
519	}
520	return (ln2size);
521}
522
/*
 * return base address of device ROM: mask the address bits out of the
 * expansion ROM BAR value.
 */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
531
532/* return log2 of map size decided for device ROM */
533
534static int
535pci_romsize(uint64_t testval)
536{
537	int ln2size;
538
539	testval = pci_rombase(testval);
540	ln2size = 0;
541	if (testval != 0) {
542		while ((testval & 1) == 0)
543		{
544			ln2size++;
545			testval >>= 1;
546		}
547	}
548	return (ln2size);
549}
550
551/* return log2 of address range supported by map register */
552
553static int
554pci_maprange(uint64_t mapreg)
555{
556	int ln2range = 0;
557
558	if (PCI_BAR_IO(mapreg))
559		ln2range = 32;
560	else
561		switch (mapreg & PCIM_BAR_MEM_TYPE) {
562		case PCIM_BAR_MEM_32:
563			ln2range = 32;
564			break;
565		case PCIM_BAR_MEM_1MB:
566			ln2range = 20;
567			break;
568		case PCIM_BAR_MEM_64:
569			ln2range = 64;
570			break;
571		}
572	return (ln2range);
573}
574
575/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
576
577static void
578pci_fixancient(pcicfgregs *cfg)
579{
580	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
581		return;
582
583	/* PCI to PCI bridges use header type 1 */
584	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
585		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
586}
587
/*
 * extract header type specific config data: fields whose config-space
 * offsets differ between type 0 (normal), type 1 (bridge), and type 2
 * (cardbus) headers.  Also records how many BARs each type has.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->mingnt         = REG(PCIR_MINGNT, 1);
		cfg->maxlat         = REG(PCIR_MAXLAT, 1);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridge subvendor/subdevice come from a capability instead. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
613
/*
 * read configuration header into pcicfgregs structure.  Allocates and
 * returns a new pci_devinfo (of 'size' bytes, so callers may embed it
 * in a larger structure) for the function at domain/bus/slot/func, or
 * NULL if no device responds there.  The entry is appended to the
 * global device list and the pciconf snapshot is filled in.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device read means nothing is present here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list only if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8)-visible record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
684
/*
 * Walk the device's PCI capability list and record the location and
 * salient contents of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express) in
 * 'cfg'.  Also sets the global pcix_chipset/pcie_chipset hints.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Keep only the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA registers each hold a BIR + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
845
846/*
847 * PCI Vital Product Data
848 */
849
850#define	PCI_VPD_TIMEOUT		1000000
851
/*
 * Read one 32-bit word of VPD data at 'reg' into *data.  Writes the
 * address register (with flag bit clear) and polls until the hardware
 * sets bit 15 to signal completion.  Returns ENXIO on timeout.
 * Note: relies on the REG/WREG macros still in scope from
 * pci_read_cap() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Bit 15 set by hardware indicates the read has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
870
#if 0
/*
 * Write one 32-bit word of VPD data at 'reg'.  Writes the data
 * register, then the address register with the flag bit (15) set;
 * hardware clears the bit when the write completes.  Returns ENXIO on
 * timeout.  Currently unused, kept for reference.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Bit 15 cleared by hardware indicates the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
890
891#undef PCI_VPD_TIMEOUT
892
/*
 * Cursor state for the VPD byte-stream reader (vpd_nextbyte()).
 * VPD hardware is read 32 bits at a time; bytes are doled out one at a
 * time from 'val' while a running checksum is maintained.
 */
struct vpd_readstate {
	device_t	pcib;		/* parent bridge to issue reads through */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit word, low byte next */
	int		bytesinval;	/* bytes of 'val' not yet consumed */
	int		off;		/* next VPD address to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
901
902static int
903vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
904{
905	uint32_t reg;
906	uint8_t byte;
907
908	if (vrs->bytesinval == 0) {
909		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
910			return (ENXIO);
911		vrs->val = le32toh(reg);
912		vrs->off += 4;
913		byte = vrs->val & 0xff;
914		vrs->bytesinval = 3;
915	} else {
916		vrs->val = vrs->val >> 8;
917		byte = vrs->val & 0xff;
918		vrs->bytesinval--;
919	}
920
921	vrs->cksum += byte;
922	*data = byte;
923	return (0);
924}
925
926static void
927pci_read_vpd(device_t pcib, pcicfgregs *cfg)
928{
929	struct vpd_readstate vrs;
930	int state;
931	int name;
932	int remain;
933	int i;
934	int alloc, off;		/* alloc/off for RO/W arrays */
935	int cksumvalid;
936	int dflen;
937	uint8_t byte;
938	uint8_t byte2;
939
940	/* init vpd reader */
941	vrs.bytesinval = 0;
942	vrs.off = 0;
943	vrs.pcib = pcib;
944	vrs.cfg = cfg;
945	vrs.cksum = 0;
946
947	state = 0;
948	name = remain = i = 0;	/* shut up stupid gcc */
949	alloc = off = 0;	/* shut up stupid gcc */
950	dflen = 0;		/* shut up stupid gcc */
951	cksumvalid = -1;
952	while (state >= 0) {
953		if (vpd_nextbyte(&vrs, &byte)) {
954			state = -2;
955			break;
956		}
957#if 0
958		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
959		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
960		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
961#endif
962		switch (state) {
963		case 0:		/* item name */
964			if (byte & 0x80) {
965				if (vpd_nextbyte(&vrs, &byte2)) {
966					state = -2;
967					break;
968				}
969				remain = byte2;
970				if (vpd_nextbyte(&vrs, &byte2)) {
971					state = -2;
972					break;
973				}
974				remain |= byte2 << 8;
975				if (remain > (0x7f*4 - vrs.off)) {
976					state = -1;
977					pci_printf(cfg,
978					    "invalid VPD data, remain %#x\n",
979					    remain);
980				}
981				name = byte & 0x7f;
982			} else {
983				remain = byte & 0x7;
984				name = (byte >> 3) & 0xf;
985			}
986			switch (name) {
987			case 0x2:	/* String */
988				cfg->vpd.vpd_ident = malloc(remain + 1,
989				    M_DEVBUF, M_WAITOK);
990				i = 0;
991				state = 1;
992				break;
993			case 0xf:	/* End */
994				state = -1;
995				break;
996			case 0x10:	/* VPD-R */
997				alloc = 8;
998				off = 0;
999				cfg->vpd.vpd_ros = malloc(alloc *
1000				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1001				    M_WAITOK | M_ZERO);
1002				state = 2;
1003				break;
1004			case 0x11:	/* VPD-W */
1005				alloc = 8;
1006				off = 0;
1007				cfg->vpd.vpd_w = malloc(alloc *
1008				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1009				    M_WAITOK | M_ZERO);
1010				state = 5;
1011				break;
1012			default:	/* Invalid data, abort */
1013				state = -1;
1014				break;
1015			}
1016			break;
1017
1018		case 1:	/* Identifier String */
1019			cfg->vpd.vpd_ident[i++] = byte;
1020			remain--;
1021			if (remain == 0)  {
1022				cfg->vpd.vpd_ident[i] = '\0';
1023				state = 0;
1024			}
1025			break;
1026
1027		case 2:	/* VPD-R Keyword Header */
1028			if (off == alloc) {
1029				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1030				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1031				    M_DEVBUF, M_WAITOK | M_ZERO);
1032			}
1033			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1034			if (vpd_nextbyte(&vrs, &byte2)) {
1035				state = -2;
1036				break;
1037			}
1038			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1039			if (vpd_nextbyte(&vrs, &byte2)) {
1040				state = -2;
1041				break;
1042			}
1043			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1044			if (dflen == 0 &&
1045			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1046			    2) == 0) {
1047				/*
1048				 * if this happens, we can't trust the rest
1049				 * of the VPD.
1050				 */
1051				pci_printf(cfg, "bad keyword length: %d\n",
1052				    dflen);
1053				cksumvalid = 0;
1054				state = -1;
1055				break;
1056			} else if (dflen == 0) {
1057				cfg->vpd.vpd_ros[off].value = malloc(1 *
1058				    sizeof(*cfg->vpd.vpd_ros[off].value),
1059				    M_DEVBUF, M_WAITOK);
1060				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1061			} else
1062				cfg->vpd.vpd_ros[off].value = malloc(
1063				    (dflen + 1) *
1064				    sizeof(*cfg->vpd.vpd_ros[off].value),
1065				    M_DEVBUF, M_WAITOK);
1066			remain -= 3;
1067			i = 0;
1068			/* keep in sync w/ state 3's transistions */
1069			if (dflen == 0 && remain == 0)
1070				state = 0;
1071			else if (dflen == 0)
1072				state = 2;
1073			else
1074				state = 3;
1075			break;
1076
1077		case 3:	/* VPD-R Keyword Value */
1078			cfg->vpd.vpd_ros[off].value[i++] = byte;
1079			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1080			    "RV", 2) == 0 && cksumvalid == -1) {
1081				if (vrs.cksum == 0)
1082					cksumvalid = 1;
1083				else {
1084					if (bootverbose)
1085						pci_printf(cfg,
1086					    "bad VPD cksum, remain %hhu\n",
1087						    vrs.cksum);
1088					cksumvalid = 0;
1089					state = -1;
1090					break;
1091				}
1092			}
1093			dflen--;
1094			remain--;
1095			/* keep in sync w/ state 2's transistions */
1096			if (dflen == 0)
1097				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1098			if (dflen == 0 && remain == 0) {
1099				cfg->vpd.vpd_rocnt = off;
1100				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1101				    off * sizeof(*cfg->vpd.vpd_ros),
1102				    M_DEVBUF, M_WAITOK | M_ZERO);
1103				state = 0;
1104			} else if (dflen == 0)
1105				state = 2;
1106			break;
1107
1108		case 4:
1109			remain--;
1110			if (remain == 0)
1111				state = 0;
1112			break;
1113
1114		case 5:	/* VPD-W Keyword Header */
1115			if (off == alloc) {
1116				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1117				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1118				    M_DEVBUF, M_WAITOK | M_ZERO);
1119			}
1120			cfg->vpd.vpd_w[off].keyword[0] = byte;
1121			if (vpd_nextbyte(&vrs, &byte2)) {
1122				state = -2;
1123				break;
1124			}
1125			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1126			if (vpd_nextbyte(&vrs, &byte2)) {
1127				state = -2;
1128				break;
1129			}
1130			cfg->vpd.vpd_w[off].len = dflen = byte2;
1131			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1132			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1133			    sizeof(*cfg->vpd.vpd_w[off].value),
1134			    M_DEVBUF, M_WAITOK);
1135			remain -= 3;
1136			i = 0;
1137			/* keep in sync w/ state 6's transistions */
1138			if (dflen == 0 && remain == 0)
1139				state = 0;
1140			else if (dflen == 0)
1141				state = 5;
1142			else
1143				state = 6;
1144			break;
1145
1146		case 6:	/* VPD-W Keyword Value */
1147			cfg->vpd.vpd_w[off].value[i++] = byte;
1148			dflen--;
1149			remain--;
1150			/* keep in sync w/ state 5's transistions */
1151			if (dflen == 0)
1152				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1153			if (dflen == 0 && remain == 0) {
1154				cfg->vpd.vpd_wcnt = off;
1155				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1156				    off * sizeof(*cfg->vpd.vpd_w),
1157				    M_DEVBUF, M_WAITOK | M_ZERO);
1158				state = 0;
1159			} else if (dflen == 0)
1160				state = 5;
1161			break;
1162
1163		default:
1164			pci_printf(cfg, "invalid state: %d\n", state);
1165			state = -1;
1166			break;
1167		}
1168	}
1169
1170	if (cksumvalid == 0 || state < -1) {
1171		/* read-only data bad, clean up */
1172		if (cfg->vpd.vpd_ros != NULL) {
1173			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1174				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1175			free(cfg->vpd.vpd_ros, M_DEVBUF);
1176			cfg->vpd.vpd_ros = NULL;
1177		}
1178	}
1179	if (state < -1) {
1180		/* I/O error, clean up */
1181		pci_printf(cfg, "failed to read VPD data.\n");
1182		if (cfg->vpd.vpd_ident != NULL) {
1183			free(cfg->vpd.vpd_ident, M_DEVBUF);
1184			cfg->vpd.vpd_ident = NULL;
1185		}
1186		if (cfg->vpd.vpd_w != NULL) {
1187			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1188				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1189			free(cfg->vpd.vpd_w, M_DEVBUF);
1190			cfg->vpd.vpd_w = NULL;
1191		}
1192	}
1193	cfg->vpd.vpd_cached = 1;
1194#undef REG
1195#undef WREG
1196}
1197
1198int
1199pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1200{
1201	struct pci_devinfo *dinfo = device_get_ivars(child);
1202	pcicfgregs *cfg = &dinfo->cfg;
1203
1204	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1205		pci_read_vpd(device_get_parent(dev), cfg);
1206
1207	*identptr = cfg->vpd.vpd_ident;
1208
1209	if (*identptr == NULL)
1210		return (ENXIO);
1211
1212	return (0);
1213}
1214
1215int
1216pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1217	const char **vptr)
1218{
1219	struct pci_devinfo *dinfo = device_get_ivars(child);
1220	pcicfgregs *cfg = &dinfo->cfg;
1221	int i;
1222
1223	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1224		pci_read_vpd(device_get_parent(dev), cfg);
1225
1226	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1227		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1228		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1229			*vptr = cfg->vpd.vpd_ros[i].value;
1230			return (0);
1231		}
1232
1233	*vptr = NULL;
1234	return (ENXIO);
1235}
1236
1237struct pcicfg_vpd *
1238pci_fetch_vpd_list(device_t dev)
1239{
1240	struct pci_devinfo *dinfo = device_get_ivars(dev);
1241	pcicfgregs *cfg = &dinfo->cfg;
1242
1243	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1244		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1245	return (&cfg->vpd);
1246}
1247
1248/*
1249 * Find the requested HyperTransport capability and return the offset
1250 * in configuration space via the pointer provided.  The function
1251 * returns 0 on success and an error code otherwise.
1252 */
int
pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
{
	int ptr, error;
	uint16_t val;

	/* Locate the first HT capability; bail if the device has none. */
	error = pci_find_cap(child, PCIY_HT, &ptr);
	if (error)
		return (error);

	/*
	 * Traverse the capabilities list checking each HT capability
	 * to see if it matches the requested HT capability.
	 */
	while (ptr != 0) {
		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
		/*
		 * Slave/host capabilities are identified by only the top
		 * three bits of the command register; other HT capability
		 * types use the full capability-type mask.
		 */
		if (capability == PCIM_HTCAP_SLAVE ||
		    capability == PCIM_HTCAP_HOST)
			val &= 0xe000;
		else
			val &= PCIM_HTCMD_CAP_MASK;
		if (val == capability) {
			if (capreg != NULL)
				*capreg = ptr;
			return (0);
		}

		/* Skip to the next HT capability. */
		while (ptr != 0) {
			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
			/*
			 * NOTE(review): when the next pointer is 0 this
			 * still reads the capability ID at offset 0 before
			 * the loop test; harmless for config space reads,
			 * but any spurious match with ptr == 0 falls out
			 * of the outer loop anyway.
			 */
			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
			    PCIY_HT)
				break;
		}
	}
	return (ENOENT);
}
1290
1291/*
1292 * Find the requested capability and return the offset in
1293 * configuration space via the pointer provided.  The function returns
1294 * 0 on success and an error code otherwise.
1295 */
1296int
1297pci_find_cap_method(device_t dev, device_t child, int capability,
1298    int *capreg)
1299{
1300	struct pci_devinfo *dinfo = device_get_ivars(child);
1301	pcicfgregs *cfg = &dinfo->cfg;
1302	u_int32_t status;
1303	u_int8_t ptr;
1304
1305	/*
1306	 * Check the CAP_LIST bit of the PCI status register first.
1307	 */
1308	status = pci_read_config(child, PCIR_STATUS, 2);
1309	if (!(status & PCIM_STATUS_CAPPRESENT))
1310		return (ENXIO);
1311
1312	/*
1313	 * Determine the start pointer of the capabilities list.
1314	 */
1315	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1316	case PCIM_HDRTYPE_NORMAL:
1317	case PCIM_HDRTYPE_BRIDGE:
1318		ptr = PCIR_CAP_PTR;
1319		break;
1320	case PCIM_HDRTYPE_CARDBUS:
1321		ptr = PCIR_CAP_PTR_2;
1322		break;
1323	default:
1324		/* XXX: panic? */
1325		return (ENXIO);		/* no extended capabilities support */
1326	}
1327	ptr = pci_read_config(child, ptr, 1);
1328
1329	/*
1330	 * Traverse the capabilities list.
1331	 */
1332	while (ptr != 0) {
1333		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1334			if (capreg != NULL)
1335				*capreg = ptr;
1336			return (0);
1337		}
1338		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1339	}
1340
1341	return (ENOENT);
1342}
1343
1344/*
1345 * Find the requested extended capability and return the offset in
1346 * configuration space via the pointer provided.  The function returns
1347 * 0 on success and an error code otherwise.
1348 */
1349int
1350pci_find_extcap_method(device_t dev, device_t child, int capability,
1351    int *capreg)
1352{
1353	struct pci_devinfo *dinfo = device_get_ivars(child);
1354	pcicfgregs *cfg = &dinfo->cfg;
1355	uint32_t ecap;
1356	uint16_t ptr;
1357
1358	/* Only supported for PCI-express devices. */
1359	if (cfg->pcie.pcie_location == 0)
1360		return (ENXIO);
1361
1362	ptr = PCIR_EXTCAP;
1363	ecap = pci_read_config(child, ptr, 4);
1364	if (ecap == 0xffffffff || ecap == 0)
1365		return (ENOENT);
1366	for (;;) {
1367		if (PCI_EXTCAP_ID(ecap) == capability) {
1368			if (capreg != NULL)
1369				*capreg = ptr;
1370			return (0);
1371		}
1372		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1373		if (ptr == 0)
1374			break;
1375		ecap = pci_read_config(child, ptr, 4);
1376	}
1377
1378	return (ENOENT);
1379}
1380
1381/*
1382 * Support for MSI-X message interrupts.
1383 */
/*
 * Program one 16-byte MSI-X table entry with the given message address
 * and data.  The vector-control dword (offset 12) is left untouched;
 * masking is handled separately by pci_mask_msix()/pci_unmask_msix().
 */
static void
pci_write_msix_entry(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	offset = msix->msix_table_offset + index * 16;
	/* Address low dword, address high dword, then message data. */
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);
}
1397
/*
 * Program MSI-X table entry 'index' with the given message address and
 * data and enable MSI -> HT mapping if applicable.  When the
 * pci_msix_rewrite_table workaround is active, the whole table is
 * rewritten via pci_resume_msix() with MSI-X disabled instead of
 * updating the single entry in place.
 */
void
pci_enable_msix_method(device_t dev, device_t child, u_int index,
    uint64_t address, uint32_t data)
{

	if (pci_msix_rewrite_table) {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		struct pcicfg_msix *msix = &dinfo->cfg.msix;

		/*
		 * Some VM hosts require MSIX to be disabled in the
		 * control register before updating the MSIX table
		 * entries are allowed. It is not enough to only
		 * disable MSIX while updating a single entry. MSIX
		 * must be disabled while updating all entries in the
		 * table.
		 */
		pci_write_config(child,
		    msix->msix_location + PCIR_MSIX_CTRL,
		    msix->msix_ctrl & ~PCIM_MSIXCTRL_MSIX_ENABLE, 2);
		/* pci_resume_msix() re-enables via the saved msix_ctrl. */
		pci_resume_msix(child);
	} else
		pci_write_msix_entry(child, index, address, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
1425
1426void
1427pci_mask_msix(device_t dev, u_int index)
1428{
1429	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431	uint32_t offset, val;
1432
1433	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1434	offset = msix->msix_table_offset + index * 16 + 12;
1435	val = bus_read_4(msix->msix_table_res, offset);
1436	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1437		val |= PCIM_MSIX_VCTRL_MASK;
1438		bus_write_4(msix->msix_table_res, offset, val);
1439	}
1440}
1441
1442void
1443pci_unmask_msix(device_t dev, u_int index)
1444{
1445	struct pci_devinfo *dinfo = device_get_ivars(dev);
1446	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1447	uint32_t offset, val;
1448
1449	KASSERT(msix->msix_table_len > index, ("bogus index"));
1450	offset = msix->msix_table_offset + index * 16 + 12;
1451	val = bus_read_4(msix->msix_table_res, offset);
1452	if (val & PCIM_MSIX_VCTRL_MASK) {
1453		val &= ~PCIM_MSIX_VCTRL_MASK;
1454		bus_write_4(msix->msix_table_res, offset, val);
1455	}
1456}
1457
1458int
1459pci_pending_msix(device_t dev, u_int index)
1460{
1461	struct pci_devinfo *dinfo = device_get_ivars(dev);
1462	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1463	uint32_t offset, bit;
1464
1465	KASSERT(msix->msix_table_len > index, ("bogus index"));
1466	offset = msix->msix_pba_offset + (index / 32) * 4;
1467	bit = 1 << index % 32;
1468	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1469}
1470
1471/*
1472 * Restore MSI-X registers and table during resume.  If MSI-X is
1473 * enabled then walk the virtual table to restore the actual MSI-X
1474 * table.
1475 */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_write_msix_entry(dev, i, mv->mv_address,
			    mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1504
1505/*
1506 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1507 * returned in *count.  After this function returns, each message will be
1508 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1509 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* 'rle' still refers to the table BAR when the PBA shares it. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if we got no messages at all. */
			if (i == 0)
				return (error);
			break;
		}
		/* Message slots map to SYS_RES_IRQ rids starting at 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: message i uses vector i + 1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1644
1645/*
1646 * By default, pci_alloc_msix() will assign the allocated IRQ
1647 * resources consecutively to the first N messages in the MSI-X table.
1648 * However, device drivers may want to use different layouts if they
1649 * either receive fewer messages than they asked for, or they wish to
1650 * populate the MSI-X table sparsely.  This method allows the driver
1651 * to specify what layout it wants.  It must be called after a
1652 * successful pci_alloc_msix() but before any of the associated
1653 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1654 *
1655 * The 'vectors' array contains 'count' message vectors.  The array
1656 * maps directly to the MSI-X table in that index 0 in the array
1657 * specifies the vector for the first message in the MSI-X table, etc.
1658 * The vector value in each array index can either be 0 to indicate
1659 * that no vector should be assigned to a message slot, or it can be a
1660 * number from 1 to N (where N is the count returned from a
1661 * succcessful call to pci_alloc_msix()) to indicate which message
1662 * vector (IRQ) to be used for the corresponding message.
1663 *
1664 * On successful return, each message with a non-zero vector will have
1665 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1666 * 1.  Additionally, if any of the IRQs allocated via the previous
1667 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1668 * will be freed back to the system automatically.
1669 *
1670 * For example, suppose a driver has a MSI-X table with 6 messages and
1671 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1672 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1673 * C.  After the call to pci_alloc_msix(), the device will be setup to
1674 * have an MSI-X table of ABC--- (where - means no vector assigned).
1675 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1676 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1677 * be freed back to the system.  This device will also have valid
1678 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1679 *
1680 * In any case, the SYS_RES_IRQ rid X will always map to the message
1681 * at MSI-X table index X - 1 and will only be valid if a vector is
1682 * assigned to that table entry.
1683 */
1684int
1685pci_remap_msix_method(device_t dev, device_t child, int count,
1686    const u_int *vectors)
1687{
1688	struct pci_devinfo *dinfo = device_get_ivars(child);
1689	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1690	struct resource_list_entry *rle;
1691	int i, irq, j, *used;
1692
1693	/*
1694	 * Have to have at least one message in the table but the
1695	 * table can't be bigger than the actual MSI-X table in the
1696	 * device.
1697	 */
1698	if (count == 0 || count > msix->msix_msgnum)
1699		return (EINVAL);
1700
1701	/* Sanity check the vectors. */
1702	for (i = 0; i < count; i++)
1703		if (vectors[i] > msix->msix_alloc)
1704			return (EINVAL);
1705
1706	/*
1707	 * Make sure there aren't any holes in the vectors to be used.
1708	 * It's a big pain to support it, and it doesn't really make
1709	 * sense anyway.  Also, at least one vector must be used.
1710	 */
1711	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1712	    M_ZERO);
1713	for (i = 0; i < count; i++)
1714		if (vectors[i] != 0)
1715			used[vectors[i] - 1] = 1;
1716	for (i = 0; i < msix->msix_alloc - 1; i++)
1717		if (used[i] == 0 && used[i + 1] == 1) {
1718			free(used, M_DEVBUF);
1719			return (EINVAL);
1720		}
1721	if (used[0] != 1) {
1722		free(used, M_DEVBUF);
1723		return (EINVAL);
1724	}
1725
1726	/* Make sure none of the resources are allocated. */
1727	for (i = 0; i < msix->msix_table_len; i++) {
1728		if (msix->msix_table[i].mte_vector == 0)
1729			continue;
1730		if (msix->msix_table[i].mte_handlers > 0)
1731			return (EBUSY);
1732		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1733		KASSERT(rle != NULL, ("missing resource"));
1734		if (rle->res != NULL)
1735			return (EBUSY);
1736	}
1737
1738	/* Free the existing resource list entries. */
1739	for (i = 0; i < msix->msix_table_len; i++) {
1740		if (msix->msix_table[i].mte_vector == 0)
1741			continue;
1742		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1743	}
1744
1745	/*
1746	 * Build the new virtual table keeping track of which vectors are
1747	 * used.
1748	 */
1749	free(msix->msix_table, M_DEVBUF);
1750	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1751	    M_DEVBUF, M_WAITOK | M_ZERO);
1752	for (i = 0; i < count; i++)
1753		msix->msix_table[i].mte_vector = vectors[i];
1754	msix->msix_table_len = count;
1755
1756	/* Free any unused IRQs and resize the vectors array if necessary. */
1757	j = msix->msix_alloc - 1;
1758	if (used[j] == 0) {
1759		struct msix_vector *vec;
1760
1761		while (used[j] == 0) {
1762			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1763			    msix->msix_vectors[j].mv_irq);
1764			j--;
1765		}
1766		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1767		    M_WAITOK);
1768		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1769		    (j + 1));
1770		free(msix->msix_vectors, M_DEVBUF);
1771		msix->msix_vectors = vec;
1772		msix->msix_alloc = j + 1;
1773	}
1774	free(used, M_DEVBUF);
1775
1776	/* Map the IRQs onto the rids. */
1777	for (i = 0; i < count; i++) {
1778		if (vectors[i] == 0)
1779			continue;
1780		irq = msix->msix_vectors[vectors[i] - 1].mv_irq;
1781		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1782		    irq, 1);
1783	}
1784
1785	if (bootverbose) {
1786		device_printf(child, "Remapped MSI-X IRQs as: ");
1787		for (i = 0; i < count; i++) {
1788			if (i != 0)
1789				printf(", ");
1790			if (vectors[i] == 0)
1791				printf("---");
1792			else
1793				printf("%d",
1794				    msix->msix_vectors[vectors[i] - 1].mv_irq);
1795		}
1796		printf("\n");
1797	}
1798
1799	return (0);
1800}
1801
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in the
 * control register, delete the SYS_RES_IRQ resource list entries, and
 * hand the IRQs back to the parent bridge.  Fails with EBUSY if any
 * message still has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1848
1849/*
1850 * Return the max supported MSI-X messages this device supports.
1851 * Basically, assuming the MD code can alloc messages, this function
1852 * should return the maximum value that pci_alloc_msix() can return.
1853 * Thus, it is subject to the tunables, etc.
1854 */
1855int
1856pci_msix_count_method(device_t dev, device_t child)
1857{
1858	struct pci_devinfo *dinfo = device_get_ivars(child);
1859	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1860
1861	if (pci_do_msix && msix->msix_location != 0)
1862		return (msix->msix_msgnum);
1863	return (0);
1864}
1865
1866int
1867pci_msix_pba_bar_method(device_t dev, device_t child)
1868{
1869	struct pci_devinfo *dinfo = device_get_ivars(child);
1870	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1871
1872	if (pci_do_msix && msix->msix_location != 0)
1873		return (msix->msix_pba_bar);
1874	return (-1);
1875}
1876
1877int
1878pci_msix_table_bar_method(device_t dev, device_t child)
1879{
1880	struct pci_devinfo *dinfo = device_get_ivars(child);
1881	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1882
1883	if (pci_do_msix && msix->msix_location != 0)
1884		return (msix->msix_table_bar);
1885	return (-1);
1886}
1887
1888/*
1889 * HyperTransport MSI mapping control
1890 */
1891void
1892pci_ht_map_msi(device_t dev, uint64_t addr)
1893{
1894	struct pci_devinfo *dinfo = device_get_ivars(dev);
1895	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1896
1897	if (!ht->ht_msimap)
1898		return;
1899
1900	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1901	    ht->ht_msiaddr >> 20 == addr >> 20) {
1902		/* Enable MSI -> HT mapping. */
1903		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1904		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1905		    ht->ht_msictrl, 2);
1906	}
1907
1908	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1909		/* Disable MSI -> HT mapping. */
1910		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1911		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1912		    ht->ht_msictrl, 2);
1913	}
1914}
1915
1916int
1917pci_get_max_payload(device_t dev)
1918{
1919	struct pci_devinfo *dinfo = device_get_ivars(dev);
1920	int cap;
1921	uint16_t val;
1922
1923	cap = dinfo->cfg.pcie.pcie_location;
1924	if (cap == 0)
1925		return (0);
1926	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1927	val &= PCIEM_CTL_MAX_PAYLOAD;
1928	val >>= 5;
1929	return (1 << (val + 7));
1930}
1931
1932int
1933pci_get_max_read_req(device_t dev)
1934{
1935	struct pci_devinfo *dinfo = device_get_ivars(dev);
1936	int cap;
1937	uint16_t val;
1938
1939	cap = dinfo->cfg.pcie.pcie_location;
1940	if (cap == 0)
1941		return (0);
1942	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1943	val &= PCIEM_CTL_MAX_READ_REQUEST;
1944	val >>= 12;
1945	return (1 << (val + 7));
1946}
1947
1948int
1949pci_set_max_read_req(device_t dev, int size)
1950{
1951	struct pci_devinfo *dinfo = device_get_ivars(dev);
1952	int cap;
1953	uint16_t val;
1954
1955	cap = dinfo->cfg.pcie.pcie_location;
1956	if (cap == 0)
1957		return (0);
1958	if (size < 128)
1959		size = 128;
1960	if (size > 4096)
1961		size = 4096;
1962	size = (1 << (fls(size) - 1));
1963	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1964	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1965	val |= (fls(size) - 8) << 12;
1966	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1967	return (size);
1968}
1969
1970uint32_t
1971pcie_read_config(device_t dev, int reg, int width)
1972{
1973	struct pci_devinfo *dinfo = device_get_ivars(dev);
1974	int cap;
1975
1976	cap = dinfo->cfg.pcie.pcie_location;
1977	if (cap == 0) {
1978		if (width == 2)
1979			return (0xffff);
1980		return (0xffffffff);
1981	}
1982
1983	return (pci_read_config(dev, cap + reg, width));
1984}
1985
1986void
1987pcie_write_config(device_t dev, int reg, uint32_t value, int width)
1988{
1989	struct pci_devinfo *dinfo = device_get_ivars(dev);
1990	int cap;
1991
1992	cap = dinfo->cfg.pcie.pcie_location;
1993	if (cap == 0)
1994		return;
1995	pci_write_config(dev, cap + reg, value, width);
1996}
1997
1998/*
1999 * Adjusts a PCI-e capability register by clearing the bits in mask
2000 * and setting the bits in (value & mask).  Bits not set in mask are
2001 * not adjusted.
2002 *
2003 * Returns the old value on success or all ones on failure.
2004 */
2005uint32_t
2006pcie_adjust_config(device_t dev, int reg, uint32_t mask, uint32_t value,
2007    int width)
2008{
2009	struct pci_devinfo *dinfo = device_get_ivars(dev);
2010	uint32_t old, new;
2011	int cap;
2012
2013	cap = dinfo->cfg.pcie.pcie_location;
2014	if (cap == 0) {
2015		if (width == 2)
2016			return (0xffff);
2017		return (0xffffffff);
2018	}
2019
2020	old = pci_read_config(dev, cap + reg, width);
2021	new = old & ~mask;
2022	new |= (value & mask);
2023	pci_write_config(dev, cap + reg, new, width);
2024	return (old);
2025}
2026
2027/*
2028 * Support for MSI message signalled interrupts.
2029 */
/*
 * Program and enable MSI for a child device using the message
 * address/data pair supplied by the caller (typically obtained from
 * the parent bridge via PCIB_MAP_MSI()).  The cached msi_ctrl shadow
 * in the softc is updated alongside the hardware register.
 */
void
pci_enable_msi_method(device_t dev, device_t child, uint64_t address,
    uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/*
		 * 64-bit capable function: the data register sits after
		 * the high address dword.
		 */
		pci_write_config(child, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(child, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(child, address);
}
2057
/*
 * Disable MSI delivery for a child device.  The HyperTransport MSI
 * mapping is torn down first, then the enable bit is cleared in the
 * capability's control register (and in the cached msi_ctrl shadow).
 */
void
pci_disable_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(child, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);
}
2072
2073/*
2074 * Restore MSI registers during resume.  If MSI is enabled then
2075 * restore the data and address registers in addition to the control
2076 * register.
2077 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/*
		 * MSI was enabled before suspend: rewrite the saved
		 * address/data pair before re-enabling via the control
		 * register write below.
		 */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
2103
/*
 * Reroute an already-allocated MSI or MSI-X vector of 'dev' to a new
 * IRQ target by asking the parent bridge for an updated address/data
 * pair and reprogramming the device.  Returns 0 on a successful MSI
 * remap, ENOENT if the IRQ does not belong to this device, or the
 * error from PCIB_MAP_MSI().
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, reprogram, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while updating the slot. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): unlike the MSI path, a successful MSI-X
		 * remap still falls through to ENOENT here; confirm
		 * callers do not rely on a 0 return in this case.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
2176
2177/*
2178 * Returns true if the specified device is blacklisted because MSI
2179 * doesn't work.
2180 */
2181int
2182pci_msi_device_blacklisted(device_t dev)
2183{
2184
2185	if (!pci_honor_msi_blacklist)
2186		return (0);
2187
2188	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2189}
2190
2191/*
2192 * Determine if MSI is blacklisted globally on this system.  Currently,
2193 * we just check for blacklisted chipsets as represented by the
2194 * host-PCI bridge at device 0:0:0.  In the future, it may become
2195 * necessary to check other system attributes, such as the kenv values
2196 * that give the motherboard manufacturer and model number.
2197 */
2198static int
2199pci_msi_blacklisted(void)
2200{
2201	device_t dev;
2202
2203	if (!pci_honor_msi_blacklist)
2204		return (0);
2205
2206	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2207	if (!(pcie_chipset || pcix_chipset)) {
2208		if (vm_guest != VM_GUEST_NO) {
2209			/*
2210			 * Whitelist older chipsets in virtual
2211			 * machines known to support MSI.
2212			 */
2213			dev = pci_find_bsf(0, 0, 0);
2214			if (dev != NULL)
2215				return (!pci_has_quirk(pci_get_devid(dev),
2216					PCI_QUIRK_ENABLE_MSI_VM));
2217		}
2218		return (1);
2219	}
2220
2221	dev = pci_find_bsf(0, 0, 0);
2222	if (dev != NULL)
2223		return (pci_msi_device_blacklisted(dev));
2224	return (0);
2225}
2226
2227/*
2228 * Returns true if the specified device is blacklisted because MSI-X
2229 * doesn't work.  Note that this assumes that if MSI doesn't work,
2230 * MSI-X doesn't either.
2231 */
2232int
2233pci_msix_device_blacklisted(device_t dev)
2234{
2235
2236	if (!pci_honor_msi_blacklist)
2237		return (0);
2238
2239	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2240		return (1);
2241
2242	return (pci_msi_device_blacklisted(dev));
2243}
2244
2245/*
2246 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2247 * is blacklisted, assume that MSI-X is as well.  Check for additional
2248 * chipsets where MSI works but MSI-X does not.
2249 */
2250static int
2251pci_msix_blacklisted(void)
2252{
2253	device_t dev;
2254
2255	if (!pci_honor_msi_blacklist)
2256		return (0);
2257
2258	dev = pci_find_bsf(0, 0, 0);
2259	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2260	    PCI_QUIRK_DISABLE_MSIX))
2261		return (1);
2262
2263	return (pci_msi_blacklisted());
2264}
2265
2266/*
2267 * Attempt to allocate *count MSI messages.  The actual number allocated is
2268 * returned in *count.  After this function returns, each message will be
2269 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2270 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving the
	 * request on failure until a single message is refused.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* Multiple Message Enable field encodes log2(actual). */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2389
2390/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; ENODEV means no MSI-X was allocated. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the bridge release below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2438
2439/*
2440 * Return the max supported MSI messages this device supports.
2441 * Basically, assuming the MD code can alloc messages, this function
2442 * should return the maximum value that pci_alloc_msi() can return.
2443 * Thus, it is subject to the tunables, etc.
2444 */
2445int
2446pci_msi_count_method(device_t dev, device_t child)
2447{
2448	struct pci_devinfo *dinfo = device_get_ivars(child);
2449	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2450
2451	if (pci_do_msi && msi->msi_location != 0)
2452		return (msi->msi_msgnum);
2453	return (0);
2454}
2455
2456/* free pcicfgregs structure and all depending data structures */
2457
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free the cached VPD identifier and read-only/writable keywords. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the BAR tracking list (safe variant: entries are freed). */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unhook from the global device list before freeing the softc. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2489
2490/*
2491 * PCI power manangement
2492 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	/* No power-management capability: state cannot be changed. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 and D2 support are optional; check the capability. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2564
2565int
2566pci_get_powerstate_method(device_t dev, device_t child)
2567{
2568	struct pci_devinfo *dinfo = device_get_ivars(child);
2569	pcicfgregs *cfg = &dinfo->cfg;
2570	uint16_t status;
2571	int result;
2572
2573	if (cfg->pp.pp_cap != 0) {
2574		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2575		switch (status & PCIM_PSTAT_DMASK) {
2576		case PCIM_PSTAT_D0:
2577			result = PCI_POWERSTATE_D0;
2578			break;
2579		case PCIM_PSTAT_D1:
2580			result = PCI_POWERSTATE_D1;
2581			break;
2582		case PCIM_PSTAT_D2:
2583			result = PCI_POWERSTATE_D2;
2584			break;
2585		case PCIM_PSTAT_D3:
2586			result = PCI_POWERSTATE_D3;
2587			break;
2588		default:
2589			result = PCI_POWERSTATE_UNKNOWN;
2590			break;
2591		}
2592	} else {
2593		/* No support, device is always at D0 */
2594		result = PCI_POWERSTATE_D0;
2595	}
2596	return (result);
2597}
2598
2599/*
2600 * Some convenience functions for PCI device drivers.
2601 */
2602
2603static __inline void
2604pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2605{
2606	uint16_t	command;
2607
2608	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2609	command |= bit;
2610	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2611}
2612
2613static __inline void
2614pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2615{
2616	uint16_t	command;
2617
2618	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2619	command &= ~bit;
2620	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2621}
2622
/* Set the bus-master enable bit in the child's PCI command register. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2629
/* Clear the bus-master enable bit in the child's PCI command register. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2636
2637int
2638pci_enable_io_method(device_t dev, device_t child, int space)
2639{
2640	uint16_t bit;
2641
2642	switch(space) {
2643	case SYS_RES_IOPORT:
2644		bit = PCIM_CMD_PORTEN;
2645		break;
2646	case SYS_RES_MEMORY:
2647		bit = PCIM_CMD_MEMEN;
2648		break;
2649	default:
2650		return (EINVAL);
2651	}
2652	pci_set_command_bit(dev, child, bit);
2653	return (0);
2654}
2655
2656int
2657pci_disable_io_method(device_t dev, device_t child, int space)
2658{
2659	uint16_t bit;
2660
2661	switch(space) {
2662	case SYS_RES_IOPORT:
2663		bit = PCIM_CMD_PORTEN;
2664		break;
2665	case SYS_RES_MEMORY:
2666		bit = PCIM_CMD_MEMEN;
2667		break;
2668	default:
2669		return (EINVAL);
2670	}
2671	pci_clear_command_bit(dev, child, bit);
2672	return (0);
2673}
2674
2675/*
2676 * New style pci driver.  Parent device is either a pci-host-bridge or a
2677 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2678 */
2679
/*
 * Dump the interesting fields of a device's parsed configuration
 * header (IDs, location, class, timing, interrupt, and the power
 * management / MSI / MSI-X capabilities) when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, with live power state. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2736
2737static int
2738pci_porten(device_t dev)
2739{
2740	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2741}
2742
2743static int
2744pci_memen(device_t dev)
2745{
2746	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2747}
2748
/*
 * Probe a BAR's current contents and its size mask.  On return,
 * *mapp holds the BAR value (both dwords combined for a 64-bit BAR)
 * and *testvalp holds the value read back after writing all 1's;
 * pci_mapsize() can derive the BAR length from the latter.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2812
/*
 * Program a BAR with a new base address and refresh the cached
 * pm_value by reading it back, so the cache reflects what the device
 * actually latched (the low size bits always read as zero).
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2833
2834struct pci_map *
2835pci_find_bar(device_t dev, int reg)
2836{
2837	struct pci_devinfo *dinfo;
2838	struct pci_map *pm;
2839
2840	dinfo = device_get_ivars(dev);
2841	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2842		if (pm->pm_reg == reg)
2843			return (pm);
2844	}
2845	return (NULL);
2846}
2847
2848int
2849pci_bar_enabled(device_t dev, struct pci_map *pm)
2850{
2851	struct pci_devinfo *dinfo;
2852	uint16_t cmd;
2853
2854	dinfo = device_get_ivars(dev);
2855	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2856	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2857		return (0);
2858	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2859	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2860		return ((cmd & PCIM_CMD_MEMEN) != 0);
2861	else
2862		return ((cmd & PCIM_CMD_PORTEN) != 0);
2863}
2864
2865static struct pci_map *
2866pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
2867{
2868	struct pci_devinfo *dinfo;
2869	struct pci_map *pm, *prev;
2870
2871	dinfo = device_get_ivars(dev);
2872	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
2873	pm->pm_reg = reg;
2874	pm->pm_value = value;
2875	pm->pm_size = size;
2876	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
2877		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
2878		    reg));
2879		if (STAILQ_NEXT(prev, pm_link) == NULL ||
2880		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
2881			break;
2882	}
2883	if (prev != NULL)
2884		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
2885	else
2886		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
2887	return (pm);
2888}
2889
2890static void
2891pci_restore_bars(device_t dev)
2892{
2893	struct pci_devinfo *dinfo;
2894	struct pci_map *pm;
2895	int ln2range;
2896
2897	dinfo = device_get_ivars(dev);
2898	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2899		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2900			ln2range = 32;
2901		else
2902			ln2range = pci_maprange(pm->pm_value);
2903		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2904		if (ln2range == 64)
2905			pci_write_config(dev, pm->pm_reg + 4,
2906			    pm->pm_value >> 32, 4);
2907	}
2908}
2909
2910/*
2911 * Add a resource based on a pci map register. Return 1 if the map
2912 * register is a 32bit map register or 2 if it is a 64bit register.
2913 */
2914static int
2915pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2916    int force, int prefetch)
2917{
2918	struct pci_map *pm;
2919	pci_addr_t base, map, testval;
2920	pci_addr_t start, end, count;
2921	int barlen, basezero, flags, maprange, mapsize, type;
2922	uint16_t cmd;
2923	struct resource *res;
2924
2925	/*
2926	 * The BAR may already exist if the device is a CardBus card
2927	 * whose CIS is stored in this BAR.
2928	 */
2929	pm = pci_find_bar(dev, reg);
2930	if (pm != NULL) {
2931		maprange = pci_maprange(pm->pm_value);
2932		barlen = maprange == 64 ? 2 : 1;
2933		return (barlen);
2934	}
2935
2936	pci_read_bar(dev, reg, &map, &testval);
2937	if (PCI_BAR_MEM(map)) {
2938		type = SYS_RES_MEMORY;
2939		if (map & PCIM_BAR_MEM_PREFETCH)
2940			prefetch = 1;
2941	} else
2942		type = SYS_RES_IOPORT;
2943	mapsize = pci_mapsize(testval);
2944	base = pci_mapbase(map);
2945#ifdef __PCI_BAR_ZERO_VALID
2946	basezero = 0;
2947#else
2948	basezero = base == 0;
2949#endif
2950	maprange = pci_maprange(map);
2951	barlen = maprange == 64 ? 2 : 1;
2952
2953	/*
2954	 * For I/O registers, if bottom bit is set, and the next bit up
2955	 * isn't clear, we know we have a BAR that doesn't conform to the
2956	 * spec, so ignore it.  Also, sanity check the size of the data
2957	 * areas to the type of memory involved.  Memory must be at least
2958	 * 16 bytes in size, while I/O ranges must be at least 4.
2959	 */
2960	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2961		return (barlen);
2962	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2963	    (type == SYS_RES_IOPORT && mapsize < 2))
2964		return (barlen);
2965
2966	/* Save a record of this BAR. */
2967	pm = pci_add_bar(dev, reg, map, mapsize);
2968	if (bootverbose) {
2969		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2970		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2971		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2972			printf(", port disabled\n");
2973		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2974			printf(", memory disabled\n");
2975		else
2976			printf(", enabled\n");
2977	}
2978
2979	/*
2980	 * If base is 0, then we have problems if this architecture does
2981	 * not allow that.  It is best to ignore such entries for the
2982	 * moment.  These will be allocated later if the driver specifically
2983	 * requests them.  However, some removable busses look better when
2984	 * all resources are allocated, so allow '0' to be overriden.
2985	 *
2986	 * Similarly treat maps whose values is the same as the test value
2987	 * read back.  These maps have had all f's written to them by the
2988	 * BIOS in an attempt to disable the resources.
2989	 */
2990	if (!force && (basezero || map == testval))
2991		return (barlen);
2992	if ((u_long)base != base) {
2993		device_printf(bus,
2994		    "pci%d:%d:%d:%d bar %#x too many address bits",
2995		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2996		    pci_get_function(dev), reg);
2997		return (barlen);
2998	}
2999
3000	/*
3001	 * This code theoretically does the right thing, but has
3002	 * undesirable side effects in some cases where peripherals
3003	 * respond oddly to having these bits enabled.  Let the user
3004	 * be able to turn them off (since pci_enable_io_modes is 1 by
3005	 * default).
3006	 */
3007	if (pci_enable_io_modes) {
3008		/* Turn on resources that have been left off by a lazy BIOS */
3009		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
3010			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3011			cmd |= PCIM_CMD_PORTEN;
3012			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3013		}
3014		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
3015			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3016			cmd |= PCIM_CMD_MEMEN;
3017			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3018		}
3019	} else {
3020		if (type == SYS_RES_IOPORT && !pci_porten(dev))
3021			return (barlen);
3022		if (type == SYS_RES_MEMORY && !pci_memen(dev))
3023			return (barlen);
3024	}
3025
3026	count = (pci_addr_t)1 << mapsize;
3027	flags = RF_ALIGNMENT_LOG2(mapsize);
3028	if (prefetch)
3029		flags |= RF_PREFETCHABLE;
3030	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
3031		start = 0;	/* Let the parent decide. */
3032		end = ~0ul;
3033	} else {
3034		start = base;
3035		end = base + count - 1;
3036	}
3037	resource_list_add(rl, type, reg, start, end, count);
3038
3039	/*
3040	 * Try to allocate the resource for this BAR from our parent
3041	 * so that this resource range is already reserved.  The
3042	 * driver for this device will later inherit this resource in
3043	 * pci_alloc_resource().
3044	 */
3045	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
3046	    flags);
3047	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
3048		/*
3049		 * If the allocation fails, try to allocate a resource for
3050		 * this BAR using any available range.  The firmware felt
3051		 * it was important enough to assign a resource, so don't
3052		 * disable decoding if we can help it.
3053		 */
3054		resource_list_delete(rl, type, reg);
3055		resource_list_add(rl, type, reg, 0, ~0ul, count);
3056		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
3057		    count, flags);
3058	}
3059	if (res == NULL) {
3060		/*
3061		 * If the allocation fails, delete the resource list entry
3062		 * and disable decoding for this device.
3063		 *
3064		 * If the driver requests this resource in the future,
3065		 * pci_reserve_map() will try to allocate a fresh
3066		 * resource range.
3067		 */
3068		resource_list_delete(rl, type, reg);
3069		pci_disable_io(dev, type);
3070		if (bootverbose)
3071			device_printf(bus,
3072			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
3073			    pci_get_domain(dev), pci_get_bus(dev),
3074			    pci_get_slot(dev), pci_get_function(dev), reg);
3075	} else {
3076		start = rman_get_start(res);
3077		pci_write_bar(dev, pm, start);
3078	}
3079	return (barlen);
3080}
3081
3082/*
3083 * For ATA devices we need to decide early what addressing mode to use.
3084 * Legacy demands that the primary and secondary ATA ports sits on the
3085 * same addresses that old ISA hardware did. This dictates that we use
3086 * those addresses and ignore the BAR's if we cannot set PCI native
3087 * addressing mode.
3088 */
/*
 * Add BAR resources for an ATA/IDE function (see the comment above).
 * 'force' and 'prefetchmask' are passed through to pci_add_map();
 * bit i of prefetchmask corresponds to BAR(i).
 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: in native mode take BAR(0)/BAR(1); in
	 * compatibility mode hardwire the legacy ISA ranges
	 * (0x1f0-0x1f7 command block, 0x3f6 control port).
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	/*
	 * Secondary channel: legacy ranges are 0x170-0x177 and 0x376
	 * when not in native mode.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BAR(4) and BAR(5) are always mapped from the BARs themselves. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
3142
/*
 * Assign an INTx IRQ to a device and record it as resource rid 0.
 *
 * The IRQ is taken, in order of preference, from a user loader
 * tunable (hw.pci<dom>.<bus>.<slot>.INT<pin>.irq), from the bus via
 * PCI_ASSIGN_INTERRUPT(), or from the intline config register.  If
 * force_route is non-zero, the bus is asked first even when intline
 * already holds a valid IRQ.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the 1-254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3190
/*
 * Perform early OHCI takeover from SMM: if the BIOS/SMM driver owns
 * the host controller, request an ownership change and disable the
 * controller's interrupts so legacy emulation cannot fire later.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller registers through the first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* OHCI_IR set: SMM currently owns the controller. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100 ms for SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never let go; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3227
3228/* Perform early UHCI takeover from SMM. */
3229static void
3230uhci_early_takeover(device_t self)
3231{
3232	struct resource *res;
3233	int rid;
3234
3235	/*
3236	 * Set the PIRQD enable bit and switch off all the others. We don't
3237	 * want legacy support to interfere with us XXX Does this also mean
3238	 * that the BIOS won't touch the keyboard anymore if it is connected
3239	 * to the ports of the root hub?
3240	 */
3241	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3242
3243	/* Disable interrupts */
3244	rid = PCI_UHCI_BASE_REG;
3245	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3246	if (res != NULL) {
3247		bus_write_2(res, UHCI_INTR, 0);
3248		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3249	}
3250}
3251
/*
 * Perform early EHCI takeover from SMM: walk the extended capability
 * list in PCI config space looking for the legacy-support capability,
 * claim the OS-ownership semaphore, and disable controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller registers through the first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Non-zero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100 ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3307
/*
 * Perform early XHCI takeover from SMM: walk the extended capability
 * list in MMIO space for the USB legacy-support capability, claim the
 * OS-ownership semaphore, and stop the controller's command ring /
 * interrupts.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller registers through the first memory BAR. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Seed with all-ones so XHCI_XECP_NEXT(eec) passes the first test. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* Non-zero BIOS semaphore means the BIOS owns the HC. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS, presumably to flush the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3369
3370#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range ([secbus, subbus]) decoded
 * by a PCI-PCI or CardBus bridge from our parent so that the range
 * stays stable.  If the range is invalid or the reservation fails,
 * control falls through to 'clear:' which zeroes both registers so a
 * fresh range can be assigned later.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Only bridge and CardBus headers carry secbus/subbus registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the range from the vendor-specific register 0x41. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Invalid range or failed reservation: zero both registers. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3475
/*
 * Allocate the secondary bus number resource (rid 0) for a bridge
 * child.  If the range was not reserved at probe time, lazily reserve
 * one here and program the secbus/subbus registers to match it.
 * Returns NULL for non-bridge headers or a non-zero rid.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Only rid 0 is used for the secondary bus range. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve without RF_ACTIVE; activation happens below. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to decode the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3526#endif
3527
/*
 * Populate a device's resource list: BARs (with ATA and quirk special
 * cases), the INTx interrupt, early USB controller takeover, and (for
 * bridges) the secondary bus range.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/*
		 * pci_add_map() returns the number of BAR registers it
		 * consumed, so i advances past 64-bit BARs correctly.
		 */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3609
3610static struct pci_devinfo *
3611pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3612    int slot, int func, size_t dinfo_size)
3613{
3614	struct pci_devinfo *dinfo;
3615
3616	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3617	if (dinfo != NULL)
3618		pci_add_child(dev, dinfo);
3619
3620	return (dinfo);
3621}
3622
/*
 * Scan every slot/function on a PCI bus and add a child device for
 * each function that responds to config-space reads.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots with an unsupported header type. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose additional functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3669
/*
 * Attach a probed function to the bus as a new child device: create
 * the device_t, link the devinfo as its ivars, snapshot and re-apply
 * its config registers, and add its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save the config state first, then restore it through our path. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
	pci_child_added(dinfo->cfg.dev);
}
3682
/*
 * Default implementation of the "child added" bus method: no extra
 * work is needed; subclasses may override it.
 */
void
pci_child_added_method(device_t dev, device_t child)
{

}
3688
/*
 * Generic PCI bus probe: always matches, but at BUS_PROBE_GENERIC
 * priority so more specific bus drivers can claim the device instead.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3698
/*
 * Attach work shared by pci(4) and its subclasses: reserve our own
 * bus number from the parent bridge (when PCI_RES_BUS is supported)
 * and set up the DMA tag handed to children.  Returns 0 or ENXIO.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve this bus's own number; released again in pci_detach(). */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Create a boundary-restricted DMA tag, but only for a
	 * top-level PCI bus (i.e. when our grandparent is not itself
	 * a pci bus); nested buses inherit the parent's tag.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3745
/*
 * Standard pci(4) attach: do the common setup, then enumerate all
 * children on our bus and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3766
3767#ifdef PCI_RES_BUS
3768static int
3769pci_detach(device_t dev)
3770{
3771	struct pci_softc *sc;
3772	int error;
3773
3774	error = bus_generic_detach(dev);
3775	if (error)
3776		return (error);
3777	sc = device_get_softc(dev);
3778	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3779}
3780#endif
3781
3782static void
3783pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3784    int state)
3785{
3786	device_t child, pcib;
3787	int dstate, i;
3788
3789	/*
3790	 * Set the device to the given state.  If the firmware suggests
3791	 * a different power state, use it instead.  If power management
3792	 * is not present, the firmware is responsible for managing
3793	 * device power.  Skip children who aren't attached since they
3794	 * are handled separately.
3795	 */
3796	pcib = device_get_parent(dev);
3797	for (i = 0; i < numdevs; i++) {
3798		child = devlist[i];
3799		dstate = state;
3800		if (device_is_attached(child) &&
3801		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3802			pci_set_powerstate(child, dstate);
3803	}
3804}
3805
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then (optionally) power them down to D3.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	/* Powering down is optional (pci_do_power_suspend sysctl). */
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3837
/*
 * Bus resume method: power children back to D0, restore their config
 * space, then resume them -- critical device classes first.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Unattached children just get their state re-saved. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: everything not resumed in the first pass. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3893
/*
 * Locate the preloaded PCI vendor database (if the loader provided
 * one) and record its address/size for later ID-to-name lookups.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): this writes one byte past the
			 * reported size -- presumably the preload area
			 * leaves room for it; verify against the loader.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3913
/*
 * Bus method called when a new PCI driver is registered: give every
 * unclaimed child another chance to be probed and attached by it.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children no driver has claimed yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		/* Restore config state in case it was powered down. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3942
/*
 * Bus method for wiring up an interrupt handler.  After the generic
 * setup, for direct children this either enables INTx (rid 0) or
 * programs the MSI/MSI-X address/data registers (rid > 0) and tracks
 * the per-vector handler counts.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}

			/*
			 * The MSIX table entry must be made valid by
			 * incrementing the mte_handlers before
			 * calling pci_enable_msix() and
			 * pci_resume_msix(). Else the MSIX rewrite
			 * table quirk will not work as expected.
			 */
			mte->mte_handlers++;
			if (mte->mte_handlers == 1) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
4050
/*
 * Bus method for tearing down an interrupt handler: undo the INTx or
 * MSI/MSI-X bookkeeping done in pci_setup_intr(), then perform the
 * generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X: rid N maps to table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
4109
4110int
4111pci_print_child(device_t dev, device_t child)
4112{
4113	struct pci_devinfo *dinfo;
4114	struct resource_list *rl;
4115	int retval = 0;
4116
4117	dinfo = device_get_ivars(child);
4118	rl = &dinfo->resources;
4119
4120	retval += bus_print_child_header(dev, child);
4121
4122	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
4123	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
4124	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
4125	if (device_get_flags(dev))
4126		retval += printf(" flags %#x", device_get_flags(dev));
4127
4128	retval += printf(" at device %d.%d", pci_get_slot(child),
4129	    pci_get_function(child));
4130
4131	retval += bus_print_child_domain(dev, child);
4132	retval += bus_print_child_footer(dev, child);
4133
4134	return (retval);
4135}
4136
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch() when
 * no driver attaches and the loaded vendor database has no entry for
 * the device.  A subclass of -1 is the fallback entry for the whole
 * class; "report" selects printing always (1) or only under
 * bootverbose (0).  The table is terminated by a NULL desc.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{PCIC_DASP,		PCIS_DASP_PERFCNTRS,	1, "performance counters"},
	{PCIC_DASP,		PCIS_DASP_COMM_SYNC,	1, "communication synchronizer"},
	{PCIC_DASP,		PCIS_DASP_MGMT_CARD,	1, "signal processing management"},
	{0, 0, 0,		NULL}
};
4234
4235void
4236pci_probe_nomatch(device_t dev, device_t child)
4237{
4238	int i, report;
4239	const char *cp, *scp;
4240	char *device;
4241
4242	/*
4243	 * Look for a listing for this device in a loaded device database.
4244	 */
4245	report = 1;
4246	if ((device = pci_describe_device(child)) != NULL) {
4247		device_printf(dev, "<%s>", device);
4248		free(device, M_DEVBUF);
4249	} else {
4250		/*
4251		 * Scan the class/subclass descriptions for a general
4252		 * description.
4253		 */
4254		cp = "unknown";
4255		scp = NULL;
4256		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4257			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4258				if (pci_nomatch_tab[i].subclass == -1) {
4259					cp = pci_nomatch_tab[i].desc;
4260					report = pci_nomatch_tab[i].report;
4261				} else if (pci_nomatch_tab[i].subclass ==
4262				    pci_get_subclass(child)) {
4263					scp = pci_nomatch_tab[i].desc;
4264					report = pci_nomatch_tab[i].report;
4265				}
4266			}
4267		}
4268		if (report || bootverbose) {
4269			device_printf(dev, "<%s%s%s>",
4270			    cp ? cp : "",
4271			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4272			    scp ? scp : "");
4273		}
4274	}
4275	if (report || bootverbose) {
4276		printf(" at device %d.%d (no driver attached)\n",
4277		    pci_get_slot(child), pci_get_function(child));
4278	}
4279	pci_cfg_save(child, device_get_ivars(child), 1);
4280}
4281
/*
 * Bus callback run after a child's driver has detached.  Reclaims any
 * resources the driver leaked (complaining when it does so) and then
 * saves the device's config registers.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	/* Preserve config registers so a later attach can restore them. */
	pci_cfg_save(child, dinfo, 1);
}
4313
4314/*
4315 * Parse the PCI device database, if loaded, and return a pointer to a
4316 * description of the device.
4317 *
4318 * The database is flat text formatted as follows:
4319 *
4320 * Any line not in a valid format is ignored.
4321 * Lines are terminated with newline '\n' characters.
4322 *
4323 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4324 * the vendor name.
4325 *
4326 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4327 * - devices cannot be listed without a corresponding VENDOR line.
4328 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4329 * another TAB, then the device name.
4330 */
4331
4332/*
4333 * Assuming (ptr) points to the beginning of a line in the database,
4334 * return the vendor or device and description of the next entry.
4335 * The value of (vendor) or (device) inappropriate for the entry type
4336 * is set to -1.  Returns nonzero at the end of the database.
4337 *
4338 * Note that this is slightly unrobust in the face of corrupt data;
4339 * we attempt to safeguard against this by spamming the end of the
4340 * database with a newline when we initialise.
4341 */
4342static int
4343pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4344{
4345	char	*cp = *ptr;
4346	int	left;
4347
4348	*device = -1;
4349	*vendor = -1;
4350	**desc = '\0';
4351	for (;;) {
4352		left = pci_vendordata_size - (cp - pci_vendordata);
4353		if (left <= 0) {
4354			*ptr = cp;
4355			return(1);
4356		}
4357
4358		/* vendor entry? */
4359		if (*cp != '\t' &&
4360		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4361			break;
4362		/* device entry? */
4363		if (*cp == '\t' &&
4364		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4365			break;
4366
4367		/* skip to next line */
4368		while (*cp != '\n' && left > 0) {
4369			cp++;
4370			left--;
4371		}
4372		if (*cp == '\n') {
4373			cp++;
4374			left--;
4375		}
4376	}
4377	/* skip to next line */
4378	while (*cp != '\n' && left > 0) {
4379		cp++;
4380		left--;
4381	}
4382	if (*cp == '\n' && left > 0)
4383		cp++;
4384	*ptr = cp;
4385	return(0);
4386}
4387
/*
 * Build a malloc'd "vendor, device" description string for a device by
 * scanning the loaded vendor database (pci_vendordata).  Returns NULL
 * when the database is not loaded, an allocation fails, or the vendor
 * is not listed; on success the caller frees the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: this vendor has no matching device. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor's section: device not listed. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device entry found: fall back to the raw hex device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4440
/*
 * Bus ivar read accessor: report an identity/config value for a child
 * device out of its cached pcicfgregs.  Returns 0 on success, EINVAL
 * for values inapplicable to this device, or ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined ID: device in the high word, vendor in the low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		/* MIN_GNT only exists in type 0 (normal) config headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		/* MAX_LAT only exists in type 0 (normal) config headers. */
		if (cfg->hdrtype != PCIM_HDRTYPE_NORMAL) {
			*result = -1;
			return (EINVAL);
		}
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4531
/*
 * Bus ivar write accessor.  Only the interrupt pin may be modified;
 * all identification ivars are read-only and return EINVAL, and
 * unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
4564
4565#include "opt_ddb.h"
4566#ifdef DDB
4567#include <ddb/ddb.h>
4568#include <sys/cons.h>
4569
4570/*
4571 * List resources based on pci map registers, used for within ddb
4572 */
4573
4574DB_SHOW_COMMAND(pciregs, db_pci_dump)
4575{
4576	struct pci_devinfo *dinfo;
4577	struct devlist *devlist_head;
4578	struct pci_conf *p;
4579	const char *name;
4580	int i, error, none_count;
4581
4582	none_count = 0;
4583	/* get the head of the device queue */
4584	devlist_head = &pci_devq;
4585
4586	/*
4587	 * Go through the list of devices and print out devices
4588	 */
4589	for (error = 0, i = 0,
4590	     dinfo = STAILQ_FIRST(devlist_head);
4591	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4592	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4593
4594		/* Populate pd_name and pd_unit */
4595		name = NULL;
4596		if (dinfo->cfg.dev)
4597			name = device_get_name(dinfo->cfg.dev);
4598
4599		p = &dinfo->conf;
4600		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4601			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4602			(name && *name) ? name : "none",
4603			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4604			none_count++,
4605			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4606			p->pc_sel.pc_func, (p->pc_class << 16) |
4607			(p->pc_subclass << 8) | p->pc_progif,
4608			(p->pc_subdevice << 16) | p->pc_subvendor,
4609			(p->pc_device << 16) | p->pc_vendor,
4610			p->pc_revid, p->pc_hdr);
4611	}
4612}
4613#endif /* DDB */
4614
/*
 * Lazily size and reserve a resource backing a BAR that has no
 * resource list entry yet.  Determines the BAR's size (probing the
 * hardware if this BAR hasn't been seen before), validates that the
 * requested resource type matches the BAR type, reserves a suitably
 * sized and aligned range, and programs the BAR with the assigned
 * address.  Returns the reserved (inactive) resource or NULL.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	uint16_t cmd;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type doesn't match the BAR's type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Reservation failed: back out the resource list entry. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));

	/* Disable decoding via the CMD register before updating the BAR */
	cmd = pci_read_config(child, PCIR_COMMAND, 2);
	pci_write_config(child, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	map = rman_get_start(res);
	pci_write_bar(child, pm, map);

	/* Restore the original value of the CMD register */
	pci_write_config(child, PCIR_COMMAND, cmd, 2);
out:
	return (res);
}
4722
/*
 * Bus resource allocation method for PCI children.  Non-direct
 * children are forwarded up the tree.  For direct children this
 * handles lazy interrupt routing for rid 0, lazily reserves BAR
 * resources on first use, and finally allocates from the child's
 * resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Common path: hand out the (possibly just reserved) resource. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4801
/*
 * Bus resource release method for PCI children.  Non-direct children
 * and bridge window registers are forwarded up the tree; everything
 * else is released back to the child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4836
4837int
4838pci_activate_resource(device_t dev, device_t child, int type, int rid,
4839    struct resource *r)
4840{
4841	struct pci_devinfo *dinfo;
4842	int error;
4843
4844	error = bus_generic_activate_resource(dev, child, type, rid, r);
4845	if (error)
4846		return (error);
4847
4848	/* Enable decoding in the command register when activating BARs. */
4849	if (device_get_parent(child) == dev) {
4850		/* Device ROMs need their decoding explicitly enabled. */
4851		dinfo = device_get_ivars(child);
4852		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4853			pci_write_bar(child, pci_find_bar(child, rid),
4854			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4855		switch (type) {
4856		case SYS_RES_IOPORT:
4857		case SYS_RES_MEMORY:
4858			error = PCI_ENABLE_IO(dev, child, type);
4859			break;
4860		}
4861	}
4862	return (error);
4863}
4864
4865int
4866pci_deactivate_resource(device_t dev, device_t child, int type,
4867    int rid, struct resource *r)
4868{
4869	struct pci_devinfo *dinfo;
4870	int error;
4871
4872	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4873	if (error)
4874		return (error);
4875
4876	/* Disable decoding for device ROMs. */
4877	if (device_get_parent(child) == dev) {
4878		dinfo = device_get_ivars(child);
4879		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4880			pci_write_bar(child, pci_find_bar(child, rid),
4881			    rman_get_start(r));
4882	}
4883	return (0);
4884}
4885
/*
 * Bus callback invoked when a child device is deleted.  Disables the
 * device's decoding and bus mastering (if it is still present), forces
 * release of any resources a driver left allocated, frees the child's
 * resource list, and finally frees its config data.
 */
void
pci_child_deleted(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/* Turn off access to resources we're about to free */
	if (bus_child_present(child) != 0) {
		pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
		    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

		pci_disable_busmaster(child);
	}

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			/* Drop the lazy reservation made at alloc time. */
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	pci_freecfg(dinfo);
}
4925
4926/* KBI compatability shim. */
4927extern void pci_delete_child(device_t dev, device_t child);
4928
4929void
4930pci_delete_child(device_t dev, device_t child)
4931{
4932
4933	device_delete_child (dev, child);
4934}
4935
/*
 * Remove a resource entry from a direct child's resource list,
 * refusing (with a console complaint) if the resource is still active
 * or busy.  Silently does nothing for non-direct children or missing
 * entries.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		/* Drop the underlying reservation before deleting. */
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4965
4966struct resource_list *
4967pci_get_resource_list (device_t dev, device_t child)
4968{
4969	struct pci_devinfo *dinfo = device_get_ivars(child);
4970
4971	return (&dinfo->resources);
4972}
4973
4974bus_dma_tag_t
4975pci_get_dma_tag(device_t bus, device_t dev)
4976{
4977	struct pci_softc *sc = device_get_softc(bus);
4978
4979	return (sc->sc_dma_tag);
4980}
4981
4982uint32_t
4983pci_read_config_method(device_t dev, device_t child, int reg, int width)
4984{
4985	struct pci_devinfo *dinfo = device_get_ivars(child);
4986	pcicfgregs *cfg = &dinfo->cfg;
4987
4988	return (PCIB_READ_CONFIG(device_get_parent(dev),
4989	    cfg->bus, cfg->slot, cfg->func, reg, width));
4990}
4991
4992void
4993pci_write_config_method(device_t dev, device_t child, int reg,
4994    uint32_t val, int width)
4995{
4996	struct pci_devinfo *dinfo = device_get_ivars(child);
4997	pcicfgregs *cfg = &dinfo->cfg;
4998
4999	PCIB_WRITE_CONFIG(device_get_parent(dev),
5000	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
5001}
5002
5003int
5004pci_child_location_str_method(device_t dev, device_t child, char *buf,
5005    size_t buflen)
5006{
5007
5008	snprintf(buf, buflen, "slot=%d function=%d dbsf=pci%d:%d:%d:%d",
5009	    pci_get_slot(child), pci_get_function(child), pci_get_domain(child),
5010	    pci_get_bus(child), pci_get_slot(child), pci_get_function(child));
5011	return (0);
5012}
5013
5014int
5015pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
5016    size_t buflen)
5017{
5018	struct pci_devinfo *dinfo;
5019	pcicfgregs *cfg;
5020
5021	dinfo = device_get_ivars(child);
5022	cfg = &dinfo->cfg;
5023	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
5024	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
5025	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
5026	    cfg->progif);
5027	return (0);
5028}
5029
5030int
5031pci_assign_interrupt_method(device_t dev, device_t child)
5032{
5033	struct pci_devinfo *dinfo = device_get_ivars(child);
5034	pcicfgregs *cfg = &dinfo->cfg;
5035
5036	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
5037	    cfg->intpin));
5038}
5039
/*
 * dev_lookup event handler: translate a pciconf-style device name into
 * a device_t.  Does nothing when *dev has already been resolved by an
 * earlier handler or when the name doesn't parse/validate; otherwise
 * stores the result of pci_find_dbsf() (which may itself be NULL).
 */
static void
pci_lookup(void *arg, const char *name, device_t *dev)
{
	long val;
	char *end;
	int domain, bus, slot, func;

	if (*dev != NULL)
		return;

	/*
	 * Accept pciconf-style selectors of either pciD:B:S:F or
	 * pciB:S:F.  In the latter case, the domain is assumed to
	 * be zero.
	 */
	if (strncmp(name, "pci", 3) != 0)
		return;
	val = strtol(name + 3, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	domain = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX || *end != ':')
		return;
	bus = val;
	val = strtol(end + 1, &end, 10);
	if (val < 0 || val > INT_MAX)
		return;
	slot = val;
	if (*end == ':') {
		/* Four components were given: pciD:B:S:F. */
		val = strtol(end + 1, &end, 10);
		if (val < 0 || val > INT_MAX || *end != '\0')
			return;
		func = val;
	} else if (*end == '\0') {
		/* Only three components: shift them down, domain = 0. */
		func = slot;
		slot = bus;
		bus = domain;
		domain = 0;
	} else
		return;

	/* ARI-capable devices may use function numbers above PCI_FUNCMAX,
	 * but only in slot 0. */
	if (domain > PCI_DOMAINMAX || bus > PCI_BUSMAX || slot > PCI_SLOTMAX ||
	    func > PCIE_ARI_FUNCMAX || (slot != 0 && func > PCI_FUNCMAX))
		return;

	*dev = pci_find_dbsf(domain, bus, slot, func);
}
5088
/*
 * Module event handler.  On load: initialize the global device list,
 * create /dev/pci, load the vendor description database, and register
 * the dev_lookup hook.  On unload: undo the registration and destroy
 * the device node.  Always returns 0.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	static struct cdev *pci_cdev;
	static eventhandler_tag tag;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		tag = EVENTHANDLER_REGISTER(dev_lookup, pci_lookup, NULL,
		    1000);
		break;

	case MOD_UNLOAD:
		if (tag != NULL)
			EVENTHANDLER_DEREGISTER(dev_lookup, tag);
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
5115
/*
 * Restore the saved PCI Express capability control registers of a
 * device.  Writes are gated by the capability version and port type
 * because the corresponding registers only exist for some
 * combinations; version 2+ capabilities implement the full register
 * set.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control exists on slot-bearing root/downstream ports. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" registers only exist in version 2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
5151
5152static void
5153pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
5154{
5155	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
5156	    dinfo->cfg.pcix.pcix_command,  2);
5157}
5158
5159void
5160pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
5161{
5162
5163	/*
5164	 * Only do header type 0 devices.  Type 1 devices are bridges,
5165	 * which we know need special treatment.  Type 2 devices are
5166	 * cardbus bridges which also require special treatment.
5167	 * Other types are unknown, and we err on the side of safety
5168	 * by ignoring them.
5169	 */
5170	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
5171		return;
5172
5173	/*
5174	 * Restore the device to full power mode.  We must do this
5175	 * before we restore the registers because moving from D3 to
5176	 * D0 will cause the chip's BARs and some other registers to
5177	 * be reset to some unknown power on reset values.  Cut down
5178	 * the noise on boot by doing nothing if we are already in
5179	 * state D0.
5180	 */
5181	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
5182		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5183	pci_restore_bars(dev);
5184	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
5185	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
5186	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
5187	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
5188	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
5189	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
5190	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
5191	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
5192	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
5193
5194	/*
5195	 * Restore extended capabilities for PCI-Express and PCI-X
5196	 */
5197	if (dinfo->cfg.pcie.pcie_location != 0)
5198		pci_cfg_restore_pcie(dev, dinfo);
5199	if (dinfo->cfg.pcix.pcix_location != 0)
5200		pci_cfg_restore_pcix(dev, dinfo);
5201
5202	/* Restore MSI and MSI-X configurations if they are present. */
5203	if (dinfo->cfg.msi.msi_location != 0)
5204		pci_resume_msi(dev);
5205	if (dinfo->cfg.msix.msix_location != 0)
5206		pci_resume_msix(dev);
5207}
5208
5209static void
5210pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
5211{
5212#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
5213	struct pcicfg_pcie *cfg;
5214	int version, pos;
5215
5216	cfg = &dinfo->cfg.pcie;
5217	pos = cfg->pcie_location;
5218
5219	cfg->pcie_flags = RREG(PCIER_FLAGS);
5220
5221	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;
5222
5223	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);
5224
5225	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5226	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
5227	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
5228		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);
5229
5230	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5231	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
5232	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
5233		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);
5234
5235	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
5236	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
5237		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);
5238
5239	if (version > 1) {
5240		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
5241		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
5242		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
5243	}
5244#undef RREG
5245}
5246
5247static void
5248pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5249{
5250	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5251	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5252}
5253
5254void
5255pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
5256{
5257	uint32_t cls;
5258	int ps;
5259
5260	/*
5261	 * Only do header type 0 devices.  Type 1 devices are bridges, which
5262	 * we know need special treatment.  Type 2 devices are cardbus bridges
5263	 * which also require special treatment.  Other types are unknown, and
5264	 * we err on the side of safety by ignoring them.  Powering down
5265	 * bridges should not be undertaken lightly.
5266	 */
5267	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
5268		return;
5269
5270	/*
5271	 * Some drivers apparently write to these registers w/o updating our
5272	 * cached copy.  No harm happens if we update the copy, so do so here
5273	 * so we can restore them.  The COMMAND register is modified by the
5274	 * bus w/o updating the cache.  This should represent the normally
5275	 * writable portion of the 'defined' part of type 0 headers.  In
5276	 * theory we also need to save/restore the PCI capability structures
5277	 * we know about, but apart from power we don't know any that are
5278	 * writable.
5279	 */
5280	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
5281	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
5282	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
5283	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
5284	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
5285	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
5286	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
5287	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
5288	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
5289	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
5290	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
5291	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
5292	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
5293	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
5294	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
5295
5296	if (dinfo->cfg.pcie.pcie_location != 0)
5297		pci_cfg_save_pcie(dev, dinfo);
5298
5299	if (dinfo->cfg.pcix.pcix_location != 0)
5300		pci_cfg_save_pcix(dev, dinfo);
5301
5302	/*
5303	 * don't set the state for display devices, base peripherals and
5304	 * memory devices since bad things happen when they are powered down.
5305	 * We should (a) have drivers that can easily detach and (b) use
5306	 * generic drivers for these devices so that some device actually
5307	 * attaches.  We need to make sure that when we implement (a) we don't
5308	 * power the device down on a reattach.
5309	 */
5310	cls = pci_get_class(dev);
5311	if (!setstate)
5312		return;
5313	switch (pci_do_power_nodriver)
5314	{
5315		case 0:		/* NO powerdown at all */
5316			return;
5317		case 1:		/* Conservative about what to power down */
5318			if (cls == PCIC_STORAGE)
5319				return;
5320			/*FALLTHROUGH*/
5321		case 2:		/* Agressive about what to power down */
5322			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
5323			    cls == PCIC_BASEPERIPH)
5324				return;
5325			/*FALLTHROUGH*/
5326		case 3:		/* Power down everything */
5327			break;
5328	}
5329	/*
5330	 * PCI spec says we can only go into D3 state from D0 state.
5331	 * Transition from D[12] into D0 before going to D3 state.
5332	 */
5333	ps = pci_get_powerstate(dev);
5334	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
5335		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
5336	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
5337		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
5338}
5339
5340/* Wrapper APIs suitable for device driver use. */
5341void
5342pci_save_state(device_t dev)
5343{
5344	struct pci_devinfo *dinfo;
5345
5346	dinfo = device_get_ivars(dev);
5347	pci_cfg_save(dev, dinfo, 0);
5348}
5349
5350void
5351pci_restore_state(device_t dev)
5352{
5353	struct pci_devinfo *dinfo;
5354
5355	dinfo = device_get_ivars(dev);
5356	pci_cfg_restore(dev, dinfo);
5357}
5358
5359static uint16_t
5360pci_get_rid_method(device_t dev, device_t child)
5361{
5362
5363	return (PCIB_GET_RID(device_get_parent(dev), child));
5364}
5365
5366/* Find the upstream port of a given PCI device in a root complex. */
5367device_t
5368pci_find_pcie_root_port(device_t dev)
5369{
5370	struct pci_devinfo *dinfo;
5371	devclass_t pci_class;
5372	device_t pcib, bus;
5373
5374	pci_class = devclass_find("pci");
5375	KASSERT(device_get_devclass(device_get_parent(dev)) == pci_class,
5376	    ("%s: non-pci device %s", __func__, device_get_nameunit(dev)));
5377
5378	/*
5379	 * Walk the bridge hierarchy until we find a PCI-e root
5380	 * port or a non-PCI device.
5381	 */
5382	for (;;) {
5383		bus = device_get_parent(dev);
5384		KASSERT(bus != NULL, ("%s: null parent of %s", __func__,
5385		    device_get_nameunit(dev)));
5386
5387		pcib = device_get_parent(bus);
5388		KASSERT(pcib != NULL, ("%s: null bridge of %s", __func__,
5389		    device_get_nameunit(bus)));
5390
5391		/*
5392		 * pcib's parent must be a PCI bus for this to be a
5393		 * PCI-PCI bridge.
5394		 */
5395		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
5396			return (NULL);
5397
5398		dinfo = device_get_ivars(pcib);
5399		if (dinfo->cfg.pcie.pcie_location != 0 &&
5400		    dinfo->cfg.pcie.pcie_type == PCIEM_TYPE_ROOT_PORT)
5401			return (pcib);
5402
5403		dev = pcib;
5404	}
5405}
5406
5407/*
5408 * Wait for pending transactions to complete on a PCI-express function.
5409 *
5410 * The maximum delay is specified in milliseconds in max_delay.  Note
5411 * that this function may sleep.
5412 *
5413 * Returns true if the function is idle and false if the timeout is
5414 * exceeded.  If dev is not a PCI-express function, this returns true.
5415 */
5416bool
5417pcie_wait_for_pending_transactions(device_t dev, u_int max_delay)
5418{
5419	struct pci_devinfo *dinfo = device_get_ivars(dev);
5420	uint16_t sta;
5421	int cap;
5422
5423	cap = dinfo->cfg.pcie.pcie_location;
5424	if (cap == 0)
5425		return (true);
5426
5427	sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5428	while (sta & PCIEM_STA_TRANSACTION_PND) {
5429		if (max_delay == 0)
5430			return (false);
5431
5432		/* Poll once every 100 milliseconds up to the timeout. */
5433		if (max_delay > 100) {
5434			pause_sbt("pcietp", 100 * SBT_1MS, 0, C_HARDCLOCK);
5435			max_delay -= 100;
5436		} else {
5437			pause_sbt("pcietp", max_delay * SBT_1MS, 0,
5438			    C_HARDCLOCK);
5439			max_delay = 0;
5440		}
5441		sta = pci_read_config(dev, cap + PCIER_DEVICE_STA, 2);
5442	}
5443
5444	return (true);
5445}
5446
5447/*
5448 * Determine the maximum Completion Timeout in microseconds.
5449 *
5450 * For non-PCI-express functions this returns 0.
5451 */
5452int
5453pcie_get_max_completion_timeout(device_t dev)
5454{
5455	struct pci_devinfo *dinfo = device_get_ivars(dev);
5456	int cap;
5457
5458	cap = dinfo->cfg.pcie.pcie_location;
5459	if (cap == 0)
5460		return (0);
5461
5462	/*
5463	 * Functions using the 1.x spec use the default timeout range of
5464	 * 50 microseconds to 50 milliseconds.  Functions that do not
5465	 * support programmable timeouts also use this range.
5466	 */
5467	if ((dinfo->cfg.pcie.pcie_flags & PCIEM_FLAGS_VERSION) < 2 ||
5468	    (pci_read_config(dev, cap + PCIER_DEVICE_CAP2, 4) &
5469	    PCIEM_CAP2_COMP_TIMO_RANGES) == 0)
5470		return (50 * 1000);
5471
5472	switch (pci_read_config(dev, cap + PCIER_DEVICE_CTL2, 2) &
5473	    PCIEM_CTL2_COMP_TIMO_VAL) {
5474	case PCIEM_CTL2_COMP_TIMO_100US:
5475		return (100);
5476	case PCIEM_CTL2_COMP_TIMO_10MS:
5477		return (10 * 1000);
5478	case PCIEM_CTL2_COMP_TIMO_55MS:
5479		return (55 * 1000);
5480	case PCIEM_CTL2_COMP_TIMO_210MS:
5481		return (210 * 1000);
5482	case PCIEM_CTL2_COMP_TIMO_900MS:
5483		return (900 * 1000);
5484	case PCIEM_CTL2_COMP_TIMO_3500MS:
5485		return (3500 * 1000);
5486	case PCIEM_CTL2_COMP_TIMO_13S:
5487		return (13 * 1000 * 1000);
5488	case PCIEM_CTL2_COMP_TIMO_64S:
5489		return (64 * 1000 * 1000);
5490	default:
5491		return (50 * 1000);
5492	}
5493}
5494
5495/*
5496 * Perform a Function Level Reset (FLR) on a device.
5497 *
5498 * This function first waits for any pending transactions to complete
5499 * within the timeout specified by max_delay.  If transactions are
5500 * still pending, the function will return false without attempting a
5501 * reset.
5502 *
5503 * If dev is not a PCI-express function or does not support FLR, this
5504 * function returns false.
5505 *
5506 * Note that no registers are saved or restored.  The caller is
5507 * responsible for saving and restoring any registers including
5508 * PCI-standard registers via pci_save_state() and
5509 * pci_restore_state().
5510 */
bool
pcie_flr(device_t dev, u_int max_delay, bool force)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	uint16_t cmd, ctl;
	int compl_delay;
	int cap;

	/* FLR is only defined for PCI-express functions. */
	cap = dinfo->cfg.pcie.pcie_location;
	if (cap == 0)
		return (false);

	/* The function must advertise FLR in its device capabilities. */
	if (!(pci_read_config(dev, cap + PCIER_DEVICE_CAP, 4) & PCIEM_CAP_FLR))
		return (false);

	/*
	 * Disable busmastering to prevent generation of new
	 * transactions while waiting for the device to go idle.  If
	 * the idle timeout fails, the command register is restored
	 * which will re-enable busmastering.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND, cmd & ~(PCIM_CMD_BUSMASTEREN), 2);
	if (!pcie_wait_for_pending_transactions(dev, max_delay)) {
		/* Not idle: either give up or reset anyway (force). */
		if (!force) {
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
			return (false);
		}
		pci_printf(&dinfo->cfg,
		    "Resetting with transactions pending after %d ms\n",
		    max_delay);

		/*
		 * Extend the post-FLR delay to cover the maximum
		 * Completion Timeout delay of anything in flight
		 * during the FLR delay.  Enforce a minimum delay of
		 * at least 10ms.
		 */
		compl_delay = pcie_get_max_completion_timeout(dev) / 1000;
		if (compl_delay < 10)
			compl_delay = 10;
	} else
		compl_delay = 0;

	/* Initiate the reset. */
	ctl = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
	pci_write_config(dev, cap + PCIER_DEVICE_CTL, ctl |
	    PCIEM_CTL_INITIATE_FLR, 2);

	/* Wait for 100ms. */
	pause_sbt("pcieflr", (100 + compl_delay) * SBT_1MS, 0, C_HARDCLOCK);

	/* Warn if the device still reports pending transactions. */
	if (pci_read_config(dev, cap + PCIER_DEVICE_STA, 2) &
	    PCIEM_STA_TRANSACTION_PND)
		pci_printf(&dinfo->cfg, "Transactions pending after FLR!\n");
	return (true);
}
5568