pci.c revision 284021
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/pci/pci.c 284021 2015-06-05 08:36:25Z kib $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
/*
 * True iff 'reg' addresses the device's expansion ROM BAR for this
 * header type (PCIR_BIOS for normal devices, PCIR_BIOS_1 for bridges).
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76
77static int		pci_has_quirk(uint32_t devid, int quirk);
78static pci_addr_t	pci_mapbase(uint64_t mapreg);
79static const char	*pci_maptype(uint64_t mapreg);
80static int		pci_mapsize(uint64_t testval);
81static int		pci_maprange(uint64_t mapreg);
82static pci_addr_t	pci_rombase(uint64_t mapreg);
83static int		pci_romsize(uint64_t testval);
84static void		pci_fixancient(pcicfgregs *cfg);
85static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86
87static int		pci_porten(device_t dev);
88static int		pci_memen(device_t dev);
89static void		pci_assign_interrupt(device_t bus, device_t dev,
90			    int force_route);
91static int		pci_add_map(device_t bus, device_t dev, int reg,
92			    struct resource_list *rl, int force, int prefetch);
93static int		pci_probe(device_t dev);
94static int		pci_attach(device_t dev);
95#ifdef PCI_RES_BUS
96static int		pci_detach(device_t dev);
97#endif
98static void		pci_load_vendor_data(void);
99static int		pci_describe_parse_line(char **ptr, int *vendor,
100			    int *device, char **desc);
101static char		*pci_describe_device(device_t dev);
102static int		pci_modevent(module_t mod, int what, void *arg);
103static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104			    pcicfgregs *cfg);
105static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
106static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107			    int reg, uint32_t *data);
108#if 0
109static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110			    int reg, uint32_t data);
111#endif
112static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113static void		pci_disable_msi(device_t dev);
114static void		pci_enable_msi(device_t dev, uint64_t address,
115			    uint16_t data);
116static void		pci_enable_msix(device_t dev, u_int index,
117			    uint64_t address, uint32_t data);
118static void		pci_mask_msix(device_t dev, u_int index);
119static void		pci_unmask_msix(device_t dev, u_int index);
120static int		pci_msi_blacklisted(void);
121static int		pci_msix_blacklisted(void);
122static void		pci_resume_msi(device_t dev);
123static void		pci_resume_msix(device_t dev);
124static int		pci_remap_intr_method(device_t bus, device_t dev,
125			    u_int irq);
126
127static uint16_t		pci_get_rid_method(device_t dev, device_t child);
128
/*
 * Method table for the PCI bus driver: standard device and newbus
 * bus-interface methods, plus the PCI-specific kobj interface
 * ("pci_if") implemented by the *_method functions in this file.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),

	DEVMETHOD_END
};
191
/* Declare the "pci" driver class and attach it below pcib(4) bridges. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor description data loaded by pci_load_vendor_data(), used by
 * pci_describe_device() to pretty-print device names. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
200
/*
 * Per-device quirk record; entries live in the pci_quirks[] table
 * below and are matched by pci_has_quirk().
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;	/* meaning depends on 'type' (e.g. register offset) */
	int	arg2;	/* meaning depends on 'type' */
};
213
/* Quirk table, scanned linearly; terminated by an all-zero sentinel. */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	/* Sentinel: devid of 0 terminates the scan in pci_has_quirk(). */
	{ 0 }
};
291
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list and counters for all enumerated PCI functions. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;
/* Set during capability scan once any PCIe / PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* USB early takeover only matters where a BIOS may own the controller. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
TUNABLE_INT("hw.pci.clear_buses", &pci_clear_buses);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
TUNABLE_INT("hw.pci.enable_ari", &pci_enable_ari);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
382
383static int
384pci_has_quirk(uint32_t devid, int quirk)
385{
386	const struct pci_quirk *q;
387
388	for (q = &pci_quirks[0]; q->devid; q++) {
389		if (q->devid == devid && q->type == quirk)
390			return (1);
391	}
392	return (0);
393}
394
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: delegate to the domain-aware lookup. */
	return (pci_find_dbsf(0, bus, slot, func));
}
403
404/* Find a device_t by domain/bus/slot/function */
405
406device_t
407pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
408{
409	struct pci_devinfo *dinfo;
410
411	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
412		if ((dinfo->cfg.domain == domain) &&
413		    (dinfo->cfg.bus == bus) &&
414		    (dinfo->cfg.slot == slot) &&
415		    (dinfo->cfg.func == func)) {
416			return (dinfo->cfg.dev);
417		}
418	}
419
420	return (NULL);
421}
422
423/* Find a device_t by vendor/device ID */
424
425device_t
426pci_find_device(uint16_t vendor, uint16_t device)
427{
428	struct pci_devinfo *dinfo;
429
430	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
431		if ((dinfo->cfg.vendor == vendor) &&
432		    (dinfo->cfg.device == device)) {
433			return (dinfo->cfg.dev);
434		}
435	}
436
437	return (NULL);
438}
439
440device_t
441pci_find_class(uint8_t class, uint8_t subclass)
442{
443	struct pci_devinfo *dinfo;
444
445	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
446		if (dinfo->cfg.baseclass == class &&
447		    dinfo->cfg.subclass == subclass) {
448			return (dinfo->cfg.dev);
449		}
450	}
451
452	return (NULL);
453}
454
/*
 * printf(9) wrapper that prefixes the message with the device's
 * "pciD:B:S:F: " location.  Returns the total character count
 * printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
468
469/* return base address of memory or port map */
470
471static pci_addr_t
472pci_mapbase(uint64_t mapreg)
473{
474
475	if (PCI_BAR_MEM(mapreg))
476		return (mapreg & PCIM_BAR_MEM_BASE);
477	else
478		return (mapreg & PCIM_BAR_IO_BASE);
479}
480
481/* return map type of memory or port map */
482
483static const char *
484pci_maptype(uint64_t mapreg)
485{
486
487	if (PCI_BAR_IO(mapreg))
488		return ("I/O Port");
489	if (mapreg & PCIM_BAR_MEM_PREFETCH)
490		return ("Prefetchable Memory");
491	return ("Memory");
492}
493
494/* return log2 of map size decoded for memory or port map */
495
496static int
497pci_mapsize(uint64_t testval)
498{
499	int ln2size;
500
501	testval = pci_mapbase(testval);
502	ln2size = 0;
503	if (testval != 0) {
504		while ((testval & 1) == 0)
505		{
506			ln2size++;
507			testval >>= 1;
508		}
509	}
510	return (ln2size);
511}
512
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Keep only the address bits of the expansion ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
521
522/* return log2 of map size decided for device ROM */
523
524static int
525pci_romsize(uint64_t testval)
526{
527	int ln2size;
528
529	testval = pci_rombase(testval);
530	ln2size = 0;
531	if (testval != 0) {
532		while ((testval & 1) == 0)
533		{
534			ln2size++;
535			testval >>= 1;
536		}
537	}
538	return (ln2size);
539}
540
541/* return log2 of address range supported by map register */
542
543static int
544pci_maprange(uint64_t mapreg)
545{
546	int ln2range = 0;
547
548	if (PCI_BAR_IO(mapreg))
549		ln2range = 32;
550	else
551		switch (mapreg & PCIM_BAR_MEM_TYPE) {
552		case PCIM_BAR_MEM_32:
553			ln2range = 32;
554			break;
555		case PCIM_BAR_MEM_1MB:
556			ln2range = 20;
557			break;
558		case PCIM_BAR_MEM_64:
559			ln2range = 64;
560			break;
561		}
562	return (ln2range);
563}
564
565/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
566
567static void
568pci_fixancient(pcicfgregs *cfg)
569{
570	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
571		return;
572
573	/* PCI to PCI bridges use header type 1 */
574	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
575		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
576}
577
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/* The subvendor/subdevice register offsets and the number of BARs
	 * depend on the header type; bridges have no subvendor registers
	 * in the standard header (they may appear as a capability). */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
601
/*
 * Read the configuration header of function d:b:s:f into a freshly
 * allocated pci_devinfo of 'size' bytes (callers may over-allocate for
 * bus-specific trailing data).  The new entry is appended to pci_devq
 * and returned; NULL is returned if no function is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones vendor/device word means no function responds here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/* NOTE(review): M_WAITOK allocations should not return
		 * NULL, so this check looks redundant — confirm before
		 * removing. */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the standard configuration header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciio(4) conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
677
/*
 * Walk the device's classic PCI capability list and record the
 * location and key fields of each capability of interest (power
 * management, HyperTransport, MSI, MSI-X, VPD, bridge subvendor,
 * PCI-X, PCI-express) into *cfg.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets are one byte, so must fit in 0-255. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA are each a BAR index plus offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* NB: REG and WREG are deliberately not #undef'd; the VPD helpers below reuse them. */
}
838
839/*
840 * PCI Vital Product Data
841 */
842
843#define	PCI_VPD_TIMEOUT		1000000
844
845static int
846pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
847{
848	int count = PCI_VPD_TIMEOUT;
849
850	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
851
852	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
853
854	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
855		if (--count < 0)
856			return (ENXIO);
857		DELAY(1);	/* limit looping */
858	}
859	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
860
861	return (0);
862}
863
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD data.  Returns 0 on
 * success, or ENXIO if the device does not clear the write-completion
 * flag within PCI_VPD_TIMEOUT polls.  Currently unused, hence
 * compiled out.  (Fixes a typo in the KASSERT message: "by" -> "be".)
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Writing the address with bit 15 set starts a write cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Poll until the device clears bit 15 to signal completion. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
883
884#undef PCI_VPD_TIMEOUT
885
/* Cursor state for sequential byte-wise reads of a device's VPD area. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes remaining in 'val' */
	int		off;		/* byte offset of the next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
894
895static int
896vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
897{
898	uint32_t reg;
899	uint8_t byte;
900
901	if (vrs->bytesinval == 0) {
902		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
903			return (ENXIO);
904		vrs->val = le32toh(reg);
905		vrs->off += 4;
906		byte = vrs->val & 0xff;
907		vrs->bytesinval = 3;
908	} else {
909		vrs->val = vrs->val >> 8;
910		byte = vrs->val & 0xff;
911		vrs->bytesinval--;
912	}
913
914	vrs->cksum += byte;
915	*data = byte;
916	return (0);
917}
918
/*
 * Parse a device's Vital Product Data (VPD) into cfg->vpd.
 *
 * The VPD is a byte stream of resource descriptors delivered 32 bits at
 * a time through the VPD capability (see vpd_nextbyte()).  This routine
 * is a state machine over that stream:
 *
 *   state  0: expecting a resource item name (small or large format)
 *   state  1: copying the Identifier String
 *   state  2: expecting a VPD-R keyword header (2-byte keyword + length)
 *   state  3: copying a VPD-R keyword's value bytes
 *   state  4: skipping bytes of an unhandled item
 *   state  5: expecting a VPD-W keyword header
 *   state  6: copying a VPD-W keyword's value bytes
 *   state -1: normal termination (End item or invalid data)
 *   state -2: I/O error while reading the VPD registers
 *
 * On success cfg->vpd holds the identifier string plus the read-only
 * (vpd_ros) and read/write (vpd_w) keyword arrays.  If the "RV" checksum
 * fails, the read-only data is discarded; on I/O error everything is
 * discarded.  cfg->vpd.vpd_cached is set regardless so we never re-read.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;		/* bytes left in the current resource item */
	int i;			/* write index into the current value buffer */
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;		/* -1 unknown, 0 bad, 1 good */
	int dflen;		/* bytes left in the current keyword's value */
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is at most 0x7f 32-bit words long. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit len, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the RO array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* Keyword header consumed 3 bytes of the item. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte makes the running
			 * checksum of all bytes so far sum to zero (mod 256)
			 * when the VPD is intact.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Item done; shrink the array to fit. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip bytes of an item we don't parse */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the W array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Remember where this writable field lives. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Item done; shrink the array to fit. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the parse attempt done, successful or not. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1190
1191int
1192pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1193{
1194	struct pci_devinfo *dinfo = device_get_ivars(child);
1195	pcicfgregs *cfg = &dinfo->cfg;
1196
1197	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1198		pci_read_vpd(device_get_parent(dev), cfg);
1199
1200	*identptr = cfg->vpd.vpd_ident;
1201
1202	if (*identptr == NULL)
1203		return (ENXIO);
1204
1205	return (0);
1206}
1207
1208int
1209pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1210	const char **vptr)
1211{
1212	struct pci_devinfo *dinfo = device_get_ivars(child);
1213	pcicfgregs *cfg = &dinfo->cfg;
1214	int i;
1215
1216	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1217		pci_read_vpd(device_get_parent(dev), cfg);
1218
1219	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1220		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1221		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1222			*vptr = cfg->vpd.vpd_ros[i].value;
1223			return (0);
1224		}
1225
1226	*vptr = NULL;
1227	return (ENXIO);
1228}
1229
1230struct pcicfg_vpd *
1231pci_fetch_vpd_list(device_t dev)
1232{
1233	struct pci_devinfo *dinfo = device_get_ivars(dev);
1234	pcicfgregs *cfg = &dinfo->cfg;
1235
1236	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1237		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1238	return (&cfg->vpd);
1239}
1240
1241/*
1242 * Find the requested HyperTransport capability and return the offset
1243 * in configuration space via the pointer provided.  The function
1244 * returns 0 on success and an error code otherwise.
1245 */
1246int
1247pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1248{
1249	int ptr, error;
1250	uint16_t val;
1251
1252	error = pci_find_cap(child, PCIY_HT, &ptr);
1253	if (error)
1254		return (error);
1255
1256	/*
1257	 * Traverse the capabilities list checking each HT capability
1258	 * to see if it matches the requested HT capability.
1259	 */
1260	while (ptr != 0) {
1261		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1262		if (capability == PCIM_HTCAP_SLAVE ||
1263		    capability == PCIM_HTCAP_HOST)
1264			val &= 0xe000;
1265		else
1266			val &= PCIM_HTCMD_CAP_MASK;
1267		if (val == capability) {
1268			if (capreg != NULL)
1269				*capreg = ptr;
1270			return (0);
1271		}
1272
1273		/* Skip to the next HT capability. */
1274		while (ptr != 0) {
1275			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1276			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1277			    PCIY_HT)
1278				break;
1279		}
1280	}
1281	return (ENOENT);
1282}
1283
1284/*
1285 * Find the requested capability and return the offset in
1286 * configuration space via the pointer provided.  The function returns
1287 * 0 on success and an error code otherwise.
1288 */
1289int
1290pci_find_cap_method(device_t dev, device_t child, int capability,
1291    int *capreg)
1292{
1293	struct pci_devinfo *dinfo = device_get_ivars(child);
1294	pcicfgregs *cfg = &dinfo->cfg;
1295	u_int32_t status;
1296	u_int8_t ptr;
1297
1298	/*
1299	 * Check the CAP_LIST bit of the PCI status register first.
1300	 */
1301	status = pci_read_config(child, PCIR_STATUS, 2);
1302	if (!(status & PCIM_STATUS_CAPPRESENT))
1303		return (ENXIO);
1304
1305	/*
1306	 * Determine the start pointer of the capabilities list.
1307	 */
1308	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1309	case PCIM_HDRTYPE_NORMAL:
1310	case PCIM_HDRTYPE_BRIDGE:
1311		ptr = PCIR_CAP_PTR;
1312		break;
1313	case PCIM_HDRTYPE_CARDBUS:
1314		ptr = PCIR_CAP_PTR_2;
1315		break;
1316	default:
1317		/* XXX: panic? */
1318		return (ENXIO);		/* no extended capabilities support */
1319	}
1320	ptr = pci_read_config(child, ptr, 1);
1321
1322	/*
1323	 * Traverse the capabilities list.
1324	 */
1325	while (ptr != 0) {
1326		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1327			if (capreg != NULL)
1328				*capreg = ptr;
1329			return (0);
1330		}
1331		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1332	}
1333
1334	return (ENOENT);
1335}
1336
1337/*
1338 * Find the requested extended capability and return the offset in
1339 * configuration space via the pointer provided.  The function returns
1340 * 0 on success and an error code otherwise.
1341 */
1342int
1343pci_find_extcap_method(device_t dev, device_t child, int capability,
1344    int *capreg)
1345{
1346	struct pci_devinfo *dinfo = device_get_ivars(child);
1347	pcicfgregs *cfg = &dinfo->cfg;
1348	uint32_t ecap;
1349	uint16_t ptr;
1350
1351	/* Only supported for PCI-express devices. */
1352	if (cfg->pcie.pcie_location == 0)
1353		return (ENXIO);
1354
1355	ptr = PCIR_EXTCAP;
1356	ecap = pci_read_config(child, ptr, 4);
1357	if (ecap == 0xffffffff || ecap == 0)
1358		return (ENOENT);
1359	for (;;) {
1360		if (PCI_EXTCAP_ID(ecap) == capability) {
1361			if (capreg != NULL)
1362				*capreg = ptr;
1363			return (0);
1364		}
1365		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1366		if (ptr == 0)
1367			break;
1368		ecap = pci_read_config(child, ptr, 4);
1369	}
1370
1371	return (ENOENT);
1372}
1373
1374/*
1375 * Support for MSI-X message interrupts.
1376 */
1377void
1378pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1379{
1380	struct pci_devinfo *dinfo = device_get_ivars(dev);
1381	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1382	uint32_t offset;
1383
1384	KASSERT(msix->msix_table_len > index, ("bogus index"));
1385	offset = msix->msix_table_offset + index * 16;
1386	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1387	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1388	bus_write_4(msix->msix_table_res, offset + 8, data);
1389
1390	/* Enable MSI -> HT mapping. */
1391	pci_ht_map_msi(dev, address);
1392}
1393
1394void
1395pci_mask_msix(device_t dev, u_int index)
1396{
1397	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399	uint32_t offset, val;
1400
1401	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1402	offset = msix->msix_table_offset + index * 16 + 12;
1403	val = bus_read_4(msix->msix_table_res, offset);
1404	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1405		val |= PCIM_MSIX_VCTRL_MASK;
1406		bus_write_4(msix->msix_table_res, offset, val);
1407	}
1408}
1409
1410void
1411pci_unmask_msix(device_t dev, u_int index)
1412{
1413	struct pci_devinfo *dinfo = device_get_ivars(dev);
1414	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1415	uint32_t offset, val;
1416
1417	KASSERT(msix->msix_table_len > index, ("bogus index"));
1418	offset = msix->msix_table_offset + index * 16 + 12;
1419	val = bus_read_4(msix->msix_table_res, offset);
1420	if (val & PCIM_MSIX_VCTRL_MASK) {
1421		val &= ~PCIM_MSIX_VCTRL_MASK;
1422		bus_write_4(msix->msix_table_res, offset, val);
1423	}
1424}
1425
1426int
1427pci_pending_msix(device_t dev, u_int index)
1428{
1429	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431	uint32_t offset, bit;
1432
1433	KASSERT(msix->msix_table_len > index, ("bogus index"));
1434	offset = msix->msix_pba_offset + (index / 32) * 4;
1435	bit = 1 << index % 32;
1436	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1437}
1438
1439/*
1440 * Restore MSI-X registers and table during resume.  If MSI-X is
1441 * enabled then walk the virtual table to restore the actual MSI-X
1442 * table.
1443 */
1444static void
1445pci_resume_msix(device_t dev)
1446{
1447	struct pci_devinfo *dinfo = device_get_ivars(dev);
1448	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1449	struct msix_table_entry *mte;
1450	struct msix_vector *mv;
1451	int i;
1452
1453	if (msix->msix_alloc > 0) {
1454		/* First, mask all vectors. */
1455		for (i = 0; i < msix->msix_msgnum; i++)
1456			pci_mask_msix(dev, i);
1457
1458		/* Second, program any messages with at least one handler. */
1459		for (i = 0; i < msix->msix_table_len; i++) {
1460			mte = &msix->msix_table[i];
1461			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1462				continue;
1463			mv = &msix->msix_vectors[mte->mte_vector - 1];
1464			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1465			pci_unmask_msix(dev, i);
1466		}
1467	}
1468	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1469	    msix->msix_ctrl, 2);
1470}
1471
1472/*
1473 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1474 * returned in *count.  After this function returns, each message will be
1475 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1476 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI-X is blacklisted for this system, fail. */
	if (pci_msix_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* When table and PBA share a BAR, 'rle' is still the table's entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask the bridge for more than the hardware supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Partial allocations are accepted after the first. */
			if (i == 0)
				return (error);
			break;
		}
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity-map message i to vector i + 1 initially. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1611
1612/*
1613 * By default, pci_alloc_msix() will assign the allocated IRQ
1614 * resources consecutively to the first N messages in the MSI-X table.
1615 * However, device drivers may want to use different layouts if they
1616 * either receive fewer messages than they asked for, or they wish to
1617 * populate the MSI-X table sparsely.  This method allows the driver
1618 * to specify what layout it wants.  It must be called after a
1619 * successful pci_alloc_msix() but before any of the associated
1620 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1621 *
1622 * The 'vectors' array contains 'count' message vectors.  The array
1623 * maps directly to the MSI-X table in that index 0 in the array
1624 * specifies the vector for the first message in the MSI-X table, etc.
1625 * The vector value in each array index can either be 0 to indicate
1626 * that no vector should be assigned to a message slot, or it can be a
1627 * number from 1 to N (where N is the count returned from a
1628 * succcessful call to pci_alloc_msix()) to indicate which message
1629 * vector (IRQ) to be used for the corresponding message.
1630 *
1631 * On successful return, each message with a non-zero vector will have
1632 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1633 * 1.  Additionally, if any of the IRQs allocated via the previous
1634 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1635 * will be freed back to the system automatically.
1636 *
1637 * For example, suppose a driver has a MSI-X table with 6 messages and
1638 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1639 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1640 * C.  After the call to pci_alloc_msix(), the device will be setup to
1641 * have an MSI-X table of ABC--- (where - means no vector assigned).
1642 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1643 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1644 * be freed back to the system.  This device will also have valid
1645 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1646 *
1647 * In any case, the SYS_RES_IRQ rid X will always map to the message
1648 * at MSI-X table index X - 1 and will only be valid if a vector is
1649 * assigned to that table entry.
1650 */
1651int
1652pci_remap_msix_method(device_t dev, device_t child, int count,
1653    const u_int *vectors)
1654{
1655	struct pci_devinfo *dinfo = device_get_ivars(child);
1656	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1657	struct resource_list_entry *rle;
1658	int i, irq, j, *used;
1659
1660	/*
1661	 * Have to have at least one message in the table but the
1662	 * table can't be bigger than the actual MSI-X table in the
1663	 * device.
1664	 */
1665	if (count == 0 || count > msix->msix_msgnum)
1666		return (EINVAL);
1667
1668	/* Sanity check the vectors. */
1669	for (i = 0; i < count; i++)
1670		if (vectors[i] > msix->msix_alloc)
1671			return (EINVAL);
1672
1673	/*
1674	 * Make sure there aren't any holes in the vectors to be used.
1675	 * It's a big pain to support it, and it doesn't really make
1676	 * sense anyway.  Also, at least one vector must be used.
1677	 */
1678	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1679	    M_ZERO);
1680	for (i = 0; i < count; i++)
1681		if (vectors[i] != 0)
1682			used[vectors[i] - 1] = 1;
1683	for (i = 0; i < msix->msix_alloc - 1; i++)
1684		if (used[i] == 0 && used[i + 1] == 1) {
1685			free(used, M_DEVBUF);
1686			return (EINVAL);
1687		}
1688	if (used[0] != 1) {
1689		free(used, M_DEVBUF);
1690		return (EINVAL);
1691	}
1692
1693	/* Make sure none of the resources are allocated. */
1694	for (i = 0; i < msix->msix_table_len; i++) {
1695		if (msix->msix_table[i].mte_vector == 0)
1696			continue;
1697		if (msix->msix_table[i].mte_handlers > 0)
1698			return (EBUSY);
1699		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1700		KASSERT(rle != NULL, ("missing resource"));
1701		if (rle->res != NULL)
1702			return (EBUSY);
1703	}
1704
1705	/* Free the existing resource list entries. */
1706	for (i = 0; i < msix->msix_table_len; i++) {
1707		if (msix->msix_table[i].mte_vector == 0)
1708			continue;
1709		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1710	}
1711
1712	/*
1713	 * Build the new virtual table keeping track of which vectors are
1714	 * used.
1715	 */
1716	free(msix->msix_table, M_DEVBUF);
1717	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1718	    M_DEVBUF, M_WAITOK | M_ZERO);
1719	for (i = 0; i < count; i++)
1720		msix->msix_table[i].mte_vector = vectors[i];
1721	msix->msix_table_len = count;
1722
1723	/* Free any unused IRQs and resize the vectors array if necessary. */
1724	j = msix->msix_alloc - 1;
1725	if (used[j] == 0) {
1726		struct msix_vector *vec;
1727
1728		while (used[j] == 0) {
1729			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1730			    msix->msix_vectors[j].mv_irq);
1731			j--;
1732		}
1733		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1734		    M_WAITOK);
1735		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1736		    (j + 1));
1737		free(msix->msix_vectors, M_DEVBUF);
1738		msix->msix_vectors = vec;
1739		msix->msix_alloc = j + 1;
1740	}
1741	free(used, M_DEVBUF);
1742
1743	/* Map the IRQs onto the rids. */
1744	for (i = 0; i < count; i++) {
1745		if (vectors[i] == 0)
1746			continue;
1747		irq = msix->msix_vectors[vectors[i]].mv_irq;
1748		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1749		    irq, 1);
1750	}
1751
1752	if (bootverbose) {
1753		device_printf(child, "Remapped MSI-X IRQs as: ");
1754		for (i = 0; i < count; i++) {
1755			if (i != 0)
1756				printf(", ");
1757			if (vectors[i] == 0)
1758				printf("---");
1759			else
1760				printf("%d",
1761				    msix->msix_vectors[vectors[i]].mv_irq);
1762		}
1763		printf("\n");
1764	}
1765
1766	return (0);
1767}
1768
1769static int
1770pci_release_msix(device_t dev, device_t child)
1771{
1772	struct pci_devinfo *dinfo = device_get_ivars(child);
1773	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1774	struct resource_list_entry *rle;
1775	int i;
1776
1777	/* Do we have any messages to release? */
1778	if (msix->msix_alloc == 0)
1779		return (ENODEV);
1780
1781	/* Make sure none of the resources are allocated. */
1782	for (i = 0; i < msix->msix_table_len; i++) {
1783		if (msix->msix_table[i].mte_vector == 0)
1784			continue;
1785		if (msix->msix_table[i].mte_handlers > 0)
1786			return (EBUSY);
1787		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1788		KASSERT(rle != NULL, ("missing resource"));
1789		if (rle->res != NULL)
1790			return (EBUSY);
1791	}
1792
1793	/* Update control register to disable MSI-X. */
1794	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1795	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1796	    msix->msix_ctrl, 2);
1797
1798	/* Free the resource list entries. */
1799	for (i = 0; i < msix->msix_table_len; i++) {
1800		if (msix->msix_table[i].mte_vector == 0)
1801			continue;
1802		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1803	}
1804	free(msix->msix_table, M_DEVBUF);
1805	msix->msix_table_len = 0;
1806
1807	/* Release the IRQs. */
1808	for (i = 0; i < msix->msix_alloc; i++)
1809		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1810		    msix->msix_vectors[i].mv_irq);
1811	free(msix->msix_vectors, M_DEVBUF);
1812	msix->msix_alloc = 0;
1813	return (0);
1814}
1815
1816/*
1817 * Return the max supported MSI-X messages this device supports.
1818 * Basically, assuming the MD code can alloc messages, this function
1819 * should return the maximum value that pci_alloc_msix() can return.
1820 * Thus, it is subject to the tunables, etc.
1821 */
1822int
1823pci_msix_count_method(device_t dev, device_t child)
1824{
1825	struct pci_devinfo *dinfo = device_get_ivars(child);
1826	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1827
1828	if (pci_do_msix && msix->msix_location != 0)
1829		return (msix->msix_msgnum);
1830	return (0);
1831}
1832
1833/*
1834 * HyperTransport MSI mapping control
1835 */
1836void
1837pci_ht_map_msi(device_t dev, uint64_t addr)
1838{
1839	struct pci_devinfo *dinfo = device_get_ivars(dev);
1840	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1841
1842	if (!ht->ht_msimap)
1843		return;
1844
1845	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1846	    ht->ht_msiaddr >> 20 == addr >> 20) {
1847		/* Enable MSI -> HT mapping. */
1848		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1849		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1850		    ht->ht_msictrl, 2);
1851	}
1852
1853	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1854		/* Disable MSI -> HT mapping. */
1855		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1856		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1857		    ht->ht_msictrl, 2);
1858	}
1859}
1860
1861int
1862pci_get_max_read_req(device_t dev)
1863{
1864	struct pci_devinfo *dinfo = device_get_ivars(dev);
1865	int cap;
1866	uint16_t val;
1867
1868	cap = dinfo->cfg.pcie.pcie_location;
1869	if (cap == 0)
1870		return (0);
1871	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1872	val &= PCIEM_CTL_MAX_READ_REQUEST;
1873	val >>= 12;
1874	return (1 << (val + 7));
1875}
1876
1877int
1878pci_set_max_read_req(device_t dev, int size)
1879{
1880	struct pci_devinfo *dinfo = device_get_ivars(dev);
1881	int cap;
1882	uint16_t val;
1883
1884	cap = dinfo->cfg.pcie.pcie_location;
1885	if (cap == 0)
1886		return (0);
1887	if (size < 128)
1888		size = 128;
1889	if (size > 4096)
1890		size = 4096;
1891	size = (1 << (fls(size) - 1));
1892	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1893	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1894	val |= (fls(size) - 8) << 12;
1895	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1896	return (size);
1897}
1898
1899/*
1900 * Support for MSI message signalled interrupts.
1901 */
1902void
1903pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1904{
1905	struct pci_devinfo *dinfo = device_get_ivars(dev);
1906	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1907
1908	/* Write data and address values. */
1909	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1910	    address & 0xffffffff, 4);
1911	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1912		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1913		    address >> 32, 4);
1914		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1915		    data, 2);
1916	} else
1917		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1918		    2);
1919
1920	/* Enable MSI in the control register. */
1921	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1922	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1923	    2);
1924
1925	/* Enable MSI -> HT mapping. */
1926	pci_ht_map_msi(dev, address);
1927}
1928
1929void
1930pci_disable_msi(device_t dev)
1931{
1932	struct pci_devinfo *dinfo = device_get_ivars(dev);
1933	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1934
1935	/* Disable MSI -> HT mapping. */
1936	pci_ht_map_msi(dev, 0);
1937
1938	/* Disable MSI in the control register. */
1939	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1940	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1941	    2);
1942}
1943
1944/*
1945 * Restore MSI registers during resume.  If MSI is enabled then
1946 * restore the data and address registers in addition to the control
1947 * register.
1948 */
1949static void
1950pci_resume_msi(device_t dev)
1951{
1952	struct pci_devinfo *dinfo = device_get_ivars(dev);
1953	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1954	uint64_t address;
1955	uint16_t data;
1956
1957	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1958		address = msi->msi_addr;
1959		data = msi->msi_data;
1960		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1961		    address & 0xffffffff, 4);
1962		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1963			pci_write_config(dev, msi->msi_location +
1964			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1965			pci_write_config(dev, msi->msi_location +
1966			    PCIR_MSI_DATA_64BIT, data, 2);
1967		} else
1968			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1969			    data, 2);
1970	}
1971	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1972	    2);
1973}
1974
/*
 * PCI_REMAP_INTR() method: reroute an already-allocated MSI or MSI-X
 * IRQ to a fresh address/data pair obtained from the parent bridge.
 *
 * NOTE(review): the MSI-X branch updates every matching table slot but
 * still falls through to return ENOENT even after a successful remap;
 * callers appear to tolerate this — confirm before relying on the
 * return value in that path.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, update saved values, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Rewrite every table slot on this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while the entry is rewritten. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2047
2048/*
2049 * Returns true if the specified device is blacklisted because MSI
2050 * doesn't work.
2051 */
2052int
2053pci_msi_device_blacklisted(device_t dev)
2054{
2055
2056	if (!pci_honor_msi_blacklist)
2057		return (0);
2058
2059	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2060}
2061
2062/*
2063 * Determine if MSI is blacklisted globally on this system.  Currently,
2064 * we just check for blacklisted chipsets as represented by the
2065 * host-PCI bridge at device 0:0:0.  In the future, it may become
2066 * necessary to check other system attributes, such as the kenv values
2067 * that give the motherboard manufacturer and model number.
2068 */
2069static int
2070pci_msi_blacklisted(void)
2071{
2072	device_t dev;
2073
2074	if (!pci_honor_msi_blacklist)
2075		return (0);
2076
2077	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2078	if (!(pcie_chipset || pcix_chipset)) {
2079		if (vm_guest != VM_GUEST_NO) {
2080			/*
2081			 * Whitelist older chipsets in virtual
2082			 * machines known to support MSI.
2083			 */
2084			dev = pci_find_bsf(0, 0, 0);
2085			if (dev != NULL)
2086				return (!pci_has_quirk(pci_get_devid(dev),
2087					PCI_QUIRK_ENABLE_MSI_VM));
2088		}
2089		return (1);
2090	}
2091
2092	dev = pci_find_bsf(0, 0, 0);
2093	if (dev != NULL)
2094		return (pci_msi_device_blacklisted(dev));
2095	return (0);
2096}
2097
2098/*
2099 * Returns true if the specified device is blacklisted because MSI-X
2100 * doesn't work.  Note that this assumes that if MSI doesn't work,
2101 * MSI-X doesn't either.
2102 */
2103int
2104pci_msix_device_blacklisted(device_t dev)
2105{
2106
2107	if (!pci_honor_msi_blacklist)
2108		return (0);
2109
2110	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2111		return (1);
2112
2113	return (pci_msi_device_blacklisted(dev));
2114}
2115
2116/*
2117 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2118 * is blacklisted, assume that MSI-X is as well.  Check for additional
2119 * chipsets where MSI works but MSI-X does not.
2120 */
2121static int
2122pci_msix_blacklisted(void)
2123{
2124	device_t dev;
2125
2126	if (!pci_honor_msi_blacklist)
2127		return (0);
2128
2129	dev = pci_find_bsf(0, 0, 0);
2130	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2131	    PCI_QUIRK_DISABLE_MSIX))
2132		return (1);
2133
2134	return (pci_msi_blacklisted());
2135}
2136
2137/*
2138 * Attempt to allocate *count MSI messages.  The actual number allocated is
2139 * returned in *count.  After this function returns, each message will be
2140 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2141 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving the
	 * request on each failure until even a single message fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  'actual' is a
	 * power of two, so ffs(actual) - 1 == log2(actual), which is
	 * the encoding the Multiple Message Enable field (the bits
	 * covered by PCIM_MSICTRL_MME_MASK, starting at bit 4) wants.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2260
2261/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first; anything other than ENODEV ("no MSI-X
	 * allocated") is the final answer.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ so it can be handed back to the parent. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2309
2310/*
2311 * Return the max supported MSI messages this device supports.
2312 * Basically, assuming the MD code can alloc messages, this function
2313 * should return the maximum value that pci_alloc_msi() can return.
2314 * Thus, it is subject to the tunables, etc.
2315 */
2316int
2317pci_msi_count_method(device_t dev, device_t child)
2318{
2319	struct pci_devinfo *dinfo = device_get_ivars(child);
2320	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2321
2322	if (pci_do_msi && msi->msi_location != 0)
2323		return (msi->msi_msgnum);
2324	return (0);
2325}
2326
2327/* free pcicfgregs structure and all depending data structures */
2328
2329int
2330pci_freecfg(struct pci_devinfo *dinfo)
2331{
2332	struct devlist *devlist_head;
2333	struct pci_map *pm, *next;
2334	int i;
2335
2336	devlist_head = &pci_devq;
2337
2338	if (dinfo->cfg.vpd.vpd_reg) {
2339		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2340		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2341			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2342		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2343		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2344			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2345		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2346	}
2347	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2348		free(pm, M_DEVBUF);
2349	}
2350	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2351	free(dinfo, M_DEVBUF);
2352
2353	/* increment the generation count */
2354	pci_generation++;
2355
2356	/* we're losing one device */
2357	pci_numdevs--;
2358	return (0);
2359}
2360
2361/*
2362 * PCI power manangement
2363 */
/*
 * Put 'child' into power state 'state' (PCI_POWERSTATE_D0..D3) via its
 * power-management capability, honoring the settle delay the PCI PM
 * specification requires after the transition.  Returns 0 on success,
 * EOPNOTSUPP if the device lacks PM support or the requested state,
 * or EINVAL for an unknown state.
 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	/* No power-management capability: cannot change state. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the other status bits; only the D-state field changes. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; the capability advertises it. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is likewise optional. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2435
2436int
2437pci_get_powerstate_method(device_t dev, device_t child)
2438{
2439	struct pci_devinfo *dinfo = device_get_ivars(child);
2440	pcicfgregs *cfg = &dinfo->cfg;
2441	uint16_t status;
2442	int result;
2443
2444	if (cfg->pp.pp_cap != 0) {
2445		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2446		switch (status & PCIM_PSTAT_DMASK) {
2447		case PCIM_PSTAT_D0:
2448			result = PCI_POWERSTATE_D0;
2449			break;
2450		case PCIM_PSTAT_D1:
2451			result = PCI_POWERSTATE_D1;
2452			break;
2453		case PCIM_PSTAT_D2:
2454			result = PCI_POWERSTATE_D2;
2455			break;
2456		case PCIM_PSTAT_D3:
2457			result = PCI_POWERSTATE_D3;
2458			break;
2459		default:
2460			result = PCI_POWERSTATE_UNKNOWN;
2461			break;
2462		}
2463	} else {
2464		/* No support, device is always at D0 */
2465		result = PCI_POWERSTATE_D0;
2466	}
2467	return (result);
2468}
2469
2470/*
2471 * Some convenience functions for PCI device drivers.
2472 */
2473
2474static __inline void
2475pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2476{
2477	uint16_t	command;
2478
2479	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2480	command |= bit;
2481	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2482}
2483
2484static __inline void
2485pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2486{
2487	uint16_t	command;
2488
2489	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2490	command &= ~bit;
2491	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2492}
2493
/*
 * Enable bus mastering (DMA) for 'child' by setting
 * PCIM_CMD_BUSMASTEREN in its command register.  Always succeeds.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2500
/*
 * Disable bus mastering for 'child' by clearing PCIM_CMD_BUSMASTEREN
 * in its command register.  Always succeeds.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2507
2508int
2509pci_enable_io_method(device_t dev, device_t child, int space)
2510{
2511	uint16_t bit;
2512
2513	switch(space) {
2514	case SYS_RES_IOPORT:
2515		bit = PCIM_CMD_PORTEN;
2516		break;
2517	case SYS_RES_MEMORY:
2518		bit = PCIM_CMD_MEMEN;
2519		break;
2520	default:
2521		return (EINVAL);
2522	}
2523	pci_set_command_bit(dev, child, bit);
2524	return (0);
2525}
2526
2527int
2528pci_disable_io_method(device_t dev, device_t child, int space)
2529{
2530	uint16_t bit;
2531
2532	switch(space) {
2533	case SYS_RES_IOPORT:
2534		bit = PCIM_CMD_PORTEN;
2535		break;
2536	case SYS_RES_MEMORY:
2537		bit = PCIM_CMD_MEMEN;
2538		break;
2539	default:
2540		return (EINVAL);
2541	}
2542	pci_clear_command_bit(dev, child, bit);
2543	return (0);
2544}
2545
2546/*
2547 * New style pci driver.  Parent device is either a pci-host-bridge or a
2548 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2549 */
2550
/*
 * Dump the interesting parts of a newly-found device's config header
 * (IDs, location, class, command/status, timers, interrupt pin, and
 * the power-management, MSI, and MSI-X capabilities) to the console.
 * Only prints under bootverbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			/* intpin is 1-based: 1 -> 'a', 2 -> 'b', ... */
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two BARs. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2607
2608static int
2609pci_porten(device_t dev)
2610{
2611	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2612}
2613
2614static int
2615pci_memen(device_t dev)
2616{
2617	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2618}
2619
/*
 * Read the current value of BAR 'reg' into *mapp and size the BAR by
 * writing all 1's, returning the read-back sizing value in *testvalp.
 * Decoding is disabled via the command register around the sizing
 * write and the original BAR value is restored afterwards, so the
 * device never decodes at a bogus address.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		/* 64-bit BAR: the high half lives in the next register. */
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2683
/*
 * Program BAR 'pm' with address 'base' (writing both halves for a
 * 64-bit BAR) and then refresh pm->pm_value with what the hardware
 * actually latched, since read-only bits may differ from what was
 * written.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value reflects the hardware's view. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2704
2705struct pci_map *
2706pci_find_bar(device_t dev, int reg)
2707{
2708	struct pci_devinfo *dinfo;
2709	struct pci_map *pm;
2710
2711	dinfo = device_get_ivars(dev);
2712	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2713		if (pm->pm_reg == reg)
2714			return (pm);
2715	}
2716	return (NULL);
2717}
2718
2719int
2720pci_bar_enabled(device_t dev, struct pci_map *pm)
2721{
2722	struct pci_devinfo *dinfo;
2723	uint16_t cmd;
2724
2725	dinfo = device_get_ivars(dev);
2726	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2727	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2728		return (0);
2729	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2730	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2731		return ((cmd & PCIM_CMD_MEMEN) != 0);
2732	else
2733		return ((cmd & PCIM_CMD_PORTEN) != 0);
2734}
2735
/*
 * Allocate a record for BAR 'reg' and insert it into the device's BAR
 * list, which is kept sorted by config-space offset.  'value' is the
 * raw BAR contents and 'size' is log2 of the BAR's length.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL only when the list is empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2760
2761static void
2762pci_restore_bars(device_t dev)
2763{
2764	struct pci_devinfo *dinfo;
2765	struct pci_map *pm;
2766	int ln2range;
2767
2768	dinfo = device_get_ivars(dev);
2769	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2770		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2771			ln2range = 32;
2772		else
2773			ln2range = pci_maprange(pm->pm_value);
2774		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2775		if (ln2range == 64)
2776			pci_write_config(dev, pm->pm_reg + 4,
2777			    pm->pm_value >> 32, 4);
2778	}
2779}
2780
2781/*
2782 * Add a resource based on a pci map register. Return 1 if the map
2783 * register is a 32bit map register or 2 if it is a 64bit register.
2784 */
2785static int
2786pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2787    int force, int prefetch)
2788{
2789	struct pci_map *pm;
2790	pci_addr_t base, map, testval;
2791	pci_addr_t start, end, count;
2792	int barlen, basezero, flags, maprange, mapsize, type;
2793	uint16_t cmd;
2794	struct resource *res;
2795
2796	/*
2797	 * The BAR may already exist if the device is a CardBus card
2798	 * whose CIS is stored in this BAR.
2799	 */
2800	pm = pci_find_bar(dev, reg);
2801	if (pm != NULL) {
2802		maprange = pci_maprange(pm->pm_value);
2803		barlen = maprange == 64 ? 2 : 1;
2804		return (barlen);
2805	}
2806
2807	pci_read_bar(dev, reg, &map, &testval);
2808	if (PCI_BAR_MEM(map)) {
2809		type = SYS_RES_MEMORY;
2810		if (map & PCIM_BAR_MEM_PREFETCH)
2811			prefetch = 1;
2812	} else
2813		type = SYS_RES_IOPORT;
2814	mapsize = pci_mapsize(testval);
2815	base = pci_mapbase(map);
2816#ifdef __PCI_BAR_ZERO_VALID
2817	basezero = 0;
2818#else
2819	basezero = base == 0;
2820#endif
2821	maprange = pci_maprange(map);
2822	barlen = maprange == 64 ? 2 : 1;
2823
2824	/*
2825	 * For I/O registers, if bottom bit is set, and the next bit up
2826	 * isn't clear, we know we have a BAR that doesn't conform to the
2827	 * spec, so ignore it.  Also, sanity check the size of the data
2828	 * areas to the type of memory involved.  Memory must be at least
2829	 * 16 bytes in size, while I/O ranges must be at least 4.
2830	 */
2831	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2832		return (barlen);
2833	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2834	    (type == SYS_RES_IOPORT && mapsize < 2))
2835		return (barlen);
2836
2837	/* Save a record of this BAR. */
2838	pm = pci_add_bar(dev, reg, map, mapsize);
2839	if (bootverbose) {
2840		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2841		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2842		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2843			printf(", port disabled\n");
2844		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2845			printf(", memory disabled\n");
2846		else
2847			printf(", enabled\n");
2848	}
2849
2850	/*
2851	 * If base is 0, then we have problems if this architecture does
2852	 * not allow that.  It is best to ignore such entries for the
2853	 * moment.  These will be allocated later if the driver specifically
2854	 * requests them.  However, some removable busses look better when
2855	 * all resources are allocated, so allow '0' to be overriden.
2856	 *
2857	 * Similarly treat maps whose values is the same as the test value
2858	 * read back.  These maps have had all f's written to them by the
2859	 * BIOS in an attempt to disable the resources.
2860	 */
2861	if (!force && (basezero || map == testval))
2862		return (barlen);
2863	if ((u_long)base != base) {
2864		device_printf(bus,
2865		    "pci%d:%d:%d:%d bar %#x too many address bits",
2866		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2867		    pci_get_function(dev), reg);
2868		return (barlen);
2869	}
2870
2871	/*
2872	 * This code theoretically does the right thing, but has
2873	 * undesirable side effects in some cases where peripherals
2874	 * respond oddly to having these bits enabled.  Let the user
2875	 * be able to turn them off (since pci_enable_io_modes is 1 by
2876	 * default).
2877	 */
2878	if (pci_enable_io_modes) {
2879		/* Turn on resources that have been left off by a lazy BIOS */
2880		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2881			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2882			cmd |= PCIM_CMD_PORTEN;
2883			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2884		}
2885		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2886			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2887			cmd |= PCIM_CMD_MEMEN;
2888			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2889		}
2890	} else {
2891		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2892			return (barlen);
2893		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2894			return (barlen);
2895	}
2896
2897	count = (pci_addr_t)1 << mapsize;
2898	flags = RF_ALIGNMENT_LOG2(mapsize);
2899	if (prefetch)
2900		flags |= RF_PREFETCHABLE;
2901	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2902		start = 0;	/* Let the parent decide. */
2903		end = ~0ul;
2904	} else {
2905		start = base;
2906		end = base + count - 1;
2907	}
2908	resource_list_add(rl, type, reg, start, end, count);
2909
2910	/*
2911	 * Try to allocate the resource for this BAR from our parent
2912	 * so that this resource range is already reserved.  The
2913	 * driver for this device will later inherit this resource in
2914	 * pci_alloc_resource().
2915	 */
2916	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2917	    flags);
2918	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2919		/*
2920		 * If the allocation fails, try to allocate a resource for
2921		 * this BAR using any available range.  The firmware felt
2922		 * it was important enough to assign a resource, so don't
2923		 * disable decoding if we can help it.
2924		 */
2925		resource_list_delete(rl, type, reg);
2926		resource_list_add(rl, type, reg, 0, ~0ul, count);
2927		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2928		    count, flags);
2929	}
2930	if (res == NULL) {
2931		/*
2932		 * If the allocation fails, delete the resource list entry
2933		 * and disable decoding for this device.
2934		 *
2935		 * If the driver requests this resource in the future,
2936		 * pci_reserve_map() will try to allocate a fresh
2937		 * resource range.
2938		 */
2939		resource_list_delete(rl, type, reg);
2940		pci_disable_io(dev, type);
2941		if (bootverbose)
2942			device_printf(bus,
2943			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2944			    pci_get_domain(dev), pci_get_bus(dev),
2945			    pci_get_slot(dev), pci_get_function(dev), reg);
2946	} else {
2947		start = rman_get_start(res);
2948		pci_write_bar(dev, pm, start);
2949	}
2950	return (barlen);
2951}
2952
2953/*
2954 * For ATA devices we need to decide early what addressing mode to use.
2955 * Legacy demands that the primary and secondary ATA ports sits on the
2956 * same addresses that old ISA hardware did. This dictates that we use
2957 * those addresses and ignore the BAR's if we cannot set PCI native
2958 * addressing mode.
2959 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel is in native-PCI mode: use BARs 0 and 1. */
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Legacy mode: fixed ISA-compatible ports for the primary. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native-PCI mode: use BARs 2 and 3. */
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Legacy mode: fixed ISA-compatible ports for the secondary. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		(void)resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/*
	 * BARs 4 and 5 are always probed normally (BAR 4 is typically
	 * the bus-master DMA register block on IDE controllers).
	 */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
3013
/*
 * Determine the legacy INTx IRQ for a device and record it as the
 * rid 0 SYS_RES_IRQ resource.  The IRQ may come from (in priority
 * order) a hw.pci<d>.<b>.<s>.INT<p>.irq tunable, bus interrupt
 * routing, or the device's intline register.  If force_route is set,
 * routing is attempted even when intline already looks valid.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside 1..254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3061
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* InterruptRouting set: SMM owns the controller. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request an ownership change and poll up to ~100ms. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM never released it; force a controller reset. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3098
3099/* Perform early UHCI takeover from SMM. */
3100static void
3101uhci_early_takeover(device_t self)
3102{
3103	struct resource *res;
3104	int rid;
3105
3106	/*
3107	 * Set the PIRQD enable bit and switch off all the others. We don't
3108	 * want legacy support to interfere with us XXX Does this also mean
3109	 * that the BIOS won't touch the keyboard anymore if it is connected
3110	 * to the ports of the root hub?
3111	 */
3112	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3113
3114	/* Disable interrupts */
3115	rid = PCI_UHCI_BASE_REG;
3116	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3117	if (res != NULL) {
3118		bus_write_2(res, UHCI_INTR, 0);
3119		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3120	}
3121}
3122
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* Walk the extended capability list for the USB legacy capability. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Nonzero BIOS semaphore: the BIOS owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS to release its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3178
3179/* Perform early XHCI takeover from SMM. */
3180static void
3181xhci_early_takeover(device_t self)
3182{
3183	struct resource *res;
3184	uint32_t cparams;
3185	uint32_t eec;
3186	uint8_t eecp;
3187	uint8_t bios_sem;
3188	uint8_t offs;
3189	int rid;
3190	int i;
3191
3192	rid = PCIR_BAR(0);
3193	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3194	if (res == NULL)
3195		return;
3196
3197	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
3198
3199	eec = -1;
3200
3201	/* Synchronise with the BIOS if it owns the controller. */
3202	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
3203	    eecp += XHCI_XECP_NEXT(eec) << 2) {
3204		eec = bus_read_4(res, eecp);
3205
3206		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
3207			continue;
3208
3209		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
3210		if (bios_sem == 0)
3211			continue;
3212
3213		if (bootverbose)
3214			printf("xhci early: "
3215			    "SMM active, request owner change\n");
3216
3217		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
3218
3219		/* wait a maximum of 5 second */
3220
3221		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
3222			DELAY(1000);
3223			bios_sem = bus_read_1(res, eecp +
3224			    XHCI_XECP_BIOS_SEM);
3225		}
3226
3227		if (bios_sem != 0) {
3228			if (bootverbose)
3229				printf("xhci early: "
3230				    "SMM does not respond\n");
3231		}
3232
3233		/* Disable interrupts */
3234		offs = bus_read_1(res, XHCI_CAPLENGTH);
3235		bus_write_4(res, offs + XHCI_USBCMD, 0);
3236		bus_read_4(res, offs + XHCI_USBSTS);
3237	}
3238	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3239}
3240
3241#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* Locate the secondary/subordinate bus registers by header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		/* Not a bridge; nothing to reserve. */
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the bus range from the chipset's own register. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		/* Only apply on a Compal-built "08A0" planar. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

	/* Reached by goto, by fallthrough on an invalid range, or on
	 * reservation failure: zero the registers so the range is
	 * renumbered later. */
clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3346
/*
 * Allocate the secondary bus number range (rid 0) for a bridge child,
 * lazily reserving it from our parent and programming the bridge's
 * secondary/subordinate bus registers on first use.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only PCI-PCI and CardBus bridges carry bus number registers. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	/* Only rid 0 is supported for bus number ranges. */
	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* Reserve (without activating) and program the bridge. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3397#endif
3398
/*
 * Discover and add this device's resources (BARs, interrupt, secondary
 * bus range) to its resource list, applying device quirks and doing
 * early USB controller takeover from SMM where enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/*
			 * Advance by however many BAR slots pci_add_map()
			 * reports it consumed (presumably 2 for 64-bit
			 * BARs -- confirm in pci_add_map()).
			 */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from SMM before drivers attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3480
3481static struct pci_devinfo *
3482pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3483    int slot, int func, size_t dinfo_size)
3484{
3485	struct pci_devinfo *dinfo;
3486
3487	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3488	if (dinfo != NULL)
3489		pci_add_child(dev, dinfo);
3490
3491	return (dinfo);
3492}
3493
/*
 * Scan every slot/function on the bus and add a child device for each
 * function that responds to config space accesses.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	/* first_func drops back to 0 for every slot after the first. */
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* An out-of-range header type means no device in this slot. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may implement functions above 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3540
/*
 * Create the device_t for a newly-found PCI function and set up its
 * state: ivars, resource list, config snapshot, and resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot config space, then write the saved values back. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3552
/* Generic probe for the PCI bus driver itself. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3562
/*
 * Shared attach work for pci(4) and its subclasses: reserve our bus
 * number (when supported) and set up the DMA tag used by children.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Reserve our own bus number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	/*
	 * Only a top-level PCI bus (grandparent is not itself in the
	 * "pci" devclass) creates a bounded DMA tag; nested busses
	 * fall through and reuse the parent's tag below.
	 */
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3609
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	/* Enumerate the bus, then let drivers attach to the children. */
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3630
#ifdef PCI_RES_BUS
static int
pci_detach(device_t dev)
{
	struct pci_softc *sc;
	int error;

	/* Detach the children first; abort if any of them refuse. */
	error = bus_generic_detach(dev);
	if (error != 0)
		return (error);
	/* Give back the bus number we reserved at attach time. */
	sc = device_get_softc(dev);
	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
}
#endif
3645
3646static void
3647pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3648    int state)
3649{
3650	device_t child, pcib;
3651	int dstate, i;
3652
3653	/*
3654	 * Set the device to the given state.  If the firmware suggests
3655	 * a different power state, use it instead.  If power management
3656	 * is not present, the firmware is responsible for managing
3657	 * device power.  Skip children who aren't attached since they
3658	 * are handled separately.
3659	 */
3660	pcib = device_get_parent(dev);
3661	for (i = 0; i < numdevs; i++) {
3662		child = devlist[i];
3663		dstate = state;
3664		if (device_is_attached(child) &&
3665		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3666			pci_set_powerstate(child, dstate);
3667	}
3668}
3669
3670int
3671pci_suspend(device_t dev)
3672{
3673	device_t child, *devlist;
3674	struct pci_devinfo *dinfo;
3675	int error, i, numdevs;
3676
3677	/*
3678	 * Save the PCI configuration space for each child and set the
3679	 * device in the appropriate power state for this sleep state.
3680	 */
3681	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3682		return (error);
3683	for (i = 0; i < numdevs; i++) {
3684		child = devlist[i];
3685		dinfo = device_get_ivars(child);
3686		pci_cfg_save(child, dinfo, 0);
3687	}
3688
3689	/* Suspend devices before potentially powering them down. */
3690	error = bus_generic_suspend(dev);
3691	if (error) {
3692		free(devlist, M_TEMP);
3693		return (error);
3694	}
3695	if (pci_do_power_suspend)
3696		pci_set_power_children(dev, devlist, numdevs,
3697		    PCI_POWERSTATE_D3);
3698	free(devlist, M_TEMP);
3699	return (0);
3700}
3701
3702int
3703pci_resume(device_t dev)
3704{
3705	device_t child, *devlist;
3706	struct pci_devinfo *dinfo;
3707	int error, i, numdevs;
3708
3709	/*
3710	 * Set each child to D0 and restore its PCI configuration space.
3711	 */
3712	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3713		return (error);
3714	if (pci_do_power_resume)
3715		pci_set_power_children(dev, devlist, numdevs,
3716		    PCI_POWERSTATE_D0);
3717
3718	/* Now the device is powered up, restore its config space. */
3719	for (i = 0; i < numdevs; i++) {
3720		child = devlist[i];
3721		dinfo = device_get_ivars(child);
3722
3723		pci_cfg_restore(child, dinfo);
3724		if (!device_is_attached(child))
3725			pci_cfg_save(child, dinfo, 1);
3726	}
3727
3728	/*
3729	 * Resume critical devices first, then everything else later.
3730	 */
3731	for (i = 0; i < numdevs; i++) {
3732		child = devlist[i];
3733		switch (pci_get_class(child)) {
3734		case PCIC_DISPLAY:
3735		case PCIC_MEMORY:
3736		case PCIC_BRIDGE:
3737		case PCIC_BASEPERIPH:
3738			DEVICE_RESUME(child);
3739			break;
3740		}
3741	}
3742	for (i = 0; i < numdevs; i++) {
3743		child = devlist[i];
3744		switch (pci_get_class(child)) {
3745		case PCIC_DISPLAY:
3746		case PCIC_MEMORY:
3747		case PCIC_BRIDGE:
3748		case PCIC_BASEPERIPH:
3749			break;
3750		default:
3751			DEVICE_RESUME(child);
3752		}
3753	}
3754	free(devlist, M_TEMP);
3755	return (0);
3756}
3757
3758static void
3759pci_load_vendor_data(void)
3760{
3761	caddr_t data;
3762	void *ptr;
3763	size_t sz;
3764
3765	data = preload_search_by_type("pci_vendor_data");
3766	if (data != NULL) {
3767		ptr = preload_fetch_addr(data);
3768		sz = preload_fetch_size(data);
3769		if (ptr != NULL && sz != 0) {
3770			pci_vendordata = ptr;
3771			pci_vendordata_size = sz;
3772			/* terminate the database */
3773			pci_vendordata[pci_vendordata_size] = '\n';
3774		}
3775	}
3776}
3777
3778void
3779pci_driver_added(device_t dev, driver_t *driver)
3780{
3781	int numdevs;
3782	device_t *devlist;
3783	device_t child;
3784	struct pci_devinfo *dinfo;
3785	int i;
3786
3787	if (bootverbose)
3788		device_printf(dev, "driver added\n");
3789	DEVICE_IDENTIFY(driver, dev);
3790	if (device_get_children(dev, &devlist, &numdevs) != 0)
3791		return;
3792	for (i = 0; i < numdevs; i++) {
3793		child = devlist[i];
3794		if (device_get_state(child) != DS_NOTPRESENT)
3795			continue;
3796		dinfo = device_get_ivars(child);
3797		pci_print_verbose(dinfo);
3798		if (bootverbose)
3799			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3800		pci_cfg_restore(child, dinfo);
3801		if (device_probe_and_attach(child) != 0)
3802			pci_child_detached(dev, child);
3803	}
3804	free(devlist, M_TEMP);
3805}
3806
/*
 * Bus method: install an interrupt handler for a child.  For direct
 * children this also programs INTx or the MSI/MSI-X machinery to match
 * the resource being wired up.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	/* Let the generic bus code install the handler first. */
	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/* rid 0 is the legacy INTx interrupt; rid > 0 is MSI/MSI-X. */
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in hardware for the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Roll back the generic handler on any mapping failure. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3906
/*
 * Bus method: remove an interrupt handler for a child, undoing the
 * INTx or MSI/MSI-X programming done by pci_setup_intr() when the last
 * handler for a vector goes away.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): rle is dereferenced without a NULL check;
		 * this assumes an active MSI/MSI-X irq always has a
		 * matching SYS_RES_IRQ entry -- confirm against callers.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Disable MSI when the last handler is removed. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Mask the entry when the last handler is removed. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3965
3966int
3967pci_print_child(device_t dev, device_t child)
3968{
3969	struct pci_devinfo *dinfo;
3970	struct resource_list *rl;
3971	int retval = 0;
3972
3973	dinfo = device_get_ivars(child);
3974	rl = &dinfo->resources;
3975
3976	retval += bus_print_child_header(dev, child);
3977
3978	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3979	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3980	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3981	if (device_get_flags(dev))
3982		retval += printf(" flags %#x", device_get_flags(dev));
3983
3984	retval += printf(" at device %d.%d", pci_get_slot(child),
3985	    pci_get_function(child));
3986
3987	retval += bus_print_child_domain(dev, child);
3988	retval += bus_print_child_footer(dev, child);
3989
3990	return (retval);
3991}
3992
/*
 * Human-readable descriptions of PCI class/subclass codes, used to
 * describe devices for which no driver attached.  A subclass of -1 is
 * the generic entry for the class.  The table is terminated by an
 * all-zero entry.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4087
4088void
4089pci_probe_nomatch(device_t dev, device_t child)
4090{
4091	int i, report;
4092	const char *cp, *scp;
4093	char *device;
4094
4095	/*
4096	 * Look for a listing for this device in a loaded device database.
4097	 */
4098	report = 1;
4099	if ((device = pci_describe_device(child)) != NULL) {
4100		device_printf(dev, "<%s>", device);
4101		free(device, M_DEVBUF);
4102	} else {
4103		/*
4104		 * Scan the class/subclass descriptions for a general
4105		 * description.
4106		 */
4107		cp = "unknown";
4108		scp = NULL;
4109		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4110			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4111				if (pci_nomatch_tab[i].subclass == -1) {
4112					cp = pci_nomatch_tab[i].desc;
4113					report = pci_nomatch_tab[i].report;
4114				} else if (pci_nomatch_tab[i].subclass ==
4115				    pci_get_subclass(child)) {
4116					scp = pci_nomatch_tab[i].desc;
4117					report = pci_nomatch_tab[i].report;
4118				}
4119			}
4120		}
4121		if (report || bootverbose) {
4122			device_printf(dev, "<%s%s%s>",
4123			    cp ? cp : "",
4124			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4125			    scp ? scp : "");
4126		}
4127	}
4128	if (report || bootverbose) {
4129		printf(" at device %d.%d (no driver attached)\n",
4130		    pci_get_slot(child), pci_get_function(child));
4131	}
4132	pci_cfg_save(child, device_get_ivars(child), 1);
4133}
4134
/*
 * Called after a child device's driver has detached: reclaim any
 * resources the driver leaked, then save config state for re-attach.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	/* Snapshot config space and potentially power the device down. */
	pci_cfg_save(child, dinfo, 1);
}
4166
4167/*
4168 * Parse the PCI device database, if loaded, and return a pointer to a
4169 * description of the device.
4170 *
4171 * The database is flat text formatted as follows:
4172 *
4173 * Any line not in a valid format is ignored.
4174 * Lines are terminated with newline '\n' characters.
4175 *
4176 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4177 * the vendor name.
4178 *
4179 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4180 * - devices cannot be listed without a corresponding VENDOR line.
4181 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4182 * another TAB, then the device name.
4183 */
4184
4185/*
4186 * Assuming (ptr) points to the beginning of a line in the database,
4187 * return the vendor or device and description of the next entry.
4188 * The value of (vendor) or (device) inappropriate for the entry type
4189 * is set to -1.  Returns nonzero at the end of the database.
4190 *
4191 * Note that this is slightly unrobust in the face of corrupt data;
4192 * we attempt to safeguard against this by spamming the end of the
4193 * database with a newline when we initialise.
4194 */
4195static int
4196pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
4197{
4198	char	*cp = *ptr;
4199	int	left;
4200
4201	*device = -1;
4202	*vendor = -1;
4203	**desc = '\0';
4204	for (;;) {
4205		left = pci_vendordata_size - (cp - pci_vendordata);
4206		if (left <= 0) {
4207			*ptr = cp;
4208			return(1);
4209		}
4210
4211		/* vendor entry? */
4212		if (*cp != '\t' &&
4213		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4214			break;
4215		/* device entry? */
4216		if (*cp == '\t' &&
4217		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4218			break;
4219
4220		/* skip to next line */
4221		while (*cp != '\n' && left > 0) {
4222			cp++;
4223			left--;
4224		}
4225		if (*cp == '\n') {
4226			cp++;
4227			left--;
4228		}
4229	}
4230	/* skip to next line */
4231	while (*cp != '\n' && left > 0) {
4232		cp++;
4233		left--;
4234	}
4235	if (*cp == '\n' && left > 0)
4236		cp++;
4237	*ptr = cp;
4238	return(0);
4239}
4240
4241static char *
4242pci_describe_device(device_t dev)
4243{
4244	int	vendor, device;
4245	char	*desc, *vp, *dp, *line;
4246
4247	desc = vp = dp = NULL;
4248
4249	/*
4250	 * If we have no vendor data, we can't do anything.
4251	 */
4252	if (pci_vendordata == NULL)
4253		goto out;
4254
4255	/*
4256	 * Scan the vendor data looking for this device
4257	 */
4258	line = pci_vendordata;
4259	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4260		goto out;
4261	for (;;) {
4262		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4263			goto out;
4264		if (vendor == pci_get_vendor(dev))
4265			break;
4266	}
4267	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4268		goto out;
4269	for (;;) {
4270		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4271			*dp = 0;
4272			break;
4273		}
4274		if (vendor != -1) {
4275			*dp = 0;
4276			break;
4277		}
4278		if (device == pci_get_device(dev))
4279			break;
4280	}
4281	if (dp[0] == '\0')
4282		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4283	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4284	    NULL)
4285		sprintf(desc, "%s, %s", vp, dp);
4286out:
4287	if (vp != NULL)
4288		free(vp, M_DEVBUF);
4289	if (dp != NULL)
4290		free(dp, M_DEVBUF);
4291	return(desc);
4292}
4293
4294int
4295pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4296{
4297	struct pci_devinfo *dinfo;
4298	pcicfgregs *cfg;
4299
4300	dinfo = device_get_ivars(child);
4301	cfg = &dinfo->cfg;
4302
4303	switch (which) {
4304	case PCI_IVAR_ETHADDR:
4305		/*
4306		 * The generic accessor doesn't deal with failure, so
4307		 * we set the return value, then return an error.
4308		 */
4309		*((uint8_t **) result) = NULL;
4310		return (EINVAL);
4311	case PCI_IVAR_SUBVENDOR:
4312		*result = cfg->subvendor;
4313		break;
4314	case PCI_IVAR_SUBDEVICE:
4315		*result = cfg->subdevice;
4316		break;
4317	case PCI_IVAR_VENDOR:
4318		*result = cfg->vendor;
4319		break;
4320	case PCI_IVAR_DEVICE:
4321		*result = cfg->device;
4322		break;
4323	case PCI_IVAR_DEVID:
4324		*result = (cfg->device << 16) | cfg->vendor;
4325		break;
4326	case PCI_IVAR_CLASS:
4327		*result = cfg->baseclass;
4328		break;
4329	case PCI_IVAR_SUBCLASS:
4330		*result = cfg->subclass;
4331		break;
4332	case PCI_IVAR_PROGIF:
4333		*result = cfg->progif;
4334		break;
4335	case PCI_IVAR_REVID:
4336		*result = cfg->revid;
4337		break;
4338	case PCI_IVAR_INTPIN:
4339		*result = cfg->intpin;
4340		break;
4341	case PCI_IVAR_IRQ:
4342		*result = cfg->intline;
4343		break;
4344	case PCI_IVAR_DOMAIN:
4345		*result = cfg->domain;
4346		break;
4347	case PCI_IVAR_BUS:
4348		*result = cfg->bus;
4349		break;
4350	case PCI_IVAR_SLOT:
4351		*result = cfg->slot;
4352		break;
4353	case PCI_IVAR_FUNCTION:
4354		*result = cfg->func;
4355		break;
4356	case PCI_IVAR_CMDREG:
4357		*result = cfg->cmdreg;
4358		break;
4359	case PCI_IVAR_CACHELNSZ:
4360		*result = cfg->cachelnsz;
4361		break;
4362	case PCI_IVAR_MINGNT:
4363		*result = cfg->mingnt;
4364		break;
4365	case PCI_IVAR_MAXLAT:
4366		*result = cfg->maxlat;
4367		break;
4368	case PCI_IVAR_LATTIMER:
4369		*result = cfg->lattimer;
4370		break;
4371	default:
4372		return (ENOENT);
4373	}
4374	return (0);
4375}
4376
4377int
4378pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4379{
4380	struct pci_devinfo *dinfo;
4381
4382	dinfo = device_get_ivars(child);
4383
4384	switch (which) {
4385	case PCI_IVAR_INTPIN:
4386		dinfo->cfg.intpin = value;
4387		return (0);
4388	case PCI_IVAR_ETHADDR:
4389	case PCI_IVAR_SUBVENDOR:
4390	case PCI_IVAR_SUBDEVICE:
4391	case PCI_IVAR_VENDOR:
4392	case PCI_IVAR_DEVICE:
4393	case PCI_IVAR_DEVID:
4394	case PCI_IVAR_CLASS:
4395	case PCI_IVAR_SUBCLASS:
4396	case PCI_IVAR_PROGIF:
4397	case PCI_IVAR_REVID:
4398	case PCI_IVAR_IRQ:
4399	case PCI_IVAR_DOMAIN:
4400	case PCI_IVAR_BUS:
4401	case PCI_IVAR_SLOT:
4402	case PCI_IVAR_FUNCTION:
4403		return (EINVAL);	/* disallow for now */
4404
4405	default:
4406		return (ENOENT);
4407	}
4408}
4409
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#include <sys/cons.h>

/*
 * "show pciregs" DDB command: print one line of config-space identity
 * (class, card/chip IDs, revision, header type) for every device on
 * the global PCI device list.
 */

DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Devices with no attached driver are shown as "none0", "none1", ... */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices;
	 * stop early if the user quits the DDB pager.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
#endif /* DDB */
4459
/*
 * Reserve the address range decoded by a BAR (or device ROM) the first
 * time a driver requests it, so the range cannot be handed out to any
 * other device.  Adds a resource list entry, programs the BAR with the
 * address chosen by the allocator, and returns the reserved (inactive)
 * resource, or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Sanity-check the requested resource type against the BAR type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Drop the entry so a later attempt retries from scratch. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address the allocation landed at. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4557
/*
 * BUS_ALLOC_RESOURCE() implementation for the PCI bus.  For direct
 * children, BAR resources are reserved lazily: the first request for a
 * given rid sizes the BAR and reserves its range via pci_reserve_map().
 * Requests on behalf of grandchildren are passed up to our own parent.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out (a sub-range of) the now-reserved resource list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4636
4637int
4638pci_release_resource(device_t dev, device_t child, int type, int rid,
4639    struct resource *r)
4640{
4641	struct pci_devinfo *dinfo;
4642	struct resource_list *rl;
4643	pcicfgregs *cfg;
4644
4645	if (device_get_parent(child) != dev)
4646		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4647		    type, rid, r));
4648
4649	dinfo = device_get_ivars(child);
4650	cfg = &dinfo->cfg;
4651#ifdef NEW_PCIB
4652	/*
4653	 * PCI-PCI bridge I/O window resources are not BARs.  For
4654	 * those allocations just pass the request up the tree.
4655	 */
4656	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4657	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4658		switch (rid) {
4659		case PCIR_IOBASEL_1:
4660		case PCIR_MEMBASE_1:
4661		case PCIR_PMBASEL_1:
4662			return (bus_generic_release_resource(dev, child, type,
4663			    rid, r));
4664		}
4665	}
4666#endif
4667
4668	rl = &dinfo->resources;
4669	return (resource_list_release(rl, dev, child, type, rid, r));
4670}
4671
4672int
4673pci_activate_resource(device_t dev, device_t child, int type, int rid,
4674    struct resource *r)
4675{
4676	struct pci_devinfo *dinfo;
4677	int error;
4678
4679	error = bus_generic_activate_resource(dev, child, type, rid, r);
4680	if (error)
4681		return (error);
4682
4683	/* Enable decoding in the command register when activating BARs. */
4684	if (device_get_parent(child) == dev) {
4685		/* Device ROMs need their decoding explicitly enabled. */
4686		dinfo = device_get_ivars(child);
4687		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4688			pci_write_bar(child, pci_find_bar(child, rid),
4689			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4690		switch (type) {
4691		case SYS_RES_IOPORT:
4692		case SYS_RES_MEMORY:
4693			error = PCI_ENABLE_IO(dev, child, type);
4694			break;
4695		}
4696	}
4697	return (error);
4698}
4699
4700int
4701pci_deactivate_resource(device_t dev, device_t child, int type,
4702    int rid, struct resource *r)
4703{
4704	struct pci_devinfo *dinfo;
4705	int error;
4706
4707	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4708	if (error)
4709		return (error);
4710
4711	/* Disable decoding for device ROMs. */
4712	if (device_get_parent(child) == dev) {
4713		dinfo = device_get_ivars(child);
4714		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4715			pci_write_bar(child, pci_find_bar(child, rid),
4716			    rman_get_start(r));
4717	}
4718	return (0);
4719}
4720
/*
 * Detach (if attached) and destroy a child device, disabling its
 * decoders and returning every allocated resource to the parent.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				/*
				 * The driver should have released this
				 * resource; force it back so the
				 * unreserve below can succeed.
				 */
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4760
4761void
4762pci_delete_resource(device_t dev, device_t child, int type, int rid)
4763{
4764	struct pci_devinfo *dinfo;
4765	struct resource_list *rl;
4766	struct resource_list_entry *rle;
4767
4768	if (device_get_parent(child) != dev)
4769		return;
4770
4771	dinfo = device_get_ivars(child);
4772	rl = &dinfo->resources;
4773	rle = resource_list_find(rl, type, rid);
4774	if (rle == NULL)
4775		return;
4776
4777	if (rle->res) {
4778		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4779		    resource_list_busy(rl, type, rid)) {
4780			device_printf(dev, "delete_resource: "
4781			    "Resource still owned by child, oops. "
4782			    "(type=%d, rid=%d, addr=%lx)\n",
4783			    type, rid, rman_get_start(rle->res));
4784			return;
4785		}
4786		resource_list_unreserve(rl, dev, child, type, rid);
4787	}
4788	resource_list_delete(rl, type, rid);
4789}
4790
4791struct resource_list *
4792pci_get_resource_list (device_t dev, device_t child)
4793{
4794	struct pci_devinfo *dinfo = device_get_ivars(child);
4795
4796	return (&dinfo->resources);
4797}
4798
4799bus_dma_tag_t
4800pci_get_dma_tag(device_t bus, device_t dev)
4801{
4802	struct pci_softc *sc = device_get_softc(bus);
4803
4804	return (sc->sc_dma_tag);
4805}
4806
4807uint32_t
4808pci_read_config_method(device_t dev, device_t child, int reg, int width)
4809{
4810	struct pci_devinfo *dinfo = device_get_ivars(child);
4811	pcicfgregs *cfg = &dinfo->cfg;
4812
4813	return (PCIB_READ_CONFIG(device_get_parent(dev),
4814	    cfg->bus, cfg->slot, cfg->func, reg, width));
4815}
4816
4817void
4818pci_write_config_method(device_t dev, device_t child, int reg,
4819    uint32_t val, int width)
4820{
4821	struct pci_devinfo *dinfo = device_get_ivars(child);
4822	pcicfgregs *cfg = &dinfo->cfg;
4823
4824	PCIB_WRITE_CONFIG(device_get_parent(dev),
4825	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4826}
4827
4828int
4829pci_child_location_str_method(device_t dev, device_t child, char *buf,
4830    size_t buflen)
4831{
4832
4833	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4834	    pci_get_function(child));
4835	return (0);
4836}
4837
4838int
4839pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4840    size_t buflen)
4841{
4842	struct pci_devinfo *dinfo;
4843	pcicfgregs *cfg;
4844
4845	dinfo = device_get_ivars(child);
4846	cfg = &dinfo->cfg;
4847	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4848	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4849	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4850	    cfg->progif);
4851	return (0);
4852}
4853
4854int
4855pci_assign_interrupt_method(device_t dev, device_t child)
4856{
4857	struct pci_devinfo *dinfo = device_get_ivars(child);
4858	pcicfgregs *cfg = &dinfo->cfg;
4859
4860	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4861	    cfg->intpin));
4862}
4863
4864static int
4865pci_modevent(module_t mod, int what, void *arg)
4866{
4867	static struct cdev *pci_cdev;
4868
4869	switch (what) {
4870	case MOD_LOAD:
4871		STAILQ_INIT(&pci_devq);
4872		pci_generation = 0;
4873		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4874		    "pci");
4875		pci_load_vendor_data();
4876		break;
4877
4878	case MOD_UNLOAD:
4879		destroy_dev(pci_cdev);
4880		break;
4881	}
4882
4883	return (0);
4884}
4885
/*
 * Rewrite the PCI Express control registers previously captured by
 * pci_cfg_save_pcie().  Each register is only written when the same
 * capability-version/port-type condition used at save time holds, so
 * we never touch registers the device does not implement.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	/* Version 2+ capabilities expose every control register below. */
	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4921
4922static void
4923pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4924{
4925	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4926	    dinfo->cfg.pcix.pcix_command,  2);
4927}
4928
/*
 * Restore the configuration previously captured in 'dinfo' by
 * pci_cfg_save(): BARs, command register, interrupt routing, latency
 * and cache settings, and PCIe/PCI-X/MSI/MSI-X capability state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4978
/*
 * Capture the PCI Express control registers of 'dev' into 'dinfo'.
 * Each register is read only when the capability version and port type
 * indicate it is implemented; pci_cfg_restore_pcie() uses the same
 * conditions when writing them back.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	/* Version 2+ capabilities expose every control register below. */
	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5016
5017static void
5018pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5019{
5020	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5021	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5022}
5023
/*
 * Snapshot the writable portion of 'dev's type 0 configuration header
 * (plus PCIe/PCI-X capability registers) into 'dinfo' so that
 * pci_cfg_restore() can put it back.  If 'setstate' is non-zero the
 * device may additionally be powered down to D3, governed by the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5109
5110/* Wrapper APIs suitable for device driver use. */
5111void
5112pci_save_state(device_t dev)
5113{
5114	struct pci_devinfo *dinfo;
5115
5116	dinfo = device_get_ivars(dev);
5117	pci_cfg_save(dev, dinfo, 0);
5118}
5119
5120void
5121pci_restore_state(device_t dev)
5122{
5123	struct pci_devinfo *dinfo;
5124
5125	dinfo = device_get_ivars(dev);
5126	pci_cfg_restore(dev, dinfo);
5127}
5128
5129static uint16_t
5130pci_get_rid_method(device_t dev, device_t child)
5131{
5132
5133	return (PCIB_GET_RID(device_get_parent(dev), child));
5134}
5135