pci.c revision 280970
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/pci/pci.c 280970 2015-04-01 21:48:54Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
73#define	PCIR_IS_BIOS(cfg, reg)						\
74	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76
/*
 * Forward declarations for the file-private helpers implemented below.
 * The driver's public entry points are exported only through the
 * pci_methods table and the pci_if/pcib_if kobj interfaces.
 */
static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
#ifdef PCI_RES_BUS
static int		pci_detach(device_t dev);
#endif
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);

static uint16_t		pci_get_rid_method(device_t dev, device_t child);
128
/*
 * newbus method table for the pci bus driver: device lifecycle methods,
 * generic bus resource/interrupt plumbing, and the PCI-specific kobj
 * interface (pci_if.m) consumed by child device drivers.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
#ifdef PCI_RES_BUS
	/* With bus-number resources, detach must release the bus range. */
	DEVMETHOD(device_detach,	pci_detach),
#else
	DEVMETHOD(device_detach,	bus_generic_detach),
#endif
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
	DEVMETHOD(pci_get_rid,		pci_get_rid_method),

	DEVMETHOD_END
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/* Attach an instance of this driver under every pcib (PCI bridge). */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor description data loaded by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
200
/*
 * Table-driven quirk handling: each entry matches a vendor/device ID
 * and names a workaround type, with optional type-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
	int	arg1;		/* meaning depends on 'type' */
	int	arg2;
};
213
/*
 * Known-broken (or known-good-despite-appearances) devices, keyed by
 * the 32-bit device-in-high/vendor-in-low config ID.  The table is
 * terminated by a zero devid and scanned linearly by pci_has_quirk().
 */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	/*
	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
	 * command register is set.
	 */
	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },

	/*
	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
	 */
	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */

	{ 0 }	/* terminator */
};
291
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every enumerated PCI function, plus bookkeeping. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;
/* Set during capability scan when a PCIe/PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_realloc_bars = 0;
TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
    &pci_do_realloc_bars, 0,
    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");

/* Early USB takeover defaults on only where BIOS legacy emulation exists. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");

static int pci_clear_bars;
TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
    "Ignore firmware-assigned resources for BARs.");

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
static int pci_clear_buses;
TUNABLE_INT("hw.pci.clear_buses", &pci_clear_buses);
SYSCTL_INT(_hw_pci, OID_AUTO, clear_buses, CTLFLAG_RDTUN, &pci_clear_buses, 0,
    "Ignore firmware-assigned bus numbers.");
#endif

static int pci_enable_ari = 1;
TUNABLE_INT("hw.pci.enable_ari", &pci_enable_ari);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
    0, "Enable support for PCIe Alternative RID Interpretation");
382
383static int
384pci_has_quirk(uint32_t devid, int quirk)
385{
386	const struct pci_quirk *q;
387
388	for (q = &pci_quirks[0]; q->devid; q++) {
389		if (q->devid == devid && q->type == quirk)
390			return (1);
391	}
392	return (0);
393}
394
/*
 * Find a device_t by bus/slot/function in domain 0.  Thin wrapper
 * around pci_find_dbsf() for callers unaware of PCI domains.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
403
404/* Find a device_t by domain/bus/slot/function */
405
406device_t
407pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
408{
409	struct pci_devinfo *dinfo;
410
411	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
412		if ((dinfo->cfg.domain == domain) &&
413		    (dinfo->cfg.bus == bus) &&
414		    (dinfo->cfg.slot == slot) &&
415		    (dinfo->cfg.func == func)) {
416			return (dinfo->cfg.dev);
417		}
418	}
419
420	return (NULL);
421}
422
423/* Find a device_t by vendor/device ID */
424
425device_t
426pci_find_device(uint16_t vendor, uint16_t device)
427{
428	struct pci_devinfo *dinfo;
429
430	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
431		if ((dinfo->cfg.vendor == vendor) &&
432		    (dinfo->cfg.device == device)) {
433			return (dinfo->cfg.dev);
434		}
435	}
436
437	return (NULL);
438}
439
440device_t
441pci_find_class(uint8_t class, uint8_t subclass)
442{
443	struct pci_devinfo *dinfo;
444
445	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
446		if (dinfo->cfg.baseclass == class &&
447		    dinfo->cfg.subclass == subclass) {
448			return (dinfo->cfg.dev);
449		}
450	}
451
452	return (NULL);
453}
454
455static int
456pci_printf(pcicfgregs *cfg, const char *fmt, ...)
457{
458	va_list ap;
459	int retval;
460
461	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
462	    cfg->func);
463	va_start(ap, fmt);
464	retval += vprintf(fmt, ap);
465	va_end(ap);
466	return (retval);
467}
468
469/* return base address of memory or port map */
470
471static pci_addr_t
472pci_mapbase(uint64_t mapreg)
473{
474
475	if (PCI_BAR_MEM(mapreg))
476		return (mapreg & PCIM_BAR_MEM_BASE);
477	else
478		return (mapreg & PCIM_BAR_IO_BASE);
479}
480
481/* return map type of memory or port map */
482
483static const char *
484pci_maptype(uint64_t mapreg)
485{
486
487	if (PCI_BAR_IO(mapreg))
488		return ("I/O Port");
489	if (mapreg & PCIM_BAR_MEM_PREFETCH)
490		return ("Prefetchable Memory");
491	return ("Memory");
492}
493
494/* return log2 of map size decoded for memory or port map */
495
496static int
497pci_mapsize(uint64_t testval)
498{
499	int ln2size;
500
501	testval = pci_mapbase(testval);
502	ln2size = 0;
503	if (testval != 0) {
504		while ((testval & 1) == 0)
505		{
506			ln2size++;
507			testval >>= 1;
508		}
509	}
510	return (ln2size);
511}
512
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
521
/* return log2 of map size decoded for device ROM */
523
524static int
525pci_romsize(uint64_t testval)
526{
527	int ln2size;
528
529	testval = pci_rombase(testval);
530	ln2size = 0;
531	if (testval != 0) {
532		while ((testval & 1) == 0)
533		{
534			ln2size++;
535			testval >>= 1;
536		}
537	}
538	return (ln2size);
539}
540
541/* return log2 of address range supported by map register */
542
543static int
544pci_maprange(uint64_t mapreg)
545{
546	int ln2range = 0;
547
548	if (PCI_BAR_IO(mapreg))
549		ln2range = 32;
550	else
551		switch (mapreg & PCIM_BAR_MEM_TYPE) {
552		case PCIM_BAR_MEM_32:
553			ln2range = 32;
554			break;
555		case PCIM_BAR_MEM_1MB:
556			ln2range = 20;
557			break;
558		case PCIM_BAR_MEM_64:
559			ln2range = 64;
560			break;
561		}
562	return (ln2range);
563}
564
565/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
566
567static void
568pci_fixancient(pcicfgregs *cfg)
569{
570	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
571		return;
572
573	/* PCI to PCI bridges use header type 1 */
574	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
575		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
576}
577
/*
 * Extract header type specific config data: the subsystem IDs (where
 * the header layout defines them) and the number of BARs this header
 * type provides.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridge headers carry no subsystem ID registers. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
601
/*
 * Read the configuration header of the function at domain/bus/slot/func
 * into a freshly allocated pci_devinfo of 'size' bytes (callers may ask
 * for a larger, derived structure) and link it onto the global device
 * list.  Returns NULL when no device responds at that address (the
 * vendor/device register reads back all-ones).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* NOTE(review): M_WAITOK should never yield NULL; the
		 * check below is defensive. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the common (type-independent) header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciio(4) conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
677
/*
 * Walk the classic (non-extended) capability list of 'cfg' and record
 * the location and salient registers of each capability this driver
 * cares about: power management, HyperTransport, MSI, MSI-X, VPD,
 * subvendor, PCI-X, and PCI Express.
 *
 * The REG/WREG macros deliberately remain defined on exit; the VPD
 * helpers below reuse them (see the comment at the end).
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* The MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
838
839/*
840 * PCI Vital Product Data
841 */
842
843#define	PCI_VPD_TIMEOUT		1000000
844
845static int
846pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
847{
848	int count = PCI_VPD_TIMEOUT;
849
850	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
851
852	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
853
854	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
855		if (--count < 0)
856			return (ENXIO);
857		DELAY(1);	/* limit looping */
858	}
859	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
860
861	return (0);
862}
863
#if 0
/*
 * Write one 32-bit word of VPD data at byte offset 'reg' (mirror of
 * pci_read_vpd_reg(): setting flag bit 15 requests a write; hardware
 * clears it on completion).  Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
885
/*
 * Cursor state for the VPD reader: buffers one 32-bit word fetched via
 * pci_read_vpd_reg() and doles it out one byte at a time, maintaining
 * a running checksum of everything consumed.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* buffered word, next byte in low 8 bits */
	int		bytesinval;	/* bytes still unconsumed in 'val' */
	int		off;		/* VPD offset of the next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
894
895static int
896vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
897{
898	uint32_t reg;
899	uint8_t byte;
900
901	if (vrs->bytesinval == 0) {
902		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
903			return (ENXIO);
904		vrs->val = le32toh(reg);
905		vrs->off += 4;
906		byte = vrs->val & 0xff;
907		vrs->bytesinval = 3;
908	} else {
909		vrs->val = vrs->val >> 8;
910		byte = vrs->val & 0xff;
911		vrs->bytesinval--;
912	}
913
914	vrs->cksum += byte;
915	*data = byte;
916	return (0);
917}
918
/*
 * Parse the device's Vital Product Data (VPD) into cfg->vpd.
 *
 * Bytes are pulled one at a time via vpd_nextbyte() through the bridge
 * 'pcib'.  A small state machine decodes the resource-item stream:
 *
 *	state 0:  item header (small or large resource tag)
 *	state 1:  Identifier String -> cfg->vpd.vpd_ident
 *	state 2:  VPD-R (read-only) keyword header
 *	state 3:  VPD-R keyword value
 *	state 4:  skip bytes of an unhandled item
 *	state 5:  VPD-W (writable) keyword header
 *	state 6:  VPD-W keyword value
 *	state -1: normal termination; state -2: read (I/O) error
 *
 * The read-only section ends with an "RV" checksum keyword; the running
 * byte sum must come out to zero or the read-only data is discarded.
 * On an I/O error all partially-parsed data is freed.  In every case
 * vpd_cached is set so a device's VPD is only parsed once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1 = not yet seen, 1 = good, 0 = bad */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD register window is 0x7f dwords total. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					pci_printf(cfg,
					    "invalid VPD data, remain %#x\n",
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array by doubling as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].len = dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				pci_printf(cfg, "bad keyword length: %d\n",
				    dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* 2 keyword bytes + 1 length byte */
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" carries the checksum byte; the sum of all
			 * bytes up to and including it must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						pci_printf(cfg,
					    "bad VPD cksum, remain %hhu\n",
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: trim the array to size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip bytes of an item we don't interpret */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this writable field lives in VPD space. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			pci_printf(cfg, "invalid state: %d\n", state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		pci_printf(cfg, "failed to read VPD data.\n");
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the VPD cached even on failure so we never re-parse. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1190
1191int
1192pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1193{
1194	struct pci_devinfo *dinfo = device_get_ivars(child);
1195	pcicfgregs *cfg = &dinfo->cfg;
1196
1197	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1198		pci_read_vpd(device_get_parent(dev), cfg);
1199
1200	*identptr = cfg->vpd.vpd_ident;
1201
1202	if (*identptr == NULL)
1203		return (ENXIO);
1204
1205	return (0);
1206}
1207
1208int
1209pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1210	const char **vptr)
1211{
1212	struct pci_devinfo *dinfo = device_get_ivars(child);
1213	pcicfgregs *cfg = &dinfo->cfg;
1214	int i;
1215
1216	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1217		pci_read_vpd(device_get_parent(dev), cfg);
1218
1219	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1220		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1221		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1222			*vptr = cfg->vpd.vpd_ros[i].value;
1223			return (0);
1224		}
1225
1226	*vptr = NULL;
1227	return (ENXIO);
1228}
1229
1230struct pcicfg_vpd *
1231pci_fetch_vpd_list(device_t dev)
1232{
1233	struct pci_devinfo *dinfo = device_get_ivars(dev);
1234	pcicfgregs *cfg = &dinfo->cfg;
1235
1236	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1237		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1238	return (&cfg->vpd);
1239}
1240
1241/*
1242 * Find the requested HyperTransport capability and return the offset
1243 * in configuration space via the pointer provided.  The function
1244 * returns 0 on success and an error code otherwise.
1245 */
1246int
1247pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1248{
1249	int ptr, error;
1250	uint16_t val;
1251
1252	error = pci_find_cap(child, PCIY_HT, &ptr);
1253	if (error)
1254		return (error);
1255
1256	/*
1257	 * Traverse the capabilities list checking each HT capability
1258	 * to see if it matches the requested HT capability.
1259	 */
1260	while (ptr != 0) {
1261		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1262		if (capability == PCIM_HTCAP_SLAVE ||
1263		    capability == PCIM_HTCAP_HOST)
1264			val &= 0xe000;
1265		else
1266			val &= PCIM_HTCMD_CAP_MASK;
1267		if (val == capability) {
1268			if (capreg != NULL)
1269				*capreg = ptr;
1270			return (0);
1271		}
1272
1273		/* Skip to the next HT capability. */
1274		while (ptr != 0) {
1275			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1276			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1277			    PCIY_HT)
1278				break;
1279		}
1280	}
1281	return (ENOENT);
1282}
1283
1284/*
1285 * Find the requested capability and return the offset in
1286 * configuration space via the pointer provided.  The function returns
1287 * 0 on success and an error code otherwise.
1288 */
1289int
1290pci_find_cap_method(device_t dev, device_t child, int capability,
1291    int *capreg)
1292{
1293	struct pci_devinfo *dinfo = device_get_ivars(child);
1294	pcicfgregs *cfg = &dinfo->cfg;
1295	u_int32_t status;
1296	u_int8_t ptr;
1297
1298	/*
1299	 * Check the CAP_LIST bit of the PCI status register first.
1300	 */
1301	status = pci_read_config(child, PCIR_STATUS, 2);
1302	if (!(status & PCIM_STATUS_CAPPRESENT))
1303		return (ENXIO);
1304
1305	/*
1306	 * Determine the start pointer of the capabilities list.
1307	 */
1308	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1309	case PCIM_HDRTYPE_NORMAL:
1310	case PCIM_HDRTYPE_BRIDGE:
1311		ptr = PCIR_CAP_PTR;
1312		break;
1313	case PCIM_HDRTYPE_CARDBUS:
1314		ptr = PCIR_CAP_PTR_2;
1315		break;
1316	default:
1317		/* XXX: panic? */
1318		return (ENXIO);		/* no extended capabilities support */
1319	}
1320	ptr = pci_read_config(child, ptr, 1);
1321
1322	/*
1323	 * Traverse the capabilities list.
1324	 */
1325	while (ptr != 0) {
1326		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1327			if (capreg != NULL)
1328				*capreg = ptr;
1329			return (0);
1330		}
1331		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1332	}
1333
1334	return (ENOENT);
1335}
1336
1337/*
1338 * Find the requested extended capability and return the offset in
1339 * configuration space via the pointer provided.  The function returns
1340 * 0 on success and an error code otherwise.
1341 */
1342int
1343pci_find_extcap_method(device_t dev, device_t child, int capability,
1344    int *capreg)
1345{
1346	struct pci_devinfo *dinfo = device_get_ivars(child);
1347	pcicfgregs *cfg = &dinfo->cfg;
1348	uint32_t ecap;
1349	uint16_t ptr;
1350
1351	/* Only supported for PCI-express devices. */
1352	if (cfg->pcie.pcie_location == 0)
1353		return (ENXIO);
1354
1355	ptr = PCIR_EXTCAP;
1356	ecap = pci_read_config(child, ptr, 4);
1357	if (ecap == 0xffffffff || ecap == 0)
1358		return (ENOENT);
1359	for (;;) {
1360		if (PCI_EXTCAP_ID(ecap) == capability) {
1361			if (capreg != NULL)
1362				*capreg = ptr;
1363			return (0);
1364		}
1365		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1366		if (ptr == 0)
1367			break;
1368		ecap = pci_read_config(child, ptr, 4);
1369	}
1370
1371	return (ENOENT);
1372}
1373
1374/*
1375 * Support for MSI-X message interrupts.
1376 */
1377void
1378pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1379{
1380	struct pci_devinfo *dinfo = device_get_ivars(dev);
1381	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1382	uint32_t offset;
1383
1384	KASSERT(msix->msix_table_len > index, ("bogus index"));
1385	offset = msix->msix_table_offset + index * 16;
1386	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1387	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1388	bus_write_4(msix->msix_table_res, offset + 8, data);
1389
1390	/* Enable MSI -> HT mapping. */
1391	pci_ht_map_msi(dev, address);
1392}
1393
1394void
1395pci_mask_msix(device_t dev, u_int index)
1396{
1397	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399	uint32_t offset, val;
1400
1401	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1402	offset = msix->msix_table_offset + index * 16 + 12;
1403	val = bus_read_4(msix->msix_table_res, offset);
1404	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1405		val |= PCIM_MSIX_VCTRL_MASK;
1406		bus_write_4(msix->msix_table_res, offset, val);
1407	}
1408}
1409
1410void
1411pci_unmask_msix(device_t dev, u_int index)
1412{
1413	struct pci_devinfo *dinfo = device_get_ivars(dev);
1414	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1415	uint32_t offset, val;
1416
1417	KASSERT(msix->msix_table_len > index, ("bogus index"));
1418	offset = msix->msix_table_offset + index * 16 + 12;
1419	val = bus_read_4(msix->msix_table_res, offset);
1420	if (val & PCIM_MSIX_VCTRL_MASK) {
1421		val &= ~PCIM_MSIX_VCTRL_MASK;
1422		bus_write_4(msix->msix_table_res, offset, val);
1423	}
1424}
1425
1426int
1427pci_pending_msix(device_t dev, u_int index)
1428{
1429	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431	uint32_t offset, bit;
1432
1433	KASSERT(msix->msix_table_len > index, ("bogus index"));
1434	offset = msix->msix_pba_offset + (index / 32) * 4;
1435	bit = 1 << index % 32;
1436	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1437}
1438
1439/*
1440 * Restore MSI-X registers and table during resume.  If MSI-X is
1441 * enabled then walk the virtual table to restore the actual MSI-X
1442 * table.
1443 */
1444static void
1445pci_resume_msix(device_t dev)
1446{
1447	struct pci_devinfo *dinfo = device_get_ivars(dev);
1448	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1449	struct msix_table_entry *mte;
1450	struct msix_vector *mv;
1451	int i;
1452
1453	if (msix->msix_alloc > 0) {
1454		/* First, mask all vectors. */
1455		for (i = 0; i < msix->msix_msgnum; i++)
1456			pci_mask_msix(dev, i);
1457
1458		/* Second, program any messages with at least one handler. */
1459		for (i = 0; i < msix->msix_table_len; i++) {
1460			mte = &msix->msix_table[i];
1461			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1462				continue;
1463			mv = &msix->msix_vectors[mte->mte_vector - 1];
1464			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1465			pci_unmask_msix(dev, i);
1466		}
1467	}
1468	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1469	    msix->msix_ctrl, 2);
1470}
1471
1472/*
1473 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1474 * returned in *count.  After this function returns, each message will be
1475 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1476 */
1477int
1478pci_alloc_msix_method(device_t dev, device_t child, int *count)
1479{
1480	struct pci_devinfo *dinfo = device_get_ivars(child);
1481	pcicfgregs *cfg = &dinfo->cfg;
1482	struct resource_list_entry *rle;
1483	int actual, error, i, irq, max;
1484
1485	/* Don't let count == 0 get us into trouble. */
1486	if (*count == 0)
1487		return (EINVAL);
1488
1489	/* If rid 0 is allocated, then fail. */
1490	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1491	if (rle != NULL && rle->res != NULL)
1492		return (ENXIO);
1493
1494	/* Already have allocated messages? */
1495	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1496		return (ENXIO);
1497
1498	/* If MSI-X is blacklisted for this system, fail. */
1499	if (pci_msix_blacklisted())
1500		return (ENXIO);
1501
1502	/* MSI-X capability present? */
1503	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1504		return (ENODEV);
1505
1506	/* Make sure the appropriate BARs are mapped. */
1507	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1508	    cfg->msix.msix_table_bar);
1509	if (rle == NULL || rle->res == NULL ||
1510	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1511		return (ENXIO);
1512	cfg->msix.msix_table_res = rle->res;
1513	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1514		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1515		    cfg->msix.msix_pba_bar);
1516		if (rle == NULL || rle->res == NULL ||
1517		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1518			return (ENXIO);
1519	}
1520	cfg->msix.msix_pba_res = rle->res;
1521
1522	if (bootverbose)
1523		device_printf(child,
1524		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1525		    *count, cfg->msix.msix_msgnum);
1526	max = min(*count, cfg->msix.msix_msgnum);
1527	for (i = 0; i < max; i++) {
1528		/* Allocate a message. */
1529		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1530		if (error) {
1531			if (i == 0)
1532				return (error);
1533			break;
1534		}
1535		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1536		    irq, 1);
1537	}
1538	actual = i;
1539
1540	if (bootverbose) {
1541		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1542		if (actual == 1)
1543			device_printf(child, "using IRQ %lu for MSI-X\n",
1544			    rle->start);
1545		else {
1546			int run;
1547
1548			/*
1549			 * Be fancy and try to print contiguous runs of
1550			 * IRQ values as ranges.  'irq' is the previous IRQ.
1551			 * 'run' is true if we are in a range.
1552			 */
1553			device_printf(child, "using IRQs %lu", rle->start);
1554			irq = rle->start;
1555			run = 0;
1556			for (i = 1; i < actual; i++) {
1557				rle = resource_list_find(&dinfo->resources,
1558				    SYS_RES_IRQ, i + 1);
1559
1560				/* Still in a run? */
1561				if (rle->start == irq + 1) {
1562					run = 1;
1563					irq++;
1564					continue;
1565				}
1566
1567				/* Finish previous range. */
1568				if (run) {
1569					printf("-%d", irq);
1570					run = 0;
1571				}
1572
1573				/* Start new range. */
1574				printf(",%lu", rle->start);
1575				irq = rle->start;
1576			}
1577
1578			/* Unfinished range? */
1579			if (run)
1580				printf("-%d", irq);
1581			printf(" for MSI-X\n");
1582		}
1583	}
1584
1585	/* Mask all vectors. */
1586	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1587		pci_mask_msix(child, i);
1588
1589	/* Allocate and initialize vector data and virtual table. */
1590	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1591	    M_DEVBUF, M_WAITOK | M_ZERO);
1592	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1593	    M_DEVBUF, M_WAITOK | M_ZERO);
1594	for (i = 0; i < actual; i++) {
1595		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1596		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1597		cfg->msix.msix_table[i].mte_vector = i + 1;
1598	}
1599
1600	/* Update control register to enable MSI-X. */
1601	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1602	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1603	    cfg->msix.msix_ctrl, 2);
1604
1605	/* Update counts of alloc'd messages. */
1606	cfg->msix.msix_alloc = actual;
1607	cfg->msix.msix_table_len = actual;
1608	*count = actual;
1609	return (0);
1610}
1611
1612/*
1613 * By default, pci_alloc_msix() will assign the allocated IRQ
1614 * resources consecutively to the first N messages in the MSI-X table.
1615 * However, device drivers may want to use different layouts if they
1616 * either receive fewer messages than they asked for, or they wish to
1617 * populate the MSI-X table sparsely.  This method allows the driver
1618 * to specify what layout it wants.  It must be called after a
1619 * successful pci_alloc_msix() but before any of the associated
1620 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1621 *
1622 * The 'vectors' array contains 'count' message vectors.  The array
1623 * maps directly to the MSI-X table in that index 0 in the array
1624 * specifies the vector for the first message in the MSI-X table, etc.
1625 * The vector value in each array index can either be 0 to indicate
1626 * that no vector should be assigned to a message slot, or it can be a
1627 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1629 * vector (IRQ) to be used for the corresponding message.
1630 *
1631 * On successful return, each message with a non-zero vector will have
1632 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1633 * 1.  Additionally, if any of the IRQs allocated via the previous
1634 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1635 * will be freed back to the system automatically.
1636 *
1637 * For example, suppose a driver has a MSI-X table with 6 messages and
1638 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1639 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1640 * C.  After the call to pci_alloc_msix(), the device will be setup to
1641 * have an MSI-X table of ABC--- (where - means no vector assigned).
1642 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1643 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1644 * be freed back to the system.  This device will also have valid
1645 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1646 *
1647 * In any case, the SYS_RES_IRQ rid X will always map to the message
1648 * at MSI-X table index X - 1 and will only be valid if a vector is
1649 * assigned to that table entry.
1650 */
1651int
1652pci_remap_msix_method(device_t dev, device_t child, int count,
1653    const u_int *vectors)
1654{
1655	struct pci_devinfo *dinfo = device_get_ivars(child);
1656	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1657	struct resource_list_entry *rle;
1658	int i, irq, j, *used;
1659
1660	/*
1661	 * Have to have at least one message in the table but the
1662	 * table can't be bigger than the actual MSI-X table in the
1663	 * device.
1664	 */
1665	if (count == 0 || count > msix->msix_msgnum)
1666		return (EINVAL);
1667
1668	/* Sanity check the vectors. */
1669	for (i = 0; i < count; i++)
1670		if (vectors[i] > msix->msix_alloc)
1671			return (EINVAL);
1672
1673	/*
1674	 * Make sure there aren't any holes in the vectors to be used.
1675	 * It's a big pain to support it, and it doesn't really make
1676	 * sense anyway.  Also, at least one vector must be used.
1677	 */
1678	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1679	    M_ZERO);
1680	for (i = 0; i < count; i++)
1681		if (vectors[i] != 0)
1682			used[vectors[i] - 1] = 1;
1683	for (i = 0; i < msix->msix_alloc - 1; i++)
1684		if (used[i] == 0 && used[i + 1] == 1) {
1685			free(used, M_DEVBUF);
1686			return (EINVAL);
1687		}
1688	if (used[0] != 1) {
1689		free(used, M_DEVBUF);
1690		return (EINVAL);
1691	}
1692
1693	/* Make sure none of the resources are allocated. */
1694	for (i = 0; i < msix->msix_table_len; i++) {
1695		if (msix->msix_table[i].mte_vector == 0)
1696			continue;
1697		if (msix->msix_table[i].mte_handlers > 0)
1698			return (EBUSY);
1699		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1700		KASSERT(rle != NULL, ("missing resource"));
1701		if (rle->res != NULL)
1702			return (EBUSY);
1703	}
1704
1705	/* Free the existing resource list entries. */
1706	for (i = 0; i < msix->msix_table_len; i++) {
1707		if (msix->msix_table[i].mte_vector == 0)
1708			continue;
1709		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1710	}
1711
1712	/*
1713	 * Build the new virtual table keeping track of which vectors are
1714	 * used.
1715	 */
1716	free(msix->msix_table, M_DEVBUF);
1717	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1718	    M_DEVBUF, M_WAITOK | M_ZERO);
1719	for (i = 0; i < count; i++)
1720		msix->msix_table[i].mte_vector = vectors[i];
1721	msix->msix_table_len = count;
1722
1723	/* Free any unused IRQs and resize the vectors array if necessary. */
1724	j = msix->msix_alloc - 1;
1725	if (used[j] == 0) {
1726		struct msix_vector *vec;
1727
1728		while (used[j] == 0) {
1729			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1730			    msix->msix_vectors[j].mv_irq);
1731			j--;
1732		}
1733		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1734		    M_WAITOK);
1735		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1736		    (j + 1));
1737		free(msix->msix_vectors, M_DEVBUF);
1738		msix->msix_vectors = vec;
1739		msix->msix_alloc = j + 1;
1740	}
1741	free(used, M_DEVBUF);
1742
1743	/* Map the IRQs onto the rids. */
1744	for (i = 0; i < count; i++) {
1745		if (vectors[i] == 0)
1746			continue;
1747		irq = msix->msix_vectors[vectors[i]].mv_irq;
1748		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1749		    irq, 1);
1750	}
1751
1752	if (bootverbose) {
1753		device_printf(child, "Remapped MSI-X IRQs as: ");
1754		for (i = 0; i < count; i++) {
1755			if (i != 0)
1756				printf(", ");
1757			if (vectors[i] == 0)
1758				printf("---");
1759			else
1760				printf("%d",
1761				    msix->msix_vectors[vectors[i]].mv_irq);
1762		}
1763		printf("\n");
1764	}
1765
1766	return (0);
1767}
1768
1769static int
1770pci_release_msix(device_t dev, device_t child)
1771{
1772	struct pci_devinfo *dinfo = device_get_ivars(child);
1773	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1774	struct resource_list_entry *rle;
1775	int i;
1776
1777	/* Do we have any messages to release? */
1778	if (msix->msix_alloc == 0)
1779		return (ENODEV);
1780
1781	/* Make sure none of the resources are allocated. */
1782	for (i = 0; i < msix->msix_table_len; i++) {
1783		if (msix->msix_table[i].mte_vector == 0)
1784			continue;
1785		if (msix->msix_table[i].mte_handlers > 0)
1786			return (EBUSY);
1787		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1788		KASSERT(rle != NULL, ("missing resource"));
1789		if (rle->res != NULL)
1790			return (EBUSY);
1791	}
1792
1793	/* Update control register to disable MSI-X. */
1794	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1795	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1796	    msix->msix_ctrl, 2);
1797
1798	/* Free the resource list entries. */
1799	for (i = 0; i < msix->msix_table_len; i++) {
1800		if (msix->msix_table[i].mte_vector == 0)
1801			continue;
1802		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1803	}
1804	free(msix->msix_table, M_DEVBUF);
1805	msix->msix_table_len = 0;
1806
1807	/* Release the IRQs. */
1808	for (i = 0; i < msix->msix_alloc; i++)
1809		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1810		    msix->msix_vectors[i].mv_irq);
1811	free(msix->msix_vectors, M_DEVBUF);
1812	msix->msix_alloc = 0;
1813	return (0);
1814}
1815
1816/*
1817 * Return the max supported MSI-X messages this device supports.
1818 * Basically, assuming the MD code can alloc messages, this function
1819 * should return the maximum value that pci_alloc_msix() can return.
1820 * Thus, it is subject to the tunables, etc.
1821 */
1822int
1823pci_msix_count_method(device_t dev, device_t child)
1824{
1825	struct pci_devinfo *dinfo = device_get_ivars(child);
1826	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1827
1828	if (pci_do_msix && msix->msix_location != 0)
1829		return (msix->msix_msgnum);
1830	return (0);
1831}
1832
1833/*
1834 * HyperTransport MSI mapping control
1835 */
1836void
1837pci_ht_map_msi(device_t dev, uint64_t addr)
1838{
1839	struct pci_devinfo *dinfo = device_get_ivars(dev);
1840	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1841
1842	if (!ht->ht_msimap)
1843		return;
1844
1845	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1846	    ht->ht_msiaddr >> 20 == addr >> 20) {
1847		/* Enable MSI -> HT mapping. */
1848		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1849		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1850		    ht->ht_msictrl, 2);
1851	}
1852
1853	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1854		/* Disable MSI -> HT mapping. */
1855		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1856		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1857		    ht->ht_msictrl, 2);
1858	}
1859}
1860
1861int
1862pci_get_max_read_req(device_t dev)
1863{
1864	struct pci_devinfo *dinfo = device_get_ivars(dev);
1865	int cap;
1866	uint16_t val;
1867
1868	cap = dinfo->cfg.pcie.pcie_location;
1869	if (cap == 0)
1870		return (0);
1871	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1872	val &= PCIEM_CTL_MAX_READ_REQUEST;
1873	val >>= 12;
1874	return (1 << (val + 7));
1875}
1876
1877int
1878pci_set_max_read_req(device_t dev, int size)
1879{
1880	struct pci_devinfo *dinfo = device_get_ivars(dev);
1881	int cap;
1882	uint16_t val;
1883
1884	cap = dinfo->cfg.pcie.pcie_location;
1885	if (cap == 0)
1886		return (0);
1887	if (size < 128)
1888		size = 128;
1889	if (size > 4096)
1890		size = 4096;
1891	size = (1 << (fls(size) - 1));
1892	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1893	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1894	val |= (fls(size) - 8) << 12;
1895	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1896	return (size);
1897}
1898
1899/*
1900 * Support for MSI message signalled interrupts.
1901 */
1902void
1903pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1904{
1905	struct pci_devinfo *dinfo = device_get_ivars(dev);
1906	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1907
1908	/* Write data and address values. */
1909	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1910	    address & 0xffffffff, 4);
1911	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1912		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1913		    address >> 32, 4);
1914		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1915		    data, 2);
1916	} else
1917		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1918		    2);
1919
1920	/* Enable MSI in the control register. */
1921	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1922	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1923	    2);
1924
1925	/* Enable MSI -> HT mapping. */
1926	pci_ht_map_msi(dev, address);
1927}
1928
1929void
1930pci_disable_msi(device_t dev)
1931{
1932	struct pci_devinfo *dinfo = device_get_ivars(dev);
1933	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1934
1935	/* Disable MSI -> HT mapping. */
1936	pci_ht_map_msi(dev, 0);
1937
1938	/* Disable MSI in the control register. */
1939	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1940	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1941	    2);
1942}
1943
1944/*
1945 * Restore MSI registers during resume.  If MSI is enabled then
1946 * restore the data and address registers in addition to the control
1947 * register.
1948 */
1949static void
1950pci_resume_msi(device_t dev)
1951{
1952	struct pci_devinfo *dinfo = device_get_ivars(dev);
1953	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1954	uint64_t address;
1955	uint16_t data;
1956
1957	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1958		address = msi->msi_addr;
1959		data = msi->msi_data;
1960		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1961		    address & 0xffffffff, 4);
1962		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1963			pci_write_config(dev, msi->msi_location +
1964			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1965			pci_write_config(dev, msi->msi_location +
1966			    PCIR_MSI_DATA_64BIT, data, 2);
1967		} else
1968			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1969			    data, 2);
1970	}
1971	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1972	    2);
1973}
1974
/*
 * Re-route an already-allocated MSI or MSI-X message IRQ to a new
 * address/data pair obtained from the parent bridge (used e.g. after
 * interrupt migration).  Returns 0 on a successful MSI remap, ENOENT
 * when 'irq' is not one of the device's messages, or the error from
 * PCIB_MAP_MSI().
 *
 * NOTE(review): the MSI-X branch falls through to ENOENT even after a
 * successful remap — confirm against upstream whether a return (0) is
 * intended there.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram the device with the new values. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
2047
2048/*
2049 * Returns true if the specified device is blacklisted because MSI
2050 * doesn't work.
2051 */
2052int
2053pci_msi_device_blacklisted(device_t dev)
2054{
2055
2056	if (!pci_honor_msi_blacklist)
2057		return (0);
2058
2059	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2060}
2061
2062/*
2063 * Determine if MSI is blacklisted globally on this system.  Currently,
2064 * we just check for blacklisted chipsets as represented by the
2065 * host-PCI bridge at device 0:0:0.  In the future, it may become
2066 * necessary to check other system attributes, such as the kenv values
2067 * that give the motherboard manufacturer and model number.
2068 */
2069static int
2070pci_msi_blacklisted(void)
2071{
2072	device_t dev;
2073
2074	if (!pci_honor_msi_blacklist)
2075		return (0);
2076
2077	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2078	if (!(pcie_chipset || pcix_chipset)) {
2079		if (vm_guest != VM_GUEST_NO) {
2080			/*
2081			 * Whitelist older chipsets in virtual
2082			 * machines known to support MSI.
2083			 */
2084			dev = pci_find_bsf(0, 0, 0);
2085			if (dev != NULL)
2086				return (!pci_has_quirk(pci_get_devid(dev),
2087					PCI_QUIRK_ENABLE_MSI_VM));
2088		}
2089		return (1);
2090	}
2091
2092	dev = pci_find_bsf(0, 0, 0);
2093	if (dev != NULL)
2094		return (pci_msi_device_blacklisted(dev));
2095	return (0);
2096}
2097
2098/*
2099 * Returns true if the specified device is blacklisted because MSI-X
2100 * doesn't work.  Note that this assumes that if MSI doesn't work,
2101 * MSI-X doesn't either.
2102 */
2103int
2104pci_msix_device_blacklisted(device_t dev)
2105{
2106
2107	if (!pci_honor_msi_blacklist)
2108		return (0);
2109
2110	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2111		return (1);
2112
2113	return (pci_msi_device_blacklisted(dev));
2114}
2115
2116/*
2117 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2118 * is blacklisted, assume that MSI-X is as well.  Check for additional
2119 * chipsets where MSI works but MSI-X does not.
2120 */
2121static int
2122pci_msix_blacklisted(void)
2123{
2124	device_t dev;
2125
2126	if (!pci_honor_msi_blacklist)
2127		return (0);
2128
2129	dev = pci_find_bsf(0, 0, 0);
2130	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2131	    PCI_QUIRK_DISABLE_MSIX))
2132		return (1);
2133
2134	return (pci_msi_blacklisted());
2135}
2136
/*
 * Attempt to allocate *count MSI messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
 *
 * Returns 0 on success, EINVAL for a zero or non-power-of-2 request,
 * ENXIO if messages are already allocated (or MSI is blacklisted), or
 * ENODEV if the device has no MSI capability or MSI is disabled.
 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving the
	 * request on failure until a single message is refused.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2 of the message count, hence
	 * ffs(actual) - 1 shifted into place.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2260
/*
 * Release the MSI messages associated with this device.  Fails with
 * EBUSY if any message still has an active handler or an allocated
 * resource, and ENODEV if no messages are allocated.  MSI-X release is
 * attempted first; its result is returned unless it reports ENODEV.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for PCIB_RELEASE_MSI below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2309
2310/*
2311 * Return the max supported MSI messages this device supports.
2312 * Basically, assuming the MD code can alloc messages, this function
2313 * should return the maximum value that pci_alloc_msi() can return.
2314 * Thus, it is subject to the tunables, etc.
2315 */
2316int
2317pci_msi_count_method(device_t dev, device_t child)
2318{
2319	struct pci_devinfo *dinfo = device_get_ivars(child);
2320	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2321
2322	if (pci_do_msi && msi->msi_location != 0)
2323		return (msi->msi_msgnum);
2324	return (0);
2325}
2326
2327/* free pcicfgregs structure and all depending data structures */
2328
2329int
2330pci_freecfg(struct pci_devinfo *dinfo)
2331{
2332	struct devlist *devlist_head;
2333	struct pci_map *pm, *next;
2334	int i;
2335
2336	devlist_head = &pci_devq;
2337
2338	if (dinfo->cfg.vpd.vpd_reg) {
2339		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2340		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2341			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2342		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2343		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2344			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2345		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2346	}
2347	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2348		free(pm, M_DEVBUF);
2349	}
2350	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2351	free(dinfo, M_DEVBUF);
2352
2353	/* increment the generation count */
2354	pci_generation++;
2355
2356	/* we're losing one device */
2357	pci_numdevs--;
2358	return (0);
2359}
2360
2361/*
2362 * PCI power manangement
2363 */
2364int
2365pci_set_powerstate_method(device_t dev, device_t child, int state)
2366{
2367	struct pci_devinfo *dinfo = device_get_ivars(child);
2368	pcicfgregs *cfg = &dinfo->cfg;
2369	uint16_t status;
2370	int result, oldstate, highest, delay;
2371
2372	if (cfg->pp.pp_cap == 0)
2373		return (EOPNOTSUPP);
2374
2375	/*
2376	 * Optimize a no state change request away.  While it would be OK to
2377	 * write to the hardware in theory, some devices have shown odd
2378	 * behavior when going from D3 -> D3.
2379	 */
2380	oldstate = pci_get_powerstate(child);
2381	if (oldstate == state)
2382		return (0);
2383
2384	/*
2385	 * The PCI power management specification states that after a state
2386	 * transition between PCI power states, system software must
2387	 * guarantee a minimal delay before the function accesses the device.
2388	 * Compute the worst case delay that we need to guarantee before we
2389	 * access the device.  Many devices will be responsive much more
2390	 * quickly than this delay, but there are some that don't respond
2391	 * instantly to state changes.  Transitions to/from D3 state require
2392	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2393	 * is done below with DELAY rather than a sleeper function because
2394	 * this function can be called from contexts where we cannot sleep.
2395	 */
2396	highest = (oldstate > state) ? oldstate : state;
2397	if (highest == PCI_POWERSTATE_D3)
2398	    delay = 10000;
2399	else if (highest == PCI_POWERSTATE_D2)
2400	    delay = 200;
2401	else
2402	    delay = 0;
2403	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2404	    & ~PCIM_PSTAT_DMASK;
2405	result = 0;
2406	switch (state) {
2407	case PCI_POWERSTATE_D0:
2408		status |= PCIM_PSTAT_D0;
2409		break;
2410	case PCI_POWERSTATE_D1:
2411		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2412			return (EOPNOTSUPP);
2413		status |= PCIM_PSTAT_D1;
2414		break;
2415	case PCI_POWERSTATE_D2:
2416		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2417			return (EOPNOTSUPP);
2418		status |= PCIM_PSTAT_D2;
2419		break;
2420	case PCI_POWERSTATE_D3:
2421		status |= PCIM_PSTAT_D3;
2422		break;
2423	default:
2424		return (EINVAL);
2425	}
2426
2427	if (bootverbose)
2428		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2429		    state);
2430
2431	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2432	if (delay)
2433		DELAY(delay);
2434	return (0);
2435}
2436
2437int
2438pci_get_powerstate_method(device_t dev, device_t child)
2439{
2440	struct pci_devinfo *dinfo = device_get_ivars(child);
2441	pcicfgregs *cfg = &dinfo->cfg;
2442	uint16_t status;
2443	int result;
2444
2445	if (cfg->pp.pp_cap != 0) {
2446		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2447		switch (status & PCIM_PSTAT_DMASK) {
2448		case PCIM_PSTAT_D0:
2449			result = PCI_POWERSTATE_D0;
2450			break;
2451		case PCIM_PSTAT_D1:
2452			result = PCI_POWERSTATE_D1;
2453			break;
2454		case PCIM_PSTAT_D2:
2455			result = PCI_POWERSTATE_D2;
2456			break;
2457		case PCIM_PSTAT_D3:
2458			result = PCI_POWERSTATE_D3;
2459			break;
2460		default:
2461			result = PCI_POWERSTATE_UNKNOWN;
2462			break;
2463		}
2464	} else {
2465		/* No support, device is always at D0 */
2466		result = PCI_POWERSTATE_D0;
2467	}
2468	return (result);
2469}
2470
2471/*
2472 * Some convenience functions for PCI device drivers.
2473 */
2474
2475static __inline void
2476pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2477{
2478	uint16_t	command;
2479
2480	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2481	command |= bit;
2482	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2483}
2484
2485static __inline void
2486pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2487{
2488	uint16_t	command;
2489
2490	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2491	command &= ~bit;
2492	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2493}
2494
2495int
2496pci_enable_busmaster_method(device_t dev, device_t child)
2497{
2498	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2499	return (0);
2500}
2501
2502int
2503pci_disable_busmaster_method(device_t dev, device_t child)
2504{
2505	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2506	return (0);
2507}
2508
2509int
2510pci_enable_io_method(device_t dev, device_t child, int space)
2511{
2512	uint16_t bit;
2513
2514	switch(space) {
2515	case SYS_RES_IOPORT:
2516		bit = PCIM_CMD_PORTEN;
2517		break;
2518	case SYS_RES_MEMORY:
2519		bit = PCIM_CMD_MEMEN;
2520		break;
2521	default:
2522		return (EINVAL);
2523	}
2524	pci_set_command_bit(dev, child, bit);
2525	return (0);
2526}
2527
2528int
2529pci_disable_io_method(device_t dev, device_t child, int space)
2530{
2531	uint16_t bit;
2532
2533	switch(space) {
2534	case SYS_RES_IOPORT:
2535		bit = PCIM_CMD_PORTEN;
2536		break;
2537	case SYS_RES_MEMORY:
2538		bit = PCIM_CMD_MEMEN;
2539		break;
2540	default:
2541		return (EINVAL);
2542	}
2543	pci_clear_command_bit(dev, child, bit);
2544	return (0);
2545}
2546
2547/*
2548 * New style pci driver.  Parent device is either a pci-host-bridge or a
2549 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2550 */
2551
/*
 * Dump a human-readable summary of a device's config header to the
 * console when booting verbose: IDs, location, class, command/status,
 * timing fields, interrupt routing, and the power-management, MSI and
 * MSI-X capabilities that were parsed into 'cfg'.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* Timing registers are scaled into nanoseconds for display. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read live status to report the current D-state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2608
2609static int
2610pci_porten(device_t dev)
2611{
2612	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2613}
2614
2615static int
2616pci_memen(device_t dev)
2617{
2618	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2619}
2620
/*
 * Probe the BAR at config offset 'reg': return its current value in
 * *mapp and the all-ones write-back value (used for sizing) in
 * *testvalp.  The BAR is restored to its original value before
 * returning, and decoding is disabled around the probe so the device
 * never responds at a bogus address.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps the ROM enable bit (bit 0) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2684
/*
 * Program the BAR described by 'pm' with 'base' (writing the high dword
 * as well for 64-bit BARs), then refresh pm->pm_value with what the
 * device actually latched, since low bits are read-only.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so the cached value reflects the hardware state. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2705
2706struct pci_map *
2707pci_find_bar(device_t dev, int reg)
2708{
2709	struct pci_devinfo *dinfo;
2710	struct pci_map *pm;
2711
2712	dinfo = device_get_ivars(dev);
2713	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2714		if (pm->pm_reg == reg)
2715			return (pm);
2716	}
2717	return (NULL);
2718}
2719
2720int
2721pci_bar_enabled(device_t dev, struct pci_map *pm)
2722{
2723	struct pci_devinfo *dinfo;
2724	uint16_t cmd;
2725
2726	dinfo = device_get_ivars(dev);
2727	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2728	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2729		return (0);
2730	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2731	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2732		return ((cmd & PCIM_CMD_MEMEN) != 0);
2733	else
2734		return ((cmd & PCIM_CMD_PORTEN) != 0);
2735}
2736
/*
 * Record a newly discovered BAR (register offset, raw value and log2
 * size) on the device's map list.  The list is kept sorted by register
 * offset, so the new entry is inserted after the last entry with a
 * smaller offset.  Duplicate offsets are a bug (asserted).
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after, keeping the list sorted by reg. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2761
2762static void
2763pci_restore_bars(device_t dev)
2764{
2765	struct pci_devinfo *dinfo;
2766	struct pci_map *pm;
2767	int ln2range;
2768
2769	dinfo = device_get_ivars(dev);
2770	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2771		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2772			ln2range = 32;
2773		else
2774			ln2range = pci_maprange(pm->pm_value);
2775		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2776		if (ln2range == 64)
2777			pci_write_config(dev, pm->pm_reg + 4,
2778			    pm->pm_value >> 32, 4);
2779	}
2780}
2781
/*
 * Add a resource based on a pci map register. Return 1 if the map
 * register is a 32bit map register or 2 if it is a 64bit register.
 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, flags, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	/* Size the BAR; 'testval' holds the all-ones write-back value. */
	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* The BAR's address does not fit in this arch's u_long. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	flags = RF_ALIGNMENT_LOG2(mapsize);
	if (prefetch)
		flags |= RF_PREFETCHABLE;
	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    flags);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, flags);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with the range the parent granted us. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2953
2954/*
2955 * For ATA devices we need to decide early what addressing mode to use.
2956 * Legacy demands that the primary and secondary ATA ports sits on the
2957 * same addresses that old ISA hardware did. This dictates that we use
2958 * those addresses and ignore the BAR's if we cannot set PCI native
2959 * addressing mode.
2960 */
2961static void
2962pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2963    uint32_t prefetchmask)
2964{
2965	struct resource *r;
2966	int rid, type, progif;
2967#if 0
2968	/* if this device supports PCI native addressing use it */
2969	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2970	if ((progif & 0x8a) == 0x8a) {
2971		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2972		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2973			printf("Trying ATA native PCI addressing mode\n");
2974			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2975		}
2976	}
2977#endif
2978	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2979	type = SYS_RES_IOPORT;
2980	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2981		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2982		    prefetchmask & (1 << 0));
2983		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2984		    prefetchmask & (1 << 1));
2985	} else {
2986		rid = PCIR_BAR(0);
2987		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2988		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2989		    0x1f7, 8, 0);
2990		rid = PCIR_BAR(1);
2991		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2992		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2993		    0x3f6, 1, 0);
2994	}
2995	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2996		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2997		    prefetchmask & (1 << 2));
2998		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2999		    prefetchmask & (1 << 3));
3000	} else {
3001		rid = PCIR_BAR(2);
3002		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
3003		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
3004		    0x177, 8, 0);
3005		rid = PCIR_BAR(3);
3006		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
3007		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
3008		    0x376, 1, 0);
3009	}
3010	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
3011	    prefetchmask & (1 << 4));
3012	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
3013	    prefetchmask & (1 << 5));
3014}
3015
/*
 * Determine the legacy INTx IRQ for a device and record it as the rid 0
 * SYS_RES_IRQ resource.  The IRQ comes, in priority order, from a
 * per-device "hw.pciD.B.S.INTx.irq" tunable, from the bus's interrupt
 * routing (PCI_ASSIGN_INTERRUPT), or from the intline config register;
 * with force_route set, routing is attempted even when intline is valid.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3063
3064/* Perform early OHCI takeover from SMM. */
3065static void
3066ohci_early_takeover(device_t self)
3067{
3068	struct resource *res;
3069	uint32_t ctl;
3070	int rid;
3071	int i;
3072
3073	rid = PCIR_BAR(0);
3074	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
3075	if (res == NULL)
3076		return;
3077
3078	ctl = bus_read_4(res, OHCI_CONTROL);
3079	if (ctl & OHCI_IR) {
3080		if (bootverbose)
3081			printf("ohci early: "
3082			    "SMM active, request owner change\n");
3083		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
3084		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
3085			DELAY(1000);
3086			ctl = bus_read_4(res, OHCI_CONTROL);
3087		}
3088		if (ctl & OHCI_IR) {
3089			if (bootverbose)
3090				printf("ohci early: "
3091				    "SMM does not respond, resetting\n");
3092			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
3093		}
3094		/* Disable interrupts */
3095		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
3096	}
3097
3098	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3099}
3100
3101/* Perform early UHCI takeover from SMM. */
3102static void
3103uhci_early_takeover(device_t self)
3104{
3105	struct resource *res;
3106	int rid;
3107
3108	/*
3109	 * Set the PIRQD enable bit and switch off all the others. We don't
3110	 * want legacy support to interfere with us XXX Does this also mean
3111	 * that the BIOS won't touch the keyboard anymore if it is connected
3112	 * to the ports of the root hub?
3113	 */
3114	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3115
3116	/* Disable interrupts */
3117	rid = PCI_UHCI_BASE_REG;
3118	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3119	if (res != NULL) {
3120		bus_write_2(res, UHCI_INTR, 0);
3121		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3122	}
3123}
3124
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register space (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* Walk the extended capability list in PCI config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS doesn't own this controller; keep looking. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100ms for the BIOS to release its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3180
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register space (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/*
	 * Seed eec with all-ones so that XHCI_XECP_NEXT(eec) is non-zero
	 * on the first loop test; eec is only valid after the first read.
	 */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		/* Walk the extended capability list in MMIO space. */
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back USBSTS to flush the write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3242
3243#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
/*
 * Reserve the secondary bus number range programmed into a PCI-PCI or
 * CardBus bridge from our parent.  If the range is invalid, cannot be
 * reserved, or pci_clear_buses is set, the secbus/subbus registers are
 * cleared so a complete renumbering happens later.  Devices with other
 * header types are ignored.
 */
static void
pci_reserve_secbus(device_t bus, device_t dev, pcicfgregs *cfg,
    struct resource_list *rl)
{
	struct resource *res;
	char *cp;
	u_long start, end, count;
	int rid, sec_bus, sec_reg, sub_bus, sub_reg, sup_bus;

	/* The secbus/subbus register offsets depend on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return;
	}

	/*
	 * If the existing bus range is valid, attempt to reserve it
	 * from our parent.  If this fails for any reason, clear the
	 * secbus and subbus registers.
	 *
	 * XXX: Should we reset sub_bus to sec_bus if it is < sec_bus?
	 * This would at least preserve the existing sec_bus if it is
	 * valid.
	 */
	sec_bus = PCI_READ_CONFIG(bus, dev, sec_reg, 1);
	sub_bus = PCI_READ_CONFIG(bus, dev, sub_reg, 1);

	/* Quirk handling. */
	switch (pci_get_devid(dev)) {
	case 0x12258086:		/* Intel 82454KX/GX (Orion) */
		/* Derive the bus range from the supervisor bus register. */
		sup_bus = pci_read_config(dev, 0x41, 1);
		if (sup_bus != 0xff) {
			sec_bus = sup_bus + 1;
			sub_bus = sup_bus + 1;
			PCI_WRITE_CONFIG(bus, dev, sec_reg, sec_bus, 1);
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;

	case 0x00dd10de:
		/* Compaq R3000 BIOS sets wrong subordinate bus number. */
		if ((cp = getenv("smbios.planar.maker")) == NULL)
			break;
		if (strncmp(cp, "Compal", 6) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if ((cp = getenv("smbios.planar.product")) == NULL)
			break;
		if (strncmp(cp, "08A0", 4) != 0) {
			freeenv(cp);
			break;
		}
		freeenv(cp);
		if (sub_bus < 0xa) {
			sub_bus = 0xa;
			PCI_WRITE_CONFIG(bus, dev, sub_reg, sub_bus, 1);
		}
		break;
	}

	if (bootverbose)
		printf("\tsecbus=%d, subbus=%d\n", sec_bus, sub_bus);
	if (sec_bus > 0 && sub_bus >= sec_bus) {
		start = sec_bus;
		end = sub_bus;
		count = end - start + 1;

		resource_list_add(rl, PCI_RES_BUS, 0, 0ul, ~0ul, count);

		/*
		 * If requested, clear secondary bus registers in
		 * bridge devices to force a complete renumbering
		 * rather than reserving the existing range.  However,
		 * preserve the existing size.
		 */
		if (pci_clear_buses)
			goto clear;

		rid = 0;
		res = resource_list_reserve(rl, bus, dev, PCI_RES_BUS, &rid,
		    start, end, count, 0);
		if (res != NULL)
			return;

		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d secbus failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev));
	}

clear:
	PCI_WRITE_CONFIG(bus, dev, sec_reg, 0, 1);
	PCI_WRITE_CONFIG(bus, dev, sub_reg, 0, 1);
}
3348
/*
 * Allocate a bus number range (PCI_RES_BUS) on behalf of a bridge
 * child.  Only rid 0 is supported.  If the range has not been reserved
 * yet, it is lazily reserved from the parent and the bridge's
 * secondary/subordinate bus registers are programmed to match.
 * Returns the allocated resource or NULL on failure.
 */
static struct resource *
pci_alloc_secbus(device_t dev, device_t child, int *rid, u_long start,
    u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	struct resource *res;
	int sec_reg, sub_reg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	/* Only PCI-PCI and CardBus bridges carry a secondary bus range. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_BRIDGE:
		sec_reg = PCIR_SECBUS_1;
		sub_reg = PCIR_SUBBUS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		sec_reg = PCIR_SECBUS_2;
		sub_reg = PCIR_SUBBUS_2;
		break;
	default:
		return (NULL);
	}

	if (*rid != 0)
		return (NULL);

	if (resource_list_find(rl, PCI_RES_BUS, *rid) == NULL)
		resource_list_add(rl, PCI_RES_BUS, *rid, start, end, count);
	if (!resource_list_reserved(rl, PCI_RES_BUS, *rid)) {
		/* First allocation: reserve the range from the parent. */
		res = resource_list_reserve(rl, dev, child, PCI_RES_BUS, rid,
		    start, end, count, flags & ~RF_ACTIVE);
		if (res == NULL) {
			resource_list_delete(rl, PCI_RES_BUS, *rid);
			device_printf(child, "allocating %lu bus%s failed\n",
			    count, count == 1 ? "" : "es");
			return (NULL);
		}
		if (bootverbose)
			device_printf(child,
			    "Lazy allocation of %lu bus%s at %lu\n", count,
			    count == 1 ? "" : "es", rman_get_start(res));
		/* Program the bridge to match the reserved range. */
		PCI_WRITE_CONFIG(dev, child, sec_reg, rman_get_start(res), 1);
		PCI_WRITE_CONFIG(dev, child, sub_reg, rman_get_end(res), 1);
	}
	return (resource_list_alloc(rl, dev, child, PCI_RES_BUS, rid, start,
	    end, count, flags));
}
3399#endif
3400
/*
 * Populate a device's resource list from its BARs.  ATA controllers in
 * compatibility mode get the legacy fixed I/O ranges instead of their
 * BARs.  Also adds quirked maps, assigns the INTx interrupt, performs
 * early takeover of USB controllers from SMM, and (with NEW_PCIB)
 * reserves bridge secondary bus ranges.  If force is set, BARs are
 * added even when unassigned; prefetchmask flags prefetchable BARs.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/* pci_add_map() returns how many slots the BAR used. */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from SMM/BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}

#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	/*
	 * Reserve resources for secondary bus ranges behind bridge
	 * devices.
	 */
	pci_reserve_secbus(bus, dev, cfg, rl);
#endif
}
3482
3483static struct pci_devinfo *
3484pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3485    int slot, int func, size_t dinfo_size)
3486{
3487	struct pci_devinfo *dinfo;
3488
3489	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3490	if (dinfo != NULL)
3491		pci_add_child(dev, dinfo);
3492
3493	return (dinfo);
3494}
3495
/*
 * Scan every slot and function on the given bus and add a child device
 * for each function found.  dinfo_size lets subclassed busses allocate
 * a larger pci_devinfo; it must be at least sizeof(struct pci_devinfo).
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++, first_func = 0) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots whose header type is invalid (no device). */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Only multi-function devices have functions beyond 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);
	}
#undef REG
}
3542
3543void
3544pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3545{
3546	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3547	device_set_ivars(dinfo->cfg.dev, dinfo);
3548	resource_list_init(&dinfo->resources);
3549	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3550	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3551	pci_print_verbose(dinfo);
3552	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3553}
3554
/*
 * Device probe method for the generic PCI bus driver.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3564
/*
 * Attach logic shared by the generic PCI bus driver and its
 * subclasses: reserve our own bus number from the parent (when
 * PCI_RES_BUS is available) and set up the DMA tag for the bus.  The
 * PCI_DMA_BOUNDARY constraint is applied only at the topmost PCI bus
 * (i.e. when our grandparent is not itself a "pci" device) so child
 * busses simply inherit it.  Returns 0 or ENXIO.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif
#ifdef PCI_RES_BUS
	int rid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
#ifdef PCI_RES_BUS
	/* Claim our own bus number from the parent bridge. */
	rid = 0;
	sc->sc_bus = bus_alloc_resource(dev, PCI_RES_BUS, &rid, busno, busno,
	    1, 0);
	if (sc->sc_bus == NULL) {
		device_printf(dev, "failed to allocate bus number\n");
		return (ENXIO);
	}
#endif
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		/* Fall back to (or inherit) the parent's DMA tag. */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3611
/*
 * Device attach method: perform the common setup and then enumerate
 * all children on this bus.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3632
3633#ifdef PCI_RES_BUS
3634static int
3635pci_detach(device_t dev)
3636{
3637	struct pci_softc *sc;
3638	int error;
3639
3640	error = bus_generic_detach(dev);
3641	if (error)
3642		return (error);
3643	sc = device_get_softc(dev);
3644	return (bus_release_resource(dev, PCI_RES_BUS, 0, sc->sc_bus));
3645}
3646#endif
3647
3648static void
3649pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3650    int state)
3651{
3652	device_t child, pcib;
3653	struct pci_devinfo *dinfo;
3654	int dstate, i;
3655
3656	/*
3657	 * Set the device to the given state.  If the firmware suggests
3658	 * a different power state, use it instead.  If power management
3659	 * is not present, the firmware is responsible for managing
3660	 * device power.  Skip children who aren't attached since they
3661	 * are handled separately.
3662	 */
3663	pcib = device_get_parent(dev);
3664	for (i = 0; i < numdevs; i++) {
3665		child = devlist[i];
3666		dinfo = device_get_ivars(child);
3667		dstate = state;
3668		if (device_is_attached(child) &&
3669		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3670			pci_set_powerstate(child, dstate);
3671	}
3672}
3673
/*
 * Bus suspend method: save each child's PCI config space, suspend all
 * children, and then (if pci_do_power_suspend is set) place them in
 * D3.  Returns 0 on success or an errno from device_get_children() or
 * bus_generic_suspend().
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3705
/*
 * Bus resume method: power each child back to D0 (if
 * pci_do_power_resume is set), restore its saved config space, and
 * resume critical device classes before the rest.  Returns 0 on
 * success or an errno from device_get_children().
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3761
3762static void
3763pci_load_vendor_data(void)
3764{
3765	caddr_t data;
3766	void *ptr;
3767	size_t sz;
3768
3769	data = preload_search_by_type("pci_vendor_data");
3770	if (data != NULL) {
3771		ptr = preload_fetch_addr(data);
3772		sz = preload_fetch_size(data);
3773		if (ptr != NULL && sz != 0) {
3774			pci_vendordata = ptr;
3775			pci_vendordata_size = sz;
3776			/* terminate the database */
3777			pci_vendordata[pci_vendordata_size] = '\n';
3778		}
3779	}
3780}
3781
3782void
3783pci_driver_added(device_t dev, driver_t *driver)
3784{
3785	int numdevs;
3786	device_t *devlist;
3787	device_t child;
3788	struct pci_devinfo *dinfo;
3789	int i;
3790
3791	if (bootverbose)
3792		device_printf(dev, "driver added\n");
3793	DEVICE_IDENTIFY(driver, dev);
3794	if (device_get_children(dev, &devlist, &numdevs) != 0)
3795		return;
3796	for (i = 0; i < numdevs; i++) {
3797		child = devlist[i];
3798		if (device_get_state(child) != DS_NOTPRESENT)
3799			continue;
3800		dinfo = device_get_ivars(child);
3801		pci_print_verbose(dinfo);
3802		if (bootverbose)
3803			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3804		pci_cfg_restore(child, dinfo);
3805		if (device_probe_and_attach(child) != 0)
3806			pci_child_detached(dev, child);
3807	}
3808	free(devlist, M_TEMP);
3809}
3810
/*
 * Bus setup_intr method.  After the generic setup succeeds, perform
 * PCI-specific bookkeeping for direct children: for rid 0 (legacy
 * INTx) ensure INTx is enabled; for rid > 0 (MSI/MSI-X) ask the
 * parent bridge to map the message, program the address/data pair into
 * the device on first use, track the handler count, and disable INTx
 * (unless the device has the MSI_INTX_BUG quirk).  On failure the
 * generic handler is torn down again and the error is returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vector lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector lazily on first handler setup. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3910
3911int
3912pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3913    void *cookie)
3914{
3915	struct msix_table_entry *mte;
3916	struct resource_list_entry *rle;
3917	struct pci_devinfo *dinfo;
3918	int error, rid;
3919
3920	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3921		return (EINVAL);
3922
3923	/* If this isn't a direct child, just bail out */
3924	if (device_get_parent(child) != dev)
3925		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3926
3927	rid = rman_get_rid(irq);
3928	if (rid == 0) {
3929		/* Mask INTx */
3930		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3931	} else {
3932		/*
3933		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3934		 * decrement the appropriate handlers count and mask the
3935		 * MSI-X message, or disable MSI messages if the count
3936		 * drops to 0.
3937		 */
3938		dinfo = device_get_ivars(child);
3939		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3940		if (rle->res != irq)
3941			return (EINVAL);
3942		if (dinfo->cfg.msi.msi_alloc > 0) {
3943			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3944			    ("MSI-X index too high"));
3945			if (dinfo->cfg.msi.msi_handlers == 0)
3946				return (EINVAL);
3947			dinfo->cfg.msi.msi_handlers--;
3948			if (dinfo->cfg.msi.msi_handlers == 0)
3949				pci_disable_msi(child);
3950		} else {
3951			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3952			    ("No MSI or MSI-X interrupts allocated"));
3953			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3954			    ("MSI-X index too high"));
3955			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3956			if (mte->mte_handlers == 0)
3957				return (EINVAL);
3958			mte->mte_handlers--;
3959			if (mte->mte_handlers == 0)
3960				pci_mask_msix(child, rid - 1);
3961		}
3962	}
3963	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3964	if (rid > 0)
3965		KASSERT(error == 0,
3966		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3967	return (error);
3968}
3969
3970int
3971pci_print_child(device_t dev, device_t child)
3972{
3973	struct pci_devinfo *dinfo;
3974	struct resource_list *rl;
3975	int retval = 0;
3976
3977	dinfo = device_get_ivars(child);
3978	rl = &dinfo->resources;
3979
3980	retval += bus_print_child_header(dev, child);
3981
3982	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3983	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3984	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3985	if (device_get_flags(dev))
3986		retval += printf(" flags %#x", device_get_flags(dev));
3987
3988	retval += printf(" at device %d.%d", pci_get_slot(child),
3989	    pci_get_function(child));
3990
3991	retval += bus_print_child_domain(dev, child);
3992	retval += bus_print_child_footer(dev, child);
3993
3994	return (retval);
3995}
3996
/*
 * Table mapping PCI class/subclass codes to human-readable names,
 * consulted when no driver claims a device.  A subclass of -1 is the
 * catch-all entry for its class.  "report" selects when the device is
 * announced: 0 = only with bootverbose, 1 = always.  Terminated by an
 * all-zero entry.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
4091
4092void
4093pci_probe_nomatch(device_t dev, device_t child)
4094{
4095	int i, report;
4096	const char *cp, *scp;
4097	char *device;
4098
4099	/*
4100	 * Look for a listing for this device in a loaded device database.
4101	 */
4102	report = 1;
4103	if ((device = pci_describe_device(child)) != NULL) {
4104		device_printf(dev, "<%s>", device);
4105		free(device, M_DEVBUF);
4106	} else {
4107		/*
4108		 * Scan the class/subclass descriptions for a general
4109		 * description.
4110		 */
4111		cp = "unknown";
4112		scp = NULL;
4113		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
4114			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
4115				if (pci_nomatch_tab[i].subclass == -1) {
4116					cp = pci_nomatch_tab[i].desc;
4117					report = pci_nomatch_tab[i].report;
4118				} else if (pci_nomatch_tab[i].subclass ==
4119				    pci_get_subclass(child)) {
4120					scp = pci_nomatch_tab[i].desc;
4121					report = pci_nomatch_tab[i].report;
4122				}
4123			}
4124		}
4125		if (report || bootverbose) {
4126			device_printf(dev, "<%s%s%s>",
4127			    cp ? cp : "",
4128			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
4129			    scp ? scp : "");
4130		}
4131	}
4132	if (report || bootverbose) {
4133		printf(" at device %d.%d (no driver attached)\n",
4134		    pci_get_slot(child), pci_get_function(child));
4135	}
4136	pci_cfg_save(child, device_get_ivars(child), 1);
4137}
4138
/*
 * Bus method invoked after a child's driver detaches: reclaim any
 * resources the driver leaked (complaining about each category), then
 * save the device's config state, allowing a power-down per policy.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");
#ifdef PCI_RES_BUS
	if (resource_list_release_active(rl, dev, child, PCI_RES_BUS) != 0)
		pci_printf(&dinfo->cfg, "Device leaked PCI bus numbers\n");
#endif

	pci_cfg_save(child, dinfo, 1);
}
4170
4171/*
4172 * Parse the PCI device database, if loaded, and return a pointer to a
4173 * description of the device.
4174 *
4175 * The database is flat text formatted as follows:
4176 *
4177 * Any line not in a valid format is ignored.
4178 * Lines are terminated with newline '\n' characters.
4179 *
4180 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
4181 * the vendor name.
4182 *
4183 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
4184 * - devices cannot be listed without a corresponding VENDOR line.
4185 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
4186 * another TAB, then the device name.
4187 */
4188
4189/*
4190 * Assuming (ptr) points to the beginning of a line in the database,
4191 * return the vendor or device and description of the next entry.
4192 * The value of (vendor) or (device) inappropriate for the entry type
4193 * is set to -1.  Returns nonzero at the end of the database.
4194 *
4195 * Note that this is slightly unrobust in the face of corrupt data;
4196 * we attempt to safeguard against this by spamming the end of the
4197 * database with a newline when we initialise.
4198 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Defaults: entry type unknown, empty description. */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes of the database remaining from 'cp' onward. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/*
		 * vendor entry?
		 *
		 * NOTE(review): the %80[^\n] scanset stores up to 80
		 * characters plus a terminating NUL, so *desc must be
		 * at least 81 bytes — verify against callers.
		 */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		/*
		 * NOTE(review): *cp is tested before 'left' here, so
		 * this reads one byte past the counted region when
		 * left == 0.  The trailing newline spammed onto the
		 * database at load time (see the comment above this
		 * function) is what keeps that read in bounds —
		 * confirm pci_load_vendor_data() still guarantees it.
		 */
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
4244
4245static char *
4246pci_describe_device(device_t dev)
4247{
4248	int	vendor, device;
4249	char	*desc, *vp, *dp, *line;
4250
4251	desc = vp = dp = NULL;
4252
4253	/*
4254	 * If we have no vendor data, we can't do anything.
4255	 */
4256	if (pci_vendordata == NULL)
4257		goto out;
4258
4259	/*
4260	 * Scan the vendor data looking for this device
4261	 */
4262	line = pci_vendordata;
4263	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4264		goto out;
4265	for (;;) {
4266		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4267			goto out;
4268		if (vendor == pci_get_vendor(dev))
4269			break;
4270	}
4271	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4272		goto out;
4273	for (;;) {
4274		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4275			*dp = 0;
4276			break;
4277		}
4278		if (vendor != -1) {
4279			*dp = 0;
4280			break;
4281		}
4282		if (device == pci_get_device(dev))
4283			break;
4284	}
4285	if (dp[0] == '\0')
4286		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4287	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4288	    NULL)
4289		sprintf(desc, "%s, %s", vp, dp);
4290out:
4291	if (vp != NULL)
4292		free(vp, M_DEVBUF);
4293	if (dp != NULL)
4294		free(dp, M_DEVBUF);
4295	return(desc);
4296}
4297
/*
 * Bus method backing the pci_get_*() accessors: copy the requested
 * field from the child's cached config registers into *result.
 * Returns 0 on success, ENOENT for unknown ivars, and EINVAL for
 * PCI_IVAR_ETHADDR (no Ethernet address is available here).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4380
4381int
4382pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4383{
4384	struct pci_devinfo *dinfo;
4385
4386	dinfo = device_get_ivars(child);
4387
4388	switch (which) {
4389	case PCI_IVAR_INTPIN:
4390		dinfo->cfg.intpin = value;
4391		return (0);
4392	case PCI_IVAR_ETHADDR:
4393	case PCI_IVAR_SUBVENDOR:
4394	case PCI_IVAR_SUBDEVICE:
4395	case PCI_IVAR_VENDOR:
4396	case PCI_IVAR_DEVICE:
4397	case PCI_IVAR_DEVID:
4398	case PCI_IVAR_CLASS:
4399	case PCI_IVAR_SUBCLASS:
4400	case PCI_IVAR_PROGIF:
4401	case PCI_IVAR_REVID:
4402	case PCI_IVAR_IRQ:
4403	case PCI_IVAR_DOMAIN:
4404	case PCI_IVAR_BUS:
4405	case PCI_IVAR_SLOT:
4406	case PCI_IVAR_FUNCTION:
4407		return (EINVAL);	/* disallow for now */
4408
4409	default:
4410		return (ENOENT);
4411	}
4412}
4413
4414#include "opt_ddb.h"
4415#ifdef DDB
4416#include <ddb/ddb.h>
4417#include <sys/cons.h>
4418
4419/*
4420 * List resources based on pci map registers, used for within ddb
4421 */
4422
/*
 * DDB "show pciregs" command: walk the global device queue and print
 * one summary line (name/unit, selector, class, IDs, revision, header
 * type) per PCI device.  Unnamed devices are printed as "none<N>".
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4462#endif /* DDB */
4463
/*
 * Lazily reserve the resource backing a BAR the first time a driver
 * allocates it: size the BAR (probing the hardware if we have not seen
 * it before), sanity-check the requested resource type against the BAR
 * type, reserve a suitably sized and aligned range, and program the
 * BAR with the assigned address.  Returns the reserved (inactive)
 * resource, or NULL on any failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;		/* log2 of the BAR size */

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type disagrees with the BAR type. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually obtained. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4561
/*
 * BUS_ALLOC_RESOURCE method.  Requests from grandchildren are passed
 * straight to our parent.  For direct children this performs lazy
 * resource allocation: interrupt routing on first IRQ allocation, and
 * on-demand BAR reservation (via pci_reserve_map()) for port/memory
 * ranges that have no resource list entry yet.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
#if defined(NEW_PCIB) && defined(PCI_RES_BUS)
	case PCI_RES_BUS:
		return (pci_alloc_secbus(dev, child, rid, start, end, count,
		    flags));
#endif
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Common path: satisfy the request from the resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4640
4641int
4642pci_release_resource(device_t dev, device_t child, int type, int rid,
4643    struct resource *r)
4644{
4645	struct pci_devinfo *dinfo;
4646	struct resource_list *rl;
4647	pcicfgregs *cfg;
4648
4649	if (device_get_parent(child) != dev)
4650		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4651		    type, rid, r));
4652
4653	dinfo = device_get_ivars(child);
4654	cfg = &dinfo->cfg;
4655#ifdef NEW_PCIB
4656	/*
4657	 * PCI-PCI bridge I/O window resources are not BARs.  For
4658	 * those allocations just pass the request up the tree.
4659	 */
4660	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4661	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4662		switch (rid) {
4663		case PCIR_IOBASEL_1:
4664		case PCIR_MEMBASE_1:
4665		case PCIR_PMBASEL_1:
4666			return (bus_generic_release_resource(dev, child, type,
4667			    rid, r));
4668		}
4669	}
4670#endif
4671
4672	rl = &dinfo->resources;
4673	return (resource_list_release(rl, dev, child, type, rid, r));
4674}
4675
4676int
4677pci_activate_resource(device_t dev, device_t child, int type, int rid,
4678    struct resource *r)
4679{
4680	struct pci_devinfo *dinfo;
4681	int error;
4682
4683	error = bus_generic_activate_resource(dev, child, type, rid, r);
4684	if (error)
4685		return (error);
4686
4687	/* Enable decoding in the command register when activating BARs. */
4688	if (device_get_parent(child) == dev) {
4689		/* Device ROMs need their decoding explicitly enabled. */
4690		dinfo = device_get_ivars(child);
4691		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4692			pci_write_bar(child, pci_find_bar(child, rid),
4693			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4694		switch (type) {
4695		case SYS_RES_IOPORT:
4696		case SYS_RES_MEMORY:
4697			error = PCI_ENABLE_IO(dev, child, type);
4698			break;
4699		}
4700	}
4701	return (error);
4702}
4703
4704int
4705pci_deactivate_resource(device_t dev, device_t child, int type,
4706    int rid, struct resource *r)
4707{
4708	struct pci_devinfo *dinfo;
4709	int error;
4710
4711	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4712	if (error)
4713		return (error);
4714
4715	/* Disable decoding for device ROMs. */
4716	if (device_get_parent(child) == dev) {
4717		dinfo = device_get_ivars(child);
4718		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4719			pci_write_bar(child, pci_find_bar(child, rid),
4720			    rman_get_start(r));
4721	}
4722	return (0);
4723}
4724
/*
 * Fully remove a child device: detach its driver if attached, disable
 * its memory/port decoding, release every resource on its list, then
 * delete the device and free its config state.  Resource teardown must
 * happen after decoding is disabled so the hardware cannot respond to
 * addresses being handed back.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/* Complain about, then reclaim, leaked resources. */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4764
4765void
4766pci_delete_resource(device_t dev, device_t child, int type, int rid)
4767{
4768	struct pci_devinfo *dinfo;
4769	struct resource_list *rl;
4770	struct resource_list_entry *rle;
4771
4772	if (device_get_parent(child) != dev)
4773		return;
4774
4775	dinfo = device_get_ivars(child);
4776	rl = &dinfo->resources;
4777	rle = resource_list_find(rl, type, rid);
4778	if (rle == NULL)
4779		return;
4780
4781	if (rle->res) {
4782		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4783		    resource_list_busy(rl, type, rid)) {
4784			device_printf(dev, "delete_resource: "
4785			    "Resource still owned by child, oops. "
4786			    "(type=%d, rid=%d, addr=%lx)\n",
4787			    type, rid, rman_get_start(rle->res));
4788			return;
4789		}
4790		resource_list_unreserve(rl, dev, child, type, rid);
4791	}
4792	resource_list_delete(rl, type, rid);
4793}
4794
4795struct resource_list *
4796pci_get_resource_list (device_t dev, device_t child)
4797{
4798	struct pci_devinfo *dinfo = device_get_ivars(child);
4799
4800	return (&dinfo->resources);
4801}
4802
4803bus_dma_tag_t
4804pci_get_dma_tag(device_t bus, device_t dev)
4805{
4806	struct pci_softc *sc = device_get_softc(bus);
4807
4808	return (sc->sc_dma_tag);
4809}
4810
4811uint32_t
4812pci_read_config_method(device_t dev, device_t child, int reg, int width)
4813{
4814	struct pci_devinfo *dinfo = device_get_ivars(child);
4815	pcicfgregs *cfg = &dinfo->cfg;
4816
4817	return (PCIB_READ_CONFIG(device_get_parent(dev),
4818	    cfg->bus, cfg->slot, cfg->func, reg, width));
4819}
4820
4821void
4822pci_write_config_method(device_t dev, device_t child, int reg,
4823    uint32_t val, int width)
4824{
4825	struct pci_devinfo *dinfo = device_get_ivars(child);
4826	pcicfgregs *cfg = &dinfo->cfg;
4827
4828	PCIB_WRITE_CONFIG(device_get_parent(dev),
4829	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4830}
4831
4832int
4833pci_child_location_str_method(device_t dev, device_t child, char *buf,
4834    size_t buflen)
4835{
4836
4837	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4838	    pci_get_function(child));
4839	return (0);
4840}
4841
4842int
4843pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4844    size_t buflen)
4845{
4846	struct pci_devinfo *dinfo;
4847	pcicfgregs *cfg;
4848
4849	dinfo = device_get_ivars(child);
4850	cfg = &dinfo->cfg;
4851	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4852	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4853	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4854	    cfg->progif);
4855	return (0);
4856}
4857
4858int
4859pci_assign_interrupt_method(device_t dev, device_t child)
4860{
4861	struct pci_devinfo *dinfo = device_get_ivars(child);
4862	pcicfgregs *cfg = &dinfo->cfg;
4863
4864	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4865	    cfg->intpin));
4866}
4867
4868static int
4869pci_modevent(module_t mod, int what, void *arg)
4870{
4871	static struct cdev *pci_cdev;
4872
4873	switch (what) {
4874	case MOD_LOAD:
4875		STAILQ_INIT(&pci_devq);
4876		pci_generation = 0;
4877		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4878		    "pci");
4879		pci_load_vendor_data();
4880		break;
4881
4882	case MOD_UNLOAD:
4883		destroy_dev(pci_cdev);
4884		break;
4885	}
4886
4887	return (0);
4888}
4889
/*
 * Rewrite the saved PCI Express control registers from the cached copy
 * in dinfo.  Mirrors pci_cfg_save_pcie(): each register is written
 * only under the same version/port-type condition used when saving it
 * (NOTE(review): presumably because v1 capabilities only implement
 * certain registers for certain port types — confirm against the PCIe
 * base specification).
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4925
4926static void
4927pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4928{
4929	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4930	    dinfo->cfg.pcix.pcix_command,  2);
4931}
4932
/*
 * Restore a (header type 0) device's config registers from the copy
 * cached by pci_cfg_save(): BARs, command register and assorted type-0
 * header fields, plus PCIe/PCI-X capability registers and MSI/MSI-X
 * state where present.  The device is returned to D0 first — the
 * ordering matters, see below.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4982
/*
 * Snapshot the PCI Express control registers into dinfo for later
 * restore by pci_cfg_restore_pcie().  Each register is read only
 * under the version/port-type condition that the restore side uses,
 * so the two functions must be kept in sync.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
5020
5021static void
5022pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
5023{
5024	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
5025	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
5026}
5027
/*
 * Snapshot a (header type 0) device's writable config registers into
 * dinfo so pci_cfg_restore() can put them back later.  If 'setstate'
 * is non-zero, additionally power the device down to D3 subject to the
 * pci_do_power_nodriver policy tunable.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
5113
5114/* Wrapper APIs suitable for device driver use. */
5115void
5116pci_save_state(device_t dev)
5117{
5118	struct pci_devinfo *dinfo;
5119
5120	dinfo = device_get_ivars(dev);
5121	pci_cfg_save(dev, dinfo, 0);
5122}
5123
5124void
5125pci_restore_state(device_t dev)
5126{
5127	struct pci_devinfo *dinfo;
5128
5129	dinfo = device_get_ivars(dev);
5130	pci_cfg_restore(dev, dinfo);
5131}
5132
5133static uint16_t
5134pci_get_rid_method(device_t dev, device_t child)
5135{
5136
5137	return (PCIB_GET_RID(device_get_parent(dev), child));
5138}
5139