pci.c revision 279470
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/pci/pci.c 279470 2015-03-01 04:22:06Z rstone $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
73#define	PCIR_IS_BIOS(cfg, reg)						\
74	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76
77static int		pci_has_quirk(uint32_t devid, int quirk);
78static pci_addr_t	pci_mapbase(uint64_t mapreg);
79static const char	*pci_maptype(uint64_t mapreg);
80static int		pci_mapsize(uint64_t testval);
81static int		pci_maprange(uint64_t mapreg);
82static pci_addr_t	pci_rombase(uint64_t mapreg);
83static int		pci_romsize(uint64_t testval);
84static void		pci_fixancient(pcicfgregs *cfg);
85static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86
87static int		pci_porten(device_t dev);
88static int		pci_memen(device_t dev);
89static void		pci_assign_interrupt(device_t bus, device_t dev,
90			    int force_route);
91static int		pci_add_map(device_t bus, device_t dev, int reg,
92			    struct resource_list *rl, int force, int prefetch);
93static int		pci_probe(device_t dev);
94static int		pci_attach(device_t dev);
95static void		pci_load_vendor_data(void);
96static int		pci_describe_parse_line(char **ptr, int *vendor,
97			    int *device, char **desc);
98static char		*pci_describe_device(device_t dev);
99static int		pci_modevent(module_t mod, int what, void *arg);
100static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
101			    pcicfgregs *cfg);
102static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
103static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
104			    int reg, uint32_t *data);
105#if 0
106static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
107			    int reg, uint32_t data);
108#endif
109static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
110static void		pci_disable_msi(device_t dev);
111static void		pci_enable_msi(device_t dev, uint64_t address,
112			    uint16_t data);
113static void		pci_enable_msix(device_t dev, u_int index,
114			    uint64_t address, uint32_t data);
115static void		pci_mask_msix(device_t dev, u_int index);
116static void		pci_unmask_msix(device_t dev, u_int index);
117static int		pci_msi_blacklisted(void);
118static int		pci_msix_blacklisted(void);
119static void		pci_resume_msi(device_t dev);
120static void		pci_resume_msix(device_t dev);
121static int		pci_remap_intr_method(device_t bus, device_t dev,
122			    u_int irq);
123
124static uint16_t		pci_get_rid_method(device_t dev, device_t child);
125
126static device_method_t pci_methods[] = {
127	/* Device interface */
128	DEVMETHOD(device_probe,		pci_probe),
129	DEVMETHOD(device_attach,	pci_attach),
130	DEVMETHOD(device_detach,	bus_generic_detach),
131	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
132	DEVMETHOD(device_suspend,	pci_suspend),
133	DEVMETHOD(device_resume,	pci_resume),
134
135	/* Bus interface */
136	DEVMETHOD(bus_print_child,	pci_print_child),
137	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
138	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
139	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
140	DEVMETHOD(bus_driver_added,	pci_driver_added),
141	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
142	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
143
144	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
145	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
146	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
147	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
148	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
149	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
150	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
151	DEVMETHOD(bus_release_resource,	pci_release_resource),
152	DEVMETHOD(bus_activate_resource, pci_activate_resource),
153	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
154	DEVMETHOD(bus_child_detached,	pci_child_detached),
155	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
156	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
157	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),
158
159	/* PCI interface */
160	DEVMETHOD(pci_read_config,	pci_read_config_method),
161	DEVMETHOD(pci_write_config,	pci_write_config_method),
162	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
163	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
164	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
165	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
166	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
167	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
168	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
169	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
170	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
171	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
172	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
173	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
174	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
175	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
176	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
177	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
178	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
179	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
180	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
181
182	DEVMETHOD_END
183};
184
185DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
186
187static devclass_t pci_devclass;
188DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
189MODULE_VERSION(pci, 1);
190
191static char	*pci_vendordata;
192static size_t	pci_vendordata_size;
193
194struct pci_quirk {
195	uint32_t devid;	/* Vendor/device of the card */
196	int	type;
197#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
198#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
199#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
200#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
201#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
202#define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
203	int	arg1;
204	int	arg2;
205};
206
207static const struct pci_quirk pci_quirks[] = {
208	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
209	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
210	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
211	/* As does the Serverworks OSB4 (the SMBus mapping register) */
212	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
213
214	/*
215	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
216	 * or the CMIC-SL (AKA ServerWorks GC_LE).
217	 */
218	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
219	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
220
221	/*
222	 * MSI doesn't work on earlier Intel chipsets including
223	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
224	 */
225	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
226	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
227	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
228	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
229	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
230	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
231	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
232
233	/*
234	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
235	 * bridge.
236	 */
237	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
238
239	/*
240	 * MSI-X allocation doesn't work properly for devices passed through
241	 * by VMware up to at least ESXi 5.1.
242	 */
243	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
244	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */
245
246	/*
247	 * Some virtualization environments emulate an older chipset
248	 * but support MSI just fine.  QEMU uses the Intel 82440.
249	 */
250	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },
251
252	/*
253	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
254	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
255	 * It prevents us from attaching hpet(4) when the bit is unset.
256	 * Note this quirk only affects SB600 revision A13 and earlier.
257	 * For SB600 A21 and later, firmware must set the bit to hide it.
258	 * For SB700 and later, it is unused and hardcoded to zero.
259	 */
260	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },
261
262	/*
263	 * Atheros AR8161/AR8162/E2200 Ethernet controllers have a bug that
264	 * MSI interrupt does not assert if PCIM_CMD_INTxDIS bit of the
265	 * command register is set.
266	 */
267	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
268	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
269	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
270
271	/*
272	 * Broadcom BCM5714(S)/BCM5715(S)/BCM5780(S) Ethernet MACs don't
273	 * issue MSI interrupts with PCIM_CMD_INTxDIS set either.
274	 */
275	{ 0x166814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714 */
276	{ 0x166914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5714S */
277	{ 0x166a14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780 */
278	{ 0x166b14e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5780S */
279	{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715 */
280	{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG,	0,	0 }, /* BCM5715S */
281
282	{ 0 }
283};
284
285/* map register information */
286#define	PCI_MAPMEM	0x01	/* memory map */
287#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
288#define	PCI_MAPPORT	0x04	/* port map */
289
290struct devlist pci_devq;
291uint32_t pci_generation;
292uint32_t pci_numdevs = 0;
293static int pcie_chipset, pcix_chipset;
294
295/* sysctl vars */
296SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
297
298static int pci_enable_io_modes = 1;
299TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
300SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
301    &pci_enable_io_modes, 1,
302    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
303enable these bits correctly.  We'd like to do this all the time, but there\n\
304are some peripherals that this causes problems with.");
305
306static int pci_do_realloc_bars = 0;
307TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
308SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
309    &pci_do_realloc_bars, 0,
310    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
311
312static int pci_do_power_nodriver = 0;
313TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
314SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
315    &pci_do_power_nodriver, 0,
316  "Place a function into D3 state when no driver attaches to it.  0 means\n\
317disable.  1 means conservatively place devices into D3 state.  2 means\n\
318agressively place devices into D3 state.  3 means put absolutely everything\n\
319in D3 state.");
320
321int pci_do_power_resume = 1;
322TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
323SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
324    &pci_do_power_resume, 1,
325  "Transition from D3 -> D0 on resume.");
326
327int pci_do_power_suspend = 1;
328TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
329SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
330    &pci_do_power_suspend, 1,
331  "Transition from D0 -> D3 on suspend.");
332
333static int pci_do_msi = 1;
334TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
335SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
336    "Enable support for MSI interrupts");
337
338static int pci_do_msix = 1;
339TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
340SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
341    "Enable support for MSI-X interrupts");
342
343static int pci_honor_msi_blacklist = 1;
344TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
345SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
346    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
347
348#if defined(__i386__) || defined(__amd64__)
349static int pci_usb_takeover = 1;
350#else
351static int pci_usb_takeover = 0;
352#endif
353TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
354SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
355    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
356Disable this if you depend on BIOS emulation of USB devices, that is\n\
357you use USB devices (like keyboard or mouse) but do not load USB drivers");
358
359static int pci_clear_bars;
360TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
361SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
362    "Ignore firmware-assigned resources for BARs.");
363
364static int pci_enable_ari = 1;
365TUNABLE_INT("hw.pci.enable_ari", &pci_enable_ari);
366SYSCTL_INT(_hw_pci, OID_AUTO, enable_ari, CTLFLAG_RDTUN, &pci_enable_ari,
367    0, "Enable support for PCIe Alternative RID Interpretation");
368
369static int
370pci_has_quirk(uint32_t devid, int quirk)
371{
372	const struct pci_quirk *q;
373
374	for (q = &pci_quirks[0]; q->devid; q++) {
375		if (q->devid == devid && q->type == quirk)
376			return (1);
377	}
378	return (0);
379}
380
381/* Find a device_t by bus/slot/function in domain 0 */
382
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: domain 0 is the only domain on most systems. */
	return (pci_find_dbsf(0, bus, slot, func));
}
389
390/* Find a device_t by domain/bus/slot/function */
391
392device_t
393pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
394{
395	struct pci_devinfo *dinfo;
396
397	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
398		if ((dinfo->cfg.domain == domain) &&
399		    (dinfo->cfg.bus == bus) &&
400		    (dinfo->cfg.slot == slot) &&
401		    (dinfo->cfg.func == func)) {
402			return (dinfo->cfg.dev);
403		}
404	}
405
406	return (NULL);
407}
408
409/* Find a device_t by vendor/device ID */
410
411device_t
412pci_find_device(uint16_t vendor, uint16_t device)
413{
414	struct pci_devinfo *dinfo;
415
416	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
417		if ((dinfo->cfg.vendor == vendor) &&
418		    (dinfo->cfg.device == device)) {
419			return (dinfo->cfg.dev);
420		}
421	}
422
423	return (NULL);
424}
425
426device_t
427pci_find_class(uint8_t class, uint8_t subclass)
428{
429	struct pci_devinfo *dinfo;
430
431	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
432		if (dinfo->cfg.baseclass == class &&
433		    dinfo->cfg.subclass == subclass) {
434			return (dinfo->cfg.dev);
435		}
436	}
437
438	return (NULL);
439}
440
/*
 * printf() with a "pci<domain>:<bus>:<slot>:<func>: " prefix identifying
 * the device.  Returns the total number of characters printed, matching
 * printf() semantics.
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
454
455/* return base address of memory or port map */
456
457static pci_addr_t
458pci_mapbase(uint64_t mapreg)
459{
460
461	if (PCI_BAR_MEM(mapreg))
462		return (mapreg & PCIM_BAR_MEM_BASE);
463	else
464		return (mapreg & PCIM_BAR_IO_BASE);
465}
466
467/* return map type of memory or port map */
468
469static const char *
470pci_maptype(uint64_t mapreg)
471{
472
473	if (PCI_BAR_IO(mapreg))
474		return ("I/O Port");
475	if (mapreg & PCIM_BAR_MEM_PREFETCH)
476		return ("Prefetchable Memory");
477	return ("Memory");
478}
479
480/* return log2 of map size decoded for memory or port map */
481
482static int
483pci_mapsize(uint64_t testval)
484{
485	int ln2size;
486
487	testval = pci_mapbase(testval);
488	ln2size = 0;
489	if (testval != 0) {
490		while ((testval & 1) == 0)
491		{
492			ln2size++;
493			testval >>= 1;
494		}
495	}
496	return (ln2size);
497}
498
499/* return base address of device ROM */
500
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Strip the enable bit and other low flag bits from the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
507
/* return log2 of map size decoded for device ROM */
509
510static int
511pci_romsize(uint64_t testval)
512{
513	int ln2size;
514
515	testval = pci_rombase(testval);
516	ln2size = 0;
517	if (testval != 0) {
518		while ((testval & 1) == 0)
519		{
520			ln2size++;
521			testval >>= 1;
522		}
523	}
524	return (ln2size);
525}
526
527/* return log2 of address range supported by map register */
528
529static int
530pci_maprange(uint64_t mapreg)
531{
532	int ln2range = 0;
533
534	if (PCI_BAR_IO(mapreg))
535		ln2range = 32;
536	else
537		switch (mapreg & PCIM_BAR_MEM_TYPE) {
538		case PCIM_BAR_MEM_32:
539			ln2range = 32;
540			break;
541		case PCIM_BAR_MEM_1MB:
542			ln2range = 20;
543			break;
544		case PCIM_BAR_MEM_64:
545			ln2range = 64;
546			break;
547		}
548	return (ln2range);
549}
550
551/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
552
static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only plain (type 0) headers need fixing up. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
563
564/* extract header type specific config data */
565
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * The location of the subvendor/subdevice registers and the
	 * number of BARs depend on the header type.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Bridges carry no subvendor/subdevice in the base header. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
587
588/* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of the function at domain d, bus b,
 * slot s, function f via bridge 'pcib' into a freshly allocated
 * pci_devinfo of 'size' bytes, link it onto the global device list,
 * and return it.  Returns NULL if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means no device is present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		/* Header-type-specific fields (subvendor, BAR count). */
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config into the pciconf(8)-visible structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
663
/*
 * Walk the device's capability list and record the location and
 * interesting contents of each recognized capability in 'cfg'.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first power management cap found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/*
				 * NOTE(review): uses the gap to the next cap
				 * to decide if the optional data register is
				 * present -- presumably relies on list layout;
				 * confirm against the PCI PM spec.
				 */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* Table size field is encoded as N-1. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Low bits select the BAR, high bits the offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG deliberately stay defined for the following functions */
}
824
825/*
826 * PCI Vital Product Data
827 */
828
829#define	PCI_VPD_TIMEOUT		1000000
830
831static int
832pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
833{
834	int count = PCI_VPD_TIMEOUT;
835
836	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
837
838	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
839
840	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
841		if (--count < 0)
842			return (ENXIO);
843		DELAY(1);	/* limit looping */
844	}
845	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
846
847	return (0);
848}
849
#if 0
/*
 * Write the 32-bit word 'data' to VPD offset 'reg'.  Returns 0 on
 * success or ENXIO if the device does not complete the write within
 * PCI_VPD_TIMEOUT polls.  (Currently unused, hence compiled out.)
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	/* Fix grammar in the assertion message ("must by" -> "must be"). */
	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Setting bit 15 along with the address starts the write cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* The device clears bit 15 when the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
869
870#undef PCI_VPD_TIMEOUT
871
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read from VPD */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
880
/*
 * Return the next VPD data byte in *data, refilling the 32-bit read
 * buffer from the device as needed.  Every byte returned is folded
 * into the running checksum.  Returns ENXIO if the underlying VPD
 * register read fails.
 */
static int
vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
{
	uint32_t reg;
	uint8_t byte;

	if (vrs->bytesinval == 0) {
		/* Buffer exhausted: fetch the next little-endian word. */
		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
			return (ENXIO);
		vrs->val = le32toh(reg);
		vrs->off += 4;
		byte = vrs->val & 0xff;
		vrs->bytesinval = 3;
	} else {
		/* Shift the consumed byte out and take the next one. */
		vrs->val = vrs->val >> 8;
		byte = vrs->val & 0xff;
		vrs->bytesinval--;
	}

	vrs->cksum += byte;
	*data = byte;
	return (0);
}
904
905static void
906pci_read_vpd(device_t pcib, pcicfgregs *cfg)
907{
908	struct vpd_readstate vrs;
909	int state;
910	int name;
911	int remain;
912	int i;
913	int alloc, off;		/* alloc/off for RO/W arrays */
914	int cksumvalid;
915	int dflen;
916	uint8_t byte;
917	uint8_t byte2;
918
919	/* init vpd reader */
920	vrs.bytesinval = 0;
921	vrs.off = 0;
922	vrs.pcib = pcib;
923	vrs.cfg = cfg;
924	vrs.cksum = 0;
925
926	state = 0;
927	name = remain = i = 0;	/* shut up stupid gcc */
928	alloc = off = 0;	/* shut up stupid gcc */
929	dflen = 0;		/* shut up stupid gcc */
930	cksumvalid = -1;
931	while (state >= 0) {
932		if (vpd_nextbyte(&vrs, &byte)) {
933			state = -2;
934			break;
935		}
936#if 0
937		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
938		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
939		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
940#endif
941		switch (state) {
942		case 0:		/* item name */
943			if (byte & 0x80) {
944				if (vpd_nextbyte(&vrs, &byte2)) {
945					state = -2;
946					break;
947				}
948				remain = byte2;
949				if (vpd_nextbyte(&vrs, &byte2)) {
950					state = -2;
951					break;
952				}
953				remain |= byte2 << 8;
954				if (remain > (0x7f*4 - vrs.off)) {
955					state = -1;
956					pci_printf(cfg,
957					    "invalid VPD data, remain %#x\n",
958					    remain);
959				}
960				name = byte & 0x7f;
961			} else {
962				remain = byte & 0x7;
963				name = (byte >> 3) & 0xf;
964			}
965			switch (name) {
966			case 0x2:	/* String */
967				cfg->vpd.vpd_ident = malloc(remain + 1,
968				    M_DEVBUF, M_WAITOK);
969				i = 0;
970				state = 1;
971				break;
972			case 0xf:	/* End */
973				state = -1;
974				break;
975			case 0x10:	/* VPD-R */
976				alloc = 8;
977				off = 0;
978				cfg->vpd.vpd_ros = malloc(alloc *
979				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
980				    M_WAITOK | M_ZERO);
981				state = 2;
982				break;
983			case 0x11:	/* VPD-W */
984				alloc = 8;
985				off = 0;
986				cfg->vpd.vpd_w = malloc(alloc *
987				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
988				    M_WAITOK | M_ZERO);
989				state = 5;
990				break;
991			default:	/* Invalid data, abort */
992				state = -1;
993				break;
994			}
995			break;
996
997		case 1:	/* Identifier String */
998			cfg->vpd.vpd_ident[i++] = byte;
999			remain--;
1000			if (remain == 0)  {
1001				cfg->vpd.vpd_ident[i] = '\0';
1002				state = 0;
1003			}
1004			break;
1005
1006		case 2:	/* VPD-R Keyword Header */
1007			if (off == alloc) {
1008				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1009				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1010				    M_DEVBUF, M_WAITOK | M_ZERO);
1011			}
1012			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1013			if (vpd_nextbyte(&vrs, &byte2)) {
1014				state = -2;
1015				break;
1016			}
1017			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1018			if (vpd_nextbyte(&vrs, &byte2)) {
1019				state = -2;
1020				break;
1021			}
1022			cfg->vpd.vpd_ros[off].len = dflen = byte2;
1023			if (dflen == 0 &&
1024			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1025			    2) == 0) {
1026				/*
1027				 * if this happens, we can't trust the rest
1028				 * of the VPD.
1029				 */
1030				pci_printf(cfg, "bad keyword length: %d\n",
1031				    dflen);
1032				cksumvalid = 0;
1033				state = -1;
1034				break;
1035			} else if (dflen == 0) {
1036				cfg->vpd.vpd_ros[off].value = malloc(1 *
1037				    sizeof(*cfg->vpd.vpd_ros[off].value),
1038				    M_DEVBUF, M_WAITOK);
1039				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1040			} else
1041				cfg->vpd.vpd_ros[off].value = malloc(
1042				    (dflen + 1) *
1043				    sizeof(*cfg->vpd.vpd_ros[off].value),
1044				    M_DEVBUF, M_WAITOK);
1045			remain -= 3;
1046			i = 0;
1047			/* keep in sync w/ state 3's transistions */
1048			if (dflen == 0 && remain == 0)
1049				state = 0;
1050			else if (dflen == 0)
1051				state = 2;
1052			else
1053				state = 3;
1054			break;
1055
1056		case 3:	/* VPD-R Keyword Value */
1057			cfg->vpd.vpd_ros[off].value[i++] = byte;
1058			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1059			    "RV", 2) == 0 && cksumvalid == -1) {
1060				if (vrs.cksum == 0)
1061					cksumvalid = 1;
1062				else {
1063					if (bootverbose)
1064						pci_printf(cfg,
1065					    "bad VPD cksum, remain %hhu\n",
1066						    vrs.cksum);
1067					cksumvalid = 0;
1068					state = -1;
1069					break;
1070				}
1071			}
1072			dflen--;
1073			remain--;
1074			/* keep in sync w/ state 2's transistions */
1075			if (dflen == 0)
1076				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1077			if (dflen == 0 && remain == 0) {
1078				cfg->vpd.vpd_rocnt = off;
1079				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1080				    off * sizeof(*cfg->vpd.vpd_ros),
1081				    M_DEVBUF, M_WAITOK | M_ZERO);
1082				state = 0;
1083			} else if (dflen == 0)
1084				state = 2;
1085			break;
1086
1087		case 4:
1088			remain--;
1089			if (remain == 0)
1090				state = 0;
1091			break;
1092
1093		case 5:	/* VPD-W Keyword Header */
1094			if (off == alloc) {
1095				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1096				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1097				    M_DEVBUF, M_WAITOK | M_ZERO);
1098			}
1099			cfg->vpd.vpd_w[off].keyword[0] = byte;
1100			if (vpd_nextbyte(&vrs, &byte2)) {
1101				state = -2;
1102				break;
1103			}
1104			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1105			if (vpd_nextbyte(&vrs, &byte2)) {
1106				state = -2;
1107				break;
1108			}
1109			cfg->vpd.vpd_w[off].len = dflen = byte2;
1110			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1111			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1112			    sizeof(*cfg->vpd.vpd_w[off].value),
1113			    M_DEVBUF, M_WAITOK);
1114			remain -= 3;
1115			i = 0;
1116			/* keep in sync w/ state 6's transistions */
1117			if (dflen == 0 && remain == 0)
1118				state = 0;
1119			else if (dflen == 0)
1120				state = 5;
1121			else
1122				state = 6;
1123			break;
1124
1125		case 6:	/* VPD-W Keyword Value */
1126			cfg->vpd.vpd_w[off].value[i++] = byte;
1127			dflen--;
1128			remain--;
1129			/* keep in sync w/ state 5's transistions */
1130			if (dflen == 0)
1131				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1132			if (dflen == 0 && remain == 0) {
1133				cfg->vpd.vpd_wcnt = off;
1134				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1135				    off * sizeof(*cfg->vpd.vpd_w),
1136				    M_DEVBUF, M_WAITOK | M_ZERO);
1137				state = 0;
1138			} else if (dflen == 0)
1139				state = 5;
1140			break;
1141
1142		default:
1143			pci_printf(cfg, "invalid state: %d\n", state);
1144			state = -1;
1145			break;
1146		}
1147	}
1148
1149	if (cksumvalid == 0 || state < -1) {
1150		/* read-only data bad, clean up */
1151		if (cfg->vpd.vpd_ros != NULL) {
1152			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1153				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1154			free(cfg->vpd.vpd_ros, M_DEVBUF);
1155			cfg->vpd.vpd_ros = NULL;
1156		}
1157	}
1158	if (state < -1) {
1159		/* I/O error, clean up */
1160		pci_printf(cfg, "failed to read VPD data.\n");
1161		if (cfg->vpd.vpd_ident != NULL) {
1162			free(cfg->vpd.vpd_ident, M_DEVBUF);
1163			cfg->vpd.vpd_ident = NULL;
1164		}
1165		if (cfg->vpd.vpd_w != NULL) {
1166			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1167				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1168			free(cfg->vpd.vpd_w, M_DEVBUF);
1169			cfg->vpd.vpd_w = NULL;
1170		}
1171	}
1172	cfg->vpd.vpd_cached = 1;
1173#undef REG
1174#undef WREG
1175}
1176
1177int
1178pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1179{
1180	struct pci_devinfo *dinfo = device_get_ivars(child);
1181	pcicfgregs *cfg = &dinfo->cfg;
1182
1183	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1184		pci_read_vpd(device_get_parent(dev), cfg);
1185
1186	*identptr = cfg->vpd.vpd_ident;
1187
1188	if (*identptr == NULL)
1189		return (ENXIO);
1190
1191	return (0);
1192}
1193
1194int
1195pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1196	const char **vptr)
1197{
1198	struct pci_devinfo *dinfo = device_get_ivars(child);
1199	pcicfgregs *cfg = &dinfo->cfg;
1200	int i;
1201
1202	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1203		pci_read_vpd(device_get_parent(dev), cfg);
1204
1205	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1206		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1207		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1208			*vptr = cfg->vpd.vpd_ros[i].value;
1209			return (0);
1210		}
1211
1212	*vptr = NULL;
1213	return (ENXIO);
1214}
1215
1216struct pcicfg_vpd *
1217pci_fetch_vpd_list(device_t dev)
1218{
1219	struct pci_devinfo *dinfo = device_get_ivars(dev);
1220	pcicfgregs *cfg = &dinfo->cfg;
1221
1222	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1223		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1224	return (&cfg->vpd);
1225}
1226
1227/*
1228 * Find the requested HyperTransport capability and return the offset
1229 * in configuration space via the pointer provided.  The function
1230 * returns 0 on success and an error code otherwise.
1231 */
1232int
1233pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1234{
1235	int ptr, error;
1236	uint16_t val;
1237
1238	error = pci_find_cap(child, PCIY_HT, &ptr);
1239	if (error)
1240		return (error);
1241
1242	/*
1243	 * Traverse the capabilities list checking each HT capability
1244	 * to see if it matches the requested HT capability.
1245	 */
1246	while (ptr != 0) {
1247		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1248		if (capability == PCIM_HTCAP_SLAVE ||
1249		    capability == PCIM_HTCAP_HOST)
1250			val &= 0xe000;
1251		else
1252			val &= PCIM_HTCMD_CAP_MASK;
1253		if (val == capability) {
1254			if (capreg != NULL)
1255				*capreg = ptr;
1256			return (0);
1257		}
1258
1259		/* Skip to the next HT capability. */
1260		while (ptr != 0) {
1261			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1262			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1263			    PCIY_HT)
1264				break;
1265		}
1266	}
1267	return (ENOENT);
1268}
1269
1270/*
1271 * Find the requested capability and return the offset in
1272 * configuration space via the pointer provided.  The function returns
1273 * 0 on success and an error code otherwise.
1274 */
1275int
1276pci_find_cap_method(device_t dev, device_t child, int capability,
1277    int *capreg)
1278{
1279	struct pci_devinfo *dinfo = device_get_ivars(child);
1280	pcicfgregs *cfg = &dinfo->cfg;
1281	u_int32_t status;
1282	u_int8_t ptr;
1283
1284	/*
1285	 * Check the CAP_LIST bit of the PCI status register first.
1286	 */
1287	status = pci_read_config(child, PCIR_STATUS, 2);
1288	if (!(status & PCIM_STATUS_CAPPRESENT))
1289		return (ENXIO);
1290
1291	/*
1292	 * Determine the start pointer of the capabilities list.
1293	 */
1294	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1295	case PCIM_HDRTYPE_NORMAL:
1296	case PCIM_HDRTYPE_BRIDGE:
1297		ptr = PCIR_CAP_PTR;
1298		break;
1299	case PCIM_HDRTYPE_CARDBUS:
1300		ptr = PCIR_CAP_PTR_2;
1301		break;
1302	default:
1303		/* XXX: panic? */
1304		return (ENXIO);		/* no extended capabilities support */
1305	}
1306	ptr = pci_read_config(child, ptr, 1);
1307
1308	/*
1309	 * Traverse the capabilities list.
1310	 */
1311	while (ptr != 0) {
1312		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1313			if (capreg != NULL)
1314				*capreg = ptr;
1315			return (0);
1316		}
1317		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1318	}
1319
1320	return (ENOENT);
1321}
1322
1323/*
1324 * Find the requested extended capability and return the offset in
1325 * configuration space via the pointer provided.  The function returns
1326 * 0 on success and an error code otherwise.
1327 */
1328int
1329pci_find_extcap_method(device_t dev, device_t child, int capability,
1330    int *capreg)
1331{
1332	struct pci_devinfo *dinfo = device_get_ivars(child);
1333	pcicfgregs *cfg = &dinfo->cfg;
1334	uint32_t ecap;
1335	uint16_t ptr;
1336
1337	/* Only supported for PCI-express devices. */
1338	if (cfg->pcie.pcie_location == 0)
1339		return (ENXIO);
1340
1341	ptr = PCIR_EXTCAP;
1342	ecap = pci_read_config(child, ptr, 4);
1343	if (ecap == 0xffffffff || ecap == 0)
1344		return (ENOENT);
1345	for (;;) {
1346		if (PCI_EXTCAP_ID(ecap) == capability) {
1347			if (capreg != NULL)
1348				*capreg = ptr;
1349			return (0);
1350		}
1351		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1352		if (ptr == 0)
1353			break;
1354		ecap = pci_read_config(child, ptr, 4);
1355	}
1356
1357	return (ENOENT);
1358}
1359
1360/*
1361 * Support for MSI-X message interrupts.
1362 */
/*
 * Program MSI-X table entry 'index' of 'dev' with the given message
 * address and data.  The caller is responsible for any masking or
 * unmasking of the vector around this call.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/* Each table entry is 16 bytes: addr lo, addr hi, data, control. */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1379
1380void
1381pci_mask_msix(device_t dev, u_int index)
1382{
1383	struct pci_devinfo *dinfo = device_get_ivars(dev);
1384	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1385	uint32_t offset, val;
1386
1387	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1388	offset = msix->msix_table_offset + index * 16 + 12;
1389	val = bus_read_4(msix->msix_table_res, offset);
1390	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1391		val |= PCIM_MSIX_VCTRL_MASK;
1392		bus_write_4(msix->msix_table_res, offset, val);
1393	}
1394}
1395
1396void
1397pci_unmask_msix(device_t dev, u_int index)
1398{
1399	struct pci_devinfo *dinfo = device_get_ivars(dev);
1400	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1401	uint32_t offset, val;
1402
1403	KASSERT(msix->msix_table_len > index, ("bogus index"));
1404	offset = msix->msix_table_offset + index * 16 + 12;
1405	val = bus_read_4(msix->msix_table_res, offset);
1406	if (val & PCIM_MSIX_VCTRL_MASK) {
1407		val &= ~PCIM_MSIX_VCTRL_MASK;
1408		bus_write_4(msix->msix_table_res, offset, val);
1409	}
1410}
1411
1412int
1413pci_pending_msix(device_t dev, u_int index)
1414{
1415	struct pci_devinfo *dinfo = device_get_ivars(dev);
1416	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1417	uint32_t offset, bit;
1418
1419	KASSERT(msix->msix_table_len > index, ("bogus index"));
1420	offset = msix->msix_pba_offset + (index / 32) * 4;
1421	bit = 1 << index % 32;
1422	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1423}
1424
1425/*
1426 * Restore MSI-X registers and table during resume.  If MSI-X is
1427 * enabled then walk the virtual table to restore the actual MSI-X
1428 * table.
1429 */
1430static void
1431pci_resume_msix(device_t dev)
1432{
1433	struct pci_devinfo *dinfo = device_get_ivars(dev);
1434	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1435	struct msix_table_entry *mte;
1436	struct msix_vector *mv;
1437	int i;
1438
1439	if (msix->msix_alloc > 0) {
1440		/* First, mask all vectors. */
1441		for (i = 0; i < msix->msix_msgnum; i++)
1442			pci_mask_msix(dev, i);
1443
1444		/* Second, program any messages with at least one handler. */
1445		for (i = 0; i < msix->msix_table_len; i++) {
1446			mte = &msix->msix_table[i];
1447			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1448				continue;
1449			mv = &msix->msix_vectors[mte->mte_vector - 1];
1450			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1451			pci_unmask_msix(dev, i);
1452		}
1453	}
1454	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1455	    msix->msix_ctrl, 2);
1456}
1457
1458/*
1459 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1460 * returned in *count.  After this function returns, each message will be
1461 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1462 */
1463int
1464pci_alloc_msix_method(device_t dev, device_t child, int *count)
1465{
1466	struct pci_devinfo *dinfo = device_get_ivars(child);
1467	pcicfgregs *cfg = &dinfo->cfg;
1468	struct resource_list_entry *rle;
1469	int actual, error, i, irq, max;
1470
1471	/* Don't let count == 0 get us into trouble. */
1472	if (*count == 0)
1473		return (EINVAL);
1474
1475	/* If rid 0 is allocated, then fail. */
1476	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1477	if (rle != NULL && rle->res != NULL)
1478		return (ENXIO);
1479
1480	/* Already have allocated messages? */
1481	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1482		return (ENXIO);
1483
1484	/* If MSI-X is blacklisted for this system, fail. */
1485	if (pci_msix_blacklisted())
1486		return (ENXIO);
1487
1488	/* MSI-X capability present? */
1489	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1490		return (ENODEV);
1491
1492	/* Make sure the appropriate BARs are mapped. */
1493	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1494	    cfg->msix.msix_table_bar);
1495	if (rle == NULL || rle->res == NULL ||
1496	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1497		return (ENXIO);
1498	cfg->msix.msix_table_res = rle->res;
1499	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1500		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1501		    cfg->msix.msix_pba_bar);
1502		if (rle == NULL || rle->res == NULL ||
1503		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1504			return (ENXIO);
1505	}
1506	cfg->msix.msix_pba_res = rle->res;
1507
1508	if (bootverbose)
1509		device_printf(child,
1510		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1511		    *count, cfg->msix.msix_msgnum);
1512	max = min(*count, cfg->msix.msix_msgnum);
1513	for (i = 0; i < max; i++) {
1514		/* Allocate a message. */
1515		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1516		if (error) {
1517			if (i == 0)
1518				return (error);
1519			break;
1520		}
1521		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1522		    irq, 1);
1523	}
1524	actual = i;
1525
1526	if (bootverbose) {
1527		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1528		if (actual == 1)
1529			device_printf(child, "using IRQ %lu for MSI-X\n",
1530			    rle->start);
1531		else {
1532			int run;
1533
1534			/*
1535			 * Be fancy and try to print contiguous runs of
1536			 * IRQ values as ranges.  'irq' is the previous IRQ.
1537			 * 'run' is true if we are in a range.
1538			 */
1539			device_printf(child, "using IRQs %lu", rle->start);
1540			irq = rle->start;
1541			run = 0;
1542			for (i = 1; i < actual; i++) {
1543				rle = resource_list_find(&dinfo->resources,
1544				    SYS_RES_IRQ, i + 1);
1545
1546				/* Still in a run? */
1547				if (rle->start == irq + 1) {
1548					run = 1;
1549					irq++;
1550					continue;
1551				}
1552
1553				/* Finish previous range. */
1554				if (run) {
1555					printf("-%d", irq);
1556					run = 0;
1557				}
1558
1559				/* Start new range. */
1560				printf(",%lu", rle->start);
1561				irq = rle->start;
1562			}
1563
1564			/* Unfinished range? */
1565			if (run)
1566				printf("-%d", irq);
1567			printf(" for MSI-X\n");
1568		}
1569	}
1570
1571	/* Mask all vectors. */
1572	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1573		pci_mask_msix(child, i);
1574
1575	/* Allocate and initialize vector data and virtual table. */
1576	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1577	    M_DEVBUF, M_WAITOK | M_ZERO);
1578	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1579	    M_DEVBUF, M_WAITOK | M_ZERO);
1580	for (i = 0; i < actual; i++) {
1581		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1582		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1583		cfg->msix.msix_table[i].mte_vector = i + 1;
1584	}
1585
1586	/* Update control register to enable MSI-X. */
1587	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1588	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1589	    cfg->msix.msix_ctrl, 2);
1590
1591	/* Update counts of alloc'd messages. */
1592	cfg->msix.msix_alloc = actual;
1593	cfg->msix.msix_table_len = actual;
1594	*count = actual;
1595	return (0);
1596}
1597
1598/*
1599 * By default, pci_alloc_msix() will assign the allocated IRQ
1600 * resources consecutively to the first N messages in the MSI-X table.
1601 * However, device drivers may want to use different layouts if they
1602 * either receive fewer messages than they asked for, or they wish to
1603 * populate the MSI-X table sparsely.  This method allows the driver
1604 * to specify what layout it wants.  It must be called after a
1605 * successful pci_alloc_msix() but before any of the associated
1606 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1607 *
1608 * The 'vectors' array contains 'count' message vectors.  The array
1609 * maps directly to the MSI-X table in that index 0 in the array
1610 * specifies the vector for the first message in the MSI-X table, etc.
1611 * The vector value in each array index can either be 0 to indicate
1612 * that no vector should be assigned to a message slot, or it can be a
1613 * number from 1 to N (where N is the count returned from a
1614 * succcessful call to pci_alloc_msix()) to indicate which message
1615 * vector (IRQ) to be used for the corresponding message.
1616 *
1617 * On successful return, each message with a non-zero vector will have
1618 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1619 * 1.  Additionally, if any of the IRQs allocated via the previous
1620 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1621 * will be freed back to the system automatically.
1622 *
1623 * For example, suppose a driver has a MSI-X table with 6 messages and
1624 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1625 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1626 * C.  After the call to pci_alloc_msix(), the device will be setup to
1627 * have an MSI-X table of ABC--- (where - means no vector assigned).
1628 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1629 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1630 * be freed back to the system.  This device will also have valid
1631 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1632 *
1633 * In any case, the SYS_RES_IRQ rid X will always map to the message
1634 * at MSI-X table index X - 1 and will only be valid if a vector is
1635 * assigned to that table entry.
1636 */
1637int
1638pci_remap_msix_method(device_t dev, device_t child, int count,
1639    const u_int *vectors)
1640{
1641	struct pci_devinfo *dinfo = device_get_ivars(child);
1642	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1643	struct resource_list_entry *rle;
1644	int i, irq, j, *used;
1645
1646	/*
1647	 * Have to have at least one message in the table but the
1648	 * table can't be bigger than the actual MSI-X table in the
1649	 * device.
1650	 */
1651	if (count == 0 || count > msix->msix_msgnum)
1652		return (EINVAL);
1653
1654	/* Sanity check the vectors. */
1655	for (i = 0; i < count; i++)
1656		if (vectors[i] > msix->msix_alloc)
1657			return (EINVAL);
1658
1659	/*
1660	 * Make sure there aren't any holes in the vectors to be used.
1661	 * It's a big pain to support it, and it doesn't really make
1662	 * sense anyway.  Also, at least one vector must be used.
1663	 */
1664	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1665	    M_ZERO);
1666	for (i = 0; i < count; i++)
1667		if (vectors[i] != 0)
1668			used[vectors[i] - 1] = 1;
1669	for (i = 0; i < msix->msix_alloc - 1; i++)
1670		if (used[i] == 0 && used[i + 1] == 1) {
1671			free(used, M_DEVBUF);
1672			return (EINVAL);
1673		}
1674	if (used[0] != 1) {
1675		free(used, M_DEVBUF);
1676		return (EINVAL);
1677	}
1678
1679	/* Make sure none of the resources are allocated. */
1680	for (i = 0; i < msix->msix_table_len; i++) {
1681		if (msix->msix_table[i].mte_vector == 0)
1682			continue;
1683		if (msix->msix_table[i].mte_handlers > 0)
1684			return (EBUSY);
1685		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1686		KASSERT(rle != NULL, ("missing resource"));
1687		if (rle->res != NULL)
1688			return (EBUSY);
1689	}
1690
1691	/* Free the existing resource list entries. */
1692	for (i = 0; i < msix->msix_table_len; i++) {
1693		if (msix->msix_table[i].mte_vector == 0)
1694			continue;
1695		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1696	}
1697
1698	/*
1699	 * Build the new virtual table keeping track of which vectors are
1700	 * used.
1701	 */
1702	free(msix->msix_table, M_DEVBUF);
1703	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1704	    M_DEVBUF, M_WAITOK | M_ZERO);
1705	for (i = 0; i < count; i++)
1706		msix->msix_table[i].mte_vector = vectors[i];
1707	msix->msix_table_len = count;
1708
1709	/* Free any unused IRQs and resize the vectors array if necessary. */
1710	j = msix->msix_alloc - 1;
1711	if (used[j] == 0) {
1712		struct msix_vector *vec;
1713
1714		while (used[j] == 0) {
1715			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1716			    msix->msix_vectors[j].mv_irq);
1717			j--;
1718		}
1719		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1720		    M_WAITOK);
1721		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1722		    (j + 1));
1723		free(msix->msix_vectors, M_DEVBUF);
1724		msix->msix_vectors = vec;
1725		msix->msix_alloc = j + 1;
1726	}
1727	free(used, M_DEVBUF);
1728
1729	/* Map the IRQs onto the rids. */
1730	for (i = 0; i < count; i++) {
1731		if (vectors[i] == 0)
1732			continue;
1733		irq = msix->msix_vectors[vectors[i]].mv_irq;
1734		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1735		    irq, 1);
1736	}
1737
1738	if (bootverbose) {
1739		device_printf(child, "Remapped MSI-X IRQs as: ");
1740		for (i = 0; i < count; i++) {
1741			if (i != 0)
1742				printf(", ");
1743			if (vectors[i] == 0)
1744				printf("---");
1745			else
1746				printf("%d",
1747				    msix->msix_vectors[vectors[i]].mv_irq);
1748		}
1749		printf("\n");
1750	}
1751
1752	return (0);
1753}
1754
/*
 * Release all MSI-X messages allocated for 'child'.  Returns ENODEV
 * if no messages are allocated, EBUSY if any message still has an
 * established interrupt handler or an outstanding SYS_RES_IRQ
 * allocation, and 0 on success.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1801
1802/*
1803 * Return the max supported MSI-X messages this device supports.
1804 * Basically, assuming the MD code can alloc messages, this function
1805 * should return the maximum value that pci_alloc_msix() can return.
1806 * Thus, it is subject to the tunables, etc.
1807 */
1808int
1809pci_msix_count_method(device_t dev, device_t child)
1810{
1811	struct pci_devinfo *dinfo = device_get_ivars(child);
1812	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1813
1814	if (pci_do_msix && msix->msix_location != 0)
1815		return (msix->msix_msgnum);
1816	return (0);
1817}
1818
1819/*
1820 * HyperTransport MSI mapping control
1821 */
1822void
1823pci_ht_map_msi(device_t dev, uint64_t addr)
1824{
1825	struct pci_devinfo *dinfo = device_get_ivars(dev);
1826	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1827
1828	if (!ht->ht_msimap)
1829		return;
1830
1831	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1832	    ht->ht_msiaddr >> 20 == addr >> 20) {
1833		/* Enable MSI -> HT mapping. */
1834		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1835		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1836		    ht->ht_msictrl, 2);
1837	}
1838
1839	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1840		/* Disable MSI -> HT mapping. */
1841		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1842		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1843		    ht->ht_msictrl, 2);
1844	}
1845}
1846
1847int
1848pci_get_max_read_req(device_t dev)
1849{
1850	struct pci_devinfo *dinfo = device_get_ivars(dev);
1851	int cap;
1852	uint16_t val;
1853
1854	cap = dinfo->cfg.pcie.pcie_location;
1855	if (cap == 0)
1856		return (0);
1857	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1858	val &= PCIEM_CTL_MAX_READ_REQUEST;
1859	val >>= 12;
1860	return (1 << (val + 7));
1861}
1862
1863int
1864pci_set_max_read_req(device_t dev, int size)
1865{
1866	struct pci_devinfo *dinfo = device_get_ivars(dev);
1867	int cap;
1868	uint16_t val;
1869
1870	cap = dinfo->cfg.pcie.pcie_location;
1871	if (cap == 0)
1872		return (0);
1873	if (size < 128)
1874		size = 128;
1875	if (size > 4096)
1876		size = 4096;
1877	size = (1 << (fls(size) - 1));
1878	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1879	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1880	val |= (fls(size) - 8) << 12;
1881	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1882	return (size);
1883}
1884
1885/*
1886 * Support for MSI message signalled interrupts.
1887 */
/*
 * Program the MSI capability of 'dev' with the given message address
 * and data, set the MSI enable bit, and enable any HT mapping window
 * covering the address.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/* The data register's offset depends on 64-bit address support. */
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1914
/*
 * Disable MSI delivery for 'dev': tear down any HT mapping window and
 * clear the MSI enable bit in the capability control register.
 */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1929
1930/*
1931 * Restore MSI registers during resume.  If MSI is enabled then
1932 * restore the data and address registers in addition to the control
1933 * register.
1934 */
1935static void
1936pci_resume_msi(device_t dev)
1937{
1938	struct pci_devinfo *dinfo = device_get_ivars(dev);
1939	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1940	uint64_t address;
1941	uint16_t data;
1942
1943	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1944		address = msi->msi_addr;
1945		data = msi->msi_data;
1946		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1947		    address & 0xffffffff, 4);
1948		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1949			pci_write_config(dev, msi->msi_location +
1950			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1951			pci_write_config(dev, msi->msi_location +
1952			    PCIR_MSI_DATA_64BIT, data, 2);
1953		} else
1954			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1955			    data, 2);
1956	}
1957	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1958	    2);
1959}
1960
/*
 * Reroute an already-programmed MSI or MSI-X IRQ for 'dev': fetch an
 * updated message address/data pair from the parent bridge and
 * rewrite it into the device's MSI capability or MSI-X table.
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, retarget, then re-enable MSI. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Reprogram each table slot using vector i+1. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		/*
		 * NOTE(review): ENOENT is returned here even when a
		 * matching MSI-X vector was found and reprogrammed above,
		 * unlike the MSI path which returns 0 on success --
		 * confirm callers ignore the return value for MSI-X.
		 */
		return (ENOENT);
	}

	return (ENOENT);
}
2033
2034/*
2035 * Returns true if the specified device is blacklisted because MSI
2036 * doesn't work.
2037 */
2038int
2039pci_msi_device_blacklisted(device_t dev)
2040{
2041
2042	if (!pci_honor_msi_blacklist)
2043		return (0);
2044
2045	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2046}
2047
2048/*
2049 * Determine if MSI is blacklisted globally on this system.  Currently,
2050 * we just check for blacklisted chipsets as represented by the
2051 * host-PCI bridge at device 0:0:0.  In the future, it may become
2052 * necessary to check other system attributes, such as the kenv values
2053 * that give the motherboard manufacturer and model number.
2054 */
2055static int
2056pci_msi_blacklisted(void)
2057{
2058	device_t dev;
2059
2060	if (!pci_honor_msi_blacklist)
2061		return (0);
2062
2063	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2064	if (!(pcie_chipset || pcix_chipset)) {
2065		if (vm_guest != VM_GUEST_NO) {
2066			/*
2067			 * Whitelist older chipsets in virtual
2068			 * machines known to support MSI.
2069			 */
2070			dev = pci_find_bsf(0, 0, 0);
2071			if (dev != NULL)
2072				return (!pci_has_quirk(pci_get_devid(dev),
2073					PCI_QUIRK_ENABLE_MSI_VM));
2074		}
2075		return (1);
2076	}
2077
2078	dev = pci_find_bsf(0, 0, 0);
2079	if (dev != NULL)
2080		return (pci_msi_device_blacklisted(dev));
2081	return (0);
2082}
2083
2084/*
2085 * Returns true if the specified device is blacklisted because MSI-X
2086 * doesn't work.  Note that this assumes that if MSI doesn't work,
2087 * MSI-X doesn't either.
2088 */
2089int
2090pci_msix_device_blacklisted(device_t dev)
2091{
2092
2093	if (!pci_honor_msi_blacklist)
2094		return (0);
2095
2096	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2097		return (1);
2098
2099	return (pci_msi_device_blacklisted(dev));
2100}
2101
2102/*
2103 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2104 * is blacklisted, assume that MSI-X is as well.  Check for additional
2105 * chipsets where MSI works but MSI-X does not.
2106 */
2107static int
2108pci_msix_blacklisted(void)
2109{
2110	device_t dev;
2111
2112	if (!pci_honor_msi_blacklist)
2113		return (0);
2114
2115	dev = pci_find_bsf(0, 0, 0);
2116	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2117	    PCI_QUIRK_DISABLE_MSIX))
2118		return (1);
2119
2120	return (pci_msi_blacklisted());
2121}
2122
2123/*
2124 * Attempt to allocate *count MSI messages.  The actual number allocated is
2125 * returned in *count.  After this function returns, each message will be
2126 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2127 */
2128int
2129pci_alloc_msi_method(device_t dev, device_t child, int *count)
2130{
2131	struct pci_devinfo *dinfo = device_get_ivars(child);
2132	pcicfgregs *cfg = &dinfo->cfg;
2133	struct resource_list_entry *rle;
2134	int actual, error, i, irqs[32];
2135	uint16_t ctrl;
2136
2137	/* Don't let count == 0 get us into trouble. */
2138	if (*count == 0)
2139		return (EINVAL);
2140
2141	/* If rid 0 is allocated, then fail. */
2142	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2143	if (rle != NULL && rle->res != NULL)
2144		return (ENXIO);
2145
2146	/* Already have allocated messages? */
2147	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2148		return (ENXIO);
2149
2150	/* If MSI is blacklisted for this system, fail. */
2151	if (pci_msi_blacklisted())
2152		return (ENXIO);
2153
2154	/* MSI capability present? */
2155	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2156		return (ENODEV);
2157
2158	if (bootverbose)
2159		device_printf(child,
2160		    "attempting to allocate %d MSI vectors (%d supported)\n",
2161		    *count, cfg->msi.msi_msgnum);
2162
2163	/* Don't ask for more than the device supports. */
2164	actual = min(*count, cfg->msi.msi_msgnum);
2165
2166	/* Don't ask for more than 32 messages. */
2167	actual = min(actual, 32);
2168
2169	/* MSI requires power of 2 number of messages. */
2170	if (!powerof2(actual))
2171		return (EINVAL);
2172
2173	for (;;) {
2174		/* Try to allocate N messages. */
2175		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2176		    actual, irqs);
2177		if (error == 0)
2178			break;
2179		if (actual == 1)
2180			return (error);
2181
2182		/* Try N / 2. */
2183		actual >>= 1;
2184	}
2185
2186	/*
2187	 * We now have N actual messages mapped onto SYS_RES_IRQ
2188	 * resources in the irqs[] array, so add new resources
2189	 * starting at rid 1.
2190	 */
2191	for (i = 0; i < actual; i++)
2192		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2193		    irqs[i], irqs[i], 1);
2194
2195	if (bootverbose) {
2196		if (actual == 1)
2197			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2198		else {
2199			int run;
2200
2201			/*
2202			 * Be fancy and try to print contiguous runs
2203			 * of IRQ values as ranges.  'run' is true if
2204			 * we are in a range.
2205			 */
2206			device_printf(child, "using IRQs %d", irqs[0]);
2207			run = 0;
2208			for (i = 1; i < actual; i++) {
2209
2210				/* Still in a run? */
2211				if (irqs[i] == irqs[i - 1] + 1) {
2212					run = 1;
2213					continue;
2214				}
2215
2216				/* Finish previous range. */
2217				if (run) {
2218					printf("-%d", irqs[i - 1]);
2219					run = 0;
2220				}
2221
2222				/* Start new range. */
2223				printf(",%d", irqs[i]);
2224			}
2225
2226			/* Unfinished range? */
2227			if (run)
2228				printf("-%d", irqs[actual - 1]);
2229			printf(" for MSI\n");
2230		}
2231	}
2232
2233	/* Update control register with actual count. */
2234	ctrl = cfg->msi.msi_ctrl;
2235	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2236	ctrl |= (ffs(actual) - 1) << 4;
2237	cfg->msi.msi_ctrl = ctrl;
2238	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2239
2240	/* Update counts of alloc'd messages. */
2241	cfg->msi.msi_alloc = actual;
2242	cfg->msi.msi_handlers = 0;
2243	*count = actual;
2244	return (0);
2245}
2246
/*
 * Release the MSI messages associated with this device.
 *
 * MSI-X is tried first; if the device instead holds plain MSI messages,
 * all of them must be idle (no handlers established, no SYS_RES_IRQ
 * resources outstanding) before they are handed back to the parent
 * bridge.  Returns 0 on success, ENODEV if nothing is allocated, or
 * EBUSY if any message is still in use.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to return to the bridge below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2295
2296/*
2297 * Return the max supported MSI messages this device supports.
2298 * Basically, assuming the MD code can alloc messages, this function
2299 * should return the maximum value that pci_alloc_msi() can return.
2300 * Thus, it is subject to the tunables, etc.
2301 */
2302int
2303pci_msi_count_method(device_t dev, device_t child)
2304{
2305	struct pci_devinfo *dinfo = device_get_ivars(child);
2306	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2307
2308	if (pci_do_msi && msi->msi_location != 0)
2309		return (msi->msi_msgnum);
2310	return (0);
2311}
2312
/*
 * free pcicfgregs structure and all depending data structures
 *
 * Releases parsed VPD data and BAR tracking records, unlinks the
 * devinfo from the global device queue, and updates the global device
 * generation/count.  Always returns 0.
 */
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free VPD strings only if VPD was actually parsed. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free every BAR record; the SAFE variant allows freeing mid-walk. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2346
2347/*
2348 * PCI power manangement
2349 */
2350int
2351pci_set_powerstate_method(device_t dev, device_t child, int state)
2352{
2353	struct pci_devinfo *dinfo = device_get_ivars(child);
2354	pcicfgregs *cfg = &dinfo->cfg;
2355	uint16_t status;
2356	int result, oldstate, highest, delay;
2357
2358	if (cfg->pp.pp_cap == 0)
2359		return (EOPNOTSUPP);
2360
2361	/*
2362	 * Optimize a no state change request away.  While it would be OK to
2363	 * write to the hardware in theory, some devices have shown odd
2364	 * behavior when going from D3 -> D3.
2365	 */
2366	oldstate = pci_get_powerstate(child);
2367	if (oldstate == state)
2368		return (0);
2369
2370	/*
2371	 * The PCI power management specification states that after a state
2372	 * transition between PCI power states, system software must
2373	 * guarantee a minimal delay before the function accesses the device.
2374	 * Compute the worst case delay that we need to guarantee before we
2375	 * access the device.  Many devices will be responsive much more
2376	 * quickly than this delay, but there are some that don't respond
2377	 * instantly to state changes.  Transitions to/from D3 state require
2378	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2379	 * is done below with DELAY rather than a sleeper function because
2380	 * this function can be called from contexts where we cannot sleep.
2381	 */
2382	highest = (oldstate > state) ? oldstate : state;
2383	if (highest == PCI_POWERSTATE_D3)
2384	    delay = 10000;
2385	else if (highest == PCI_POWERSTATE_D2)
2386	    delay = 200;
2387	else
2388	    delay = 0;
2389	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2390	    & ~PCIM_PSTAT_DMASK;
2391	result = 0;
2392	switch (state) {
2393	case PCI_POWERSTATE_D0:
2394		status |= PCIM_PSTAT_D0;
2395		break;
2396	case PCI_POWERSTATE_D1:
2397		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2398			return (EOPNOTSUPP);
2399		status |= PCIM_PSTAT_D1;
2400		break;
2401	case PCI_POWERSTATE_D2:
2402		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2403			return (EOPNOTSUPP);
2404		status |= PCIM_PSTAT_D2;
2405		break;
2406	case PCI_POWERSTATE_D3:
2407		status |= PCIM_PSTAT_D3;
2408		break;
2409	default:
2410		return (EINVAL);
2411	}
2412
2413	if (bootverbose)
2414		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2415		    state);
2416
2417	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2418	if (delay)
2419		DELAY(delay);
2420	return (0);
2421}
2422
2423int
2424pci_get_powerstate_method(device_t dev, device_t child)
2425{
2426	struct pci_devinfo *dinfo = device_get_ivars(child);
2427	pcicfgregs *cfg = &dinfo->cfg;
2428	uint16_t status;
2429	int result;
2430
2431	if (cfg->pp.pp_cap != 0) {
2432		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2433		switch (status & PCIM_PSTAT_DMASK) {
2434		case PCIM_PSTAT_D0:
2435			result = PCI_POWERSTATE_D0;
2436			break;
2437		case PCIM_PSTAT_D1:
2438			result = PCI_POWERSTATE_D1;
2439			break;
2440		case PCIM_PSTAT_D2:
2441			result = PCI_POWERSTATE_D2;
2442			break;
2443		case PCIM_PSTAT_D3:
2444			result = PCI_POWERSTATE_D3;
2445			break;
2446		default:
2447			result = PCI_POWERSTATE_UNKNOWN;
2448			break;
2449		}
2450	} else {
2451		/* No support, device is always at D0 */
2452		result = PCI_POWERSTATE_D0;
2453	}
2454	return (result);
2455}
2456
2457/*
2458 * Some convenience functions for PCI device drivers.
2459 */
2460
2461static __inline void
2462pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2463{
2464	uint16_t	command;
2465
2466	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2467	command |= bit;
2468	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2469}
2470
2471static __inline void
2472pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2473{
2474	uint16_t	command;
2475
2476	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2477	command &= ~bit;
2478	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2479}
2480
2481int
2482pci_enable_busmaster_method(device_t dev, device_t child)
2483{
2484	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2485	return (0);
2486}
2487
2488int
2489pci_disable_busmaster_method(device_t dev, device_t child)
2490{
2491	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2492	return (0);
2493}
2494
2495int
2496pci_enable_io_method(device_t dev, device_t child, int space)
2497{
2498	uint16_t bit;
2499
2500	switch(space) {
2501	case SYS_RES_IOPORT:
2502		bit = PCIM_CMD_PORTEN;
2503		break;
2504	case SYS_RES_MEMORY:
2505		bit = PCIM_CMD_MEMEN;
2506		break;
2507	default:
2508		return (EINVAL);
2509	}
2510	pci_set_command_bit(dev, child, bit);
2511	return (0);
2512}
2513
2514int
2515pci_disable_io_method(device_t dev, device_t child, int space)
2516{
2517	uint16_t bit;
2518
2519	switch(space) {
2520	case SYS_RES_IOPORT:
2521		bit = PCIM_CMD_PORTEN;
2522		break;
2523	case SYS_RES_MEMORY:
2524		bit = PCIM_CMD_MEMEN;
2525		break;
2526	default:
2527		return (EINVAL);
2528	}
2529	pci_clear_command_bit(dev, child, bit);
2530	return (0);
2531}
2532
2533/*
2534 * New style pci driver.  Parent device is either a pci-host-bridge or a
2535 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2536 */
2537
2538void
2539pci_print_verbose(struct pci_devinfo *dinfo)
2540{
2541
2542	if (bootverbose) {
2543		pcicfgregs *cfg = &dinfo->cfg;
2544
2545		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2546		    cfg->vendor, cfg->device, cfg->revid);
2547		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2548		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2549		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2550		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2551		    cfg->mfdev);
2552		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2553		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2554		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2555		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2556		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2557		if (cfg->intpin > 0)
2558			printf("\tintpin=%c, irq=%d\n",
2559			    cfg->intpin +'a' -1, cfg->intline);
2560		if (cfg->pp.pp_cap) {
2561			uint16_t status;
2562
2563			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2564			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2565			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2566			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2567			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2568			    status & PCIM_PSTAT_DMASK);
2569		}
2570		if (cfg->msi.msi_location) {
2571			int ctrl;
2572
2573			ctrl = cfg->msi.msi_ctrl;
2574			printf("\tMSI supports %d message%s%s%s\n",
2575			    cfg->msi.msi_msgnum,
2576			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2577			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2578			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2579		}
2580		if (cfg->msix.msix_location) {
2581			printf("\tMSI-X supports %d message%s ",
2582			    cfg->msix.msix_msgnum,
2583			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2584			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2585				printf("in map 0x%x\n",
2586				    cfg->msix.msix_table_bar);
2587			else
2588				printf("in maps 0x%x and 0x%x\n",
2589				    cfg->msix.msix_table_bar,
2590				    cfg->msix.msix_pba_bar);
2591		}
2592	}
2593}
2594
2595static int
2596pci_porten(device_t dev)
2597{
2598	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2599}
2600
2601static int
2602pci_memen(device_t dev)
2603{
2604	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2605}
2606
/*
 * Read the current value of BAR @reg and probe its size mask.  The
 * BAR's current contents are returned in *mapp and the value read back
 * after writing all 1's (from which the BAR size is derived) in
 * *testvalp.  Decoding is disabled around the sizing writes and the
 * original BAR value is restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR spans this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2670
/*
 * Program the BAR described by @pm with address @base, then re-read the
 * hardware value back into pm->pm_value so the cached copy reflects
 * what the device actually latched.  64-bit BARs span two registers.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read so pm_value matches what the hardware accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2691
2692struct pci_map *
2693pci_find_bar(device_t dev, int reg)
2694{
2695	struct pci_devinfo *dinfo;
2696	struct pci_map *pm;
2697
2698	dinfo = device_get_ivars(dev);
2699	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2700		if (pm->pm_reg == reg)
2701			return (pm);
2702	}
2703	return (NULL);
2704}
2705
2706int
2707pci_bar_enabled(device_t dev, struct pci_map *pm)
2708{
2709	struct pci_devinfo *dinfo;
2710	uint16_t cmd;
2711
2712	dinfo = device_get_ivars(dev);
2713	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2714	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2715		return (0);
2716	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2717	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2718		return ((cmd & PCIM_CMD_MEMEN) != 0);
2719	else
2720		return ((cmd & PCIM_CMD_PORTEN) != 0);
2721}
2722
/*
 * Allocate a tracking record for the BAR at config register @reg with
 * the given raw value and log2 size, and insert it into the device's
 * list of maps, which is kept sorted by register offset.  Returns the
 * new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record keeps the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2747
2748static void
2749pci_restore_bars(device_t dev)
2750{
2751	struct pci_devinfo *dinfo;
2752	struct pci_map *pm;
2753	int ln2range;
2754
2755	dinfo = device_get_ivars(dev);
2756	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2757		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2758			ln2range = 32;
2759		else
2760			ln2range = pci_maprange(pm->pm_value);
2761		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2762		if (ln2range == 64)
2763			pci_write_config(dev, pm->pm_reg + 4,
2764			    pm->pm_value >> 32, 4);
2765	}
2766}
2767
2768/*
2769 * Add a resource based on a pci map register. Return 1 if the map
2770 * register is a 32bit map register or 2 if it is a 64bit register.
2771 */
2772static int
2773pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2774    int force, int prefetch)
2775{
2776	struct pci_map *pm;
2777	pci_addr_t base, map, testval;
2778	pci_addr_t start, end, count;
2779	int barlen, basezero, flags, maprange, mapsize, type;
2780	uint16_t cmd;
2781	struct resource *res;
2782
2783	/*
2784	 * The BAR may already exist if the device is a CardBus card
2785	 * whose CIS is stored in this BAR.
2786	 */
2787	pm = pci_find_bar(dev, reg);
2788	if (pm != NULL) {
2789		maprange = pci_maprange(pm->pm_value);
2790		barlen = maprange == 64 ? 2 : 1;
2791		return (barlen);
2792	}
2793
2794	pci_read_bar(dev, reg, &map, &testval);
2795	if (PCI_BAR_MEM(map)) {
2796		type = SYS_RES_MEMORY;
2797		if (map & PCIM_BAR_MEM_PREFETCH)
2798			prefetch = 1;
2799	} else
2800		type = SYS_RES_IOPORT;
2801	mapsize = pci_mapsize(testval);
2802	base = pci_mapbase(map);
2803#ifdef __PCI_BAR_ZERO_VALID
2804	basezero = 0;
2805#else
2806	basezero = base == 0;
2807#endif
2808	maprange = pci_maprange(map);
2809	barlen = maprange == 64 ? 2 : 1;
2810
2811	/*
2812	 * For I/O registers, if bottom bit is set, and the next bit up
2813	 * isn't clear, we know we have a BAR that doesn't conform to the
2814	 * spec, so ignore it.  Also, sanity check the size of the data
2815	 * areas to the type of memory involved.  Memory must be at least
2816	 * 16 bytes in size, while I/O ranges must be at least 4.
2817	 */
2818	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2819		return (barlen);
2820	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2821	    (type == SYS_RES_IOPORT && mapsize < 2))
2822		return (barlen);
2823
2824	/* Save a record of this BAR. */
2825	pm = pci_add_bar(dev, reg, map, mapsize);
2826	if (bootverbose) {
2827		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2828		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2829		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2830			printf(", port disabled\n");
2831		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2832			printf(", memory disabled\n");
2833		else
2834			printf(", enabled\n");
2835	}
2836
2837	/*
2838	 * If base is 0, then we have problems if this architecture does
2839	 * not allow that.  It is best to ignore such entries for the
2840	 * moment.  These will be allocated later if the driver specifically
2841	 * requests them.  However, some removable busses look better when
2842	 * all resources are allocated, so allow '0' to be overriden.
2843	 *
2844	 * Similarly treat maps whose values is the same as the test value
2845	 * read back.  These maps have had all f's written to them by the
2846	 * BIOS in an attempt to disable the resources.
2847	 */
2848	if (!force && (basezero || map == testval))
2849		return (barlen);
2850	if ((u_long)base != base) {
2851		device_printf(bus,
2852		    "pci%d:%d:%d:%d bar %#x too many address bits",
2853		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2854		    pci_get_function(dev), reg);
2855		return (barlen);
2856	}
2857
2858	/*
2859	 * This code theoretically does the right thing, but has
2860	 * undesirable side effects in some cases where peripherals
2861	 * respond oddly to having these bits enabled.  Let the user
2862	 * be able to turn them off (since pci_enable_io_modes is 1 by
2863	 * default).
2864	 */
2865	if (pci_enable_io_modes) {
2866		/* Turn on resources that have been left off by a lazy BIOS */
2867		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2868			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2869			cmd |= PCIM_CMD_PORTEN;
2870			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2871		}
2872		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2873			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2874			cmd |= PCIM_CMD_MEMEN;
2875			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2876		}
2877	} else {
2878		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2879			return (barlen);
2880		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2881			return (barlen);
2882	}
2883
2884	count = (pci_addr_t)1 << mapsize;
2885	flags = RF_ALIGNMENT_LOG2(mapsize);
2886	if (prefetch)
2887		flags |= RF_PREFETCHABLE;
2888	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2889		start = 0;	/* Let the parent decide. */
2890		end = ~0ul;
2891	} else {
2892		start = base;
2893		end = base + count - 1;
2894	}
2895	resource_list_add(rl, type, reg, start, end, count);
2896
2897	/*
2898	 * Try to allocate the resource for this BAR from our parent
2899	 * so that this resource range is already reserved.  The
2900	 * driver for this device will later inherit this resource in
2901	 * pci_alloc_resource().
2902	 */
2903	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2904	    flags);
2905	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2906		/*
2907		 * If the allocation fails, try to allocate a resource for
2908		 * this BAR using any available range.  The firmware felt
2909		 * it was important enough to assign a resource, so don't
2910		 * disable decoding if we can help it.
2911		 */
2912		resource_list_delete(rl, type, reg);
2913		resource_list_add(rl, type, reg, 0, ~0ul, count);
2914		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2915		    count, flags);
2916	}
2917	if (res == NULL) {
2918		/*
2919		 * If the allocation fails, delete the resource list entry
2920		 * and disable decoding for this device.
2921		 *
2922		 * If the driver requests this resource in the future,
2923		 * pci_reserve_map() will try to allocate a fresh
2924		 * resource range.
2925		 */
2926		resource_list_delete(rl, type, reg);
2927		pci_disable_io(dev, type);
2928		if (bootverbose)
2929			device_printf(bus,
2930			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2931			    pci_get_domain(dev), pci_get_bus(dev),
2932			    pci_get_slot(dev), pci_get_function(dev), reg);
2933	} else {
2934		start = rman_get_start(res);
2935		pci_write_bar(dev, pm, start);
2936	}
2937	return (barlen);
2938}
2939
2940/*
2941 * For ATA devices we need to decide early what addressing mode to use.
2942 * Legacy demands that the primary and secondary ATA ports sits on the
2943 * same addresses that old ISA hardware did. This dictates that we use
2944 * those addresses and ignore the BAR's if we cannot set PCI native
2945 * addressing mode.
2946 */
2947static void
2948pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2949    uint32_t prefetchmask)
2950{
2951	struct resource *r;
2952	int rid, type, progif;
2953#if 0
2954	/* if this device supports PCI native addressing use it */
2955	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2956	if ((progif & 0x8a) == 0x8a) {
2957		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2958		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2959			printf("Trying ATA native PCI addressing mode\n");
2960			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2961		}
2962	}
2963#endif
2964	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2965	type = SYS_RES_IOPORT;
2966	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2967		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2968		    prefetchmask & (1 << 0));
2969		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2970		    prefetchmask & (1 << 1));
2971	} else {
2972		rid = PCIR_BAR(0);
2973		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2974		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2975		    0x1f7, 8, 0);
2976		rid = PCIR_BAR(1);
2977		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2978		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2979		    0x3f6, 1, 0);
2980	}
2981	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2982		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2983		    prefetchmask & (1 << 2));
2984		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2985		    prefetchmask & (1 << 3));
2986	} else {
2987		rid = PCIR_BAR(2);
2988		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2989		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2990		    0x177, 8, 0);
2991		rid = PCIR_BAR(3);
2992		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2993		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2994		    0x376, 1, 0);
2995	}
2996	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2997	    prefetchmask & (1 << 4));
2998	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2999	    prefetchmask & (1 << 5));
3000}
3001
/*
 * Determine the legacy INTx IRQ for the device and add it as the rid 0
 * SYS_RES_IRQ resource.  The IRQ may come from (in order) a user
 * tunable, the intline register, or interrupt routing performed by the
 * parent bus; @force_route prefers bus routing over the intline value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3049
/*
 * Perform early OHCI takeover from SMM.
 *
 * If the BIOS (SMM) owns the controller (OHCI_IR set in the control
 * register), request an ownership change and poll for up to 100ms for
 * the BIOS to release it; if it does not respond, reset the host
 * controller.  Finally, interrupts are masked so the BIOS can no
 * longer be notified.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for the BIOS to clear OHCI_IR. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3086
3087/* Perform early UHCI takeover from SMM. */
3088static void
3089uhci_early_takeover(device_t self)
3090{
3091	struct resource *res;
3092	int rid;
3093
3094	/*
3095	 * Set the PIRQD enable bit and switch off all the others. We don't
3096	 * want legacy support to interfere with us XXX Does this also mean
3097	 * that the BIOS won't touch the keyboard anymore if it is connected
3098	 * to the ports of the root hub?
3099	 */
3100	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3101
3102	/* Disable interrupts */
3103	rid = PCI_UHCI_BASE_REG;
3104	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3105	if (res != NULL) {
3106		bus_write_2(res, UHCI_INTR, 0);
3107		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3108	}
3109}
3110
/*
 * Perform early EHCI takeover from SMM.
 *
 * Walk the extended capability list (rooted at HCCPARAMS, with each
 * entry read through PCI config space) looking for the USB legacy
 * support capability.  If the BIOS semaphore is held, set the OS
 * semaphore and poll for up to 100ms for the BIOS to release
 * ownership, then mask controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the capability/operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* Capability entries live in PCI config space for EHCI. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3166
/*
 * Perform early XHCI takeover from SMM.
 *
 * Walk the extended capability list (offsets are in 32-bit dwords,
 * hence the << 2 scaling) looking for the USB legacy support
 * capability.  If the BIOS semaphore is held, set the OS semaphore
 * and poll for up to 5 seconds for the BIOS to release ownership,
 * then stop the controller by clearing USBCMD.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the capability/operational registers via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones seed makes XHCI_XECP_NEXT(eec) nonzero on entry. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/* Read back status to flush the posted write. */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3228
/*
 * Populate the device's resource list from its BARs, assign its
 * interrupt, and perform early USB controller takeover from SMM where
 * applicable.  'force' and 'prefetchmask' are passed through to
 * pci_add_map() (bit i of prefetchmask marks BAR i as prefetchable).
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns how many map slots the BAR used. */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take early control of USB controllers away from the BIOS. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3302
3303static struct pci_devinfo *
3304pci_identify_function(device_t pcib, device_t dev, int domain, int busno,
3305    int slot, int func, size_t dinfo_size)
3306{
3307	struct pci_devinfo *dinfo;
3308
3309	dinfo = pci_read_device(pcib, domain, busno, slot, func, dinfo_size);
3310	if (dinfo != NULL)
3311		pci_add_child(dev, dinfo);
3312
3313	return (dinfo);
3314}
3315
/*
 * Enumerate all slots/functions on the given bus and add a child
 * device for each function found.  Slot 0 function 0 is identified
 * first so that ARI can be enabled (if supported and allowed by the
 * pci_enable_ari tunable) before scanning, since ARI changes the
 * legal slot/function space.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;
	int first_func;

	/*
	 * Try to detect a device at slot 0, function 0.  If it exists, try to
	 * enable ARI.  We must enable ARI before detecting the rest of the
	 * functions on this bus as ARI changes the set of slots and functions
	 * that are legal on this bus.
	 */
	dinfo = pci_identify_function(pcib, dev, domain, busno, 0, 0,
	    dinfo_size);
	if (dinfo != NULL && pci_enable_ari)
		PCIB_TRY_ENABLE_ARI(pcib, dinfo->cfg.dev);

	/*
	 * Start looking for new devices on slot 0 at function 1 because we
	 * just identified the device at slot 0, function 0.
	 */
	first_func = 1;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Assume a single-function device until proven otherwise. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCIB_MAXFUNCS(pcib);
		for (f = first_func; f <= pcifunchigh; f++)
			pci_identify_function(pcib, dev, domain, busno, s, f,
			    dinfo_size);

		/* For slots after slot 0 we need to check for function 0. */
		first_func = 0;
	}
#undef REG
}
3365
/*
 * Create a newbus child for a discovered PCI function and hook it up:
 * attach the devinfo as ivars, initialize its resource list, snapshot
 * its config space (cfg_save before cfg_restore ensures the saved
 * state reflects the device as found), and add its BAR/IRQ resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3377
3378static int
3379pci_probe(device_t dev)
3380{
3381
3382	device_set_desc(dev, "PCI bus");
3383
3384	/* Allow other subclasses to override this driver. */
3385	return (BUS_PROBE_GENERIC);
3386}
3387
/*
 * Common attach work shared by pci_attach() and subclassed bus
 * drivers: report domain/bus numbers and set up the softc's DMA tag.
 * On platforms that define PCI_DMA_BOUNDARY, a boundary-restricted
 * tag is created for top-level PCI buses (those whose grandparent is
 * not itself a PCI bus); otherwise the parent's tag is inherited.
 * Returns 0; DMA tag creation failure falls back to the parent tag.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/* Only the top-level PCI bus in a hierarchy gets its own tag. */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3422
/*
 * Attach method for the generic PCI bus driver: perform the common
 * setup, enumerate all children on this bus, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3443
3444static void
3445pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3446    int state)
3447{
3448	device_t child, pcib;
3449	struct pci_devinfo *dinfo;
3450	int dstate, i;
3451
3452	/*
3453	 * Set the device to the given state.  If the firmware suggests
3454	 * a different power state, use it instead.  If power management
3455	 * is not present, the firmware is responsible for managing
3456	 * device power.  Skip children who aren't attached since they
3457	 * are handled separately.
3458	 */
3459	pcib = device_get_parent(dev);
3460	for (i = 0; i < numdevs; i++) {
3461		child = devlist[i];
3462		dinfo = device_get_ivars(child);
3463		dstate = state;
3464		if (device_is_attached(child) &&
3465		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3466			pci_set_powerstate(child, dstate);
3467	}
3468}
3469
/*
 * Suspend method for the PCI bus: save each child's config space,
 * suspend all children, and then (if pci_do_power_suspend is set)
 * power them down to D3.  Config state is saved before the children
 * are suspended and powered off so it can be restored on resume.
 * Returns 0 on success or an errno from the failing step.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3501
/*
 * Resume method for the PCI bus: power children back up to D0 (if
 * pci_do_power_resume is set), restore their saved config space, and
 * resume them.  Devices that other hardware may depend on (display,
 * memory, bridges, base peripherals) are resumed first, then the
 * rest.  Returns 0 on success or an errno from device_get_children().
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-save state for children with no attached driver. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3557
/*
 * Locate the "pci_vendor_data" database preloaded by the loader and
 * publish it via pci_vendordata/pci_vendordata_size for use by
 * pci_describe_device().  If no database was preloaded, the globals
 * are left untouched.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * terminate the database
			 *
			 * NOTE(review): this stores one byte past the
			 * fetched size; presumably the preloaded image
			 * leaves room for the terminator -- confirm
			 * against the loader's allocation.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3577
/*
 * Bus method invoked when a new driver is loaded: give the driver a
 * chance to identify new children, then re-probe every child that
 * currently has no driver.  Config state is restored before probing
 * in case the device was powered down; if the probe/attach fails, the
 * child is treated as detached again so its state is re-saved.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only re-probe children without an attached driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3606
/*
 * Bus method to set up an interrupt handler for a child.  After the
 * generic setup succeeds, direct children get additional PCI work:
 * rid 0 (legacy INTx) has INTx unmasked in the command register;
 * rids > 0 are MSI/MSI-X vectors, which are lazily mapped through the
 * parent bridge (PCIB_MAP_MSI) and enabled on first use, with
 * per-vector handler counts maintained for teardown.  On mapping
 * failure the generic handler is torn down and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vector on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI when the first handler is added. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/*
		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
		 * in which case we "enable" INTx so MSI/MSI-X actually works.
		 */
		if (!pci_has_quirk(pci_get_devid(child),
		    PCI_QUIRK_MSI_INTX_BUG))
			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		else
			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3706
3707int
3708pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3709    void *cookie)
3710{
3711	struct msix_table_entry *mte;
3712	struct resource_list_entry *rle;
3713	struct pci_devinfo *dinfo;
3714	int error, rid;
3715
3716	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3717		return (EINVAL);
3718
3719	/* If this isn't a direct child, just bail out */
3720	if (device_get_parent(child) != dev)
3721		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3722
3723	rid = rman_get_rid(irq);
3724	if (rid == 0) {
3725		/* Mask INTx */
3726		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3727	} else {
3728		/*
3729		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3730		 * decrement the appropriate handlers count and mask the
3731		 * MSI-X message, or disable MSI messages if the count
3732		 * drops to 0.
3733		 */
3734		dinfo = device_get_ivars(child);
3735		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3736		if (rle->res != irq)
3737			return (EINVAL);
3738		if (dinfo->cfg.msi.msi_alloc > 0) {
3739			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3740			    ("MSI-X index too high"));
3741			if (dinfo->cfg.msi.msi_handlers == 0)
3742				return (EINVAL);
3743			dinfo->cfg.msi.msi_handlers--;
3744			if (dinfo->cfg.msi.msi_handlers == 0)
3745				pci_disable_msi(child);
3746		} else {
3747			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3748			    ("No MSI or MSI-X interrupts allocated"));
3749			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3750			    ("MSI-X index too high"));
3751			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3752			if (mte->mte_handlers == 0)
3753				return (EINVAL);
3754			mte->mte_handlers--;
3755			if (mte->mte_handlers == 0)
3756				pci_mask_msix(child, rid - 1);
3757		}
3758	}
3759	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3760	if (rid > 0)
3761		KASSERT(error == 0,
3762		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3763	return (error);
3764}
3765
3766int
3767pci_print_child(device_t dev, device_t child)
3768{
3769	struct pci_devinfo *dinfo;
3770	struct resource_list *rl;
3771	int retval = 0;
3772
3773	dinfo = device_get_ivars(child);
3774	rl = &dinfo->resources;
3775
3776	retval += bus_print_child_header(dev, child);
3777
3778	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3779	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3780	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3781	if (device_get_flags(dev))
3782		retval += printf(" flags %#x", device_get_flags(dev));
3783
3784	retval += printf(" at device %d.%d", pci_get_slot(child),
3785	    pci_get_function(child));
3786
3787	retval += bus_print_child_footer(dev, child);
3788
3789	return (retval);
3790}
3791
/*
 * Class/subclass descriptions used by pci_probe_nomatch() to announce
 * devices with no attached driver.  A subclass of -1 is the fallback
 * entry for its class; 'report' selects whether the device is printed
 * always (1) or only when booting verbose (0).
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
3886
/*
 * Bus method invoked when no driver attaches to a child: print a
 * description of the device (from the loaded vendor database when
 * available, otherwise from the class/subclass table) and save its
 * config state with the device considered detached.  Table entries
 * with report == 0 are only printed when booting verbose.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i, report;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	report = 1;
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
					report = pci_nomatch_tab[i].report;
				}
			}
		}
		if (report || bootverbose) {
			device_printf(dev, "<%s%s%s>",
			    cp ? cp : "",
			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
			    scp ? scp : "");
		}
	}
	if (report || bootverbose) {
		printf(" at device %d.%d (no driver attached)\n",
		    pci_get_slot(child), pci_get_function(child));
	}
	/* Save config state treating the child as detached. */
	pci_cfg_save(child, device_get_ivars(child), 1);
}
3933
/*
 * Bus method invoked when a child driver detaches: reclaim any
 * resources the driver leaked (warning about each kind) and save the
 * device's config state.  Release order matters: IRQs before MSI
 * vectors, and MSI vectors before memory BARs, since each depends on
 * the next still being mapped.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");

	pci_cfg_save(child, dinfo, 1);
}
3961
3962/*
3963 * Parse the PCI device database, if loaded, and return a pointer to a
3964 * description of the device.
3965 *
3966 * The database is flat text formatted as follows:
3967 *
3968 * Any line not in a valid format is ignored.
3969 * Lines are terminated with newline '\n' characters.
3970 *
3971 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3972 * the vendor name.
3973 *
3974 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3975 * - devices cannot be listed without a corresponding VENDOR line.
3976 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3977 * another TAB, then the device name.
3978 */
3979
3980/*
3981 * Assuming (ptr) points to the beginning of a line in the database,
3982 * return the vendor or device and description of the next entry.
3983 * The value of (vendor) or (device) inappropriate for the entry type
3984 * is set to -1.  Returns nonzero at the end of the database.
3985 *
 * Note that this parser is not fully robust in the face of corrupt
 * data; we guard against running off the end of the database by
 * appending a newline to it when we initialise.
3989 */
3990static int
3991pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3992{
3993	char	*cp = *ptr;
3994	int	left;
3995
3996	*device = -1;
3997	*vendor = -1;
3998	**desc = '\0';
3999	for (;;) {
4000		left = pci_vendordata_size - (cp - pci_vendordata);
4001		if (left <= 0) {
4002			*ptr = cp;
4003			return(1);
4004		}
4005
4006		/* vendor entry? */
4007		if (*cp != '\t' &&
4008		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
4009			break;
4010		/* device entry? */
4011		if (*cp == '\t' &&
4012		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
4013			break;
4014
4015		/* skip to next line */
4016		while (*cp != '\n' && left > 0) {
4017			cp++;
4018			left--;
4019		}
4020		if (*cp == '\n') {
4021			cp++;
4022			left--;
4023		}
4024	}
4025	/* skip to next line */
4026	while (*cp != '\n' && left > 0) {
4027		cp++;
4028		left--;
4029	}
4030	if (*cp == '\n' && left > 0)
4031		cp++;
4032	*ptr = cp;
4033	return(0);
4034}
4035
4036static char *
4037pci_describe_device(device_t dev)
4038{
4039	int	vendor, device;
4040	char	*desc, *vp, *dp, *line;
4041
4042	desc = vp = dp = NULL;
4043
4044	/*
4045	 * If we have no vendor data, we can't do anything.
4046	 */
4047	if (pci_vendordata == NULL)
4048		goto out;
4049
4050	/*
4051	 * Scan the vendor data looking for this device
4052	 */
4053	line = pci_vendordata;
4054	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4055		goto out;
4056	for (;;) {
4057		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
4058			goto out;
4059		if (vendor == pci_get_vendor(dev))
4060			break;
4061	}
4062	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
4063		goto out;
4064	for (;;) {
4065		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
4066			*dp = 0;
4067			break;
4068		}
4069		if (vendor != -1) {
4070			*dp = 0;
4071			break;
4072		}
4073		if (device == pci_get_device(dev))
4074			break;
4075	}
4076	if (dp[0] == '\0')
4077		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4078	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4079	    NULL)
4080		sprintf(desc, "%s, %s", vp, dp);
4081out:
4082	if (vp != NULL)
4083		free(vp, M_DEVBUF);
4084	if (dp != NULL)
4085		free(dp, M_DEVBUF);
4086	return(desc);
4087}
4088
4089int
4090pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
4091{
4092	struct pci_devinfo *dinfo;
4093	pcicfgregs *cfg;
4094
4095	dinfo = device_get_ivars(child);
4096	cfg = &dinfo->cfg;
4097
4098	switch (which) {
4099	case PCI_IVAR_ETHADDR:
4100		/*
4101		 * The generic accessor doesn't deal with failure, so
4102		 * we set the return value, then return an error.
4103		 */
4104		*((uint8_t **) result) = NULL;
4105		return (EINVAL);
4106	case PCI_IVAR_SUBVENDOR:
4107		*result = cfg->subvendor;
4108		break;
4109	case PCI_IVAR_SUBDEVICE:
4110		*result = cfg->subdevice;
4111		break;
4112	case PCI_IVAR_VENDOR:
4113		*result = cfg->vendor;
4114		break;
4115	case PCI_IVAR_DEVICE:
4116		*result = cfg->device;
4117		break;
4118	case PCI_IVAR_DEVID:
4119		*result = (cfg->device << 16) | cfg->vendor;
4120		break;
4121	case PCI_IVAR_CLASS:
4122		*result = cfg->baseclass;
4123		break;
4124	case PCI_IVAR_SUBCLASS:
4125		*result = cfg->subclass;
4126		break;
4127	case PCI_IVAR_PROGIF:
4128		*result = cfg->progif;
4129		break;
4130	case PCI_IVAR_REVID:
4131		*result = cfg->revid;
4132		break;
4133	case PCI_IVAR_INTPIN:
4134		*result = cfg->intpin;
4135		break;
4136	case PCI_IVAR_IRQ:
4137		*result = cfg->intline;
4138		break;
4139	case PCI_IVAR_DOMAIN:
4140		*result = cfg->domain;
4141		break;
4142	case PCI_IVAR_BUS:
4143		*result = cfg->bus;
4144		break;
4145	case PCI_IVAR_SLOT:
4146		*result = cfg->slot;
4147		break;
4148	case PCI_IVAR_FUNCTION:
4149		*result = cfg->func;
4150		break;
4151	case PCI_IVAR_CMDREG:
4152		*result = cfg->cmdreg;
4153		break;
4154	case PCI_IVAR_CACHELNSZ:
4155		*result = cfg->cachelnsz;
4156		break;
4157	case PCI_IVAR_MINGNT:
4158		*result = cfg->mingnt;
4159		break;
4160	case PCI_IVAR_MAXLAT:
4161		*result = cfg->maxlat;
4162		break;
4163	case PCI_IVAR_LATTIMER:
4164		*result = cfg->lattimer;
4165		break;
4166	default:
4167		return (ENOENT);
4168	}
4169	return (0);
4170}
4171
4172int
4173pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4174{
4175	struct pci_devinfo *dinfo;
4176
4177	dinfo = device_get_ivars(child);
4178
4179	switch (which) {
4180	case PCI_IVAR_INTPIN:
4181		dinfo->cfg.intpin = value;
4182		return (0);
4183	case PCI_IVAR_ETHADDR:
4184	case PCI_IVAR_SUBVENDOR:
4185	case PCI_IVAR_SUBDEVICE:
4186	case PCI_IVAR_VENDOR:
4187	case PCI_IVAR_DEVICE:
4188	case PCI_IVAR_DEVID:
4189	case PCI_IVAR_CLASS:
4190	case PCI_IVAR_SUBCLASS:
4191	case PCI_IVAR_PROGIF:
4192	case PCI_IVAR_REVID:
4193	case PCI_IVAR_IRQ:
4194	case PCI_IVAR_DOMAIN:
4195	case PCI_IVAR_BUS:
4196	case PCI_IVAR_SLOT:
4197	case PCI_IVAR_FUNCTION:
4198		return (EINVAL);	/* disallow for now */
4199
4200	default:
4201		return (ENOENT);
4202	}
4203}
4204
4205#include "opt_ddb.h"
4206#ifdef DDB
4207#include <ddb/ddb.h>
4208#include <sys/cons.h>
4209
4210/*
4211 * List resources based on pci map registers, used for within ddb
4212 */
4213
4214DB_SHOW_COMMAND(pciregs, db_pci_dump)
4215{
4216	struct pci_devinfo *dinfo;
4217	struct devlist *devlist_head;
4218	struct pci_conf *p;
4219	const char *name;
4220	int i, error, none_count;
4221
4222	none_count = 0;
4223	/* get the head of the device queue */
4224	devlist_head = &pci_devq;
4225
4226	/*
4227	 * Go through the list of devices and print out devices
4228	 */
4229	for (error = 0, i = 0,
4230	     dinfo = STAILQ_FIRST(devlist_head);
4231	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
4232	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4233
4234		/* Populate pd_name and pd_unit */
4235		name = NULL;
4236		if (dinfo->cfg.dev)
4237			name = device_get_name(dinfo->cfg.dev);
4238
4239		p = &dinfo->conf;
4240		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
4241			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
4242			(name && *name) ? name : "none",
4243			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
4244			none_count++,
4245			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
4246			p->pc_sel.pc_func, (p->pc_class << 16) |
4247			(p->pc_subclass << 8) | p->pc_progif,
4248			(p->pc_subdevice << 16) | p->pc_subvendor,
4249			(p->pc_device << 16) | p->pc_vendor,
4250			p->pc_revid, p->pc_hdr);
4251	}
4252}
4253#endif /* DDB */
4254
/*
 * Lazily reserve the backing range for a BAR on first allocation.
 * Sizes the BAR (or reuses the size recorded from an earlier failed
 * attempt), validates that the requested resource type matches the BAR
 * type, reserves a suitably sized and aligned range on the child's
 * resource list, and programs the BAR with the reserved start address.
 * Returns the reserved (inactive) resource or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember the BAR so future attempts skip the probe. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* The requested type must agree with the BAR's memory/I-O flavor. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4352
/*
 * Allocate a resource on behalf of child.  Requests from grandchildren
 * are forwarded to our own parent.  Legacy interrupts are routed on
 * demand (unless MSI/MSI-X is already in use), and BAR-backed memory
 * and I/O-port ranges are lazily reserved via pci_reserve_map() on
 * first use.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/* Requests from grandchildren go up the tree unmodified. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* The actual allocation always comes from the resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4426
4427int
4428pci_release_resource(device_t dev, device_t child, int type, int rid,
4429    struct resource *r)
4430{
4431	struct pci_devinfo *dinfo;
4432	struct resource_list *rl;
4433	pcicfgregs *cfg;
4434
4435	if (device_get_parent(child) != dev)
4436		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
4437		    type, rid, r));
4438
4439	dinfo = device_get_ivars(child);
4440	cfg = &dinfo->cfg;
4441#ifdef NEW_PCIB
4442	/*
4443	 * PCI-PCI bridge I/O window resources are not BARs.  For
4444	 * those allocations just pass the request up the tree.
4445	 */
4446	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
4447	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
4448		switch (rid) {
4449		case PCIR_IOBASEL_1:
4450		case PCIR_MEMBASE_1:
4451		case PCIR_PMBASEL_1:
4452			return (bus_generic_release_resource(dev, child, type,
4453			    rid, r));
4454		}
4455	}
4456#endif
4457
4458	rl = &dinfo->resources;
4459	return (resource_list_release(rl, dev, child, type, rid, r));
4460}
4461
4462int
4463pci_activate_resource(device_t dev, device_t child, int type, int rid,
4464    struct resource *r)
4465{
4466	struct pci_devinfo *dinfo;
4467	int error;
4468
4469	error = bus_generic_activate_resource(dev, child, type, rid, r);
4470	if (error)
4471		return (error);
4472
4473	/* Enable decoding in the command register when activating BARs. */
4474	if (device_get_parent(child) == dev) {
4475		/* Device ROMs need their decoding explicitly enabled. */
4476		dinfo = device_get_ivars(child);
4477		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4478			pci_write_bar(child, pci_find_bar(child, rid),
4479			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4480		switch (type) {
4481		case SYS_RES_IOPORT:
4482		case SYS_RES_MEMORY:
4483			error = PCI_ENABLE_IO(dev, child, type);
4484			break;
4485		}
4486	}
4487	return (error);
4488}
4489
4490int
4491pci_deactivate_resource(device_t dev, device_t child, int type,
4492    int rid, struct resource *r)
4493{
4494	struct pci_devinfo *dinfo;
4495	int error;
4496
4497	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4498	if (error)
4499		return (error);
4500
4501	/* Disable decoding for device ROMs. */
4502	if (device_get_parent(child) == dev) {
4503		dinfo = device_get_ivars(child);
4504		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4505			pci_write_bar(child, pci_find_bar(child, rid),
4506			    rman_get_start(r));
4507	}
4508	return (0);
4509}
4510
/*
 * Detach and destroy a child device, releasing everything on its
 * resource list.  Memory and I/O decoding are switched off first so the
 * hardware cannot respond on ranges that are about to be freed.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				/*
				 * The detached driver leaked this resource;
				 * complain and force its release before
				 * unreserving the entry.
				 */
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4550
4551void
4552pci_delete_resource(device_t dev, device_t child, int type, int rid)
4553{
4554	struct pci_devinfo *dinfo;
4555	struct resource_list *rl;
4556	struct resource_list_entry *rle;
4557
4558	if (device_get_parent(child) != dev)
4559		return;
4560
4561	dinfo = device_get_ivars(child);
4562	rl = &dinfo->resources;
4563	rle = resource_list_find(rl, type, rid);
4564	if (rle == NULL)
4565		return;
4566
4567	if (rle->res) {
4568		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4569		    resource_list_busy(rl, type, rid)) {
4570			device_printf(dev, "delete_resource: "
4571			    "Resource still owned by child, oops. "
4572			    "(type=%d, rid=%d, addr=%lx)\n",
4573			    type, rid, rman_get_start(rle->res));
4574			return;
4575		}
4576		resource_list_unreserve(rl, dev, child, type, rid);
4577	}
4578	resource_list_delete(rl, type, rid);
4579}
4580
4581struct resource_list *
4582pci_get_resource_list (device_t dev, device_t child)
4583{
4584	struct pci_devinfo *dinfo = device_get_ivars(child);
4585
4586	return (&dinfo->resources);
4587}
4588
4589bus_dma_tag_t
4590pci_get_dma_tag(device_t bus, device_t dev)
4591{
4592	struct pci_softc *sc = device_get_softc(bus);
4593
4594	return (sc->sc_dma_tag);
4595}
4596
4597uint32_t
4598pci_read_config_method(device_t dev, device_t child, int reg, int width)
4599{
4600	struct pci_devinfo *dinfo = device_get_ivars(child);
4601	pcicfgregs *cfg = &dinfo->cfg;
4602
4603	return (PCIB_READ_CONFIG(device_get_parent(dev),
4604	    cfg->bus, cfg->slot, cfg->func, reg, width));
4605}
4606
4607void
4608pci_write_config_method(device_t dev, device_t child, int reg,
4609    uint32_t val, int width)
4610{
4611	struct pci_devinfo *dinfo = device_get_ivars(child);
4612	pcicfgregs *cfg = &dinfo->cfg;
4613
4614	PCIB_WRITE_CONFIG(device_get_parent(dev),
4615	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4616}
4617
4618int
4619pci_child_location_str_method(device_t dev, device_t child, char *buf,
4620    size_t buflen)
4621{
4622
4623	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4624	    pci_get_function(child));
4625	return (0);
4626}
4627
4628int
4629pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4630    size_t buflen)
4631{
4632	struct pci_devinfo *dinfo;
4633	pcicfgregs *cfg;
4634
4635	dinfo = device_get_ivars(child);
4636	cfg = &dinfo->cfg;
4637	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4638	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4639	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4640	    cfg->progif);
4641	return (0);
4642}
4643
4644int
4645pci_assign_interrupt_method(device_t dev, device_t child)
4646{
4647	struct pci_devinfo *dinfo = device_get_ivars(child);
4648	pcicfgregs *cfg = &dinfo->cfg;
4649
4650	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4651	    cfg->intpin));
4652}
4653
4654static int
4655pci_modevent(module_t mod, int what, void *arg)
4656{
4657	static struct cdev *pci_cdev;
4658
4659	switch (what) {
4660	case MOD_LOAD:
4661		STAILQ_INIT(&pci_devq);
4662		pci_generation = 0;
4663		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4664		    "pci");
4665		pci_load_vendor_data();
4666		break;
4667
4668	case MOD_UNLOAD:
4669		destroy_dev(pci_cdev);
4670		break;
4671	}
4672
4673	return (0);
4674}
4675
/*
 * Re-write the PCI Express control registers saved by
 * pci_cfg_save_pcie().  Which registers exist depends on the capability
 * version and port type, so the gating conditions here must mirror the
 * ones used on the save path exactly.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* The device control register exists for every version/type. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot registers: v2+, or v1 ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" register block only exists from capability v2 on. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4711
4712static void
4713pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4714{
4715	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4716	    dinfo->cfg.pcix.pcix_command,  2);
4717}
4718
/*
 * Restore the configuration state of dev previously captured by
 * pci_cfg_save(): power the device up, re-write the BARs and the
 * writable type 0 header registers, then the PCI-e/PCI-X capability
 * registers and any MSI/MSI-X setup.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4768
/*
 * Snapshot the PCI Express control registers so they can be re-written
 * by pci_cfg_restore_pcie().  Which registers exist depends on the
 * capability version and port type; the same gating conditions are used
 * on the restore path.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* The device control register exists for every version/type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot registers: v2+, or v1 ports that implement a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" register block only exists from capability v2 on. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4806
4807static void
4808pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4809{
4810	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4811	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4812}
4813
/*
 * Save the writable portion of the type 0 configuration header plus any
 * PCI-e/PCI-X capability control registers into dinfo so they can be
 * restored later by pci_cfg_restore().  If setstate is nonzero, also
 * power the device down subject to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Intentional fallthroughs below: each level includes the previous. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Agressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4899
4900/* Wrapper APIs suitable for device driver use. */
4901void
4902pci_save_state(device_t dev)
4903{
4904	struct pci_devinfo *dinfo;
4905
4906	dinfo = device_get_ivars(dev);
4907	pci_cfg_save(dev, dinfo, 0);
4908}
4909
4910void
4911pci_restore_state(device_t dev)
4912{
4913	struct pci_devinfo *dinfo;
4914
4915	dinfo = device_get_ivars(dev);
4916	pci_cfg_restore(dev, dinfo);
4917}
4918
4919static uint16_t
4920pci_get_rid_method(device_t dev, device_t child)
4921{
4922
4923	return (PCIB_GET_RID(device_get_parent(dev), child));
4924}
4925