pci.c revision 267002
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/pci/pci.c 267002 2014-06-03 06:48:35Z mav $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
73#define	PCIR_IS_BIOS(cfg, reg)						\
74	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76
77static int		pci_has_quirk(uint32_t devid, int quirk);
78static pci_addr_t	pci_mapbase(uint64_t mapreg);
79static const char	*pci_maptype(uint64_t mapreg);
80static int		pci_mapsize(uint64_t testval);
81static int		pci_maprange(uint64_t mapreg);
82static pci_addr_t	pci_rombase(uint64_t mapreg);
83static int		pci_romsize(uint64_t testval);
84static void		pci_fixancient(pcicfgregs *cfg);
85static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86
87static int		pci_porten(device_t dev);
88static int		pci_memen(device_t dev);
89static void		pci_assign_interrupt(device_t bus, device_t dev,
90			    int force_route);
91static int		pci_add_map(device_t bus, device_t dev, int reg,
92			    struct resource_list *rl, int force, int prefetch);
93static int		pci_probe(device_t dev);
94static int		pci_attach(device_t dev);
95static void		pci_load_vendor_data(void);
96static int		pci_describe_parse_line(char **ptr, int *vendor,
97			    int *device, char **desc);
98static char		*pci_describe_device(device_t dev);
99static int		pci_modevent(module_t mod, int what, void *arg);
100static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
101			    pcicfgregs *cfg);
102static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
103static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
104			    int reg, uint32_t *data);
105#if 0
106static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
107			    int reg, uint32_t data);
108#endif
109static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
110static void		pci_disable_msi(device_t dev);
111static void		pci_enable_msi(device_t dev, uint64_t address,
112			    uint16_t data);
113static void		pci_enable_msix(device_t dev, u_int index,
114			    uint64_t address, uint32_t data);
115static void		pci_mask_msix(device_t dev, u_int index);
116static void		pci_unmask_msix(device_t dev, u_int index);
117static int		pci_msi_blacklisted(void);
118static int		pci_msix_blacklisted(void);
119static void		pci_resume_msi(device_t dev);
120static void		pci_resume_msix(device_t dev);
121static int		pci_remap_intr_method(device_t bus, device_t dev,
122			    u_int irq);
123
/*
 * kobj method table for the PCI bus driver: device lifecycle, generic
 * bus, and PCI-specific operations dispatched through pci_if/pcib_if.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
181
182DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));
183
184static devclass_t pci_devclass;
185DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
186MODULE_VERSION(pci, 1);
187
188static char	*pci_vendordata;
189static size_t	pci_vendordata_size;
190
/* One entry in the static table of per-device workarounds below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
	int	arg1;		/* meaning depends on 'type' (e.g. register) */
	int	arg2;		/* meaning depends on 'type' */
};

static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* terminator: table walkers stop at devid == 0 */
};
260
261/* map register information */
262#define	PCI_MAPMEM	0x01	/* memory map */
263#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
264#define	PCI_MAPPORT	0x04	/* port map */
265
266struct devlist pci_devq;
267uint32_t pci_generation;
268uint32_t pci_numdevs = 0;
269static int pcie_chipset, pcix_chipset;
270
271/* sysctl vars */
272SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
273
274static int pci_enable_io_modes = 1;
275TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
276SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
277    &pci_enable_io_modes, 1,
278    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
279enable these bits correctly.  We'd like to do this all the time, but there\n\
280are some peripherals that this causes problems with.");
281
282static int pci_do_realloc_bars = 0;
283TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
284SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
285    &pci_do_realloc_bars, 0,
286    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
287
288static int pci_do_power_nodriver = 0;
289TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
290SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
291    &pci_do_power_nodriver, 0,
292  "Place a function into D3 state when no driver attaches to it.  0 means\n\
293disable.  1 means conservatively place devices into D3 state.  2 means\n\
294agressively place devices into D3 state.  3 means put absolutely everything\n\
295in D3 state.");
296
297int pci_do_power_resume = 1;
298TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
299SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
300    &pci_do_power_resume, 1,
301  "Transition from D3 -> D0 on resume.");
302
303int pci_do_power_suspend = 1;
304TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
305SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
306    &pci_do_power_suspend, 1,
307  "Transition from D0 -> D3 on suspend.");
308
309static int pci_do_msi = 1;
310TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
311SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
312    "Enable support for MSI interrupts");
313
314static int pci_do_msix = 1;
315TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
316SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
317    "Enable support for MSI-X interrupts");
318
319static int pci_honor_msi_blacklist = 1;
320TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
321SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
322    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
323
324#if defined(__i386__) || defined(__amd64__)
325static int pci_usb_takeover = 1;
326#else
327static int pci_usb_takeover = 0;
328#endif
329TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
330SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
331    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
332Disable this if you depend on BIOS emulation of USB devices, that is\n\
333you use USB devices (like keyboard or mouse) but do not load USB drivers");
334
335static int pci_clear_bars;
336TUNABLE_INT("hw.pci.clear_bars", &pci_clear_bars);
337SYSCTL_INT(_hw_pci, OID_AUTO, clear_bars, CTLFLAG_RDTUN, &pci_clear_bars, 0,
338    "Ignore firmware-assigned resources for BARs.");
339
340static int
341pci_has_quirk(uint32_t devid, int quirk)
342{
343	const struct pci_quirk *q;
344
345	for (q = &pci_quirks[0]; q->devid; q++) {
346		if (q->devid == devid && q->type == quirk)
347			return (1);
348	}
349	return (0);
350}
351
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper around the domain-aware lookup. */
	return (pci_find_dbsf(0, bus, slot, func));
}
360
361/* Find a device_t by domain/bus/slot/function */
362
363device_t
364pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
365{
366	struct pci_devinfo *dinfo;
367
368	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
369		if ((dinfo->cfg.domain == domain) &&
370		    (dinfo->cfg.bus == bus) &&
371		    (dinfo->cfg.slot == slot) &&
372		    (dinfo->cfg.func == func)) {
373			return (dinfo->cfg.dev);
374		}
375	}
376
377	return (NULL);
378}
379
380/* Find a device_t by vendor/device ID */
381
382device_t
383pci_find_device(uint16_t vendor, uint16_t device)
384{
385	struct pci_devinfo *dinfo;
386
387	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
388		if ((dinfo->cfg.vendor == vendor) &&
389		    (dinfo->cfg.device == device)) {
390			return (dinfo->cfg.dev);
391		}
392	}
393
394	return (NULL);
395}
396
397device_t
398pci_find_class(uint8_t class, uint8_t subclass)
399{
400	struct pci_devinfo *dinfo;
401
402	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
403		if (dinfo->cfg.baseclass == class &&
404		    dinfo->cfg.subclass == subclass) {
405			return (dinfo->cfg.dev);
406		}
407	}
408
409	return (NULL);
410}
411
/*
 * printf(9) wrapper that prefixes the message with the device's
 * "pci<domain>:<bus>:<slot>:<func>: " location.  Returns the total
 * number of characters printed, like printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
425
426/* return base address of memory or port map */
427
428static pci_addr_t
429pci_mapbase(uint64_t mapreg)
430{
431
432	if (PCI_BAR_MEM(mapreg))
433		return (mapreg & PCIM_BAR_MEM_BASE);
434	else
435		return (mapreg & PCIM_BAR_IO_BASE);
436}
437
438/* return map type of memory or port map */
439
440static const char *
441pci_maptype(uint64_t mapreg)
442{
443
444	if (PCI_BAR_IO(mapreg))
445		return ("I/O Port");
446	if (mapreg & PCIM_BAR_MEM_PREFETCH)
447		return ("Prefetchable Memory");
448	return ("Memory");
449}
450
451/* return log2 of map size decoded for memory or port map */
452
453static int
454pci_mapsize(uint64_t testval)
455{
456	int ln2size;
457
458	testval = pci_mapbase(testval);
459	ln2size = 0;
460	if (testval != 0) {
461		while ((testval & 1) == 0)
462		{
463			ln2size++;
464			testval >>= 1;
465		}
466	}
467	return (ln2size);
468}
469
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
478
479/* return log2 of map size decided for device ROM */
480
481static int
482pci_romsize(uint64_t testval)
483{
484	int ln2size;
485
486	testval = pci_rombase(testval);
487	ln2size = 0;
488	if (testval != 0) {
489		while ((testval & 1) == 0)
490		{
491			ln2size++;
492			testval >>= 1;
493		}
494	}
495	return (ln2size);
496}
497
498/* return log2 of address range supported by map register */
499
500static int
501pci_maprange(uint64_t mapreg)
502{
503	int ln2range = 0;
504
505	if (PCI_BAR_IO(mapreg))
506		ln2range = 32;
507	else
508		switch (mapreg & PCIM_BAR_MEM_TYPE) {
509		case PCIM_BAR_MEM_32:
510			ln2range = 32;
511			break;
512		case PCIM_BAR_MEM_1MB:
513			ln2range = 20;
514			break;
515		case PCIM_BAR_MEM_64:
516			ln2range = 64;
517			break;
518		}
519	return (ln2range);
520}
521
/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */

static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type-0 (normal) headers need fixing up. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
534
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * Config space past the common header depends on the header
	 * type: the subsystem IDs sit at different offsets and each
	 * type has a different number of BARs.
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
558
/*
 * read configuration header into pcicfgregs structure
 *
 * Probes domain/bus/slot/function d:b:s:f via the parent bridge.  If a
 * function is present, allocates a devinfo of 'size' bytes (callers may
 * request a larger, subclassed structure), caches the common config
 * header, reads the capability list, and links the entry onto the
 * global device list.  Returns the new entry, or NULL if no function
 * responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones ID read means no function is present here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* M_WAITOK means this cannot fail; check is defensive. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the common configuration header registers. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Record the multi-function bit separately from the type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		/* Must run after pci_fixancient() may rewrite hdrtype. */
		pci_hdrtypedata(pcib, b, s, f, cfg);

		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the cached header into the exported pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
634
/*
 * Walk the device's capability list and record the location (and any
 * immediately useful registers) of each capability we understand —
 * power management, HyperTransport, MSI, MSI-X, VPD, subvendor,
 * PCI-X, and PCI-express — into *cfg.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register location depends on hdrtype. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Supported message count is encoded as log2. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA are given as a BIR plus an offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
795
796/*
797 * PCI Vital Product Data
798 */
799
800#define	PCI_VPD_TIMEOUT		1000000
801
/*
 * Read one 4-byte-aligned dword of VPD at offset 'reg' into *data.
 * The hardware sets bit 15 of the VPD address register when the read
 * completes; we poll for that and return ENXIO after PCI_VPD_TIMEOUT
 * attempts.  (Uses the REG/WREG macros defined above pci_read_cap().)
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Wait for the completion flag (bit 15) to become set. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
820
#if 0
/*
 * Write one 4-byte-aligned dword of VPD at offset 'reg'.  Setting bit
 * 15 of the address register starts the write; the hardware clears it
 * on completion.  Returns ENXIO after PCI_VPD_TIMEOUT polls.
 * Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Wait for the flag bit (15) to be cleared by the hardware. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
840
841#undef PCI_VPD_TIMEOUT
842
/*
 * Cursor state for sequentially consuming a device's VPD one byte at
 * a time on top of the 32-bit pci_read_vpd_reg() interface.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword read; shifted as used */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running byte sum for "RV" check */
};
851
852static int
853vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
854{
855	uint32_t reg;
856	uint8_t byte;
857
858	if (vrs->bytesinval == 0) {
859		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
860			return (ENXIO);
861		vrs->val = le32toh(reg);
862		vrs->off += 4;
863		byte = vrs->val & 0xff;
864		vrs->bytesinval = 3;
865	} else {
866		vrs->val = vrs->val >> 8;
867		byte = vrs->val & 0xff;
868		vrs->bytesinval--;
869	}
870
871	vrs->cksum += byte;
872	*data = byte;
873	return (0);
874}
875
876static void
877pci_read_vpd(device_t pcib, pcicfgregs *cfg)
878{
879	struct vpd_readstate vrs;
880	int state;
881	int name;
882	int remain;
883	int i;
884	int alloc, off;		/* alloc/off for RO/W arrays */
885	int cksumvalid;
886	int dflen;
887	uint8_t byte;
888	uint8_t byte2;
889
890	/* init vpd reader */
891	vrs.bytesinval = 0;
892	vrs.off = 0;
893	vrs.pcib = pcib;
894	vrs.cfg = cfg;
895	vrs.cksum = 0;
896
897	state = 0;
898	name = remain = i = 0;	/* shut up stupid gcc */
899	alloc = off = 0;	/* shut up stupid gcc */
900	dflen = 0;		/* shut up stupid gcc */
901	cksumvalid = -1;
902	while (state >= 0) {
903		if (vpd_nextbyte(&vrs, &byte)) {
904			state = -2;
905			break;
906		}
907#if 0
908		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
909		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
910		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
911#endif
912		switch (state) {
913		case 0:		/* item name */
914			if (byte & 0x80) {
915				if (vpd_nextbyte(&vrs, &byte2)) {
916					state = -2;
917					break;
918				}
919				remain = byte2;
920				if (vpd_nextbyte(&vrs, &byte2)) {
921					state = -2;
922					break;
923				}
924				remain |= byte2 << 8;
925				if (remain > (0x7f*4 - vrs.off)) {
926					state = -1;
927					pci_printf(cfg,
928					    "invalid VPD data, remain %#x\n",
929					    remain);
930				}
931				name = byte & 0x7f;
932			} else {
933				remain = byte & 0x7;
934				name = (byte >> 3) & 0xf;
935			}
936			switch (name) {
937			case 0x2:	/* String */
938				cfg->vpd.vpd_ident = malloc(remain + 1,
939				    M_DEVBUF, M_WAITOK);
940				i = 0;
941				state = 1;
942				break;
943			case 0xf:	/* End */
944				state = -1;
945				break;
946			case 0x10:	/* VPD-R */
947				alloc = 8;
948				off = 0;
949				cfg->vpd.vpd_ros = malloc(alloc *
950				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
951				    M_WAITOK | M_ZERO);
952				state = 2;
953				break;
954			case 0x11:	/* VPD-W */
955				alloc = 8;
956				off = 0;
957				cfg->vpd.vpd_w = malloc(alloc *
958				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
959				    M_WAITOK | M_ZERO);
960				state = 5;
961				break;
962			default:	/* Invalid data, abort */
963				state = -1;
964				break;
965			}
966			break;
967
968		case 1:	/* Identifier String */
969			cfg->vpd.vpd_ident[i++] = byte;
970			remain--;
971			if (remain == 0)  {
972				cfg->vpd.vpd_ident[i] = '\0';
973				state = 0;
974			}
975			break;
976
977		case 2:	/* VPD-R Keyword Header */
978			if (off == alloc) {
979				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
980				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
981				    M_DEVBUF, M_WAITOK | M_ZERO);
982			}
983			cfg->vpd.vpd_ros[off].keyword[0] = byte;
984			if (vpd_nextbyte(&vrs, &byte2)) {
985				state = -2;
986				break;
987			}
988			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
989			if (vpd_nextbyte(&vrs, &byte2)) {
990				state = -2;
991				break;
992			}
993			cfg->vpd.vpd_ros[off].len = dflen = byte2;
994			if (dflen == 0 &&
995			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
996			    2) == 0) {
997				/*
998				 * if this happens, we can't trust the rest
999				 * of the VPD.
1000				 */
1001				pci_printf(cfg, "bad keyword length: %d\n",
1002				    dflen);
1003				cksumvalid = 0;
1004				state = -1;
1005				break;
1006			} else if (dflen == 0) {
1007				cfg->vpd.vpd_ros[off].value = malloc(1 *
1008				    sizeof(*cfg->vpd.vpd_ros[off].value),
1009				    M_DEVBUF, M_WAITOK);
1010				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1011			} else
1012				cfg->vpd.vpd_ros[off].value = malloc(
1013				    (dflen + 1) *
1014				    sizeof(*cfg->vpd.vpd_ros[off].value),
1015				    M_DEVBUF, M_WAITOK);
1016			remain -= 3;
1017			i = 0;
1018			/* keep in sync w/ state 3's transistions */
1019			if (dflen == 0 && remain == 0)
1020				state = 0;
1021			else if (dflen == 0)
1022				state = 2;
1023			else
1024				state = 3;
1025			break;
1026
1027		case 3:	/* VPD-R Keyword Value */
1028			cfg->vpd.vpd_ros[off].value[i++] = byte;
1029			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1030			    "RV", 2) == 0 && cksumvalid == -1) {
1031				if (vrs.cksum == 0)
1032					cksumvalid = 1;
1033				else {
1034					if (bootverbose)
1035						pci_printf(cfg,
1036					    "bad VPD cksum, remain %hhu\n",
1037						    vrs.cksum);
1038					cksumvalid = 0;
1039					state = -1;
1040					break;
1041				}
1042			}
1043			dflen--;
1044			remain--;
1045			/* keep in sync w/ state 2's transistions */
1046			if (dflen == 0)
1047				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1048			if (dflen == 0 && remain == 0) {
1049				cfg->vpd.vpd_rocnt = off;
1050				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1051				    off * sizeof(*cfg->vpd.vpd_ros),
1052				    M_DEVBUF, M_WAITOK | M_ZERO);
1053				state = 0;
1054			} else if (dflen == 0)
1055				state = 2;
1056			break;
1057
1058		case 4:
1059			remain--;
1060			if (remain == 0)
1061				state = 0;
1062			break;
1063
1064		case 5:	/* VPD-W Keyword Header */
1065			if (off == alloc) {
1066				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1067				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1068				    M_DEVBUF, M_WAITOK | M_ZERO);
1069			}
1070			cfg->vpd.vpd_w[off].keyword[0] = byte;
1071			if (vpd_nextbyte(&vrs, &byte2)) {
1072				state = -2;
1073				break;
1074			}
1075			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1076			if (vpd_nextbyte(&vrs, &byte2)) {
1077				state = -2;
1078				break;
1079			}
1080			cfg->vpd.vpd_w[off].len = dflen = byte2;
1081			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1082			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1083			    sizeof(*cfg->vpd.vpd_w[off].value),
1084			    M_DEVBUF, M_WAITOK);
1085			remain -= 3;
1086			i = 0;
1087			/* keep in sync w/ state 6's transistions */
1088			if (dflen == 0 && remain == 0)
1089				state = 0;
1090			else if (dflen == 0)
1091				state = 5;
1092			else
1093				state = 6;
1094			break;
1095
1096		case 6:	/* VPD-W Keyword Value */
1097			cfg->vpd.vpd_w[off].value[i++] = byte;
1098			dflen--;
1099			remain--;
1100			/* keep in sync w/ state 5's transistions */
1101			if (dflen == 0)
1102				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1103			if (dflen == 0 && remain == 0) {
1104				cfg->vpd.vpd_wcnt = off;
1105				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1106				    off * sizeof(*cfg->vpd.vpd_w),
1107				    M_DEVBUF, M_WAITOK | M_ZERO);
1108				state = 0;
1109			} else if (dflen == 0)
1110				state = 5;
1111			break;
1112
1113		default:
1114			pci_printf(cfg, "invalid state: %d\n", state);
1115			state = -1;
1116			break;
1117		}
1118	}
1119
1120	if (cksumvalid == 0 || state < -1) {
1121		/* read-only data bad, clean up */
1122		if (cfg->vpd.vpd_ros != NULL) {
1123			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1124				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1125			free(cfg->vpd.vpd_ros, M_DEVBUF);
1126			cfg->vpd.vpd_ros = NULL;
1127		}
1128	}
1129	if (state < -1) {
1130		/* I/O error, clean up */
1131		pci_printf(cfg, "failed to read VPD data.\n");
1132		if (cfg->vpd.vpd_ident != NULL) {
1133			free(cfg->vpd.vpd_ident, M_DEVBUF);
1134			cfg->vpd.vpd_ident = NULL;
1135		}
1136		if (cfg->vpd.vpd_w != NULL) {
1137			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1138				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1139			free(cfg->vpd.vpd_w, M_DEVBUF);
1140			cfg->vpd.vpd_w = NULL;
1141		}
1142	}
1143	cfg->vpd.vpd_cached = 1;
1144#undef REG
1145#undef WREG
1146}
1147
1148int
1149pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1150{
1151	struct pci_devinfo *dinfo = device_get_ivars(child);
1152	pcicfgregs *cfg = &dinfo->cfg;
1153
1154	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1155		pci_read_vpd(device_get_parent(dev), cfg);
1156
1157	*identptr = cfg->vpd.vpd_ident;
1158
1159	if (*identptr == NULL)
1160		return (ENXIO);
1161
1162	return (0);
1163}
1164
1165int
1166pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1167	const char **vptr)
1168{
1169	struct pci_devinfo *dinfo = device_get_ivars(child);
1170	pcicfgregs *cfg = &dinfo->cfg;
1171	int i;
1172
1173	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1174		pci_read_vpd(device_get_parent(dev), cfg);
1175
1176	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1177		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1178		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1179			*vptr = cfg->vpd.vpd_ros[i].value;
1180			return (0);
1181		}
1182
1183	*vptr = NULL;
1184	return (ENXIO);
1185}
1186
/*
 * Return a pointer to the device's cached VPD data, reading it from
 * the hardware first if it has not been cached yet.
 *
 * Note: unlike the *_method variants above, 'dev' here is the PCI
 * device itself rather than the pci bus, so two device_get_parent()
 * calls are needed to reach the parent PCI bridge for pci_read_vpd().
 */
struct pcicfg_vpd *
pci_fetch_vpd_list(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
	return (&cfg->vpd);
}
1197
1198/*
1199 * Find the requested HyperTransport capability and return the offset
1200 * in configuration space via the pointer provided.  The function
1201 * returns 0 on success and an error code otherwise.
1202 */
1203int
1204pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1205{
1206	int ptr, error;
1207	uint16_t val;
1208
1209	error = pci_find_cap(child, PCIY_HT, &ptr);
1210	if (error)
1211		return (error);
1212
1213	/*
1214	 * Traverse the capabilities list checking each HT capability
1215	 * to see if it matches the requested HT capability.
1216	 */
1217	while (ptr != 0) {
1218		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1219		if (capability == PCIM_HTCAP_SLAVE ||
1220		    capability == PCIM_HTCAP_HOST)
1221			val &= 0xe000;
1222		else
1223			val &= PCIM_HTCMD_CAP_MASK;
1224		if (val == capability) {
1225			if (capreg != NULL)
1226				*capreg = ptr;
1227			return (0);
1228		}
1229
1230		/* Skip to the next HT capability. */
1231		while (ptr != 0) {
1232			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1233			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1234			    PCIY_HT)
1235				break;
1236		}
1237	}
1238	return (ENOENT);
1239}
1240
1241/*
1242 * Find the requested capability and return the offset in
1243 * configuration space via the pointer provided.  The function returns
1244 * 0 on success and an error code otherwise.
1245 */
1246int
1247pci_find_cap_method(device_t dev, device_t child, int capability,
1248    int *capreg)
1249{
1250	struct pci_devinfo *dinfo = device_get_ivars(child);
1251	pcicfgregs *cfg = &dinfo->cfg;
1252	u_int32_t status;
1253	u_int8_t ptr;
1254
1255	/*
1256	 * Check the CAP_LIST bit of the PCI status register first.
1257	 */
1258	status = pci_read_config(child, PCIR_STATUS, 2);
1259	if (!(status & PCIM_STATUS_CAPPRESENT))
1260		return (ENXIO);
1261
1262	/*
1263	 * Determine the start pointer of the capabilities list.
1264	 */
1265	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1266	case PCIM_HDRTYPE_NORMAL:
1267	case PCIM_HDRTYPE_BRIDGE:
1268		ptr = PCIR_CAP_PTR;
1269		break;
1270	case PCIM_HDRTYPE_CARDBUS:
1271		ptr = PCIR_CAP_PTR_2;
1272		break;
1273	default:
1274		/* XXX: panic? */
1275		return (ENXIO);		/* no extended capabilities support */
1276	}
1277	ptr = pci_read_config(child, ptr, 1);
1278
1279	/*
1280	 * Traverse the capabilities list.
1281	 */
1282	while (ptr != 0) {
1283		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1284			if (capreg != NULL)
1285				*capreg = ptr;
1286			return (0);
1287		}
1288		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1289	}
1290
1291	return (ENOENT);
1292}
1293
1294/*
1295 * Find the requested extended capability and return the offset in
1296 * configuration space via the pointer provided.  The function returns
1297 * 0 on success and an error code otherwise.
1298 */
1299int
1300pci_find_extcap_method(device_t dev, device_t child, int capability,
1301    int *capreg)
1302{
1303	struct pci_devinfo *dinfo = device_get_ivars(child);
1304	pcicfgregs *cfg = &dinfo->cfg;
1305	uint32_t ecap;
1306	uint16_t ptr;
1307
1308	/* Only supported for PCI-express devices. */
1309	if (cfg->pcie.pcie_location == 0)
1310		return (ENXIO);
1311
1312	ptr = PCIR_EXTCAP;
1313	ecap = pci_read_config(child, ptr, 4);
1314	if (ecap == 0xffffffff || ecap == 0)
1315		return (ENOENT);
1316	for (;;) {
1317		if (PCI_EXTCAP_ID(ecap) == capability) {
1318			if (capreg != NULL)
1319				*capreg = ptr;
1320			return (0);
1321		}
1322		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1323		if (ptr == 0)
1324			break;
1325		ecap = pci_read_config(child, ptr, 4);
1326	}
1327
1328	return (ENOENT);
1329}
1330
1331/*
1332 * Support for MSI-X message interrupts.
1333 */
1334void
1335pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1336{
1337	struct pci_devinfo *dinfo = device_get_ivars(dev);
1338	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1339	uint32_t offset;
1340
1341	KASSERT(msix->msix_table_len > index, ("bogus index"));
1342	offset = msix->msix_table_offset + index * 16;
1343	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1344	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1345	bus_write_4(msix->msix_table_res, offset + 8, data);
1346
1347	/* Enable MSI -> HT mapping. */
1348	pci_ht_map_msi(dev, address);
1349}
1350
1351void
1352pci_mask_msix(device_t dev, u_int index)
1353{
1354	struct pci_devinfo *dinfo = device_get_ivars(dev);
1355	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1356	uint32_t offset, val;
1357
1358	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1359	offset = msix->msix_table_offset + index * 16 + 12;
1360	val = bus_read_4(msix->msix_table_res, offset);
1361	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1362		val |= PCIM_MSIX_VCTRL_MASK;
1363		bus_write_4(msix->msix_table_res, offset, val);
1364	}
1365}
1366
1367void
1368pci_unmask_msix(device_t dev, u_int index)
1369{
1370	struct pci_devinfo *dinfo = device_get_ivars(dev);
1371	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1372	uint32_t offset, val;
1373
1374	KASSERT(msix->msix_table_len > index, ("bogus index"));
1375	offset = msix->msix_table_offset + index * 16 + 12;
1376	val = bus_read_4(msix->msix_table_res, offset);
1377	if (val & PCIM_MSIX_VCTRL_MASK) {
1378		val &= ~PCIM_MSIX_VCTRL_MASK;
1379		bus_write_4(msix->msix_table_res, offset, val);
1380	}
1381}
1382
1383int
1384pci_pending_msix(device_t dev, u_int index)
1385{
1386	struct pci_devinfo *dinfo = device_get_ivars(dev);
1387	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1388	uint32_t offset, bit;
1389
1390	KASSERT(msix->msix_table_len > index, ("bogus index"));
1391	offset = msix->msix_pba_offset + (index / 32) * 4;
1392	bit = 1 << index % 32;
1393	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1394}
1395
1396/*
1397 * Restore MSI-X registers and table during resume.  If MSI-X is
1398 * enabled then walk the virtual table to restore the actual MSI-X
1399 * table.
1400 */
1401static void
1402pci_resume_msix(device_t dev)
1403{
1404	struct pci_devinfo *dinfo = device_get_ivars(dev);
1405	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1406	struct msix_table_entry *mte;
1407	struct msix_vector *mv;
1408	int i;
1409
1410	if (msix->msix_alloc > 0) {
1411		/* First, mask all vectors. */
1412		for (i = 0; i < msix->msix_msgnum; i++)
1413			pci_mask_msix(dev, i);
1414
1415		/* Second, program any messages with at least one handler. */
1416		for (i = 0; i < msix->msix_table_len; i++) {
1417			mte = &msix->msix_table[i];
1418			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1419				continue;
1420			mv = &msix->msix_vectors[mte->mte_vector - 1];
1421			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1422			pci_unmask_msix(dev, i);
1423		}
1424	}
1425	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1426	    msix->msix_ctrl, 2);
1427}
1428
1429/*
1430 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1431 * returned in *count.  After this function returns, each message will be
1432 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1433 */
1434int
1435pci_alloc_msix_method(device_t dev, device_t child, int *count)
1436{
1437	struct pci_devinfo *dinfo = device_get_ivars(child);
1438	pcicfgregs *cfg = &dinfo->cfg;
1439	struct resource_list_entry *rle;
1440	int actual, error, i, irq, max;
1441
1442	/* Don't let count == 0 get us into trouble. */
1443	if (*count == 0)
1444		return (EINVAL);
1445
1446	/* If rid 0 is allocated, then fail. */
1447	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1448	if (rle != NULL && rle->res != NULL)
1449		return (ENXIO);
1450
1451	/* Already have allocated messages? */
1452	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1453		return (ENXIO);
1454
1455	/* If MSI-X is blacklisted for this system, fail. */
1456	if (pci_msix_blacklisted())
1457		return (ENXIO);
1458
1459	/* MSI-X capability present? */
1460	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1461		return (ENODEV);
1462
1463	/* Make sure the appropriate BARs are mapped. */
1464	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1465	    cfg->msix.msix_table_bar);
1466	if (rle == NULL || rle->res == NULL ||
1467	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1468		return (ENXIO);
1469	cfg->msix.msix_table_res = rle->res;
1470	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1471		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1472		    cfg->msix.msix_pba_bar);
1473		if (rle == NULL || rle->res == NULL ||
1474		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1475			return (ENXIO);
1476	}
1477	cfg->msix.msix_pba_res = rle->res;
1478
1479	if (bootverbose)
1480		device_printf(child,
1481		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1482		    *count, cfg->msix.msix_msgnum);
1483	max = min(*count, cfg->msix.msix_msgnum);
1484	for (i = 0; i < max; i++) {
1485		/* Allocate a message. */
1486		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1487		if (error) {
1488			if (i == 0)
1489				return (error);
1490			break;
1491		}
1492		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1493		    irq, 1);
1494	}
1495	actual = i;
1496
1497	if (bootverbose) {
1498		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1499		if (actual == 1)
1500			device_printf(child, "using IRQ %lu for MSI-X\n",
1501			    rle->start);
1502		else {
1503			int run;
1504
1505			/*
1506			 * Be fancy and try to print contiguous runs of
1507			 * IRQ values as ranges.  'irq' is the previous IRQ.
1508			 * 'run' is true if we are in a range.
1509			 */
1510			device_printf(child, "using IRQs %lu", rle->start);
1511			irq = rle->start;
1512			run = 0;
1513			for (i = 1; i < actual; i++) {
1514				rle = resource_list_find(&dinfo->resources,
1515				    SYS_RES_IRQ, i + 1);
1516
1517				/* Still in a run? */
1518				if (rle->start == irq + 1) {
1519					run = 1;
1520					irq++;
1521					continue;
1522				}
1523
1524				/* Finish previous range. */
1525				if (run) {
1526					printf("-%d", irq);
1527					run = 0;
1528				}
1529
1530				/* Start new range. */
1531				printf(",%lu", rle->start);
1532				irq = rle->start;
1533			}
1534
1535			/* Unfinished range? */
1536			if (run)
1537				printf("-%d", irq);
1538			printf(" for MSI-X\n");
1539		}
1540	}
1541
1542	/* Mask all vectors. */
1543	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1544		pci_mask_msix(child, i);
1545
1546	/* Allocate and initialize vector data and virtual table. */
1547	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1548	    M_DEVBUF, M_WAITOK | M_ZERO);
1549	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1550	    M_DEVBUF, M_WAITOK | M_ZERO);
1551	for (i = 0; i < actual; i++) {
1552		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1553		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1554		cfg->msix.msix_table[i].mte_vector = i + 1;
1555	}
1556
1557	/* Update control register to enable MSI-X. */
1558	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1559	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1560	    cfg->msix.msix_ctrl, 2);
1561
1562	/* Update counts of alloc'd messages. */
1563	cfg->msix.msix_alloc = actual;
1564	cfg->msix.msix_table_len = actual;
1565	*count = actual;
1566	return (0);
1567}
1568
1569/*
1570 * By default, pci_alloc_msix() will assign the allocated IRQ
1571 * resources consecutively to the first N messages in the MSI-X table.
1572 * However, device drivers may want to use different layouts if they
1573 * either receive fewer messages than they asked for, or they wish to
1574 * populate the MSI-X table sparsely.  This method allows the driver
1575 * to specify what layout it wants.  It must be called after a
1576 * successful pci_alloc_msix() but before any of the associated
1577 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1578 *
1579 * The 'vectors' array contains 'count' message vectors.  The array
1580 * maps directly to the MSI-X table in that index 0 in the array
1581 * specifies the vector for the first message in the MSI-X table, etc.
1582 * The vector value in each array index can either be 0 to indicate
1583 * that no vector should be assigned to a message slot, or it can be a
1584 * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1586 * vector (IRQ) to be used for the corresponding message.
1587 *
1588 * On successful return, each message with a non-zero vector will have
1589 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1590 * 1.  Additionally, if any of the IRQs allocated via the previous
1591 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1592 * will be freed back to the system automatically.
1593 *
1594 * For example, suppose a driver has a MSI-X table with 6 messages and
1595 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1596 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1597 * C.  After the call to pci_alloc_msix(), the device will be setup to
1598 * have an MSI-X table of ABC--- (where - means no vector assigned).
1599 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1600 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1601 * be freed back to the system.  This device will also have valid
1602 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1603 *
1604 * In any case, the SYS_RES_IRQ rid X will always map to the message
1605 * at MSI-X table index X - 1 and will only be valid if a vector is
1606 * assigned to that table entry.
1607 */
1608int
1609pci_remap_msix_method(device_t dev, device_t child, int count,
1610    const u_int *vectors)
1611{
1612	struct pci_devinfo *dinfo = device_get_ivars(child);
1613	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1614	struct resource_list_entry *rle;
1615	int i, irq, j, *used;
1616
1617	/*
1618	 * Have to have at least one message in the table but the
1619	 * table can't be bigger than the actual MSI-X table in the
1620	 * device.
1621	 */
1622	if (count == 0 || count > msix->msix_msgnum)
1623		return (EINVAL);
1624
1625	/* Sanity check the vectors. */
1626	for (i = 0; i < count; i++)
1627		if (vectors[i] > msix->msix_alloc)
1628			return (EINVAL);
1629
1630	/*
1631	 * Make sure there aren't any holes in the vectors to be used.
1632	 * It's a big pain to support it, and it doesn't really make
1633	 * sense anyway.  Also, at least one vector must be used.
1634	 */
1635	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1636	    M_ZERO);
1637	for (i = 0; i < count; i++)
1638		if (vectors[i] != 0)
1639			used[vectors[i] - 1] = 1;
1640	for (i = 0; i < msix->msix_alloc - 1; i++)
1641		if (used[i] == 0 && used[i + 1] == 1) {
1642			free(used, M_DEVBUF);
1643			return (EINVAL);
1644		}
1645	if (used[0] != 1) {
1646		free(used, M_DEVBUF);
1647		return (EINVAL);
1648	}
1649
1650	/* Make sure none of the resources are allocated. */
1651	for (i = 0; i < msix->msix_table_len; i++) {
1652		if (msix->msix_table[i].mte_vector == 0)
1653			continue;
1654		if (msix->msix_table[i].mte_handlers > 0)
1655			return (EBUSY);
1656		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1657		KASSERT(rle != NULL, ("missing resource"));
1658		if (rle->res != NULL)
1659			return (EBUSY);
1660	}
1661
1662	/* Free the existing resource list entries. */
1663	for (i = 0; i < msix->msix_table_len; i++) {
1664		if (msix->msix_table[i].mte_vector == 0)
1665			continue;
1666		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1667	}
1668
1669	/*
1670	 * Build the new virtual table keeping track of which vectors are
1671	 * used.
1672	 */
1673	free(msix->msix_table, M_DEVBUF);
1674	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1675	    M_DEVBUF, M_WAITOK | M_ZERO);
1676	for (i = 0; i < count; i++)
1677		msix->msix_table[i].mte_vector = vectors[i];
1678	msix->msix_table_len = count;
1679
1680	/* Free any unused IRQs and resize the vectors array if necessary. */
1681	j = msix->msix_alloc - 1;
1682	if (used[j] == 0) {
1683		struct msix_vector *vec;
1684
1685		while (used[j] == 0) {
1686			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1687			    msix->msix_vectors[j].mv_irq);
1688			j--;
1689		}
1690		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1691		    M_WAITOK);
1692		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1693		    (j + 1));
1694		free(msix->msix_vectors, M_DEVBUF);
1695		msix->msix_vectors = vec;
1696		msix->msix_alloc = j + 1;
1697	}
1698	free(used, M_DEVBUF);
1699
1700	/* Map the IRQs onto the rids. */
1701	for (i = 0; i < count; i++) {
1702		if (vectors[i] == 0)
1703			continue;
1704		irq = msix->msix_vectors[vectors[i]].mv_irq;
1705		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1706		    irq, 1);
1707	}
1708
1709	if (bootverbose) {
1710		device_printf(child, "Remapped MSI-X IRQs as: ");
1711		for (i = 0; i < count; i++) {
1712			if (i != 0)
1713				printf(", ");
1714			if (vectors[i] == 0)
1715				printf("---");
1716			else
1717				printf("%d",
1718				    msix->msix_vectors[vectors[i]].mv_irq);
1719		}
1720		printf("\n");
1721	}
1722
1723	return (0);
1724}
1725
/*
 * Release all allocated MSI-X messages for 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Fails with EBUSY if
 * any message still has an interrupt handler attached or an allocated
 * resource, and ENODEV if no messages are allocated.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1772
1773/*
1774 * Return the max supported MSI-X messages this device supports.
1775 * Basically, assuming the MD code can alloc messages, this function
1776 * should return the maximum value that pci_alloc_msix() can return.
1777 * Thus, it is subject to the tunables, etc.
1778 */
1779int
1780pci_msix_count_method(device_t dev, device_t child)
1781{
1782	struct pci_devinfo *dinfo = device_get_ivars(child);
1783	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1784
1785	if (pci_do_msix && msix->msix_location != 0)
1786		return (msix->msix_msgnum);
1787	return (0);
1788}
1789
1790/*
1791 * HyperTransport MSI mapping control
1792 */
1793void
1794pci_ht_map_msi(device_t dev, uint64_t addr)
1795{
1796	struct pci_devinfo *dinfo = device_get_ivars(dev);
1797	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1798
1799	if (!ht->ht_msimap)
1800		return;
1801
1802	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1803	    ht->ht_msiaddr >> 20 == addr >> 20) {
1804		/* Enable MSI -> HT mapping. */
1805		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1806		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1807		    ht->ht_msictrl, 2);
1808	}
1809
1810	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1811		/* Disable MSI -> HT mapping. */
1812		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1813		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1814		    ht->ht_msictrl, 2);
1815	}
1816}
1817
1818int
1819pci_get_max_read_req(device_t dev)
1820{
1821	struct pci_devinfo *dinfo = device_get_ivars(dev);
1822	int cap;
1823	uint16_t val;
1824
1825	cap = dinfo->cfg.pcie.pcie_location;
1826	if (cap == 0)
1827		return (0);
1828	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1829	val &= PCIEM_CTL_MAX_READ_REQUEST;
1830	val >>= 12;
1831	return (1 << (val + 7));
1832}
1833
1834int
1835pci_set_max_read_req(device_t dev, int size)
1836{
1837	struct pci_devinfo *dinfo = device_get_ivars(dev);
1838	int cap;
1839	uint16_t val;
1840
1841	cap = dinfo->cfg.pcie.pcie_location;
1842	if (cap == 0)
1843		return (0);
1844	if (size < 128)
1845		size = 128;
1846	if (size > 4096)
1847		size = 4096;
1848	size = (1 << (fls(size) - 1));
1849	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1850	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1851	val |= (fls(size) - 8) << 12;
1852	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1853	return (size);
1854}
1855
1856/*
1857 * Support for MSI message signalled interrupts.
1858 */
1859void
1860pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1861{
1862	struct pci_devinfo *dinfo = device_get_ivars(dev);
1863	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1864
1865	/* Write data and address values. */
1866	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1867	    address & 0xffffffff, 4);
1868	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1869		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1870		    address >> 32, 4);
1871		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1872		    data, 2);
1873	} else
1874		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1875		    2);
1876
1877	/* Enable MSI in the control register. */
1878	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1879	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1880	    2);
1881
1882	/* Enable MSI -> HT mapping. */
1883	pci_ht_map_msi(dev, address);
1884}
1885
1886void
1887pci_disable_msi(device_t dev)
1888{
1889	struct pci_devinfo *dinfo = device_get_ivars(dev);
1890	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1891
1892	/* Disable MSI -> HT mapping. */
1893	pci_ht_map_msi(dev, 0);
1894
1895	/* Disable MSI in the control register. */
1896	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1897	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1898	    2);
1899}
1900
1901/*
1902 * Restore MSI registers during resume.  If MSI is enabled then
1903 * restore the data and address registers in addition to the control
1904 * register.
1905 */
1906static void
1907pci_resume_msi(device_t dev)
1908{
1909	struct pci_devinfo *dinfo = device_get_ivars(dev);
1910	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1911	uint64_t address;
1912	uint16_t data;
1913
1914	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1915		address = msi->msi_addr;
1916		data = msi->msi_data;
1917		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1918		    address & 0xffffffff, 4);
1919		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1920			pci_write_config(dev, msi->msi_location +
1921			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1922			pci_write_config(dev, msi->msi_location +
1923			    PCIR_MSI_DATA_64BIT, data, 2);
1924		} else
1925			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1926			    data, 2);
1927	}
1928	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1929	    2);
1930}
1931
1932static int
1933pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1934{
1935	struct pci_devinfo *dinfo = device_get_ivars(dev);
1936	pcicfgregs *cfg = &dinfo->cfg;
1937	struct resource_list_entry *rle;
1938	struct msix_table_entry *mte;
1939	struct msix_vector *mv;
1940	uint64_t addr;
1941	uint32_t data;
1942	int error, i, j;
1943
1944	/*
1945	 * Handle MSI first.  We try to find this IRQ among our list
1946	 * of MSI IRQs.  If we find it, we request updated address and
1947	 * data registers and apply the results.
1948	 */
1949	if (cfg->msi.msi_alloc > 0) {
1950
1951		/* If we don't have any active handlers, nothing to do. */
1952		if (cfg->msi.msi_handlers == 0)
1953			return (0);
1954		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1955			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1956			    i + 1);
1957			if (rle->start == irq) {
1958				error = PCIB_MAP_MSI(device_get_parent(bus),
1959				    dev, irq, &addr, &data);
1960				if (error)
1961					return (error);
1962				pci_disable_msi(dev);
1963				dinfo->cfg.msi.msi_addr = addr;
1964				dinfo->cfg.msi.msi_data = data;
1965				pci_enable_msi(dev, addr, data);
1966				return (0);
1967			}
1968		}
1969		return (ENOENT);
1970	}
1971
1972	/*
1973	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1974	 * we request the updated mapping info.  If that works, we go
1975	 * through all the slots that use this IRQ and update them.
1976	 */
1977	if (cfg->msix.msix_alloc > 0) {
1978		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1979			mv = &cfg->msix.msix_vectors[i];
1980			if (mv->mv_irq == irq) {
1981				error = PCIB_MAP_MSI(device_get_parent(bus),
1982				    dev, irq, &addr, &data);
1983				if (error)
1984					return (error);
1985				mv->mv_address = addr;
1986				mv->mv_data = data;
1987				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1988					mte = &cfg->msix.msix_table[j];
1989					if (mte->mte_vector != i + 1)
1990						continue;
1991					if (mte->mte_handlers == 0)
1992						continue;
1993					pci_mask_msix(dev, j);
1994					pci_enable_msix(dev, j, addr, data);
1995					pci_unmask_msix(dev, j);
1996				}
1997			}
1998		}
1999		return (ENOENT);
2000	}
2001
2002	return (ENOENT);
2003}
2004
2005/*
2006 * Returns true if the specified device is blacklisted because MSI
2007 * doesn't work.
2008 */
2009int
2010pci_msi_device_blacklisted(device_t dev)
2011{
2012
2013	if (!pci_honor_msi_blacklist)
2014		return (0);
2015
2016	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2017}
2018
2019/*
2020 * Determine if MSI is blacklisted globally on this system.  Currently,
2021 * we just check for blacklisted chipsets as represented by the
2022 * host-PCI bridge at device 0:0:0.  In the future, it may become
2023 * necessary to check other system attributes, such as the kenv values
2024 * that give the motherboard manufacturer and model number.
2025 */
2026static int
2027pci_msi_blacklisted(void)
2028{
2029	device_t dev;
2030
2031	if (!pci_honor_msi_blacklist)
2032		return (0);
2033
2034	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2035	if (!(pcie_chipset || pcix_chipset)) {
2036		if (vm_guest != VM_GUEST_NO) {
2037			/*
2038			 * Whitelist older chipsets in virtual
2039			 * machines known to support MSI.
2040			 */
2041			dev = pci_find_bsf(0, 0, 0);
2042			if (dev != NULL)
2043				return (!pci_has_quirk(pci_get_devid(dev),
2044					PCI_QUIRK_ENABLE_MSI_VM));
2045		}
2046		return (1);
2047	}
2048
2049	dev = pci_find_bsf(0, 0, 0);
2050	if (dev != NULL)
2051		return (pci_msi_device_blacklisted(dev));
2052	return (0);
2053}
2054
2055/*
2056 * Returns true if the specified device is blacklisted because MSI-X
2057 * doesn't work.  Note that this assumes that if MSI doesn't work,
2058 * MSI-X doesn't either.
2059 */
2060int
2061pci_msix_device_blacklisted(device_t dev)
2062{
2063
2064	if (!pci_honor_msi_blacklist)
2065		return (0);
2066
2067	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2068		return (1);
2069
2070	return (pci_msi_device_blacklisted(dev));
2071}
2072
2073/*
2074 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2075 * is blacklisted, assume that MSI-X is as well.  Check for additional
2076 * chipsets where MSI works but MSI-X does not.
2077 */
2078static int
2079pci_msix_blacklisted(void)
2080{
2081	device_t dev;
2082
2083	if (!pci_honor_msi_blacklist)
2084		return (0);
2085
2086	dev = pci_find_bsf(0, 0, 0);
2087	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2088	    PCI_QUIRK_DISABLE_MSIX))
2089		return (1);
2090
2091	return (pci_msi_blacklisted());
2092}
2093
2094/*
2095 * Attempt to allocate *count MSI messages.  The actual number allocated is
2096 * returned in *count.  After this function returns, each message will be
2097 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2098 */
2099int
2100pci_alloc_msi_method(device_t dev, device_t child, int *count)
2101{
2102	struct pci_devinfo *dinfo = device_get_ivars(child);
2103	pcicfgregs *cfg = &dinfo->cfg;
2104	struct resource_list_entry *rle;
2105	int actual, error, i, irqs[32];
2106	uint16_t ctrl;
2107
2108	/* Don't let count == 0 get us into trouble. */
2109	if (*count == 0)
2110		return (EINVAL);
2111
2112	/* If rid 0 is allocated, then fail. */
2113	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2114	if (rle != NULL && rle->res != NULL)
2115		return (ENXIO);
2116
2117	/* Already have allocated messages? */
2118	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2119		return (ENXIO);
2120
2121	/* If MSI is blacklisted for this system, fail. */
2122	if (pci_msi_blacklisted())
2123		return (ENXIO);
2124
2125	/* MSI capability present? */
2126	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2127		return (ENODEV);
2128
2129	if (bootverbose)
2130		device_printf(child,
2131		    "attempting to allocate %d MSI vectors (%d supported)\n",
2132		    *count, cfg->msi.msi_msgnum);
2133
2134	/* Don't ask for more than the device supports. */
2135	actual = min(*count, cfg->msi.msi_msgnum);
2136
2137	/* Don't ask for more than 32 messages. */
2138	actual = min(actual, 32);
2139
2140	/* MSI requires power of 2 number of messages. */
2141	if (!powerof2(actual))
2142		return (EINVAL);
2143
2144	for (;;) {
2145		/* Try to allocate N messages. */
2146		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
2147		    actual, irqs);
2148		if (error == 0)
2149			break;
2150		if (actual == 1)
2151			return (error);
2152
2153		/* Try N / 2. */
2154		actual >>= 1;
2155	}
2156
2157	/*
2158	 * We now have N actual messages mapped onto SYS_RES_IRQ
2159	 * resources in the irqs[] array, so add new resources
2160	 * starting at rid 1.
2161	 */
2162	for (i = 0; i < actual; i++)
2163		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2164		    irqs[i], irqs[i], 1);
2165
2166	if (bootverbose) {
2167		if (actual == 1)
2168			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
2169		else {
2170			int run;
2171
2172			/*
2173			 * Be fancy and try to print contiguous runs
2174			 * of IRQ values as ranges.  'run' is true if
2175			 * we are in a range.
2176			 */
2177			device_printf(child, "using IRQs %d", irqs[0]);
2178			run = 0;
2179			for (i = 1; i < actual; i++) {
2180
2181				/* Still in a run? */
2182				if (irqs[i] == irqs[i - 1] + 1) {
2183					run = 1;
2184					continue;
2185				}
2186
2187				/* Finish previous range. */
2188				if (run) {
2189					printf("-%d", irqs[i - 1]);
2190					run = 0;
2191				}
2192
2193				/* Start new range. */
2194				printf(",%d", irqs[i]);
2195			}
2196
2197			/* Unfinished range? */
2198			if (run)
2199				printf("-%d", irqs[actual - 1]);
2200			printf(" for MSI\n");
2201		}
2202	}
2203
2204	/* Update control register with actual count. */
2205	ctrl = cfg->msi.msi_ctrl;
2206	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2207	ctrl |= (ffs(actual) - 1) << 4;
2208	cfg->msi.msi_ctrl = ctrl;
2209	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2210
2211	/* Update counts of alloc'd messages. */
2212	cfg->msi.msi_alloc = actual;
2213	cfg->msi.msi_handlers = 0;
2214	*count = actual;
2215	return (0);
2216}
2217
/*
 * Release the MSI messages associated with this device.  Returns
 * ENODEV if no MSI (or MSI-X) messages are allocated, EBUSY if any
 * message still has a handler attached or an IRQ resource allocated,
 * and 0 on success.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember each IRQ so we can hand them all back at once. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2266
2267/*
2268 * Return the max supported MSI messages this device supports.
2269 * Basically, assuming the MD code can alloc messages, this function
2270 * should return the maximum value that pci_alloc_msi() can return.
2271 * Thus, it is subject to the tunables, etc.
2272 */
2273int
2274pci_msi_count_method(device_t dev, device_t child)
2275{
2276	struct pci_devinfo *dinfo = device_get_ivars(child);
2277	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2278
2279	if (pci_do_msi && msi->msi_location != 0)
2280		return (msi->msi_msgnum);
2281	return (0);
2282}
2283
2284/* free pcicfgregs structure and all depending data structures */
2285
2286int
2287pci_freecfg(struct pci_devinfo *dinfo)
2288{
2289	struct devlist *devlist_head;
2290	struct pci_map *pm, *next;
2291	int i;
2292
2293	devlist_head = &pci_devq;
2294
2295	if (dinfo->cfg.vpd.vpd_reg) {
2296		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2297		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2298			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2299		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2300		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2301			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2302		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2303	}
2304	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2305		free(pm, M_DEVBUF);
2306	}
2307	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2308	free(dinfo, M_DEVBUF);
2309
2310	/* increment the generation count */
2311	pci_generation++;
2312
2313	/* we're losing one device */
2314	pci_numdevs--;
2315	return (0);
2316}
2317
2318/*
2319 * PCI power manangement
2320 */
2321int
2322pci_set_powerstate_method(device_t dev, device_t child, int state)
2323{
2324	struct pci_devinfo *dinfo = device_get_ivars(child);
2325	pcicfgregs *cfg = &dinfo->cfg;
2326	uint16_t status;
2327	int result, oldstate, highest, delay;
2328
2329	if (cfg->pp.pp_cap == 0)
2330		return (EOPNOTSUPP);
2331
2332	/*
2333	 * Optimize a no state change request away.  While it would be OK to
2334	 * write to the hardware in theory, some devices have shown odd
2335	 * behavior when going from D3 -> D3.
2336	 */
2337	oldstate = pci_get_powerstate(child);
2338	if (oldstate == state)
2339		return (0);
2340
2341	/*
2342	 * The PCI power management specification states that after a state
2343	 * transition between PCI power states, system software must
2344	 * guarantee a minimal delay before the function accesses the device.
2345	 * Compute the worst case delay that we need to guarantee before we
2346	 * access the device.  Many devices will be responsive much more
2347	 * quickly than this delay, but there are some that don't respond
2348	 * instantly to state changes.  Transitions to/from D3 state require
2349	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2350	 * is done below with DELAY rather than a sleeper function because
2351	 * this function can be called from contexts where we cannot sleep.
2352	 */
2353	highest = (oldstate > state) ? oldstate : state;
2354	if (highest == PCI_POWERSTATE_D3)
2355	    delay = 10000;
2356	else if (highest == PCI_POWERSTATE_D2)
2357	    delay = 200;
2358	else
2359	    delay = 0;
2360	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2361	    & ~PCIM_PSTAT_DMASK;
2362	result = 0;
2363	switch (state) {
2364	case PCI_POWERSTATE_D0:
2365		status |= PCIM_PSTAT_D0;
2366		break;
2367	case PCI_POWERSTATE_D1:
2368		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2369			return (EOPNOTSUPP);
2370		status |= PCIM_PSTAT_D1;
2371		break;
2372	case PCI_POWERSTATE_D2:
2373		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2374			return (EOPNOTSUPP);
2375		status |= PCIM_PSTAT_D2;
2376		break;
2377	case PCI_POWERSTATE_D3:
2378		status |= PCIM_PSTAT_D3;
2379		break;
2380	default:
2381		return (EINVAL);
2382	}
2383
2384	if (bootverbose)
2385		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2386		    state);
2387
2388	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2389	if (delay)
2390		DELAY(delay);
2391	return (0);
2392}
2393
2394int
2395pci_get_powerstate_method(device_t dev, device_t child)
2396{
2397	struct pci_devinfo *dinfo = device_get_ivars(child);
2398	pcicfgregs *cfg = &dinfo->cfg;
2399	uint16_t status;
2400	int result;
2401
2402	if (cfg->pp.pp_cap != 0) {
2403		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2404		switch (status & PCIM_PSTAT_DMASK) {
2405		case PCIM_PSTAT_D0:
2406			result = PCI_POWERSTATE_D0;
2407			break;
2408		case PCIM_PSTAT_D1:
2409			result = PCI_POWERSTATE_D1;
2410			break;
2411		case PCIM_PSTAT_D2:
2412			result = PCI_POWERSTATE_D2;
2413			break;
2414		case PCIM_PSTAT_D3:
2415			result = PCI_POWERSTATE_D3;
2416			break;
2417		default:
2418			result = PCI_POWERSTATE_UNKNOWN;
2419			break;
2420		}
2421	} else {
2422		/* No support, device is always at D0 */
2423		result = PCI_POWERSTATE_D0;
2424	}
2425	return (result);
2426}
2427
2428/*
2429 * Some convenience functions for PCI device drivers.
2430 */
2431
2432static __inline void
2433pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2434{
2435	uint16_t	command;
2436
2437	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2438	command |= bit;
2439	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2440}
2441
2442static __inline void
2443pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2444{
2445	uint16_t	command;
2446
2447	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2448	command &= ~bit;
2449	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2450}
2451
/* Enable PCI bus mastering (DMA initiation) for the child device. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2458
/* Disable PCI bus mastering (DMA initiation) for the child device. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2465
2466int
2467pci_enable_io_method(device_t dev, device_t child, int space)
2468{
2469	uint16_t bit;
2470
2471	switch(space) {
2472	case SYS_RES_IOPORT:
2473		bit = PCIM_CMD_PORTEN;
2474		break;
2475	case SYS_RES_MEMORY:
2476		bit = PCIM_CMD_MEMEN;
2477		break;
2478	default:
2479		return (EINVAL);
2480	}
2481	pci_set_command_bit(dev, child, bit);
2482	return (0);
2483}
2484
2485int
2486pci_disable_io_method(device_t dev, device_t child, int space)
2487{
2488	uint16_t bit;
2489
2490	switch(space) {
2491	case SYS_RES_IOPORT:
2492		bit = PCIM_CMD_PORTEN;
2493		break;
2494	case SYS_RES_MEMORY:
2495		bit = PCIM_CMD_MEMEN;
2496		break;
2497	default:
2498		return (EINVAL);
2499	}
2500	pci_clear_command_bit(dev, child, bit);
2501	return (0);
2502}
2503
2504/*
2505 * New style pci driver.  Parent device is either a pci-host-bridge or a
2506 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2507 */
2508
/*
 * Dump the cached config-space state of a device (IDs, location,
 * class, command/status, timers, interrupt routing, and the power
 * management / MSI / MSI-X capabilities) to the console.  Only prints
 * when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Re-read PM status for the current power state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2565
2566static int
2567pci_porten(device_t dev)
2568{
2569	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2570}
2571
2572static int
2573pci_memen(device_t dev)
2574{
2575	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2576}
2577
/*
 * Read a BAR's current value and size it by writing all ones and
 * reading back which bits stick.  On return *mapp holds the original
 * (possibly 64-bit) BAR value and *testvalp the read-back sizing
 * value.  The BAR is restored and decoding re-enabled before return.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2641
/*
 * Program a BAR with a new base address and refresh the cached
 * pm_value from the hardware (the device may not implement all
 * address bits, so the read-back is authoritative).
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Re-read what the device actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2662
2663struct pci_map *
2664pci_find_bar(device_t dev, int reg)
2665{
2666	struct pci_devinfo *dinfo;
2667	struct pci_map *pm;
2668
2669	dinfo = device_get_ivars(dev);
2670	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2671		if (pm->pm_reg == reg)
2672			return (pm);
2673	}
2674	return (NULL);
2675}
2676
2677int
2678pci_bar_enabled(device_t dev, struct pci_map *pm)
2679{
2680	struct pci_devinfo *dinfo;
2681	uint16_t cmd;
2682
2683	dinfo = device_get_ivars(dev);
2684	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2685	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2686		return (0);
2687	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2688	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2689		return ((cmd & PCIM_CMD_MEMEN) != 0);
2690	else
2691		return ((cmd & PCIM_CMD_PORTEN) != 0);
2692}
2693
/*
 * Allocate a new BAR record for config register 'reg' and insert it
 * into the device's map list, which is kept sorted by register
 * offset.  Returns the new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the last entry whose successor has a larger register. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL means the list was empty: insert at the tail. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2718
2719static void
2720pci_restore_bars(device_t dev)
2721{
2722	struct pci_devinfo *dinfo;
2723	struct pci_map *pm;
2724	int ln2range;
2725
2726	dinfo = device_get_ivars(dev);
2727	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2728		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2729			ln2range = 32;
2730		else
2731			ln2range = pci_maprange(pm->pm_value);
2732		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2733		if (ln2range == 64)
2734			pci_write_config(dev, pm->pm_reg + 4,
2735			    pm->pm_value >> 32, 4);
2736	}
2737}
2738
2739/*
2740 * Add a resource based on a pci map register. Return 1 if the map
2741 * register is a 32bit map register or 2 if it is a 64bit register.
2742 */
2743static int
2744pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2745    int force, int prefetch)
2746{
2747	struct pci_map *pm;
2748	pci_addr_t base, map, testval;
2749	pci_addr_t start, end, count;
2750	int barlen, basezero, flags, maprange, mapsize, type;
2751	uint16_t cmd;
2752	struct resource *res;
2753
2754	/*
2755	 * The BAR may already exist if the device is a CardBus card
2756	 * whose CIS is stored in this BAR.
2757	 */
2758	pm = pci_find_bar(dev, reg);
2759	if (pm != NULL) {
2760		maprange = pci_maprange(pm->pm_value);
2761		barlen = maprange == 64 ? 2 : 1;
2762		return (barlen);
2763	}
2764
2765	pci_read_bar(dev, reg, &map, &testval);
2766	if (PCI_BAR_MEM(map)) {
2767		type = SYS_RES_MEMORY;
2768		if (map & PCIM_BAR_MEM_PREFETCH)
2769			prefetch = 1;
2770	} else
2771		type = SYS_RES_IOPORT;
2772	mapsize = pci_mapsize(testval);
2773	base = pci_mapbase(map);
2774#ifdef __PCI_BAR_ZERO_VALID
2775	basezero = 0;
2776#else
2777	basezero = base == 0;
2778#endif
2779	maprange = pci_maprange(map);
2780	barlen = maprange == 64 ? 2 : 1;
2781
2782	/*
2783	 * For I/O registers, if bottom bit is set, and the next bit up
2784	 * isn't clear, we know we have a BAR that doesn't conform to the
2785	 * spec, so ignore it.  Also, sanity check the size of the data
2786	 * areas to the type of memory involved.  Memory must be at least
2787	 * 16 bytes in size, while I/O ranges must be at least 4.
2788	 */
2789	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2790		return (barlen);
2791	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2792	    (type == SYS_RES_IOPORT && mapsize < 2))
2793		return (barlen);
2794
2795	/* Save a record of this BAR. */
2796	pm = pci_add_bar(dev, reg, map, mapsize);
2797	if (bootverbose) {
2798		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2799		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2800		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2801			printf(", port disabled\n");
2802		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2803			printf(", memory disabled\n");
2804		else
2805			printf(", enabled\n");
2806	}
2807
2808	/*
2809	 * If base is 0, then we have problems if this architecture does
2810	 * not allow that.  It is best to ignore such entries for the
2811	 * moment.  These will be allocated later if the driver specifically
2812	 * requests them.  However, some removable busses look better when
2813	 * all resources are allocated, so allow '0' to be overriden.
2814	 *
2815	 * Similarly treat maps whose values is the same as the test value
2816	 * read back.  These maps have had all f's written to them by the
2817	 * BIOS in an attempt to disable the resources.
2818	 */
2819	if (!force && (basezero || map == testval))
2820		return (barlen);
2821	if ((u_long)base != base) {
2822		device_printf(bus,
2823		    "pci%d:%d:%d:%d bar %#x too many address bits",
2824		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2825		    pci_get_function(dev), reg);
2826		return (barlen);
2827	}
2828
2829	/*
2830	 * This code theoretically does the right thing, but has
2831	 * undesirable side effects in some cases where peripherals
2832	 * respond oddly to having these bits enabled.  Let the user
2833	 * be able to turn them off (since pci_enable_io_modes is 1 by
2834	 * default).
2835	 */
2836	if (pci_enable_io_modes) {
2837		/* Turn on resources that have been left off by a lazy BIOS */
2838		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2839			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2840			cmd |= PCIM_CMD_PORTEN;
2841			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2842		}
2843		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2844			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2845			cmd |= PCIM_CMD_MEMEN;
2846			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2847		}
2848	} else {
2849		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2850			return (barlen);
2851		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2852			return (barlen);
2853	}
2854
2855	count = (pci_addr_t)1 << mapsize;
2856	flags = RF_ALIGNMENT_LOG2(mapsize);
2857	if (prefetch)
2858		flags |= RF_PREFETCHABLE;
2859	if (basezero || base == pci_mapbase(testval) || pci_clear_bars) {
2860		start = 0;	/* Let the parent decide. */
2861		end = ~0ul;
2862	} else {
2863		start = base;
2864		end = base + count - 1;
2865	}
2866	resource_list_add(rl, type, reg, start, end, count);
2867
2868	/*
2869	 * Try to allocate the resource for this BAR from our parent
2870	 * so that this resource range is already reserved.  The
2871	 * driver for this device will later inherit this resource in
2872	 * pci_alloc_resource().
2873	 */
2874	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2875	    flags);
2876	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
2877		/*
2878		 * If the allocation fails, try to allocate a resource for
2879		 * this BAR using any available range.  The firmware felt
2880		 * it was important enough to assign a resource, so don't
2881		 * disable decoding if we can help it.
2882		 */
2883		resource_list_delete(rl, type, reg);
2884		resource_list_add(rl, type, reg, 0, ~0ul, count);
2885		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
2886		    count, flags);
2887	}
2888	if (res == NULL) {
2889		/*
2890		 * If the allocation fails, delete the resource list entry
2891		 * and disable decoding for this device.
2892		 *
2893		 * If the driver requests this resource in the future,
2894		 * pci_reserve_map() will try to allocate a fresh
2895		 * resource range.
2896		 */
2897		resource_list_delete(rl, type, reg);
2898		pci_disable_io(dev, type);
2899		if (bootverbose)
2900			device_printf(bus,
2901			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
2902			    pci_get_domain(dev), pci_get_bus(dev),
2903			    pci_get_slot(dev), pci_get_function(dev), reg);
2904	} else {
2905		start = rman_get_start(res);
2906		pci_write_bar(dev, pm, start);
2907	}
2908	return (barlen);
2909}
2910
2911/*
2912 * For ATA devices we need to decide early what addressing mode to use.
2913 * Legacy demands that the primary and secondary ATA ports sits on the
2914 * same addresses that old ISA hardware did. This dictates that we use
2915 * those addresses and ignore the BAR's if we cannot set PCI native
2916 * addressing mode.
2917 */
2918static void
2919pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2920    uint32_t prefetchmask)
2921{
2922	struct resource *r;
2923	int rid, type, progif;
2924#if 0
2925	/* if this device supports PCI native addressing use it */
2926	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2927	if ((progif & 0x8a) == 0x8a) {
2928		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2929		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2930			printf("Trying ATA native PCI addressing mode\n");
2931			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2932		}
2933	}
2934#endif
2935	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2936	type = SYS_RES_IOPORT;
2937	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2938		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2939		    prefetchmask & (1 << 0));
2940		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2941		    prefetchmask & (1 << 1));
2942	} else {
2943		rid = PCIR_BAR(0);
2944		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2945		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2946		    0x1f7, 8, 0);
2947		rid = PCIR_BAR(1);
2948		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2949		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2950		    0x3f6, 1, 0);
2951	}
2952	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2953		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2954		    prefetchmask & (1 << 2));
2955		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2956		    prefetchmask & (1 << 3));
2957	} else {
2958		rid = PCIR_BAR(2);
2959		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2960		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2961		    0x177, 8, 0);
2962		rid = PCIR_BAR(3);
2963		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2964		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2965		    0x376, 1, 0);
2966	}
2967	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2968	    prefetchmask & (1 << 4));
2969	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2970	    prefetchmask & (1 << 5));
2971}
2972
/*
 * Determine the legacy INTx IRQ for a device and record it as the
 * rid 0 SYS_RES_IRQ resource.  The IRQ comes from, in order of
 * preference: a user tunable, bus routing (PCI_ASSIGN_INTERRUPT), or
 * the intline config register.  'force_route' makes bus routing take
 * precedence over a valid intline value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3020
/*
 * Perform early OHCI takeover from SMM: if the BIOS/SMM owns the
 * controller (OHCI_IR set), request an ownership change and poll up
 * to ~100ms for it to complete, hard-resetting the controller if the
 * SMM never responds, then disable all controller interrupts.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* OHCI operational registers live in memory BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll for up to 100 x 1ms for SMM to relinquish. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3057
3058/* Perform early UHCI takeover from SMM. */
3059static void
3060uhci_early_takeover(device_t self)
3061{
3062	struct resource *res;
3063	int rid;
3064
3065	/*
3066	 * Set the PIRQD enable bit and switch off all the others. We don't
3067	 * want legacy support to interfere with us XXX Does this also mean
3068	 * that the BIOS won't touch the keyboard anymore if it is connected
3069	 * to the ports of the root hub?
3070	 */
3071	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
3072
3073	/* Disable interrupts */
3074	rid = PCI_UHCI_BASE_REG;
3075	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
3076	if (res != NULL) {
3077		bus_write_2(res, UHCI_INTR, 0);
3078		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
3079	}
3080}
3081
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Walk the extended-capability list in PCI config space looking
	 * for the legacy-support (LEGSUP) capability.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* Nonzero BIOS semaphore means the SMM owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore, then poll. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Wait up to ~100ms for the BIOS to release its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3137
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones seed so the first XHCI_XECP_NEXT(eec) loop test passes. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	/*
	 * Walk the MMIO extended-capability list (offsets are in 32-bit
	 * dwords, hence the << 2) looking for the USB legacy capability.
	 */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* Nonzero BIOS semaphore means the SMM owns the controller. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore, then poll. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 second */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		/* NOTE(review): writing 0 to USBCMD also clears Run/Stop —
		 * presumably intended to fully quiesce the controller;
		 * the USBSTS read flushes the posted write. Confirm. */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3199
/*
 * Populate a child device's resource list: add an entry for each BAR
 * (honouring the quirk table), optionally (re-)route its INTx
 * interrupt, and perform early takeover of USB controllers from the
 * BIOS/SMM.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			/* Advance by the number of map registers consumed. */
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB controllers away from the SMM before drivers attach. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3273
/*
 * Enumerate every slot and function on the given bus and add a child
 * device for each function that responds.  dinfo_size lets subclassed
 * busses allocate a larger devinfo structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Config-space read of the slot (s) / function (f) being scanned. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Function 0's header type tells us about the whole slot. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose functions 1..PCI_FUNCMAX. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3306
3307void
3308pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3309{
3310	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3311	device_set_ivars(dinfo->cfg.dev, dinfo);
3312	resource_list_init(&dinfo->resources);
3313	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3314	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3315	pci_print_verbose(dinfo);
3316	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
3317}
3318
/*
 * Generic PCI bus probe.  Always matches, but at a low priority so
 * more specific subclassed drivers can claim the bus instead.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3328
/*
 * Attach-time setup shared by the generic PCI driver and subclasses:
 * report the domain/bus pair and pick the DMA tag children inherit.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Only create a boundary-constrained tag for a topmost PCI bus;
	 * a bus whose grandparent is also "pci" inherits the parent tag
	 * below instead.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	/* NB: this `if' governs the assignment following the #endif. */
	if (!tag_valid)
#endif
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3363
/* Attach method for the generic PCI bus: enumerate and attach children. */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3384
3385static void
3386pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3387    int state)
3388{
3389	device_t child, pcib;
3390	struct pci_devinfo *dinfo;
3391	int dstate, i;
3392
3393	/*
3394	 * Set the device to the given state.  If the firmware suggests
3395	 * a different power state, use it instead.  If power management
3396	 * is not present, the firmware is responsible for managing
3397	 * device power.  Skip children who aren't attached since they
3398	 * are handled separately.
3399	 */
3400	pcib = device_get_parent(dev);
3401	for (i = 0; i < numdevs; i++) {
3402		child = devlist[i];
3403		dinfo = device_get_ivars(child);
3404		dstate = state;
3405		if (device_is_attached(child) &&
3406		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3407			pci_set_powerstate(child, dstate);
3408	}
3409}
3410
3411int
3412pci_suspend(device_t dev)
3413{
3414	device_t child, *devlist;
3415	struct pci_devinfo *dinfo;
3416	int error, i, numdevs;
3417
3418	/*
3419	 * Save the PCI configuration space for each child and set the
3420	 * device in the appropriate power state for this sleep state.
3421	 */
3422	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3423		return (error);
3424	for (i = 0; i < numdevs; i++) {
3425		child = devlist[i];
3426		dinfo = device_get_ivars(child);
3427		pci_cfg_save(child, dinfo, 0);
3428	}
3429
3430	/* Suspend devices before potentially powering them down. */
3431	error = bus_generic_suspend(dev);
3432	if (error) {
3433		free(devlist, M_TEMP);
3434		return (error);
3435	}
3436	if (pci_do_power_suspend)
3437		pci_set_power_children(dev, devlist, numdevs,
3438		    PCI_POWERSTATE_D3);
3439	free(devlist, M_TEMP);
3440	return (0);
3441}
3442
3443int
3444pci_resume(device_t dev)
3445{
3446	device_t child, *devlist;
3447	struct pci_devinfo *dinfo;
3448	int error, i, numdevs;
3449
3450	/*
3451	 * Set each child to D0 and restore its PCI configuration space.
3452	 */
3453	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3454		return (error);
3455	if (pci_do_power_resume)
3456		pci_set_power_children(dev, devlist, numdevs,
3457		    PCI_POWERSTATE_D0);
3458
3459	/* Now the device is powered up, restore its config space. */
3460	for (i = 0; i < numdevs; i++) {
3461		child = devlist[i];
3462		dinfo = device_get_ivars(child);
3463
3464		pci_cfg_restore(child, dinfo);
3465		if (!device_is_attached(child))
3466			pci_cfg_save(child, dinfo, 1);
3467	}
3468
3469	/*
3470	 * Resume critical devices first, then everything else later.
3471	 */
3472	for (i = 0; i < numdevs; i++) {
3473		child = devlist[i];
3474		switch (pci_get_class(child)) {
3475		case PCIC_DISPLAY:
3476		case PCIC_MEMORY:
3477		case PCIC_BRIDGE:
3478		case PCIC_BASEPERIPH:
3479			DEVICE_RESUME(child);
3480			break;
3481		}
3482	}
3483	for (i = 0; i < numdevs; i++) {
3484		child = devlist[i];
3485		switch (pci_get_class(child)) {
3486		case PCIC_DISPLAY:
3487		case PCIC_MEMORY:
3488		case PCIC_BRIDGE:
3489		case PCIC_BASEPERIPH:
3490			break;
3491		default:
3492			DEVICE_RESUME(child);
3493		}
3494	}
3495	free(devlist, M_TEMP);
3496	return (0);
3497}
3498
3499static void
3500pci_load_vendor_data(void)
3501{
3502	caddr_t data;
3503	void *ptr;
3504	size_t sz;
3505
3506	data = preload_search_by_type("pci_vendor_data");
3507	if (data != NULL) {
3508		ptr = preload_fetch_addr(data);
3509		sz = preload_fetch_size(data);
3510		if (ptr != NULL && sz != 0) {
3511			pci_vendordata = ptr;
3512			pci_vendordata_size = sz;
3513			/* terminate the database */
3514			pci_vendordata[pci_vendordata_size] = '\n';
3515		}
3516	}
3517}
3518
/*
 * Called when a new PCI driver is registered: give it a chance to
 * identify new children, then re-probe any children that currently
 * have no driver attached.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only retry children that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3547
/*
 * Bus setup_intr method.  After the generic setup succeeds, direct PCI
 * children get their interrupt hardware programmed to match the rid in
 * use: rid 0 is legacy INTx, rid > 0 is an MSI or MSI-X vector.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI address/data pair on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* First handler: program the device's MSI registers. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N - 1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler: program and unmask the MSI-X entry. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/* On success we reach the label with error == 0, so the
		 * teardown below is skipped. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3639
3640int
3641pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3642    void *cookie)
3643{
3644	struct msix_table_entry *mte;
3645	struct resource_list_entry *rle;
3646	struct pci_devinfo *dinfo;
3647	int error, rid;
3648
3649	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3650		return (EINVAL);
3651
3652	/* If this isn't a direct child, just bail out */
3653	if (device_get_parent(child) != dev)
3654		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3655
3656	rid = rman_get_rid(irq);
3657	if (rid == 0) {
3658		/* Mask INTx */
3659		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3660	} else {
3661		/*
3662		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3663		 * decrement the appropriate handlers count and mask the
3664		 * MSI-X message, or disable MSI messages if the count
3665		 * drops to 0.
3666		 */
3667		dinfo = device_get_ivars(child);
3668		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3669		if (rle->res != irq)
3670			return (EINVAL);
3671		if (dinfo->cfg.msi.msi_alloc > 0) {
3672			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3673			    ("MSI-X index too high"));
3674			if (dinfo->cfg.msi.msi_handlers == 0)
3675				return (EINVAL);
3676			dinfo->cfg.msi.msi_handlers--;
3677			if (dinfo->cfg.msi.msi_handlers == 0)
3678				pci_disable_msi(child);
3679		} else {
3680			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3681			    ("No MSI or MSI-X interrupts allocated"));
3682			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3683			    ("MSI-X index too high"));
3684			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3685			if (mte->mte_handlers == 0)
3686				return (EINVAL);
3687			mte->mte_handlers--;
3688			if (mte->mte_handlers == 0)
3689				pci_mask_msix(child, rid - 1);
3690		}
3691	}
3692	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3693	if (rid > 0)
3694		KASSERT(error == 0,
3695		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3696	return (error);
3697}
3698
/*
 * Bus print_child method: emit the standard one-line description of a
 * child (resources, flags, slot.function address).  Returns the number
 * of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3724
/*
 * Class/subclass description table used by pci_probe_nomatch().  A
 * subclass of -1 names the whole class; `report' == 0 entries are
 * printed only when bootverbose is set.
 */
static const struct
{
	int		class;
	int		subclass;
	int		report; /* 0 = bootverbose, 1 = always */
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			1, "old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	1, "non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		1, "VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			1, "mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	1, "SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	1, "ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	1, "floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	1, "IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	1, "RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	1, "ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	1, "SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	1, "SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	1, "NVM"},
	{PCIC_NETWORK,		-1,			1, "network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	1, "ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	1, "token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	1, "fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	1, "ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	1, "ISDN"},
	{PCIC_DISPLAY,		-1,			1, "display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	1, "VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	1, "XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	1, "3D"},
	{PCIC_MULTIMEDIA,	-1,			1, "multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	1, "video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	1, "audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	1, "telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	1, "HDA"},
	{PCIC_MEMORY,		-1,			1, "memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	1, "RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	1, "flash"},
	{PCIC_BRIDGE,		-1,			1, "bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	1, "HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	1, "PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	1, "PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	1, "PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	1, "PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	1, "PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	1, "PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	1, "PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	1, "PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			1, "simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	1, "UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	1, "parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	1, "multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	1, "generic modem"},
	{PCIC_BASEPERIPH,	-1,			0, "base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	1, "interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	1, "DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	1, "timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	1, "realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	1, "PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	1, "SD host controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_IOMMU,	1, "IOMMU"},
	{PCIC_INPUTDEV,		-1,			1, "input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	1, "keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,1, "digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	1, "mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	1, "scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	1, "gameport"},
	{PCIC_DOCKING,		-1,			1, "docking station"},
	{PCIC_PROCESSOR,	-1,			1, "processor"},
	{PCIC_SERIALBUS,	-1,			1, "serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	1, "FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	1, "AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	1, "SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	1, "USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	1, "Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	0, "SMBus"},
	{PCIC_WIRELESS,		-1,			1, "wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	1, "iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	1, "IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	1, "RF"},
	{PCIC_INTELLIIO,	-1,			1, "intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	1, "I2O"},
	{PCIC_SATCOM,		-1,			1, "satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		1, "sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	1, "sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	1, "sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	1, "sat data"},
	{PCIC_CRYPTO,		-1,			1, "encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	1, "network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	1, "entertainment crypto"},
	{PCIC_DASP,		-1,			0, "dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		1, "DPIO module"},
	{0, 0, 0,		NULL}
};
3819
3820void
3821pci_probe_nomatch(device_t dev, device_t child)
3822{
3823	int i, report;
3824	const char *cp, *scp;
3825	char *device;
3826
3827	/*
3828	 * Look for a listing for this device in a loaded device database.
3829	 */
3830	report = 1;
3831	if ((device = pci_describe_device(child)) != NULL) {
3832		device_printf(dev, "<%s>", device);
3833		free(device, M_DEVBUF);
3834	} else {
3835		/*
3836		 * Scan the class/subclass descriptions for a general
3837		 * description.
3838		 */
3839		cp = "unknown";
3840		scp = NULL;
3841		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3842			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3843				if (pci_nomatch_tab[i].subclass == -1) {
3844					cp = pci_nomatch_tab[i].desc;
3845					report = pci_nomatch_tab[i].report;
3846				} else if (pci_nomatch_tab[i].subclass ==
3847				    pci_get_subclass(child)) {
3848					scp = pci_nomatch_tab[i].desc;
3849					report = pci_nomatch_tab[i].report;
3850				}
3851			}
3852		}
3853		if (report || bootverbose) {
3854			device_printf(dev, "<%s%s%s>",
3855			    cp ? cp : "",
3856			    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3857			    scp ? scp : "");
3858		}
3859	}
3860	if (report || bootverbose) {
3861		printf(" at device %d.%d (no driver attached)\n",
3862		    pci_get_slot(child), pci_get_function(child));
3863	}
3864	pci_cfg_save(child, device_get_ivars(child), 1);
3865}
3866
/*
 * Bus child_detached method: reclaim any resources the driver leaked
 * and re-save the child's config space.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");

	/* Snapshot config space so a later re-attach can restore it. */
	pci_cfg_save(child, dinfo, 1);
}
3894
3895/*
3896 * Parse the PCI device database, if loaded, and return a pointer to a
3897 * description of the device.
3898 *
3899 * The database is flat text formatted as follows:
3900 *
3901 * Any line not in a valid format is ignored.
3902 * Lines are terminated with newline '\n' characters.
3903 *
3904 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3905 * the vendor name.
3906 *
3907 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3908 * - devices cannot be listed without a corresponding VENDOR line.
3909 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3910 * another TAB, then the device name.
3911 */
3912
3913/*
3914 * Assuming (ptr) points to the beginning of a line in the database,
3915 * return the vendor or device and description of the next entry.
3916 * The value of (vendor) or (device) inappropriate for the entry type
3917 * is set to -1.  Returns nonzero at the end of the database.
3918 *
 * Note that this is slightly fragile in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialise it.
3922 */
/*
 * Parse one entry from the vendor database.  On return, *vendor or
 * *device holds the parsed id (the other is -1) and *desc the
 * description; *ptr is advanced past the consumed line.  Returns
 * nonzero at end of database.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Remaining bytes in the database from cp onward. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		/* %80[^\n] bounds the copy to the caller's 80-byte buffer. */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	/* NOTE(review): relies on the trailing '\n' written by
	 * pci_load_vendor_data() to stop the scan at end of buffer. */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3968
3969static char *
3970pci_describe_device(device_t dev)
3971{
3972	int	vendor, device;
3973	char	*desc, *vp, *dp, *line;
3974
3975	desc = vp = dp = NULL;
3976
3977	/*
3978	 * If we have no vendor data, we can't do anything.
3979	 */
3980	if (pci_vendordata == NULL)
3981		goto out;
3982
3983	/*
3984	 * Scan the vendor data looking for this device
3985	 */
3986	line = pci_vendordata;
3987	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3988		goto out;
3989	for (;;) {
3990		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3991			goto out;
3992		if (vendor == pci_get_vendor(dev))
3993			break;
3994	}
3995	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3996		goto out;
3997	for (;;) {
3998		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3999			*dp = 0;
4000			break;
4001		}
4002		if (vendor != -1) {
4003			*dp = 0;
4004			break;
4005		}
4006		if (device == pci_get_device(dev))
4007			break;
4008	}
4009	if (dp[0] == '\0')
4010		snprintf(dp, 80, "0x%x", pci_get_device(dev));
4011	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
4012	    NULL)
4013		sprintf(desc, "%s, %s", vp, dp);
4014out:
4015	if (vp != NULL)
4016		free(vp, M_DEVBUF);
4017	if (dp != NULL)
4018		free(dp, M_DEVBUF);
4019	return(desc);
4020}
4021
/*
 * Bus read_ivar method: report a child's cached config-space values.
 * Returns ENOENT for unknown ivars; PCI_IVAR_ETHADDR always fails
 * with EINVAL (no generic way to obtain a MAC here).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined id: device in the high word, vendor in the low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4104
4105int
4106pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4107{
4108	struct pci_devinfo *dinfo;
4109
4110	dinfo = device_get_ivars(child);
4111
4112	switch (which) {
4113	case PCI_IVAR_INTPIN:
4114		dinfo->cfg.intpin = value;
4115		return (0);
4116	case PCI_IVAR_ETHADDR:
4117	case PCI_IVAR_SUBVENDOR:
4118	case PCI_IVAR_SUBDEVICE:
4119	case PCI_IVAR_VENDOR:
4120	case PCI_IVAR_DEVICE:
4121	case PCI_IVAR_DEVID:
4122	case PCI_IVAR_CLASS:
4123	case PCI_IVAR_SUBCLASS:
4124	case PCI_IVAR_PROGIF:
4125	case PCI_IVAR_REVID:
4126	case PCI_IVAR_IRQ:
4127	case PCI_IVAR_DOMAIN:
4128	case PCI_IVAR_BUS:
4129	case PCI_IVAR_SLOT:
4130	case PCI_IVAR_FUNCTION:
4131		return (EINVAL);	/* disallow for now */
4132
4133	default:
4134		return (ENOENT);
4135	}
4136}
4137
4138#include "opt_ddb.h"
4139#ifdef DDB
4140#include <ddb/ddb.h>
4141#include <sys/cons.h>
4142
4143/*
4144 * List resources based on pci map registers, used for within ddb
4145 */
4146
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (location, class, subsystem IDs,
 * device/vendor IDs, revision and header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Unattached devices are labelled "none<N>" with a running count. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	/*
	 * NOTE(review): 'error' is initialized to 0 and never assigned
	 * afterwards, so the (error == 0) clause is always true here.
	 * The pager-quit check lets the operator abort a long listing.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4186#endif /* DDB */
4187
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * asks for it: size the BAR from hardware (or reuse the cached size
 * from a previously failed attempt), check that the requested
 * resource type matches the BAR flavor, reserve a suitably sized and
 * aligned range from the parent, and program the assigned address
 * back into the BAR.  Returns the reserved resource or NULL.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		/* Remember this BAR so later attempts skip the probe. */
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type disagrees with the BAR's flavor. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	resource_list_add(rl, type, *rid, start, end, count);
	res = resource_list_reserve(rl, dev, child, type, rid, start, end,
	    count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		/* Reservation failed; drop the entry we just added. */
		resource_list_delete(rl, type, *rid);
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the assigned address into the device's BAR. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:
	return (res);
}
4285
/*
 * Bus method: allocate a resource for a child.  Requests from
 * grandchildren are passed straight to our parent.  For direct
 * children this performs lazy interrupt routing and lazy BAR
 * reservation before handing the request to the resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/* Not one of our own children: delegate upward. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out (a sub-range of) the reserved resource. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4359
/*
 * Bus method: release a resource previously allocated by a child.
 * Requests from grandchildren and (with NEW_PCIB) bridge window
 * resources are passed up the tree; everything else goes through the
 * child's resource list.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	/* Not one of our own children: delegate upward. */
	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4394
/*
 * Bus method: activate a resource.  After generic activation, enable
 * the relevant decode in the child's command register; device ROM
 * BARs additionally need their own enable bit set in the BAR itself.
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4422
4423int
4424pci_deactivate_resource(device_t dev, device_t child, int type,
4425    int rid, struct resource *r)
4426{
4427	struct pci_devinfo *dinfo;
4428	int error;
4429
4430	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4431	if (error)
4432		return (error);
4433
4434	/* Disable decoding for device ROMs. */
4435	if (device_get_parent(child) == dev) {
4436		dinfo = device_get_ivars(child);
4437		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
4438			pci_write_bar(child, pci_find_bar(child, rid),
4439			    rman_get_start(r));
4440	}
4441	return (0);
4442}
4443
/*
 * Detach and destroy a child device: detach its driver, disable its
 * memory/port decode, release every resource on its resource list,
 * then delete the device and free its config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource still active or busy at this point
			 * was leaked by the driver; complain and force
			 * its release so unreserve below can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4483
/*
 * Bus method: remove a resource entry from a direct child's resource
 * list, unreserving the backing resource first.  Refuses (with a
 * diagnostic) if the child still has the resource active or busy.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4513
4514struct resource_list *
4515pci_get_resource_list (device_t dev, device_t child)
4516{
4517	struct pci_devinfo *dinfo = device_get_ivars(child);
4518
4519	return (&dinfo->resources);
4520}
4521
4522bus_dma_tag_t
4523pci_get_dma_tag(device_t bus, device_t dev)
4524{
4525	struct pci_softc *sc = device_get_softc(bus);
4526
4527	return (sc->sc_dma_tag);
4528}
4529
4530uint32_t
4531pci_read_config_method(device_t dev, device_t child, int reg, int width)
4532{
4533	struct pci_devinfo *dinfo = device_get_ivars(child);
4534	pcicfgregs *cfg = &dinfo->cfg;
4535
4536	return (PCIB_READ_CONFIG(device_get_parent(dev),
4537	    cfg->bus, cfg->slot, cfg->func, reg, width));
4538}
4539
4540void
4541pci_write_config_method(device_t dev, device_t child, int reg,
4542    uint32_t val, int width)
4543{
4544	struct pci_devinfo *dinfo = device_get_ivars(child);
4545	pcicfgregs *cfg = &dinfo->cfg;
4546
4547	PCIB_WRITE_CONFIG(device_get_parent(dev),
4548	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4549}
4550
4551int
4552pci_child_location_str_method(device_t dev, device_t child, char *buf,
4553    size_t buflen)
4554{
4555
4556	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4557	    pci_get_function(child));
4558	return (0);
4559}
4560
4561int
4562pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4563    size_t buflen)
4564{
4565	struct pci_devinfo *dinfo;
4566	pcicfgregs *cfg;
4567
4568	dinfo = device_get_ivars(child);
4569	cfg = &dinfo->cfg;
4570	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4571	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4572	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4573	    cfg->progif);
4574	return (0);
4575}
4576
4577int
4578pci_assign_interrupt_method(device_t dev, device_t child)
4579{
4580	struct pci_devinfo *dinfo = device_get_ivars(child);
4581	pcicfgregs *cfg = &dinfo->cfg;
4582
4583	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4584	    cfg->intpin));
4585}
4586
4587static int
4588pci_modevent(module_t mod, int what, void *arg)
4589{
4590	static struct cdev *pci_cdev;
4591
4592	switch (what) {
4593	case MOD_LOAD:
4594		STAILQ_INIT(&pci_devq);
4595		pci_generation = 0;
4596		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4597		    "pci");
4598		pci_load_vendor_data();
4599		break;
4600
4601	case MOD_UNLOAD:
4602		destroy_dev(pci_cdev);
4603		break;
4604	}
4605
4606	return (0);
4607}
4608
/*
 * Restore the writable PCI Express capability registers saved by
 * pci_cfg_save_pcie().  Which registers exist depends on the
 * capability version and on the port type encoded in pcie_flags, so
 * the same gating expressions are used in both directions.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
/* 16-bit write of register n within the PCIe capability. */
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist in capability version 2+. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4644
4645static void
4646pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4647{
4648	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4649	    dinfo->cfg.pcix.pcix_command,  2);
4650}
4651
/*
 * Restore a type 0 device's config registers from the cached copy in
 * dinfo, typically after a resume or power-state change.  The device
 * is brought to D0 first, then BARs, the standard header registers,
 * and any PCIe/PCI-X/MSI/MSI-X capability state are rewritten.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4701
/*
 * Snapshot the writable PCI Express capability registers into dinfo
 * so pci_cfg_restore_pcie() can re-program them later.  Register
 * presence depends on the capability version and port type, mirroring
 * the gating in the restore path.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
/* 16-bit read of register n within the PCIe capability. */
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" control registers only exist in capability version 2+. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4739
4740static void
4741pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4742{
4743	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4744	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4745}
4746
/*
 * Snapshot a type 0 device's writable config registers into dinfo so
 * pci_cfg_restore() can re-program them later.  If setstate is
 * non-zero, additionally power the device down to D3 subject to the
 * pci_do_power_nodriver policy and the device's class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each policy level deliberately falls through to the next. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Agressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4832
4833/* Wrapper APIs suitable for device driver use. */
4834void
4835pci_save_state(device_t dev)
4836{
4837	struct pci_devinfo *dinfo;
4838
4839	dinfo = device_get_ivars(dev);
4840	pci_cfg_save(dev, dinfo, 0);
4841}
4842
4843void
4844pci_restore_state(device_t dev)
4845{
4846	struct pci_devinfo *dinfo;
4847
4848	dinfo = device_get_ivars(dev);
4849	pci_cfg_restore(dev, dinfo);
4850}
4851