pci.c revision 262134
1/*-
2 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3 * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4 * Copyright (c) 2000, BSDi
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/dev/pci/pci.c 262134 2014-02-17 22:19:49Z jhb $");
31
32#include "opt_bus.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/linker.h>
39#include <sys/fcntl.h>
40#include <sys/conf.h>
41#include <sys/kernel.h>
42#include <sys/queue.h>
43#include <sys/sysctl.h>
44#include <sys/endian.h>
45
46#include <vm/vm.h>
47#include <vm/pmap.h>
48#include <vm/vm_extern.h>
49
50#include <sys/bus.h>
51#include <machine/bus.h>
52#include <sys/rman.h>
53#include <machine/resource.h>
54#include <machine/stdarg.h>
55
56#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57#include <machine/intr_machdep.h>
58#endif
59
60#include <sys/pciio.h>
61#include <dev/pci/pcireg.h>
62#include <dev/pci/pcivar.h>
63#include <dev/pci/pci_private.h>
64
65#include <dev/usb/controller/xhcireg.h>
66#include <dev/usb/controller/ehcireg.h>
67#include <dev/usb/controller/ohcireg.h>
68#include <dev/usb/controller/uhcireg.h>
69
70#include "pcib_if.h"
71#include "pci_if.h"
72
/*
 * True when 'reg' is the expansion ROM BAR register for this device's
 * header type: PCIR_BIOS for type 0 (normal) devices, PCIR_BIOS_1 for
 * type 1 (PCI-PCI bridge) devices.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76
/* Forward declarations of file-local helper functions. */
static int		pci_has_quirk(uint32_t devid, int quirk);
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static int		pci_msix_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);
123
/*
 * kobj method table for the PCI bus driver: device lifecycle methods,
 * generic newbus operations, and the PCI-specific interface methods.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_dma_tag,	pci_get_dma_tag),
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	pci_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_cap,		pci_find_cap_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_find_htcap,	pci_find_htcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, sizeof(struct pci_softc));

/* Attach the pci driver to pcib (PCI bridge) parents. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);
187
/*
 * Buffer holding the vendor data file; loaded by pci_load_vendor_data()
 * and parsed by pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;

/* One entry in the device quirk table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* Neither MSI nor MSI-X work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
#define	PCI_QUIRK_UNMAP_REG	4 /* Ignore PCI map register */
#define	PCI_QUIRK_DISABLE_MSIX	5 /* MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

/* Table of known-broken or special-cased devices, consulted by pci_has_quirk(). */
static const struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI-X allocation doesn't work properly for devices passed through
	 * by VMware up to at least ESXi 5.1.
	 */
	{ 0x079015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCI/PCI-X */
	{ 0x07a015ad, PCI_QUIRK_DISABLE_MSIX,	0,	0 }, /* PCIe */

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	/*
	 * HPET MMIO base address may appear in Bar1 for AMD SB600 SMBus
	 * controller depending on SoftPciRst register (PM_IO 0x55 [7]).
	 * It prevents us from attaching hpet(4) when the bit is unset.
	 * Note this quirk only affects SB600 revision A13 and earlier.
	 * For SB600 A21 and later, firmware must set the bit to hide it.
	 * For SB700 and later, it is unused and hardcoded to zero.
	 */
	{ 0x43851002, PCI_QUIRK_UNMAP_REG,	0x14,	0 },

	{ 0 }	/* devid == 0 terminates the table */
};
260
261/* map register information */
262#define	PCI_MAPMEM	0x01	/* memory map */
263#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
264#define	PCI_MAPPORT	0x04	/* port map */
265
266struct devlist pci_devq;
267uint32_t pci_generation;
268uint32_t pci_numdevs = 0;
269static int pcie_chipset, pcix_chipset;
270
271/* sysctl vars */
272SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
273
274static int pci_enable_io_modes = 1;
275TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
276SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
277    &pci_enable_io_modes, 1,
278    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
279enable these bits correctly.  We'd like to do this all the time, but there\n\
280are some peripherals that this causes problems with.");
281
282static int pci_do_realloc_bars = 0;
283TUNABLE_INT("hw.pci.realloc_bars", &pci_do_realloc_bars);
284SYSCTL_INT(_hw_pci, OID_AUTO, realloc_bars, CTLFLAG_RW,
285    &pci_do_realloc_bars, 0,
286    "Attempt to allocate a new range for any BARs whose original firmware-assigned ranges fail to allocate during the initial device scan.");
287
288static int pci_do_power_nodriver = 0;
289TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
290SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
291    &pci_do_power_nodriver, 0,
292  "Place a function into D3 state when no driver attaches to it.  0 means\n\
293disable.  1 means conservatively place devices into D3 state.  2 means\n\
294agressively place devices into D3 state.  3 means put absolutely everything\n\
295in D3 state.");
296
297int pci_do_power_resume = 1;
298TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
299SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
300    &pci_do_power_resume, 1,
301  "Transition from D3 -> D0 on resume.");
302
303int pci_do_power_suspend = 1;
304TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
305SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
306    &pci_do_power_suspend, 1,
307  "Transition from D0 -> D3 on suspend.");
308
309static int pci_do_msi = 1;
310TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
311SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
312    "Enable support for MSI interrupts");
313
314static int pci_do_msix = 1;
315TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
316SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
317    "Enable support for MSI-X interrupts");
318
319static int pci_honor_msi_blacklist = 1;
320TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
321SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
322    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI/MSI-X");
323
324#if defined(__i386__) || defined(__amd64__)
325static int pci_usb_takeover = 1;
326#else
327static int pci_usb_takeover = 0;
328#endif
329TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
330SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
331    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
332Disable this if you depend on BIOS emulation of USB devices, that is\n\
333you use USB devices (like keyboard or mouse) but do not load USB drivers");
334
335static int
336pci_has_quirk(uint32_t devid, int quirk)
337{
338	const struct pci_quirk *q;
339
340	for (q = &pci_quirks[0]; q->devid; q++) {
341		if (q->devid == devid && q->type == quirk)
342			return (1);
343	}
344	return (0);
345}
346
347/* Find a device_t by bus/slot/function in domain 0 */
348
349device_t
350pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
351{
352
353	return (pci_find_dbsf(0, bus, slot, func));
354}
355
356/* Find a device_t by domain/bus/slot/function */
357
358device_t
359pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
360{
361	struct pci_devinfo *dinfo;
362
363	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
364		if ((dinfo->cfg.domain == domain) &&
365		    (dinfo->cfg.bus == bus) &&
366		    (dinfo->cfg.slot == slot) &&
367		    (dinfo->cfg.func == func)) {
368			return (dinfo->cfg.dev);
369		}
370	}
371
372	return (NULL);
373}
374
375/* Find a device_t by vendor/device ID */
376
377device_t
378pci_find_device(uint16_t vendor, uint16_t device)
379{
380	struct pci_devinfo *dinfo;
381
382	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
383		if ((dinfo->cfg.vendor == vendor) &&
384		    (dinfo->cfg.device == device)) {
385			return (dinfo->cfg.dev);
386		}
387	}
388
389	return (NULL);
390}
391
392device_t
393pci_find_class(uint8_t class, uint8_t subclass)
394{
395	struct pci_devinfo *dinfo;
396
397	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
398		if (dinfo->cfg.baseclass == class &&
399		    dinfo->cfg.subclass == subclass) {
400			return (dinfo->cfg.dev);
401		}
402	}
403
404	return (NULL);
405}
406
407static int
408pci_printf(pcicfgregs *cfg, const char *fmt, ...)
409{
410	va_list ap;
411	int retval;
412
413	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
414	    cfg->func);
415	va_start(ap, fmt);
416	retval += vprintf(fmt, ap);
417	va_end(ap);
418	return (retval);
419}
420
421/* return base address of memory or port map */
422
423static pci_addr_t
424pci_mapbase(uint64_t mapreg)
425{
426
427	if (PCI_BAR_MEM(mapreg))
428		return (mapreg & PCIM_BAR_MEM_BASE);
429	else
430		return (mapreg & PCIM_BAR_IO_BASE);
431}
432
433/* return map type of memory or port map */
434
435static const char *
436pci_maptype(uint64_t mapreg)
437{
438
439	if (PCI_BAR_IO(mapreg))
440		return ("I/O Port");
441	if (mapreg & PCIM_BAR_MEM_PREFETCH)
442		return ("Prefetchable Memory");
443	return ("Memory");
444}
445
/* return log2 of map size decoded for memory or port map */

static int
pci_mapsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/* Count trailing zero bits of the decoded base; 0 maps to 0. */
	base = pci_mapbase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
464
465/* return base address of device ROM */
466
467static pci_addr_t
468pci_rombase(uint64_t mapreg)
469{
470
471	return (mapreg & PCIM_BIOS_ADDR_MASK);
472}
473
/* return log2 of map size decoded for device ROM */

static int
pci_romsize(uint64_t testval)
{
	uint64_t base;
	int ln2size;

	/* Count trailing zero bits of the decoded ROM base; 0 maps to 0. */
	base = pci_rombase(testval);
	if (base == 0)
		return (0);
	for (ln2size = 0; (base & 1) == 0; base >>= 1)
		ln2size++;
	return (ln2size);
}
492
493/* return log2 of address range supported by map register */
494
495static int
496pci_maprange(uint64_t mapreg)
497{
498	int ln2range = 0;
499
500	if (PCI_BAR_IO(mapreg))
501		ln2range = 32;
502	else
503		switch (mapreg & PCIM_BAR_MEM_TYPE) {
504		case PCIM_BAR_MEM_32:
505			ln2range = 32;
506			break;
507		case PCIM_BAR_MEM_1MB:
508			ln2range = 20;
509			break;
510		case PCIM_BAR_MEM_64:
511			ln2range = 64;
512			break;
513		}
514	return (ln2range);
515}
516
517/* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
518
519static void
520pci_fixancient(pcicfgregs *cfg)
521{
522	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
523		return;
524
525	/* PCI to PCI bridges use header type 1 */
526	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
527		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
528}
529
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * Subvendor/subdevice IDs and the BAR count live at different
	 * offsets depending on the header type.  Bridges (type 1) get
	 * their subvendor from the PCIY_SUBVENDOR capability instead;
	 * see pci_read_cap().
	 */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
553
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means no device at this function. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* 'size' lets subclassed buses allocate a larger devinfo. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the common config header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		/* Fix up ancient devices before reading type-specific data. */
		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pci_conf ioctl view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		/* Generation count lets readers detect list changes. */
		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
629
/*
 * Walk the device's PCI capability list and record the location and
 * relevant contents of each capability this driver understands (power
 * management, HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X and
 * PCI-express).
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability list head is at a header-type-specific offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first PM capability found is recorded. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes the message count as log2. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table/PBA registers pack a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			cfg->pcix.pcix_location = ptr;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			cfg->pcie.pcie_location = ptr;
			val = REG(ptr + PCIER_FLAGS, 2);
			cfg->pcie.pcie_type = val & PCIEM_FLAGS_TYPE;
			break;
		default:
			break;
		}
	}

#if defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG are deliberately left defined: the VPD routines below use them. */
}
790
791/*
792 * PCI Vital Product Data
793 */
794
795#define	PCI_VPD_TIMEOUT		1000000
796
797static int
798pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
799{
800	int count = PCI_VPD_TIMEOUT;
801
802	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
803
804	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
805
806	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
807		if (--count < 0)
808			return (ENXIO);
809		DELAY(1);	/* limit looping */
810	}
811	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
812
813	return (0);
814}
815
#if 0
/*
 * Write one aligned 32-bit word of VPD data (currently compiled out).
 * Mirror of pci_read_vpd_reg(): writes the data register, then the
 * address with flag bit 15 set to start the write, and polls until the
 * hardware clears the flag.  Returns 0 on success or ENXIO on timeout.
 * NOTE(review): assertion message says "must by"; should read "must be".
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
835
#undef PCI_VPD_TIMEOUT

/* Incremental reader state for walking a device's VPD byte stream. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched 32-bit word */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD word offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
846
847static int
848vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
849{
850	uint32_t reg;
851	uint8_t byte;
852
853	if (vrs->bytesinval == 0) {
854		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
855			return (ENXIO);
856		vrs->val = le32toh(reg);
857		vrs->off += 4;
858		byte = vrs->val & 0xff;
859		vrs->bytesinval = 3;
860	} else {
861		vrs->val = vrs->val >> 8;
862		byte = vrs->val & 0xff;
863		vrs->bytesinval--;
864	}
865
866	vrs->cksum += byte;
867	*data = byte;
868	return (0);
869}
870
871static void
872pci_read_vpd(device_t pcib, pcicfgregs *cfg)
873{
874	struct vpd_readstate vrs;
875	int state;
876	int name;
877	int remain;
878	int i;
879	int alloc, off;		/* alloc/off for RO/W arrays */
880	int cksumvalid;
881	int dflen;
882	uint8_t byte;
883	uint8_t byte2;
884
885	/* init vpd reader */
886	vrs.bytesinval = 0;
887	vrs.off = 0;
888	vrs.pcib = pcib;
889	vrs.cfg = cfg;
890	vrs.cksum = 0;
891
892	state = 0;
893	name = remain = i = 0;	/* shut up stupid gcc */
894	alloc = off = 0;	/* shut up stupid gcc */
895	dflen = 0;		/* shut up stupid gcc */
896	cksumvalid = -1;
897	while (state >= 0) {
898		if (vpd_nextbyte(&vrs, &byte)) {
899			state = -2;
900			break;
901		}
902#if 0
903		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
904		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
905		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
906#endif
907		switch (state) {
908		case 0:		/* item name */
909			if (byte & 0x80) {
910				if (vpd_nextbyte(&vrs, &byte2)) {
911					state = -2;
912					break;
913				}
914				remain = byte2;
915				if (vpd_nextbyte(&vrs, &byte2)) {
916					state = -2;
917					break;
918				}
919				remain |= byte2 << 8;
920				if (remain > (0x7f*4 - vrs.off)) {
921					state = -1;
922					pci_printf(cfg,
923					    "invalid VPD data, remain %#x\n",
924					    remain);
925				}
926				name = byte & 0x7f;
927			} else {
928				remain = byte & 0x7;
929				name = (byte >> 3) & 0xf;
930			}
931			switch (name) {
932			case 0x2:	/* String */
933				cfg->vpd.vpd_ident = malloc(remain + 1,
934				    M_DEVBUF, M_WAITOK);
935				i = 0;
936				state = 1;
937				break;
938			case 0xf:	/* End */
939				state = -1;
940				break;
941			case 0x10:	/* VPD-R */
942				alloc = 8;
943				off = 0;
944				cfg->vpd.vpd_ros = malloc(alloc *
945				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
946				    M_WAITOK | M_ZERO);
947				state = 2;
948				break;
949			case 0x11:	/* VPD-W */
950				alloc = 8;
951				off = 0;
952				cfg->vpd.vpd_w = malloc(alloc *
953				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
954				    M_WAITOK | M_ZERO);
955				state = 5;
956				break;
957			default:	/* Invalid data, abort */
958				state = -1;
959				break;
960			}
961			break;
962
963		case 1:	/* Identifier String */
964			cfg->vpd.vpd_ident[i++] = byte;
965			remain--;
966			if (remain == 0)  {
967				cfg->vpd.vpd_ident[i] = '\0';
968				state = 0;
969			}
970			break;
971
972		case 2:	/* VPD-R Keyword Header */
973			if (off == alloc) {
974				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
975				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
976				    M_DEVBUF, M_WAITOK | M_ZERO);
977			}
978			cfg->vpd.vpd_ros[off].keyword[0] = byte;
979			if (vpd_nextbyte(&vrs, &byte2)) {
980				state = -2;
981				break;
982			}
983			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
984			if (vpd_nextbyte(&vrs, &byte2)) {
985				state = -2;
986				break;
987			}
988			cfg->vpd.vpd_ros[off].len = dflen = byte2;
989			if (dflen == 0 &&
990			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
991			    2) == 0) {
992				/*
993				 * if this happens, we can't trust the rest
994				 * of the VPD.
995				 */
996				pci_printf(cfg, "bad keyword length: %d\n",
997				    dflen);
998				cksumvalid = 0;
999				state = -1;
1000				break;
1001			} else if (dflen == 0) {
1002				cfg->vpd.vpd_ros[off].value = malloc(1 *
1003				    sizeof(*cfg->vpd.vpd_ros[off].value),
1004				    M_DEVBUF, M_WAITOK);
1005				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1006			} else
1007				cfg->vpd.vpd_ros[off].value = malloc(
1008				    (dflen + 1) *
1009				    sizeof(*cfg->vpd.vpd_ros[off].value),
1010				    M_DEVBUF, M_WAITOK);
1011			remain -= 3;
1012			i = 0;
1013			/* keep in sync w/ state 3's transistions */
1014			if (dflen == 0 && remain == 0)
1015				state = 0;
1016			else if (dflen == 0)
1017				state = 2;
1018			else
1019				state = 3;
1020			break;
1021
1022		case 3:	/* VPD-R Keyword Value */
1023			cfg->vpd.vpd_ros[off].value[i++] = byte;
1024			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1025			    "RV", 2) == 0 && cksumvalid == -1) {
1026				if (vrs.cksum == 0)
1027					cksumvalid = 1;
1028				else {
1029					if (bootverbose)
1030						pci_printf(cfg,
1031					    "bad VPD cksum, remain %hhu\n",
1032						    vrs.cksum);
1033					cksumvalid = 0;
1034					state = -1;
1035					break;
1036				}
1037			}
1038			dflen--;
1039			remain--;
1040			/* keep in sync w/ state 2's transistions */
1041			if (dflen == 0)
1042				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1043			if (dflen == 0 && remain == 0) {
1044				cfg->vpd.vpd_rocnt = off;
1045				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1046				    off * sizeof(*cfg->vpd.vpd_ros),
1047				    M_DEVBUF, M_WAITOK | M_ZERO);
1048				state = 0;
1049			} else if (dflen == 0)
1050				state = 2;
1051			break;
1052
1053		case 4:
1054			remain--;
1055			if (remain == 0)
1056				state = 0;
1057			break;
1058
1059		case 5:	/* VPD-W Keyword Header */
1060			if (off == alloc) {
1061				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1062				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1063				    M_DEVBUF, M_WAITOK | M_ZERO);
1064			}
1065			cfg->vpd.vpd_w[off].keyword[0] = byte;
1066			if (vpd_nextbyte(&vrs, &byte2)) {
1067				state = -2;
1068				break;
1069			}
1070			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1071			if (vpd_nextbyte(&vrs, &byte2)) {
1072				state = -2;
1073				break;
1074			}
1075			cfg->vpd.vpd_w[off].len = dflen = byte2;
1076			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1077			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1078			    sizeof(*cfg->vpd.vpd_w[off].value),
1079			    M_DEVBUF, M_WAITOK);
1080			remain -= 3;
1081			i = 0;
1082			/* keep in sync w/ state 6's transistions */
1083			if (dflen == 0 && remain == 0)
1084				state = 0;
1085			else if (dflen == 0)
1086				state = 5;
1087			else
1088				state = 6;
1089			break;
1090
1091		case 6:	/* VPD-W Keyword Value */
1092			cfg->vpd.vpd_w[off].value[i++] = byte;
1093			dflen--;
1094			remain--;
1095			/* keep in sync w/ state 5's transistions */
1096			if (dflen == 0)
1097				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1098			if (dflen == 0 && remain == 0) {
1099				cfg->vpd.vpd_wcnt = off;
1100				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1101				    off * sizeof(*cfg->vpd.vpd_w),
1102				    M_DEVBUF, M_WAITOK | M_ZERO);
1103				state = 0;
1104			} else if (dflen == 0)
1105				state = 5;
1106			break;
1107
1108		default:
1109			pci_printf(cfg, "invalid state: %d\n", state);
1110			state = -1;
1111			break;
1112		}
1113	}
1114
1115	if (cksumvalid == 0 || state < -1) {
1116		/* read-only data bad, clean up */
1117		if (cfg->vpd.vpd_ros != NULL) {
1118			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1119				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1120			free(cfg->vpd.vpd_ros, M_DEVBUF);
1121			cfg->vpd.vpd_ros = NULL;
1122		}
1123	}
1124	if (state < -1) {
1125		/* I/O error, clean up */
1126		pci_printf(cfg, "failed to read VPD data.\n");
1127		if (cfg->vpd.vpd_ident != NULL) {
1128			free(cfg->vpd.vpd_ident, M_DEVBUF);
1129			cfg->vpd.vpd_ident = NULL;
1130		}
1131		if (cfg->vpd.vpd_w != NULL) {
1132			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1133				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1134			free(cfg->vpd.vpd_w, M_DEVBUF);
1135			cfg->vpd.vpd_w = NULL;
1136		}
1137	}
1138	cfg->vpd.vpd_cached = 1;
1139#undef REG
1140#undef WREG
1141}
1142
1143int
1144pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1145{
1146	struct pci_devinfo *dinfo = device_get_ivars(child);
1147	pcicfgregs *cfg = &dinfo->cfg;
1148
1149	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1150		pci_read_vpd(device_get_parent(dev), cfg);
1151
1152	*identptr = cfg->vpd.vpd_ident;
1153
1154	if (*identptr == NULL)
1155		return (ENXIO);
1156
1157	return (0);
1158}
1159
1160int
1161pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1162	const char **vptr)
1163{
1164	struct pci_devinfo *dinfo = device_get_ivars(child);
1165	pcicfgregs *cfg = &dinfo->cfg;
1166	int i;
1167
1168	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1169		pci_read_vpd(device_get_parent(dev), cfg);
1170
1171	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1172		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1173		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1174			*vptr = cfg->vpd.vpd_ros[i].value;
1175			return (0);
1176		}
1177
1178	*vptr = NULL;
1179	return (ENXIO);
1180}
1181
1182struct pcicfg_vpd *
1183pci_fetch_vpd_list(device_t dev)
1184{
1185	struct pci_devinfo *dinfo = device_get_ivars(dev);
1186	pcicfgregs *cfg = &dinfo->cfg;
1187
1188	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1189		pci_read_vpd(device_get_parent(device_get_parent(dev)), cfg);
1190	return (&cfg->vpd);
1191}
1192
1193/*
1194 * Find the requested HyperTransport capability and return the offset
1195 * in configuration space via the pointer provided.  The function
1196 * returns 0 on success and an error code otherwise.
1197 */
1198int
1199pci_find_htcap_method(device_t dev, device_t child, int capability, int *capreg)
1200{
1201	int ptr, error;
1202	uint16_t val;
1203
1204	error = pci_find_cap(child, PCIY_HT, &ptr);
1205	if (error)
1206		return (error);
1207
1208	/*
1209	 * Traverse the capabilities list checking each HT capability
1210	 * to see if it matches the requested HT capability.
1211	 */
1212	while (ptr != 0) {
1213		val = pci_read_config(child, ptr + PCIR_HT_COMMAND, 2);
1214		if (capability == PCIM_HTCAP_SLAVE ||
1215		    capability == PCIM_HTCAP_HOST)
1216			val &= 0xe000;
1217		else
1218			val &= PCIM_HTCMD_CAP_MASK;
1219		if (val == capability) {
1220			if (capreg != NULL)
1221				*capreg = ptr;
1222			return (0);
1223		}
1224
1225		/* Skip to the next HT capability. */
1226		while (ptr != 0) {
1227			ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1228			if (pci_read_config(child, ptr + PCICAP_ID, 1) ==
1229			    PCIY_HT)
1230				break;
1231		}
1232	}
1233	return (ENOENT);
1234}
1235
1236/*
1237 * Find the requested capability and return the offset in
1238 * configuration space via the pointer provided.  The function returns
1239 * 0 on success and an error code otherwise.
1240 */
1241int
1242pci_find_cap_method(device_t dev, device_t child, int capability,
1243    int *capreg)
1244{
1245	struct pci_devinfo *dinfo = device_get_ivars(child);
1246	pcicfgregs *cfg = &dinfo->cfg;
1247	u_int32_t status;
1248	u_int8_t ptr;
1249
1250	/*
1251	 * Check the CAP_LIST bit of the PCI status register first.
1252	 */
1253	status = pci_read_config(child, PCIR_STATUS, 2);
1254	if (!(status & PCIM_STATUS_CAPPRESENT))
1255		return (ENXIO);
1256
1257	/*
1258	 * Determine the start pointer of the capabilities list.
1259	 */
1260	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1261	case PCIM_HDRTYPE_NORMAL:
1262	case PCIM_HDRTYPE_BRIDGE:
1263		ptr = PCIR_CAP_PTR;
1264		break;
1265	case PCIM_HDRTYPE_CARDBUS:
1266		ptr = PCIR_CAP_PTR_2;
1267		break;
1268	default:
1269		/* XXX: panic? */
1270		return (ENXIO);		/* no extended capabilities support */
1271	}
1272	ptr = pci_read_config(child, ptr, 1);
1273
1274	/*
1275	 * Traverse the capabilities list.
1276	 */
1277	while (ptr != 0) {
1278		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1279			if (capreg != NULL)
1280				*capreg = ptr;
1281			return (0);
1282		}
1283		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1284	}
1285
1286	return (ENOENT);
1287}
1288
1289/*
1290 * Find the requested extended capability and return the offset in
1291 * configuration space via the pointer provided.  The function returns
1292 * 0 on success and an error code otherwise.
1293 */
1294int
1295pci_find_extcap_method(device_t dev, device_t child, int capability,
1296    int *capreg)
1297{
1298	struct pci_devinfo *dinfo = device_get_ivars(child);
1299	pcicfgregs *cfg = &dinfo->cfg;
1300	uint32_t ecap;
1301	uint16_t ptr;
1302
1303	/* Only supported for PCI-express devices. */
1304	if (cfg->pcie.pcie_location == 0)
1305		return (ENXIO);
1306
1307	ptr = PCIR_EXTCAP;
1308	ecap = pci_read_config(child, ptr, 4);
1309	if (ecap == 0xffffffff || ecap == 0)
1310		return (ENOENT);
1311	for (;;) {
1312		if (PCI_EXTCAP_ID(ecap) == capability) {
1313			if (capreg != NULL)
1314				*capreg = ptr;
1315			return (0);
1316		}
1317		ptr = PCI_EXTCAP_NEXTPTR(ecap);
1318		if (ptr == 0)
1319			break;
1320		ecap = pci_read_config(child, ptr, 4);
1321	}
1322
1323	return (ENOENT);
1324}
1325
1326/*
1327 * Support for MSI-X message interrupts.
1328 */
1329void
1330pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1331{
1332	struct pci_devinfo *dinfo = device_get_ivars(dev);
1333	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1334	uint32_t offset;
1335
1336	KASSERT(msix->msix_table_len > index, ("bogus index"));
1337	offset = msix->msix_table_offset + index * 16;
1338	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1339	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1340	bus_write_4(msix->msix_table_res, offset + 8, data);
1341
1342	/* Enable MSI -> HT mapping. */
1343	pci_ht_map_msi(dev, address);
1344}
1345
1346void
1347pci_mask_msix(device_t dev, u_int index)
1348{
1349	struct pci_devinfo *dinfo = device_get_ivars(dev);
1350	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1351	uint32_t offset, val;
1352
1353	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1354	offset = msix->msix_table_offset + index * 16 + 12;
1355	val = bus_read_4(msix->msix_table_res, offset);
1356	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1357		val |= PCIM_MSIX_VCTRL_MASK;
1358		bus_write_4(msix->msix_table_res, offset, val);
1359	}
1360}
1361
1362void
1363pci_unmask_msix(device_t dev, u_int index)
1364{
1365	struct pci_devinfo *dinfo = device_get_ivars(dev);
1366	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1367	uint32_t offset, val;
1368
1369	KASSERT(msix->msix_table_len > index, ("bogus index"));
1370	offset = msix->msix_table_offset + index * 16 + 12;
1371	val = bus_read_4(msix->msix_table_res, offset);
1372	if (val & PCIM_MSIX_VCTRL_MASK) {
1373		val &= ~PCIM_MSIX_VCTRL_MASK;
1374		bus_write_4(msix->msix_table_res, offset, val);
1375	}
1376}
1377
1378int
1379pci_pending_msix(device_t dev, u_int index)
1380{
1381	struct pci_devinfo *dinfo = device_get_ivars(dev);
1382	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1383	uint32_t offset, bit;
1384
1385	KASSERT(msix->msix_table_len > index, ("bogus index"));
1386	offset = msix->msix_pba_offset + (index / 32) * 4;
1387	bit = 1 << index % 32;
1388	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1389}
1390
1391/*
1392 * Restore MSI-X registers and table during resume.  If MSI-X is
1393 * enabled then walk the virtual table to restore the actual MSI-X
1394 * table.
1395 */
1396static void
1397pci_resume_msix(device_t dev)
1398{
1399	struct pci_devinfo *dinfo = device_get_ivars(dev);
1400	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1401	struct msix_table_entry *mte;
1402	struct msix_vector *mv;
1403	int i;
1404
1405	if (msix->msix_alloc > 0) {
1406		/* First, mask all vectors. */
1407		for (i = 0; i < msix->msix_msgnum; i++)
1408			pci_mask_msix(dev, i);
1409
1410		/* Second, program any messages with at least one handler. */
1411		for (i = 0; i < msix->msix_table_len; i++) {
1412			mte = &msix->msix_table[i];
1413			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1414				continue;
1415			mv = &msix->msix_vectors[mte->mte_vector - 1];
1416			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1417			pci_unmask_msix(dev, i);
1418		}
1419	}
1420	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1421	    msix->msix_ctrl, 2);
1422}
1423
1424/*
1425 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1426 * returned in *count.  After this function returns, each message will be
1427 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1428 */
1429int
1430pci_alloc_msix_method(device_t dev, device_t child, int *count)
1431{
1432	struct pci_devinfo *dinfo = device_get_ivars(child);
1433	pcicfgregs *cfg = &dinfo->cfg;
1434	struct resource_list_entry *rle;
1435	int actual, error, i, irq, max;
1436
1437	/* Don't let count == 0 get us into trouble. */
1438	if (*count == 0)
1439		return (EINVAL);
1440
1441	/* If rid 0 is allocated, then fail. */
1442	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1443	if (rle != NULL && rle->res != NULL)
1444		return (ENXIO);
1445
1446	/* Already have allocated messages? */
1447	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1448		return (ENXIO);
1449
1450	/* If MSI-X is blacklisted for this system, fail. */
1451	if (pci_msix_blacklisted())
1452		return (ENXIO);
1453
1454	/* MSI-X capability present? */
1455	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1456		return (ENODEV);
1457
1458	/* Make sure the appropriate BARs are mapped. */
1459	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1460	    cfg->msix.msix_table_bar);
1461	if (rle == NULL || rle->res == NULL ||
1462	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1463		return (ENXIO);
1464	cfg->msix.msix_table_res = rle->res;
1465	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1466		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1467		    cfg->msix.msix_pba_bar);
1468		if (rle == NULL || rle->res == NULL ||
1469		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1470			return (ENXIO);
1471	}
1472	cfg->msix.msix_pba_res = rle->res;
1473
1474	if (bootverbose)
1475		device_printf(child,
1476		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1477		    *count, cfg->msix.msix_msgnum);
1478	max = min(*count, cfg->msix.msix_msgnum);
1479	for (i = 0; i < max; i++) {
1480		/* Allocate a message. */
1481		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1482		if (error) {
1483			if (i == 0)
1484				return (error);
1485			break;
1486		}
1487		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1488		    irq, 1);
1489	}
1490	actual = i;
1491
1492	if (bootverbose) {
1493		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1494		if (actual == 1)
1495			device_printf(child, "using IRQ %lu for MSI-X\n",
1496			    rle->start);
1497		else {
1498			int run;
1499
1500			/*
1501			 * Be fancy and try to print contiguous runs of
1502			 * IRQ values as ranges.  'irq' is the previous IRQ.
1503			 * 'run' is true if we are in a range.
1504			 */
1505			device_printf(child, "using IRQs %lu", rle->start);
1506			irq = rle->start;
1507			run = 0;
1508			for (i = 1; i < actual; i++) {
1509				rle = resource_list_find(&dinfo->resources,
1510				    SYS_RES_IRQ, i + 1);
1511
1512				/* Still in a run? */
1513				if (rle->start == irq + 1) {
1514					run = 1;
1515					irq++;
1516					continue;
1517				}
1518
1519				/* Finish previous range. */
1520				if (run) {
1521					printf("-%d", irq);
1522					run = 0;
1523				}
1524
1525				/* Start new range. */
1526				printf(",%lu", rle->start);
1527				irq = rle->start;
1528			}
1529
1530			/* Unfinished range? */
1531			if (run)
1532				printf("-%d", irq);
1533			printf(" for MSI-X\n");
1534		}
1535	}
1536
1537	/* Mask all vectors. */
1538	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1539		pci_mask_msix(child, i);
1540
1541	/* Allocate and initialize vector data and virtual table. */
1542	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1543	    M_DEVBUF, M_WAITOK | M_ZERO);
1544	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1545	    M_DEVBUF, M_WAITOK | M_ZERO);
1546	for (i = 0; i < actual; i++) {
1547		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1548		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1549		cfg->msix.msix_table[i].mte_vector = i + 1;
1550	}
1551
1552	/* Update control register to enable MSI-X. */
1553	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1554	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1555	    cfg->msix.msix_ctrl, 2);
1556
1557	/* Update counts of alloc'd messages. */
1558	cfg->msix.msix_alloc = actual;
1559	cfg->msix.msix_table_len = actual;
1560	*count = actual;
1561	return (0);
1562}
1563
1564/*
1565 * By default, pci_alloc_msix() will assign the allocated IRQ
1566 * resources consecutively to the first N messages in the MSI-X table.
1567 * However, device drivers may want to use different layouts if they
1568 * either receive fewer messages than they asked for, or they wish to
1569 * populate the MSI-X table sparsely.  This method allows the driver
1570 * to specify what layout it wants.  It must be called after a
1571 * successful pci_alloc_msix() but before any of the associated
1572 * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1573 *
1574 * The 'vectors' array contains 'count' message vectors.  The array
1575 * maps directly to the MSI-X table in that index 0 in the array
1576 * specifies the vector for the first message in the MSI-X table, etc.
1577 * The vector value in each array index can either be 0 to indicate
1578 * that no vector should be assigned to a message slot, or it can be a
1579 * number from 1 to N (where N is the count returned from a
1580 * succcessful call to pci_alloc_msix()) to indicate which message
1581 * vector (IRQ) to be used for the corresponding message.
1582 *
1583 * On successful return, each message with a non-zero vector will have
1584 * an associated SYS_RES_IRQ whose rid is equal to the array index +
1585 * 1.  Additionally, if any of the IRQs allocated via the previous
1586 * call to pci_alloc_msix() are not used in the mapping, those IRQs
1587 * will be freed back to the system automatically.
1588 *
1589 * For example, suppose a driver has a MSI-X table with 6 messages and
1590 * asks for 6 messages, but pci_alloc_msix() only returns a count of
1591 * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1592 * C.  After the call to pci_alloc_msix(), the device will be setup to
1593 * have an MSI-X table of ABC--- (where - means no vector assigned).
1594 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1595 * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1596 * be freed back to the system.  This device will also have valid
1597 * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1598 *
1599 * In any case, the SYS_RES_IRQ rid X will always map to the message
1600 * at MSI-X table index X - 1 and will only be valid if a vector is
1601 * assigned to that table entry.
1602 */
1603int
1604pci_remap_msix_method(device_t dev, device_t child, int count,
1605    const u_int *vectors)
1606{
1607	struct pci_devinfo *dinfo = device_get_ivars(child);
1608	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1609	struct resource_list_entry *rle;
1610	int i, irq, j, *used;
1611
1612	/*
1613	 * Have to have at least one message in the table but the
1614	 * table can't be bigger than the actual MSI-X table in the
1615	 * device.
1616	 */
1617	if (count == 0 || count > msix->msix_msgnum)
1618		return (EINVAL);
1619
1620	/* Sanity check the vectors. */
1621	for (i = 0; i < count; i++)
1622		if (vectors[i] > msix->msix_alloc)
1623			return (EINVAL);
1624
1625	/*
1626	 * Make sure there aren't any holes in the vectors to be used.
1627	 * It's a big pain to support it, and it doesn't really make
1628	 * sense anyway.  Also, at least one vector must be used.
1629	 */
1630	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1631	    M_ZERO);
1632	for (i = 0; i < count; i++)
1633		if (vectors[i] != 0)
1634			used[vectors[i] - 1] = 1;
1635	for (i = 0; i < msix->msix_alloc - 1; i++)
1636		if (used[i] == 0 && used[i + 1] == 1) {
1637			free(used, M_DEVBUF);
1638			return (EINVAL);
1639		}
1640	if (used[0] != 1) {
1641		free(used, M_DEVBUF);
1642		return (EINVAL);
1643	}
1644
1645	/* Make sure none of the resources are allocated. */
1646	for (i = 0; i < msix->msix_table_len; i++) {
1647		if (msix->msix_table[i].mte_vector == 0)
1648			continue;
1649		if (msix->msix_table[i].mte_handlers > 0)
1650			return (EBUSY);
1651		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1652		KASSERT(rle != NULL, ("missing resource"));
1653		if (rle->res != NULL)
1654			return (EBUSY);
1655	}
1656
1657	/* Free the existing resource list entries. */
1658	for (i = 0; i < msix->msix_table_len; i++) {
1659		if (msix->msix_table[i].mte_vector == 0)
1660			continue;
1661		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1662	}
1663
1664	/*
1665	 * Build the new virtual table keeping track of which vectors are
1666	 * used.
1667	 */
1668	free(msix->msix_table, M_DEVBUF);
1669	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1670	    M_DEVBUF, M_WAITOK | M_ZERO);
1671	for (i = 0; i < count; i++)
1672		msix->msix_table[i].mte_vector = vectors[i];
1673	msix->msix_table_len = count;
1674
1675	/* Free any unused IRQs and resize the vectors array if necessary. */
1676	j = msix->msix_alloc - 1;
1677	if (used[j] == 0) {
1678		struct msix_vector *vec;
1679
1680		while (used[j] == 0) {
1681			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1682			    msix->msix_vectors[j].mv_irq);
1683			j--;
1684		}
1685		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1686		    M_WAITOK);
1687		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1688		    (j + 1));
1689		free(msix->msix_vectors, M_DEVBUF);
1690		msix->msix_vectors = vec;
1691		msix->msix_alloc = j + 1;
1692	}
1693	free(used, M_DEVBUF);
1694
1695	/* Map the IRQs onto the rids. */
1696	for (i = 0; i < count; i++) {
1697		if (vectors[i] == 0)
1698			continue;
1699		irq = msix->msix_vectors[vectors[i]].mv_irq;
1700		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1701		    irq, 1);
1702	}
1703
1704	if (bootverbose) {
1705		device_printf(child, "Remapped MSI-X IRQs as: ");
1706		for (i = 0; i < count; i++) {
1707			if (i != 0)
1708				printf(", ");
1709			if (vectors[i] == 0)
1710				printf("---");
1711			else
1712				printf("%d",
1713				    msix->msix_vectors[vectors[i]].mv_irq);
1714		}
1715		printf("\n");
1716	}
1717
1718	return (0);
1719}
1720
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * and hand the IRQs back to the parent bridge.  Fails with EBUSY if
 * any message still has a handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1767
1768/*
1769 * Return the max supported MSI-X messages this device supports.
1770 * Basically, assuming the MD code can alloc messages, this function
1771 * should return the maximum value that pci_alloc_msix() can return.
1772 * Thus, it is subject to the tunables, etc.
1773 */
1774int
1775pci_msix_count_method(device_t dev, device_t child)
1776{
1777	struct pci_devinfo *dinfo = device_get_ivars(child);
1778	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1779
1780	if (pci_do_msix && msix->msix_location != 0)
1781		return (msix->msix_msgnum);
1782	return (0);
1783}
1784
1785/*
1786 * HyperTransport MSI mapping control
1787 */
1788void
1789pci_ht_map_msi(device_t dev, uint64_t addr)
1790{
1791	struct pci_devinfo *dinfo = device_get_ivars(dev);
1792	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1793
1794	if (!ht->ht_msimap)
1795		return;
1796
1797	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1798	    ht->ht_msiaddr >> 20 == addr >> 20) {
1799		/* Enable MSI -> HT mapping. */
1800		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1801		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1802		    ht->ht_msictrl, 2);
1803	}
1804
1805	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1806		/* Disable MSI -> HT mapping. */
1807		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1808		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1809		    ht->ht_msictrl, 2);
1810	}
1811}
1812
1813int
1814pci_get_max_read_req(device_t dev)
1815{
1816	struct pci_devinfo *dinfo = device_get_ivars(dev);
1817	int cap;
1818	uint16_t val;
1819
1820	cap = dinfo->cfg.pcie.pcie_location;
1821	if (cap == 0)
1822		return (0);
1823	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1824	val &= PCIEM_CTL_MAX_READ_REQUEST;
1825	val >>= 12;
1826	return (1 << (val + 7));
1827}
1828
1829int
1830pci_set_max_read_req(device_t dev, int size)
1831{
1832	struct pci_devinfo *dinfo = device_get_ivars(dev);
1833	int cap;
1834	uint16_t val;
1835
1836	cap = dinfo->cfg.pcie.pcie_location;
1837	if (cap == 0)
1838		return (0);
1839	if (size < 128)
1840		size = 128;
1841	if (size > 4096)
1842		size = 4096;
1843	size = (1 << (fls(size) - 1));
1844	val = pci_read_config(dev, cap + PCIER_DEVICE_CTL, 2);
1845	val &= ~PCIEM_CTL_MAX_READ_REQUEST;
1846	val |= (fls(size) - 8) << 12;
1847	pci_write_config(dev, cap + PCIER_DEVICE_CTL, val, 2);
1848	return (size);
1849}
1850
1851/*
1852 * Support for MSI message signalled interrupts.
1853 */
1854void
1855pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1856{
1857	struct pci_devinfo *dinfo = device_get_ivars(dev);
1858	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1859
1860	/* Write data and address values. */
1861	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1862	    address & 0xffffffff, 4);
1863	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1864		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1865		    address >> 32, 4);
1866		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1867		    data, 2);
1868	} else
1869		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1870		    2);
1871
1872	/* Enable MSI in the control register. */
1873	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1874	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1875	    2);
1876
1877	/* Enable MSI -> HT mapping. */
1878	pci_ht_map_msi(dev, address);
1879}
1880
1881void
1882pci_disable_msi(device_t dev)
1883{
1884	struct pci_devinfo *dinfo = device_get_ivars(dev);
1885	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1886
1887	/* Disable MSI -> HT mapping. */
1888	pci_ht_map_msi(dev, 0);
1889
1890	/* Disable MSI in the control register. */
1891	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1892	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1893	    2);
1894}
1895
1896/*
1897 * Restore MSI registers during resume.  If MSI is enabled then
1898 * restore the data and address registers in addition to the control
1899 * register.
1900 */
1901static void
1902pci_resume_msi(device_t dev)
1903{
1904	struct pci_devinfo *dinfo = device_get_ivars(dev);
1905	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1906	uint64_t address;
1907	uint16_t data;
1908
1909	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1910		address = msi->msi_addr;
1911		data = msi->msi_data;
1912		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1913		    address & 0xffffffff, 4);
1914		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1915			pci_write_config(dev, msi->msi_location +
1916			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1917			pci_write_config(dev, msi->msi_location +
1918			    PCIR_MSI_DATA_64BIT, data, 2);
1919		} else
1920			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1921			    data, 2);
1922	}
1923	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1924	    2);
1925}
1926
/*
 * Re-route a single IRQ 'irq' for 'dev' (e.g. after the interrupt has
 * been moved to another CPU): request a fresh address/data pair from
 * the parent bridge and reprogram the matching MSI registers or MSI-X
 * table slots.  Returns 0 on success, ENOENT if 'irq' is not one of
 * the device's MSI/MSI-X IRQs, or an error from PCIB_MAP_MSI().
 */
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			/* MSI IRQs occupy rids 1..msi_alloc. */
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, update saved values, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using vector i+1. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while reprogramming the slot. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1999
2000/*
2001 * Returns true if the specified device is blacklisted because MSI
2002 * doesn't work.
2003 */
2004int
2005pci_msi_device_blacklisted(device_t dev)
2006{
2007
2008	if (!pci_honor_msi_blacklist)
2009		return (0);
2010
2011	return (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSI));
2012}
2013
2014/*
2015 * Determine if MSI is blacklisted globally on this system.  Currently,
2016 * we just check for blacklisted chipsets as represented by the
2017 * host-PCI bridge at device 0:0:0.  In the future, it may become
2018 * necessary to check other system attributes, such as the kenv values
2019 * that give the motherboard manufacturer and model number.
2020 */
2021static int
2022pci_msi_blacklisted(void)
2023{
2024	device_t dev;
2025
2026	if (!pci_honor_msi_blacklist)
2027		return (0);
2028
2029	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2030	if (!(pcie_chipset || pcix_chipset)) {
2031		if (vm_guest != VM_GUEST_NO) {
2032			/*
2033			 * Whitelist older chipsets in virtual
2034			 * machines known to support MSI.
2035			 */
2036			dev = pci_find_bsf(0, 0, 0);
2037			if (dev != NULL)
2038				return (!pci_has_quirk(pci_get_devid(dev),
2039					PCI_QUIRK_ENABLE_MSI_VM));
2040		}
2041		return (1);
2042	}
2043
2044	dev = pci_find_bsf(0, 0, 0);
2045	if (dev != NULL)
2046		return (pci_msi_device_blacklisted(dev));
2047	return (0);
2048}
2049
2050/*
2051 * Returns true if the specified device is blacklisted because MSI-X
2052 * doesn't work.  Note that this assumes that if MSI doesn't work,
2053 * MSI-X doesn't either.
2054 */
2055int
2056pci_msix_device_blacklisted(device_t dev)
2057{
2058
2059	if (!pci_honor_msi_blacklist)
2060		return (0);
2061
2062	if (pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_DISABLE_MSIX))
2063		return (1);
2064
2065	return (pci_msi_device_blacklisted(dev));
2066}
2067
2068/*
2069 * Determine if MSI-X is blacklisted globally on this system.  If MSI
2070 * is blacklisted, assume that MSI-X is as well.  Check for additional
2071 * chipsets where MSI works but MSI-X does not.
2072 */
2073static int
2074pci_msix_blacklisted(void)
2075{
2076	device_t dev;
2077
2078	if (!pci_honor_msi_blacklist)
2079		return (0);
2080
2081	dev = pci_find_bsf(0, 0, 0);
2082	if (dev != NULL && pci_has_quirk(pci_get_devid(dev),
2083	    PCI_QUIRK_DISABLE_MSIX))
2084		return (1);
2085
2086	return (pci_msi_blacklisted());
2087}
2088
2089/*
2090 * Attempt to allocate *count MSI messages.  The actual number allocated is
2091 * returned in *count.  After this function returns, each message will be
2092 * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2093 */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	/* (rid 0 is the legacy INTx interrupt; MSI rids start at 1.) */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	/* (32 is the architectural MSI maximum; also sizes irqs[].) */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Halve the request on failure until a size succeeds or we hit 1. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	/* The MME field (bits 6:4) encodes log2 of the message count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2212
2213/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	/* (pci_release_msix() returns ENODEV when no MSI-X was in use.) */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* MSI messages occupy rids 1..msi_alloc in the resource list. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2261
2262/*
2263 * Return the max supported MSI messages this device supports.
2264 * Basically, assuming the MD code can alloc messages, this function
2265 * should return the maximum value that pci_alloc_msi() can return.
2266 * Thus, it is subject to the tunables, etc.
2267 */
2268int
2269pci_msi_count_method(device_t dev, device_t child)
2270{
2271	struct pci_devinfo *dinfo = device_get_ivars(child);
2272	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2273
2274	if (pci_do_msi && msi->msi_location != 0)
2275		return (msi->msi_msgnum);
2276	return (0);
2277}
2278
2279/* free pcicfgregs structure and all depending data structures */
2280
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Release the VPD identifier and keyword value strings, if any. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the saved BAR records; _SAFE since entries die mid-walk. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2312
2313/*
 * PCI power management
2315 */
2316int
2317pci_set_powerstate_method(device_t dev, device_t child, int state)
2318{
2319	struct pci_devinfo *dinfo = device_get_ivars(child);
2320	pcicfgregs *cfg = &dinfo->cfg;
2321	uint16_t status;
2322	int result, oldstate, highest, delay;
2323
2324	if (cfg->pp.pp_cap == 0)
2325		return (EOPNOTSUPP);
2326
2327	/*
2328	 * Optimize a no state change request away.  While it would be OK to
2329	 * write to the hardware in theory, some devices have shown odd
2330	 * behavior when going from D3 -> D3.
2331	 */
2332	oldstate = pci_get_powerstate(child);
2333	if (oldstate == state)
2334		return (0);
2335
2336	/*
2337	 * The PCI power management specification states that after a state
2338	 * transition between PCI power states, system software must
2339	 * guarantee a minimal delay before the function accesses the device.
2340	 * Compute the worst case delay that we need to guarantee before we
2341	 * access the device.  Many devices will be responsive much more
2342	 * quickly than this delay, but there are some that don't respond
2343	 * instantly to state changes.  Transitions to/from D3 state require
2344	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2345	 * is done below with DELAY rather than a sleeper function because
2346	 * this function can be called from contexts where we cannot sleep.
2347	 */
2348	highest = (oldstate > state) ? oldstate : state;
2349	if (highest == PCI_POWERSTATE_D3)
2350	    delay = 10000;
2351	else if (highest == PCI_POWERSTATE_D2)
2352	    delay = 200;
2353	else
2354	    delay = 0;
2355	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2356	    & ~PCIM_PSTAT_DMASK;
2357	result = 0;
2358	switch (state) {
2359	case PCI_POWERSTATE_D0:
2360		status |= PCIM_PSTAT_D0;
2361		break;
2362	case PCI_POWERSTATE_D1:
2363		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2364			return (EOPNOTSUPP);
2365		status |= PCIM_PSTAT_D1;
2366		break;
2367	case PCI_POWERSTATE_D2:
2368		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2369			return (EOPNOTSUPP);
2370		status |= PCIM_PSTAT_D2;
2371		break;
2372	case PCI_POWERSTATE_D3:
2373		status |= PCIM_PSTAT_D3;
2374		break;
2375	default:
2376		return (EINVAL);
2377	}
2378
2379	if (bootverbose)
2380		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2381		    state);
2382
2383	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2384	if (delay)
2385		DELAY(delay);
2386	return (0);
2387}
2388
2389int
2390pci_get_powerstate_method(device_t dev, device_t child)
2391{
2392	struct pci_devinfo *dinfo = device_get_ivars(child);
2393	pcicfgregs *cfg = &dinfo->cfg;
2394	uint16_t status;
2395	int result;
2396
2397	if (cfg->pp.pp_cap != 0) {
2398		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2399		switch (status & PCIM_PSTAT_DMASK) {
2400		case PCIM_PSTAT_D0:
2401			result = PCI_POWERSTATE_D0;
2402			break;
2403		case PCIM_PSTAT_D1:
2404			result = PCI_POWERSTATE_D1;
2405			break;
2406		case PCIM_PSTAT_D2:
2407			result = PCI_POWERSTATE_D2;
2408			break;
2409		case PCIM_PSTAT_D3:
2410			result = PCI_POWERSTATE_D3;
2411			break;
2412		default:
2413			result = PCI_POWERSTATE_UNKNOWN;
2414			break;
2415		}
2416	} else {
2417		/* No support, device is always at D0 */
2418		result = PCI_POWERSTATE_D0;
2419	}
2420	return (result);
2421}
2422
2423/*
2424 * Some convenience functions for PCI device drivers.
2425 */
2426
2427static __inline void
2428pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2429{
2430	uint16_t	command;
2431
2432	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2433	command |= bit;
2434	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2435}
2436
2437static __inline void
2438pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2439{
2440	uint16_t	command;
2441
2442	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2443	command &= ~bit;
2444	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2445}
2446
/* Enable PCI bus mastering for 'child'; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2453
/* Disable PCI bus mastering for 'child'; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2460
2461int
2462pci_enable_io_method(device_t dev, device_t child, int space)
2463{
2464	uint16_t bit;
2465
2466	switch(space) {
2467	case SYS_RES_IOPORT:
2468		bit = PCIM_CMD_PORTEN;
2469		break;
2470	case SYS_RES_MEMORY:
2471		bit = PCIM_CMD_MEMEN;
2472		break;
2473	default:
2474		return (EINVAL);
2475	}
2476	pci_set_command_bit(dev, child, bit);
2477	return (0);
2478}
2479
2480int
2481pci_disable_io_method(device_t dev, device_t child, int space)
2482{
2483	uint16_t bit;
2484
2485	switch(space) {
2486	case SYS_RES_IOPORT:
2487		bit = PCIM_CMD_PORTEN;
2488		break;
2489	case SYS_RES_MEMORY:
2490		bit = PCIM_CMD_MEMEN;
2491		break;
2492	default:
2493		return (EINVAL);
2494	}
2495	pci_clear_command_bit(dev, child, bit);
2496	return (0);
2497}
2498
2499/*
2500 * New style pci driver.  Parent device is either a pci-host-bridge or a
2501 * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2502 */
2503
/* Dump a device's config-space summary to the console when bootverbose. */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identity, location, and class information. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability and the BAR(s) holding its structures. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2560
2561static int
2562pci_porten(device_t dev)
2563{
2564	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2565}
2566
2567static int
2568pci_memen(device_t dev)
2569{
2570	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2571}
2572
/*
 * Read BAR 'reg' of 'dev', returning its current value in *mapp and
 * the all-ones sizing probe result in *testvalp.  The BAR is restored
 * to its original value before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe: all ones except the ROM enable bit (bit 0). */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies two consecutive dwords. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2636
/*
 * Program BAR 'pm' of 'dev' with address 'base' and refresh the
 * cached pm_value from the hardware.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	/* 64-bit BARs take the upper half in the next dword. */
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Read back what actually stuck; read-only bits may differ. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2657
2658struct pci_map *
2659pci_find_bar(device_t dev, int reg)
2660{
2661	struct pci_devinfo *dinfo;
2662	struct pci_map *pm;
2663
2664	dinfo = device_get_ivars(dev);
2665	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2666		if (pm->pm_reg == reg)
2667			return (pm);
2668	}
2669	return (NULL);
2670}
2671
2672int
2673pci_bar_enabled(device_t dev, struct pci_map *pm)
2674{
2675	struct pci_devinfo *dinfo;
2676	uint16_t cmd;
2677
2678	dinfo = device_get_ivars(dev);
2679	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2680	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2681		return (0);
2682	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2683	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2684		return ((cmd & PCIM_CMD_MEMEN) != 0);
2685	else
2686		return ((cmd & PCIM_CMD_PORTEN) != 0);
2687}
2688
/*
 * Record a new BAR for 'dev' (register 'reg', current raw value
 * 'value', log2 length 'size'), keeping the list sorted by register
 * offset.  Returns the new record.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record keeps the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev == NULL only when the list was empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2713
/*
 * Reprogram every saved BAR of 'dev' from the cached values, e.g.
 * after a power state transition cleared them.
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		/* ROM BARs are always 32-bit memory BARs. */
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		/* 64-bit BARs take the upper half in the next dword. */
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2733
2734/*
2735 * Add a resource based on a pci map register. Return 1 if the map
2736 * register is a 32bit map register or 2 if it is a 64bit register.
2737 */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	/* Size the BAR: 'map' is its value, 'testval' the sizing probe. */
	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	/* mapsize is log2 of the BAR's length in bytes. */
	mapsize = pci_mapsize(testval);
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	/* mapsize is a log2: < 4 means < 16 bytes, < 2 means < 4 ports. */
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable busses look better when
	 * all resources are allocated, so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	/* Skip BARs whose address doesn't fit in this platform's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	/* count = 2^mapsize bytes (or ports). */
	count = (pci_addr_t)1 << mapsize;
	/* base == pci_mapbase(testval) means the BAR holds no real address. */
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0ul)) {
		/*
		 * If the allocation fails, try to allocate a resource for
		 * this BAR using any available range.  The firmware felt
		 * it was important enough to assign a resource, so don't
		 * disable decoding if we can help it.
		 */
		resource_list_delete(rl, type, reg);
		resource_list_add(rl, type, reg, 0, ~0ul, count);
		res = resource_list_reserve(rl, bus, dev, type, &reg, 0, ~0ul,
		    count, prefetch ? RF_PREFETCHABLE : 0);
	}
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list entry
		 * and disable decoding for this device.
		 *
		 * If the driver requests this resource in the future,
		 * pci_reserve_map() will try to allocate a fresh
		 * resource range.
		 */
		resource_list_delete(rl, type, reg);
		pci_disable_io(dev, type);
		if (bootverbose)
			device_printf(bus,
			    "pci%d:%d:%d:%d bar %#x failed to allocate\n",
			    pci_get_domain(dev), pci_get_bus(dev),
			    pci_get_slot(dev), pci_get_function(dev), reg);
	} else {
		/* Program the BAR with whatever range we actually got. */
		start = rman_get_start(res);
		pci_write_bar(dev, pm, start);
	}
	return (barlen);
}
2902
2903/*
2904 * For ATA devices we need to decide early what addressing mode to use.
2905 * Legacy demands that the primary and secondary ATA ports sits on the
2906 * same addresses that old ISA hardware did. This dictates that we use
2907 * those addresses and ignore the BAR's if we cannot set PCI native
2908 * addressing mode.
2909 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	/* progif says, per channel, whether it is in native or compat mode. */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: use its BARs. */
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Compat mode: fixed legacy ISA ports 0x1f0-0x1f7/0x3f6. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
		    0x1f7, 8, 0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
		    0x3f6, 1, 0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: use its BARs. */
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Compat mode: fixed legacy ISA ports 0x170-0x177/0x376. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
		    0x177, 8, 0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
		    0x376, 1, 0);
	}
	/* BAR(4) is bus-master DMA; BAR(5) is used by some controllers. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2964
/*
 * Determine the legacy INTx IRQ for 'dev' (tunable override, intline
 * register, or bus routing) and add it as the rid 0 IRQ resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1..254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
3012
3013/* Perform early OHCI takeover from SMM. */
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* OHCI operational registers live in BAR(0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	/* OHCI_IR set means the BIOS/SMM currently owns the controller. */
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* SMM never let go: force a controller reset instead. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3049
3050/* Perform early UHCI takeover from SMM. */
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	/* UHCI registers are I/O-port mapped, unlike OHCI/EHCI. */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
3073
/*
 * Perform early EHCI takeover from SMM.
 *
 * Walks the controller's extended capability list (in PCI config
 * space) looking for the USB legacy support capability; where the
 * BIOS semaphore is set, claims ownership via the OS semaphore and
 * waits for the BIOS to release, then masks controller interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	/* eecp is a config-space offset; 0 terminates the list. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A set BIOS semaphore means the BIOS owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3129
/*
 * Perform early XHCI takeover from SMM.
 *
 * Like the EHCI case, but the extended capability list lives in MMIO
 * space and its offsets are expressed in dwords (hence the << 2
 * scaling), and ownership is expected to change hands within five
 * seconds.
 */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* Prime eec so the first XHCI_XECP_NEXT(eec) test is nonzero. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		/* A set BIOS semaphore means the BIOS owns the controller. */
		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		/*
		 * NOTE(review): the status read presumably flushes the
		 * posted register write -- confirm against the xHCI
		 * driver's conventions.
		 */
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3191
/*
 * Populate a child device's resource list: scan its BARs (honoring
 * per-device quirks), add its interrupt line, and optionally perform
 * early USB host controller takeover from the BIOS/SMM.
 *
 * 'force' and 'prefetchmask' are passed through to the BAR-mapping
 * helpers (pci_add_map()/pci_ata_maps()).
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;
	struct resource_list *rl;
	const struct pci_quirk *q;
	uint32_t devid;
	int i;

	dinfo = device_get_ivars(dev);
	cfg = &dinfo->cfg;
	rl = &dinfo->resources;
	devid = (cfg->device << 16) | cfg->vendor;

	/* ATA devices needs special map treatment */
	/*
	 * Legacy-mode ATA controllers (or ones with both primary BARs
	 * unimplemented) get fixed/compat mappings instead of a plain
	 * BAR scan.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns the number of BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;) {
			/*
			 * Skip quirked resources.
			 */
			for (q = &pci_quirks[0]; q->devid != 0; q++)
				if (q->devid == devid &&
				    q->type == PCI_QUIRK_UNMAP_REG &&
				    q->arg1 == PCIR_BAR(i))
					break;
			if (q->devid != 0) {
				i++;
				continue;
			}
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));
		}

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid != 0; q++)
		if (q->devid == devid && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Wrest USB host controllers away from the BIOS early if asked. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3265
/*
 * Enumerate all slots/functions on the given bus and add a child
 * device for each function that responds.  Single-function devices
 * (PCIM_MFDEV clear) only have function 0 probed.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Shorthand for a config read of the current slot/function. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown header type (no device). */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3298
/*
 * Create a new-bus child for the given device info and discover its
 * resources.  The pci_cfg_save()/pci_cfg_restore() pair is done
 * before resource discovery; presumably this normalizes the device's
 * config/power state first -- confirm against those helpers.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3310
3311static int
3312pci_probe(device_t dev)
3313{
3314
3315	device_set_desc(dev, "PCI bus");
3316
3317	/* Allow other subclasses to override this driver. */
3318	return (BUS_PROBE_GENERIC);
3319}
3320
/*
 * Common attach work shared by pci_attach() and subclasses: record
 * the domain/bus numbers and set up the bus DMA tag, optionally
 * constrained to PCI_DMA_BOUNDARY for top-level PCI buses.
 */
int
pci_attach_common(device_t dev)
{
	struct pci_softc *sc;
	int busno, domain;
#ifdef PCI_DMA_BOUNDARY
	int error, tag_valid;
#endif

	sc = device_get_softc(dev);
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
#ifdef PCI_DMA_BOUNDARY
	tag_valid = 0;
	/*
	 * Only create a boundary-constrained tag when the grandparent
	 * is not itself a PCI bus (i.e. this is a host bridge's bus);
	 * nested buses inherit the parent's tag below.
	 */
	if (device_get_devclass(device_get_parent(device_get_parent(dev))) !=
	    devclass_find("pci")) {
		error = bus_dma_tag_create(bus_get_dma_tag(dev), 1,
		    PCI_DMA_BOUNDARY, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
		    NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED,
		    BUS_SPACE_MAXSIZE, 0, NULL, NULL, &sc->sc_dma_tag);
		if (error)
			device_printf(dev, "Failed to create DMA tag: %d\n",
			    error);
		else
			tag_valid = 1;
	}
	if (!tag_valid)
#endif
		/* Fall back to (or default to) the parent's DMA tag. */
		sc->sc_dma_tag = bus_get_dma_tag(dev);
	return (0);
}
3355
/*
 * Attach method for the generic PCI bus driver: run the common
 * setup, enumerate children, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain, error;

	error = pci_attach_common(dev);
	if (error)
		return (error);

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3376
3377static void
3378pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3379    int state)
3380{
3381	device_t child, pcib;
3382	struct pci_devinfo *dinfo;
3383	int dstate, i;
3384
3385	/*
3386	 * Set the device to the given state.  If the firmware suggests
3387	 * a different power state, use it instead.  If power management
3388	 * is not present, the firmware is responsible for managing
3389	 * device power.  Skip children who aren't attached since they
3390	 * are handled separately.
3391	 */
3392	pcib = device_get_parent(dev);
3393	for (i = 0; i < numdevs; i++) {
3394		child = devlist[i];
3395		dinfo = device_get_ivars(child);
3396		dstate = state;
3397		if (device_is_attached(child) &&
3398		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3399			pci_set_powerstate(child, dstate);
3400	}
3401}
3402
/*
 * Suspend method for the PCI bus: save each child's config space,
 * suspend the children, then (optionally, per pci_do_power_suspend)
 * place them in D3.  Returns 0 on success or the first error from
 * device_get_children()/bus_generic_suspend().
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3434
/*
 * Resume method for the PCI bus: power children back to D0 (per
 * pci_do_power_resume), restore their config space, then resume
 * them -- critical device classes first so e.g. bridges are up
 * before the devices behind them.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Re-save state for driverless devices. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	/* Second pass: everything not resumed above. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3490
/*
 * Locate a preloaded "pci_vendor_data" image (the PCI vendor/device
 * name database) and record its address and size for later use by
 * pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/* terminate the database */
			/*
			 * NOTE(review): this writes one byte past the
			 * fetched size; assumes the preload area leaves
			 * room for the extra terminator -- confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3510
/*
 * Bus callback invoked when a new driver is registered: give
 * driverless (DS_NOTPRESENT) children another chance to attach.
 * Config space is restored first since an unattached device may have
 * been powered down; if the reprobe fails, the detached path re-saves
 * its state.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_child_detached(dev, child);
	}
	free(devlist, M_TEMP);
}
3539
/*
 * Set up an interrupt handler for a child device.
 *
 * For INTx (rid 0) this just clears the INTx-disable command bit.
 * For MSI/MSI-X (rid > 0) it asks the parent bridge to map the
 * message on first use, programs the address/data into the device,
 * keeps a per-vector handler refcount, and sets INTx-disable.  On
 * any failure in that path the generic handler set up at the top is
 * torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the vectors on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in the device on the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map this vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * The success path falls through to here with
		 * error == 0, so this only tears down on failure.
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3631
/*
 * Tear down an interrupt handler set up by pci_setup_intr().
 *
 * For INTx (rid 0) this masks INTx via the command register.  For
 * MSI/MSI-X it drops the per-vector handler refcount and disables
 * MSI or masks the MSI-X entry when the count reaches zero, before
 * the generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/*
		 * NOTE(review): rle is dereferenced without a NULL
		 * check; presumably an active MSI/MSI-X rid always has
		 * a resource list entry -- confirm.
		 */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based table indices. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3690
/*
 * Print-child method: emit the standard header, the child's I/O,
 * memory and IRQ resources, any device flags, and its slot.function
 * location.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3716
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * print a human-readable device type when no driver attaches and the
 * device is not found in the vendor database.  An entry with
 * subclass -1 supplies the fallback description for its whole class.
 * The table is terminated by an all-zero entry.
 */
static const struct
{
	int		class;
	int		subclass;
	const char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_STORAGE,		PCIS_STORAGE_NVM,	"NVM"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3809
/*
 * Bus callback for a child no driver claimed: print a description
 * from the vendor database if available, otherwise a generic
 * class/subclass description, then save the device's config state so
 * it can be powered down until a driver appears.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int i;
	const char *cp, *scp;
	char *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		free(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				/* subclass -1 is the per-class fallback. */
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	printf(" at device %d.%d (no driver attached)\n",
	    pci_get_slot(child), pci_get_function(child));
	pci_cfg_save(child, device_get_ivars(child), 1);
}
3849
/*
 * Bus callback invoked when a child's driver detaches: reclaim any
 * resources the driver leaked (warning about each kind) and save the
 * device's config state.
 */
void
pci_child_detached(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	/*
	 * Have to deallocate IRQs before releasing any MSI messages and
	 * have to release MSI messages before deallocating any memory
	 * BARs.
	 */
	if (resource_list_release_active(rl, dev, child, SYS_RES_IRQ) != 0)
		pci_printf(&dinfo->cfg, "Device leaked IRQ resources\n");
	if (dinfo->cfg.msi.msi_alloc != 0 || dinfo->cfg.msix.msix_alloc != 0) {
		pci_printf(&dinfo->cfg, "Device leaked MSI vectors\n");
		(void)pci_release_msi(child);
	}
	if (resource_list_release_active(rl, dev, child, SYS_RES_MEMORY) != 0)
		pci_printf(&dinfo->cfg, "Device leaked memory resources\n");
	if (resource_list_release_active(rl, dev, child, SYS_RES_IOPORT) != 0)
		pci_printf(&dinfo->cfg, "Device leaked I/O resources\n");

	pci_cfg_save(child, dinfo, 1);
}
3877
3878/*
3879 * Parse the PCI device database, if loaded, and return a pointer to a
3880 * description of the device.
3881 *
3882 * The database is flat text formatted as follows:
3883 *
3884 * Any line not in a valid format is ignored.
3885 * Lines are terminated with newline '\n' characters.
3886 *
3887 * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3888 * the vendor name.
3889 *
3890 * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3891 * - devices cannot be listed without a corresponding VENDOR line.
3892 * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3893 * another TAB, then the device name.
3894 */
3895
3896/*
3897 * Assuming (ptr) points to the beginning of a line in the database,
3898 * return the vendor or device and description of the next entry.
3899 * The value of (vendor) or (device) inappropriate for the entry type
3900 * is set to -1.  Returns nonzero at the end of the database.
3901 *
 * Note that this is somewhat fragile in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to the
 * end of the database when we initialise it.
3905 */
3906static int
3907pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3908{
3909	char	*cp = *ptr;
3910	int	left;
3911
3912	*device = -1;
3913	*vendor = -1;
3914	**desc = '\0';
3915	for (;;) {
3916		left = pci_vendordata_size - (cp - pci_vendordata);
3917		if (left <= 0) {
3918			*ptr = cp;
3919			return(1);
3920		}
3921
3922		/* vendor entry? */
3923		if (*cp != '\t' &&
3924		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3925			break;
3926		/* device entry? */
3927		if (*cp == '\t' &&
3928		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3929			break;
3930
3931		/* skip to next line */
3932		while (*cp != '\n' && left > 0) {
3933			cp++;
3934			left--;
3935		}
3936		if (*cp == '\n') {
3937			cp++;
3938			left--;
3939		}
3940	}
3941	/* skip to next line */
3942	while (*cp != '\n' && left > 0) {
3943		cp++;
3944		left--;
3945	}
3946	if (*cp == '\n' && left > 0)
3947		cp++;
3948	*ptr = cp;
3949	return(0);
3950}
3951
/*
 * Look up a device's vendor/device IDs in the loaded vendor database
 * and return a malloc'ed "vendor, device" description string, or
 * NULL if the database is absent or allocation fails.  The caller
 * frees the result (M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/*
	 * 80-byte description buffers; the sscanf field width in
	 * pci_describe_parse_line() must leave room for the NUL.
	 */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: device not listed. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the numeric ID when no device name was found. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
4004
/*
 * Read-ivar method: return the requested instance variable from the
 * child's cached config registers.  Returns 0 on success, EINVAL for
 * PCI_IVAR_ETHADDR (unsupported here), or ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device:vendor identifier. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
4087
4088int
4089pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
4090{
4091	struct pci_devinfo *dinfo;
4092
4093	dinfo = device_get_ivars(child);
4094
4095	switch (which) {
4096	case PCI_IVAR_INTPIN:
4097		dinfo->cfg.intpin = value;
4098		return (0);
4099	case PCI_IVAR_ETHADDR:
4100	case PCI_IVAR_SUBVENDOR:
4101	case PCI_IVAR_SUBDEVICE:
4102	case PCI_IVAR_VENDOR:
4103	case PCI_IVAR_DEVICE:
4104	case PCI_IVAR_DEVID:
4105	case PCI_IVAR_CLASS:
4106	case PCI_IVAR_SUBCLASS:
4107	case PCI_IVAR_PROGIF:
4108	case PCI_IVAR_REVID:
4109	case PCI_IVAR_IRQ:
4110	case PCI_IVAR_DOMAIN:
4111	case PCI_IVAR_BUS:
4112	case PCI_IVAR_SLOT:
4113	case PCI_IVAR_FUNCTION:
4114		return (EINVAL);	/* disallow for now */
4115
4116	default:
4117		return (ENOENT);
4118	}
4119}
4120
4121#include "opt_ddb.h"
4122#ifdef DDB
4123#include <ddb/ddb.h>
4124#include <sys/cons.h>
4125
4126/*
4127 * List resources based on pci map registers, used for within ddb
4128 */
4129
/*
 * ddb "show pciregs" command: print one summary line per PCI device
 * known to the bus code (selector, class, subsystem/device IDs,
 * revision and header type), stopping early if the pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices with no attached driver print as "noneN". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
4169#endif /* DDB */
4170
4171static struct resource *
4172pci_reserve_map(device_t dev, device_t child, int type, int *rid,
4173    u_long start, u_long end, u_long count, u_int flags)
4174{
4175	struct pci_devinfo *dinfo = device_get_ivars(child);
4176	struct resource_list *rl = &dinfo->resources;
4177	struct resource_list_entry *rle;
4178	struct resource *res;
4179	struct pci_map *pm;
4180	pci_addr_t map, testval;
4181	int mapsize;
4182
4183	res = NULL;
4184	pm = pci_find_bar(child, *rid);
4185	if (pm != NULL) {
4186		/* This is a BAR that we failed to allocate earlier. */
4187		mapsize = pm->pm_size;
4188		map = pm->pm_value;
4189	} else {
4190		/*
4191		 * Weed out the bogons, and figure out how large the
4192		 * BAR/map is.  BARs that read back 0 here are bogus
4193		 * and unimplemented.  Note: atapci in legacy mode are
4194		 * special and handled elsewhere in the code.  If you
4195		 * have a atapci device in legacy mode and it fails
4196		 * here, that other code is broken.
4197		 */
4198		pci_read_bar(child, *rid, &map, &testval);
4199
4200		/*
4201		 * Determine the size of the BAR and ignore BARs with a size
4202		 * of 0.  Device ROM BARs use a different mask value.
4203		 */
4204		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
4205			mapsize = pci_romsize(testval);
4206		else
4207			mapsize = pci_mapsize(testval);
4208		if (mapsize == 0)
4209			goto out;
4210		pm = pci_add_bar(child, *rid, map, mapsize);
4211	}
4212
4213	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
4214		if (type != SYS_RES_MEMORY) {
4215			if (bootverbose)
4216				device_printf(dev,
4217				    "child %s requested type %d for rid %#x,"
4218				    " but the BAR says it is an memio\n",
4219				    device_get_nameunit(child), type, *rid);
4220			goto out;
4221		}
4222	} else {
4223		if (type != SYS_RES_IOPORT) {
4224			if (bootverbose)
4225				device_printf(dev,
4226				    "child %s requested type %d for rid %#x,"
4227				    " but the BAR says it is an ioport\n",
4228				    device_get_nameunit(child), type, *rid);
4229			goto out;
4230		}
4231	}
4232
4233	/*
4234	 * For real BARs, we need to override the size that
4235	 * the driver requests, because that's what the BAR
4236	 * actually uses and we would otherwise have a
4237	 * situation where we might allocate the excess to
4238	 * another driver, which won't work.
4239	 */
4240	count = (pci_addr_t)1 << mapsize;
4241	if (RF_ALIGNMENT(flags) < mapsize)
4242		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
4243	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
4244		flags |= RF_PREFETCHABLE;
4245
4246	/*
4247	 * Allocate enough resource, and then write back the
4248	 * appropriate BAR for that resource.
4249	 */
4250	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
4251	    start, end, count, flags & ~RF_ACTIVE);
4252	if (res == NULL) {
4253		device_printf(child,
4254		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
4255		    count, *rid, type, start, end);
4256		goto out;
4257	}
4258	resource_list_add(rl, type, *rid, start, end, count);
4259	rle = resource_list_find(rl, type, *rid);
4260	if (rle == NULL)
4261		panic("pci_reserve_map: unexpectedly can't find resource.");
4262	rle->res = res;
4263	rle->start = rman_get_start(res);
4264	rle->end = rman_get_end(res);
4265	rle->count = count;
4266	rle->flags = RLE_RESERVED;
4267	if (bootverbose)
4268		device_printf(child,
4269		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4270		    count, *rid, type, rman_get_start(res));
4271	map = rman_get_start(res);
4272	pci_write_bar(child, pm, map);
4273out:
4274	return (res);
4275}
4276
/*
 * Bus method: allocate a resource for a child.  Requests from
 * grandchildren are passed straight up the tree; for direct children
 * this lazily routes an interrupt or reserves the backing BAR before
 * handing out an allocation from the resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg;

	/* Not a direct child of ours: forward to our own parent. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	cfg = &dinfo->cfg;
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand out the allocation from the (now reserved) list entry. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4350
/*
 * Bus method: release a resource previously allocated through
 * pci_alloc_resource().  Grandchildren and (with NEW_PCIB) bridge
 * window registers bypass the child's resource list and go up the
 * tree, mirroring the allocation path.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	pcicfgregs *cfg;

	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;
#ifdef NEW_PCIB
	/*
	 * PCI-PCI bridge I/O window resources are not BARs.  For
	 * those allocations just pass the request up the tree.
	 */
	if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE &&
	    (type == SYS_RES_IOPORT || type == SYS_RES_MEMORY)) {
		switch (rid) {
		case PCIR_IOBASEL_1:
		case PCIR_MEMBASE_1:
		case PCIR_PMBASEL_1:
			return (bus_generic_release_resource(dev, child, type,
			    rid, r));
		}
	}
#endif

	rl = &dinfo->resources;
	return (resource_list_release(rl, dev, child, type, rid, r));
}
4385
/*
 * Bus method: activate a child's resource.  After generic activation
 * succeeds, enable the matching decode: device ROM BARs get their
 * PCIM_BIOS_ENABLE bit set in the BAR itself, and I/O/memory
 * decoding is enabled in the command register via PCI_ENABLE_IO().
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4413
/*
 * Bus method: deactivate a child's resource.  For device ROM BARs,
 * rewrite the BAR with the bare base address, which clears
 * PCIM_BIOS_ENABLE and so turns ROM decoding off again.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (type == SYS_RES_MEMORY && PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
4434
/*
 * Detach and destroy a child device: disable its memory/port decode,
 * release every resource in its resource list, delete the newbus
 * device and free the cached config-space state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				/*
				 * Still active/busy: some owner never
				 * released it; complain and force the
				 * release before unreserving below.
				 */
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4474
4475void
4476pci_delete_resource(device_t dev, device_t child, int type, int rid)
4477{
4478	struct pci_devinfo *dinfo;
4479	struct resource_list *rl;
4480	struct resource_list_entry *rle;
4481
4482	if (device_get_parent(child) != dev)
4483		return;
4484
4485	dinfo = device_get_ivars(child);
4486	rl = &dinfo->resources;
4487	rle = resource_list_find(rl, type, rid);
4488	if (rle == NULL)
4489		return;
4490
4491	if (rle->res) {
4492		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4493		    resource_list_busy(rl, type, rid)) {
4494			device_printf(dev, "delete_resource: "
4495			    "Resource still owned by child, oops. "
4496			    "(type=%d, rid=%d, addr=%lx)\n",
4497			    type, rid, rman_get_start(rle->res));
4498			return;
4499		}
4500		resource_list_unreserve(rl, dev, child, type, rid);
4501	}
4502	resource_list_delete(rl, type, rid);
4503}
4504
4505struct resource_list *
4506pci_get_resource_list (device_t dev, device_t child)
4507{
4508	struct pci_devinfo *dinfo = device_get_ivars(child);
4509
4510	return (&dinfo->resources);
4511}
4512
4513bus_dma_tag_t
4514pci_get_dma_tag(device_t bus, device_t dev)
4515{
4516	struct pci_softc *sc = device_get_softc(bus);
4517
4518	return (sc->sc_dma_tag);
4519}
4520
4521uint32_t
4522pci_read_config_method(device_t dev, device_t child, int reg, int width)
4523{
4524	struct pci_devinfo *dinfo = device_get_ivars(child);
4525	pcicfgregs *cfg = &dinfo->cfg;
4526
4527	return (PCIB_READ_CONFIG(device_get_parent(dev),
4528	    cfg->bus, cfg->slot, cfg->func, reg, width));
4529}
4530
4531void
4532pci_write_config_method(device_t dev, device_t child, int reg,
4533    uint32_t val, int width)
4534{
4535	struct pci_devinfo *dinfo = device_get_ivars(child);
4536	pcicfgregs *cfg = &dinfo->cfg;
4537
4538	PCIB_WRITE_CONFIG(device_get_parent(dev),
4539	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4540}
4541
4542int
4543pci_child_location_str_method(device_t dev, device_t child, char *buf,
4544    size_t buflen)
4545{
4546
4547	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4548	    pci_get_function(child));
4549	return (0);
4550}
4551
4552int
4553pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4554    size_t buflen)
4555{
4556	struct pci_devinfo *dinfo;
4557	pcicfgregs *cfg;
4558
4559	dinfo = device_get_ivars(child);
4560	cfg = &dinfo->cfg;
4561	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4562	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4563	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4564	    cfg->progif);
4565	return (0);
4566}
4567
4568int
4569pci_assign_interrupt_method(device_t dev, device_t child)
4570{
4571	struct pci_devinfo *dinfo = device_get_ivars(child);
4572	pcicfgregs *cfg = &dinfo->cfg;
4573
4574	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4575	    cfg->intpin));
4576}
4577
4578static int
4579pci_modevent(module_t mod, int what, void *arg)
4580{
4581	static struct cdev *pci_cdev;
4582
4583	switch (what) {
4584	case MOD_LOAD:
4585		STAILQ_INIT(&pci_devq);
4586		pci_generation = 0;
4587		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4588		    "pci");
4589		pci_load_vendor_data();
4590		break;
4591
4592	case MOD_UNLOAD:
4593		destroy_dev(pci_cdev);
4594		break;
4595	}
4596
4597	return (0);
4598}
4599
/*
 * Re-write the saved PCI Express capability control registers.  Which
 * registers are present depends on the capability version and port
 * type, so the conditions here mirror pci_cfg_save_pcie() exactly.
 */
static void
pci_cfg_restore_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	WREG(n, v)	pci_write_config(dev, pos + (n), (v), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control exists for every version and port type. */
	WREG(PCIER_DEVICE_CTL, cfg->pcie_device_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		WREG(PCIER_LINK_CTL, cfg->pcie_link_ctl);

	/* Slot control: v2+, root ports, or downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		WREG(PCIER_SLOT_CTL, cfg->pcie_slot_ctl);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		WREG(PCIER_ROOT_CTL, cfg->pcie_root_ctl);

	/* The "2" control registers only exist in v2+ capabilities. */
	if (version > 1) {
		WREG(PCIER_DEVICE_CTL2, cfg->pcie_device_ctl2);
		WREG(PCIER_LINK_CTL2, cfg->pcie_link_ctl2);
		WREG(PCIER_SLOT_CTL2, cfg->pcie_slot_ctl2);
	}
#undef WREG
}
4635
4636static void
4637pci_cfg_restore_pcix(device_t dev, struct pci_devinfo *dinfo)
4638{
4639	pci_write_config(dev, dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND,
4640	    dinfo->cfg.pcix.pcix_command,  2);
4641}
4642
/*
 * Restore the config-space state captured by pci_cfg_save(): power
 * the device back to D0, re-program the BARs and the writable type 0
 * header registers, then restore PCIe/PCI-X/MSI/MSI-X capability
 * state where present.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	/* Re-write the writable type 0 header registers from the cache. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore extended capabilities for PCI-Express and PCI-X
	 */
	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_restore_pcie(dev, dinfo);
	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_restore_pcix(dev, dinfo);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4692
/*
 * Snapshot the PCI Express capability control registers so they can
 * be re-written by pci_cfg_restore_pcie().  The conditions selecting
 * which registers to read mirror the restore side exactly.
 */
static void
pci_cfg_save_pcie(device_t dev, struct pci_devinfo *dinfo)
{
#define	RREG(n)	pci_read_config(dev, pos + (n), 2)
	struct pcicfg_pcie *cfg;
	int version, pos;

	cfg = &dinfo->cfg.pcie;
	pos = cfg->pcie_location;

	cfg->pcie_flags = RREG(PCIER_FLAGS);

	version = cfg->pcie_flags & PCIEM_FLAGS_VERSION;

	/* Device control exists for every version and port type. */
	cfg->pcie_device_ctl = RREG(PCIER_DEVICE_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ENDPOINT ||
	    cfg->pcie_type == PCIEM_TYPE_LEGACY_ENDPOINT)
		cfg->pcie_link_ctl = RREG(PCIER_LINK_CTL);

	/* Slot control: v2+, root ports, or downstream ports with a slot. */
	if (version > 1 || (cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    (cfg->pcie_type == PCIEM_TYPE_DOWNSTREAM_PORT &&
	     (cfg->pcie_flags & PCIEM_FLAGS_SLOT))))
		cfg->pcie_slot_ctl = RREG(PCIER_SLOT_CTL);

	if (version > 1 || cfg->pcie_type == PCIEM_TYPE_ROOT_PORT ||
	    cfg->pcie_type == PCIEM_TYPE_ROOT_EC)
		cfg->pcie_root_ctl = RREG(PCIER_ROOT_CTL);

	/* The "2" control registers only exist in v2+ capabilities. */
	if (version > 1) {
		cfg->pcie_device_ctl2 = RREG(PCIER_DEVICE_CTL2);
		cfg->pcie_link_ctl2 = RREG(PCIER_LINK_CTL2);
		cfg->pcie_slot_ctl2 = RREG(PCIER_SLOT_CTL2);
	}
#undef RREG
}
4730
4731static void
4732pci_cfg_save_pcix(device_t dev, struct pci_devinfo *dinfo)
4733{
4734	dinfo->cfg.pcix.pcix_command = pci_read_config(dev,
4735	    dinfo->cfg.pcix.pcix_location + PCIXR_COMMAND, 2);
4736}
4737
/*
 * Snapshot the writable config-space state of a type 0 device so it
 * can be restored later by pci_cfg_restore().  If 'setstate' is
 * non-zero, additionally power the device down according to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	if (dinfo->cfg.pcie.pcie_location != 0)
		pci_cfg_save_pcie(dev, dinfo);

	if (dinfo->cfg.pcix.pcix_location != 0)
		pci_cfg_save_pcix(dev, dinfo);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4823
4824/* Wrapper APIs suitable for device driver use. */
4825void
4826pci_save_state(device_t dev)
4827{
4828	struct pci_devinfo *dinfo;
4829
4830	dinfo = device_get_ivars(dev);
4831	pci_cfg_save(dev, dinfo, 0);
4832}
4833
4834void
4835pci_restore_state(device_t dev)
4836{
4837	struct pci_devinfo *dinfo;
4838
4839	dinfo = device_get_ivars(dev);
4840	pci_cfg_restore(dev, dinfo);
4841}
4842