1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/e1000/if_igb.c 273736 2014-10-27 14:38:00Z hselasky $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38
39#ifdef HAVE_KERNEL_OPTION_HEADERS
40#include "opt_device_polling.h"
41#include "opt_altq.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifndef IGB_LEGACY_TX
47#include <sys/buf_ring.h>
48#endif
49#include <sys/bus.h>
50#include <sys/endian.h>
51#include <sys/kernel.h>
52#include <sys/kthread.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/module.h>
56#include <sys/rman.h>
57#include <sys/socket.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/taskqueue.h>
61#include <sys/eventhandler.h>
62#include <sys/pcpu.h>
63#include <sys/smp.h>
64#include <machine/smp.h>
65#include <machine/bus.h>
66#include <machine/resource.h>
67
68#include <net/bpf.h>
69#include <net/ethernet.h>
70#include <net/if.h>
71#include <net/if_arp.h>
72#include <net/if_dl.h>
73#include <net/if_media.h>
74
75#include <net/if_types.h>
76#include <net/if_vlan_var.h>
77
78#include <netinet/in_systm.h>
79#include <netinet/in.h>
80#include <netinet/if_ether.h>
81#include <netinet/ip.h>
82#include <netinet/ip6.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_lro.h>
85#include <netinet/udp.h>
86
87#include <machine/in_cksum.h>
88#include <dev/led/led.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 2.4.0";
105
106
/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices this driver attaches to
 *  Last field stores an index into igb_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
134						PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
148						PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
157						PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
159						PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
161						PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
167						PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
169						PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_I354_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	/* required last entry */
172	{ 0, 0, 0, 0, 0}
173};
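/*
 * Note (illustrative): supporting an additional adapter would mean adding a
 * { 0x8086, <new device id>, PCI_ANY_ID, PCI_ANY_ID, 0 } row ahead of the
 * terminating all-zero entry, assuming the shared e1000 code already
 * recognizes the part.
 */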
174
175/*********************************************************************
176 *  Table of branding strings for all supported NICs.
177 *********************************************************************/
178
179static char *igb_strings[] = {
180	"Intel(R) PRO/1000 Network Connection"
181};
182
183/*********************************************************************
184 *  Function prototypes
185 *********************************************************************/
186static int	igb_probe(device_t);
187static int	igb_attach(device_t);
188static int	igb_detach(device_t);
189static int	igb_shutdown(device_t);
190static int	igb_suspend(device_t);
191static int	igb_resume(device_t);
192#ifndef IGB_LEGACY_TX
193static int	igb_mq_start(struct ifnet *, struct mbuf *);
194static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
195static void	igb_qflush(struct ifnet *);
196static void	igb_deferred_mq_start(void *, int);
197#else
198static void	igb_start(struct ifnet *);
199static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
200#endif
201static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
202static void	igb_init(void *);
203static void	igb_init_locked(struct adapter *);
204static void	igb_stop(void *);
205static void	igb_media_status(struct ifnet *, struct ifmediareq *);
206static int	igb_media_change(struct ifnet *);
207static void	igb_identify_hardware(struct adapter *);
208static int	igb_allocate_pci_resources(struct adapter *);
209static int	igb_allocate_msix(struct adapter *);
210static int	igb_allocate_legacy(struct adapter *);
211static int	igb_setup_msix(struct adapter *);
212static void	igb_free_pci_resources(struct adapter *);
213static void	igb_local_timer(void *);
214static void	igb_reset(struct adapter *);
215static int	igb_setup_interface(device_t, struct adapter *);
216static int	igb_allocate_queues(struct adapter *);
217static void	igb_configure_queues(struct adapter *);
218
219static int	igb_allocate_transmit_buffers(struct tx_ring *);
220static void	igb_setup_transmit_structures(struct adapter *);
221static void	igb_setup_transmit_ring(struct tx_ring *);
222static void	igb_initialize_transmit_units(struct adapter *);
223static void	igb_free_transmit_structures(struct adapter *);
224static void	igb_free_transmit_buffers(struct tx_ring *);
225
226static int	igb_allocate_receive_buffers(struct rx_ring *);
227static int	igb_setup_receive_structures(struct adapter *);
228static int	igb_setup_receive_ring(struct rx_ring *);
229static void	igb_initialize_receive_units(struct adapter *);
230static void	igb_free_receive_structures(struct adapter *);
231static void	igb_free_receive_buffers(struct rx_ring *);
232static void	igb_free_receive_ring(struct rx_ring *);
233
234static void	igb_enable_intr(struct adapter *);
235static void	igb_disable_intr(struct adapter *);
236static void	igb_update_stats_counters(struct adapter *);
237static bool	igb_txeof(struct tx_ring *);
238
239static __inline	void igb_rx_discard(struct rx_ring *, int);
240static __inline void igb_rx_input(struct rx_ring *,
241		    struct ifnet *, struct mbuf *, u32);
242
243static bool	igb_rxeof(struct igb_queue *, int, int *);
244static void	igb_rx_checksum(u32, struct mbuf *, u32);
245static int	igb_tx_ctx_setup(struct tx_ring *,
246		    struct mbuf *, u32 *, u32 *);
247static int	igb_tso_setup(struct tx_ring *,
248		    struct mbuf *, u32 *, u32 *);
249static void	igb_set_promisc(struct adapter *);
250static void	igb_disable_promisc(struct adapter *);
251static void	igb_set_multi(struct adapter *);
252static void	igb_update_link_status(struct adapter *);
253static void	igb_refresh_mbufs(struct rx_ring *, int);
254
255static void	igb_register_vlan(void *, struct ifnet *, u16);
256static void	igb_unregister_vlan(void *, struct ifnet *, u16);
257static void	igb_setup_vlan_hw_support(struct adapter *);
258
259static int	igb_xmit(struct tx_ring *, struct mbuf **);
260static int	igb_dma_malloc(struct adapter *, bus_size_t,
261		    struct igb_dma_alloc *, int);
262static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
263static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
264static void	igb_print_nvm_info(struct adapter *);
265static int 	igb_is_valid_ether_addr(u8 *);
266static void     igb_add_hw_stats(struct adapter *);
267
268static void	igb_vf_init_stats(struct adapter *);
269static void	igb_update_vf_stats_counters(struct adapter *);
270
271/* Management and WOL Support */
272static void	igb_init_manageability(struct adapter *);
273static void	igb_release_manageability(struct adapter *);
274static void     igb_get_hw_control(struct adapter *);
275static void     igb_release_hw_control(struct adapter *);
276static void     igb_enable_wakeup(device_t);
277static void     igb_led_func(void *, int);
278
279static int	igb_irq_fast(void *);
280static void	igb_msix_que(void *);
281static void	igb_msix_link(void *);
282static void	igb_handle_que(void *context, int pending);
283static void	igb_handle_link(void *context, int pending);
284static void	igb_handle_link_locked(struct adapter *);
285
286static void	igb_set_sysctl_value(struct adapter *, const char *,
287		    const char *, int *, int);
288static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
289static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
290static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
291
292#ifdef DEVICE_POLLING
293static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */
295
296/*********************************************************************
297 *  FreeBSD Device Interface Entry Points
298 *********************************************************************/
299
300static device_method_t igb_methods[] = {
301	/* Device interface */
302	DEVMETHOD(device_probe, igb_probe),
303	DEVMETHOD(device_attach, igb_attach),
304	DEVMETHOD(device_detach, igb_detach),
305	DEVMETHOD(device_shutdown, igb_shutdown),
306	DEVMETHOD(device_suspend, igb_suspend),
307	DEVMETHOD(device_resume, igb_resume),
308	DEVMETHOD_END
309};
310
311static driver_t igb_driver = {
312	"igb", igb_methods, sizeof(struct adapter),
313};
314
315static devclass_t igb_devclass;
316DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
317MODULE_DEPEND(igb, pci, 1, 1, 1);
318MODULE_DEPEND(igb, ether, 1, 1, 1);
319
320/*********************************************************************
321 *  Tunable default values.
322 *********************************************************************/
323
324static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
325
326/* Descriptor defaults */
327static int igb_rxd = IGB_DEFAULT_RXD;
328static int igb_txd = IGB_DEFAULT_TXD;
329TUNABLE_INT("hw.igb.rxd", &igb_rxd);
330TUNABLE_INT("hw.igb.txd", &igb_txd);
331SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
332    "Number of receive descriptors per queue");
333SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
334    "Number of transmit descriptors per queue");
335
/*
** AIM: Adaptive Interrupt Moderation
** means that the interrupt rate is varied
** over time based on the traffic seen on
** that interrupt vector.
*/
342static int igb_enable_aim = TRUE;
343TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
344SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
345    "Enable adaptive interrupt moderation");
346
/*
 * MSI-X should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
351static int igb_enable_msix = 1;
352TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
353SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
354    "Enable MSI-X interrupts");
355
/*
** Tunable interrupt rate
*/
359static int igb_max_interrupt_rate = 8000;
360TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
361SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
362    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
363
364#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf_ring (drbr_xxx)
*/
368static int igb_buf_ring_size = IGB_BR_SIZE;
369TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
370SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
371    &igb_buf_ring_size, 0, "Size of the bufring");
372#endif
373
/*
** Header split causes the packet header to
** be DMA'd into a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and thus need no cluster.  How much it
** helps is very workload dependent.
*/
382static int igb_header_split = FALSE;
383TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
384SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
385    "Enable receive mbuf header split");
386
/*
** If left at 0 this will be autoconfigured
** based on the number of CPUs and the
** maximum number of supported MSI-X messages.
*/
392static int igb_num_queues = 0;
393TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
394SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
395    "Number of queues to configure, 0 indicates autoconfigure");
396
397/*
398** Global variable to store last used CPU when binding queues
399** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
400** queue is bound to a cpu.
401*/
402static int igb_last_bind_cpu = -1;
403
404/* How many packets rxeof tries to clean at a time */
405static int igb_rx_process_limit = 100;
406TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
407SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
408    &igb_rx_process_limit, 0,
409    "Maximum number of received packets to process at a time, -1 means unlimited");
410
411#ifdef DEV_NETMAP	/* see ixgbe.c for details */
412#include <dev/netmap/if_igb_netmap.h>
413#endif /* DEV_NETMAP */
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the adapter's PCI vendor/device ID.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/
422
423static int
424igb_probe(device_t dev)
425{
426	char		adapter_name[60];
427	uint16_t	pci_vendor_id = 0;
428	uint16_t	pci_device_id = 0;
429	uint16_t	pci_subvendor_id = 0;
430	uint16_t	pci_subdevice_id = 0;
431	igb_vendor_info_t *ent;
432
433	INIT_DEBUGOUT("igb_probe: begin");
434
435	pci_vendor_id = pci_get_vendor(dev);
436	if (pci_vendor_id != IGB_VENDOR_ID)
437		return (ENXIO);
438
439	pci_device_id = pci_get_device(dev);
440	pci_subvendor_id = pci_get_subvendor(dev);
441	pci_subdevice_id = pci_get_subdevice(dev);
442
443	ent = igb_vendor_info_array;
444	while (ent->vendor_id != 0) {
445		if ((pci_vendor_id == ent->vendor_id) &&
446		    (pci_device_id == ent->device_id) &&
447
448		    ((pci_subvendor_id == ent->subvendor_id) ||
449		    (ent->subvendor_id == PCI_ANY_ID)) &&
450
451		    ((pci_subdevice_id == ent->subdevice_id) ||
452		    (ent->subdevice_id == PCI_ANY_ID))) {
453			sprintf(adapter_name, "%s %s",
454				igb_strings[ent->index],
455				igb_driver_version);
456			device_set_desc_copy(dev, adapter_name);
457			return (BUS_PROBE_DEFAULT);
458		}
459		ent++;
460	}
461
462	return (ENXIO);
463}
464
465/*********************************************************************
466 *  Device initialization routine
467 *
468 *  The attach entry point is called when the driver is being loaded.
469 *  This routine identifies the type of hardware, allocates all resources
470 *  and initializes the hardware.
471 *
472 *  return 0 on success, positive on failure
473 *********************************************************************/
474
475static int
476igb_attach(device_t dev)
477{
478	struct adapter	*adapter;
479	int		error = 0;
480	u16		eeprom_data;
481
482	INIT_DEBUGOUT("igb_attach: begin");
483
484	if (resource_disabled("igb", device_get_unit(dev))) {
485		device_printf(dev, "Disabled by device hint\n");
486		return (ENXIO);
487	}
488
489	adapter = device_get_softc(dev);
490	adapter->dev = adapter->osdep.dev = dev;
491	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
492
493	/* SYSCTL stuff */
494	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
495	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
496	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
497	    igb_sysctl_nvm_info, "I", "NVM Information");
498
499	igb_set_sysctl_value(adapter, "enable_aim",
500	    "Interrupt Moderation", &adapter->enable_aim,
501	    igb_enable_aim);
502
503	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
504	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
505	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
506	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
507
508	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
509
510	/* Determine hardware and mac info */
511	igb_identify_hardware(adapter);
512
513	/* Setup PCI resources */
514	if (igb_allocate_pci_resources(adapter)) {
515		device_printf(dev, "Allocation of PCI resources failed\n");
516		error = ENXIO;
517		goto err_pci;
518	}
519
520	/* Do Shared Code initialization */
521	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
522		device_printf(dev, "Setup of Shared code failed\n");
523		error = ENXIO;
524		goto err_pci;
525	}
526
527	e1000_get_bus_info(&adapter->hw);
528
529	/* Sysctl for limiting the amount of work done in the taskqueue */
530	igb_set_sysctl_value(adapter, "rx_processing_limit",
531	    "max number of rx packets to process",
532	    &adapter->rx_process_limit, igb_rx_process_limit);
533
	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
539	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
540	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
541		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
542		    IGB_DEFAULT_TXD, igb_txd);
543		adapter->num_tx_desc = IGB_DEFAULT_TXD;
544	} else
545		adapter->num_tx_desc = igb_txd;
546	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
547	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
548		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
549		    IGB_DEFAULT_RXD, igb_rxd);
550		adapter->num_rx_desc = IGB_DEFAULT_RXD;
551	} else
552		adapter->num_rx_desc = igb_rxd;
553
554	adapter->hw.mac.autoneg = DO_AUTO_NEG;
555	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
556	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
557
558	/* Copper options */
559	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
560		adapter->hw.phy.mdix = AUTO_ALL_MODES;
561		adapter->hw.phy.disable_polarity_correction = FALSE;
562		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
563	}
564
	/*
	 * Set the frame limits assuming
	 * standard Ethernet-sized frames.
	 */
569	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
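	/*
	 * With the standard 1500-byte MTU this works out to
	 * 1500 + 14 + 4 = 1518 bytes.
	 */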
570
571	/*
572	** Allocate and Setup Queues
573	*/
574	if (igb_allocate_queues(adapter)) {
575		error = ENOMEM;
576		goto err_pci;
577	}
578
579	/* Allocate the appropriate stats memory */
580	if (adapter->vf_ifp) {
581		adapter->stats =
582		    (struct e1000_vf_stats *)malloc(sizeof \
583		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
584		igb_vf_init_stats(adapter);
585	} else
586		adapter->stats =
587		    (struct e1000_hw_stats *)malloc(sizeof \
588		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
589	if (adapter->stats == NULL) {
590		device_printf(dev, "Can not allocate stats memory\n");
591		error = ENOMEM;
592		goto err_late;
593	}
594
595	/* Allocate multicast array memory. */
596	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
597	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
598	if (adapter->mta == NULL) {
599		device_printf(dev, "Can not allocate multicast setup array\n");
600		error = ENOMEM;
601		goto err_late;
602	}
603
604	/* Some adapter-specific advanced features */
605	if (adapter->hw.mac.type >= e1000_i350) {
606		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
607		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
608		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
609		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
610		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
611		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
612		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
613		    adapter, 0, igb_sysctl_eee, "I",
614		    "Disable Energy Efficient Ethernet");
615		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
616			if (adapter->hw.mac.type == e1000_i354)
617				e1000_set_eee_i354(&adapter->hw);
618			else
619				e1000_set_eee_i350(&adapter->hw);
620		}
621	}
622
	/*
	** Start from a known state; this is
	** important before reading the NVM
	** and MAC address from it.
	*/
628	e1000_reset_hw(&adapter->hw);
629
630	/* Make sure we have a good EEPROM before we read from it */
631	if (((adapter->hw.mac.type != e1000_i210) &&
632	    (adapter->hw.mac.type != e1000_i211)) &&
633	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time it's a real issue.
		*/
639		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
640			device_printf(dev,
641			    "The EEPROM Checksum Is Not Valid\n");
642			error = EIO;
643			goto err_late;
644		}
645	}
646
647	/*
648	** Copy the permanent MAC address out of the EEPROM
649	*/
650	if (e1000_read_mac_addr(&adapter->hw) < 0) {
651		device_printf(dev, "EEPROM read error while reading MAC"
652		    " address\n");
653		error = EIO;
654		goto err_late;
655	}
656	/* Check its sanity */
657	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
658		device_printf(dev, "Invalid MAC address\n");
659		error = EIO;
660		goto err_late;
661	}
662
663	/* Setup OS specific network interface */
664	if (igb_setup_interface(dev, adapter) != 0)
665		goto err_late;
666
667	/* Now get a good starting state */
668	igb_reset(adapter);
669
670	/* Initialize statistics */
671	igb_update_stats_counters(adapter);
672
673	adapter->hw.mac.get_link_status = 1;
674	igb_update_link_status(adapter);
675
676	/* Indicate SOL/IDER usage */
677	if (e1000_check_reset_block(&adapter->hw))
678		device_printf(dev,
679		    "PHY reset is blocked due to SOL/IDER session.\n");
680
681	/* Determine if we have to control management hardware */
682	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
683
	/*
	 * Setup Wake-on-LAN
	 */
687	/* APME bit in EEPROM is mapped to WUC.APME */
688	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
689	if (eeprom_data)
690		adapter->wol = E1000_WUFC_MAG;
691
692	/* Register for VLAN events */
693	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
694	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
695	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
696	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
697
698	igb_add_hw_stats(adapter);
699
700	/* Tell the stack that the interface is not active */
701	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
702	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
703
704	adapter->led_dev = led_create(igb_led_func, adapter,
705	    device_get_nameunit(dev));
706
707	/*
708	** Configure Interrupts
709	*/
710	if ((adapter->msix > 1) && (igb_enable_msix))
711		error = igb_allocate_msix(adapter);
712	else /* MSI or Legacy */
713		error = igb_allocate_legacy(adapter);
714	if (error)
715		goto err_late;
716
717#ifdef DEV_NETMAP
718	igb_netmap_attach(adapter);
719#endif /* DEV_NETMAP */
720	INIT_DEBUGOUT("igb_attach: end");
721
722	return (0);
723
724err_late:
725	igb_detach(dev);
726	igb_free_transmit_structures(adapter);
727	igb_free_receive_structures(adapter);
728	igb_release_hw_control(adapter);
729err_pci:
730	igb_free_pci_resources(adapter);
731	if (adapter->ifp != NULL)
732		if_free(adapter->ifp);
733	free(adapter->mta, M_DEVBUF);
734	IGB_CORE_LOCK_DESTROY(adapter);
735
736	return (error);
737}
738
739/*********************************************************************
740 *  Device removal routine
741 *
742 *  The detach entry point is called when the driver is being removed.
743 *  This routine stops the adapter and deallocates all the resources
744 *  that were allocated for driver operation.
745 *
746 *  return 0 on success, positive on failure
747 *********************************************************************/
748
749static int
750igb_detach(device_t dev)
751{
752	struct adapter	*adapter = device_get_softc(dev);
753	struct ifnet	*ifp = adapter->ifp;
754
755	INIT_DEBUGOUT("igb_detach: begin");
756
	/* Make sure VLANs are not using the driver */
758	if (adapter->ifp->if_vlantrunk != NULL) {
759		device_printf(dev,"Vlan in use, detach first\n");
760		return (EBUSY);
761	}
762
763	ether_ifdetach(adapter->ifp);
764
765	if (adapter->led_dev != NULL)
766		led_destroy(adapter->led_dev);
767
768#ifdef DEVICE_POLLING
769	if (ifp->if_capenable & IFCAP_POLLING)
770		ether_poll_deregister(ifp);
771#endif
772
773	IGB_CORE_LOCK(adapter);
774	adapter->in_detach = 1;
775	igb_stop(adapter);
776	IGB_CORE_UNLOCK(adapter);
777
778	e1000_phy_hw_reset(&adapter->hw);
779
780	/* Give control back to firmware */
781	igb_release_manageability(adapter);
782	igb_release_hw_control(adapter);
783
784	if (adapter->wol) {
785		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
786		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
787		igb_enable_wakeup(dev);
788	}
789
790	/* Unregister VLAN events */
791	if (adapter->vlan_attach != NULL)
792		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793	if (adapter->vlan_detach != NULL)
794		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795
796	callout_drain(&adapter->timer);
797
798#ifdef DEV_NETMAP
799	netmap_detach(adapter->ifp);
800#endif /* DEV_NETMAP */
801	igb_free_pci_resources(adapter);
802	bus_generic_detach(dev);
803	if_free(ifp);
804
805	igb_free_transmit_structures(adapter);
806	igb_free_receive_structures(adapter);
807	if (adapter->mta != NULL)
808		free(adapter->mta, M_DEVBUF);
809
810	IGB_CORE_LOCK_DESTROY(adapter);
811
812	return (0);
813}
814
815/*********************************************************************
816 *
817 *  Shutdown entry point
818 *
819 **********************************************************************/
820
821static int
822igb_shutdown(device_t dev)
823{
824	return igb_suspend(dev);
825}
826
827/*
828 * Suspend/resume device methods.
829 */
830static int
831igb_suspend(device_t dev)
832{
833	struct adapter *adapter = device_get_softc(dev);
834
835	IGB_CORE_LOCK(adapter);
836
837	igb_stop(adapter);
838
839        igb_release_manageability(adapter);
840	igb_release_hw_control(adapter);
841
842        if (adapter->wol) {
843                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
844                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
845                igb_enable_wakeup(dev);
846        }
847
848	IGB_CORE_UNLOCK(adapter);
849
850	return bus_generic_suspend(dev);
851}
852
853static int
854igb_resume(device_t dev)
855{
856	struct adapter *adapter = device_get_softc(dev);
857	struct tx_ring	*txr = adapter->tx_rings;
858	struct ifnet *ifp = adapter->ifp;
859
860	IGB_CORE_LOCK(adapter);
861	igb_init_locked(adapter);
862	igb_init_manageability(adapter);
863
864	if ((ifp->if_flags & IFF_UP) &&
865	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
866		for (int i = 0; i < adapter->num_queues; i++, txr++) {
867			IGB_TX_LOCK(txr);
868#ifndef IGB_LEGACY_TX
869			/* Process the stack queue only if not depleted */
870			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
871			    !drbr_empty(ifp, txr->br))
872				igb_mq_start_locked(ifp, txr);
873#else
874			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
875				igb_start_locked(txr, ifp);
876#endif
877			IGB_TX_UNLOCK(txr);
878		}
879	}
880	IGB_CORE_UNLOCK(adapter);
881
882	return bus_generic_resume(dev);
883}
884
885
886#ifdef IGB_LEGACY_TX
887
888/*********************************************************************
889 *  Transmit entry point
890 *
891 *  igb_start is called by the stack to initiate a transmit.
892 *  The driver will remain in this routine as long as there are
893 *  packets to transmit and transmit resources are available.
 *  If transmit resources are not available, the stack is notified
 *  and the packet is requeued.
896 **********************************************************************/
897
898static void
899igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
900{
901	struct adapter	*adapter = ifp->if_softc;
902	struct mbuf	*m_head;
903
904	IGB_TX_LOCK_ASSERT(txr);
905
906	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
907	    IFF_DRV_RUNNING)
908		return;
909	if (!adapter->link_active)
910		return;
911
912	/* Call cleanup if number of TX descriptors low */
913	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
914		igb_txeof(txr);
915
916	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
917		if (txr->tx_avail <= IGB_MAX_SCATTER) {
918			txr->queue_status |= IGB_QUEUE_DEPLETED;
919			break;
920		}
921		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
922		if (m_head == NULL)
923			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
928		if (igb_xmit(txr, &m_head)) {
929			if (m_head != NULL)
930				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
931			if (txr->tx_avail <= IGB_MAX_SCATTER)
932				txr->queue_status |= IGB_QUEUE_DEPLETED;
933			break;
934		}
935
936		/* Send a copy of the frame to the BPF listener */
937		ETHER_BPF_MTAP(ifp, m_head);
938
939		/* Set watchdog on */
940		txr->watchdog_time = ticks;
941		txr->queue_status |= IGB_QUEUE_WORKING;
942	}
943}
944
/*
 * Legacy TX driver routine, called from the
 * stack; it always uses tx[0] and spins for its lock.
 * Should not be used with multiqueue TX.
 */
950static void
951igb_start(struct ifnet *ifp)
952{
953	struct adapter	*adapter = ifp->if_softc;
954	struct tx_ring	*txr = adapter->tx_rings;
955
956	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
957		IGB_TX_LOCK(txr);
958		igb_start_locked(txr, ifp);
959		IGB_TX_UNLOCK(txr);
960	}
961	return;
962}
963
964#else /* ~IGB_LEGACY_TX */
965
/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
*/
971static int
972igb_mq_start(struct ifnet *ifp, struct mbuf *m)
973{
974	struct adapter		*adapter = ifp->if_softc;
975	struct igb_queue	*que;
976	struct tx_ring		*txr;
977	int 			i, err = 0;
978
979	/* Which queue to use */
980	if ((m->m_flags & M_FLOWID) != 0)
981		i = m->m_pkthdr.flowid % adapter->num_queues;
982	else
983		i = curcpu % adapter->num_queues;
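	/*
	 * Illustration: with 4 queues a packet whose flowid hashes to 42 is
	 * enqueued on ring 42 % 4 = 2, while packets without a flowid are
	 * spread by the sending CPU instead.
	 */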
984	txr = &adapter->tx_rings[i];
985	que = &adapter->queues[i];
986
987	err = drbr_enqueue(ifp, txr->br, m);
988	if (err)
989		return (err);
990	if (IGB_TX_TRYLOCK(txr)) {
991		igb_mq_start_locked(ifp, txr);
992		IGB_TX_UNLOCK(txr);
993	} else
994		taskqueue_enqueue(que->tq, &txr->txq_task);
995
996	return (0);
997}
998
999static int
1000igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1001{
1002	struct adapter  *adapter = txr->adapter;
1003        struct mbuf     *next;
1004        int             err = 0, enq = 0;
1005
1006	IGB_TX_LOCK_ASSERT(txr);
1007
1008	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
1009	    adapter->link_active == 0)
1010		return (ENETDOWN);
1011
1012
1013	/* Process the queue */
1014	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1015		if ((err = igb_xmit(txr, &next)) != 0) {
1016			if (next == NULL) {
1017				/* It was freed, move forward */
1018				drbr_advance(ifp, txr->br);
1019			} else {
1020				/*
1021				 * Still have one left, it may not be
1022				 * the same since the transmit function
1023				 * may have changed it.
1024				 */
1025				drbr_putback(ifp, txr->br, next);
1026			}
1027			break;
1028		}
1029		drbr_advance(ifp, txr->br);
1030		enq++;
1031		ifp->if_obytes += next->m_pkthdr.len;
1032		if (next->m_flags & M_MCAST)
1033			ifp->if_omcasts++;
1034		ETHER_BPF_MTAP(ifp, next);
1035		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1036			break;
1037	}
1038	if (enq > 0) {
1039		/* Set the watchdog */
1040		txr->queue_status |= IGB_QUEUE_WORKING;
1041		txr->watchdog_time = ticks;
1042	}
1043	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1044		igb_txeof(txr);
1045	if (txr->tx_avail <= IGB_MAX_SCATTER)
1046		txr->queue_status |= IGB_QUEUE_DEPLETED;
1047	return (err);
1048}
1049
1050/*
1051 * Called from a taskqueue to drain queued transmit packets.
1052 */
1053static void
1054igb_deferred_mq_start(void *arg, int pending)
1055{
1056	struct tx_ring *txr = arg;
1057	struct adapter *adapter = txr->adapter;
1058	struct ifnet *ifp = adapter->ifp;
1059
1060	IGB_TX_LOCK(txr);
1061	if (!drbr_empty(ifp, txr->br))
1062		igb_mq_start_locked(ifp, txr);
1063	IGB_TX_UNLOCK(txr);
1064}
1065
1066/*
1067** Flush all ring buffers
1068*/
1069static void
1070igb_qflush(struct ifnet *ifp)
1071{
1072	struct adapter	*adapter = ifp->if_softc;
1073	struct tx_ring	*txr = adapter->tx_rings;
1074	struct mbuf	*m;
1075
1076	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1077		IGB_TX_LOCK(txr);
1078		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1079			m_freem(m);
1080		IGB_TX_UNLOCK(txr);
1081	}
1082	if_qflush(ifp);
1083}
1084#endif /* ~IGB_LEGACY_TX */
1085
1086/*********************************************************************
1087 *  Ioctl entry point
1088 *
1089 *  igb_ioctl is called when the user wants to configure the
1090 *  interface.
1091 *
1092 *  return 0 on success, positive on failure
1093 **********************************************************************/
1094
1095static int
1096igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1097{
1098	struct adapter	*adapter = ifp->if_softc;
1099	struct ifreq	*ifr = (struct ifreq *)data;
1100#if defined(INET) || defined(INET6)
1101	struct ifaddr	*ifa = (struct ifaddr *)data;
1102#endif
1103	bool		avoid_reset = FALSE;
1104	int		error = 0;
1105
1106	if (adapter->in_detach)
1107		return (error);
1108
1109	switch (command) {
1110	case SIOCSIFADDR:
1111#ifdef INET
1112		if (ifa->ifa_addr->sa_family == AF_INET)
1113			avoid_reset = TRUE;
1114#endif
1115#ifdef INET6
1116		if (ifa->ifa_addr->sa_family == AF_INET6)
1117			avoid_reset = TRUE;
1118#endif
1119		/*
1120		** Calling init results in link renegotiation,
1121		** so we avoid doing it when possible.
1122		*/
1123		if (avoid_reset) {
1124			ifp->if_flags |= IFF_UP;
1125			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1126				igb_init(adapter);
1127#ifdef INET
1128			if (!(ifp->if_flags & IFF_NOARP))
1129				arp_ifinit(ifp, ifa);
1130#endif
1131		} else
1132			error = ether_ioctl(ifp, command, data);
1133		break;
1134	case SIOCSIFMTU:
1135	    {
1136		int max_frame_size;
1137
1138		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1139
1140		IGB_CORE_LOCK(adapter);
1141		max_frame_size = 9234;
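		/*
		 * 9234 bytes less the 14-byte Ethernet header and the 4-byte
		 * CRC means the largest MTU accepted below is 9216 bytes.
		 */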
1142		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1143		    ETHER_CRC_LEN) {
1144			IGB_CORE_UNLOCK(adapter);
1145			error = EINVAL;
1146			break;
1147		}
1148
1149		ifp->if_mtu = ifr->ifr_mtu;
1150		adapter->max_frame_size =
1151		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1152		igb_init_locked(adapter);
1153		IGB_CORE_UNLOCK(adapter);
1154		break;
1155	    }
1156	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
1159		IGB_CORE_LOCK(adapter);
1160		if (ifp->if_flags & IFF_UP) {
1161			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1162				if ((ifp->if_flags ^ adapter->if_flags) &
1163				    (IFF_PROMISC | IFF_ALLMULTI)) {
1164					igb_disable_promisc(adapter);
1165					igb_set_promisc(adapter);
1166				}
1167			} else
1168				igb_init_locked(adapter);
1169		} else
1170			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1171				igb_stop(adapter);
1172		adapter->if_flags = ifp->if_flags;
1173		IGB_CORE_UNLOCK(adapter);
1174		break;
1175	case SIOCADDMULTI:
1176	case SIOCDELMULTI:
1177		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1178		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1179			IGB_CORE_LOCK(adapter);
1180			igb_disable_intr(adapter);
1181			igb_set_multi(adapter);
1182#ifdef DEVICE_POLLING
1183			if (!(ifp->if_capenable & IFCAP_POLLING))
1184#endif
1185				igb_enable_intr(adapter);
1186			IGB_CORE_UNLOCK(adapter);
1187		}
1188		break;
1189	case SIOCSIFMEDIA:
1190		/* Check SOL/IDER usage */
1191		IGB_CORE_LOCK(adapter);
1192		if (e1000_check_reset_block(&adapter->hw)) {
1193			IGB_CORE_UNLOCK(adapter);
1194			device_printf(adapter->dev, "Media change is"
1195			    " blocked due to SOL/IDER session.\n");
1196			break;
1197		}
1198		IGB_CORE_UNLOCK(adapter);
1199	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1202		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1203		break;
1204	case SIOCSIFCAP:
1205	    {
1206		int mask, reinit;
1207
1208		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1209		reinit = 0;
1210		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1211#ifdef DEVICE_POLLING
1212		if (mask & IFCAP_POLLING) {
1213			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1214				error = ether_poll_register(igb_poll, ifp);
1215				if (error)
1216					return (error);
1217				IGB_CORE_LOCK(adapter);
1218				igb_disable_intr(adapter);
1219				ifp->if_capenable |= IFCAP_POLLING;
1220				IGB_CORE_UNLOCK(adapter);
1221			} else {
1222				error = ether_poll_deregister(ifp);
1223				/* Enable interrupt even in error case */
1224				IGB_CORE_LOCK(adapter);
1225				igb_enable_intr(adapter);
1226				ifp->if_capenable &= ~IFCAP_POLLING;
1227				IGB_CORE_UNLOCK(adapter);
1228			}
1229		}
1230#endif
1231		if (mask & IFCAP_HWCSUM) {
1232			ifp->if_capenable ^= IFCAP_HWCSUM;
1233			reinit = 1;
1234		}
1235		if (mask & IFCAP_TSO4) {
1236			ifp->if_capenable ^= IFCAP_TSO4;
1237			reinit = 1;
1238		}
1239		if (mask & IFCAP_TSO6) {
1240			ifp->if_capenable ^= IFCAP_TSO6;
1241			reinit = 1;
1242		}
1243		if (mask & IFCAP_VLAN_HWTAGGING) {
1244			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1245			reinit = 1;
1246		}
1247		if (mask & IFCAP_VLAN_HWFILTER) {
1248			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1249			reinit = 1;
1250		}
1251		if (mask & IFCAP_VLAN_HWTSO) {
1252			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1253			reinit = 1;
1254		}
1255		if (mask & IFCAP_LRO) {
1256			ifp->if_capenable ^= IFCAP_LRO;
1257			reinit = 1;
1258		}
1259		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1260			igb_init(adapter);
1261		VLAN_CAPABILITIES(ifp);
1262		break;
1263	    }
1264
1265	default:
1266		error = ether_ioctl(ifp, command, data);
1267		break;
1268	}
1269
1270	return (error);
1271}
1272
1273
/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as the
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 **********************************************************************/
1284
1285static void
1286igb_init_locked(struct adapter *adapter)
1287{
1288	struct ifnet	*ifp = adapter->ifp;
1289	device_t	dev = adapter->dev;
1290
1291	INIT_DEBUGOUT("igb_init: begin");
1292
1293	IGB_CORE_LOCK_ASSERT(adapter);
1294
1295	igb_disable_intr(adapter);
1296	callout_stop(&adapter->timer);
1297
	/* Get the latest MAC address; the user may have set a LAA */
1299        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1300              ETHER_ADDR_LEN);
1301
1302	/* Put the address into the Receive Address Array */
1303	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1304
1305	igb_reset(adapter);
1306	igb_update_link_status(adapter);
1307
1308	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1309
1310	/* Set hardware offload abilities */
1311	ifp->if_hwassist = 0;
1312	if (ifp->if_capenable & IFCAP_TXCSUM) {
1313		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1314#if __FreeBSD_version >= 800000
1315		if (adapter->hw.mac.type == e1000_82576)
1316			ifp->if_hwassist |= CSUM_SCTP;
1317#endif
1318	}
1319
1320	if (ifp->if_capenable & IFCAP_TSO)
1321		ifp->if_hwassist |= CSUM_TSO;
1322
1323	/* Configure for OS presence */
1324	igb_init_manageability(adapter);
1325
1326	/* Prepare transmit descriptors and buffers */
1327	igb_setup_transmit_structures(adapter);
1328	igb_initialize_transmit_units(adapter);
1329
1330	/* Setup Multicast table */
1331	igb_set_multi(adapter);
1332
	/*
	** Figure out the desired mbuf pool
	** for doing jumbo frames or packet split.
	*/
1337	if (adapter->max_frame_size <= 2048)
1338		adapter->rx_mbuf_sz = MCLBYTES;
1339	else if (adapter->max_frame_size <= 4096)
1340		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1341	else
1342		adapter->rx_mbuf_sz = MJUM9BYTES;
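	/*
	 * For illustration: a default 1500-byte MTU gives a max frame of
	 * 1518 bytes and standard 2K clusters; an MTU around 4000 still fits
	 * page-sized jumbo clusters; anything larger (e.g. a 9000-byte MTU)
	 * falls through to 9K clusters.
	 */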
1343
1344	/* Prepare receive descriptors and buffers */
1345	if (igb_setup_receive_structures(adapter)) {
1346		device_printf(dev, "Could not setup receive structures\n");
1347		return;
1348	}
1349	igb_initialize_receive_units(adapter);
1350
1351        /* Enable VLAN support */
1352	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1353		igb_setup_vlan_hw_support(adapter);
1354
1355	/* Don't lose promiscuous settings */
1356	igb_set_promisc(adapter);
1357
1358	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1359	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1360
1361	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1362	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1363
1364	if (adapter->msix > 1) /* Set up queue routing */
1365		igb_configure_queues(adapter);
1366
1367	/* this clears any pending interrupts */
1368	E1000_READ_REG(&adapter->hw, E1000_ICR);
1369#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
1374	if (ifp->if_capenable & IFCAP_POLLING)
1375		igb_disable_intr(adapter);
1376	else
1377#endif /* DEVICE_POLLING */
1378	{
1379		igb_enable_intr(adapter);
1380		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1381	}
1382
1383	/* Set Energy Efficient Ethernet */
1384	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1385		if (adapter->hw.mac.type == e1000_i354)
1386			e1000_set_eee_i354(&adapter->hw);
1387		else
1388			e1000_set_eee_i350(&adapter->hw);
1389	}
1390}
1391
1392static void
1393igb_init(void *arg)
1394{
1395	struct adapter *adapter = arg;
1396
1397	IGB_CORE_LOCK(adapter);
1398	igb_init_locked(adapter);
1399	IGB_CORE_UNLOCK(adapter);
1400}
1401
1402
1403static void
1404igb_handle_que(void *context, int pending)
1405{
1406	struct igb_queue *que = context;
1407	struct adapter *adapter = que->adapter;
1408	struct tx_ring *txr = que->txr;
1409	struct ifnet	*ifp = adapter->ifp;
1410
1411	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1412		bool	more;
1413
1414		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1415
1416		IGB_TX_LOCK(txr);
1417		igb_txeof(txr);
1418#ifndef IGB_LEGACY_TX
1419		/* Process the stack queue only if not depleted */
1420		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1421		    !drbr_empty(ifp, txr->br))
1422			igb_mq_start_locked(ifp, txr);
1423#else
1424		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1425			igb_start_locked(txr, ifp);
1426#endif
1427		IGB_TX_UNLOCK(txr);
1428		/* Do we need another? */
1429		if (more) {
1430			taskqueue_enqueue(que->tq, &que->que_task);
1431			return;
1432		}
1433	}
1434
1435#ifdef DEVICE_POLLING
1436	if (ifp->if_capenable & IFCAP_POLLING)
1437		return;
1438#endif
1439	/* Reenable this interrupt */
1440	if (que->eims)
1441		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1442	else
1443		igb_enable_intr(adapter);
1444}
1445
1446/* Deal with link in a sleepable context */
1447static void
1448igb_handle_link(void *context, int pending)
1449{
1450	struct adapter *adapter = context;
1451
1452	IGB_CORE_LOCK(adapter);
1453	igb_handle_link_locked(adapter);
1454	IGB_CORE_UNLOCK(adapter);
1455}
1456
1457static void
1458igb_handle_link_locked(struct adapter *adapter)
1459{
1460	struct tx_ring	*txr = adapter->tx_rings;
1461	struct ifnet *ifp = adapter->ifp;
1462
1463	IGB_CORE_LOCK_ASSERT(adapter);
1464	adapter->hw.mac.get_link_status = 1;
1465	igb_update_link_status(adapter);
1466	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1467		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1468			IGB_TX_LOCK(txr);
1469#ifndef IGB_LEGACY_TX
1470			/* Process the stack queue only if not depleted */
1471			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1472			    !drbr_empty(ifp, txr->br))
1473				igb_mq_start_locked(ifp, txr);
1474#else
1475			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1476				igb_start_locked(txr, ifp);
1477#endif
1478			IGB_TX_UNLOCK(txr);
1479		}
1480	}
1481}
1482
1483/*********************************************************************
1484 *
1485 *  MSI/Legacy Deferred
1486 *  Interrupt Service routine
1487 *
1488 *********************************************************************/
1489static int
1490igb_irq_fast(void *arg)
1491{
1492	struct adapter		*adapter = arg;
1493	struct igb_queue	*que = adapter->queues;
1494	u32			reg_icr;
1495
1496
1497	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1498
1499	/* Hot eject?  */
1500	if (reg_icr == 0xffffffff)
1501		return FILTER_STRAY;
1502
1503	/* Definitely not our interrupt.  */
1504	if (reg_icr == 0x0)
1505		return FILTER_STRAY;
1506
1507	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1508		return FILTER_STRAY;
1509
1510	/*
1511	 * Mask interrupts until the taskqueue is finished running.  This is
1512	 * cheap, just assume that it is needed.  This also works around the
1513	 * MSI message reordering errata on certain systems.
1514	 */
1515	igb_disable_intr(adapter);
1516	taskqueue_enqueue(que->tq, &que->que_task);
1517
1518	/* Link status change */
1519	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1520		taskqueue_enqueue(que->tq, &adapter->link_task);
1521
1522	if (reg_icr & E1000_ICR_RXO)
1523		adapter->rx_overruns++;
1524	return FILTER_HANDLED;
1525}
1526
1527#ifdef DEVICE_POLLING
1528#if __FreeBSD_version >= 800000
1529#define POLL_RETURN_COUNT(a) (a)
1530static int
1531#else
1532#define POLL_RETURN_COUNT(a)
1533static void
1534#endif
1535igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1536{
1537	struct adapter		*adapter = ifp->if_softc;
1538	struct igb_queue	*que;
1539	struct tx_ring		*txr;
1540	u32			reg_icr, rx_done = 0;
1541	u32			loop = IGB_MAX_LOOP;
1542	bool			more;
1543
1544	IGB_CORE_LOCK(adapter);
1545	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1546		IGB_CORE_UNLOCK(adapter);
1547		return POLL_RETURN_COUNT(rx_done);
1548	}
1549
1550	if (cmd == POLL_AND_CHECK_STATUS) {
1551		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1552		/* Link status change */
1553		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1554			igb_handle_link_locked(adapter);
1555
1556		if (reg_icr & E1000_ICR_RXO)
1557			adapter->rx_overruns++;
1558	}
1559	IGB_CORE_UNLOCK(adapter);
1560
1561	for (int i = 0; i < adapter->num_queues; i++) {
1562		que = &adapter->queues[i];
1563		txr = que->txr;
1564
1565		igb_rxeof(que, count, &rx_done);
1566
1567		IGB_TX_LOCK(txr);
1568		do {
1569			more = igb_txeof(txr);
1570		} while (loop-- && more);
1571#ifndef IGB_LEGACY_TX
1572		if (!drbr_empty(ifp, txr->br))
1573			igb_mq_start_locked(ifp, txr);
1574#else
1575		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576			igb_start_locked(txr, ifp);
1577#endif
1578		IGB_TX_UNLOCK(txr);
1579	}
1580
1581	return POLL_RETURN_COUNT(rx_done);
1582}
1583#endif /* DEVICE_POLLING */
1584
1585/*********************************************************************
1586 *
1587 *  MSIX Que Interrupt Service routine
1588 *
1589 **********************************************************************/
1590static void
1591igb_msix_que(void *arg)
1592{
1593	struct igb_queue *que = arg;
1594	struct adapter *adapter = que->adapter;
1595	struct ifnet   *ifp = adapter->ifp;
1596	struct tx_ring *txr = que->txr;
1597	struct rx_ring *rxr = que->rxr;
1598	u32		newitr = 0;
1599	bool		more_rx;
1600
1601	/* Ignore spurious interrupts */
1602	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1603		return;
1604
1605	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1606	++que->irqs;
1607
1608	IGB_TX_LOCK(txr);
1609	igb_txeof(txr);
1610#ifndef IGB_LEGACY_TX
1611	/* Process the stack queue only if not depleted */
1612	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1613	    !drbr_empty(ifp, txr->br))
1614		igb_mq_start_locked(ifp, txr);
1615#else
1616	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1617		igb_start_locked(txr, ifp);
1618#endif
1619	IGB_TX_UNLOCK(txr);
1620
1621	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1622
1623	if (adapter->enable_aim == FALSE)
1624		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out the last calculated setting
	**  - Calculate a new one based on the average
	**    frame size over the last interval.
	*/
1631        if (que->eitr_setting)
1632                E1000_WRITE_REG(&adapter->hw,
1633                    E1000_EITR(que->msix), que->eitr_setting);
1634
1635        que->eitr_setting = 0;
1636
1637        /* Idle, do nothing */
1638        if ((txr->bytes == 0) && (rxr->bytes == 0))
1639                goto no_calc;
1640
        /* Use half the default if not running at 1Gbps */
1642        if (adapter->link_speed != 1000)
1643                newitr = IGB_DEFAULT_ITR / 2;
1644        else {
1645		if ((txr->bytes) && (txr->packets))
1646                	newitr = txr->bytes/txr->packets;
1647		if ((rxr->bytes) && (rxr->packets))
1648			newitr = max(newitr,
1649			    (rxr->bytes / rxr->packets));
1650                newitr += 24; /* account for hardware frame, crc */
1651		/* set an upper boundary */
1652		newitr = min(newitr, 3000);
1653		/* Be nice to the mid range */
1654                if ((newitr > 300) && (newitr < 1200))
1655                        newitr = (newitr / 3);
1656                else
1657                        newitr = (newitr / 2);
1658        }
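        /*
         * Worked example (illustrative): with ~1500-byte frames at gigabit
         * speed the average is 1500 + 24 = 1524, which is above the 1200
         * mid-range bound and so is halved to 762 before being masked and
         * written to EITR on the next interrupt.
         */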
1659        newitr &= 0x7FFC;  /* Mask invalid bits */
1660        if (adapter->hw.mac.type == e1000_82575)
1661                newitr |= newitr << 16;
1662        else
1663                newitr |= E1000_EITR_CNT_IGNR;
1664
1665        /* save for next interrupt */
1666        que->eitr_setting = newitr;
1667
1668        /* Reset state */
1669        txr->bytes = 0;
1670        txr->packets = 0;
1671        rxr->bytes = 0;
1672        rxr->packets = 0;
1673
1674no_calc:
	/* Schedule a clean task if needed */
1676	if (more_rx)
1677		taskqueue_enqueue(que->tq, &que->que_task);
1678	else
1679		/* Reenable this interrupt */
1680		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1681	return;
1682}
1683
1684
1685/*********************************************************************
1686 *
1687 *  MSIX Link Interrupt Service routine
1688 *
1689 **********************************************************************/
1690
1691static void
1692igb_msix_link(void *arg)
1693{
1694	struct adapter	*adapter = arg;
1695	u32       	icr;
1696
1697	++adapter->link_irq;
1698	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1699	if (!(icr & E1000_ICR_LSC))
1700		goto spurious;
1701	igb_handle_link(adapter, 0);
1702
1703spurious:
1704	/* Rearm */
1705	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1706	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1707	return;
1708}
1709
1710
1711/*********************************************************************
1712 *
1713 *  Media Ioctl callback
1714 *
1715 *  This routine is called whenever the user queries the status of
1716 *  the interface using ifconfig.
1717 *
1718 **********************************************************************/
1719static void
1720igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1721{
1722	struct adapter *adapter = ifp->if_softc;
1723
1724	INIT_DEBUGOUT("igb_media_status: begin");
1725
1726	IGB_CORE_LOCK(adapter);
1727	igb_update_link_status(adapter);
1728
1729	ifmr->ifm_status = IFM_AVALID;
1730	ifmr->ifm_active = IFM_ETHER;
1731
1732	if (!adapter->link_active) {
1733		IGB_CORE_UNLOCK(adapter);
1734		return;
1735	}
1736
1737	ifmr->ifm_status |= IFM_ACTIVE;
1738
1739	switch (adapter->link_speed) {
1740	case 10:
1741		ifmr->ifm_active |= IFM_10_T;
1742		break;
1743	case 100:
1744		/*
1745		** Support for 100Mb SFP - these are Fiber
1746		** but the media type appears as serdes
1747		*/
1748		if (adapter->hw.phy.media_type ==
1749		    e1000_media_type_internal_serdes)
1750			ifmr->ifm_active |= IFM_100_FX;
1751		else
1752			ifmr->ifm_active |= IFM_100_TX;
1753		break;
1754	case 1000:
1755		ifmr->ifm_active |= IFM_1000_T;
1756		break;
1757	case 2500:
1758		ifmr->ifm_active |= IFM_2500_SX;
1759		break;
1760	}
1761
1762	if (adapter->link_duplex == FULL_DUPLEX)
1763		ifmr->ifm_active |= IFM_FDX;
1764	else
1765		ifmr->ifm_active |= IFM_HDX;
1766
1767	IGB_CORE_UNLOCK(adapter);
1768}
1769
1770/*********************************************************************
1771 *
1772 *  Media Ioctl callback
1773 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options of ifconfig.
1776 *
1777 **********************************************************************/
1778static int
1779igb_media_change(struct ifnet *ifp)
1780{
1781	struct adapter *adapter = ifp->if_softc;
1782	struct ifmedia  *ifm = &adapter->media;
1783
1784	INIT_DEBUGOUT("igb_media_change: begin");
1785
1786	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1787		return (EINVAL);
1788
1789	IGB_CORE_LOCK(adapter);
1790	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1791	case IFM_AUTO:
1792		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1793		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1794		break;
1795	case IFM_1000_LX:
1796	case IFM_1000_SX:
1797	case IFM_1000_T:
1798		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1799		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1800		break;
1801	case IFM_100_TX:
1802		adapter->hw.mac.autoneg = FALSE;
1803		adapter->hw.phy.autoneg_advertised = 0;
1804		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1805			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1806		else
1807			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1808		break;
1809	case IFM_10_T:
1810		adapter->hw.mac.autoneg = FALSE;
1811		adapter->hw.phy.autoneg_advertised = 0;
1812		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1813			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1814		else
1815			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1816		break;
1817	default:
1818		device_printf(adapter->dev, "Unsupported media type\n");
1819	}
1820
1821	igb_init_locked(adapter);
1822	IGB_CORE_UNLOCK(adapter);
1823
1824	return (0);
1825}
1826
1827
1828/*********************************************************************
1829 *
1830 *  This routine maps the mbufs to Advanced TX descriptors.
1831 *
1832 **********************************************************************/
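/*
 * Overview: the mbuf chain is DMA-mapped (defragmenting once on EFBIG),
 * an offload context descriptor is set up as needed, one advanced data
 * descriptor is written per DMA segment, the final descriptor gets
 * EOP/RS set, and the tail register (TDT) is advanced to hand the frame
 * to the hardware.
 */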
1833static int
1834igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1835{
1836	struct adapter  *adapter = txr->adapter;
1837	u32		olinfo_status = 0, cmd_type_len;
1838	int             i, j, error, nsegs;
1839	int		first;
1840	bool		remap = TRUE;
1841	struct mbuf	*m_head;
1842	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1843	bus_dmamap_t	map;
1844	struct igb_tx_buf *txbuf;
1845	union e1000_adv_tx_desc *txd = NULL;
1846
1847	m_head = *m_headp;
1848
1849	/* Basic descriptor defines */
1850        cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1851	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1852
1853	if (m_head->m_flags & M_VLANTAG)
1854        	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1855
1856        /*
1857         * It is important to capture the first descriptor used,
1858         * because its buffer slot later records the EOP
1859         * descriptor that the hardware is told to report back on.
1860         */
1861        first = txr->next_avail_desc;
1862	txbuf = &txr->tx_buffers[first];
1863	map = txbuf->map;
1864
1865	/*
1866	 * Map the packet for DMA.
1867	 */
1868retry:
1869	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1870	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1871
1872	if (__predict_false(error)) {
1873		struct mbuf *m;
1874
1875		switch (error) {
1876		case EFBIG:
1877			/* Too many segments: defrag the chain and retry once */
1878			if (remap == TRUE) {
1879				remap = FALSE;
1880				m = m_defrag(*m_headp, M_NOWAIT);
1881				if (m == NULL) {
1882					adapter->mbuf_defrag_failed++;
1883					m_freem(*m_headp);
1884					*m_headp = NULL;
1885					return (ENOBUFS);
1886				}
1887				*m_headp = m;
1888				goto retry;
1889			} else
1890				return (error);
1891		case ENOMEM:
1892			txr->no_tx_dma_setup++;
1893			return (error);
1894		default:
1895			txr->no_tx_dma_setup++;
1896			m_freem(*m_headp);
1897			*m_headp = NULL;
1898			return (error);
1899		}
1900	}
1901
1902	/* Make certain there are enough descriptors */
1903	if (nsegs > txr->tx_avail - 2) {
1904		txr->no_desc_avail++;
1905		bus_dmamap_unload(txr->txtag, map);
1906		return (ENOBUFS);
1907	}
1908	m_head = *m_headp;
1909
1910	/*
1911	** Set up the appropriate offload context;
1912	** this will consume the first descriptor.
1913	*/
1914	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1915	if (__predict_false(error)) {
1916		m_freem(*m_headp);
1917		*m_headp = NULL;
1918		return (error);
1919	}
1920
1921	/* 82575 needs the queue index added */
1922	if (adapter->hw.mac.type == e1000_82575)
1923		olinfo_status |= txr->me << 4;
1924
1925	i = txr->next_avail_desc;
1926	for (j = 0; j < nsegs; j++) {
1927		bus_size_t seglen;
1928		bus_addr_t segaddr;
1929
1930		txbuf = &txr->tx_buffers[i];
1931		txd = &txr->tx_base[i];
1932		seglen = segs[j].ds_len;
1933		segaddr = htole64(segs[j].ds_addr);
1934
1935		txd->read.buffer_addr = segaddr;
1936		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1937		    cmd_type_len | seglen);
1938		txd->read.olinfo_status = htole32(olinfo_status);
1939
1940		if (++i == txr->num_desc)
1941			i = 0;
1942	}
1943
1944	txd->read.cmd_type_len |=
1945	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1946	txr->tx_avail -= nsegs;
1947	txr->next_avail_desc = i;
1948
1949	txbuf->m_head = m_head;
1950	/*
1951	** Here we swap the map so the last descriptor,
1952	** which gets the completion interrupt has the
1953	** real map, and the first descriptor gets the
1954	** unused map from this descriptor.
1955	*/
1956	txr->tx_buffers[first].map = txbuf->map;
1957	txbuf->map = map;
1958	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1959
1960        /* Set the EOP descriptor that will be marked done */
1961        txbuf = &txr->tx_buffers[first];
1962	txbuf->eop = txd;
1963
1964        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1965            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966	/*
1967	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1968	 * hardware that this frame is available to transmit.
1969	 */
1970	++txr->total_packets;
1971	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1972
1973	return (0);
1974}
1975static void
1976igb_set_promisc(struct adapter *adapter)
1977{
1978	struct ifnet	*ifp = adapter->ifp;
1979	struct e1000_hw *hw = &adapter->hw;
1980	u32		reg;
1981
1982	if (adapter->vf_ifp) {
1983		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1984		return;
1985	}
1986
1987	reg = E1000_READ_REG(hw, E1000_RCTL);
1988	if (ifp->if_flags & IFF_PROMISC) {
1989		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1990		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1991	} else if (ifp->if_flags & IFF_ALLMULTI) {
1992		reg |= E1000_RCTL_MPE;
1993		reg &= ~E1000_RCTL_UPE;
1994		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1995	}
1996}
1997
1998static void
1999igb_disable_promisc(struct adapter *adapter)
2000{
2001	struct e1000_hw *hw = &adapter->hw;
2002	struct ifnet	*ifp = adapter->ifp;
2003	u32		reg;
2004	int		mcnt = 0;
2005
2006	if (adapter->vf_ifp) {
2007		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2008		return;
2009	}
2010	reg = E1000_READ_REG(hw, E1000_RCTL);
2011	reg &=  (~E1000_RCTL_UPE);
2012	if (ifp->if_flags & IFF_ALLMULTI)
2013		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2014	else {
2015		struct  ifmultiaddr *ifma;
2016#if __FreeBSD_version < 800000
2017		IF_ADDR_LOCK(ifp);
2018#else
2019		if_maddr_rlock(ifp);
2020#endif
2021		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2022			if (ifma->ifma_addr->sa_family != AF_LINK)
2023				continue;
2024			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2025				break;
2026			mcnt++;
2027		}
2028#if __FreeBSD_version < 800000
2029		IF_ADDR_UNLOCK(ifp);
2030#else
2031		if_maddr_runlock(ifp);
2032#endif
2033	}
2034	/* Don't disable if in MAX groups */
2035	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2036		reg &=  (~E1000_RCTL_MPE);
2037	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2038}
2039
2040
2041/*********************************************************************
2042 *  Multicast Update
2043 *
2044 *  This routine is called whenever the multicast address list is updated.
2045 *
2046 **********************************************************************/
2047
2048static void
2049igb_set_multi(struct adapter *adapter)
2050{
2051	struct ifnet	*ifp = adapter->ifp;
2052	struct ifmultiaddr *ifma;
2053	u32 reg_rctl = 0;
2054	u8  *mta;
2055
2056	int mcnt = 0;
2057
2058	IOCTL_DEBUGOUT("igb_set_multi: begin");
2059
2060	mta = adapter->mta;
2061	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2062	    MAX_NUM_MULTICAST_ADDRESSES);
2063
2064#if __FreeBSD_version < 800000
2065	IF_ADDR_LOCK(ifp);
2066#else
2067	if_maddr_rlock(ifp);
2068#endif
2069	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2070		if (ifma->ifma_addr->sa_family != AF_LINK)
2071			continue;
2072
2073		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2074			break;
2075
2076		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2077		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2078		mcnt++;
2079	}
2080#if __FreeBSD_version < 800000
2081	IF_ADDR_UNLOCK(ifp);
2082#else
2083	if_maddr_runlock(ifp);
2084#endif
2085
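	/*
	** Too many groups for the hardware filter table: fall back to
	** accepting all multicast (MPE) instead.
	*/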
2086	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2087		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2088		reg_rctl |= E1000_RCTL_MPE;
2089		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090	} else
2091		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2092}
2093
2094
2095/*********************************************************************
2096 *  Timer routine:
2097 *  	This routine checks for link status,
2098 *	updates statistics, and does the watchdog.
2099 *
2100 **********************************************************************/
2101
2102static void
2103igb_local_timer(void *arg)
2104{
2105	struct adapter		*adapter = arg;
2106	device_t		dev = adapter->dev;
2107	struct ifnet		*ifp = adapter->ifp;
2108	struct tx_ring		*txr = adapter->tx_rings;
2109	struct igb_queue	*que = adapter->queues;
2110	int			hung = 0, busy = 0;
2111
2112
2113	IGB_CORE_LOCK_ASSERT(adapter);
2114
2115	igb_update_link_status(adapter);
2116	igb_update_stats_counters(adapter);
2117
2118        /*
2119        ** Check the TX queues status
2120	**	- central locked handling of OACTIVE
2121	**	- watchdog only if all queues show hung
2122        */
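	/*
	** A queue stalled while we are receiving PAUSE frames is not
	** counted as hung; flow control back-pressure is expected to
	** clear on its own.
	*/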
2123	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2124		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2125		    (adapter->pause_frames == 0))
2126			++hung;
2127		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2128			++busy;
2129		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2130			taskqueue_enqueue(que->tq, &que->que_task);
2131	}
2132	if (hung == adapter->num_queues)
2133		goto timeout;
2134	if (busy == adapter->num_queues)
2135		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2136	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2137	    (busy < adapter->num_queues))
2138		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2139
2140	adapter->pause_frames = 0;
2141	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2142#ifndef DEVICE_POLLING
2143	/* Schedule all queue interrupts - deadlock protection */
2144	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2145#endif
2146	return;
2147
2148timeout:
2149	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2150	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2151            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2152            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2153	device_printf(dev, "TX(%d) desc avail = %d, "
2154            "Next TX to Clean = %d\n",
2155            txr->me, txr->tx_avail, txr->next_to_clean);
2156	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2157	adapter->watchdog_events++;
2158	igb_init_locked(adapter);
2159}
2160
2161static void
2162igb_update_link_status(struct adapter *adapter)
2163{
2164	struct e1000_hw		*hw = &adapter->hw;
2165	struct e1000_fc_info	*fc = &hw->fc;
2166	struct ifnet		*ifp = adapter->ifp;
2167	device_t		dev = adapter->dev;
2168	struct tx_ring		*txr = adapter->tx_rings;
2169	u32			link_check, thstat, ctrl;
2170	char			*flowctl = NULL;
2171
2172	link_check = thstat = ctrl = 0;
2173
2174	/* Get the cached link value or read for real */
2175        switch (hw->phy.media_type) {
2176        case e1000_media_type_copper:
2177                if (hw->mac.get_link_status) {
2178			/* Do the work to read phy */
2179                        e1000_check_for_link(hw);
2180                        link_check = !hw->mac.get_link_status;
2181                } else
2182                        link_check = TRUE;
2183                break;
2184        case e1000_media_type_fiber:
2185                e1000_check_for_link(hw);
2186                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2187                                 E1000_STATUS_LU);
2188                break;
2189        case e1000_media_type_internal_serdes:
2190                e1000_check_for_link(hw);
2191                link_check = adapter->hw.mac.serdes_has_link;
2192                break;
2193	/* VF device is type_unknown */
2194        case e1000_media_type_unknown:
2195                e1000_check_for_link(hw);
2196		link_check = !hw->mac.get_link_status;
2197		/* Fall thru */
2198        default:
2199                break;
2200        }
2201
2202	/* Check for thermal downshift or shutdown */
2203	if (hw->mac.type == e1000_i350) {
2204		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2205		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2206	}
2207
2208	/* Get the flow control for display */
2209	switch (fc->current_mode) {
2210	case e1000_fc_rx_pause:
2211		flowctl = "RX";
2212		break;
2213	case e1000_fc_tx_pause:
2214		flowctl = "TX";
2215		break;
2216	case e1000_fc_full:
2217		flowctl = "Full";
2218		break;
2219	case e1000_fc_none:
2220	default:
2221		flowctl = "None";
2222		break;
2223	}
2224
2225	/* Now we check if a transition has happened */
2226	if (link_check && (adapter->link_active == 0)) {
2227		e1000_get_speed_and_duplex(&adapter->hw,
2228		    &adapter->link_speed, &adapter->link_duplex);
2229		if (bootverbose)
2230			device_printf(dev, "Link is up %d Mbps %s,"
2231			    " Flow Control: %s\n",
2232			    adapter->link_speed,
2233			    ((adapter->link_duplex == FULL_DUPLEX) ?
2234			    "Full Duplex" : "Half Duplex"), flowctl);
2235		adapter->link_active = 1;
2236		ifp->if_baudrate = adapter->link_speed * 1000000;
2237		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2238		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2239			device_printf(dev, "Link: thermal downshift\n");
2240		/* Delay Link Up for Phy update */
2241		if (((hw->mac.type == e1000_i210) ||
2242		    (hw->mac.type == e1000_i211)) &&
2243		    (hw->phy.id == I210_I_PHY_ID))
2244			msec_delay(I210_LINK_DELAY);
2245		/* Reset if the media type changed. */
2246		if (hw->dev_spec._82575.media_changed) {
2247			hw->dev_spec._82575.media_changed = false;
2248			adapter->flags |= IGB_MEDIA_RESET;
2249			igb_reset(adapter);
2250		}
2251		/* This can sleep */
2252		if_link_state_change(ifp, LINK_STATE_UP);
2253	} else if (!link_check && (adapter->link_active == 1)) {
2254		ifp->if_baudrate = adapter->link_speed = 0;
2255		adapter->link_duplex = 0;
2256		if (bootverbose)
2257			device_printf(dev, "Link is Down\n");
2258		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2259		    (thstat & E1000_THSTAT_PWR_DOWN))
2260			device_printf(dev, "Link: thermal shutdown\n");
2261		adapter->link_active = 0;
2262		/* This can sleep */
2263		if_link_state_change(ifp, LINK_STATE_DOWN);
2264		/* Reset queue state */
2265		for (int i = 0; i < adapter->num_queues; i++, txr++)
2266			txr->queue_status = IGB_QUEUE_IDLE;
2267	}
2268}
2269
2270/*********************************************************************
2271 *
2272 *  This routine disables all traffic on the adapter by issuing a
2273 *  global reset on the MAC and deallocates TX/RX buffers.
2274 *
2275 **********************************************************************/
2276
2277static void
2278igb_stop(void *arg)
2279{
2280	struct adapter	*adapter = arg;
2281	struct ifnet	*ifp = adapter->ifp;
2282	struct tx_ring *txr = adapter->tx_rings;
2283
2284	IGB_CORE_LOCK_ASSERT(adapter);
2285
2286	INIT_DEBUGOUT("igb_stop: begin");
2287
2288	igb_disable_intr(adapter);
2289
2290	callout_stop(&adapter->timer);
2291
2292	/* Tell the stack that the interface is no longer active */
2293	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2294	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2295
2296	/* Disarm watchdog timer. */
2297	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2298		IGB_TX_LOCK(txr);
2299		txr->queue_status = IGB_QUEUE_IDLE;
2300		IGB_TX_UNLOCK(txr);
2301	}
2302
2303	e1000_reset_hw(&adapter->hw);
2304	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2305
2306	e1000_led_off(&adapter->hw);
2307	e1000_cleanup_led(&adapter->hw);
2308}
2309
2310
2311/*********************************************************************
2312 *
2313 *  Determine hardware revision.
2314 *
2315 **********************************************************************/
2316static void
2317igb_identify_hardware(struct adapter *adapter)
2318{
2319	device_t dev = adapter->dev;
2320
2321	/* Make sure our PCI config space has the necessary stuff set */
2322	pci_enable_busmaster(dev);
2323	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2324
2325	/* Save off the information about this board */
2326	adapter->hw.vendor_id = pci_get_vendor(dev);
2327	adapter->hw.device_id = pci_get_device(dev);
2328	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2329	adapter->hw.subsystem_vendor_id =
2330	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2331	adapter->hw.subsystem_device_id =
2332	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2333
2334	/* Set MAC type early for PCI setup */
2335	e1000_set_mac_type(&adapter->hw);
2336
2337	/* Are we a VF device? */
2338	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2339	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2340		adapter->vf_ifp = 1;
2341	else
2342		adapter->vf_ifp = 0;
2343}
2344
2345static int
2346igb_allocate_pci_resources(struct adapter *adapter)
2347{
2348	device_t	dev = adapter->dev;
2349	int		rid;
2350
2351	rid = PCIR_BAR(0);
2352	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2353	    &rid, RF_ACTIVE);
2354	if (adapter->pci_mem == NULL) {
2355		device_printf(dev, "Unable to allocate bus resource: memory\n");
2356		return (ENXIO);
2357	}
2358	adapter->osdep.mem_bus_space_tag =
2359	    rman_get_bustag(adapter->pci_mem);
2360	adapter->osdep.mem_bus_space_handle =
2361	    rman_get_bushandle(adapter->pci_mem);
2362	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2363
2364	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2365
2366	/* This will setup either MSI/X or MSI */
2367	adapter->msix = igb_setup_msix(adapter);
2368	adapter->hw.back = &adapter->osdep;
2369
2370	return (0);
2371}
2372
2373/*********************************************************************
2374 *
2375 *  Setup the Legacy or MSI Interrupt handler
2376 *
2377 **********************************************************************/
2378static int
2379igb_allocate_legacy(struct adapter *adapter)
2380{
2381	device_t		dev = adapter->dev;
2382	struct igb_queue	*que = adapter->queues;
2383#ifndef IGB_LEGACY_TX
2384	struct tx_ring		*txr = adapter->tx_rings;
2385#endif
2386	int			error, rid = 0;
2387
2388	/* Turn off all interrupts */
2389	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2390
2391	/* MSI RID is 1 */
2392	if (adapter->msix == 1)
2393		rid = 1;
2394
2395	/* We allocate a single interrupt resource */
2396	adapter->res = bus_alloc_resource_any(dev,
2397	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2398	if (adapter->res == NULL) {
2399		device_printf(dev, "Unable to allocate bus resource: "
2400		    "interrupt\n");
2401		return (ENXIO);
2402	}
2403
2404#ifndef IGB_LEGACY_TX
2405	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2406#endif
2407
2408	/*
2409	 * Try allocating a fast interrupt and the associated deferred
2410	 * processing contexts.
2411	 */
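	/*
	 * igb_irq_fast is registered below as a filter handler: it runs in
	 * interrupt context, must not sleep, and hands the real work to
	 * que_task/link_task on the taskqueue created here.
	 */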
2412	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2413	/* Make tasklet for deferred link handling */
2414	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2415	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2416	    taskqueue_thread_enqueue, &que->tq);
2417	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2418	    device_get_nameunit(adapter->dev));
2419	if ((error = bus_setup_intr(dev, adapter->res,
2420	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2421	    adapter, &adapter->tag)) != 0) {
2422		device_printf(dev, "Failed to register fast interrupt "
2423			    "handler: %d\n", error);
2424		taskqueue_free(que->tq);
2425		que->tq = NULL;
2426		return (error);
2427	}
2428
2429	return (0);
2430}
2431
2432
2433/*********************************************************************
2434 *
2435 *  Setup the MSIX Queue Interrupt handlers:
2436 *
2437 **********************************************************************/
2438static int
2439igb_allocate_msix(struct adapter *adapter)
2440{
2441	device_t		dev = adapter->dev;
2442	struct igb_queue	*que = adapter->queues;
2443	int			error, rid, vector = 0;
2444
2445	/* Be sure to start with all interrupts disabled */
2446	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2447	E1000_WRITE_FLUSH(&adapter->hw);
2448
2449	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2450		rid = vector + 1;
2451		que->res = bus_alloc_resource_any(dev,
2452		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2453		if (que->res == NULL) {
2454			device_printf(dev,
2455			    "Unable to allocate bus resource: "
2456			    "MSIX Queue Interrupt\n");
2457			return (ENXIO);
2458		}
2459		error = bus_setup_intr(dev, que->res,
2460	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2461		    igb_msix_que, que, &que->tag);
2462		if (error) {
2463			que->res = NULL;
2464			device_printf(dev, "Failed to register Queue handler\n");
2465			return (error);
2466		}
2467#if __FreeBSD_version >= 800504
2468		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2469#endif
2470		que->msix = vector;
2471		if (adapter->hw.mac.type == e1000_82575)
2472			que->eims = E1000_EICR_TX_QUEUE0 << i;
2473		else
2474			que->eims = 1 << vector;
2475		/*
2476		** Bind the msix vector, and thus the
2477		** rings to the corresponding cpu.
2478		*/
2479		if (adapter->num_queues > 1) {
2480			if (igb_last_bind_cpu < 0)
2481				igb_last_bind_cpu = CPU_FIRST();
2482			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2483			device_printf(dev,
2484				"Bound queue %d to cpu %d\n",
2485				i, igb_last_bind_cpu);
2486			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2487		}
2488#ifndef IGB_LEGACY_TX
2489		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2490		    que->txr);
2491#endif
2492		/* Make tasklet for deferred handling */
2493		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2494		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2495		    taskqueue_thread_enqueue, &que->tq);
2496		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2497		    device_get_nameunit(adapter->dev));
2498	}
2499
2500	/* And Link */
2501	rid = vector + 1;
2502	adapter->res = bus_alloc_resource_any(dev,
2503	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2504	if (adapter->res == NULL) {
2505		device_printf(dev,
2506		    "Unable to allocate bus resource: "
2507		    "MSIX Link Interrupt\n");
2508		return (ENXIO);
2509	}
2510	if ((error = bus_setup_intr(dev, adapter->res,
2511	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2512	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2513		device_printf(dev, "Failed to register Link handler\n");
2514		return (error);
2515	}
2516#if __FreeBSD_version >= 800504
2517	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2518#endif
2519	adapter->linkvec = vector;
2520
2521	return (0);
2522}
2523
2524
2525static void
2526igb_configure_queues(struct adapter *adapter)
2527{
2528	struct	e1000_hw	*hw = &adapter->hw;
2529	struct	igb_queue	*que;
2530	u32			tmp, ivar = 0, newitr = 0;
2531
2532	/* First turn on RSS capability */
2533	if (adapter->hw.mac.type != e1000_82575)
2534		E1000_WRITE_REG(hw, E1000_GPIE,
2535		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2536		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2537
2538	/* Turn on MSIX */
2539	switch (adapter->hw.mac.type) {
2540	case e1000_82580:
2541	case e1000_i350:
2542	case e1000_i354:
2543	case e1000_i210:
2544	case e1000_i211:
2545	case e1000_vfadapt:
2546	case e1000_vfadapt_i350:
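		/*
		** Each IVAR register on these MACs carries the vectors for
		** two queues: RX causes occupy bytes 0 and 2, TX causes
		** bytes 1 and 3, each entry being the MSIX vector number
		** with the VALID bit set, as the masks below reflect.
		*/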
2547		/* RX entries */
2548		for (int i = 0; i < adapter->num_queues; i++) {
2549			u32 index = i >> 1;
2550			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2551			que = &adapter->queues[i];
2552			if (i & 1) {
2553				ivar &= 0xFF00FFFF;
2554				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2555			} else {
2556				ivar &= 0xFFFFFF00;
2557				ivar |= que->msix | E1000_IVAR_VALID;
2558			}
2559			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2560		}
2561		/* TX entries */
2562		for (int i = 0; i < adapter->num_queues; i++) {
2563			u32 index = i >> 1;
2564			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2565			que = &adapter->queues[i];
2566			if (i & 1) {
2567				ivar &= 0x00FFFFFF;
2568				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2569			} else {
2570				ivar &= 0xFFFF00FF;
2571				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2572			}
2573			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2574			adapter->que_mask |= que->eims;
2575		}
2576
2577		/* And for the link interrupt */
2578		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2579		adapter->link_mask = 1 << adapter->linkvec;
2580		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2581		break;
2582	case e1000_82576:
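		/*
		** The 82576 folds queues 0-7 and 8-15 into the same eight
		** IVAR registers: low queues use bytes 0 (RX) and 1 (TX),
		** high queues use bytes 2 (RX) and 3 (TX).
		*/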
2583		/* RX entries */
2584		for (int i = 0; i < adapter->num_queues; i++) {
2585			u32 index = i & 0x7; /* Each IVAR has two entries */
2586			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2587			que = &adapter->queues[i];
2588			if (i < 8) {
2589				ivar &= 0xFFFFFF00;
2590				ivar |= que->msix | E1000_IVAR_VALID;
2591			} else {
2592				ivar &= 0xFF00FFFF;
2593				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2594			}
2595			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2596			adapter->que_mask |= que->eims;
2597		}
2598		/* TX entries */
2599		for (int i = 0; i < adapter->num_queues; i++) {
2600			u32 index = i & 0x7; /* Each IVAR has two entries */
2601			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2602			que = &adapter->queues[i];
2603			if (i < 8) {
2604				ivar &= 0xFFFF00FF;
2605				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2606			} else {
2607				ivar &= 0x00FFFFFF;
2608				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2609			}
2610			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2611			adapter->que_mask |= que->eims;
2612		}
2613
2614		/* And for the link interrupt */
2615		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2616		adapter->link_mask = 1 << adapter->linkvec;
2617		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2618		break;
2619
2620	case e1000_82575:
2621                /* enable MSI-X support */
2622		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2623                tmp |= E1000_CTRL_EXT_PBA_CLR;
2624                /* Auto-Mask interrupts upon ICR read. */
2625                tmp |= E1000_CTRL_EXT_EIAME;
2626                tmp |= E1000_CTRL_EXT_IRCA;
2627                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2628
2629		/* Queues */
2630		for (int i = 0; i < adapter->num_queues; i++) {
2631			que = &adapter->queues[i];
2632			tmp = E1000_EICR_RX_QUEUE0 << i;
2633			tmp |= E1000_EICR_TX_QUEUE0 << i;
2634			que->eims = tmp;
2635			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2636			    i, que->eims);
2637			adapter->que_mask |= que->eims;
2638		}
2639
2640		/* Link */
2641		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2642		    E1000_EIMS_OTHER);
2643		adapter->link_mask |= E1000_EIMS_OTHER;
2644	default:
2645		break;
2646	}
2647
2648	/* Set the starting interrupt rate */
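	/*
	** e.g. a target of 8000 interrupts per second yields
	** 4000000 / 8000 = 500 (0x1F4), which the 0x7FFC mask
	** leaves unchanged.
	*/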
2649	if (igb_max_interrupt_rate > 0)
2650		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2651
2652        if (hw->mac.type == e1000_82575)
2653                newitr |= newitr << 16;
2654        else
2655                newitr |= E1000_EITR_CNT_IGNR;
2656
2657	for (int i = 0; i < adapter->num_queues; i++) {
2658		que = &adapter->queues[i];
2659		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2660	}
2661
2662	return;
2663}
2664
2665
2666static void
2667igb_free_pci_resources(struct adapter *adapter)
2668{
2669	struct		igb_queue *que = adapter->queues;
2670	device_t	dev = adapter->dev;
2671	int		rid;
2672
2673	/*
2674	** There is a slight possibility of a failure mode
2675	** in attach that will result in entering this function
2676	** before interrupt resources have been initialized, and
2677	** in that case we do not want to execute the loops below.
2678	** We can detect this reliably by checking the state of
2679	** the adapter's res pointer.
2680	*/
2681	if (adapter->res == NULL)
2682		goto mem;
2683
2684	/*
2685	 * First release all the interrupt resources:
2686	 */
2687	for (int i = 0; i < adapter->num_queues; i++, que++) {
2688		rid = que->msix + 1;
2689		if (que->tag != NULL) {
2690			bus_teardown_intr(dev, que->res, que->tag);
2691			que->tag = NULL;
2692		}
2693		if (que->res != NULL)
2694			bus_release_resource(dev,
2695			    SYS_RES_IRQ, rid, que->res);
2696	}
2697
2698	/* Clean the Legacy or Link interrupt last */
2699	if (adapter->linkvec) /* we are doing MSIX */
2700		rid = adapter->linkvec + 1;
2701	else
2702		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2703
2704	que = adapter->queues;
2705	if (adapter->tag != NULL) {
2706		taskqueue_drain(que->tq, &adapter->link_task);
2707		bus_teardown_intr(dev, adapter->res, adapter->tag);
2708		adapter->tag = NULL;
2709	}
2710	if (adapter->res != NULL)
2711		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2712
2713	for (int i = 0; i < adapter->num_queues; i++, que++) {
2714		if (que->tq != NULL) {
2715#ifndef IGB_LEGACY_TX
2716			taskqueue_drain(que->tq, &que->txr->txq_task);
2717#endif
2718			taskqueue_drain(que->tq, &que->que_task);
2719			taskqueue_free(que->tq);
2720		}
2721	}
2722mem:
2723	if (adapter->msix)
2724		pci_release_msi(dev);
2725
2726	if (adapter->msix_mem != NULL)
2727		bus_release_resource(dev, SYS_RES_MEMORY,
2728		    adapter->memrid, adapter->msix_mem);
2729
2730	if (adapter->pci_mem != NULL)
2731		bus_release_resource(dev, SYS_RES_MEMORY,
2732		    PCIR_BAR(0), adapter->pci_mem);
2733
2734}
2735
2736/*
2737 * Setup Either MSI/X or MSI
2738 */
2739static int
2740igb_setup_msix(struct adapter *adapter)
2741{
2742	device_t	dev = adapter->dev;
2743	int		bar, want, queues, msgs, maxqueues;
2744
2745	/* tuneable override */
2746	if (igb_enable_msix == 0)
2747		goto msi;
2748
2749	/* First try MSI/X */
2750	msgs = pci_msix_count(dev);
2751	if (msgs == 0)
2752		goto msi;
2753	/*
2754	** Some new devices, as with ixgbe, now may
2755	** use a different BAR, so we need to keep
2756	** track of which is used.
2757	*/
2758	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2759	bar = pci_read_config(dev, adapter->memrid, 4);
2760	if (bar == 0) /* use next bar */
2761		adapter->memrid += 4;
2762	adapter->msix_mem = bus_alloc_resource_any(dev,
2763	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2764       	if (adapter->msix_mem == NULL) {
2765		/* May not be enabled */
2766		device_printf(adapter->dev,
2767		    "Unable to map MSIX table \n");
2768		goto msi;
2769	}
2770
2771	/* Figure out a reasonable auto config value */
2772	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2773
2774	/* Manual override */
2775	if (igb_num_queues != 0)
2776		queues = igb_num_queues;
2777
2778	/* Sanity check based on HW */
2779	switch (adapter->hw.mac.type) {
2780		case e1000_82575:
2781			maxqueues = 4;
2782			break;
2783		case e1000_82576:
2784		case e1000_82580:
2785		case e1000_i350:
2786		case e1000_i354:
2787			maxqueues = 8;
2788			break;
2789		case e1000_i210:
2790			maxqueues = 4;
2791			break;
2792		case e1000_i211:
2793			maxqueues = 2;
2794			break;
2795		default:  /* VF interfaces */
2796			maxqueues = 1;
2797			break;
2798	}
2799	if (queues > maxqueues)
2800		queues = maxqueues;
2801
2802	/* Manual override */
2803	if (igb_num_queues != 0)
2804		queues = igb_num_queues;
2805
2806	/*
2807	** One vector (RX/TX pair) per queue
2808	** plus an additional for Link interrupt
2809	*/
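	/* e.g. a 4-queue configuration asks for 4 + 1 = 5 MSIX messages */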
2810	want = queues + 1;
2811	if (msgs >= want)
2812		msgs = want;
2813	else {
2814               	device_printf(adapter->dev,
2815		    "MSIX Configuration Problem, "
2816		    "%d vectors configured, but %d queues wanted!\n",
2817		    msgs, want);
2818		goto msi;
2819	}
2820	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2821               	device_printf(adapter->dev,
2822		    "Using MSIX interrupts with %d vectors\n", msgs);
2823		adapter->num_queues = queues;
2824		return (msgs);
2825	}
2826	/*
2827	** If MSIX alloc failed or provided us with
2828	** less than needed, free and fall through to MSI
2829	*/
2830	pci_release_msi(dev);
2831
2832msi:
2833       	if (adapter->msix_mem != NULL) {
2834		bus_release_resource(dev, SYS_RES_MEMORY,
2835		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2836		adapter->msix_mem = NULL;
2837	}
2838       	msgs = 1;
2839	if (pci_alloc_msi(dev, &msgs) == 0) {
2840		device_printf(adapter->dev," Using an MSI interrupt\n");
2841		return (msgs);
2842	}
2843	device_printf(adapter->dev," Using a Legacy interrupt\n");
2844	return (0);
2845}
2846
2847/*********************************************************************
2848 *
2849 *  Initialize the DMA Coalescing feature
2850 *
2851 **********************************************************************/
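/*
 * DMA coalescing is intended to batch the device's DMA activity so the
 * platform can stay in low-power states longer; the thresholds written
 * below are derived from the packet buffer allocation (pba) and the
 * maximum frame size.
 */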
2852static void
2853igb_init_dmac(struct adapter *adapter, u32 pba)
2854{
2855	device_t	dev = adapter->dev;
2856	struct e1000_hw *hw = &adapter->hw;
2857	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2858	u16		hwm;
2859
2860	if (hw->mac.type == e1000_i211)
2861		return;
2862
2863	if (hw->mac.type > e1000_82580) {
2864
2865		if (adapter->dmac == 0) { /* Disabling it */
2866			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2867			return;
2868		} else
2869			device_printf(dev, "DMA Coalescing enabled\n");
2870
2871		/* Set starting threshold */
2872		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2873
2874		hwm = 64 * pba - adapter->max_frame_size / 16;
2875		if (hwm < 64 * (pba - 6))
2876			hwm = 64 * (pba - 6);
2877		reg = E1000_READ_REG(hw, E1000_FCRTC);
2878		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2879		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2880		    & E1000_FCRTC_RTH_COAL_MASK);
2881		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2882
2883
2884		dmac = pba - adapter->max_frame_size / 512;
2885		if (dmac < pba - 10)
2886			dmac = pba - 10;
2887		reg = E1000_READ_REG(hw, E1000_DMACR);
2888		reg &= ~E1000_DMACR_DMACTHR_MASK;
2889		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2890		    & E1000_DMACR_DMACTHR_MASK);
2891
2892		/* transition to L0x or L1 if available */
2893		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2894
2895		/* Check for a 2.5Gb backplane connection before
2896		* configuring the watchdog timer: on 2.5Gb links the
2897		* timer value is expressed in 12.8 usec intervals,
2898		* while on other links it is expressed in 32 usec
2899		* intervals.
2900		*/
2901		if (hw->mac.type == e1000_i354) {
2902			int status = E1000_READ_REG(hw, E1000_STATUS);
2903			if ((status & E1000_STATUS_2P5_SKU) &&
2904			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2905				reg |= ((adapter->dmac * 5) >> 6);
2906			else
2907				reg |= (adapter->dmac >> 5);
2908		} else {
2909			reg |= (adapter->dmac >> 5);
2910		}
2911
2912		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2913
2914#ifdef I210_OBFF_SUPPORT
2915		/*
2916		 * Set the OBFF Rx threshold to DMA Coalescing Rx
2917		 * threshold - 2KB and enable the feature in the
2918		 * hardware for I210.
2919		 */
2920		if (hw->mac.type == e1000_i210) {
2921			int obff = dmac - 2;
2922			reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
2923			reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
2924			reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
2925			    | E1000_DOBFFCTL_EXIT_ACT_MASK;
2926			E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
2927		}
2928#endif
2929		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2930
2931		/* Set the interval before transition */
2932		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2933		if (hw->mac.type == e1000_i350)
2934			reg |= IGB_DMCTLX_DCFLUSH_DIS;
2935		/*
2936		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so the same
2937		** 4 usec delay needs a value of 0xA (10 * 0.4 usec) rather than 0x4.
2938		*/
2939		if (hw->mac.type == e1000_i354) {
2940			int status = E1000_READ_REG(hw, E1000_STATUS);
2941			if ((status & E1000_STATUS_2P5_SKU) &&
2942			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2943				reg |= 0xA;
2944			else
2945				reg |= 0x4;
2946		} else {
2947			reg |= 0x4;
2948		}
2949
2950		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2951
2952		/* free space in tx packet buffer to wake from DMA coal */
2953		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2954		    (2 * adapter->max_frame_size)) >> 6);
2955
2956		/* make low power state decision controlled by DMA coal */
2957		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2958		reg &= ~E1000_PCIEMISC_LX_DECISION;
2959		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2960
2961	} else if (hw->mac.type == e1000_82580) {
2962		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2963		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2964		    reg & ~E1000_PCIEMISC_LX_DECISION);
2965		E1000_WRITE_REG(hw, E1000_DMACR, 0);
2966	}
2967}
2968
2969
2970/*********************************************************************
2971 *
2972 *  Set up a fresh starting state
2973 *
2974 **********************************************************************/
2975static void
2976igb_reset(struct adapter *adapter)
2977{
2978	device_t	dev = adapter->dev;
2979	struct e1000_hw *hw = &adapter->hw;
2980	struct e1000_fc_info *fc = &hw->fc;
2981	struct ifnet	*ifp = adapter->ifp;
2982	u32		pba = 0;
2983	u16		hwm;
2984
2985	INIT_DEBUGOUT("igb_reset: begin");
2986
2987	/* Let the firmware know the OS is in control */
2988	igb_get_hw_control(adapter);
2989
2990	/*
2991	 * Packet Buffer Allocation (PBA)
2992	 * Writing PBA sets the receive portion of the buffer
2993	 * the remainder is used for the transmit buffer.
2994	 */
2995	switch (hw->mac.type) {
2996	case e1000_82575:
2997		pba = E1000_PBA_32K;
2998		break;
2999	case e1000_82576:
3000	case e1000_vfadapt:
3001		pba = E1000_READ_REG(hw, E1000_RXPBS);
3002		pba &= E1000_RXPBS_SIZE_MASK_82576;
3003		break;
3004	case e1000_82580:
3005	case e1000_i350:
3006	case e1000_i354:
3007	case e1000_vfadapt_i350:
3008		pba = E1000_READ_REG(hw, E1000_RXPBS);
3009		pba = e1000_rxpbs_adjust_82580(pba);
3010		break;
3011	case e1000_i210:
3012	case e1000_i211:
3013		pba = E1000_PBA_34K;
3014	default:
3015		break;
3016	}
3017
3018	/* Special needs in case of Jumbo frames */
3019	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3020		u32 tx_space, min_tx, min_rx;
3021		pba = E1000_READ_REG(hw, E1000_PBA);
3022		tx_space = pba >> 16;
3023		pba &= 0xffff;
3024		min_tx = (adapter->max_frame_size +
3025		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3026		min_tx = roundup2(min_tx, 1024);
3027		min_tx >>= 10;
3028                min_rx = adapter->max_frame_size;
3029                min_rx = roundup2(min_rx, 1024);
3030                min_rx >>= 10;
3031		if (tx_space < min_tx &&
3032		    ((min_tx - tx_space) < pba)) {
3033			pba = pba - (min_tx - tx_space);
3034			/*
3035                         * if short on rx space, rx wins
3036                         * and must trump tx adjustment
3037			 */
3038                        if (pba < min_rx)
3039                                pba = min_rx;
3040		}
3041		E1000_WRITE_REG(hw, E1000_PBA, pba);
3042	}
3043
3044	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3045
3046	/*
3047	 * These parameters control the automatic generation (Tx) and
3048	 * response (Rx) to Ethernet PAUSE frames.
3049	 * - High water mark should allow for at least two frames to be
3050	 *   received after sending an XOFF.
3051	 * - Low water mark works best when it is very near the high water mark.
3052	 *   This allows the receiver to restart by sending XON when it has
3053	 *   drained a bit.
3054	 */
3055	hwm = min(((pba << 10) * 9 / 10),
3056	    ((pba << 10) - 2 * adapter->max_frame_size));
3057
3058	if (hw->mac.type < e1000_82576) {
3059		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3060		fc->low_water = fc->high_water - 8;
3061	} else {
3062		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3063		fc->low_water = fc->high_water - 16;
3064	}
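	/*
	 * For example, with a 34K PBA and a 1522-byte maximum frame:
	 * hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) = 31334, giving a
	 * high water mark of 31334 & 0xFFF0 = 31328 and a low water
	 * mark of 31312 (illustrative values only).
	 */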
3065
3066	fc->pause_time = IGB_FC_PAUSE_TIME;
3067	fc->send_xon = TRUE;
3068	if (adapter->fc)
3069		fc->requested_mode = adapter->fc;
3070	else
3071		fc->requested_mode = e1000_fc_default;
3072
3073	/* Issue a global reset */
3074	e1000_reset_hw(hw);
3075	E1000_WRITE_REG(hw, E1000_WUC, 0);
3076
3077	/* Reset for AutoMediaDetect */
3078	if (adapter->flags & IGB_MEDIA_RESET) {
3079		e1000_setup_init_funcs(hw, TRUE);
3080		e1000_get_bus_info(hw);
3081		adapter->flags &= ~IGB_MEDIA_RESET;
3082	}
3083
3084	if (e1000_init_hw(hw) < 0)
3085		device_printf(dev, "Hardware Initialization Failed\n");
3086
3087	/* Setup DMA Coalescing */
3088	igb_init_dmac(adapter, pba);
3089
3090	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3091	e1000_get_phy_info(hw);
3092	e1000_check_for_link(hw);
3093	return;
3094}
3095
3096/*********************************************************************
3097 *
3098 *  Setup networking device structure and register an interface.
3099 *
3100 **********************************************************************/
3101static int
3102igb_setup_interface(device_t dev, struct adapter *adapter)
3103{
3104	struct ifnet   *ifp;
3105
3106	INIT_DEBUGOUT("igb_setup_interface: begin");
3107
3108	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3109	if (ifp == NULL) {
3110		device_printf(dev, "can not allocate ifnet structure\n");
3111		return (-1);
3112	}
3113	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3114	ifp->if_init =  igb_init;
3115	ifp->if_softc = adapter;
3116	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3117	ifp->if_ioctl = igb_ioctl;
3118#ifndef IGB_LEGACY_TX
3119	ifp->if_transmit = igb_mq_start;
3120	ifp->if_qflush = igb_qflush;
3121#else
3122	ifp->if_start = igb_start;
3123	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3124	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3125	IFQ_SET_READY(&ifp->if_snd);
3126#endif
3127
3128	ether_ifattach(ifp, adapter->hw.mac.addr);
3129
3130	ifp->if_capabilities = ifp->if_capenable = 0;
3131
3132	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3133	ifp->if_capabilities |= IFCAP_TSO;
3134	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3135	ifp->if_capenable = ifp->if_capabilities;
3136
3137	/* Don't enable LRO by default */
3138	ifp->if_capabilities |= IFCAP_LRO;
3139
3140#ifdef DEVICE_POLLING
3141	ifp->if_capabilities |= IFCAP_POLLING;
3142#endif
3143
3144	/*
3145	 * Tell the upper layer(s) we
3146	 * support full VLAN capability.
3147	 */
3148	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3149	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3150			     |  IFCAP_VLAN_HWTSO
3151			     |  IFCAP_VLAN_MTU;
3152	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3153			  |  IFCAP_VLAN_HWTSO
3154			  |  IFCAP_VLAN_MTU;
3155
3156	/*
3157	** Don't turn this on by default: if vlans are
3158	** created on another pseudo device (e.g. lagg),
3159	** vlan events are not passed through and operation
3160	** breaks, but it works with HW FILTER off. If vlans
3161	** are used directly on the igb interface, you can
3162	** enable this and get full hardware tag filtering.
3163	*/
3164	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3165
3166	/*
3167	 * Specify the media types supported by this adapter and register
3168	 * callbacks to update media and link information
3169	 */
3170	ifmedia_init(&adapter->media, IFM_IMASK,
3171	    igb_media_change, igb_media_status);
3172	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3173	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3174		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3175			    0, NULL);
3176		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3177	} else {
3178		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3179		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3180			    0, NULL);
3181		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3182			    0, NULL);
3183		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3184			    0, NULL);
3185		if (adapter->hw.phy.type != e1000_phy_ife) {
3186			ifmedia_add(&adapter->media,
3187				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3188			ifmedia_add(&adapter->media,
3189				IFM_ETHER | IFM_1000_T, 0, NULL);
3190		}
3191	}
3192	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3193	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3194	return (0);
3195}
3196
3197
3198/*
3199 * Manage DMA'able memory.
3200 */
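/*
 * igb_dmamap_cb() is the callback handed to bus_dmamap_load(); on
 * success it simply records the single segment's bus address for the
 * caller.
 */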
3201static void
3202igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3203{
3204	if (error)
3205		return;
3206	*(bus_addr_t *) arg = segs[0].ds_addr;
3207}
3208
3209static int
3210igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3211        struct igb_dma_alloc *dma, int mapflags)
3212{
3213	int error;
3214
3215	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3216				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3217				BUS_SPACE_MAXADDR,	/* lowaddr */
3218				BUS_SPACE_MAXADDR,	/* highaddr */
3219				NULL, NULL,		/* filter, filterarg */
3220				size,			/* maxsize */
3221				1,			/* nsegments */
3222				size,			/* maxsegsize */
3223				0,			/* flags */
3224				NULL,			/* lockfunc */
3225				NULL,			/* lockarg */
3226				&dma->dma_tag);
3227	if (error) {
3228		device_printf(adapter->dev,
3229		    "%s: bus_dma_tag_create failed: %d\n",
3230		    __func__, error);
3231		goto fail_0;
3232	}
3233
3234	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3235	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3236	if (error) {
3237		device_printf(adapter->dev,
3238		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3239		    __func__, (uintmax_t)size, error);
3240		goto fail_2;
3241	}
3242
3243	dma->dma_paddr = 0;
3244	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3245	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3246	if (error || dma->dma_paddr == 0) {
3247		device_printf(adapter->dev,
3248		    "%s: bus_dmamap_load failed: %d\n",
3249		    __func__, error);
3250		goto fail_3;
3251	}
3252
3253	return (0);
3254
3255fail_3:
3256	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3257fail_2:
3258	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3259	bus_dma_tag_destroy(dma->dma_tag);
3260fail_0:
3261	dma->dma_map = NULL;
3262	dma->dma_tag = NULL;
3263
3264	return (error);
3265}
3266
3267static void
3268igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3269{
3270	if (dma->dma_tag == NULL)
3271		return;
3272	if (dma->dma_map != NULL) {
3273		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3274		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3275		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3276		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3277		dma->dma_map = NULL;
3278	}
3279	bus_dma_tag_destroy(dma->dma_tag);
3280	dma->dma_tag = NULL;
3281}
3282
3283
3284/*********************************************************************
3285 *
3286 *  Allocate memory for the transmit and receive rings, and then
3287 *  the descriptors associated with each, called only once at attach.
3288 *
3289 **********************************************************************/
3290static int
3291igb_allocate_queues(struct adapter *adapter)
3292{
3293	device_t dev = adapter->dev;
3294	struct igb_queue	*que = NULL;
3295	struct tx_ring		*txr = NULL;
3296	struct rx_ring		*rxr = NULL;
3297	int rsize, tsize, error = E1000_SUCCESS;
3298	int txconf = 0, rxconf = 0;
3299
3300	/* First allocate the top level queue structs */
3301	if (!(adapter->queues =
3302	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3303	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3304		device_printf(dev, "Unable to allocate queue memory\n");
3305		error = ENOMEM;
3306		goto fail;
3307	}
3308
3309	/* Next allocate the TX ring struct memory */
3310	if (!(adapter->tx_rings =
3311	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3312	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3313		device_printf(dev, "Unable to allocate TX ring memory\n");
3314		error = ENOMEM;
3315		goto tx_fail;
3316	}
3317
3318	/* Now allocate the RX */
3319	if (!(adapter->rx_rings =
3320	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3321	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3322		device_printf(dev, "Unable to allocate RX ring memory\n");
3323		error = ENOMEM;
3324		goto rx_fail;
3325	}
3326
3327	tsize = roundup2(adapter->num_tx_desc *
3328	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3329	/*
3330	 * Now set up the TX queues, txconf is needed to handle the
3331	 * possibility that things fail midcourse and we need to
3332	 * undo memory gracefully
3333	 */
3334	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3335		/* Set up some basics */
3336		txr = &adapter->tx_rings[i];
3337		txr->adapter = adapter;
3338		txr->me = i;
3339		txr->num_desc = adapter->num_tx_desc;
3340
3341		/* Initialize the TX lock */
3342		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3343		    device_get_nameunit(dev), txr->me);
3344		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3345
3346		if (igb_dma_malloc(adapter, tsize,
3347			&txr->txdma, BUS_DMA_NOWAIT)) {
3348			device_printf(dev,
3349			    "Unable to allocate TX Descriptor memory\n");
3350			error = ENOMEM;
3351			goto err_tx_desc;
3352		}
3353		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3354		bzero((void *)txr->tx_base, tsize);
3355
3356        	/* Now allocate transmit buffers for the ring */
3357        	if (igb_allocate_transmit_buffers(txr)) {
3358			device_printf(dev,
3359			    "Critical Failure setting up transmit buffers\n");
3360			error = ENOMEM;
3361			goto err_tx_desc;
3362        	}
3363#ifndef IGB_LEGACY_TX
3364		/* Allocate a buf ring */
3365		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3366		    M_WAITOK, &txr->tx_mtx);
3367#endif
3368	}
3369
3370	/*
3371	 * Next the RX queues...
3372	 */
3373	rsize = roundup2(adapter->num_rx_desc *
3374	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3375	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3376		rxr = &adapter->rx_rings[i];
3377		rxr->adapter = adapter;
3378		rxr->me = i;
3379
3380		/* Initialize the RX lock */
3381		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3382		    device_get_nameunit(dev), rxr->me);
3383		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3384
3385		if (igb_dma_malloc(adapter, rsize,
3386			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3387			device_printf(dev,
3388			    "Unable to allocate RxDescriptor memory\n");
3389			error = ENOMEM;
3390			goto err_rx_desc;
3391		}
3392		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3393		bzero((void *)rxr->rx_base, rsize);
3394
3395        	/* Allocate receive buffers for the ring*/
3396		if (igb_allocate_receive_buffers(rxr)) {
3397			device_printf(dev,
3398			    "Critical Failure setting up receive buffers\n");
3399			error = ENOMEM;
3400			goto err_rx_desc;
3401		}
3402	}
3403
3404	/*
3405	** Finally set up the queue holding structs
3406	*/
3407	for (int i = 0; i < adapter->num_queues; i++) {
3408		que = &adapter->queues[i];
3409		que->adapter = adapter;
3410		que->txr = &adapter->tx_rings[i];
3411		que->rxr = &adapter->rx_rings[i];
3412	}
3413
3414	return (0);
3415
3416err_rx_desc:
3417	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3418		igb_dma_free(adapter, &rxr->rxdma);
3419err_tx_desc:
3420	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3421		igb_dma_free(adapter, &txr->txdma);
3422	free(adapter->rx_rings, M_DEVBUF);
3423rx_fail:
3424#ifndef IGB_LEGACY_TX
3425	buf_ring_free(txr->br, M_DEVBUF);
3426#endif
3427	free(adapter->tx_rings, M_DEVBUF);
3428tx_fail:
3429	free(adapter->queues, M_DEVBUF);
3430fail:
3431	return (error);
3432}
3433
3434/*********************************************************************
3435 *
3436 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3437 *  the information needed to transmit a packet on the wire. This is
3438 *  called only once at attach, setup is done every reset.
3439 *
3440 **********************************************************************/
3441static int
3442igb_allocate_transmit_buffers(struct tx_ring *txr)
3443{
3444	struct adapter *adapter = txr->adapter;
3445	device_t dev = adapter->dev;
3446	struct igb_tx_buf *txbuf;
3447	int error, i;
3448
3449	/*
3450	 * Setup DMA descriptor areas.
3451	 */
3452	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3453			       1, 0,			/* alignment, bounds */
3454			       BUS_SPACE_MAXADDR,	/* lowaddr */
3455			       BUS_SPACE_MAXADDR,	/* highaddr */
3456			       NULL, NULL,		/* filter, filterarg */
3457			       IGB_TSO_SIZE,		/* maxsize */
3458			       IGB_MAX_SCATTER,		/* nsegments */
3459			       PAGE_SIZE,		/* maxsegsize */
3460			       0,			/* flags */
3461			       NULL,			/* lockfunc */
3462			       NULL,			/* lockfuncarg */
3463			       &txr->txtag))) {
3464		device_printf(dev,"Unable to allocate TX DMA tag\n");
3465		goto fail;
3466	}
3467
3468	if (!(txr->tx_buffers =
3469	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3470	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3471		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3472		error = ENOMEM;
3473		goto fail;
3474	}
3475
3476        /* Create the descriptor buffer dma maps */
3477	txbuf = txr->tx_buffers;
3478	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3479		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3480		if (error != 0) {
3481			device_printf(dev, "Unable to create TX DMA map\n");
3482			goto fail;
3483		}
3484	}
3485
3486	return 0;
3487fail:
3488	/* We free all; this handles the case where setup failed partway through */
3489	igb_free_transmit_structures(adapter);
3490	return (error);
3491}
3492
3493/*********************************************************************
3494 *
3495 *  Initialize a transmit ring.
3496 *
3497 **********************************************************************/
3498static void
3499igb_setup_transmit_ring(struct tx_ring *txr)
3500{
3501	struct adapter *adapter = txr->adapter;
3502	struct igb_tx_buf *txbuf;
3503	int i;
3504#ifdef DEV_NETMAP
3505	struct netmap_adapter *na = NA(adapter->ifp);
3506	struct netmap_slot *slot;
3507#endif /* DEV_NETMAP */
3508
3509	/* Clear the old descriptor contents */
3510	IGB_TX_LOCK(txr);
3511#ifdef DEV_NETMAP
3512	slot = netmap_reset(na, NR_TX, txr->me, 0);
3513#endif /* DEV_NETMAP */
3514	bzero((void *)txr->tx_base,
3515	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3516	/* Reset indices */
3517	txr->next_avail_desc = 0;
3518	txr->next_to_clean = 0;
3519
3520	/* Free any existing tx buffers. */
3521        txbuf = txr->tx_buffers;
3522	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3523		if (txbuf->m_head != NULL) {
3524			bus_dmamap_sync(txr->txtag, txbuf->map,
3525			    BUS_DMASYNC_POSTWRITE);
3526			bus_dmamap_unload(txr->txtag, txbuf->map);
3527			m_freem(txbuf->m_head);
3528			txbuf->m_head = NULL;
3529		}
3530#ifdef DEV_NETMAP
3531		if (slot) {
3532			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3533			/* no need to set the address */
3534			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3535		}
3536#endif /* DEV_NETMAP */
3537		/* clear the watch index */
3538		txbuf->eop = NULL;
3539        }
3540
3541	/* Set number of descriptors available */
3542	txr->tx_avail = adapter->num_tx_desc;
3543
3544	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3545	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3546	IGB_TX_UNLOCK(txr);
3547}
3548
3549/*********************************************************************
3550 *
3551 *  Initialize all transmit rings.
3552 *
3553 **********************************************************************/
3554static void
3555igb_setup_transmit_structures(struct adapter *adapter)
3556{
3557	struct tx_ring *txr = adapter->tx_rings;
3558
3559	for (int i = 0; i < adapter->num_queues; i++, txr++)
3560		igb_setup_transmit_ring(txr);
3561
3562	return;
3563}
3564
3565/*********************************************************************
3566 *
3567 *  Enable transmit unit.
3568 *
3569 **********************************************************************/
3570static void
3571igb_initialize_transmit_units(struct adapter *adapter)
3572{
3573	struct tx_ring	*txr = adapter->tx_rings;
3574	struct e1000_hw *hw = &adapter->hw;
3575	u32		tctl, txdctl;
3576
3577	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3578	tctl = txdctl = 0;
3579
3580	/* Setup the Tx Descriptor Rings */
3581	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3582		u64 bus_addr = txr->txdma.dma_paddr;
3583
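		/*
		** TDLEN is in bytes.  The legacy and advanced TX descriptor
		** formats are the same size (16 bytes), so sizeof the legacy
		** struct matches the advanced descriptors actually used here.
		*/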
3584		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3585		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3586		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3587		    (uint32_t)(bus_addr >> 32));
3588		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3589		    (uint32_t)bus_addr);
3590
3591		/* Setup the HW Tx Head and Tail descriptor pointers */
3592		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3593		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3594
3595		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3596		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3597		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3598
3599		txr->queue_status = IGB_QUEUE_IDLE;
3600
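		/*
		** The prefetch, host and write-back thresholds are packed
		** into TXDCTL at bit offsets 0, 8 and 16 respectively.
		*/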
3601		txdctl |= IGB_TX_PTHRESH;
3602		txdctl |= IGB_TX_HTHRESH << 8;
3603		txdctl |= IGB_TX_WTHRESH << 16;
3604		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3605		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3606	}
3607
3608	if (adapter->vf_ifp)
3609		return;
3610
3611	e1000_config_collision_dist(hw);
3612
3613	/* Program the Transmit Control Register */
3614	tctl = E1000_READ_REG(hw, E1000_TCTL);
3615	tctl &= ~E1000_TCTL_CT;
3616	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3617		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3618
3619	/* This write will effectively turn on the transmit unit. */
3620	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3621}
3622
3623/*********************************************************************
3624 *
3625 *  Free all transmit rings.
3626 *
3627 **********************************************************************/
3628static void
3629igb_free_transmit_structures(struct adapter *adapter)
3630{
3631	struct tx_ring *txr = adapter->tx_rings;
3632
3633	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3634		IGB_TX_LOCK(txr);
3635		igb_free_transmit_buffers(txr);
3636		igb_dma_free(adapter, &txr->txdma);
3637		IGB_TX_UNLOCK(txr);
3638		IGB_TX_LOCK_DESTROY(txr);
3639	}
3640	free(adapter->tx_rings, M_DEVBUF);
3641}
3642
3643/*********************************************************************
3644 *
3645 *  Free transmit ring related data structures.
3646 *
3647 **********************************************************************/
3648static void
3649igb_free_transmit_buffers(struct tx_ring *txr)
3650{
3651	struct adapter *adapter = txr->adapter;
3652	struct igb_tx_buf *tx_buffer;
3653	int             i;
3654
3655	INIT_DEBUGOUT("free_transmit_ring: begin");
3656
3657	if (txr->tx_buffers == NULL)
3658		return;
3659
3660	tx_buffer = txr->tx_buffers;
3661	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3662		if (tx_buffer->m_head != NULL) {
3663			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3664			    BUS_DMASYNC_POSTWRITE);
3665			bus_dmamap_unload(txr->txtag,
3666			    tx_buffer->map);
3667			m_freem(tx_buffer->m_head);
3668			tx_buffer->m_head = NULL;
3669			if (tx_buffer->map != NULL) {
3670				bus_dmamap_destroy(txr->txtag,
3671				    tx_buffer->map);
3672				tx_buffer->map = NULL;
3673			}
3674		} else if (tx_buffer->map != NULL) {
3675			bus_dmamap_unload(txr->txtag,
3676			    tx_buffer->map);
3677			bus_dmamap_destroy(txr->txtag,
3678			    tx_buffer->map);
3679			tx_buffer->map = NULL;
3680		}
3681	}
3682#ifndef IGB_LEGACY_TX
3683	if (txr->br != NULL)
3684		buf_ring_free(txr->br, M_DEVBUF);
3685#endif
3686	if (txr->tx_buffers != NULL) {
3687		free(txr->tx_buffers, M_DEVBUF);
3688		txr->tx_buffers = NULL;
3689	}
3690	if (txr->txtag != NULL) {
3691		bus_dma_tag_destroy(txr->txtag);
3692		txr->txtag = NULL;
3693	}
3694	return;
3695}
3696
3697/**********************************************************************
3698 *
3699 *  Setup work for hardware segmentation offload (TSO) on
3700 *  adapters using advanced tx descriptors
3701 *
3702 **********************************************************************/
3703static int
3704igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3705    u32 *cmd_type_len, u32 *olinfo_status)
3706{
3707	struct adapter *adapter = txr->adapter;
3708	struct e1000_adv_tx_context_desc *TXD;
3709	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3710	u32 mss_l4len_idx = 0, paylen;
3711	u16 vtag = 0, eh_type;
3712	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3713	struct ether_vlan_header *eh;
3714#ifdef INET6
3715	struct ip6_hdr *ip6;
3716#endif
3717#ifdef INET
3718	struct ip *ip;
3719#endif
3720	struct tcphdr *th;
3721
3722
3723	/*
3724	 * Determine where frame payload starts.
3725	 * Jump over vlan headers if already present
3726	 */
3727	eh = mtod(mp, struct ether_vlan_header *);
3728	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3729		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3730		eh_type = eh->evl_proto;
3731	} else {
3732		ehdrlen = ETHER_HDR_LEN;
3733		eh_type = eh->evl_encap_proto;
3734	}
3735
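	/*
	** For TSO the hardware expects the TCP checksum field to be
	** pre-seeded with the pseudo-header checksum (computed without the
	** length), which is what the in_pseudo()/in6_cksum_pseudo() calls
	** below do; the IPv4 header checksum is zeroed so the hardware can
	** insert a fresh one in each segment.
	*/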
3736	switch (ntohs(eh_type)) {
3737#ifdef INET6
3738	case ETHERTYPE_IPV6:
3739		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3740		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3741		if (ip6->ip6_nxt != IPPROTO_TCP)
3742			return (ENXIO);
3743		ip_hlen = sizeof(struct ip6_hdr);
3745		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3746		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3747		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3748		break;
3749#endif
3750#ifdef INET
3751	case ETHERTYPE_IP:
3752		ip = (struct ip *)(mp->m_data + ehdrlen);
3753		if (ip->ip_p != IPPROTO_TCP)
3754			return (ENXIO);
3755		ip->ip_sum = 0;
3756		ip_hlen = ip->ip_hl << 2;
3757		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3758		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3759		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3760		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3761		/* Tell transmit desc to also do IPv4 checksum. */
3762		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3763		break;
3764#endif
3765	default:
3766		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3767		    __func__, ntohs(eh_type));
3768		break;
3769	}
3770
3771	ctxd = txr->next_avail_desc;
3772	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3773
3774	tcp_hlen = th->th_off << 2;
3775
3776	/* This is used in the transmit desc in encap */
3777	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3778
3779	/* VLAN MACLEN IPLEN */
3780	if (mp->m_flags & M_VLANTAG) {
3781		vtag = htole16(mp->m_pkthdr.ether_vtag);
3782                vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3783	}
3784
3785	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3786	vlan_macip_lens |= ip_hlen;
3787	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3788
3789	/* ADV DTYPE TUCMD */
3790	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3791	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3792	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3793
3794	/* MSS L4LEN IDX */
3795	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3796	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3797	/* 82575 needs the queue index added */
3798	if (adapter->hw.mac.type == e1000_82575)
3799		mss_l4len_idx |= txr->me << 4;
3800	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3801
3802	TXD->seqnum_seed = htole32(0);
3803
3804	if (++ctxd == txr->num_desc)
3805		ctxd = 0;
3806
3807	txr->tx_avail--;
3808	txr->next_avail_desc = ctxd;
3809	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3810	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3811	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3812	++txr->tso_tx;
3813	return (0);
3814}
3815
3816/*********************************************************************
3817 *
3818 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3819 *
3820 **********************************************************************/
3821
3822static int
3823igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3824    u32 *cmd_type_len, u32 *olinfo_status)
3825{
3826	struct e1000_adv_tx_context_desc *TXD;
3827	struct adapter *adapter = txr->adapter;
3828	struct ether_vlan_header *eh;
3829	struct ip *ip;
3830	struct ip6_hdr *ip6;
3831	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3832	int	ehdrlen, ip_hlen = 0;
3833	u16	etype;
3834	u8	ipproto = 0;
3835	int	offload = TRUE;
3836	int	ctxd = txr->next_avail_desc;
3837	u16	vtag = 0;
3838
3839	/* First check if TSO is to be used */
3840	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3841		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3842
3843	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3844		offload = FALSE;
3845
3846	/* Indicate the whole packet as payload when not doing TSO */
3847       	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3848
3849	/* Now ready a context descriptor */
3850	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3851
3852	/*
3853	** In advanced descriptors the vlan tag must
3854	** be placed into the context descriptor. Hence
3855	** we need to make one even if not doing offloads.
3856	*/
3857	if (mp->m_flags & M_VLANTAG) {
3858		vtag = htole16(mp->m_pkthdr.ether_vtag);
3859		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3860	} else if (offload == FALSE) /* ... no offload to do */
3861		return (0);
3862
3863	/*
3864	 * Determine where frame payload starts.
3865	 * Jump over vlan headers if already present,
3866	 * helpful for QinQ too.
3867	 */
3868	eh = mtod(mp, struct ether_vlan_header *);
3869	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3870		etype = ntohs(eh->evl_proto);
3871		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3872	} else {
3873		etype = ntohs(eh->evl_encap_proto);
3874		ehdrlen = ETHER_HDR_LEN;
3875	}
3876
3877	/* Set the ether header length */
3878	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3879
3880	switch (etype) {
3881		case ETHERTYPE_IP:
3882			ip = (struct ip *)(mp->m_data + ehdrlen);
3883			ip_hlen = ip->ip_hl << 2;
3884			ipproto = ip->ip_p;
3885			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3886			break;
3887		case ETHERTYPE_IPV6:
3888			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3889			ip_hlen = sizeof(struct ip6_hdr);
3890			/* XXX-BZ this will go badly in case of ext hdrs. */
3891			ipproto = ip6->ip6_nxt;
3892			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3893			break;
3894		default:
3895			offload = FALSE;
3896			break;
3897	}
3898
3899	vlan_macip_lens |= ip_hlen;
3900	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3901
3902	switch (ipproto) {
3903		case IPPROTO_TCP:
3904			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3905				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3906			break;
3907		case IPPROTO_UDP:
3908			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3909				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3910			break;
3911
3912#if __FreeBSD_version >= 800000
3913		case IPPROTO_SCTP:
3914			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3915				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3916			break;
3917#endif
3918		default:
3919			offload = FALSE;
3920			break;
3921	}
3922
3923	if (offload) /* For the TX descriptor setup */
3924		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3925
3926	/* 82575 needs the queue index added */
3927	if (adapter->hw.mac.type == e1000_82575)
3928		mss_l4len_idx = txr->me << 4;
3929
3930	/* Now copy bits into descriptor */
3931	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3932	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3933	TXD->seqnum_seed = htole32(0);
3934	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3935
3936	/* We've consumed the first desc, adjust counters */
3937	if (++ctxd == txr->num_desc)
3938		ctxd = 0;
3939	txr->next_avail_desc = ctxd;
3940	--txr->tx_avail;
3941
3942        return (0);
3943}
3944
3945/**********************************************************************
3946 *
3947 *  Examine each tx_buffer in the used queue. If the hardware is done
3948 *  processing the packet then free associated resources. The
3949 *  tx_buffer is put back on the free queue.
3950 *
3951 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3952 **********************************************************************/
3953static bool
3954igb_txeof(struct tx_ring *txr)
3955{
3956	struct adapter		*adapter = txr->adapter;
3957	struct ifnet		*ifp = adapter->ifp;
3958	u32			work, processed = 0;
3959	u16			limit = txr->process_limit;
3960	struct igb_tx_buf	*buf;
3961	union e1000_adv_tx_desc *txd;
3962
3963	mtx_assert(&txr->tx_mtx, MA_OWNED);
3964
3965#ifdef DEV_NETMAP
3966	if (netmap_tx_irq(ifp, txr->me))
3967		return (FALSE);
3968#endif /* DEV_NETMAP */
3969
3970	if (txr->tx_avail == txr->num_desc) {
3971		txr->queue_status = IGB_QUEUE_IDLE;
3972		return FALSE;
3973	}
3974
3975	/* Get work starting point */
3976	work = txr->next_to_clean;
3977	buf = &txr->tx_buffers[work];
3978	txd = &txr->tx_base[work];
3979	work -= txr->num_desc; /* The distance to ring end */
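	/*
	** Note: 'work' is kept as a negative offset from the end of the
	** ring, so it hits zero exactly when the index wraps; at that point
	** the buffer and descriptor pointers are reset to the ring base and
	** 'work' is made negative again.  Adding num_desc back at the end
	** of the loop yields the real next_to_clean index.
	*/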
3980        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3981            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3982	do {
3983		union e1000_adv_tx_desc *eop = buf->eop;
3984		if (eop == NULL) /* No work */
3985			break;
3986
3987		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3988			break;	/* I/O not complete */
3989
3990		if (buf->m_head) {
3991			txr->bytes +=
3992			    buf->m_head->m_pkthdr.len;
3993			bus_dmamap_sync(txr->txtag,
3994			    buf->map,
3995			    BUS_DMASYNC_POSTWRITE);
3996			bus_dmamap_unload(txr->txtag,
3997			    buf->map);
3998			m_freem(buf->m_head);
3999			buf->m_head = NULL;
4000		}
4001		buf->eop = NULL;
4002		++txr->tx_avail;
4003
4004		/* We clean the range if multi segment */
4005		while (txd != eop) {
4006			++txd;
4007			++buf;
4008			++work;
4009			/* wrap the ring? */
4010			if (__predict_false(!work)) {
4011				work -= txr->num_desc;
4012				buf = txr->tx_buffers;
4013				txd = txr->tx_base;
4014			}
4015			if (buf->m_head) {
4016				txr->bytes +=
4017				    buf->m_head->m_pkthdr.len;
4018				bus_dmamap_sync(txr->txtag,
4019				    buf->map,
4020				    BUS_DMASYNC_POSTWRITE);
4021				bus_dmamap_unload(txr->txtag,
4022				    buf->map);
4023				m_freem(buf->m_head);
4024				buf->m_head = NULL;
4025			}
4026			++txr->tx_avail;
4027			buf->eop = NULL;
4028
4029		}
4030		++txr->packets;
4031		++processed;
4032		++ifp->if_opackets;
4033		txr->watchdog_time = ticks;
4034
4035		/* Try the next packet */
4036		++txd;
4037		++buf;
4038		++work;
4039		/* reset with a wrap */
4040		if (__predict_false(!work)) {
4041			work -= txr->num_desc;
4042			buf = txr->tx_buffers;
4043			txd = txr->tx_base;
4044		}
4045		prefetch(txd);
4046	} while (__predict_true(--limit));
4047
4048	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4049	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4050
4051	work += txr->num_desc;
4052	txr->next_to_clean = work;
4053
4054	/*
4055	** Watchdog calculation: we know there's
4056	** work outstanding or the first return
4057	** would have been taken, so nothing processed
4058	** for too long indicates a hang.
4059	*/
4060	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4061		txr->queue_status |= IGB_QUEUE_HUNG;
4062
4063	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4064		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4065
4066	if (txr->tx_avail == txr->num_desc) {
4067		txr->queue_status = IGB_QUEUE_IDLE;
4068		return (FALSE);
4069	}
4070
4071	return (TRUE);
4072}
4073
4074/*********************************************************************
4075 *
4076 *  Refresh mbuf buffers for RX descriptor rings
4077 *   - now keeps its own state so discards due to resource
4078 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4079 *     it just returns, keeping its placeholder, so it can simply
4080 *     be called again later to retry.
4081 *
4082 **********************************************************************/
4083static void
4084igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4085{
4086	struct adapter		*adapter = rxr->adapter;
4087	bus_dma_segment_t	hseg[1];
4088	bus_dma_segment_t	pseg[1];
4089	struct igb_rx_buf	*rxbuf;
4090	struct mbuf		*mh, *mp;
4091	int			i, j, nsegs, error;
4092	bool			refreshed = FALSE;
4093
4094	i = j = rxr->next_to_refresh;
4095	/*
4096	** Get one descriptor beyond
4097	** our work mark to control
4098	** the loop.
4099        */
4100	if (++j == adapter->num_rx_desc)
4101		j = 0;
4102
4103	while (j != limit) {
4104		rxbuf = &rxr->rx_buffers[i];
4105		/* No hdr mbuf used with header split off */
4106		if (rxr->hdr_split == FALSE)
4107			goto no_split;
4108		if (rxbuf->m_head == NULL) {
4109			mh = m_gethdr(M_NOWAIT, MT_DATA);
4110			if (mh == NULL)
4111				goto update;
4112		} else
4113			mh = rxbuf->m_head;
4114
4115		mh->m_pkthdr.len = mh->m_len = MHLEN;
4117		mh->m_flags |= M_PKTHDR;
4118		/* Get the memory mapping */
4119		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4120		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4121		if (error != 0) {
4122			printf("Refresh mbufs: hdr dmamap load"
4123			    " failure - %d\n", error);
4124			m_free(mh);
4125			rxbuf->m_head = NULL;
4126			goto update;
4127		}
4128		rxbuf->m_head = mh;
4129		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4130		    BUS_DMASYNC_PREREAD);
4131		rxr->rx_base[i].read.hdr_addr =
4132		    htole64(hseg[0].ds_addr);
4133no_split:
4134		if (rxbuf->m_pack == NULL) {
4135			mp = m_getjcl(M_NOWAIT, MT_DATA,
4136			    M_PKTHDR, adapter->rx_mbuf_sz);
4137			if (mp == NULL)
4138				goto update;
4139		} else
4140			mp = rxbuf->m_pack;
4141
4142		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4143		/* Get the memory mapping */
4144		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4145		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4146		if (error != 0) {
4147			printf("Refresh mbufs: payload dmamap load"
4148			    " failure - %d\n", error);
4149			m_free(mp);
4150			rxbuf->m_pack = NULL;
4151			goto update;
4152		}
4153		rxbuf->m_pack = mp;
4154		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4155		    BUS_DMASYNC_PREREAD);
4156		rxr->rx_base[i].read.pkt_addr =
4157		    htole64(pseg[0].ds_addr);
4158		refreshed = TRUE; /* I feel wefreshed :) */
4159
4160		i = j; /* our next is precalculated */
4161		rxr->next_to_refresh = i;
4162		if (++j == adapter->num_rx_desc)
4163			j = 0;
4164	}
4165update:
4166	if (refreshed) /* update tail */
4167		E1000_WRITE_REG(&adapter->hw,
4168		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4169	return;
4170}
4171
4172
4173/*********************************************************************
4174 *
4175 *  Allocate memory for rx_buffer structures. Since we use one
4176 *  rx_buffer per received packet, the maximum number of rx_buffer's
4177 *  that we'll need is equal to the number of receive descriptors
4178 *  that we've allocated.
4179 *
4180 **********************************************************************/
4181static int
4182igb_allocate_receive_buffers(struct rx_ring *rxr)
4183{
4184	struct	adapter 	*adapter = rxr->adapter;
4185	device_t 		dev = adapter->dev;
4186	struct igb_rx_buf	*rxbuf;
4187	int             	i, bsize, error;
4188
4189	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4190	if (!(rxr->rx_buffers =
4191	    (struct igb_rx_buf *) malloc(bsize,
4192	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4193		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4194		error = ENOMEM;
4195		goto fail;
4196	}
4197
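	/*
	** Two DMA tags are used per RX ring: a small one (MSIZE) for the
	** optional split header buffers, and a larger one (up to MJUM9BYTES)
	** for the payload clusters.
	*/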
4198	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4199				   1, 0,		/* alignment, bounds */
4200				   BUS_SPACE_MAXADDR,	/* lowaddr */
4201				   BUS_SPACE_MAXADDR,	/* highaddr */
4202				   NULL, NULL,		/* filter, filterarg */
4203				   MSIZE,		/* maxsize */
4204				   1,			/* nsegments */
4205				   MSIZE,		/* maxsegsize */
4206				   0,			/* flags */
4207				   NULL,		/* lockfunc */
4208				   NULL,		/* lockfuncarg */
4209				   &rxr->htag))) {
4210		device_printf(dev, "Unable to create RX DMA tag\n");
4211		goto fail;
4212	}
4213
4214	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4215				   1, 0,		/* alignment, bounds */
4216				   BUS_SPACE_MAXADDR,	/* lowaddr */
4217				   BUS_SPACE_MAXADDR,	/* highaddr */
4218				   NULL, NULL,		/* filter, filterarg */
4219				   MJUM9BYTES,		/* maxsize */
4220				   1,			/* nsegments */
4221				   MJUM9BYTES,		/* maxsegsize */
4222				   0,			/* flags */
4223				   NULL,		/* lockfunc */
4224				   NULL,		/* lockfuncarg */
4225				   &rxr->ptag))) {
4226		device_printf(dev, "Unable to create RX payload DMA tag\n");
4227		goto fail;
4228	}
4229
4230	for (i = 0; i < adapter->num_rx_desc; i++) {
4231		rxbuf = &rxr->rx_buffers[i];
4232		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4233		if (error) {
4234			device_printf(dev,
4235			    "Unable to create RX head DMA maps\n");
4236			goto fail;
4237		}
4238		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4239		if (error) {
4240			device_printf(dev,
4241			    "Unable to create RX packet DMA maps\n");
4242			goto fail;
4243		}
4244	}
4245
4246	return (0);
4247
4248fail:
4249	/* Frees all, but can handle partial completion */
4250	igb_free_receive_structures(adapter);
4251	return (error);
4252}
4253
4254
4255static void
4256igb_free_receive_ring(struct rx_ring *rxr)
4257{
4258	struct	adapter		*adapter = rxr->adapter;
4259	struct igb_rx_buf	*rxbuf;
4260
4261
4262	for (int i = 0; i < adapter->num_rx_desc; i++) {
4263		rxbuf = &rxr->rx_buffers[i];
4264		if (rxbuf->m_head != NULL) {
4265			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4266			    BUS_DMASYNC_POSTREAD);
4267			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4268			rxbuf->m_head->m_flags |= M_PKTHDR;
4269			m_freem(rxbuf->m_head);
4270		}
4271		if (rxbuf->m_pack != NULL) {
4272			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4273			    BUS_DMASYNC_POSTREAD);
4274			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4275			rxbuf->m_pack->m_flags |= M_PKTHDR;
4276			m_freem(rxbuf->m_pack);
4277		}
4278		rxbuf->m_head = NULL;
4279		rxbuf->m_pack = NULL;
4280	}
4281}
4282
4283
4284/*********************************************************************
4285 *
4286 *  Initialize a receive ring and its buffers.
4287 *
4288 **********************************************************************/
4289static int
4290igb_setup_receive_ring(struct rx_ring *rxr)
4291{
4292	struct	adapter		*adapter;
4293	struct  ifnet		*ifp;
4294	device_t		dev;
4295	struct igb_rx_buf	*rxbuf;
4296	bus_dma_segment_t	pseg[1], hseg[1];
4297	struct lro_ctrl		*lro = &rxr->lro;
4298	int			rsize, nsegs, error = 0;
4299#ifdef DEV_NETMAP
4300	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4301	struct netmap_slot *slot;
4302#endif /* DEV_NETMAP */
4303
4304	adapter = rxr->adapter;
4305	dev = adapter->dev;
4306	ifp = adapter->ifp;
4307
4308	/* Clear the ring contents */
4309	IGB_RX_LOCK(rxr);
4310#ifdef DEV_NETMAP
4311	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4312#endif /* DEV_NETMAP */
4313	rsize = roundup2(adapter->num_rx_desc *
4314	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4315	bzero((void *)rxr->rx_base, rsize);
4316
4317	/*
4318	** Free current RX buffer structures and their mbufs
4319	*/
4320	igb_free_receive_ring(rxr);
4321
4322	/* Configure for header split? */
4323	if (igb_header_split)
4324		rxr->hdr_split = TRUE;
4325
4326        /* Now replenish the ring mbufs */
4327	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4328		struct mbuf	*mh, *mp;
4329
4330		rxbuf = &rxr->rx_buffers[j];
4331#ifdef DEV_NETMAP
4332		if (slot) {
4333			/* slot sj is mapped to the i-th NIC-ring entry */
4334			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4335			uint64_t paddr;
4336			void *addr;
4337
4338			addr = PNMB(na, slot + sj, &paddr);
4339			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4340			/* Update descriptor */
4341			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4342			continue;
4343		}
4344#endif /* DEV_NETMAP */
4345		if (rxr->hdr_split == FALSE)
4346			goto skip_head;
4347
4348		/* First the header */
4349		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4350		if (rxbuf->m_head == NULL) {
4351			error = ENOBUFS;
4352                        goto fail;
4353		}
4354		m_adj(rxbuf->m_head, ETHER_ALIGN);
4355		mh = rxbuf->m_head;
4356		mh->m_len = mh->m_pkthdr.len = MHLEN;
4357		mh->m_flags |= M_PKTHDR;
4358		/* Get the memory mapping */
4359		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4360		    rxbuf->hmap, rxbuf->m_head, hseg,
4361		    &nsegs, BUS_DMA_NOWAIT);
4362		if (error != 0) /* Nothing elegant to do here */
4363                        goto fail;
4364		bus_dmamap_sync(rxr->htag,
4365		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4366		/* Update descriptor */
4367		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4368
4369skip_head:
4370		/* Now the payload cluster */
4371		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4372		    M_PKTHDR, adapter->rx_mbuf_sz);
4373		if (rxbuf->m_pack == NULL) {
4374			error = ENOBUFS;
4375                        goto fail;
4376		}
4377		mp = rxbuf->m_pack;
4378		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4379		/* Get the memory mapping */
4380		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4381		    rxbuf->pmap, mp, pseg,
4382		    &nsegs, BUS_DMA_NOWAIT);
4383		if (error != 0)
4384                        goto fail;
4385		bus_dmamap_sync(rxr->ptag,
4386		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4387		/* Update descriptor */
4388		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4389        }
4390
4391	/* Setup our descriptor indices */
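	/*
	** Every descriptor has just been given buffers, so the ring starts
	** out full: checking begins at slot 0 and the refresh pointer sits
	** on the last slot.
	*/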
4392	rxr->next_to_check = 0;
4393	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4394	rxr->lro_enabled = FALSE;
4395	rxr->rx_split_packets = 0;
4396	rxr->rx_bytes = 0;
4397
4398	rxr->fmp = NULL;
4399	rxr->lmp = NULL;
4400
4401	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4402	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4403
4404	/*
4405	** Now set up the LRO interface; we
4406	** also only do header split when LRO
4407	** is enabled, since it is so often
4408	** undesirable in other setups.
4409	*/
4410	if (ifp->if_capenable & IFCAP_LRO) {
4411		error = tcp_lro_init(lro);
4412		if (error) {
4413			device_printf(dev, "LRO Initialization failed!\n");
4414			goto fail;
4415		}
4416		INIT_DEBUGOUT("RX LRO Initialized\n");
4417		rxr->lro_enabled = TRUE;
4418		lro->ifp = adapter->ifp;
4419	}
4420
4421	IGB_RX_UNLOCK(rxr);
4422	return (0);
4423
4424fail:
4425	igb_free_receive_ring(rxr);
4426	IGB_RX_UNLOCK(rxr);
4427	return (error);
4428}
4429
4430
4431/*********************************************************************
4432 *
4433 *  Initialize all receive rings.
4434 *
4435 **********************************************************************/
4436static int
4437igb_setup_receive_structures(struct adapter *adapter)
4438{
4439	struct rx_ring *rxr = adapter->rx_rings;
4440	int i;
4441
4442	for (i = 0; i < adapter->num_queues; i++, rxr++)
4443		if (igb_setup_receive_ring(rxr))
4444			goto fail;
4445
4446	return (0);
4447fail:
4448	/*
4449	 * Free RX buffers allocated so far, we will only handle
4450	 * the rings that completed, the failing case will have
4451	 * cleaned up for itself. 'i' is the endpoint.
4452	 */
4453	for (int j = 0; j < i; ++j) {
4454		rxr = &adapter->rx_rings[j];
4455		IGB_RX_LOCK(rxr);
4456		igb_free_receive_ring(rxr);
4457		IGB_RX_UNLOCK(rxr);
4458	}
4459
4460	return (ENOBUFS);
4461}
4462
4463/*********************************************************************
4464 *
4465 *  Enable receive unit.
4466 *
4467 **********************************************************************/
4468static void
4469igb_initialize_receive_units(struct adapter *adapter)
4470{
4471	struct rx_ring	*rxr = adapter->rx_rings;
4472	struct ifnet	*ifp = adapter->ifp;
4473	struct e1000_hw *hw = &adapter->hw;
4474	u32		rctl, rxcsum, psize, srrctl = 0;
4475
4476	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4477
4478	/*
4479	 * Make sure receives are disabled while setting
4480	 * up the descriptor ring
4481	 */
4482	rctl = E1000_READ_REG(hw, E1000_RCTL);
4483	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4484
4485	/*
4486	** Set up for header split
4487	*/
4488	if (igb_header_split) {
4489		/* Use a standard mbuf for the header */
4490		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4491		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4492	} else
4493		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4494
4495	/*
4496	** Set up for jumbo frames
4497	*/
4498	if (ifp->if_mtu > ETHERMTU) {
4499		rctl |= E1000_RCTL_LPE;
4500		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4501			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4502			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4503		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4504			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4505			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4506		}
4507		/* Set maximum packet len */
4508		psize = adapter->max_frame_size;
4509		/* are we on a vlan? */
4510		if (adapter->ifp->if_vlantrunk != NULL)
4511			psize += VLAN_TAG_SIZE;
4512		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4513	} else {
4514		rctl &= ~E1000_RCTL_LPE;
4515		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4516		rctl |= E1000_RCTL_SZ_2048;
4517	}
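	/*
	** Note: the packet buffer size above is programmed into SRRCTL in
	** 1KB units (via E1000_SRRCTL_BSIZEPKT_SHIFT), with the legacy RCTL
	** size bits kept consistent with it.
	*/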
4518
4519	/*
4520	 * If TX flow control is disabled and there's >1 queue defined,
4521	 * enable DROP.
4522	 *
4523	 * This drops frames rather than hanging the RX MAC for all queues.
4524	 */
4525	if ((adapter->num_queues > 1) &&
4526	    (adapter->fc == e1000_fc_none ||
4527	     adapter->fc == e1000_fc_rx_pause)) {
4528		srrctl |= E1000_SRRCTL_DROP_EN;
4529	}
4530
4531	/* Setup the Base and Length of the Rx Descriptor Rings */
4532	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4533		u64 bus_addr = rxr->rxdma.dma_paddr;
4534		u32 rxdctl;
4535
4536		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4537		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4538		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4539		    (uint32_t)(bus_addr >> 32));
4540		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4541		    (uint32_t)bus_addr);
4542		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4543		/* Enable this Queue */
4544		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4545		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4546		rxdctl &= 0xFFF00000;
4547		rxdctl |= IGB_RX_PTHRESH;
4548		rxdctl |= IGB_RX_HTHRESH << 8;
4549		rxdctl |= IGB_RX_WTHRESH << 16;
4550		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4551	}
4552
4553	/*
4554	** Setup for RX MultiQueue
4555	*/
4556	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4557	if (adapter->num_queues > 1) {
4558		u32 random[10], mrqc, shift = 0;
4559		union igb_reta {
4560			u32 dword;
4561			u8  bytes[4];
4562		} reta;
4563
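		/*
		** The 128-entry RSS redirection table is filled round-robin
		** over the queues and written four entries (one 32-bit
		** register) at a time; the 82575 keeps the queue index in
		** the upper bits of each entry, hence the shift.  The hash
		** key is seeded from arc4rand().
		*/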
4564		arc4rand(&random, sizeof(random), 0);
4565		if (adapter->hw.mac.type == e1000_82575)
4566			shift = 6;
4567		/* Warning FM follows */
4568		for (int i = 0; i < 128; i++) {
4569			reta.bytes[i & 3] =
4570			    (i % adapter->num_queues) << shift;
4571			if ((i & 3) == 3)
4572				E1000_WRITE_REG(hw,
4573				    E1000_RETA(i >> 2), reta.dword);
4574		}
4575		/* Now fill in hash table */
4576		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4577		for (int i = 0; i < 10; i++)
4578			E1000_WRITE_REG_ARRAY(hw,
4579			    E1000_RSSRK(0), i, random[i]);
4580
4581		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4582		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4583		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4584		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4585		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4586		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4587		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4588		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4589
4590		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4591
4592		/*
4593		** NOTE: Receive Full-Packet Checksum Offload
4594		** is mutually exclusive with Multiqueue. However
4595		** this is not the same as TCP/IP checksums which
4596		** still work.
4597		*/
4598		rxcsum |= E1000_RXCSUM_PCSD;
4599#if __FreeBSD_version >= 800000
4600		/* For SCTP Offload */
4601		if ((hw->mac.type == e1000_82576)
4602		    && (ifp->if_capenable & IFCAP_RXCSUM))
4603			rxcsum |= E1000_RXCSUM_CRCOFL;
4604#endif
4605	} else {
4606		/* Non RSS setup */
4607		if (ifp->if_capenable & IFCAP_RXCSUM) {
4608			rxcsum |= E1000_RXCSUM_IPPCSE;
4609#if __FreeBSD_version >= 800000
4610			if (adapter->hw.mac.type == e1000_82576)
4611				rxcsum |= E1000_RXCSUM_CRCOFL;
4612#endif
4613		} else
4614			rxcsum &= ~E1000_RXCSUM_TUOFL;
4615	}
4616	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4617
4618	/* Setup the Receive Control Register */
4619	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4620	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4621		   E1000_RCTL_RDMTS_HALF |
4622		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4623	/* Strip CRC bytes. */
4624	rctl |= E1000_RCTL_SECRC;
4625	/* Make sure VLAN Filters are off */
4626	rctl &= ~E1000_RCTL_VFE;
4627	/* Don't store bad packets */
4628	rctl &= ~E1000_RCTL_SBP;
4629
4630	/* Enable Receives */
4631	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4632
4633	/*
4634	 * Setup the HW Rx Head and Tail Descriptor Pointers
4635	 *   - needs to be after enable
4636	 */
4637	for (int i = 0; i < adapter->num_queues; i++) {
4638		rxr = &adapter->rx_rings[i];
4639		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4640#ifdef DEV_NETMAP
4641		/*
4642		 * an init() while a netmap client is active must
4643		 * preserve the rx buffers passed to userspace.
4644		 * In this driver it means we adjust RDT to
4645		 * something different from next_to_refresh
4646		 * (which is not used in netmap mode).
4647		 */
4648		if (ifp->if_capenable & IFCAP_NETMAP) {
4649			struct netmap_adapter *na = NA(adapter->ifp);
4650			struct netmap_kring *kring = &na->rx_rings[i];
4651			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4652
4653			if (t >= adapter->num_rx_desc)
4654				t -= adapter->num_rx_desc;
4655			else if (t < 0)
4656				t += adapter->num_rx_desc;
4657			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4658		} else
4659#endif /* DEV_NETMAP */
4660		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4661	}
4662	return;
4663}
4664
4665/*********************************************************************
4666 *
4667 *  Free receive rings.
4668 *
4669 **********************************************************************/
4670static void
4671igb_free_receive_structures(struct adapter *adapter)
4672{
4673	struct rx_ring *rxr = adapter->rx_rings;
4674
4675	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4676		struct lro_ctrl	*lro = &rxr->lro;
4677		igb_free_receive_buffers(rxr);
4678		tcp_lro_free(lro);
4679		igb_dma_free(adapter, &rxr->rxdma);
4680	}
4681
4682	free(adapter->rx_rings, M_DEVBUF);
4683}
4684
4685/*********************************************************************
4686 *
4687 *  Free receive ring data structures.
4688 *
4689 **********************************************************************/
4690static void
4691igb_free_receive_buffers(struct rx_ring *rxr)
4692{
4693	struct adapter		*adapter = rxr->adapter;
4694	struct igb_rx_buf	*rxbuf;
4695	int i;
4696
4697	INIT_DEBUGOUT("free_receive_structures: begin");
4698
4699	/* Cleanup any existing buffers */
4700	if (rxr->rx_buffers != NULL) {
4701		for (i = 0; i < adapter->num_rx_desc; i++) {
4702			rxbuf = &rxr->rx_buffers[i];
4703			if (rxbuf->m_head != NULL) {
4704				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4705				    BUS_DMASYNC_POSTREAD);
4706				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4707				rxbuf->m_head->m_flags |= M_PKTHDR;
4708				m_freem(rxbuf->m_head);
4709			}
4710			if (rxbuf->m_pack != NULL) {
4711				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4712				    BUS_DMASYNC_POSTREAD);
4713				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4714				rxbuf->m_pack->m_flags |= M_PKTHDR;
4715				m_freem(rxbuf->m_pack);
4716			}
4717			rxbuf->m_head = NULL;
4718			rxbuf->m_pack = NULL;
4719			if (rxbuf->hmap != NULL) {
4720				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4721				rxbuf->hmap = NULL;
4722			}
4723			if (rxbuf->pmap != NULL) {
4724				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4725				rxbuf->pmap = NULL;
4726			}
4727		}
4728		if (rxr->rx_buffers != NULL) {
4729			free(rxr->rx_buffers, M_DEVBUF);
4730			rxr->rx_buffers = NULL;
4731		}
4732	}
4733
4734	if (rxr->htag != NULL) {
4735		bus_dma_tag_destroy(rxr->htag);
4736		rxr->htag = NULL;
4737	}
4738	if (rxr->ptag != NULL) {
4739		bus_dma_tag_destroy(rxr->ptag);
4740		rxr->ptag = NULL;
4741	}
4742}
4743
4744static __inline void
4745igb_rx_discard(struct rx_ring *rxr, int i)
4746{
4747	struct igb_rx_buf	*rbuf;
4748
4749	rbuf = &rxr->rx_buffers[i];
4750
4751	/* Partially received? Free the chain */
4752	if (rxr->fmp != NULL) {
4753		rxr->fmp->m_flags |= M_PKTHDR;
4754		m_freem(rxr->fmp);
4755		rxr->fmp = NULL;
4756		rxr->lmp = NULL;
4757	}
4758
4759	/*
4760	** With advanced descriptors the writeback
4761	** clobbers the buffer addrs, so it's easier
4762	** to just free the existing mbufs and take
4763	** the normal refresh path to get new buffers
4764	** and mapping.
4765	*/
4766	if (rbuf->m_head) {
4767		m_free(rbuf->m_head);
4768		rbuf->m_head = NULL;
4769		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4770	}
4771
4772	if (rbuf->m_pack) {
4773		m_free(rbuf->m_pack);
4774		rbuf->m_pack = NULL;
4775		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4776	}
4777
4778	return;
4779}
4780
4781static __inline void
4782igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4783{
4784
4785	/*
4786	 * At the moment LRO is only done for IPv4/TCP packets whose TCP
4787	 * checksum has been computed by the hardware, and which carry no
4788	 * VLAN tag in the ethernet header.
4789	 */
4790	if (rxr->lro_enabled &&
4791	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4792	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4793	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4794	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4795	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4796	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4797		/*
4798		 * Send to the stack if:
4799		 **  - LRO not enabled, or
4800		 **  - no LRO resources, or
4801		 **  - lro enqueue fails
4802		 */
4803		if (rxr->lro.lro_cnt != 0)
4804			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4805				return;
4806	}
4807	IGB_RX_UNLOCK(rxr);
4808	(*ifp->if_input)(ifp, m);
4809	IGB_RX_LOCK(rxr);
4810}
4811
4812/*********************************************************************
4813 *
4814 *  This routine executes in interrupt context. It replenishes
4815 *  the mbufs in the descriptor ring and sends data which has been
4816 *  dma'ed into host memory to the upper layer.
4817 *
4818 *  We loop at most count times if count is > 0, or until done if
4819 *  count < 0.
4820 *
4821 *  Return TRUE if more to clean, FALSE otherwise
4822 *********************************************************************/
4823static bool
4824igb_rxeof(struct igb_queue *que, int count, int *done)
4825{
4826	struct adapter		*adapter = que->adapter;
4827	struct rx_ring		*rxr = que->rxr;
4828	struct ifnet		*ifp = adapter->ifp;
4829	struct lro_ctrl		*lro = &rxr->lro;
4830	struct lro_entry	*queued;
4831	int			i, processed = 0, rxdone = 0;
4832	u32			ptype, staterr = 0;
4833	union e1000_adv_rx_desc	*cur;
4834
4835	IGB_RX_LOCK(rxr);
4836	/* Sync the ring. */
4837	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4838	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4839
4840#ifdef DEV_NETMAP
4841	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4842		IGB_RX_UNLOCK(rxr);
4843		return (FALSE);
4844	}
4845#endif /* DEV_NETMAP */
4846
4847	/* Main clean loop */
4848	for (i = rxr->next_to_check; count != 0;) {
4849		struct mbuf		*sendmp, *mh, *mp;
4850		struct igb_rx_buf	*rxbuf;
4851		u16			hlen, plen, hdr, vtag;
4852		bool			eop = FALSE;
4853
4854		cur = &rxr->rx_base[i];
4855		staterr = le32toh(cur->wb.upper.status_error);
4856		if ((staterr & E1000_RXD_STAT_DD) == 0)
4857			break;
4858		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4859			break;
4860		count--;
4861		sendmp = mh = mp = NULL;
4862		cur->wb.upper.status_error = 0;
4863		rxbuf = &rxr->rx_buffers[i];
4864		plen = le16toh(cur->wb.upper.length);
4865		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
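		/*
		** On i350/i354, packets looped back internally (LB status
		** bit set) appear to carry the VLAN field in network byte
		** order, hence the be16toh() below.
		*/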
4866		if (((adapter->hw.mac.type == e1000_i350) ||
4867		    (adapter->hw.mac.type == e1000_i354)) &&
4868		    (staterr & E1000_RXDEXT_STATERR_LB))
4869			vtag = be16toh(cur->wb.upper.vlan);
4870		else
4871			vtag = le16toh(cur->wb.upper.vlan);
4872		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4873		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4874
4875		/*
4876		 * Free the frame (all segments) if we're at EOP and
4877		 * it's an error.
4878		 *
4879		 * The datasheet states that EOP + status is only valid for
4880		 * the final segment in a multi-segment frame.
4881		 */
4882		if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4883			adapter->dropped_pkts++;
4884			++rxr->rx_discarded;
4885			igb_rx_discard(rxr, i);
4886			goto next_desc;
4887		}
4888
4889		/*
4890		** The way the hardware is configured to
4891		** split, it will ONLY use the header buffer
4892		** when header split is enabled; otherwise we
4893		** get normal behavior, i.e. both header and
4894		** payload are DMA'd into the payload buffer.
4895		**
4896		** The fmp test is to catch the case where a
4897		** packet spans multiple descriptors; in that
4898		** case only the first header is valid.
4899		*/
4900		if (rxr->hdr_split && rxr->fmp == NULL) {
4901			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4902			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4903			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4904			if (hlen > IGB_HDR_BUF)
4905				hlen = IGB_HDR_BUF;
4906			mh = rxr->rx_buffers[i].m_head;
4907			mh->m_len = hlen;
4908			/* clear buf pointer for refresh */
4909			rxbuf->m_head = NULL;
4910			/*
4911			** Get the payload length; this
4912			** could be zero if it's a small
4913			** packet.
4914			*/
4915			if (plen > 0) {
4916				mp = rxr->rx_buffers[i].m_pack;
4917				mp->m_len = plen;
4918				mh->m_next = mp;
4919				/* clear buf pointer */
4920				rxbuf->m_pack = NULL;
4921				rxr->rx_split_packets++;
4922			}
4923		} else {
4924			/*
4925			** Either no header split, or a
4926			** secondary piece of a fragmented
4927			** split packet.
4928			*/
4929			mh = rxr->rx_buffers[i].m_pack;
4930			mh->m_len = plen;
4931			/* clear buf info for refresh */
4932			rxbuf->m_pack = NULL;
4933		}
4934		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4935
4936		++processed; /* So we know when to refresh */
4937
4938		/* Initial frame - setup */
4939		if (rxr->fmp == NULL) {
4940			mh->m_pkthdr.len = mh->m_len;
4941			/* Save the head of the chain */
4942			rxr->fmp = mh;
4943			rxr->lmp = mh;
4944			if (mp != NULL) {
4945				/* Add payload if split */
4946				mh->m_pkthdr.len += mp->m_len;
4947				rxr->lmp = mh->m_next;
4948			}
4949		} else {
4950			/* Chain mbuf's together */
4951			rxr->lmp->m_next = mh;
4952			rxr->lmp = rxr->lmp->m_next;
4953			rxr->fmp->m_pkthdr.len += mh->m_len;
4954		}
4955
4956		if (eop) {
4957			rxr->fmp->m_pkthdr.rcvif = ifp;
4958			ifp->if_ipackets++;
4959			rxr->rx_packets++;
4960			/* capture data for AIM */
4961			rxr->packets++;
4962			rxr->bytes += rxr->fmp->m_pkthdr.len;
4963			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4964
4965			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4966				igb_rx_checksum(staterr, rxr->fmp, ptype);
4967
4968			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4969			    (staterr & E1000_RXD_STAT_VP) != 0) {
4970				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4971				rxr->fmp->m_flags |= M_VLANTAG;
4972			}
4973#ifndef IGB_LEGACY_TX
4974			rxr->fmp->m_pkthdr.flowid = que->msix;
4975			rxr->fmp->m_flags |= M_FLOWID;
4976#endif
4977			sendmp = rxr->fmp;
4978			/* Make sure to set M_PKTHDR. */
4979			sendmp->m_flags |= M_PKTHDR;
4980			rxr->fmp = NULL;
4981			rxr->lmp = NULL;
4982		}
4983
4984next_desc:
4985		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4986		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4987
4988		/* Advance our pointers to the next descriptor. */
4989		if (++i == adapter->num_rx_desc)
4990			i = 0;
4991		/*
4992		** Send to the stack or LRO
4993		*/
4994		if (sendmp != NULL) {
4995			rxr->next_to_check = i;
4996			igb_rx_input(rxr, ifp, sendmp, ptype);
4997			i = rxr->next_to_check;
4998			rxdone++;
4999		}
5000
5001		/* Every 8 descriptors we go to refresh mbufs */
5002		if (processed == 8) {
5003                        igb_refresh_mbufs(rxr, i);
5004                        processed = 0;
5005		}
5006	}
5007
5008	/* Catch any remainders */
5009	if (igb_rx_unrefreshed(rxr))
5010		igb_refresh_mbufs(rxr, i);
5011
5012	rxr->next_to_check = i;
5013
5014	/*
5015	 * Flush any outstanding LRO work
5016	 */
5017	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5018		SLIST_REMOVE_HEAD(&lro->lro_active, next);
5019		tcp_lro_flush(lro, queued);
5020	}
5021
5022	if (done != NULL)
5023		*done += rxdone;
5024
5025	IGB_RX_UNLOCK(rxr);
5026	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5027}
5028
5029/*********************************************************************
5030 *
5031 *  Verify that the hardware indicated that the checksum is valid.
5032 *  Inform the stack about the status of checksum so that stack
5033 *  doesn't spend time verifying the checksum.
5034 *
5035 *********************************************************************/
5036static void
5037igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5038{
5039	u16 status = (u16)staterr;
5040	u8  errors = (u8) (staterr >> 24);
5041	int sctp;
5042
5043	/* Ignore Checksum bit is set */
5044	if (status & E1000_RXD_STAT_IXSM) {
5045		mp->m_pkthdr.csum_flags = 0;
5046		return;
5047	}
5048
5049	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5050	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5051		sctp = 1;
5052	else
5053		sctp = 0;
5054	if (status & E1000_RXD_STAT_IPCS) {
5055		/* Did it pass? */
5056		if (!(errors & E1000_RXD_ERR_IPE)) {
5057			/* IP Checksum Good */
5058			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5059			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5060		} else
5061			mp->m_pkthdr.csum_flags = 0;
5062	}
5063
5064	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5065		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5066#if __FreeBSD_version >= 800000
5067		if (sctp) /* reassign */
5068			type = CSUM_SCTP_VALID;
5069#endif
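		/*
		** For TCP/UDP, csum_data of 0xffff together with
		** CSUM_DATA_VALID | CSUM_PSEUDO_HDR tells the stack that
		** the checksum verified, so no software check is needed.
		*/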
5070		/* Did it pass? */
5071		if (!(errors & E1000_RXD_ERR_TCPE)) {
5072			mp->m_pkthdr.csum_flags |= type;
5073			if (sctp == 0)
5074				mp->m_pkthdr.csum_data = htons(0xffff);
5075		}
5076	}
5077	return;
5078}
5079
5080/*
5081 * This routine is run via a vlan
5082 * config EVENT
5083 */
5084static void
5085igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5086{
5087	struct adapter	*adapter = ifp->if_softc;
5088	u32		index, bit;
5089
5090	if (ifp->if_softc !=  arg)   /* Not our event */
5091		return;
5092
5093	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5094                return;
5095
5096	IGB_CORE_LOCK(adapter);
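	/*
	** The VFTA is a 4096-bit bitmap held in 128 32-bit registers:
	** bits 11:5 of the VLAN id select the register and bits 4:0
	** select the bit within it.
	*/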
5097	index = (vtag >> 5) & 0x7F;
5098	bit = vtag & 0x1F;
5099	adapter->shadow_vfta[index] |= (1 << bit);
5100	++adapter->num_vlans;
5101	/* Change hw filter setting */
5102	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5103		igb_setup_vlan_hw_support(adapter);
5104	IGB_CORE_UNLOCK(adapter);
5105}
5106
5107/*
5108 * This routine is run via a vlan
5109 * unconfig EVENT
5110 */
5111static void
5112igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5113{
5114	struct adapter	*adapter = ifp->if_softc;
5115	u32		index, bit;
5116
5117	if (ifp->if_softc !=  arg)
5118		return;
5119
5120	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5121                return;
5122
5123	IGB_CORE_LOCK(adapter);
5124	index = (vtag >> 5) & 0x7F;
5125	bit = vtag & 0x1F;
5126	adapter->shadow_vfta[index] &= ~(1 << bit);
5127	--adapter->num_vlans;
5128	/* Change hw filter setting */
5129	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5130		igb_setup_vlan_hw_support(adapter);
5131	IGB_CORE_UNLOCK(adapter);
5132}
5133
5134static void
5135igb_setup_vlan_hw_support(struct adapter *adapter)
5136{
5137	struct e1000_hw *hw = &adapter->hw;
5138	struct ifnet	*ifp = adapter->ifp;
5139	u32             reg;
5140
5141	if (adapter->vf_ifp) {
5142		e1000_rlpml_set_vf(hw,
5143		    adapter->max_frame_size + VLAN_TAG_SIZE);
5144		return;
5145	}
5146
5147	reg = E1000_READ_REG(hw, E1000_CTRL);
5148	reg |= E1000_CTRL_VME;
5149	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5150
5151	/* Enable the Filter Table */
5152	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5153		reg = E1000_READ_REG(hw, E1000_RCTL);
5154		reg &= ~E1000_RCTL_CFIEN;
5155		reg |= E1000_RCTL_VFE;
5156		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5157	}
5158
5159	/* Update the frame size */
5160	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5161	    adapter->max_frame_size + VLAN_TAG_SIZE);
5162
5163	/* Don't bother with table if no vlans */
5164	if ((adapter->num_vlans == 0) ||
5165	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5166                return;
5167	/*
5168	** A soft reset zeroes out the VFTA, so
5169	** we need to repopulate it now.
5170	*/
5171	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5172                if (adapter->shadow_vfta[i] != 0) {
5173			if (adapter->vf_ifp)
5174				e1000_vfta_set_vf(hw,
5175				    adapter->shadow_vfta[i], TRUE);
5176			else
5177				e1000_write_vfta(hw,
5178				    i, adapter->shadow_vfta[i]);
5179		}
5180}
5181
5182static void
5183igb_enable_intr(struct adapter *adapter)
5184{
5185	/* With RSS set up what to auto clear */
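	/*
	** In MSI-X mode the queue and link vectors are enabled through the
	** extended interrupt registers: EIAC selects which vectors
	** auto-clear, EIAM which auto-mask, and EIMS unmasks them, with LSC
	** still enabled via IMS for link state changes.  Without MSI-X the
	** legacy IMS enable mask is used instead.
	*/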
5186	if (adapter->msix_mem) {
5187		u32 mask = (adapter->que_mask | adapter->link_mask);
5188		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5189		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5190		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5191		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5192		    E1000_IMS_LSC);
5193	} else {
5194		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5195		    IMS_ENABLE_MASK);
5196	}
5197	E1000_WRITE_FLUSH(&adapter->hw);
5198
5199	return;
5200}
5201
5202static void
5203igb_disable_intr(struct adapter *adapter)
5204{
5205	if (adapter->msix_mem) {
5206		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5207		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5208	}
5209	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5210	E1000_WRITE_FLUSH(&adapter->hw);
5211	return;
5212}
5213
5214/*
5215 * Bit of a misnomer: what this really means is
5216 * to enable OS management of the system... aka
5217 * to disable special hardware management features
5218 */
5219static void
5220igb_init_manageability(struct adapter *adapter)
5221{
5222	if (adapter->has_manage) {
5223		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5224		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5225
5226		/* disable hardware interception of ARP */
5227		manc &= ~(E1000_MANC_ARP_EN);
5228
5229                /* enable receiving management packets to the host */
5230		manc |= E1000_MANC_EN_MNG2HOST;
5231		manc2h |= 1 << 5;  /* Mng Port 623 */
5232		manc2h |= 1 << 6;  /* Mng Port 664 */
5233		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5234		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5235	}
5236}
5237
5238/*
5239 * Give control back to hardware management
5240 * controller if there is one.
5241 */
5242static void
5243igb_release_manageability(struct adapter *adapter)
5244{
5245	if (adapter->has_manage) {
5246		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5247
5248		/* re-enable hardware interception of ARP */
5249		manc |= E1000_MANC_ARP_EN;
5250		manc &= ~E1000_MANC_EN_MNG2HOST;
5251
5252		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5253	}
5254}
5255
5256/*
5257 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5258 * For ASF and Pass Through versions of f/w this means that
5259 * the driver is loaded.
5260 *
5261 */
5262static void
5263igb_get_hw_control(struct adapter *adapter)
5264{
5265	u32 ctrl_ext;
5266
5267	if (adapter->vf_ifp)
5268		return;
5269
5270	/* Let firmware know the driver has taken over */
5271	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5272	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5273	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5274}
5275
5276/*
5277 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5278 * For ASF and Pass Through versions of f/w this means that the
5279 * driver is no longer loaded.
5280 *
5281 */
5282static void
5283igb_release_hw_control(struct adapter *adapter)
5284{
5285	u32 ctrl_ext;
5286
5287	if (adapter->vf_ifp)
5288		return;
5289
5290	/* Let firmware take over control of h/w */
5291	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5292	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5293	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5294}
5295
5296static int
5297igb_is_valid_ether_addr(uint8_t *addr)
5298{
5299	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5300
5301	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5302		return (FALSE);
5303	}
5304
5305	return (TRUE);
5306}
5307
5308
5309/*
5310 * Enable PCI Wake On Lan capability
5311 */
5312static void
5313igb_enable_wakeup(device_t dev)
5314{
5315	u16     cap, status;
5316	u8      id;
5317
5318	/* First find the capabilities pointer */
5319	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5320	/* Read the PM Capabilities */
5321	id = pci_read_config(dev, cap, 1);
5322	if (id != PCIY_PMG)     /* Something wrong */
5323		return;
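	/*
	** Note: this assumes the power management capability is the first
	** entry in the PCI capability list; if some other capability comes
	** first we simply return without enabling PME.
	*/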
5324	/* OK, we have the power capabilities, so
5325	   now get the status register */
5326	cap += PCIR_POWER_STATUS;
5327	status = pci_read_config(dev, cap, 2);
5328	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5329	pci_write_config(dev, cap, status, 2);
5330	return;
5331}
5332
5333static void
5334igb_led_func(void *arg, int onoff)
5335{
5336	struct adapter	*adapter = arg;
5337
5338	IGB_CORE_LOCK(adapter);
5339	if (onoff) {
5340		e1000_setup_led(&adapter->hw);
5341		e1000_led_on(&adapter->hw);
5342	} else {
5343		e1000_led_off(&adapter->hw);
5344		e1000_cleanup_led(&adapter->hw);
5345	}
5346	IGB_CORE_UNLOCK(adapter);
5347}
5348
5349/**********************************************************************
5350 *
5351 *  Update the board statistics counters.
5352 *
5353 **********************************************************************/
5354static void
5355igb_update_stats_counters(struct adapter *adapter)
5356{
5357	struct ifnet		*ifp;
5358        struct e1000_hw		*hw = &adapter->hw;
5359	struct e1000_hw_stats	*stats;
5360
5361	/*
5362	** The virtual function adapter has only a
5363	** small controlled set of stats, so do only
5364	** those and return.
5365	*/
5366	if (adapter->vf_ifp) {
5367		igb_update_vf_stats_counters(adapter);
5368		return;
5369	}
5370
5371	stats = (struct e1000_hw_stats	*)adapter->stats;
5372
5373	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5374	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5375		stats->symerrs +=
5376		    E1000_READ_REG(hw,E1000_SYMERRS);
5377		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5378	}
5379
5380	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5381	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5382	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5383	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5384
5385	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5386	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5387	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5388	stats->dc += E1000_READ_REG(hw, E1000_DC);
5389	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5390	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5391	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5392	/*
5393	** For watchdog management we need to know if we have been
5394	** paused during the last interval, so capture that here.
5395	*/
5396        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5397        stats->xoffrxc += adapter->pause_frames;
5398	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5399	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5400	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5401	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5402	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5403	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5404	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5405	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5406	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5407	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5408	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5409	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5410
5411	/* For the 64-bit byte counters the low dword must be read first. */
5412	/* Both registers clear on the read of the high dword */
5413
5414	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5415	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5416	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5417	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5418
5419	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5420	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5421	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5422	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5423	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5424
5425	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5426	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5427
5428	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5429	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5430	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5431	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5432	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5433	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5434	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5435	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5436	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5437	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5438
5439	/* Interrupt Counts */
5440
5441	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5442	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5443	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5444	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5445	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5446	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5447	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5448	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5449	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5450
5451	/* Host to Card Statistics */
5452
5453	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5454	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5455	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5456	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5457	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5458	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5459	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5460	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5461	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5462	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5463	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5464	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5465	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5466	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5467
5468	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5469	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5470	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5471	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5472	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5473	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5474
5475	ifp = adapter->ifp;
5476	ifp->if_collisions = stats->colc;
5477
5478	/* Rx Errors */
5479	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5480	    stats->crcerrs + stats->algnerrc +
5481	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5482
5483	/* Tx Errors */
5484	ifp->if_oerrors = stats->ecol +
5485	    stats->latecol + adapter->watchdog_events;
5486
5487	/* Driver specific counters */
5488	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5489	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5490	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5491	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5492	adapter->packet_buf_alloc_tx =
5493	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5494	adapter->packet_buf_alloc_rx =
5495	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5496}
5497
5498
5499/**********************************************************************
5500 *
5501 *  Initialize the VF board statistics counters.
5502 *
5503 **********************************************************************/
5504static void
5505igb_vf_init_stats(struct adapter *adapter)
5506{
5507	struct e1000_hw *hw = &adapter->hw;
5508	struct e1000_vf_stats	*stats;
5509
5510	stats = (struct e1000_vf_stats *)adapter->stats;
5511	if (stats == NULL)
5512		return;
5513	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5514	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5515	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5516	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5517	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5518}
5519
5520/**********************************************************************
5521 *
5522 *  Update the VF board statistics counters.
5523 *
5524 **********************************************************************/
5525static void
5526igb_update_vf_stats_counters(struct adapter *adapter)
5527{
5528	struct e1000_hw *hw = &adapter->hw;
5529	struct e1000_vf_stats	*stats;
5530
5531	if (adapter->link_speed == 0)
5532		return;
5533
5534	stats = (struct e1000_vf_stats *)adapter->stats;
5535
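	/*
	** UPDATE_VF_REG (defined in if_igb.h) folds a 32-bit VF counter
	** register into a 64-bit running total using the previously
	** latched value.  A minimal sketch of such a macro, assuming at
	** most one counter wrap per update interval (the actual
	** definition in if_igb.h may differ):
	**
	**	#define UPDATE_VF_REG(reg, last, cur)		\
	**	do {						\
	**		u32 now = E1000_READ_REG(hw, reg);	\
	**		(cur) += (u32)(now - (u32)(last));	\
	**		(last) = now;				\
	**	} while (0)
	*/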
5536	UPDATE_VF_REG(E1000_VFGPRC,
5537	    stats->last_gprc, stats->gprc);
5538	UPDATE_VF_REG(E1000_VFGORC,
5539	    stats->last_gorc, stats->gorc);
5540	UPDATE_VF_REG(E1000_VFGPTC,
5541	    stats->last_gptc, stats->gptc);
5542	UPDATE_VF_REG(E1000_VFGOTC,
5543	    stats->last_gotc, stats->gotc);
5544	UPDATE_VF_REG(E1000_VFMPRC,
5545	    stats->last_mprc, stats->mprc);
5546}
5547
5548/* Export a single 32-bit register via a read-only sysctl. */
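/*
** The handler takes the adapter pointer in oid_arg1 and the register
** offset in oid_arg2, so one handler serves every register exported
** by igb_add_hw_stats() below (txd_head, txd_tail, rxd_head, rxd_tail).
*/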
5549static int
5550igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5551{
5552	struct adapter *adapter;
5553	u_int val;
5554
5555	adapter = oidp->oid_arg1;
5556	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5557	return (sysctl_handle_int(oidp, &val, 0, req));
5558}
5559
5560/*
5561**  Tuneable interrupt rate handler
5562*/
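/*
** The interrupt interval lives in bits [14:2] of EITR and is treated
** here as microseconds, so the reported value is 1000000 / usec
** interrupts per second (e.g. a 125 usec interval reads back as 8000).
** Although sysctl_handle_int() accepts a new value, nothing is written
** back to EITR, so this sysctl is effectively read-only.
*/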
5563static int
5564igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5565{
5566	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5567	int			error;
5568	u32			reg, usec, rate;
5569
5570	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5571	usec = ((reg & 0x7FFC) >> 2);
5572	if (usec > 0)
5573		rate = 1000000 / usec;
5574	else
5575		rate = 0;
5576	error = sysctl_handle_int(oidp, &rate, 0, req);
5577	if (error || !req->newptr)
5578		return (error);
5579	return (0);
5580}
5581
5582/*
5583 * Add sysctl variables, one per statistic, to the system.
5584 */
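/*
 * Everything added here hangs off the device's sysctl node (typically
 * dev.igb.<unit>), so the counters can be read with sysctl(8), e.g.
 * "sysctl dev.igb.0.mac_stats" or "sysctl dev.igb.0.queue0.interrupt_rate".
 */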
5585static void
5586igb_add_hw_stats(struct adapter *adapter)
5587{
5588	device_t dev = adapter->dev;
5589
5590	struct tx_ring *txr = adapter->tx_rings;
5591	struct rx_ring *rxr = adapter->rx_rings;
5592
5593	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5594	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5595	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5596	struct e1000_hw_stats *stats = adapter->stats;
5597
5598	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5599	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5600
5601#define QUEUE_NAME_LEN 32
5602	char namebuf[QUEUE_NAME_LEN];
5603
5604	/* Driver Statistics */
5605	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5606			CTLFLAG_RD, &adapter->link_irq,
5607			"Link MSIX IRQ Handled");
5608	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5609			CTLFLAG_RD, &adapter->dropped_pkts,
5610			"Driver dropped packets");
5611	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5612			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5613			"Driver tx dma failure in xmit");
5614	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5615			CTLFLAG_RD, &adapter->rx_overruns,
5616			"RX overruns");
5617	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5618			CTLFLAG_RD, &adapter->watchdog_events,
5619			"Watchdog timeouts");
5620
5621	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5622			CTLFLAG_RD, &adapter->device_control,
5623			"Device Control Register");
5624	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5625			CTLFLAG_RD, &adapter->rx_control,
5626			"Receiver Control Register");
5627	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5628			CTLFLAG_RD, &adapter->int_mask,
5629			"Interrupt Mask");
5630	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5631			CTLFLAG_RD, &adapter->eint_mask,
5632			"Extended Interrupt Mask");
5633	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5634			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5635			"Transmit Buffer Packet Allocation");
5636	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5637			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5638			"Receive Buffer Packet Allocation");
5639	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5640			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5641			"Flow Control High Watermark");
5642	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5643			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5644			"Flow Control Low Watermark");
5645
5646	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5647		struct lro_ctrl *lro = &rxr->lro;
5648
5649		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5650		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5651					    CTLFLAG_RD, NULL, "Queue Name");
5652		queue_list = SYSCTL_CHILDREN(queue_node);
5653
5654		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5655				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5656				sizeof(&adapter->queues[i]),
5657				igb_sysctl_interrupt_rate_handler,
5658				"IU", "Interrupt Rate");
5659
5660		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5661				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5662				igb_sysctl_reg_handler, "IU",
5663 				"Transmit Descriptor Head");
5664		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5665				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5666				igb_sysctl_reg_handler, "IU",
5667 				"Transmit Descriptor Tail");
5668		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5669				CTLFLAG_RD, &txr->no_desc_avail,
5670				"Queue No Descriptor Available");
5671		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5672				CTLFLAG_RD, &txr->total_packets,
5673				"Queue Packets Transmitted");
5674
5675		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5676				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5677				igb_sysctl_reg_handler, "IU",
5678				"Receive Descriptor Head");
5679		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5680				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5681				igb_sysctl_reg_handler, "IU",
5682				"Receive Descriptor Tail");
5683		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5684				CTLFLAG_RD, &rxr->rx_packets,
5685				"Queue Packets Received");
5686		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5687				CTLFLAG_RD, &rxr->rx_bytes,
5688				"Queue Bytes Received");
5689		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5690				CTLFLAG_RD, &lro->lro_queued, 0,
5691				"LRO Queued");
5692		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5693				CTLFLAG_RD, &lro->lro_flushed, 0,
5694				"LRO Flushed");
5695	}
5696
5697	/* MAC stats get their own sub node */
5698
5699	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5700				    CTLFLAG_RD, NULL, "MAC Statistics");
5701	stat_list = SYSCTL_CHILDREN(stat_node);
5702
5703	/*
5704	** VF adapter has a very limited set of stats
5705	** since it's not managing the metal, so to speak.
5706	*/
5707	if (adapter->vf_ifp) {
5708	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5709			CTLFLAG_RD, &stats->gprc,
5710			"Good Packets Received");
5711	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5712			CTLFLAG_RD, &stats->gptc,
5713			"Good Packets Transmitted");
5714 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5715 			CTLFLAG_RD, &stats->gorc,
5716 			"Good Octets Received");
5717 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5718 			CTLFLAG_RD, &stats->gotc,
5719 			"Good Octets Transmitted");
5720	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5721			CTLFLAG_RD, &stats->mprc,
5722			"Multicast Packets Received");
5723		return;
5724	}
5725
5726	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5727			CTLFLAG_RD, &stats->ecol,
5728			"Excessive collisions");
5729	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5730			CTLFLAG_RD, &stats->scc,
5731			"Single collisions");
5732	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5733			CTLFLAG_RD, &stats->mcc,
5734			"Multiple collisions");
5735	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5736			CTLFLAG_RD, &stats->latecol,
5737			"Late collisions");
5738	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5739			CTLFLAG_RD, &stats->colc,
5740			"Collision Count");
5741	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5742			CTLFLAG_RD, &stats->symerrs,
5743			"Symbol Errors");
5744	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5745			CTLFLAG_RD, &stats->sec,
5746			"Sequence Errors");
5747	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5748			CTLFLAG_RD, &stats->dc,
5749			"Defer Count");
5750	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5751			CTLFLAG_RD, &stats->mpc,
5752			"Missed Packets");
5753	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5754			CTLFLAG_RD, &stats->rnbc,
5755			"Receive No Buffers");
5756	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5757			CTLFLAG_RD, &stats->ruc,
5758			"Receive Undersize");
5759	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5760			CTLFLAG_RD, &stats->rfc,
5761			"Fragmented Packets Received");
5762	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5763			CTLFLAG_RD, &stats->roc,
5764			"Oversized Packets Received");
5765	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5766			CTLFLAG_RD, &stats->rjc,
5767			"Received Jabber");
5768	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5769			CTLFLAG_RD, &stats->rxerrc,
5770			"Receive Errors");
5771	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5772			CTLFLAG_RD, &stats->crcerrs,
5773			"CRC errors");
5774	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5775			CTLFLAG_RD, &stats->algnerrc,
5776			"Alignment Errors");
5777	/* On 82575 these are collision counts */
5778	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5779			CTLFLAG_RD, &stats->cexterr,
5780			"Collision/Carrier extension errors");
5781	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5782			CTLFLAG_RD, &stats->xonrxc,
5783			"XON Received");
5784	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5785			CTLFLAG_RD, &stats->xontxc,
5786			"XON Transmitted");
5787	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5788			CTLFLAG_RD, &stats->xoffrxc,
5789			"XOFF Received");
5790	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5791			CTLFLAG_RD, &stats->xofftxc,
5792			"XOFF Transmitted");
5793	/* Packet Reception Stats */
5794	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5795			CTLFLAG_RD, &stats->tpr,
5796			"Total Packets Received");
5797	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5798			CTLFLAG_RD, &stats->gprc,
5799			"Good Packets Received");
5800	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5801			CTLFLAG_RD, &stats->bprc,
5802			"Broadcast Packets Received");
5803	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5804			CTLFLAG_RD, &stats->mprc,
5805			"Multicast Packets Received");
5806	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5807			CTLFLAG_RD, &stats->prc64,
5808			"64 byte frames received");
5809	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5810			CTLFLAG_RD, &stats->prc127,
5811			"65-127 byte frames received");
5812	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5813			CTLFLAG_RD, &stats->prc255,
5814			"128-255 byte frames received");
5815	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5816			CTLFLAG_RD, &stats->prc511,
5817			"256-511 byte frames received");
5818	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5819			CTLFLAG_RD, &stats->prc1023,
5820			"512-1023 byte frames received");
5821	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5822			CTLFLAG_RD, &stats->prc1522,
5823			"1024-1522 byte frames received");
5824 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5825 			CTLFLAG_RD, &stats->gorc,
5826 			"Good Octets Received");
5827
5828	/* Packet Transmission Stats */
5829 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5830 			CTLFLAG_RD, &stats->gotc,
5831 			"Good Octets Transmitted");
5832	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5833			CTLFLAG_RD, &stats->tpt,
5834			"Total Packets Transmitted");
5835	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5836			CTLFLAG_RD, &stats->gptc,
5837			"Good Packets Transmitted");
5838	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5839			CTLFLAG_RD, &stats->bptc,
5840			"Broadcast Packets Transmitted");
5841	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5842			CTLFLAG_RD, &stats->mptc,
5843			"Multicast Packets Transmitted");
5844	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5845			CTLFLAG_RD, &stats->ptc64,
5846			"64 byte frames transmitted");
5847	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5848			CTLFLAG_RD, &stats->ptc127,
5849			"65-127 byte frames transmitted");
5850	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5851			CTLFLAG_RD, &stats->ptc255,
5852			"128-255 byte frames transmitted");
5853	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5854			CTLFLAG_RD, &stats->ptc511,
5855			"256-511 byte frames transmitted");
5856	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5857			CTLFLAG_RD, &stats->ptc1023,
5858			"512-1023 byte frames transmitted");
5859	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5860			CTLFLAG_RD, &stats->ptc1522,
5861			"1024-1522 byte frames transmitted");
5862	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5863			CTLFLAG_RD, &stats->tsctc,
5864			"TSO Contexts Transmitted");
5865	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5866			CTLFLAG_RD, &stats->tsctfc,
5867			"TSO Contexts Failed");
5868
5869
5870	/* Interrupt Stats */
5871
5872	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5873				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5874	int_list = SYSCTL_CHILDREN(int_node);
5875
5876	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5877			CTLFLAG_RD, &stats->iac,
5878			"Interrupt Assertion Count");
5879
5880	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5881			CTLFLAG_RD, &stats->icrxptc,
5882			"Interrupt Cause Rx Pkt Timer Expire Count");
5883
5884	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5885			CTLFLAG_RD, &stats->icrxatc,
5886			"Interrupt Cause Rx Abs Timer Expire Count");
5887
5888	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5889			CTLFLAG_RD, &stats->ictxptc,
5890			"Interrupt Cause Tx Pkt Timer Expire Count");
5891
5892	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5893			CTLFLAG_RD, &stats->ictxatc,
5894			"Interrupt Cause Tx Abs Timer Expire Count");
5895
5896	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5897			CTLFLAG_RD, &stats->ictxqec,
5898			"Interrupt Cause Tx Queue Empty Count");
5899
5900	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5901			CTLFLAG_RD, &stats->ictxqmtc,
5902			"Interrupt Cause Tx Queue Min Thresh Count");
5903
5904	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5905			CTLFLAG_RD, &stats->icrxdmtc,
5906			"Interrupt Cause Rx Desc Min Thresh Count");
5907
5908	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5909			CTLFLAG_RD, &stats->icrxoc,
5910			"Interrupt Cause Receiver Overrun Count");
5911
5912	/* Host to Card Stats */
5913
5914	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5915				    CTLFLAG_RD, NULL,
5916				    "Host to Card Statistics");
5917
5918	host_list = SYSCTL_CHILDREN(host_node);
5919
5920	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5921			CTLFLAG_RD, &stats->cbtmpc,
5922			"Circuit Breaker Tx Packet Count");
5923
5924	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5925			CTLFLAG_RD, &stats->htdpmc,
5926			"Host Transmit Discarded Packets");
5927
5928	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5929			CTLFLAG_RD, &stats->rpthc,
5930			"Rx Packets To Host");
5931
5932	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5933			CTLFLAG_RD, &stats->cbrmpc,
5934			"Circuit Breaker Rx Packet Count");
5935
5936	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5937			CTLFLAG_RD, &stats->cbrdpc,
5938			"Circuit Breaker Rx Dropped Count");
5939
5940	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5941			CTLFLAG_RD, &stats->hgptc,
5942			"Host Good Packets Tx Count");
5943
5944	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5945			CTLFLAG_RD, &stats->htcbdpc,
5946			"Host Tx Circuit Breaker Dropped Count");
5947
5948	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5949			CTLFLAG_RD, &stats->hgorc,
5950			"Host Good Octets Received Count");
5951
5952	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5953			CTLFLAG_RD, &stats->hgotc,
5954			"Host Good Octets Transmit Count");
5955
5956	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5957			CTLFLAG_RD, &stats->lenerrs,
5958			"Length Errors");
5959
5960	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5961			CTLFLAG_RD, &stats->scvpc,
5962			"SerDes/SGMII Code Violation Pkt Count");
5963
5964	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5965			CTLFLAG_RD, &stats->hrmpc,
5966			"Header Redirection Missed Packet Count");
5967}
5968
5969
5970/**********************************************************************
5971 *
5972 *  This routine provides a way to dump out the adapter eeprom,
5973 *  often a useful debug/service tool. Only the first 32 words are
5974 *  dumped; the data that matters lies within that range.
5975 *
5976 **********************************************************************/
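/*
 * The dump is triggered by writing 1 to the driver's NVM sysctl
 * (registered elsewhere in the driver; the leaf name is assumed here
 * to be "nvm"), e.g. "sysctl dev.igb.0.nvm=1".  The output goes to
 * the console via printf.
 */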
5977static int
5978igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5979{
5980	struct adapter *adapter;
5981	int error;
5982	int result;
5983
5984	result = -1;
5985	error = sysctl_handle_int(oidp, &result, 0, req);
5986
5987	if (error || !req->newptr)
5988		return (error);
5989
5990	/*
5991	 * This value will cause a hex dump of the
5992	 * first 32 16-bit words of the EEPROM to
5993	 * the screen.
5994	 */
5995	if (result == 1) {
5996		adapter = (struct adapter *)arg1;
5997		igb_print_nvm_info(adapter);
5998	}
5999
6000	return (error);
6001}
6002
6003static void
6004igb_print_nvm_info(struct adapter *adapter)
6005{
6006	u16	eeprom_data;
6007	int	i, j, row = 0;
6008
6009	/* It's a bit crude, but it gets the job done */
6010	printf("\nInterface EEPROM Dump:\n");
6011	printf("Offset\n0x0000  ");
6012	for (i = 0, j = 0; i < 32; i++, j++) {
6013		if (j == 8) { /* Make the offset block */
6014			j = 0; ++row;
6015			printf("\n0x00%x0  ",row);
6016		}
6017		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6018		printf("%04x ", eeprom_data);
6019	}
6020	printf("\n");
6021}
6022
6023static void
6024igb_set_sysctl_value(struct adapter *adapter, const char *name,
6025	const char *description, int *limit, int value)
6026{
6027	*limit = value;
6028	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6029	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6030	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6031}
6032
6033/*
6034** Set flow control using sysctl:
6035** Flow control values:
6036** 	0 - off
6037**	1 - rx pause
6038**	2 - tx pause
6039**	3 - full
6040*/
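/*
** These values correspond to e1000_fc_none (0), e1000_fc_rx_pause (1),
** e1000_fc_tx_pause (2) and e1000_fc_full (3).  A typical use, assuming
** the sysctl leaf registered elsewhere in the driver is named "fc":
** "sysctl dev.igb.0.fc=3".
*/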
6041static int
6042igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6043{
6044	int		error;
6045	static int	input = 3; /* default is full */
6046	struct adapter	*adapter = (struct adapter *) arg1;
6047
6048	error = sysctl_handle_int(oidp, &input, 0, req);
6049
6050	if ((error) || (req->newptr == NULL))
6051		return (error);
6052
6053	switch (input) {
6054		case e1000_fc_rx_pause:
6055		case e1000_fc_tx_pause:
6056		case e1000_fc_full:
6057		case e1000_fc_none:
6058			adapter->hw.fc.requested_mode = input;
6059			adapter->fc = input;
6060			break;
6061		default:
6062			/* Do nothing */
6063			return (error);
6064	}
6065
6066	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6067	e1000_force_mac_fc(&adapter->hw);
6068	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
6069	return (error);
6070}
6071
6072/*
6073** Manage DMA Coalesce:
6074** Control values:
6075** 	0/1 - off/on
6076**	Legal timer values are:
6077	**	250, 500, or 1000-10000 in steps of 1000
6078*/
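/*
** Writing 1 selects the default of 1000; any accepted value causes the
** interface to be reinitialized via igb_init().  Example, assuming the
** sysctl leaf registered elsewhere in the driver is named "dmac":
** "sysctl dev.igb.0.dmac=250".
*/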
6079static int
6080igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6081{
6082	struct adapter *adapter = (struct adapter *) arg1;
6083	int		error;
6084
6085	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6086
6087	if ((error) || (req->newptr == NULL))
6088		return (error);
6089
6090	switch (adapter->dmac) {
6091		case 0:
6092			/* Disabling */
6093			break;
6094		case 1: /* Just enable and use default */
6095			adapter->dmac = 1000;
6096			break;
6097		case 250:
6098		case 500:
6099		case 1000:
6100		case 2000:
6101		case 3000:
6102		case 4000:
6103		case 5000:
6104		case 6000:
6105		case 7000:
6106		case 8000:
6107		case 9000:
6108		case 10000:
6109			/* Legal values - allow */
6110			break;
6111		default:
6112			/* Do nothing, illegal value */
6113			adapter->dmac = 0;
6114			return (EINVAL);
6115	}
6116	/* Reinit the interface */
6117	igb_init(adapter);
6118	return (error);
6119}
6120
6121/*
6122** Manage Energy Efficient Ethernet:
6123** Control values:
6124**     0/1 - enabled/disabled
6125*/
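/*
** A non-zero value sets dev_spec._82575.eee_disable (i.e. turns EEE
** off) and the interface is reinitialized for the change to take
** effect.  Example, assuming the sysctl leaf registered elsewhere in
** the driver is named "eee_disabled": "sysctl dev.igb.0.eee_disabled=1".
*/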
6126static int
6127igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6128{
6129	struct adapter	*adapter = (struct adapter *) arg1;
6130	int		error, value;
6131
6132	value = adapter->hw.dev_spec._82575.eee_disable;
6133	error = sysctl_handle_int(oidp, &value, 0, req);
6134	if (error || req->newptr == NULL)
6135		return (error);
6136	IGB_CORE_LOCK(adapter);
6137	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6138	igb_init_locked(adapter);
6139	IGB_CORE_UNLOCK(adapter);
6140	return (0);
6141}
6142