/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/11/sys/dev/e1000/if_em.c 331722 2018-03-29 02:50:57Z eadler $*/

#include "opt_em.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#ifdef DDB
#include <sys/types.h>
#include <ddb/ddb.h>
#endif
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.6.1-k";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V,  PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(if_t, struct mbuf *);
static int	em_mq_start_locked(if_t,
		    struct tx_ring *);
static void	em_qflush(if_t);
#else
static void	em_start(if_t);
static void	em_start_locked(if_t, struct tx_ring *);
#endif
static int	em_ioctl(if_t, u_long, caddr_t);
static uint64_t	em_get_counter(if_t, ift_counter);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(if_t, struct ifmediareq *);
static int	em_media_change(if_t);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);
static void	em_flush_desc_rings(struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
		    const struct em_rxbuffer *rxbuf);
static void	em_receive_checksum(uint32_t status, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, if_t, u16);
static void	em_unregister_vlan(void *, if_t, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

#ifdef EM_MULTIQUEUE
static void	em_enable_vectors_82574(struct adapter *);
#endif

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

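/*
 * A brief note on what follows (not in the original source): DRIVER_MODULE()
 * registers the "em" driver with the pci bus via newbus, and MODULE_DEPEND()
 * records load-order dependencies so the kernel linker brings in pci, ether
 * (and netmap, when built with DEV_NETMAP) before this module initializes.
 */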
devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

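/*
 * Editorial note on the conversions below: the adapter's interrupt delay
 * registers count in units of 1.024 usec (1024 ns), so these macros convert
 * between register ticks and microseconds with rounding, e.g.
 * EM_TICKS_TO_USECS(100) = (1024 * 100 + 500) / 1000 = 102 usec.
 */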
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

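/*
 * Editorial note: the ITR register is programmed in 256 ns units, so the
 * interval for a target interrupt rate works out to 10^9 / (rate * 256);
 * for the default 8000 interrupts/sec that is
 * 1000000000 / (8000 * 256) = 488.
 */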
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

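/*
 * Editorial note (assumption based on its use elsewhere in this file):
 * TSO_WORKAROUND is extra descriptor slack accounted for when a TSO frame
 * is queued in em_xmit(); a small hardware workaround margin rather than
 * a tunable.
 */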
#define TSO_WORKAROUND	4

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

#ifdef EM_MULTIQUEUE
static int em_num_queues = 1;
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");
#endif

/*
** Global variable to store the last CPU used when binding queues
** to CPUs in em_allocate_msix.  Starts at CPU_FIRST and increments
** when a queue is bound to a CPU.
*/
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/*
 * Energy efficient ethernet - default to OFF; the value is copied into the
 * hardware's eee_disable flag, so 1 disables EEE.
 */
static int eee_setting = 1;
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Disable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on the
 *  adapter based on its PCI vendor/device id.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}
	/*
	** In the newer SPT devices the flash is not a separate BAR;
	** rather it lives within BAR0, so use the same tag and an
	** offset handle for the FLASH read/write macros in the
	** shared code.
	*/
	else if (hw->mac.type == e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	}

	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
	if (error) {
		device_printf(dev, "Setup of Shared code failed, error %d\n",
		    error);
		error = ENXIO;
		goto err_pci;
	}

	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  Each
	 * count must not exceed the hardware maximum and must be a
	 * multiple of EM_DBA_ALIGN.
	 */
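	/*
	 * Editorial example of the arithmetic below (assuming the usual
	 * 16-byte legacy descriptors and 128-byte EM_DBA_ALIGN): any
	 * multiple of 8 descriptors keeps the rings aligned, and
	 * out-of-range or misaligned tunables fall back to the defaults.
	 */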
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status: when set, completion is reported as soon as the
	 * packet has been DMA'd into the FIFO rather than after it
	 * has been transmitted on the wire.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is important for
	** reading the nvm and mac address from it.
	*/
	e1000_reset_hw(hw);


	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != (void *)NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((if_getflags(ifp) & IFF_UP) &&
	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
			if (!if_sendq_empty(ifp))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifndef EM_MULTIQUEUE
static void
em_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!if_sendq_empty(ifp)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			break;
		}
		m_head = if_dequeue(ifp);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Mark the queue as having work */
		if (txr->busy == EM_TX_IDLE)
			txr->busy = EM_TX_BUSY;

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	}

	return;
}

static void
em_start(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#else /* EM_MULTIQUEUE */
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send; that deferral, more than
 *  simply having multiple tx queues, is the advantage of this path.
 **********************************************************************/
/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(if_t ifp, struct mbuf *m)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	unsigned int	i, error;

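	/*
	 * Pick a ring: a stack-supplied flowid keeps all packets of a
	 * flow on one queue (preserving ordering), while unhashed
	 * traffic falls back to the current CPU.
	 */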
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];

	error = drbr_enqueue(ifp, txr->br, m);
	if (error)
		return (error);

	if (EM_TX_TRYLOCK(txr)) {
		em_mq_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(txr->tq, &txr->tx_task);

	return (0);
}

static int
em_mq_start_locked(if_t ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	EM_TX_LOCK_ASSERT(txr);

	if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0) {
		return (ENETDOWN);
	}

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
			break;
	}

	/* Mark the queue as having work */
	if ((enq > 0) && (txr->busy == EM_TX_IDLE))
		txr->busy = EM_TX_BUSY;

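	/*
	 * If the ring is running low, reclaim completed descriptors; if
	 * it still cannot hold a worst-case scatter list, tell the stack
	 * to back off by marking the interface OACTIVE.
	 */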
	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER) {
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_pch_spt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
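		/*
		 * The MTU excludes the Ethernet header and CRC, so the
		 * largest acceptable MTU is max_frame_size less those
		 * overheads.
		 */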
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->hw.mac.max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			em_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset.  We make a duplicate
	 * in RAR[14] for that eventuality, which assures the
	 * interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	else
		if_sethwassistbits(ifp, 0, CSUM_TCP | CSUM_UDP);

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
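	/*
	 * The sizes map onto the standard mbuf cluster pools: 2k
	 * clusters (MCLBYTES), page-sized jumbo clusters (MJUMPAGESIZE),
	 * and 9k jumbo clusters (MJUM9BYTES).
	 */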
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with single queue
 *
 *********************************************************************/
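/*
 * Editorial note: "count" is the packet budget for this poll pass and
 * the return value reports how many receive packets were processed,
 * which the polling framework uses when adapting its burst size.
 */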
static int
em_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */


/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
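/*
 * This runs as an interrupt filter, so it must not sleep or take
 * regular mutexes: it only classifies the cause (FILTER_STRAY means
 * the interrupt was not ours), masks further interrupts, and defers
 * the real RX/TX work to the taskqueue.
 */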
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
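/*
 * When em_rxeof() indicates more work than the process limit allowed,
 * the task re-enqueues itself instead of re-enabling the interrupt,
 * which keeps interrupt load bounded under sustained traffic.
 */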
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);

		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr);
#else
		if (!if_sendq_empty(ifp))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}


/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		em_handle_link(adapter, 0);
	} else
		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
		    EM_MSIX_LINK | E1000_IMS_LSC);
	/*
	** Because we must read the ICR for this interrupt,
	** autoclear may clear other pending causes as well;
	** for that reason we simply raise a soft interrupt
	** for all of those vectors.
	*/
	if (reg_icr) {
		E1000_WRITE_REG(&adapter->hw,
			E1000_ICS, adapter->ims);
	}
	return;
}

static void
em_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else {
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	}
}

static void
em_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr);
#else
	if (!if_sendq_empty(ifp))
		em_start_locked(ifp, txr);
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
}

static void
em_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	if_t ifp = adapter->ifp;

	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
		return;

	EM_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	em_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
	    EM_MSIX_LINK | E1000_IMS_LSC);
	if (adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr);
#else
1771			if (!if_sendq_empty(ifp))
1772				em_start_locked(ifp, txr);
1773#endif
1774			EM_TX_UNLOCK(txr);
1775		}
1776	}
1777	EM_CORE_UNLOCK(adapter);
1778}
1779
1780
1781/*********************************************************************
1782 *
1783 *  Media Ioctl callback
1784 *
1785 *  This routine is called whenever the user queries the status of
1786 *  the interface using ifconfig.
1787 *
1788 **********************************************************************/
1789static void
1790em_media_status(if_t ifp, struct ifmediareq *ifmr)
1791{
1792	struct adapter *adapter = if_getsoftc(ifp);
1793	u_char fiber_type = IFM_1000_SX;
1794
1795	INIT_DEBUGOUT("em_media_status: begin");
1796
1797	EM_CORE_LOCK(adapter);
1798	em_update_link_status(adapter);
1799
1800	ifmr->ifm_status = IFM_AVALID;
1801	ifmr->ifm_active = IFM_ETHER;
1802
1803	if (!adapter->link_active) {
1804		EM_CORE_UNLOCK(adapter);
1805		return;
1806	}
1807
1808	ifmr->ifm_status |= IFM_ACTIVE;
1809
1810	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1811	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1812		ifmr->ifm_active |= fiber_type | IFM_FDX;
1813	} else {
1814		switch (adapter->link_speed) {
1815		case 10:
1816			ifmr->ifm_active |= IFM_10_T;
1817			break;
1818		case 100:
1819			ifmr->ifm_active |= IFM_100_TX;
1820			break;
1821		case 1000:
1822			ifmr->ifm_active |= IFM_1000_T;
1823			break;
1824		}
1825		if (adapter->link_duplex == FULL_DUPLEX)
1826			ifmr->ifm_active |= IFM_FDX;
1827		else
1828			ifmr->ifm_active |= IFM_HDX;
1829	}
1830	EM_CORE_UNLOCK(adapter);
1831}
1832
1833/*********************************************************************
1834 *
1835 *  Media Ioctl callback
1836 *
1837 *  This routine is called when the user changes speed/duplex using
1838 *  media/mediaopt options with ifconfig.
1839 *
1840 **********************************************************************/
1841static int
1842em_media_change(if_t ifp)
1843{
1844	struct adapter *adapter = if_getsoftc(ifp);
1845	struct ifmedia  *ifm = &adapter->media;
1846
1847	INIT_DEBUGOUT("em_media_change: begin");
1848
1849	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1850		return (EINVAL);
1851
1852	EM_CORE_LOCK(adapter);
1853	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1854	case IFM_AUTO:
1855		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1856		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1857		break;
1858	case IFM_1000_LX:
1859	case IFM_1000_SX:
1860	case IFM_1000_T:
1861		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1862		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1863		break;
1864	case IFM_100_TX:
1865		adapter->hw.mac.autoneg = FALSE;
1866		adapter->hw.phy.autoneg_advertised = 0;
1867		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1868			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1869		else
1870			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1871		break;
1872	case IFM_10_T:
1873		adapter->hw.mac.autoneg = FALSE;
1874		adapter->hw.phy.autoneg_advertised = 0;
1875		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1876			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1877		else
1878			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1879		break;
1880	default:
1881		device_printf(adapter->dev, "Unsupported media type\n");
1882	}
1883
1884	em_init_locked(adapter);
1885	EM_CORE_UNLOCK(adapter);
1886
1887	return (0);
1888}
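/*
 * Usage sketch (illustrative): a request such as
 *	ifconfig em0 media 100baseTX mediaopt full-duplex
 * arrives here with IFM_SUBTYPE(ifm->ifm_media) == IFM_100_TX and
 * IFM_FDX set, taking the IFM_100_TX case above to force 100/full
 * with autonegotiation disabled.
 */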
1889
1890/*********************************************************************
1891 *
1892 *  This routine maps the mbufs to tx descriptors.
1893 *
1894 *  return 0 on success, positive on failure
1895 **********************************************************************/
1896
1897static int
1898em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1899{
1900	struct adapter		*adapter = txr->adapter;
1901	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1902	bus_dmamap_t		map;
1903	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
1904	struct e1000_tx_desc	*ctxd = NULL;
1905	struct mbuf		*m_head;
1906	struct ether_header	*eh;
1907	struct ip		*ip = NULL;
1908	struct tcphdr		*tp = NULL;
1909	u32			txd_upper = 0, txd_lower = 0;
1910	int			ip_off, poff;
1911	int			nsegs, i, j, first, last = 0;
1912	int			error;
1913	bool			do_tso, tso_desc, remap = TRUE;
1914
1915	m_head = *m_headp;
1916	do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1917	tso_desc = FALSE;
1918	ip_off = poff = 0;
1919
1920	/*
1921	 * Intel recommends entire IP/TCP header length reside in a single
1922	 * buffer. If multiple descriptors are used to describe the IP and
1923	 * TCP header, each descriptor should describe one or more
1924	 * complete headers; descriptors referencing only parts of headers
1925	 * are not supported. If all layer headers are not coalesced into
1926	 * a single buffer, each buffer should not cross a 4KB boundary,
1927	 * or be larger than the maximum read request size.
1928	 * The controller also requires modifying the IP/TCP header to make
1929	 * TSO work, so we first obtain a writable mbuf chain and then
1930	 * coalesce the ethernet/IP/TCP headers into a single buffer to meet
1931	 * the controller's requirements. This also simplifies IP/TCP/UDP
1932	 * checksum offloading, which has similar restrictions.
1933	 */
1934	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1935		if (do_tso || (m_head->m_next != NULL &&
1936		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1937			if (M_WRITABLE(*m_headp) == 0) {
1938				m_head = m_dup(*m_headp, M_NOWAIT);
1939				m_freem(*m_headp);
1940				if (m_head == NULL) {
1941					*m_headp = NULL;
1942					return (ENOBUFS);
1943				}
1944				*m_headp = m_head;
1945			}
1946		}
1947		/*
1948		 * XXX
1949		 * Assume IPv4, we don't have TSO/checksum offload support
1950		 * for IPv6 yet.
1951		 */
1952		ip_off = sizeof(struct ether_header);
1953		if (m_head->m_len < ip_off) {
1954			m_head = m_pullup(m_head, ip_off);
1955			if (m_head == NULL) {
1956				*m_headp = NULL;
1957				return (ENOBUFS);
1958			}
1959		}
1960		eh = mtod(m_head, struct ether_header *);
1961		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1962			ip_off = sizeof(struct ether_vlan_header);
1963			if (m_head->m_len < ip_off) {
1964				m_head = m_pullup(m_head, ip_off);
1965				if (m_head == NULL) {
1966					*m_headp = NULL;
1967					return (ENOBUFS);
1968				}
1969			}
1970		}
1971		if (m_head->m_len < ip_off + sizeof(struct ip)) {
1972			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1973			if (m_head == NULL) {
1974				*m_headp = NULL;
1975				return (ENOBUFS);
1976			}
1977		}
1978		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1979		poff = ip_off + (ip->ip_hl << 2);
1980
1981		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1982			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1983				m_head = m_pullup(m_head, poff +
1984				    sizeof(struct tcphdr));
1985				if (m_head == NULL) {
1986					*m_headp = NULL;
1987					return (ENOBUFS);
1988				}
1989			}
1990			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1991			/*
1992			 * TSO workaround: pull TSO_WORKAROUND (4) more
1993			 *   bytes of data into the mbuf beyond the TCP header.
1994			 */
1995			if (m_head->m_len < poff + (tp->th_off << 2)) {
1996				m_head = m_pullup(m_head, poff +
1997				                 (tp->th_off << 2) +
1998				                 TSO_WORKAROUND);
1999				if (m_head == NULL) {
2000					*m_headp = NULL;
2001					return (ENOBUFS);
2002				}
2003			}
2004			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2006			if (do_tso) {
2007				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2008				                  (ip->ip_hl << 2) +
2009				                  (tp->th_off << 2));
2010				ip->ip_sum = 0;
2011				/*
2012				 * The pseudo TCP checksum must not include the
2013				 * TCP payload length, so the driver recomputes
2014				 * the checksum here as the hardware expects to
2015				 * see it, per Microsoft's Large Send
2016				 * specification.
2017				 */
2018				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2019				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2020			}
2021		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2022			if (m_head->m_len < poff + sizeof(struct udphdr)) {
2023				m_head = m_pullup(m_head, poff +
2024				    sizeof(struct udphdr));
2025				if (m_head == NULL) {
2026					*m_headp = NULL;
2027					return (ENOBUFS);
2028				}
2029			}
2030			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2031		}
2032		*m_headp = m_head;
2033	}
2034
2035	/*
2036	 * Map the packet for DMA
2037	 *
2038	 * Capture the first descriptor index,
2039	 * this descriptor will have the index
2040	 * of the EOP which is the only one that
2041	 * now gets a DONE bit writeback.
2042	 */
2043	first = txr->next_avail_desc;
2044	tx_buffer = &txr->tx_buffers[first];
2045	tx_buffer_mapped = tx_buffer;
2046	map = tx_buffer->map;
2047
2048retry:
2049	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2050	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2051
2052	/*
2053	 * There are two types of errors we can (try) to handle:
2054	 * - EFBIG means the mbuf chain was too long and bus_dma ran
2055	 *   out of segments.  Defragment the mbuf chain and try again.
2056	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2057	 *   at this point in time.  Defer sending and try again later.
2058	 * All other errors, in particular EINVAL, are fatal and prevent the
2059	 * mbuf chain from ever going through.  Drop it and report error.
2060	 */
2061	if (error == EFBIG && remap) {
2062		struct mbuf *m;
2063
2064		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2065		if (m == NULL) {
2066			adapter->mbuf_defrag_failed++;
2067			m_freem(*m_headp);
2068			*m_headp = NULL;
2069			return (ENOBUFS);
2070		}
2071		*m_headp = m;
2072
2073		/* Try it again, but only once */
2074		remap = FALSE;
2075		goto retry;
2076	} else if (error != 0) {
2077		adapter->no_tx_dma_setup++;
2078		m_freem(*m_headp);
2079		*m_headp = NULL;
2080		return (error);
2081	}
2082
2083	/*
2084	 * TSO Hardware workaround, if this packet is not
2085	 * TSO, and is only a single descriptor long, and
2086	 * it follows a TSO burst, then we need to add a
2087	 * sentinel descriptor to prevent premature writeback.
2088	 */
2089	if ((!do_tso) && (txr->tx_tso == TRUE)) {
2090		if (nsegs == 1)
2091			tso_desc = TRUE;
2092		txr->tx_tso = FALSE;
2093	}
2094
2095	if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2096		txr->no_desc_avail++;
2097		bus_dmamap_unload(txr->txtag, map);
2098		return (ENOBUFS);
2099	}
2100	m_head = *m_headp;
2101
2102	/* Do hardware assists */
2103	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2104		em_tso_setup(txr, m_head, ip_off, ip, tp,
2105		    &txd_upper, &txd_lower);
2106		/* we need to make a final sentinel transmit desc */
2107		tso_desc = TRUE;
2108	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2109		em_transmit_checksum_setup(txr, m_head,
2110		    ip_off, ip, &txd_upper, &txd_lower);
2111
2112	if (m_head->m_flags & M_VLANTAG) {
2113		/* Set the vlan id. */
2114		txd_upper |= htole16(if_getvtag(m_head)) << 16;
2115		/* Tell hardware to add the tag */
2116		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2117	}
2118
2119	i = txr->next_avail_desc;
2120
2121	/* Set up our transmit descriptors */
2122	for (j = 0; j < nsegs; j++) {
2123		bus_size_t seg_len;
2124		bus_addr_t seg_addr;
2125
2126		tx_buffer = &txr->tx_buffers[i];
2127		ctxd = &txr->tx_base[i];
2128		seg_addr = segs[j].ds_addr;
2129		seg_len  = segs[j].ds_len;
2130		/*
2131		** TSO Workaround:
2132		** If this is the last descriptor, we want to
2133		** split it so we have a small final sentinel
2134		*/
2135		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2136			seg_len -= TSO_WORKAROUND;
2137			ctxd->buffer_addr = htole64(seg_addr);
2138			ctxd->lower.data = htole32(
2139				adapter->txd_cmd | txd_lower | seg_len);
2140			ctxd->upper.data = htole32(txd_upper);
2141			if (++i == adapter->num_tx_desc)
2142				i = 0;
2143
2144			/* Now make the sentinel */
2145			txr->tx_avail--;
2146			ctxd = &txr->tx_base[i];
2147			tx_buffer = &txr->tx_buffers[i];
2148			ctxd->buffer_addr =
2149			    htole64(seg_addr + seg_len);
2150			ctxd->lower.data = htole32(
2151			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2152			ctxd->upper.data =
2153			    htole32(txd_upper);
2154			last = i;
2155			if (++i == adapter->num_tx_desc)
2156				i = 0;
2157		} else {
2158			ctxd->buffer_addr = htole64(seg_addr);
2159			ctxd->lower.data = htole32(
2160			adapter->txd_cmd | txd_lower | seg_len);
2161			ctxd->upper.data = htole32(txd_upper);
2162			last = i;
2163			if (++i == adapter->num_tx_desc)
2164				i = 0;
2165		}
2166		tx_buffer->m_head = NULL;
2167		tx_buffer->next_eop = -1;
2168	}
2169
2170	txr->next_avail_desc = i;
2171	txr->tx_avail -= nsegs;
2172
2173	tx_buffer->m_head = m_head;
2174	/*
2175	** Here we swap the map so the last descriptor,
2176	** which gets the completion interrupt has the
2177	** real map, and the first descriptor gets the
2178	** unused map from this descriptor.
2179	*/
2180	tx_buffer_mapped->map = tx_buffer->map;
2181	tx_buffer->map = map;
2182	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2183
2184	/*
2185	 * The last descriptor of the packet
2186	 * needs End Of Packet (EOP)
2187	 * and Report Status (RS) set.
2188	 */
2189	ctxd->lower.data |=
2190	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2191	/*
2192	 * Keep track in the first buffer which
2193	 * descriptor will be written back
2194	 */
2195	tx_buffer = &txr->tx_buffers[first];
2196	tx_buffer->next_eop = last;
2197
2198	/*
2199	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2200	 * that this frame is available to transmit.
2201	 */
2202	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2203	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2204	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2205
2206	return (0);
2207}
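/*
 * Descriptor accounting sketch for em_xmit() above: the ring is a
 * circular buffer of num_tx_desc entries with next_avail_desc as the
 * producer index and tx_avail as the free count, so queueing a packet
 * of nsegs segments amounts to:
 *
 *	next_avail_desc = (next_avail_desc + nsegs) % num_tx_desc;
 *	tx_avail -= nsegs;	(plus one more for a TSO sentinel)
 *
 * The earlier check against (nsegs + EM_MAX_SCATTER) is deliberately
 * conservative so a worst-case packet always still fits.
 */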
2208
2209static void
2210em_set_promisc(struct adapter *adapter)
2211{
2212	if_t ifp = adapter->ifp;
2213	u32		reg_rctl;
2214
2215	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2216
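	/*
	 * RCTL bits used here: UPE = unicast promiscuous enable,
	 * MPE = multicast promiscuous enable, SBP = store bad
	 * packets (accept frames with errors).
	 */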
2217	if (if_getflags(ifp) & IFF_PROMISC) {
2218		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2219		/* Turn this on if you want to see bad packets */
2220		if (em_debug_sbp)
2221			reg_rctl |= E1000_RCTL_SBP;
2222		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2223	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2224		reg_rctl |= E1000_RCTL_MPE;
2225		reg_rctl &= ~E1000_RCTL_UPE;
2226		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2227	}
2228}
2229
2230static void
2231em_disable_promisc(struct adapter *adapter)
2232{
2233	if_t		ifp = adapter->ifp;
2234	u32		reg_rctl;
2235	int		mcnt = 0;
2236
2237	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2238	reg_rctl &=  (~E1000_RCTL_UPE);
2239	if (if_getflags(ifp) & IFF_ALLMULTI)
2240		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2241	else
2242		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2243	/* Don't disable if in MAX groups */
2244	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2245		reg_rctl &=  (~E1000_RCTL_MPE);
2246	reg_rctl &=  (~E1000_RCTL_SBP);
2247	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2248}
2249
2250
2251/*********************************************************************
2252 *  Multicast Update
2253 *
2254 *  This routine is called whenever multicast address list is updated.
2255 *
2256 **********************************************************************/
2257
2258static void
2259em_set_multi(struct adapter *adapter)
2260{
2261	if_t ifp = adapter->ifp;
2262	u32 reg_rctl = 0;
2263	u8  *mta; /* Multicast array memory */
2264	int mcnt = 0;
2265
2266	IOCTL_DEBUGOUT("em_set_multi: begin");
2267
2268	mta = adapter->mta;
2269	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2270
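	/*
	 * On 82542 rev 2.0 the receiver is held in reset (RCTL_RST) and
	 * MWI is disabled while the multicast table array is rewritten;
	 * the mirrored block further below undoes both afterward.
	 */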
2271	if (adapter->hw.mac.type == e1000_82542 &&
2272	    adapter->hw.revision_id == E1000_REVISION_2) {
2273		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2274		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2275			e1000_pci_clear_mwi(&adapter->hw);
2276		reg_rctl |= E1000_RCTL_RST;
2277		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2278		msec_delay(5);
2279	}
2280
2281	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
2282
2283	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2284		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2285		reg_rctl |= E1000_RCTL_MPE;
2286		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2287	} else
2288		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2289
2290	if (adapter->hw.mac.type == e1000_82542 &&
2291	    adapter->hw.revision_id == E1000_REVISION_2) {
2292		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2293		reg_rctl &= ~E1000_RCTL_RST;
2294		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2295		msec_delay(5);
2296		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2297			e1000_pci_set_mwi(&adapter->hw);
2298	}
2299}
2300
2301
2302/*********************************************************************
2303 *  Timer routine
2304 *
2305 *  This routine checks for link status and updates statistics.
2306 *
2307 **********************************************************************/
2308
2309static void
2310em_local_timer(void *arg)
2311{
2312	struct adapter	*adapter = arg;
2313	if_t ifp = adapter->ifp;
2314	struct tx_ring	*txr = adapter->tx_rings;
2315	struct rx_ring	*rxr = adapter->rx_rings;
2316	u32		trigger = 0;
2317
2318	EM_CORE_LOCK_ASSERT(adapter);
2319
2320	em_update_link_status(adapter);
2321	em_update_stats_counters(adapter);
2322
2323	/* Reset LAA into RAR[0] on 82571 */
2324	if ((adapter->hw.mac.type == e1000_82571) &&
2325	    e1000_get_laa_state_82571(&adapter->hw))
2326		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2327
2328	/* Mask to use in the irq trigger */
2329	if (adapter->msix_mem) {
2330		for (int i = 0; i < adapter->num_queues; i++, rxr++)
2331			trigger |= rxr->ims;
2332		rxr = adapter->rx_rings;
2333	} else
2334		trigger = E1000_ICS_RXDMT0;
2335
2336	/*
2337	** Check on the state of the TX queue(s); this can
2338	** be done without the lock because it's read-only
2339	** and the HUNG state is static once set.
2340	*/
2341	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2342		if (txr->busy == EM_TX_HUNG)
2343			goto hung;
2344		if (txr->busy >= EM_TX_MAXTRIES)
2345			txr->busy = EM_TX_HUNG;
2346		/* Schedule a TX tasklet if needed */
2347		if (txr->tx_avail <= EM_MAX_SCATTER)
2348			taskqueue_enqueue(txr->tq, &txr->tx_task);
2349	}
2350
2351	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2352#ifndef DEVICE_POLLING
2353	/* Trigger an RX interrupt to guarantee mbuf refresh */
2354	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2355#endif
2356	return;
2357hung:
2358	/* Looks like we're hung */
2359	device_printf(adapter->dev, "Watchdog timeout on Queue[%d] -- resetting\n",
2360			txr->me);
2361	em_print_debug_info(adapter);
2362	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
2363	adapter->watchdog_events++;
2364	em_init_locked(adapter);
2365}
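/*
 * Hang-detection lifecycle, as implied by the checks above: em_txeof()
 * advances txr->busy while the ring makes no progress and resets it
 * whenever descriptors complete; once the count reaches EM_TX_MAXTRIES
 * this timer marks the queue EM_TX_HUNG, and the next tick takes the
 * "hung" path, logging a watchdog event and reinitializing.
 */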
2366
2367
2368static void
2369em_update_link_status(struct adapter *adapter)
2370{
2371	struct e1000_hw *hw = &adapter->hw;
2372	if_t ifp = adapter->ifp;
2373	device_t dev = adapter->dev;
2374	struct tx_ring *txr = adapter->tx_rings;
2375	u32 link_check = 0;
2376
2377	/* Get the cached link value or read phy for real */
2378	switch (hw->phy.media_type) {
2379	case e1000_media_type_copper:
2380		if (hw->mac.get_link_status) {
2381			if (hw->mac.type == e1000_pch_spt)
2382				msec_delay(50);
2383			/* Do the work to read phy */
2384			e1000_check_for_link(hw);
2385			link_check = !hw->mac.get_link_status;
2386			if (link_check) /* ESB2 fix */
2387				e1000_cfg_on_link_up(hw);
2388		} else
2389			link_check = TRUE;
2390		break;
2391	case e1000_media_type_fiber:
2392		e1000_check_for_link(hw);
2393		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2394                                 E1000_STATUS_LU);
2395		break;
2396	case e1000_media_type_internal_serdes:
2397		e1000_check_for_link(hw);
2398		link_check = adapter->hw.mac.serdes_has_link;
2399		break;
2400	default:
2401	case e1000_media_type_unknown:
2402		break;
2403	}
2404
2405	/* Now check for a transition */
2406	if (link_check && (adapter->link_active == 0)) {
2407		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2408		    &adapter->link_duplex);
2409
2410		/*
2411		** There have proven to be problems with TSO when not at full
2412		** gigabit speed, so disable the assist automatically when at
2413		** lower speeds.  -jfv
2414		*/
2415		if (if_getcapenable(ifp) & IFCAP_TSO4) {
2416			if (adapter->link_speed == SPEED_1000)
2417				if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
2418			else
2419				if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
2420		}
2421
2422		/* Check if we must disable SPEED_MODE bit on PCI-E */
2423		if ((adapter->link_speed != SPEED_1000) &&
2424		    ((hw->mac.type == e1000_82571) ||
2425		    (hw->mac.type == e1000_82572))) {
2426			int tarc0;
2427			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2428			tarc0 &= ~TARC_SPEED_MODE_BIT;
2429			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2430		}
2431		if (bootverbose)
2432			device_printf(dev, "Link is up %d Mbps %s\n",
2433			    adapter->link_speed,
2434			    ((adapter->link_duplex == FULL_DUPLEX) ?
2435			    "Full Duplex" : "Half Duplex"));
2436		adapter->link_active = 1;
2437		adapter->smartspeed = 0;
2438		if_setbaudrate(ifp, adapter->link_speed * 1000000);
2439		if_link_state_change(ifp, LINK_STATE_UP);
2440	} else if (!link_check && (adapter->link_active == 1)) {
2441		if_setbaudrate(ifp, 0);
2442		adapter->link_speed = 0;
2443		adapter->link_duplex = 0;
2444		if (bootverbose)
2445			device_printf(dev, "Link is Down\n");
2446		adapter->link_active = 0;
2447		/* Link down, disable hang detection */
2448		for (int i = 0; i < adapter->num_queues; i++, txr++)
2449			txr->busy = EM_TX_IDLE;
2450		if_link_state_change(ifp, LINK_STATE_DOWN);
2451	}
2452}
2453
2454/*********************************************************************
2455 *
2456 *  This routine disables all traffic on the adapter by issuing a
2457 *  global reset on the MAC and deallocates TX/RX buffers.
2458 *
2459 *  This routine should always be called with BOTH the CORE
2460 *  and TX locks.
2461 **********************************************************************/
2462
2463static void
2464em_stop(void *arg)
2465{
2466	struct adapter	*adapter = arg;
2467	if_t ifp = adapter->ifp;
2468	struct tx_ring	*txr = adapter->tx_rings;
2469
2470	EM_CORE_LOCK_ASSERT(adapter);
2471
2472	INIT_DEBUGOUT("em_stop: begin");
2473
2474	em_disable_intr(adapter);
2475	callout_stop(&adapter->timer);
2476
2477	/* Tell the stack that the interface is no longer active */
2478	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
2479
2480	/* Disarm Hang Detection. */
2481	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2482		EM_TX_LOCK(txr);
2483		txr->busy = EM_TX_IDLE;
2484		EM_TX_UNLOCK(txr);
2485	}
2486
2487	/* I219 needs some special flushing to avoid hangs */
2488	if (adapter->hw.mac.type == e1000_pch_spt)
2489		em_flush_desc_rings(adapter);
2490
2491	e1000_reset_hw(&adapter->hw);
2492	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2493
2494	e1000_led_off(&adapter->hw);
2495	e1000_cleanup_led(&adapter->hw);
2496}
2497
2498
2499/*********************************************************************
2500 *
2501 *  Determine hardware revision.
2502 *
2503 **********************************************************************/
2504static void
2505em_identify_hardware(struct adapter *adapter)
2506{
2507	device_t dev = adapter->dev;
2508
2509	/* Make sure our PCI config space has the necessary stuff set */
2510	pci_enable_busmaster(dev);
2511	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2512
2513	/* Save off the information about this board */
2514	adapter->hw.vendor_id = pci_get_vendor(dev);
2515	adapter->hw.device_id = pci_get_device(dev);
2516	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2517	adapter->hw.subsystem_vendor_id =
2518	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2519	adapter->hw.subsystem_device_id =
2520	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2521
2522	/* Do Shared Code Init and Setup */
2523	if (e1000_set_mac_type(&adapter->hw)) {
2524		device_printf(dev, "Setup init failure\n");
2525		return;
2526	}
2527}
2528
2529static int
2530em_allocate_pci_resources(struct adapter *adapter)
2531{
2532	device_t	dev = adapter->dev;
2533	int		rid;
2534
2535	rid = PCIR_BAR(0);
2536	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2537	    &rid, RF_ACTIVE);
2538	if (adapter->memory == NULL) {
2539		device_printf(dev, "Unable to allocate bus resource: memory\n");
2540		return (ENXIO);
2541	}
2542	adapter->osdep.mem_bus_space_tag =
2543	    rman_get_bustag(adapter->memory);
2544	adapter->osdep.mem_bus_space_handle =
2545	    rman_get_bushandle(adapter->memory);
2546	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2547
2548	adapter->hw.back = &adapter->osdep;
2549
2550	return (0);
2551}
2552
2553/*********************************************************************
2554 *
2555 *  Setup the Legacy or MSI Interrupt handler
2556 *
2557 **********************************************************************/
2558static int
2559em_allocate_legacy(struct adapter *adapter)
2560{
2561	device_t dev = adapter->dev;
2562	struct tx_ring	*txr = adapter->tx_rings;
2563	int error, rid = 0;
2564
2565	/* Manually turn off all interrupts */
2566	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2567
2568	if (adapter->msix == 1) /* using MSI */
2569		rid = 1;
2570	/* We allocate a single interrupt resource */
2571	adapter->res = bus_alloc_resource_any(dev,
2572	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2573	if (adapter->res == NULL) {
2574		device_printf(dev, "Unable to allocate bus resource: "
2575		    "interrupt\n");
2576		return (ENXIO);
2577	}
2578
2579	/*
2580	 * Allocate a fast interrupt and the associated
2581	 * deferred processing contexts.
2582	 */
2583	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2584	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2585	    taskqueue_thread_enqueue, &adapter->tq);
2586	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2587	    device_get_nameunit(adapter->dev));
2588	/* Use a TX only tasklet for local timer */
2589	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2590	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2591	    taskqueue_thread_enqueue, &txr->tq);
2592	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2593	    device_get_nameunit(adapter->dev));
2594	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2595	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2596	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2597		device_printf(dev, "Failed to register fast interrupt "
2598			    "handler: %d\n", error);
2599		taskqueue_free(adapter->tq);
2600		adapter->tq = NULL;
2601		return (error);
2602	}
2603
2604	return (0);
2605}
2606
2607/*********************************************************************
2608 *
2609 *  Setup the MSIX Interrupt handlers
2610 *   This is not really Multiqueue, rather
2611 *   it's just separate interrupt vectors
2612 *   for TX, RX, and Link.
2613 *
2614 **********************************************************************/
2615static int
2616em_allocate_msix(struct adapter *adapter)
2617{
2618	device_t	dev = adapter->dev;
2619	struct		tx_ring *txr = adapter->tx_rings;
2620	struct		rx_ring *rxr = adapter->rx_rings;
2621	int		error, rid, vector = 0;
2622	int		cpu_id = 0;
2623
2624
2625	/* Make sure all interrupts are disabled */
2626	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2627
2628	/* First set up ring resources */
2629	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2630
2631		/* RX ring */
2632		rid = vector + 1;
2633
2634		rxr->res = bus_alloc_resource_any(dev,
2635		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2636		if (rxr->res == NULL) {
2637			device_printf(dev,
2638			    "Unable to allocate bus resource: "
2639			    "RX MSIX Interrupt %d\n", i);
2640			return (ENXIO);
2641		}
2642		if ((error = bus_setup_intr(dev, rxr->res,
2643		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2644		    rxr, &rxr->tag)) != 0) {
2645			device_printf(dev, "Failed to register RX handler");
2646			return (error);
2647		}
2648#if __FreeBSD_version >= 800504
2649		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2650#endif
2651		rxr->msix = vector;
2652
2653		if (em_last_bind_cpu < 0)
2654			em_last_bind_cpu = CPU_FIRST();
2655		cpu_id = em_last_bind_cpu;
2656		bus_bind_intr(dev, rxr->res, cpu_id);
2657
2658		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2659		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2660		    taskqueue_thread_enqueue, &rxr->tq);
2661		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2662		    device_get_nameunit(adapter->dev), cpu_id);
2663		/*
2664		** Set the bit to enable interrupt
2665		** in E1000_IMS -- bits 20 and 21
2666		** are for RX0 and RX1, note this has
2667		** NOTHING to do with the MSIX vector
2668		*/
2669		rxr->ims = 1 << (20 + i);
2670		adapter->ims |= rxr->ims;
2671		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2672
2673		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2674	}
2675
2676	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2677		/* TX ring */
2678		rid = vector + 1;
2679		txr->res = bus_alloc_resource_any(dev,
2680		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2681		if (txr->res == NULL) {
2682			device_printf(dev,
2683			    "Unable to allocate bus resource: "
2684			    "TX MSIX Interrupt %d\n", i);
2685			return (ENXIO);
2686		}
2687		if ((error = bus_setup_intr(dev, txr->res,
2688		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2689		    txr, &txr->tag)) != 0) {
2690			device_printf(dev, "Failed to register TX handler");
2691			return (error);
2692		}
2693#if __FreeBSD_version >= 800504
2694		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2695#endif
2696		txr->msix = vector;
2697
2698		if (em_last_bind_cpu < 0)
2699			em_last_bind_cpu = CPU_FIRST();
2700		cpu_id = em_last_bind_cpu;
2701		bus_bind_intr(dev, txr->res, cpu_id);
2702
2703		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2704		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2705		    taskqueue_thread_enqueue, &txr->tq);
2706		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2707		    device_get_nameunit(adapter->dev), cpu_id);
2708		/*
2709		** Set the bit to enable interrupt
2710		** in E1000_IMS -- bits 22 and 23
2711		** are for TX0 and TX1, note this has
2712		** NOTHING to do with the MSIX vector
2713		*/
2714		txr->ims = 1 << (22 + i);
2715		adapter->ims |= txr->ims;
2716		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2717
2718		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2719	}
2720
2721	/* Link interrupt */
2722	rid = vector + 1;
2723	adapter->res = bus_alloc_resource_any(dev,
2724	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2725	if (adapter->res == NULL) {
2726		device_printf(dev, "Unable to allocate "
2727		    "bus resource: Link interrupt [%d]\n", rid);
2728		return (ENXIO);
2729	}
2730	/* Set the link handler function */
2731	error = bus_setup_intr(dev, adapter->res,
2732	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2733	    em_msix_link, adapter, &adapter->tag);
2734	if (error) {
2735		adapter->res = NULL;
2736		device_printf(dev, "Failed to register LINK handler");
2737		return (error);
2738	}
2739#if __FreeBSD_version >= 800504
2740	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2741#endif
2742	adapter->linkvec = vector;
2743	adapter->ivars |=  (8 | vector) << 16;
2744	adapter->ivars |= 0x80000000;
2745
2746	return (0);
2747}
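/*
 * The 82574 IVAR value assembled above, as derived from the shifts in
 * this function (each 4-bit field holds a vector number, with 0x8 as
 * the field's enable bit):
 *
 *	bits  0- 3: RX queue 0		bits  4- 7: RX queue 1
 *	bits  8-11: TX queue 0		bits 12-15: TX queue 1
 *	bits 16-19: other/link cause
 *
 * The final 0x80000000 (bit 31) is documented in other e1000 drivers
 * as causing a TX interrupt on every writeback.
 */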
2748
2749
2750static void
2751em_free_pci_resources(struct adapter *adapter)
2752{
2753	device_t	dev = adapter->dev;
2754	struct tx_ring	*txr;
2755	struct rx_ring	*rxr;
2756	int		rid;
2757
2758
2759	/*
2760	** Release all the queue interrupt resources:
2761	*/
2762	for (int i = 0; i < adapter->num_queues; i++) {
2763		txr = &adapter->tx_rings[i];
2764		/* an early abort? */
2765		if (txr == NULL)
2766			break;
2767		rid = txr->msix + 1;
2768		if (txr->tag != NULL) {
2769			bus_teardown_intr(dev, txr->res, txr->tag);
2770			txr->tag = NULL;
2771		}
2772		if (txr->res != NULL)
2773			bus_release_resource(dev, SYS_RES_IRQ,
2774			    rid, txr->res);
2775
2776		rxr = &adapter->rx_rings[i];
2777		/* an early abort? */
2778		if (rxr == NULL)
2779			break;
2780		rid = rxr->msix + 1;
2781		if (rxr->tag != NULL) {
2782			bus_teardown_intr(dev, rxr->res, rxr->tag);
2783			rxr->tag = NULL;
2784		}
2785		if (rxr->res != NULL)
2786			bus_release_resource(dev, SYS_RES_IRQ,
2787			    rid, rxr->res);
2788	}
2789
2790	if (adapter->linkvec) /* we are doing MSIX */
2791		rid = adapter->linkvec + 1;
2792	else
2793		rid = (adapter->msix != 0) ? 1 : 0;
2794
2795	if (adapter->tag != NULL) {
2796		bus_teardown_intr(dev, adapter->res, adapter->tag);
2797		adapter->tag = NULL;
2798	}
2799
2800	if (adapter->res != NULL)
2801		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2802
2803
2804	if (adapter->msix)
2805		pci_release_msi(dev);
2806
2807	if (adapter->msix_mem != NULL)
2808		bus_release_resource(dev, SYS_RES_MEMORY,
2809		    adapter->memrid, adapter->msix_mem);
2810
2811	if (adapter->memory != NULL)
2812		bus_release_resource(dev, SYS_RES_MEMORY,
2813		    PCIR_BAR(0), adapter->memory);
2814
2815	if (adapter->flash != NULL)
2816		bus_release_resource(dev, SYS_RES_MEMORY,
2817		    EM_FLASH, adapter->flash);
2818}
2819
2820/*
2821 * Setup MSI or MSI/X
2822 */
2823static int
2824em_setup_msix(struct adapter *adapter)
2825{
2826	device_t dev = adapter->dev;
2827	int val;
2828
2829	/* Nearly always going to use one queue */
2830	adapter->num_queues = 1;
2831
2832	/*
2833	** Try using MSI-X for Hartwell adapters
2834	*/
2835	if ((adapter->hw.mac.type == e1000_82574) &&
2836	    (em_enable_msix == TRUE)) {
2837#ifdef EM_MULTIQUEUE
2838		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2839		if (adapter->num_queues > 1)
2840			em_enable_vectors_82574(adapter);
2841#endif
2842		/* Map the MSIX BAR */
2843		adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2844		adapter->msix_mem = bus_alloc_resource_any(dev,
2845		    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2846		if (adapter->msix_mem == NULL) {
2847			/* May not be enabled */
2848			device_printf(adapter->dev,
2849			    "Unable to map MSIX table\n");
2850			goto msi;
2851		}
2852		val = pci_msix_count(dev);
2853
2854#ifdef EM_MULTIQUEUE
2855		/* We need 5 vectors in the multiqueue case */
2856		if (adapter->num_queues > 1 ) {
2857			if (val >= 5)
2858				val = 5;
2859			else {
2860				adapter->num_queues = 1;
2861				device_printf(adapter->dev,
2862				    "Insufficient MSIX vectors for >1 queue, "
2863				    "using single queue...\n");
2864				goto msix_one;
2865			}
2866		} else {
2867msix_one:
2868#endif
2869			if (val >= 3)
2870				val = 3;
2871			else {
2872				device_printf(adapter->dev,
2873				    "Insufficient MSIX vectors, using MSI\n");
2874				goto msi;
2875			}
2876#ifdef EM_MULTIQUEUE
2877		}
2878#endif
2879
2880		if ((pci_alloc_msix(dev, &val) == 0)) {
2881			device_printf(adapter->dev,
2882			    "Using MSIX interrupts "
2883			    "with %d vectors\n", val);
2884			return (val);
2885		}
2886
2887		/*
2888		** If MSIX alloc failed or provided us with
2889		** less than needed, free and fall through to MSI
2890		*/
2891		pci_release_msi(dev);
2892	}
2893msi:
2894	if (adapter->msix_mem != NULL) {
2895		bus_release_resource(dev, SYS_RES_MEMORY,
2896		    adapter->memrid, adapter->msix_mem);
2897		adapter->msix_mem = NULL;
2898	}
2899	val = 1;
2900	if (pci_alloc_msi(dev, &val) == 0) {
2901		device_printf(adapter->dev, "Using an MSI interrupt\n");
2902		return (val);
2903	}
2904	/* Should only happen due to manual configuration */
2905	device_printf(adapter->dev, "No MSI/MSIX, using a Legacy IRQ\n");
2906	return (0);
2907}
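/*
 * Worked vector budget for the checks above: with EM_MULTIQUEUE and
 * two queues the driver needs 2 RX + 2 TX + 1 link = 5 MSIX vectors;
 * the single-queue layout needs 1 RX + 1 TX + 1 link = 3. Anything
 * less falls back to MSI, and failing that to a legacy shared IRQ
 * (return value 0).
 */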
2908
2909
2910/*
2911** The 3 following flush routines are used as a workaround in the
2912** I219 client parts and only for them.
2913**
2914** em_flush_tx_ring - remove all descriptors from the tx_ring
2915**
2916** We want to clear all pending descriptors from the TX ring.
2917** Zeroing happens when the HW reads the regs. We assign the ring itself
2918** as the data buffer of the dummy descriptor; its contents don't matter
2919** since we are about to reset the HW anyway.
2920*/
2921static void
2922em_flush_tx_ring(struct adapter *adapter)
2923{
2924	struct e1000_hw		*hw = &adapter->hw;
2925	struct tx_ring		*txr = adapter->tx_rings;
2926	struct e1000_tx_desc	*txd;
2927	u32			tctl, txd_lower = E1000_TXD_CMD_IFCS;
2928	u16			size = 512;
2929
2930	tctl = E1000_READ_REG(hw, E1000_TCTL);
2931	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2932
2933	txd = &txr->tx_base[txr->next_avail_desc++];
2934	if (txr->next_avail_desc == adapter->num_tx_desc)
2935		txr->next_avail_desc = 0;
2936
2937	/* Just use the ring as a dummy buffer addr */
2938	txd->buffer_addr = txr->txdma.dma_paddr;
2939	txd->lower.data = htole32(txd_lower | size);
2940	txd->upper.data = 0;
2941
2942	/* flush descriptors to memory before notifying the HW */
2943	wmb();
2944
2945	E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2946	mb();
2947	usec_delay(250);
2948}
2949
2950/*
2951** em_flush_rx_ring - remove all descriptors from the rx_ring
2952**
2953** Mark all descriptors in the RX ring as consumed and disable the rx ring
2954*/
2955static void
2956em_flush_rx_ring(struct adapter *adapter)
2957{
2958	struct e1000_hw	*hw = &adapter->hw;
2959	u32		rctl, rxdctl;
2960
2961	rctl = E1000_READ_REG(hw, E1000_RCTL);
2962	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2963	E1000_WRITE_FLUSH(hw);
2964	usec_delay(150);
2965
2966	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
2967	/* zero the lower 14 bits (prefetch and host thresholds) */
2968	rxdctl &= 0xffffc000;
2969	/*
2970	 * update thresholds: prefetch threshold to 31, host threshold to 1
2971	 * and make sure the granularity is "descriptors" and not "cache lines"
2972	 */
2973	rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
2974	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
2975
2976	/* momentarily enable the RX ring for the changes to take effect */
2977	E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
2978	E1000_WRITE_FLUSH(hw);
2979	usec_delay(150);
2980	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2981}
2982
2983/*
2984** em_flush_desc_rings - remove all descriptors from the descriptor rings
2985**
2986** In i219, the descriptor rings must be emptied before resetting the HW
2987** or before changing the device state to D3 during runtime (runtime PM).
2988**
2989** Failure to do this will cause the HW to enter a unit hang state which can
2990** only be released by a PCI reset of the device.
2991**
2992*/
2993static void
2994em_flush_desc_rings(struct adapter *adapter)
2995{
2996	struct e1000_hw	*hw = &adapter->hw;
2997	device_t	dev = adapter->dev;
2998	u16		hang_state;
2999	u32		fext_nvm11, tdlen;
3000
3001	/* First, disable MULR fix in FEXTNVM11 */
3002	fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3003	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3004	E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3005
3006	/* do nothing if we're not in faulty state, or if the queue is empty */
3007	tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3008	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3009	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3010		return;
3011	em_flush_tx_ring(adapter);
3012
3013	/* recheck, maybe the fault is caused by the rx ring */
3014	hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3015	if (hang_state & FLUSH_DESC_REQUIRED)
3016		em_flush_rx_ring(adapter);
3017}
3018
3019
3020/*********************************************************************
3021 *
3022 *  Initialize the hardware to a configuration
3023 *  as specified by the adapter structure.
3024 *
3025 **********************************************************************/
3026static void
3027em_reset(struct adapter *adapter)
3028{
3029	device_t	dev = adapter->dev;
3030	if_t ifp = adapter->ifp;
3031	struct e1000_hw	*hw = &adapter->hw;
3032	u16		rx_buffer_size;
3033	u32		pba;
3034
3035	INIT_DEBUGOUT("em_reset: begin");
3036
3037	/* Set up smart power down as default off on newer adapters. */
3038	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3039	    hw->mac.type == e1000_82572)) {
3040		u16 phy_tmp = 0;
3041
3042		/* Speed up time to link by disabling smart power down. */
3043		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3044		phy_tmp &= ~IGP02E1000_PM_SPD;
3045		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3046	}
3047
3048	/*
3049	 * Packet Buffer Allocation (PBA)
3050	 * Writing PBA sets the receive portion of the buffer;
3051	 * the remainder is used for the transmit buffer.
3052	 */
3053	switch (hw->mac.type) {
3054	/* Total Packet Buffer on these is 48K */
3055	case e1000_82571:
3056	case e1000_82572:
3057	case e1000_80003es2lan:
3058			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3059		break;
3060	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3061			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3062		break;
3063	case e1000_82574:
3064	case e1000_82583:
3065			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3066		break;
3067	case e1000_ich8lan:
3068		pba = E1000_PBA_8K;
3069		break;
3070	case e1000_ich9lan:
3071	case e1000_ich10lan:
3072		/* Boost Receive side for jumbo frames */
3073		if (adapter->hw.mac.max_frame_size > 4096)
3074			pba = E1000_PBA_14K;
3075		else
3076			pba = E1000_PBA_10K;
3077		break;
3078	case e1000_pchlan:
3079	case e1000_pch2lan:
3080	case e1000_pch_lpt:
3081	case e1000_pch_spt:
3082		pba = E1000_PBA_26K;
3083		break;
3084	default:
3085		if (adapter->hw.mac.max_frame_size > 8192)
3086			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3087		else
3088			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3089	}
3090	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3091
3092	/*
3093	 * These parameters control the automatic generation (Tx) and
3094	 * response (Rx) to Ethernet PAUSE frames.
3095	 * - High water mark should allow for at least two frames to be
3096	 *   received after sending an XOFF.
3097	 * - Low water mark works best when it is very near the high water mark.
3098	 *   This allows the receiver to restart by sending XON when it has
3099	 *   drained a bit. Here we use an arbitrary value of 1500 which will
3100	 *   restart after one full frame is pulled from the buffer. There
3101	 *   could be several smaller frames in the buffer and if so they will
3102	 *   not trigger the XON until their total number reduces the buffer
3103	 *   by 1500.
3104	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3105	 */
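	/*
	 * Worked example (figures for illustration only): if the PBA
	 * register reports 20KB for RX and the max frame size is 1518,
	 *	rx_buffer_size = 20 << 10 = 20480 bytes
	 *	high_water = 20480 - roundup2(1518, 1024) = 18432
	 *	low_water  = 18432 - 1500 = 16932
	 */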
3106	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10);
3107	hw->fc.high_water = rx_buffer_size -
3108	    roundup2(adapter->hw.mac.max_frame_size, 1024);
3109	hw->fc.low_water = hw->fc.high_water - 1500;
3110
3111	if (adapter->fc) /* locally set flow control value? */
3112		hw->fc.requested_mode = adapter->fc;
3113	else
3114		hw->fc.requested_mode = e1000_fc_full;
3115
3116	if (hw->mac.type == e1000_80003es2lan)
3117		hw->fc.pause_time = 0xFFFF;
3118	else
3119		hw->fc.pause_time = EM_FC_PAUSE_TIME;
3120
3121	hw->fc.send_xon = TRUE;
3122
3123	/* Device specific overrides/settings */
3124	switch (hw->mac.type) {
3125	case e1000_pchlan:
3126		/* Workaround: no TX flow ctrl for PCH */
3127		hw->fc.requested_mode = e1000_fc_rx_pause;
3128		hw->fc.pause_time = 0xFFFF; /* override */
3129		if (if_getmtu(ifp) > ETHERMTU) {
3130			hw->fc.high_water = 0x3500;
3131			hw->fc.low_water = 0x1500;
3132		} else {
3133			hw->fc.high_water = 0x5000;
3134			hw->fc.low_water = 0x3000;
3135		}
3136		hw->fc.refresh_time = 0x1000;
3137		break;
3138	case e1000_pch2lan:
3139	case e1000_pch_lpt:
3140	case e1000_pch_spt:
3141		hw->fc.high_water = 0x5C20;
3142		hw->fc.low_water = 0x5048;
3143		hw->fc.pause_time = 0x0650;
3144		hw->fc.refresh_time = 0x0400;
3145		/* Jumbos need adjusted PBA */
3146		if (if_getmtu(ifp) > ETHERMTU)
3147			E1000_WRITE_REG(hw, E1000_PBA, 12);
3148		else
3149			E1000_WRITE_REG(hw, E1000_PBA, 26);
3150		break;
3151	case e1000_ich9lan:
3152	case e1000_ich10lan:
3153		if (if_getmtu(ifp) > ETHERMTU) {
3154			hw->fc.high_water = 0x2800;
3155			hw->fc.low_water = hw->fc.high_water - 8;
3156			break;
3157		}
3158		/* else fall thru */
3159	default:
3160		if (hw->mac.type == e1000_80003es2lan)
3161			hw->fc.pause_time = 0xFFFF;
3162		break;
3163	}
3164
3165	/* I219 needs some special flushing to avoid hangs */
3166	if (hw->mac.type == e1000_pch_spt)
3167		em_flush_desc_rings(adapter);
3168
3169	/* Issue a global reset */
3170	e1000_reset_hw(hw);
3171	E1000_WRITE_REG(hw, E1000_WUC, 0);
3172	em_disable_aspm(adapter);
3173	/* and a re-init */
3174	if (e1000_init_hw(hw) < 0) {
3175		device_printf(dev, "Hardware Initialization Failed\n");
3176		return;
3177	}
3178
3179	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3180	e1000_get_phy_info(hw);
3181	e1000_check_for_link(hw);
3182	return;
3183}
3184
3185/*********************************************************************
3186 *
3187 *  Setup networking device structure and register an interface.
3188 *
3189 **********************************************************************/
3190static int
3191em_setup_interface(device_t dev, struct adapter *adapter)
3192{
3193	if_t ifp;
3194
3195	INIT_DEBUGOUT("em_setup_interface: begin");
3196
3197	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
3198	if (ifp == NULL) {
3199		device_printf(dev, "can not allocate ifnet structure\n");
3200		return (-1);
3201	}
3202	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3203	if_setdev(ifp, dev);
3204	if_setinitfn(ifp, em_init);
3205	if_setsoftc(ifp, adapter);
3206	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
3207	if_setioctlfn(ifp, em_ioctl);
3208	if_setgetcounterfn(ifp, em_get_counter);
3209
3210	/* TSO parameters */
3211	ifp->if_hw_tsomax = IP_MAXPACKET;
3212	/* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3213	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3214	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3215
3216#ifdef EM_MULTIQUEUE
3217	/* Multiqueue stack interface */
3218	if_settransmitfn(ifp, em_mq_start);
3219	if_setqflushfn(ifp, em_qflush);
3220#else
3221	if_setstartfn(ifp, em_start);
3222	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
3223	if_setsendqready(ifp);
3224#endif
3225
3226	ether_ifattach(ifp, adapter->hw.mac.addr);
3227
3228	if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM);
3229	if_setcapenable(ifp, if_getcapabilities(ifp));
3230
3231	/*
3232	 * Tell the upper layer(s) we
3233	 * support full VLAN capability
3234	 */
3235	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
3236	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO |
3237	    IFCAP_VLAN_MTU, 0);
3238	if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);
3239
3240	/*
3241	 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3242	 * - Although the silicon bug of TSO only working at gigabit speed is
3243	 *   worked around in em_update_link_status() by selectively setting
3244	 *   CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3245	 *   descriptors.  Thus, such descriptors may still cause the MAC to
3246	 *   hang and, consequently, TSO is only safe to be used in setups
3247	 *   where the link isn't expected to switch from gigabit to lower
3248	 *   speeds.
3249	 * - Similarly, there's currently no way to trigger a reconfiguration
3250	 *   of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3251	 *   runtime.  Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3252	 *   when link speed changes are not to be expected.
3253	 * - Despite all the workarounds for TSO-related silicon bugs, at
3254	 *   least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3255	 */
3256	if_setcapabilitiesbit(ifp, IFCAP_TSO4 | IFCAP_VLAN_HWTSO, 0);
3257
3258	/*
3259	** Don't turn this on by default: if vlans are
3260	** created on another pseudo device (e.g. lagg),
3261	** vlan events are not passed through, breaking
3262	** operation, but with HW FILTER off it works. If
3263	** using vlans directly on the em driver you can
3264	** enable this and get full hardware tag filtering.
3265	*/
3266	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
3267
3268#ifdef DEVICE_POLLING
3269	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
3270#endif
3271
3272	/* Enable only WOL MAGIC by default */
3273	if (adapter->wol) {
3274		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
3275		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
3276	}
3277
3278	/*
3279	 * Specify the media types supported by this adapter and register
3280	 * callbacks to update media and link information
3281	 */
3282	ifmedia_init(&adapter->media, IFM_IMASK,
3283	    em_media_change, em_media_status);
3284	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3285	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3286		u_char fiber_type = IFM_1000_SX;	/* default type */
3287
3288		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3289			    0, NULL);
3290		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3291	} else {
3292		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3293		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3294			    0, NULL);
3295		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3296			    0, NULL);
3297		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3298			    0, NULL);
3299		if (adapter->hw.phy.type != e1000_phy_ife) {
3300			ifmedia_add(&adapter->media,
3301				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3302			ifmedia_add(&adapter->media,
3303				IFM_ETHER | IFM_1000_T, 0, NULL);
3304		}
3305	}
3306	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3307	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3308	return (0);
3309}
3310
3311
3312/*
3313 * Manage DMA'able memory.
3314 */
3315static void
3316em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3317{
3318	if (error)
3319		return;
3320	*(bus_addr_t *) arg = segs[0].ds_addr;
3321}
3322
3323static int
3324em_dma_malloc(struct adapter *adapter, bus_size_t size,
3325        struct em_dma_alloc *dma, int mapflags)
3326{
3327	int error;
3328
3329	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3330				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3331				BUS_SPACE_MAXADDR,	/* lowaddr */
3332				BUS_SPACE_MAXADDR,	/* highaddr */
3333				NULL, NULL,		/* filter, filterarg */
3334				size,			/* maxsize */
3335				1,			/* nsegments */
3336				size,			/* maxsegsize */
3337				0,			/* flags */
3338				NULL,			/* lockfunc */
3339				NULL,			/* lockarg */
3340				&dma->dma_tag);
3341	if (error) {
3342		device_printf(adapter->dev,
3343		    "%s: bus_dma_tag_create failed: %d\n",
3344		    __func__, error);
3345		goto fail_0;
3346	}
3347
3348	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3349	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3350	if (error) {
3351		device_printf(adapter->dev,
3352		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3353		    __func__, (uintmax_t)size, error);
3354		goto fail_2;
3355	}
3356
3357	dma->dma_paddr = 0;
3358	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3359	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3360	if (error || dma->dma_paddr == 0) {
3361		device_printf(adapter->dev,
3362		    "%s: bus_dmamap_load failed: %d\n",
3363		    __func__, error);
3364		goto fail_3;
3365	}
3366
3367	return (0);
3368
3369fail_3:
3370	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3371fail_2:
3372	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3373	bus_dma_tag_destroy(dma->dma_tag);
3374fail_0:
3375	dma->dma_tag = NULL;
3376
3377	return (error);
3378}
3379
3380static void
3381em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3382{
3383	if (dma->dma_tag == NULL)
3384		return;
3385	if (dma->dma_paddr != 0) {
3386		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3387		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3388		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3389		dma->dma_paddr = 0;
3390	}
3391	if (dma->dma_vaddr != NULL) {
3392		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3393		dma->dma_vaddr = NULL;
3394	}
3395	bus_dma_tag_destroy(dma->dma_tag);
3396	dma->dma_tag = NULL;
3397}
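/*
 * Minimal usage sketch of the DMA helper pair above, with a
 * hypothetical descriptor count, mirroring em_allocate_queues() below:
 *
 *	struct em_dma_alloc dma;
 *	int tsize = roundup2(1024 * sizeof(struct e1000_tx_desc),
 *	    EM_DBA_ALIGN);
 *	if (em_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr / dma.dma_paddr ...
 *		em_dma_free(adapter, &dma);
 *	}
 */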
3398
3399
3400/*********************************************************************
3401 *
3402 *  Allocate memory for the transmit and receive rings, and then
3403 *  the descriptors associated with each, called only once at attach.
3404 *
3405 **********************************************************************/
3406static int
3407em_allocate_queues(struct adapter *adapter)
3408{
3409	device_t		dev = adapter->dev;
3410	struct tx_ring		*txr = NULL;
3411	struct rx_ring		*rxr = NULL;
3412	int rsize, tsize, error = E1000_SUCCESS;
3413	int txconf = 0, rxconf = 0;
3414
3415
3416	/* Allocate the TX ring struct memory */
3417	if (!(adapter->tx_rings =
3418	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3419	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3420		device_printf(dev, "Unable to allocate TX ring memory\n");
3421		error = ENOMEM;
3422		goto fail;
3423	}
3424
3425	/* Now allocate the RX */
3426	if (!(adapter->rx_rings =
3427	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3428	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3429		device_printf(dev, "Unable to allocate RX ring memory\n");
3430		error = ENOMEM;
3431		goto rx_fail;
3432	}
3433
3434	tsize = roundup2(adapter->num_tx_desc *
3435	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3436	/*
3437	 * Now set up the TX queues, txconf is needed to handle the
3438	 * possibility that things fail midcourse and we need to
3439	 * undo memory gracefully
3440	 */
3441	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3442		/* Set up some basics */
3443		txr = &adapter->tx_rings[i];
3444		txr->adapter = adapter;
3445		txr->me = i;
3446
3447		/* Initialize the TX lock */
3448		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3449		    device_get_nameunit(dev), txr->me);
3450		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3451
3452		if (em_dma_malloc(adapter, tsize,
3453			&txr->txdma, BUS_DMA_NOWAIT)) {
3454			device_printf(dev,
3455			    "Unable to allocate TX Descriptor memory\n");
3456			error = ENOMEM;
3457			goto err_tx_desc;
3458		}
3459		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3460		bzero((void *)txr->tx_base, tsize);
3461
3462		if (em_allocate_transmit_buffers(txr)) {
3463			device_printf(dev,
3464			    "Critical Failure setting up transmit buffers\n");
3465			error = ENOMEM;
3466			goto err_tx_desc;
3467		}
3468#if __FreeBSD_version >= 800000
3469		/* Allocate a buf ring */
3470		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3471		    M_WAITOK, &txr->tx_mtx);
3472#endif
3473	}
3474
3475	/*
3476	 * Next the RX queues...
3477	 */
3478	rsize = roundup2(adapter->num_rx_desc *
3479	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3480	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3481		rxr = &adapter->rx_rings[i];
3482		rxr->adapter = adapter;
3483		rxr->me = i;
3484
3485		/* Initialize the RX lock */
3486		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3488		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3489
3490		if (em_dma_malloc(adapter, rsize,
3491			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3492			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
3494			error = ENOMEM;
3495			goto err_rx_desc;
3496		}
3497		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3498		bzero((void *)rxr->rx_base, rsize);
3499
3500        	/* Allocate receive buffers for the ring*/
3501		if (em_allocate_receive_buffers(rxr)) {
3502			device_printf(dev,
3503			    "Critical Failure setting up receive buffers\n");
3504			error = ENOMEM;
3505			goto err_rx_desc;
3506		}
3507	}
3508
3509	return (0);
3510
3511err_rx_desc:
3512	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3513		em_dma_free(adapter, &rxr->rxdma);
3514err_tx_desc:
3515	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3516		em_dma_free(adapter, &txr->txdma);
3517	free(adapter->rx_rings, M_DEVBUF);
3518rx_fail:
3519#if __FreeBSD_version >= 800000
	/* txr is NULL if the RX ring struct allocation itself failed */
	if (txr != NULL && txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
3521#endif
3522	free(adapter->tx_rings, M_DEVBUF);
3523fail:
3524	return (error);
3525}
3526
3527
3528/*********************************************************************
3529 *
3530 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3531 *  the information needed to transmit a packet on the wire. This is
3532 *  called only once at attach, setup is done every reset.
3533 *
3534 **********************************************************************/
3535static int
3536em_allocate_transmit_buffers(struct tx_ring *txr)
3537{
3538	struct adapter *adapter = txr->adapter;
3539	device_t dev = adapter->dev;
3540	struct em_txbuffer *txbuf;
3541	int error, i;
3542
3543	/*
3544	 * Setup DMA descriptor areas.
3545	 */
3546	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3547			       1, 0,			/* alignment, bounds */
3548			       BUS_SPACE_MAXADDR,	/* lowaddr */
3549			       BUS_SPACE_MAXADDR,	/* highaddr */
3550			       NULL, NULL,		/* filter, filterarg */
3551			       EM_TSO_SIZE,		/* maxsize */
3552			       EM_MAX_SCATTER,		/* nsegments */
3553			       PAGE_SIZE,		/* maxsegsize */
3554			       0,			/* flags */
3555			       NULL,			/* lockfunc */
3556			       NULL,			/* lockfuncarg */
3557			       &txr->txtag))) {
3558		device_printf(dev,"Unable to allocate TX DMA tag\n");
3559		goto fail;
3560	}
3561
3562	if (!(txr->tx_buffers =
3563	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3564	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3565		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3566		error = ENOMEM;
3567		goto fail;
3568	}
3569
3570        /* Create the descriptor buffer dma maps */
3571	txbuf = txr->tx_buffers;
3572	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3573		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3574		if (error != 0) {
3575			device_printf(dev, "Unable to create TX DMA map\n");
3576			goto fail;
3577		}
3578	}
3579
3580	return 0;
3581fail:
3582	/* We free all, it handles case where we are in the middle */
3583	em_free_transmit_structures(adapter);
3584	return (error);
3585}
3586
3587/*********************************************************************
3588 *
3589 *  Initialize a transmit ring.
3590 *
3591 **********************************************************************/
3592static void
3593em_setup_transmit_ring(struct tx_ring *txr)
3594{
3595	struct adapter *adapter = txr->adapter;
3596	struct em_txbuffer *txbuf;
3597	int i;
3598#ifdef DEV_NETMAP
3599	struct netmap_slot *slot;
3600	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3601#endif /* DEV_NETMAP */
3602
3603	/* Clear the old descriptor contents */
3604	EM_TX_LOCK(txr);
3605#ifdef DEV_NETMAP
3606	slot = netmap_reset(na, NR_TX, txr->me, 0);
3607#endif /* DEV_NETMAP */
3608
3609	bzero((void *)txr->tx_base,
3610	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3611	/* Reset indices */
3612	txr->next_avail_desc = 0;
3613	txr->next_to_clean = 0;
3614
3615	/* Free any existing tx buffers. */
3616        txbuf = txr->tx_buffers;
3617	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3618		if (txbuf->m_head != NULL) {
3619			bus_dmamap_sync(txr->txtag, txbuf->map,
3620			    BUS_DMASYNC_POSTWRITE);
3621			bus_dmamap_unload(txr->txtag, txbuf->map);
3622			m_freem(txbuf->m_head);
3623			txbuf->m_head = NULL;
3624		}
3625#ifdef DEV_NETMAP
3626		if (slot) {
3627			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3628			uint64_t paddr;
3629			void *addr;
3630
3631			addr = PNMB(na, slot + si, &paddr);
3632			txr->tx_base[i].buffer_addr = htole64(paddr);
3633			/* reload the map for netmap mode */
3634			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3635		}
3636#endif /* DEV_NETMAP */
3637
3638		/* clear the watch index */
3639		txbuf->next_eop = -1;
3640        }
3641
3642	/* Set number of descriptors available */
3643	txr->tx_avail = adapter->num_tx_desc;
3644	txr->busy = EM_TX_IDLE;
3645
3646	/* Clear checksum offload context. */
3647	txr->last_hw_offload = 0;
3648	txr->last_hw_ipcss = 0;
3649	txr->last_hw_ipcso = 0;
3650	txr->last_hw_tucss = 0;
3651	txr->last_hw_tucso = 0;
3652
3653	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3654	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3655	EM_TX_UNLOCK(txr);
3656}
3657
3658/*********************************************************************
3659 *
3660 *  Initialize all transmit rings.
3661 *
3662 **********************************************************************/
3663static void
3664em_setup_transmit_structures(struct adapter *adapter)
3665{
3666	struct tx_ring *txr = adapter->tx_rings;
3667
3668	for (int i = 0; i < adapter->num_queues; i++, txr++)
3669		em_setup_transmit_ring(txr);
3670
3671	return;
3672}
3673
3674/*********************************************************************
3675 *
3676 *  Enable transmit unit.
3677 *
3678 **********************************************************************/
3679static void
3680em_initialize_transmit_unit(struct adapter *adapter)
3681{
3682	struct tx_ring	*txr = adapter->tx_rings;
3683	struct e1000_hw	*hw = &adapter->hw;
3684	u32	tctl, txdctl = 0, tarc, tipg = 0;
3685
	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3687
3688	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3689		u64 bus_addr = txr->txdma.dma_paddr;
3690		/* Base and Len of TX Ring */
3691		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3692	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3693		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3694	    	    (u32)(bus_addr >> 32));
3695		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3696	    	    (u32)bus_addr);
3697		/* Init the HEAD/TAIL indices */
3698		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3699		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3700
3701		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3702		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3703		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3704
3705		txr->busy = EM_TX_IDLE;
3706		txdctl = 0; /* clear txdctl */
3707                txdctl |= 0x1f; /* PTHRESH */
3708                txdctl |= 1 << 8; /* HTHRESH */
3709                txdctl |= 1 << 16;/* WTHRESH */
3710		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3711		txdctl |= E1000_TXDCTL_GRAN;
3712                txdctl |= 1 << 25; /* LWTHRESH */
3713
3714                E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3715	}
3716
3717	/* Set the default values for the Tx Inter Packet Gap timer */
3718	switch (adapter->hw.mac.type) {
3719	case e1000_80003es2lan:
3720		tipg = DEFAULT_82543_TIPG_IPGR1;
3721		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3722		    E1000_TIPG_IPGR2_SHIFT;
3723		break;
3724	default:
3725		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3726		    (adapter->hw.phy.media_type ==
3727		    e1000_media_type_internal_serdes))
3728			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3729		else
3730			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3731		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3732		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3733	}
3734
3735	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3736	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3737
	if (adapter->hw.mac.type >= e1000_82540)
3739		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3740		    adapter->tx_abs_int_delay.value);
3741
3742	if ((adapter->hw.mac.type == e1000_82571) ||
3743	    (adapter->hw.mac.type == e1000_82572)) {
3744		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3745		tarc |= TARC_SPEED_MODE_BIT;
3746		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3747	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3748		/* errata: program both queues to unweighted RR */
3749		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3750		tarc |= 1;
3751		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3752		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3753		tarc |= 1;
3754		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3755	} else if (adapter->hw.mac.type == e1000_82574) {
3756		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3757		tarc |= TARC_ERRATA_BIT;
		if (adapter->num_queues > 1) {
3759			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3760			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3761			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3762		} else
3763			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3764	}
3765
3766	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3767	if (adapter->tx_int_delay.value > 0)
3768		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3769
3770	/* Program the Transmit Control Register */
3771	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3772	tctl &= ~E1000_TCTL_CT;
3773	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3774		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3775
3776	if (adapter->hw.mac.type >= e1000_82571)
3777		tctl |= E1000_TCTL_MULR;
3778
3779	/* This write will effectively turn on the transmit unit. */
3780	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3781
3782	if (hw->mac.type == e1000_pch_spt) {
3783		u32 reg;
3784		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3785		reg |= E1000_RCTL_RDMTS_HEX;
3786		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3787		reg = E1000_READ_REG(hw, E1000_TARC(0));
3788		reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3789		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3790	}
3791}
3792
3793
3794/*********************************************************************
3795 *
3796 *  Free all transmit rings.
3797 *
3798 **********************************************************************/
3799static void
3800em_free_transmit_structures(struct adapter *adapter)
3801{
3802	struct tx_ring *txr = adapter->tx_rings;
3803
3804	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3805		EM_TX_LOCK(txr);
3806		em_free_transmit_buffers(txr);
3807		em_dma_free(adapter, &txr->txdma);
3808		EM_TX_UNLOCK(txr);
3809		EM_TX_LOCK_DESTROY(txr);
3810	}
3811
3812	free(adapter->tx_rings, M_DEVBUF);
3813}
3814
3815/*********************************************************************
3816 *
3817 *  Free transmit ring related data structures.
3818 *
3819 **********************************************************************/
3820static void
3821em_free_transmit_buffers(struct tx_ring *txr)
3822{
3823	struct adapter		*adapter = txr->adapter;
3824	struct em_txbuffer	*txbuf;
3825
3826	INIT_DEBUGOUT("free_transmit_ring: begin");
3827
3828	if (txr->tx_buffers == NULL)
3829		return;
3830
3831	for (int i = 0; i < adapter->num_tx_desc; i++) {
3832		txbuf = &txr->tx_buffers[i];
3833		if (txbuf->m_head != NULL) {
3834			bus_dmamap_sync(txr->txtag, txbuf->map,
3835			    BUS_DMASYNC_POSTWRITE);
3836			bus_dmamap_unload(txr->txtag,
3837			    txbuf->map);
3838			m_freem(txbuf->m_head);
3839			txbuf->m_head = NULL;
3840			if (txbuf->map != NULL) {
3841				bus_dmamap_destroy(txr->txtag,
3842				    txbuf->map);
3843				txbuf->map = NULL;
3844			}
3845		} else if (txbuf->map != NULL) {
3846			bus_dmamap_unload(txr->txtag,
3847			    txbuf->map);
3848			bus_dmamap_destroy(txr->txtag,
3849			    txbuf->map);
3850			txbuf->map = NULL;
3851		}
3852	}
3853#if __FreeBSD_version >= 800000
3854	if (txr->br != NULL)
3855		buf_ring_free(txr->br, M_DEVBUF);
3856#endif
3857	if (txr->tx_buffers != NULL) {
3858		free(txr->tx_buffers, M_DEVBUF);
3859		txr->tx_buffers = NULL;
3860	}
3861	if (txr->txtag != NULL) {
3862		bus_dma_tag_destroy(txr->txtag);
3863		txr->txtag = NULL;
3864	}
3865	return;
3866}
3867
3868
3869/*********************************************************************
3870 *  The offload context is protocol specific (TCP/UDP) and thus
3871 *  only needs to be set when the protocol changes. The occasion
3872 *  of a context change can be a performance detriment, and
3873 *  might be better just disabled. The reason arises in the way
3874 *  in which the controller supports pipelined requests from the
3875 *  Tx data DMA. Up to four requests can be pipelined, and they may
3876 *  belong to the same packet or to multiple packets. However all
3877 *  requests for one packet are issued before a request is issued
3878 *  for a subsequent packet and if a request for the next packet
3879 *  requires a context change, that request will be stalled
3880 *  until the previous request completes. This means setting up
3881 *  a new context effectively disables pipelined Tx data DMA which
 *  in turn greatly slows down performance when sending small
 *  frames.
3884 **********************************************************************/
3885static void
3886em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3887    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3888{
3889	struct adapter			*adapter = txr->adapter;
3890	struct e1000_context_desc	*TXD = NULL;
3891	struct em_txbuffer		*tx_buffer;
3892	int				cur, hdr_len;
3893	u32				cmd = 0;
3894	u16				offload = 0;
3895	u8				ipcso, ipcss, tucso, tucss;
3896
3897	ipcss = ipcso = tucss = tucso = 0;
3898	hdr_len = ip_off + (ip->ip_hl << 2);
3899	cur = txr->next_avail_desc;
3900
3901	/* Setup of IP header checksum. */
3902	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3903		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3904		offload |= CSUM_IP;
3905		ipcss = ip_off;
3906		ipcso = ip_off + offsetof(struct ip, ip_sum);
3907		/*
3908		 * Start offset for header checksum calculation.
3909		 * End offset for header checksum calculation.
3910		 * Offset of place to put the checksum.
3911		 */
3912		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3913		TXD->lower_setup.ip_fields.ipcss = ipcss;
3914		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3915		TXD->lower_setup.ip_fields.ipcso = ipcso;
3916		cmd |= E1000_TXD_CMD_IP;
3917	}
3918
3919	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3920 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3921 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3922 		offload |= CSUM_TCP;
3923 		tucss = hdr_len;
3924 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3925		/*
3926		 * The 82574L can only remember the *last* context used
		 * regardless of the queue it was used for.  We cannot reuse
3928		 * contexts on this hardware platform and must generate a new
3929		 * context every time.  82574L hardware spec, section 7.2.6,
3930		 * second note.
3931		 */
3932		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame takes a lot of processing time in
			 * hardware.  This also hurts performance for small
			 * frames, so avoid it if the driver can reuse the
			 * previously configured checksum offload context.
			 */
3940 			if (txr->last_hw_offload == offload) {
3941 				if (offload & CSUM_IP) {
3942 					if (txr->last_hw_ipcss == ipcss &&
3943 				    	txr->last_hw_ipcso == ipcso &&
3944 				    	txr->last_hw_tucss == tucss &&
3945 				    	txr->last_hw_tucso == tucso)
3946 						return;
3947 				} else {
3948 					if (txr->last_hw_tucss == tucss &&
3949 				    	txr->last_hw_tucso == tucso)
3950 						return;
3951 				}
3952  			}
3953 			txr->last_hw_offload = offload;
3954 			txr->last_hw_tucss = tucss;
3955 			txr->last_hw_tucso = tucso;
3956		}
3957 		/*
3958 		 * Start offset for payload checksum calculation.
3959 		 * End offset for payload checksum calculation.
3960 		 * Offset of place to put the checksum.
3961 		 */
3962		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
		TXD->upper_setup.tcp_fields.tucss = tucss;
3964 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3965 		TXD->upper_setup.tcp_fields.tucso = tucso;
3966 		cmd |= E1000_TXD_CMD_TCP;
3967 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3968 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3969 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3970 		tucss = hdr_len;
3971 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3972		/*
3973		 * The 82574L can only remember the *last* context used
		 * regardless of the queue it was used for.  We cannot reuse
3975		 * contexts on this hardware platform and must generate a new
3976		 * context every time.  82574L hardware spec, section 7.2.6,
3977		 * second note.
3978		 */
3979		if (adapter->num_queues < 2) {
			/*
			 * Setting up a new checksum offload context for
			 * every frame takes a lot of processing time in
			 * hardware.  This also hurts performance for small
			 * frames, so avoid it if the driver can reuse the
			 * previously configured checksum offload context.
			 */
3987 			if (txr->last_hw_offload == offload) {
3988 				if (offload & CSUM_IP) {
3989 					if (txr->last_hw_ipcss == ipcss &&
3990 				    	txr->last_hw_ipcso == ipcso &&
3991 				    	txr->last_hw_tucss == tucss &&
3992 				    	txr->last_hw_tucso == tucso)
3993 						return;
3994 				} else {
3995 					if (txr->last_hw_tucss == tucss &&
3996 				    	txr->last_hw_tucso == tucso)
3997 						return;
3998 				}
3999 			}
4000 			txr->last_hw_offload = offload;
4001 			txr->last_hw_tucss = tucss;
4002 			txr->last_hw_tucso = tucso;
4003		}
4004 		/*
4005 		 * Start offset for header checksum calculation.
4006 		 * End offset for header checksum calculation.
4007 		 * Offset of place to put the checksum.
4008 		 */
4009		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4010 		TXD->upper_setup.tcp_fields.tucss = tucss;
4011 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
4012 		TXD->upper_setup.tcp_fields.tucso = tucso;
4013  	}
4014
4015 	if (offload & CSUM_IP) {
4016 		txr->last_hw_ipcss = ipcss;
4017 		txr->last_hw_ipcso = ipcso;
4018  	}
4019
4020	TXD->tcp_seg_setup.data = htole32(0);
4021	TXD->cmd_and_length =
4022	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4023	tx_buffer = &txr->tx_buffers[cur];
4024	tx_buffer->m_head = NULL;
4025	tx_buffer->next_eop = -1;
4026
4027	if (++cur == adapter->num_tx_desc)
4028		cur = 0;
4029
4030	txr->tx_avail--;
4031	txr->next_avail_desc = cur;
4032}
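
/*
 * Illustrative only: a minimal sketch, assuming an IPv4 frame with a
 * standard Ethernet header, of how a transmit path might invoke the
 * helper above.  The mbuf 'm' and the descriptor words are hypothetical.
 */
#if 0
	struct ip *ip = (struct ip *)(mtod(m, char *) + ETHER_HDR_LEN);
	u32 txd_upper = 0, txd_lower = 0;

	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
		em_transmit_checksum_setup(txr, m, ETHER_HDR_LEN, ip,
		    &txd_upper, &txd_lower);
#endif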
4033
4034
4035/**********************************************************************
4036 *
4037 *  Setup work for hardware segmentation offload (TSO)
4038 *
4039 **********************************************************************/
4040static void
4041em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4042    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4043{
4044	struct adapter			*adapter = txr->adapter;
4045	struct e1000_context_desc	*TXD;
4046	struct em_txbuffer		*tx_buffer;
4047	int cur, hdr_len;
4048
4049	/*
4050	 * In theory we can use the same TSO context if and only if
4051	 * frame is the same type(IP/TCP) and the same MSS. However
	 * checking whether a frame has the same IP/TCP structure is a
	 * hard thing, so just ignore that and always set up a
	 * new TSO context.
4055	 */
4056	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
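	/*
	 * Worked example: a standard Ethernet header (ip_off = 14) with
	 * minimal IP and TCP headers (ip_hl = 5, th_off = 5) gives
	 * hdr_len = 14 + 20 + 20 = 54 bytes.
	 */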
4057	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4058		      E1000_TXD_DTYP_D |	/* Data descr type */
4059		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4060
4061	/* IP and/or TCP header checksum calculation and insertion. */
4062	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4063
4064	cur = txr->next_avail_desc;
4065	tx_buffer = &txr->tx_buffers[cur];
4066	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4067
4068	/*
4069	 * Start offset for header checksum calculation.
4070	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
4072	 */
4073	TXD->lower_setup.ip_fields.ipcss = ip_off;
4074	TXD->lower_setup.ip_fields.ipcse =
4075	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4076	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4077	/*
4078	 * Start offset for payload checksum calculation.
4079	 * End offset for payload checksum calculation.
4080	 * Offset of place to put the checksum.
4081	 */
4082	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4083	TXD->upper_setup.tcp_fields.tucse = 0;
4084	TXD->upper_setup.tcp_fields.tucso =
4085	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4086	/*
4087	 * Payload size per packet w/o any headers.
4088	 * Length of all headers up to payload.
4089	 */
4090	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4091	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4092
4093	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4094				E1000_TXD_CMD_DEXT |	/* Extended descr */
4095				E1000_TXD_CMD_TSE |	/* TSE context */
4096				E1000_TXD_CMD_IP |	/* Do IP csum */
4097				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4098				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
4099
4100	tx_buffer->m_head = NULL;
4101	tx_buffer->next_eop = -1;
4102
4103	if (++cur == adapter->num_tx_desc)
4104		cur = 0;
4105
4106	txr->tx_avail--;
4107	txr->next_avail_desc = cur;
4108	txr->tx_tso = TRUE;
4109}
4110
4111
4112/**********************************************************************
4113 *
4114 *  Examine each tx_buffer in the used queue. If the hardware is done
4115 *  processing the packet then free associated resources. The
4116 *  tx_buffer is put back on the free queue.
4117 *
4118 **********************************************************************/
4119static void
4120em_txeof(struct tx_ring *txr)
4121{
4122	struct adapter	*adapter = txr->adapter;
4123        int first, last, done, processed;
4124        struct em_txbuffer *tx_buffer;
4125        struct e1000_tx_desc   *tx_desc, *eop_desc;
4126	if_t ifp = adapter->ifp;
4127
4128	EM_TX_LOCK_ASSERT(txr);
4129#ifdef DEV_NETMAP
4130	if (netmap_tx_irq(ifp, txr->me))
4131		return;
4132#endif /* DEV_NETMAP */
4133
4134	/* No work, make sure hang detection is disabled */
4135        if (txr->tx_avail == adapter->num_tx_desc) {
4136		txr->busy = EM_TX_IDLE;
4137                return;
4138	}
4139
4140	processed = 0;
4141        first = txr->next_to_clean;
4142        tx_desc = &txr->tx_base[first];
4143        tx_buffer = &txr->tx_buffers[first];
4144	last = tx_buffer->next_eop;
4145        eop_desc = &txr->tx_base[last];
4146
4147	/*
	 * Get the index of the first descriptor
	 * AFTER the EOP of the first packet, so
	 * that we can do the simple comparison in
	 * the inner while loop.
4152	 */
4153	if (++last == adapter->num_tx_desc)
4154 		last = 0;
4155	done = last;
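	/*
	 * Worked example: if the first packet's EOP is descriptor 5,
	 * 'done' becomes 6 and the loop below cleans descriptors
	 * 'first' through 5 inclusive.
	 */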
4156
4157        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4158            BUS_DMASYNC_POSTREAD);
4159
4160        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4161		/* We clean the range of the packet */
4162		while (first != done) {
4163                	tx_desc->upper.data = 0;
4164                	tx_desc->lower.data = 0;
4165                	tx_desc->buffer_addr = 0;
4166                	++txr->tx_avail;
4167			++processed;
4168
4169			if (tx_buffer->m_head) {
4170				bus_dmamap_sync(txr->txtag,
4171				    tx_buffer->map,
4172				    BUS_DMASYNC_POSTWRITE);
4173				bus_dmamap_unload(txr->txtag,
4174				    tx_buffer->map);
4175                        	m_freem(tx_buffer->m_head);
4176                        	tx_buffer->m_head = NULL;
4177                	}
4178			tx_buffer->next_eop = -1;
4179
4180	                if (++first == adapter->num_tx_desc)
4181				first = 0;
4182
4183	                tx_buffer = &txr->tx_buffers[first];
4184			tx_desc = &txr->tx_base[first];
4185		}
4186		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
4187		/* See if we can continue to the next packet */
4188		last = tx_buffer->next_eop;
4189		if (last != -1) {
4190        		eop_desc = &txr->tx_base[last];
4191			/* Get new done point */
4192			if (++last == adapter->num_tx_desc) last = 0;
4193			done = last;
4194		} else
4195			break;
4196        }
4197        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4198            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4199
4200        txr->next_to_clean = first;
4201
4202	/*
4203	** Hang detection: we know there's work outstanding
4204	** or the entry return would have been taken, so no
4205	** descriptor processed here indicates a potential hang.
4206	** The local timer will examine this and do a reset if needed.
4207	*/
4208	if (processed == 0) {
4209		if (txr->busy != EM_TX_HUNG)
4210			++txr->busy;
4211	} else /* At least one descriptor was cleaned */
4212		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
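
	/*
	 * Illustrative progression (assuming the EM_TX_* encoding in
	 * if_em.h): busy goes EM_TX_IDLE -> EM_TX_BUSY, is incremented
	 * on every pass that cleans nothing, and saturates at
	 * EM_TX_HUNG, which the local timer treats as a hang.
	 */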
4213
4214        /*
4215         * If we have a minimum free, clear IFF_DRV_OACTIVE
4216         * to tell the stack that it is OK to send packets.
4217	 * Notice that all writes of OACTIVE happen under the
4218	 * TX lock which, with a single queue, guarantees
4219	 * sanity.
4220         */
4221        if (txr->tx_avail >= EM_MAX_SCATTER) {
4222		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
4223	}
4224
4225	/* Disable hang detection if all clean */
4226	if (txr->tx_avail == adapter->num_tx_desc)
4227		txr->busy = EM_TX_IDLE;
4228}
4229
4230/*********************************************************************
4231 *
4232 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
4233 *
4234 **********************************************************************/
4235static void
4236em_refresh_mbufs(struct rx_ring *rxr, int limit)
4237{
4238	struct adapter		*adapter = rxr->adapter;
4239	struct mbuf		*m;
4240	bus_dma_segment_t	segs;
4241	struct em_rxbuffer	*rxbuf;
4242	int			i, j, error, nsegs;
4243	bool			cleaned = FALSE;
4244
4245	i = j = rxr->next_to_refresh;
4246	/*
4247	** Get one descriptor beyond
4248	** our work mark to control
4249	** the loop.
4250	*/
4251	if (++j == adapter->num_rx_desc)
4252		j = 0;
4253
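	/*
	 * Worked example: with next_to_refresh == 10, i starts at 10 and
	 * j at 11; descriptors are refreshed until j reaches 'limit'.
	 */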
4254	while (j != limit) {
4255		rxbuf = &rxr->rx_buffers[i];
4256		if (rxbuf->m_head == NULL) {
4257			m = m_getjcl(M_NOWAIT, MT_DATA,
4258			    M_PKTHDR, adapter->rx_mbuf_sz);
4259			/*
4260			** If we have a temporary resource shortage
4261			** that causes a failure, just abort refresh
4262			** for now, we will return to this point when
4263			** reinvoked from em_rxeof.
4264			*/
4265			if (m == NULL)
4266				goto update;
4267		} else
4268			m = rxbuf->m_head;
4269
4270		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4271		m->m_flags |= M_PKTHDR;
4272		m->m_data = m->m_ext.ext_buf;
4273
4274		/* Use bus_dma machinery to setup the memory mapping  */
4275		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4276		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4277		if (error != 0) {
4278			printf("Refresh mbufs: hdr dmamap load"
4279			    " failure - %d\n", error);
4280			m_free(m);
4281			rxbuf->m_head = NULL;
4282			goto update;
4283		}
4284		rxbuf->m_head = m;
4285		rxbuf->paddr = segs.ds_addr;
4286		bus_dmamap_sync(rxr->rxtag,
4287		    rxbuf->map, BUS_DMASYNC_PREREAD);
4288		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4289		cleaned = TRUE;
4290
		i = j; /* Next is precalculated for us */
4292		rxr->next_to_refresh = i;
4293		/* Calculate next controlling index */
4294		if (++j == adapter->num_rx_desc)
4295			j = 0;
4296	}
4297update:
4298	/*
	** Update the tail pointer only if we refreshed,
	** and only as far as we have refreshed.
4301	*/
4302	if (cleaned)
4303		E1000_WRITE_REG(&adapter->hw,
4304		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4305
4306	return;
4307}
4308
4309
4310/*********************************************************************
4311 *
4312 *  Allocate memory for rx_buffer structures. Since we use one
4313 *  rx_buffer per received packet, the maximum number of rx_buffer's
4314 *  that we'll need is equal to the number of receive descriptors
4315 *  that we've allocated.
4316 *
4317 **********************************************************************/
4318static int
4319em_allocate_receive_buffers(struct rx_ring *rxr)
4320{
4321	struct adapter		*adapter = rxr->adapter;
4322	device_t		dev = adapter->dev;
4323	struct em_rxbuffer	*rxbuf;
4324	int			error;
4325
4326	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4327	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4328	if (rxr->rx_buffers == NULL) {
4329		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4330		return (ENOMEM);
4331	}
4332
4333	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4334				1, 0,			/* alignment, bounds */
4335				BUS_SPACE_MAXADDR,	/* lowaddr */
4336				BUS_SPACE_MAXADDR,	/* highaddr */
4337				NULL, NULL,		/* filter, filterarg */
4338				MJUM9BYTES,		/* maxsize */
4339				1,			/* nsegments */
4340				MJUM9BYTES,		/* maxsegsize */
4341				0,			/* flags */
4342				NULL,			/* lockfunc */
4343				NULL,			/* lockarg */
4344				&rxr->rxtag);
4345	if (error) {
4346		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4347		    __func__, error);
4348		goto fail;
4349	}
4350
4351	rxbuf = rxr->rx_buffers;
4352	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4354		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4355		if (error) {
4356			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4357			    __func__, error);
4358			goto fail;
4359		}
4360	}
4361
4362	return (0);
4363
4364fail:
4365	em_free_receive_structures(adapter);
4366	return (error);
4367}
4368
4369
4370/*********************************************************************
4371 *
4372 *  Initialize a receive ring and its buffers.
4373 *
4374 **********************************************************************/
4375static int
4376em_setup_receive_ring(struct rx_ring *rxr)
4377{
4378	struct	adapter 	*adapter = rxr->adapter;
4379	struct em_rxbuffer	*rxbuf;
4380	bus_dma_segment_t	seg[1];
4381	int			rsize, nsegs, error = 0;
4382#ifdef DEV_NETMAP
4383	struct netmap_slot *slot;
4384	struct netmap_adapter *na = netmap_getna(adapter->ifp);
4385#endif
4386
4387
4388	/* Clear the ring contents */
4389	EM_RX_LOCK(rxr);
4390	rsize = roundup2(adapter->num_rx_desc *
4391	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4392	bzero((void *)rxr->rx_base, rsize);
4393#ifdef DEV_NETMAP
4394	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4395#endif
4396
4397	/*
4398	** Free current RX buffer structs and their mbufs
4399	*/
4400	for (int i = 0; i < adapter->num_rx_desc; i++) {
4401		rxbuf = &rxr->rx_buffers[i];
4402		if (rxbuf->m_head != NULL) {
4403			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4404			    BUS_DMASYNC_POSTREAD);
4405			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4406			m_freem(rxbuf->m_head);
4407			rxbuf->m_head = NULL; /* mark as freed */
4408		}
4409	}
4410
4411	/* Now replenish the mbufs */
4412        for (int j = 0; j != adapter->num_rx_desc; ++j) {
4413		rxbuf = &rxr->rx_buffers[j];
4414#ifdef DEV_NETMAP
4415		if (slot) {
4416			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4417			uint64_t paddr;
4418			void *addr;
4419
4420			addr = PNMB(na, slot + si, &paddr);
4421			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4422			rxbuf->paddr = paddr;
4423			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4424			continue;
4425		}
4426#endif /* DEV_NETMAP */
4427		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4428		    M_PKTHDR, adapter->rx_mbuf_sz);
4429		if (rxbuf->m_head == NULL) {
4430			error = ENOBUFS;
4431			goto fail;
4432		}
4433		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4434		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4435		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4436
4437		/* Get the memory mapping */
4438		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4439		    rxbuf->map, rxbuf->m_head, seg,
4440		    &nsegs, BUS_DMA_NOWAIT);
4441		if (error != 0) {
4442			m_freem(rxbuf->m_head);
4443			rxbuf->m_head = NULL;
4444			goto fail;
4445		}
4446		bus_dmamap_sync(rxr->rxtag,
4447		    rxbuf->map, BUS_DMASYNC_PREREAD);
4448
4449		rxbuf->paddr = seg[0].ds_addr;
4450		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4451	}
4452	rxr->next_to_check = 0;
4453	rxr->next_to_refresh = 0;
4454	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4455	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4456
4457fail:
4458	EM_RX_UNLOCK(rxr);
4459	return (error);
4460}
4461
4462/*********************************************************************
4463 *
4464 *  Initialize all receive rings.
4465 *
4466 **********************************************************************/
4467static int
4468em_setup_receive_structures(struct adapter *adapter)
4469{
4470	struct rx_ring *rxr = adapter->rx_rings;
4471	int q;
4472
4473	for (q = 0; q < adapter->num_queues; q++, rxr++)
4474		if (em_setup_receive_ring(rxr))
4475			goto fail;
4476
4477	return (0);
4478fail:
4479	/*
4480	 * Free RX buffers allocated so far, we will only handle
4481	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'q' failed, so it's the terminus.
4483	 */
4484	for (int i = 0; i < q; ++i) {
4485		rxr = &adapter->rx_rings[i];
4486		for (int n = 0; n < adapter->num_rx_desc; n++) {
4487			struct em_rxbuffer *rxbuf;
4488			rxbuf = &rxr->rx_buffers[n];
4489			if (rxbuf->m_head != NULL) {
4490				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4491			  	  BUS_DMASYNC_POSTREAD);
4492				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4493				m_freem(rxbuf->m_head);
4494				rxbuf->m_head = NULL;
4495			}
4496		}
4497		rxr->next_to_check = 0;
4498		rxr->next_to_refresh = 0;
4499	}
4500
4501	return (ENOBUFS);
4502}
4503
4504/*********************************************************************
4505 *
4506 *  Free all receive rings.
4507 *
4508 **********************************************************************/
4509static void
4510em_free_receive_structures(struct adapter *adapter)
4511{
4512	struct rx_ring *rxr = adapter->rx_rings;
4513
4514	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4515		em_free_receive_buffers(rxr);
4516		/* Free the ring memory as well */
4517		em_dma_free(adapter, &rxr->rxdma);
4518		EM_RX_LOCK_DESTROY(rxr);
4519	}
4520
4521	free(adapter->rx_rings, M_DEVBUF);
4522}
4523
4524
4525/*********************************************************************
4526 *
4527 *  Free receive ring data structures
4528 *
4529 **********************************************************************/
4530static void
4531em_free_receive_buffers(struct rx_ring *rxr)
4532{
4533	struct adapter		*adapter = rxr->adapter;
4534	struct em_rxbuffer	*rxbuf = NULL;
4535
4536	INIT_DEBUGOUT("free_receive_buffers: begin");
4537
4538	if (rxr->rx_buffers != NULL) {
4539		for (int i = 0; i < adapter->num_rx_desc; i++) {
4540			rxbuf = &rxr->rx_buffers[i];
4541			if (rxbuf->map != NULL) {
4542				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4543				    BUS_DMASYNC_POSTREAD);
4544				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4545				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4546			}
4547			if (rxbuf->m_head != NULL) {
4548				m_freem(rxbuf->m_head);
4549				rxbuf->m_head = NULL;
4550			}
4551		}
4552		free(rxr->rx_buffers, M_DEVBUF);
4553		rxr->rx_buffers = NULL;
4554		rxr->next_to_check = 0;
4555		rxr->next_to_refresh = 0;
4556	}
4557
4558	if (rxr->rxtag != NULL) {
4559		bus_dma_tag_destroy(rxr->rxtag);
4560		rxr->rxtag = NULL;
4561	}
4562
4563	return;
4564}
4565
4566
4567/*********************************************************************
4568 *
4569 *  Enable receive unit.
4570 *
4571 **********************************************************************/
4572
4573static void
4574em_initialize_receive_unit(struct adapter *adapter)
4575{
4576	struct rx_ring *rxr = adapter->rx_rings;
4577	if_t ifp = adapter->ifp;
4578	struct e1000_hw	*hw = &adapter->hw;
4579	u32	rctl, rxcsum, rfctl;
4580
	INIT_DEBUGOUT("em_initialize_receive_unit: begin");
4582
4583	/*
4584	 * Make sure receives are disabled while setting
4585	 * up the descriptor ring
4586	 */
4587	rctl = E1000_READ_REG(hw, E1000_RCTL);
4588	/* Do not disable if ever enabled on this hardware */
4589	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4590		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4591
4592	/* Setup the Receive Control Register */
4593	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4594	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4595	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4596	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4597
4598	/* Do not store bad packets */
4599	rctl &= ~E1000_RCTL_SBP;
4600
4601	/* Enable Long Packet receive */
4602	if (if_getmtu(ifp) > ETHERMTU)
4603		rctl |= E1000_RCTL_LPE;
4604	else
4605		rctl &= ~E1000_RCTL_LPE;
4606
4607        /* Strip the CRC */
4608        if (!em_disable_crc_stripping)
4609		rctl |= E1000_RCTL_SECRC;
4610
4611	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4612	    adapter->rx_abs_int_delay.value);
4613
4614	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4615	    adapter->rx_int_delay.value);
4616	/*
4617	 * Set the interrupt throttling rate. Value is calculated
4618	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4619	 */
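	/*
	 * Worked example, assuming MAX_INTS_PER_SEC is 8000 as defined in
	 * if_em.h: DEFAULT_ITR = 10^9 / (8000 * 256) ~= 488 units of
	 * 256ns each, i.e. at most one interrupt every ~125us.
	 */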
4620	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4621
4622	/* Use extended rx descriptor formats */
4623	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4624	rfctl |= E1000_RFCTL_EXTEN;
4625	/*
4626	** When using MSIX interrupts we need to throttle
4627	** using the EITR register (82574 only)
4628	*/
4629	if (hw->mac.type == e1000_82574) {
4630		for (int i = 0; i < 4; i++)
4631			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4632			    DEFAULT_ITR);
4633		/* Disable accelerated acknowledge */
4634		rfctl |= E1000_RFCTL_ACK_DIS;
4635	}
4636	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4637
4638	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4639	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
4640#ifdef EM_MULTIQUEUE
4641		rxcsum |= E1000_RXCSUM_TUOFL |
4642			  E1000_RXCSUM_IPOFL |
4643			  E1000_RXCSUM_PCSD;
4644#else
4645		rxcsum |= E1000_RXCSUM_TUOFL;
4646#endif
4647	} else
4648		rxcsum &= ~E1000_RXCSUM_TUOFL;
4649
4650	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4651
4652#ifdef EM_MULTIQUEUE
4653#define RSSKEYLEN 10
4654	if (adapter->num_queues > 1) {
4655		uint8_t  rss_key[4 * RSSKEYLEN];
4656		uint32_t reta = 0;
4657		int i;
4658
4659		/*
4660		* Configure RSS key
4661		*/
4662		arc4rand(rss_key, sizeof(rss_key), 0);
4663		for (i = 0; i < RSSKEYLEN; ++i) {
4664			uint32_t rssrk = 0;
4665
4666			rssrk = EM_RSSRK_VAL(rss_key, i);
4667			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4668		}
4669
4670		/*
		* Configure the RSS redirect table in the following fashion:
4672		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4673		*/
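		/*
		 * Worked example: with num_queues == 2 the loop below
		 * yields reta == 0x80008000; the four byte-wide RETA
		 * entries alternate queue 0 and queue 1, with bit 7 of
		 * each entry selecting the queue.
		 */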
4674		for (i = 0; i < sizeof(reta); ++i) {
4675			uint32_t q;
4676
4677			q = (i % adapter->num_queues) << 7;
4678			reta |= q << (8 * i);
4679		}
4680
4681		for (i = 0; i < 32; ++i) {
4682			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4683		}
4684
4685		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4686				E1000_MRQC_RSS_FIELD_IPV4_TCP |
4687				E1000_MRQC_RSS_FIELD_IPV4 |
4688				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4689				E1000_MRQC_RSS_FIELD_IPV6_EX |
4690				E1000_MRQC_RSS_FIELD_IPV6);
4691	}
4692#endif
4693	/*
4694	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4695	** long latencies are observed, like Lenovo X60. This
4696	** change eliminates the problem, but since having positive
4697	** values in RDTR is a known source of problems on other
4698	** platforms another solution is being sought.
4699	*/
4700	if (hw->mac.type == e1000_82573)
4701		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4702
4703	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4704		/* Setup the Base and Length of the Rx Descriptor Ring */
4705		u64 bus_addr = rxr->rxdma.dma_paddr;
4706		u32 rdt = adapter->num_rx_desc - 1; /* default */
4707
4708		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4709		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4710		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4711		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4712		/* Setup the Head and Tail Descriptor Pointers */
4713		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4714#ifdef DEV_NETMAP
4715		/*
4716		 * an init() while a netmap client is active must
4717		 * preserve the rx buffers passed to userspace.
4718		 */
4719		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
4720			struct netmap_adapter *na = netmap_getna(adapter->ifp);
4721			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
4722		}
4723#endif /* DEV_NETMAP */
4724		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4725	}
4726
4727	/*
4728	 * Set PTHRESH for improved jumbo performance
4729	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4730	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4731	 * Only write to RXDCTL(1) if there is a need for different
4732	 * settings.
4733	 */
4734	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4735	    (adapter->hw.mac.type == e1000_pch2lan) ||
4736	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4737	    (if_getmtu(ifp) > ETHERMTU)) {
4738		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4739		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4740	} else if (adapter->hw.mac.type == e1000_82574) {
4741		for (int i = 0; i < adapter->num_queues; i++) {
4742			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4743
4744			rxdctl |= 0x20; /* PTHRESH */
4745			rxdctl |= 4 << 8; /* HTHRESH */
4746			rxdctl |= 4 << 16;/* WTHRESH */
4747			rxdctl |= 1 << 24; /* Switch to granularity */
4748			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4749		}
4750	}
4751
4752	if (adapter->hw.mac.type >= e1000_pch2lan) {
4753		if (if_getmtu(ifp) > ETHERMTU)
4754			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4755		else
4756			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4757	}
4758
4759        /* Make sure VLAN Filters are off */
4760        rctl &= ~E1000_RCTL_VFE;
4761
4762	if (adapter->rx_mbuf_sz == MCLBYTES)
4763		rctl |= E1000_RCTL_SZ_2048;
4764	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4765		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4766	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4767		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4768
	/* Clear the descriptor type field, i.e. use a DTYPE of 00 here */
4770	rctl &= ~0x00000C00;
4771	/* Write out the settings */
4772	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4773
4774	return;
4775}
4776
4777
4778/*********************************************************************
4779 *
4780 *  This routine executes in interrupt context. It replenishes
4781 *  the mbufs in the descriptor and sends data which has been
4782 *  dma'ed into host memory to upper layer.
4783 *
4784 *  We loop at most count times if count is > 0, or until done if
4785 *  count < 0.
4786 *
4787 *  For polling we also now return the number of cleaned packets
4788 *********************************************************************/
4789static bool
4790em_rxeof(struct rx_ring *rxr, int count, int *done)
4791{
4792	struct adapter		*adapter = rxr->adapter;
4793	if_t ifp = adapter->ifp;
4794	struct mbuf		*mp, *sendmp;
4795	u32			status = 0;
4796	u16 			len;
4797	int			i, processed, rxdone = 0;
4798	bool			eop;
4799	union e1000_rx_desc_extended	*cur;
4800
4801	EM_RX_LOCK(rxr);
4802
4803	/* Sync the ring */
4804	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4805	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4806
4807
4808#ifdef DEV_NETMAP
4809	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4810		EM_RX_UNLOCK(rxr);
4811		return (FALSE);
4812	}
4813#endif /* DEV_NETMAP */
4814
4815	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4816		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
4817			break;
4818
4819		cur = &rxr->rx_base[i];
4820		status = le32toh(cur->wb.upper.status_error);
4821		mp = sendmp = NULL;
4822
4823		if ((status & E1000_RXD_STAT_DD) == 0)
4824			break;
4825
4826		len = le16toh(cur->wb.upper.length);
4827		eop = (status & E1000_RXD_STAT_EOP) != 0;
4828
4829		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4830		    (rxr->discard == TRUE)) {
4831			adapter->dropped_pkts++;
4832			++rxr->rx_discarded;
4833			if (!eop) /* Catch subsequent segs */
4834				rxr->discard = TRUE;
4835			else
4836				rxr->discard = FALSE;
4837			em_rx_discard(rxr, i);
4838			goto next_desc;
4839		}
4840		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4841
4842		/* Assign correct length to the current fragment */
4843		mp = rxr->rx_buffers[i].m_head;
4844		mp->m_len = len;
4845
4846		/* Trigger for refresh */
4847		rxr->rx_buffers[i].m_head = NULL;
4848
4849		/* First segment? */
4850		if (rxr->fmp == NULL) {
4851			mp->m_pkthdr.len = len;
4852			rxr->fmp = rxr->lmp = mp;
4853		} else {
4854			/* Chain mbuf's together */
4855			mp->m_flags &= ~M_PKTHDR;
4856			rxr->lmp->m_next = mp;
4857			rxr->lmp = mp;
4858			rxr->fmp->m_pkthdr.len += len;
4859		}
4860
4861		if (eop) {
4862			--count;
4863			sendmp = rxr->fmp;
4864			if_setrcvif(sendmp, ifp);
4865			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
4866			em_receive_checksum(status, sendmp);
4867#ifndef __NO_STRICT_ALIGNMENT
4868			if (adapter->hw.mac.max_frame_size >
4869			    (MCLBYTES - ETHER_ALIGN) &&
4870			    em_fixup_rx(rxr) != 0)
4871				goto skip;
4872#endif
4873			if (status & E1000_RXD_STAT_VP) {
4874				if_setvtag(sendmp,
4875				    le16toh(cur->wb.upper.vlan));
4876				sendmp->m_flags |= M_VLANTAG;
4877			}
4878#ifndef __NO_STRICT_ALIGNMENT
4879skip:
4880#endif
4881			rxr->fmp = rxr->lmp = NULL;
4882		}
4883next_desc:
4884		/* Sync the ring */
4885		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4886	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4887
4888		/* Zero out the receive descriptors status. */
4889		cur->wb.upper.status_error &= htole32(~0xFF);
4890		++rxdone;	/* cumulative for POLL */
4891		++processed;
4892
4893		/* Advance our pointers to the next descriptor. */
4894		if (++i == adapter->num_rx_desc)
4895			i = 0;
4896
4897		/* Send to the stack */
4898		if (sendmp != NULL) {
4899			rxr->next_to_check = i;
4900			EM_RX_UNLOCK(rxr);
4901			if_input(ifp, sendmp);
4902			EM_RX_LOCK(rxr);
4903			i = rxr->next_to_check;
4904		}
4905
4906		/* Only refresh mbufs every 8 descriptors */
4907		if (processed == 8) {
4908			em_refresh_mbufs(rxr, i);
4909			processed = 0;
4910		}
4911	}
4912
4913	/* Catch any remaining refresh work */
4914	if (e1000_rx_unrefreshed(rxr))
4915		em_refresh_mbufs(rxr, i);
4916
4917	rxr->next_to_check = i;
4918	if (done != NULL)
4919		*done = rxdone;
4920	EM_RX_UNLOCK(rxr);
4921
4922	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4923}
4924
4925static __inline void
4926em_rx_discard(struct rx_ring *rxr, int i)
4927{
4928	struct em_rxbuffer	*rbuf;
4929
4930	rbuf = &rxr->rx_buffers[i];
4931	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4932
4933	/* Free any previous pieces */
4934	if (rxr->fmp != NULL) {
4935		rxr->fmp->m_flags |= M_PKTHDR;
4936		m_freem(rxr->fmp);
4937		rxr->fmp = NULL;
4938		rxr->lmp = NULL;
4939	}
4940	/*
4941	** Free buffer and allow em_refresh_mbufs()
4942	** to clean up and recharge buffer.
4943	*/
4944	if (rbuf->m_head) {
4945		m_free(rbuf->m_head);
4946		rbuf->m_head = NULL;
4947	}
4948	return;
4949}
4950
4951#ifndef __NO_STRICT_ALIGNMENT
4952/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of the
 * 8254x as it nullifies DMA operations. The 8254x only allows the RX buffer
 * size to be 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN
 * to align its payload. On architectures without strict alignment restrictions
 * the 8254x still performs unaligned memory accesses, which reduce performance
 * as well. To avoid copying over an entire frame to align it, we allocate a
 * new mbuf and copy the ethernet header to the new mbuf. The new mbuf is
 * prepended onto the existing mbuf chain.
 *
 * Be aware, the best performance of the 8254x is achieved only when jumbo
 * frames are not used at all on architectures with strict alignment.
4965 */
4966static int
4967em_fixup_rx(struct rx_ring *rxr)
4968{
4969	struct adapter *adapter = rxr->adapter;
4970	struct mbuf *m, *n;
4971	int error;
4972
4973	error = 0;
4974	m = rxr->fmp;
4975	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4976		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4977		m->m_data += ETHER_HDR_LEN;
4978	} else {
4979		MGETHDR(n, M_NOWAIT, MT_DATA);
4980		if (n != NULL) {
4981			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4982			m->m_data += ETHER_HDR_LEN;
4983			m->m_len -= ETHER_HDR_LEN;
4984			n->m_len = ETHER_HDR_LEN;
4985			M_MOVE_PKTHDR(n, m);
4986			n->m_next = m;
4987			rxr->fmp = n;
4988		} else {
4989			adapter->dropped_pkts++;
4990			m_freem(rxr->fmp);
4991			rxr->fmp = NULL;
4992			error = ENOMEM;
4993		}
4994	}
4995
4996	return (error);
4997}
4998#endif
4999
5000static void
5001em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5002{
5003	rxd->read.buffer_addr = htole64(rxbuf->paddr);
5004	/* DD bits must be cleared */
	rxd->wb.upper.status_error = 0;
5006}
5007
5008/*********************************************************************
5009 *
5010 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it.
5013 *
5014 *********************************************************************/
5015static void
5016em_receive_checksum(uint32_t status, struct mbuf *mp)
5017{
5018	mp->m_pkthdr.csum_flags = 0;
5019
	/* The Ignore Checksum bit is set; report no checksum status */
5021	if (status & E1000_RXD_STAT_IXSM)
5022		return;
5023
5024	/* If the IP checksum exists and there is no IP Checksum error */
5025	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5026		E1000_RXD_STAT_IPCS) {
5027		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5028	}
5029
5030	/* TCP or UDP checksum */
5031	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5032	    E1000_RXD_STAT_TCPCS) {
5033		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5034		mp->m_pkthdr.csum_data = htons(0xffff);
5035	}
5036	if (status & E1000_RXD_STAT_UDPCS) {
5037		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5038		mp->m_pkthdr.csum_data = htons(0xffff);
5039	}
5040}
5041
5042/*
 * This routine is run via a vlan
5044 * config EVENT
5045 */
5046static void
5047em_register_vlan(void *arg, if_t ifp, u16 vtag)
5048{
5049	struct adapter	*adapter = if_getsoftc(ifp);
5050	u32		index, bit;
5051
	if ((void *)adapter != arg)   /* Not our event */
5053		return;
5054
5055	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
5056                return;
5057
5058	EM_CORE_LOCK(adapter);
5059	index = (vtag >> 5) & 0x7F;
5060	bit = vtag & 0x1F;
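	/*
	 * Worked example: vtag 100 gives index = (100 >> 5) & 0x7F = 3
	 * and bit = 100 & 0x1F = 4, so bit 4 of shadow_vfta[3] is set.
	 */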
5061	adapter->shadow_vfta[index] |= (1 << bit);
5062	++adapter->num_vlans;
5063	/* Re-init to load the changes */
5064	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5065		em_init_locked(adapter);
5066	EM_CORE_UNLOCK(adapter);
5067}
5068
5069/*
 * This routine is run via a vlan
5071 * unconfig EVENT
5072 */
5073static void
5074em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
5075{
5076	struct adapter	*adapter = if_getsoftc(ifp);
5077	u32		index, bit;
5078
5079	if (adapter != arg)
5080		return;
5081
5082	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5083                return;
5084
5085	EM_CORE_LOCK(adapter);
5086	index = (vtag >> 5) & 0x7F;
5087	bit = vtag & 0x1F;
5088	adapter->shadow_vfta[index] &= ~(1 << bit);
5089	--adapter->num_vlans;
5090	/* Re-init to load the changes */
5091	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
5092		em_init_locked(adapter);
5093	EM_CORE_UNLOCK(adapter);
5094}
5095
5096static void
5097em_setup_vlan_hw_support(struct adapter *adapter)
5098{
5099	struct e1000_hw *hw = &adapter->hw;
5100	u32             reg;
5101
5102	/*
	** We get here through init_locked, meaning
	** a soft reset; this has already cleared
	** the VFTA and other state, so if no
	** vlans have been registered, do nothing.
5107	*/
5108	if (adapter->num_vlans == 0)
5109                return;
5110
5111	/*
	** A soft reset zeroes out the VFTA, so
5113	** we need to repopulate it now.
5114	*/
5115	for (int i = 0; i < EM_VFTA_SIZE; i++)
5116                if (adapter->shadow_vfta[i] != 0)
5117			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5118                            i, adapter->shadow_vfta[i]);
5119
5120	reg = E1000_READ_REG(hw, E1000_CTRL);
5121	reg |= E1000_CTRL_VME;
5122	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5123
5124	/* Enable the Filter Table */
5125	reg = E1000_READ_REG(hw, E1000_RCTL);
5126	reg &= ~E1000_RCTL_CFIEN;
5127	reg |= E1000_RCTL_VFE;
5128	E1000_WRITE_REG(hw, E1000_RCTL, reg);
5129}
5130
5131static void
5132em_enable_intr(struct adapter *adapter)
5133{
5134	struct e1000_hw *hw = &adapter->hw;
5135	u32 ims_mask = IMS_ENABLE_MASK;
5136
5137	if (hw->mac.type == e1000_82574) {
5138		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5139		ims_mask |= EM_MSIX_MASK;
5140	}
5141	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5142}
5143
5144static void
5145em_disable_intr(struct adapter *adapter)
5146{
5147	struct e1000_hw *hw = &adapter->hw;
5148
5149	if (hw->mac.type == e1000_82574)
5150		E1000_WRITE_REG(hw, EM_EIAC, 0);
5151	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5152}
5153
5154/*
5155 * Bit of a misnomer, what this really means is
5156 * to enable OS management of the system... aka
5157 * to disable special hardware management features
5158 */
5159static void
5160em_init_manageability(struct adapter *adapter)
5161{
5162	/* A shared code workaround */
5163#define E1000_82542_MANC2H E1000_MANC2H
5164	if (adapter->has_manage) {
5165		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5166		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5167
5168		/* disable hardware interception of ARP */
5169		manc &= ~(E1000_MANC_ARP_EN);
5170
5171                /* enable receiving management packets to the host */
5172		manc |= E1000_MANC_EN_MNG2HOST;
5173#define E1000_MNG2HOST_PORT_623 (1 << 5)
5174#define E1000_MNG2HOST_PORT_664 (1 << 6)
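		/*
		 * Ports 623 and 664 are the ASF/RMCP remote management
		 * ports; MANC2H forwards traffic on them to the host.
		 */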
5175		manc2h |= E1000_MNG2HOST_PORT_623;
5176		manc2h |= E1000_MNG2HOST_PORT_664;
5177		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5178		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5179	}
5180}
5181
5182/*
5183 * Give control back to hardware management
5184 * controller if there is one.
5185 */
5186static void
5187em_release_manageability(struct adapter *adapter)
5188{
5189	if (adapter->has_manage) {
5190		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5191
5192		/* re-enable hardware interception of ARP */
5193		manc |= E1000_MANC_ARP_EN;
5194		manc &= ~E1000_MANC_EN_MNG2HOST;
5195
5196		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5197	}
5198}
5199
5200/*
5201 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5202 * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded. For AMT versions of the f/w
5204 * this means that the network i/f is open.
5205 */
5206static void
5207em_get_hw_control(struct adapter *adapter)
5208{
5209	u32 ctrl_ext, swsm;
5210
5211	if (adapter->hw.mac.type == e1000_82573) {
5212		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5213		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5214		    swsm | E1000_SWSM_DRV_LOAD);
5215		return;
5216	}
5217	/* else */
5218	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5219	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5220	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5221	return;
5222}
5223
/*
 * em_release_hw_control resets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is no longer loaded. For AMT versions of the
 * f/w this means that the network i/f is closed.
 */
static void
em_release_hw_control(struct adapter *adapter)
{
	u32 ctrl_ext, swsm;

	if (!adapter->has_manage)
		return;

	if (adapter->hw.mac.type == e1000_82573) {
		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
		    swsm & ~E1000_SWSM_DRV_LOAD);
		return;
	}
	/* else */
	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

static int
em_is_valid_ether_addr(u8 *addr)
{
	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
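	/*
	 * Reject any address with the multicast (I/G) bit set in the
	 * first octet, and the all-zeroes address; neither is a valid
	 * unicast station address.
	 */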
	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
		return (FALSE);
	}

	return (TRUE);
}

/*
** Parse the interface capabilities with regard
** to both system management and wake-on-lan for
** later use.
*/
static void
em_get_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u16		eeprom_data = 0, device_id, apme_mask;

	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
	apme_mask = EM_EEPROM_APME;

	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82583:
		adapter->has_amt = TRUE;
		/* FALLTHROUGH */
	case e1000_82571:
	case e1000_82572:
	case e1000_80003es2lan:
		if (adapter->hw.bus.func == 1)
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
		else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	case e1000_ich8lan:
	case e1000_ich9lan:
	case e1000_ich10lan:
	case e1000_pchlan:
	case e1000_pch2lan:
	case e1000_pch_lpt:
	case e1000_pch_spt:
		apme_mask = E1000_WUC_APME;
		adapter->has_amt = TRUE;
		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
		break;
	default:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	}
	if (eeprom_data & apme_mask)
		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
	 * We have the eeprom settings; now apply the special cases
	 * where the eeprom may be wrong or the board won't support
	 * wake-on-lan on a particular port.
	 */
	device_id = pci_get_device(dev);
	switch (device_id) {
	case E1000_DEV_ID_82571EB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82571EB_QUAD_COPPER:
	case E1000_DEV_ID_82571EB_QUAD_FIBER:
	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
}

/*
 * Enable PCI Wake-on-LAN capability
 */
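/*
 * Usage sketch (from userland, assuming the standard ifconfig(8) WOL
 * capability flags): "ifconfig em0 wol_magic" sets IFCAP_WOL_MAGIC,
 * which em_enable_wakeup() below translates into WUFC filter bits
 * when the device suspends.
 */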
static void
em_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;
	int		error = 0;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16		status;

	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
		return;

	/*
	** Determine the type of wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
		goto pme;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);

	/* Keep the laser running on fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	if ((adapter->hw.mac.type == e1000_ich8lan) ||
	    (adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_ich9lan) ||
	    (adapter->hw.mac.type == e1000_ich10lan))
		e1000_suspend_workarounds_ich8lan(&adapter->hw);

	if ((adapter->hw.mac.type == e1000_pchlan) ||
	    (adapter->hw.mac.type == e1000_pch2lan) ||
	    (adapter->hw.mac.type == e1000_pch_lpt) ||
	    (adapter->hw.mac.type == e1000_pch_spt)) {
		error = em_enable_phy_wakeup(adapter);
		if (error)
			goto pme;
	} else {
		/* Enable wakeup by the MAC */
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}

	if (adapter->hw.phy.type == e1000_phy_igp_3)
		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);

pme:
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (!error && (if_getcapenable(ifp) & IFCAP_WOL))
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
}

/*
** WOL in the newer chipset interfaces (pchlan)
** requires things to be copied into the PHY
*/
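/*
 * Background note (per the e1000 shared code used below): the BM_WUC
 * and BM_WUFC wakeup registers live on a separate PHY register page,
 * and the host-wakeup enable bits sit on the port-config page (the
 * "page 769" referenced in the error message below), which is why
 * this routine selects pages via IGP01E1000_PHY_PAGE_SELECT.
 */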
static int
em_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	e1000_copy_rx_addrs_to_phy_ich8lan(hw);

	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register */
	e1000_read_phy_reg(hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
		    << BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	    (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	hw->phy.ops.release(hw);

	return ret;
}

static void
em_led_func(void *arg, int onoff)
{
	struct adapter	*adapter = arg;

	EM_CORE_LOCK(adapter);
	if (onoff) {
		e1000_setup_led(&adapter->hw);
		e1000_led_on(&adapter->hw);
	} else {
		e1000_led_off(&adapter->hw);
		e1000_cleanup_led(&adapter->hw);
	}
	EM_CORE_UNLOCK(adapter);
}

/*
** Disable the L0s and L1 link states
*/
static void
em_disable_aspm(struct adapter *adapter)
{
	int		base, reg;
	u16		link_cap, link_ctrl;
	device_t	dev = adapter->dev;

	switch (adapter->hw.mac.type) {
	case e1000_82573:
	case e1000_82574:
	case e1000_82583:
		break;
	default:
		return;
	}
	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
		return;
	reg = base + PCIER_LINK_CAP;
	link_cap = pci_read_config(dev, reg, 2);
	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
		return;
	reg = base + PCIER_LINK_CTL;
	link_ctrl = pci_read_config(dev, reg, 2);
	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
	pci_write_config(dev, reg, link_ctrl, 2);
}

/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
em_update_stats_counters(struct adapter *adapter)
{

	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
	    (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Interrupt Counts */

	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);

	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
}

static uint64_t
em_get_counter(if_t ifp, ift_counter cnt)
{
	struct adapter *adapter;

	adapter = if_getsoftc(ifp);

	switch (cnt) {
	case IFCOUNTER_COLLISIONS:
		return (adapter->stats.colc);
	case IFCOUNTER_IERRORS:
		return (adapter->dropped_pkts + adapter->stats.rxerrc +
		    adapter->stats.crcerrs + adapter->stats.algnerrc +
		    adapter->stats.ruc + adapter->stats.roc +
		    adapter->stats.mpc + adapter->stats.cexterr);
	case IFCOUNTER_OERRORS:
		return (adapter->stats.ecol + adapter->stats.latecol +
		    adapter->watchdog_events);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

/* Export a single 32-bit register via a read-only sysctl. */
static int
em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
em_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = &adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
			CTLFLAG_RD, &adapter->link_irq,
			"Link MSIX IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
			CTLFLAG_RD, &adapter->mbuf_defrag_failed,
			"Defragmenting mbuf chain failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver TX DMA failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
			CTLFLAG_RD, &adapter->watchdog_events,
			"Watchdog timeouts");

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
			em_sysctl_reg_handler, "IU",
			"Device Control Register");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
			em_sysctl_reg_handler, "IU",
			"Receiver Control Register");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
			"Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
			"Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "TX Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDH(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_TDT(txr->me),
				em_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
				CTLFLAG_RD, &txr->tx_irq,
				"Queue MSI-X Transmit Interrupts");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &txr->no_desc_avail,
				"Queue No Descriptor Available");

		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "RX Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDH(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter,
				E1000_RDT(rxr->me),
				em_sysctl_reg_handler, "IU",
				"Receive Descriptor Tail");
		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
				CTLFLAG_RD, &rxr->rx_irq,
				"Queue MSI-X Receive Interrupts");
	}

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
				    CTLFLAG_RD, NULL, "Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &adapter->stats.symerrs,
			"Symbol Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &adapter->stats.sec,
			"Sequence Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &adapter->stats.dc,
			"Defer Count");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &adapter->stats.mpc,
			"Missed Packets");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &adapter->stats.rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &adapter->stats.ruc,
			"Receive Undersize");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &adapter->stats.rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &adapter->stats.roc,
			"Oversized Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &adapter->stats.rjc,
			"Received Jabber");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &adapter->stats.rxerrc,
			"Receive Errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &adapter->stats.crcerrs,
			"CRC errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &adapter->stats.algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &adapter->stats.cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &adapter->stats.xonrxc,
			"XON Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &adapter->stats.xontxc,
			"XON Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &adapter->stats.xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &adapter->stats.xofftxc,
			"XOFF Transmitted");

	/* Packet Reception Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.tpr,
			"Total Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.gprc,
			"Good Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &adapter->stats.mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &adapter->stats.prc64,
			"64 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &adapter->stats.prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &adapter->stats.prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &adapter->stats.prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &adapter->stats.prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &adapter->stats.prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &adapter->stats.gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &adapter->stats.gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &adapter->stats.tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &adapter->stats.gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &adapter->stats.bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &adapter->stats.mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &adapter->stats.ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &adapter->stats.ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &adapter->stats.ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &adapter->stats.ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &adapter->stats.ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &adapter->stats.ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &adapter->stats.tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &adapter->stats.tsctfc,
			"TSO Contexts Failed");

	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &adapter->stats.iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &adapter->stats.icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &adapter->stats.icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &adapter->stats.ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &adapter->stats.ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &adapter->stats.ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &adapter->stats.ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &adapter->stats.icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &adapter->stats.icrxoc,
			"Interrupt Cause Receiver Overrun Count");
}

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. It only dumps the first
 *  32 words; the data that matters lives in that range.
 *
 **********************************************************************/
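/*
 * Usage sketch (assuming the OID name registered at attach time is
 * "nvm"): writing 1 triggers the dump on the console, e.g.
 *	sysctl dev.em.0.nvm=1
 */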
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *)arg1;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1)
		em_print_nvm_info(adapter);

	return (error);
}

static void
em_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0;
			++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

static int
em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
{
	struct em_int_delay_info *info;
	struct adapter *adapter;
	u32 regval;
	int error, usecs, ticks;

	info = (struct em_int_delay_info *)arg1;
	usecs = info->value;
	error = sysctl_handle_int(oidp, &usecs, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
		return (EINVAL);
	info->value = usecs;
	ticks = EM_USECS_TO_TICKS(usecs);
	if (info->offset == E1000_ITR)	/* units are 256ns here */
		ticks *= 4;
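	/*
	 * Worked example (assuming EM_USECS_TO_TICKS() converts to the
	 * hardware's 1.024 us tick granularity): a request of 100 usecs
	 * maps to ~98 ticks; ITR instead counts in 256 ns units, so the
	 * value is scaled by 4 (1024 ns / 256 ns) to ~392, which again
	 * corresponds to roughly 100 us.
	 */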

	adapter = info->adapter;

	EM_CORE_LOCK(adapter);
	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
	regval = (regval & ~0xffff) | (ticks & 0xffff);
	/* Handle a few special cases. */
	switch (info->offset) {
	case E1000_RDTR:
		break;
	case E1000_TIDV:
		if (ticks == 0) {
			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
			/* Don't write 0 into the TIDV register. */
			regval++;
		} else
			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
		break;
	}
	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
	EM_CORE_UNLOCK(adapter);
	return (0);
}

static void
em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
	const char *description, struct em_int_delay_info *info,
	int offset, int value)
{
	info->adapter = adapter;
	info->offset = offset;
	info->value = value;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
	    info, 0, em_sysctl_int_delay, "I", description);
}

static void
em_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
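/*
 * Usage sketch (assuming the OID registered at attach time is "fc"):
 *	sysctl dev.em.0.fc=3	# request full flow control
 * The new mode is applied immediately via e1000_force_mac_fc() below.
 */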
static int
em_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int		error;
	static int	input = 3; /* default is full */
	struct adapter	*adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	if (input == adapter->fc) /* no change? */
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}

/*
** Manage Energy Efficient Ethernet:
** Control values:
**     0/1 - enabled/disabled
*/
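/*
 * Usage sketch (assuming the OID registered at attach time is named
 * "eee_control"; check em_attach for the actual name): writing 1
 * disables EEE and reinitializes the interface, e.g.
 *	sysctl dev.em.0.eee_control=1
 */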
static int
em_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec.ich8lan.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	EM_CORE_LOCK(adapter);
	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
	return (0);
}

static int
em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	if (result == 1) {
		adapter = (struct adapter *)arg1;
		em_print_debug_info(adapter);
	}

	return (error);
}

/*
** This routine is meant to be fluid; add whatever is
** needed for debugging a problem.  -jfv
*/
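/*
 * Usage sketch (assuming the OID registered at attach time is
 * "debug"): writing 1 dumps the queue state below to the console:
 *	sysctl dev.em.0.debug=1
 */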
static void
em_print_debug_info(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
		printf("Interface is RUNNING ");
	else
		printf("Interface is NOT RUNNING\n");

	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
		printf("and INACTIVE\n");
	else
		printf("and ACTIVE\n");

	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
		device_printf(dev, "TX Queue %d ------\n", i);
		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
		device_printf(dev, "TX descriptors avail = %d\n",
		    txr->tx_avail);
		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
		    txr->no_desc_avail);
		device_printf(dev, "RX Queue %d ------\n", i);
		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
		device_printf(dev, "RX discarded packets = %ld\n",
		    rxr->rx_discarded);
		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
	}
}

#ifdef EM_MULTIQUEUE
/*
 * 82574 only:
 * Write a new value to the EEPROM increasing the number of MSIX
 * vectors from 3 to 5, for proper multiqueue support.
 */
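/*
 * Note: the advertised MSI-X vector count is latched from the NVM
 * when the device powers up, so after this rewrite the extra vectors
 * typically only become visible on the next reset/power cycle.
 */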
static void
em_enable_vectors_82574(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	device_t dev = adapter->dev;
	u16 edata;

	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
	device_printf(dev, "Current cap: %#06x\n", edata);
	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
		device_printf(dev, "Writing to eeprom: increasing "
		    "reported MSIX vectors from 3 to 5...\n");
		edata &= ~(EM_NVM_MSIX_N_MASK);
		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
		e1000_update_nvm_checksum(hw);
		device_printf(dev, "Writing to eeprom: done\n");
	}
}
#endif

#ifdef DDB
DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
{
	devclass_t	dc;
	int max_em;

	dc = devclass_find("em");
	max_em = devclass_get_maxunit(dc);

	for (int index = 0; index < max_em; index++) {
		device_t dev;
		dev = devclass_get_device(dc, index);
		if (dev != NULL && device_get_driver(dev) == &em_driver) {
			struct adapter *adapter = device_get_softc(dev);
			EM_CORE_LOCK(adapter);
			em_init_locked(adapter);
			EM_CORE_UNLOCK(adapter);
		}
	}
}
DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
{
	devclass_t	dc;
	int max_em;

	dc = devclass_find("em");
	max_em = devclass_get_maxunit(dc);

	for (int index = 0; index < max_em; index++) {
		device_t dev;
		dev = devclass_get_device(dc, index);
		if (dev != NULL && device_get_driver(dev) == &em_driver)
			em_print_debug_info(device_get_softc(dev));
	}
}
#endif
