/******************************************************************************

  Copyright (c) 2001-2014, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/10/sys/dev/e1000/if_em.c 273736 2014-10-27 14:38:00Z hselasky $*/

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82571.h"
#include "if_em.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	em_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char em_driver_version[] = "7.4.2";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static em_vendor_info_t em_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	em_probe(device_t);
static int	em_attach(device_t);
static int	em_detach(device_t);
static int	em_shutdown(device_t);
static int	em_suspend(device_t);
static int	em_resume(device_t);
#ifdef EM_MULTIQUEUE
static int	em_mq_start(struct ifnet *, struct mbuf *);
static int	em_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	em_qflush(struct ifnet *);
#else
static void	em_start(struct ifnet *);
static void	em_start_locked(struct ifnet *, struct tx_ring *);
#endif
static int	em_ioctl(struct ifnet *, u_long, caddr_t);
static void	em_init(void *);
static void	em_init_locked(struct adapter *);
static void	em_stop(void *);
static void	em_media_status(struct ifnet *, struct ifmediareq *);
static int	em_media_change(struct ifnet *);
static void	em_identify_hardware(struct adapter *);
static int	em_allocate_pci_resources(struct adapter *);
static int	em_allocate_legacy(struct adapter *);
static int	em_allocate_msix(struct adapter *);
static int	em_allocate_queues(struct adapter *);
static int	em_setup_msix(struct adapter *);
static void	em_free_pci_resources(struct adapter *);
static void	em_local_timer(void *);
static void	em_reset(struct adapter *);
static int	em_setup_interface(device_t, struct adapter *);

static void	em_setup_transmit_structures(struct adapter *);
static void	em_initialize_transmit_unit(struct adapter *);
static int	em_allocate_transmit_buffers(struct tx_ring *);
static void	em_free_transmit_structures(struct adapter *);
static void	em_free_transmit_buffers(struct tx_ring *);

static int	em_setup_receive_structures(struct adapter *);
static int	em_allocate_receive_buffers(struct rx_ring *);
static void	em_initialize_receive_unit(struct adapter *);
static void	em_free_receive_structures(struct adapter *);
static void	em_free_receive_buffers(struct rx_ring *);

static void	em_enable_intr(struct adapter *);
static void	em_disable_intr(struct adapter *);
static void	em_update_stats_counters(struct adapter *);
static void	em_add_hw_stats(struct adapter *adapter);
static void	em_txeof(struct tx_ring *);
static bool	em_rxeof(struct rx_ring *, int, int *);
#ifndef __NO_STRICT_ALIGNMENT
static int	em_fixup_rx(struct rx_ring *);
#endif
static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, u32 *, u32 *);
static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
		    struct tcphdr *, u32 *, u32 *);
static void	em_set_promisc(struct adapter *);
static void	em_disable_promisc(struct adapter *);
static void	em_set_multi(struct adapter *);
static void	em_update_link_status(struct adapter *);
static void	em_refresh_mbufs(struct rx_ring *, int);
static void	em_register_vlan(void *, struct ifnet *, u16);
static void	em_unregister_vlan(void *, struct ifnet *, u16);
static void	em_setup_vlan_hw_support(struct adapter *);
static int	em_xmit(struct tx_ring *, struct mbuf **);
static int	em_dma_malloc(struct adapter *, bus_size_t,
		    struct em_dma_alloc *, int);
static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	em_print_nvm_info(struct adapter *);
static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static void	em_print_debug_info(struct adapter *);
static int	em_is_valid_ether_addr(u8 *);
static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	em_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void	em_init_manageability(struct adapter *);
static void	em_release_manageability(struct adapter *);
static void	em_get_hw_control(struct adapter *);
static void	em_release_hw_control(struct adapter *);
static void	em_get_wakeup(device_t);
static void	em_enable_wakeup(device_t);
static int	em_enable_phy_wakeup(struct adapter *);
static void	em_led_func(void *, int);
static void	em_disable_aspm(struct adapter *);

static int	em_irq_fast(void *);

/* MSIX handlers */
static void	em_msix_tx(void *);
static void	em_msix_rx(void *);
static void	em_msix_link(void *);
static void	em_handle_tx(void *context, int pending);
static void	em_handle_rx(void *context, int pending);
static void	em_handle_link(void *context, int pending);

static void	em_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);

static __inline void em_rx_discard(struct rx_ring *, int);

#ifdef DEVICE_POLLING
static poll_handler_t em_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
	DEVMETHOD_END
};

static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
};

devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

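/*
 * The hardware interrupt-delay timers (RDTR/RADV/TIDV/TADV) count in
 * units of 1.024 usecs, so the macros below convert between register
 * ticks and microseconds, rounding to nearest; for example,
 * EM_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks.
 */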
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
#define M_TSO_LEN			66

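/*
 * The ITR register is programmed in 256 ns increments, so the default
 * below works out to 1000000000 / (8000 * 256) = 488 register units,
 * i.e. a minimum inter-interrupt interval of 488 * 256 ns, roughly
 * 125 usecs, or 8000 interrupts per second.
 */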
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

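/*
 * Each tunable below is paired with a sysctl: TUNABLE_INT picks the
 * value up from the loader environment (e.g. loader.conf) at boot,
 * and the CTLFLAG_RDTUN sysctl exposes it read-only under hw.em.
 */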
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy Efficient Ethernet - default to OFF */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;

#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_em_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  em_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
em_probe(device_t dev)
{
	char		adapter_name[60];
	u16		pci_vendor_id = 0;
	u16		pci_device_id = 0;
	u16		pci_subvendor_id = 0;
	u16		pci_subdevice_id = 0;
	em_vendor_info_t *ent;

	INIT_DEBUGOUT("em_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != EM_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				em_strings[ent->index],
				em_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_attach(device_t dev)
{
	struct adapter	*adapter;
	struct e1000_hw	*hw;
	int		error = 0;

	INIT_DEBUGOUT("em_attach: begin");

	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	hw = &adapter->hw;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	em_identify_hardware(adapter);

	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	** identified
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
			error = ENXIO;
			goto err_pci;
		}
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(hw);

	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    &adapter->tx_itr,
	    E1000_REGISTER(hw, E1000_ITR),
	    DEFAULT_ITR);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. They
	 * must not exceed the hardware maximum, and must be a multiple
	 * of EM_DBA_ALIGN.
	 */
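	/*
	 * Since each legacy descriptor is 16 bytes, any descriptor count
	 * that is a multiple of 8 satisfies the 128-byte EM_DBA_ALIGN
	 * requirement; the defaults below already do.
	 */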
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = em_txd;

	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = em_rxd;

	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
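	/*
	 * For a standard MTU this works out to 1500 + 14 + 4 = 1518
	 * bytes: payload plus ethernet header plus FCS.
	 */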
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	hw->mac.report_tx_early = 1;

	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");

	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");

	/*
	** Start from a known state; this is
	** important for reading the nvm and
	** mac address from it.
	*/
	e1000_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);

	/*
	**  Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else  /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	em_get_wakeup(dev);

	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
		goto err_late;

	em_reset(adapter);

	/* Initialize statistics */
	em_update_stats_counters(adapter);

	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	em_add_hw_stats(adapter);

	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
#ifdef DEV_NETMAP
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */

	INIT_DEBUGOUT("em_attach: end");

	return (0);

err_late:
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
em_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	em_stop(adapter);
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
em_shutdown(device_t dev)
{
	return em_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
em_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	EM_CORE_LOCK(adapter);

	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);

	EM_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
em_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	EM_CORE_LOCK(adapter);
	if (adapter->hw.mac.type == e1000_pch2lan)
		e1000_resume_workarounds_pchlan(&adapter->hw);
	em_init_locked(adapter);
	em_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			EM_TX_LOCK(txr);
#ifdef EM_MULTIQUEUE
			if (!drbr_empty(ifp, txr->br))
				em_mq_start_locked(ifp, txr, NULL);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				em_start_locked(ifp, txr);
#endif
			EM_TX_UNLOCK(txr);
		}
	}
	EM_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

#ifdef EM_MULTIQUEUE
/*********************************************************************
 *  Multiqueue Transmit routines
 *
 *  em_mq_start is called by the stack to initiate a transmit.
 *  However, if the ring is busy the driver can queue the request
 *  rather than do an immediate send. This ability to queue, rather
 *  than having multiple hardware tx queues, is where this driver
 *  gains its advantage.
 **********************************************************************/
static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m != NULL) {
		err = drbr_enqueue(ifp, txr->br, m);
		if (err)
			return (err);
	}

	/* Process the queue */
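	/*
	 * drbr_peek() leaves the mbuf on the ring: on a successful
	 * transmit we drbr_advance() past it; if em_xmit() failed and
	 * consumed the mbuf (next == NULL) we advance to drop it,
	 * otherwise we drbr_putback() the possibly-modified mbuf for
	 * a later retry.
	 */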
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = em_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		ifp->if_obytes += next->m_pkthdr.len;
		if (next->m_flags & M_MCAST)
			ifp->if_omcasts++;
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

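	/*
	 * Note the time we started filling the ring; em_local_timer()
	 * compares queue_status and watchdog_time against the current
	 * ticks value to detect a hung queue.
	 */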
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = EM_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}

	if (txr->tx_avail < EM_MAX_SCATTER)
		em_txeof(txr);
	if (txr->tx_avail < EM_MAX_SCATTER)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	return (err);
}

/*
** Multiqueue capable stack interface
*/
static int
em_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	int		error;

	if (EM_TX_TRYLOCK(txr)) {
		error = em_mq_start_locked(ifp, txr, m);
		EM_TX_UNLOCK(txr);
	} else
		error = drbr_enqueue(ifp, txr->br, m);

	return (error);
}

/*
** Flush all ring buffers
*/
static void
em_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		EM_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		EM_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#else  /* !EM_MULTIQUEUE */

static void
em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		/* Call cleanup if number of TX descriptors low */
		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
			em_txeof(txr);
		if (txr->tx_avail < EM_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (em_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_time = ticks;
		txr->queue_status = EM_QUEUE_WORKING;
	}

	return;
}

static void
em_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		EM_TX_LOCK(txr);
		em_start_locked(ifp, txr);
		EM_TX_UNLOCK(txr);
	}
	return;
}
#endif /* EM_MULTIQUEUE */

/*********************************************************************
 *  Ioctl entry point
 *
 *  em_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				em_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		switch (adapter->hw.mac.type) {
		case e1000_82571:
		case e1000_82572:
		case e1000_ich9lan:
		case e1000_ich10lan:
		case e1000_pch2lan:
		case e1000_pch_lpt:
		case e1000_82574:
		case e1000_82583:
		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
			max_frame_size = 9234;
			break;
		case e1000_pchlan:
			max_frame_size = 4096;
			break;
			/* Adapters that do not support jumbo frames */
		case e1000_ich8lan:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->hw.mac.max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		em_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					em_disable_promisc(adapter);
					em_set_promisc(adapter);
				}
			} else
				em_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				em_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			em_disable_intr(adapter);
			em_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				em_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* falls thru */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(em_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				em_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				em_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (ifp->if_capabilities & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				ifp->if_capenable ^= IFCAP_WOL_MCAST;
			if (mask & IFCAP_WOL_MAGIC)
				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			em_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as the
 *  init entry point in the network interface structure. It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 **********************************************************************/

static void
em_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("em_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	em_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address; user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/*
	 * With the 82571 adapter, RAR[0] may be overwritten
	 * when the other port is reset; we make a duplicate
	 * in RAR[14] for that eventuality, which assures
	 * the interface continues to function.
	 */
	if (adapter->hw.mac.type == e1000_82571) {
		e1000_set_laa_state_82571(&adapter->hw, TRUE);
		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
		    E1000_RAR_ENTRIES - 1);
	}

	/* Initialize the hardware */
	em_reset(adapter);
	em_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	em_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	em_setup_transmit_structures(adapter);
	em_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	em_set_multi(adapter);

	/*
	** Figure out the desired mbuf
	** pool for doing jumbos
	*/
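	/*
	 * MCLBYTES is a 2k cluster, MJUMPAGESIZE a PAGE_SIZE cluster
	 * (4k on most platforms), and MJUM9BYTES a 9k cluster, so each
	 * received frame still fits in a single receive buffer.
	 */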
	if (adapter->hw.mac.max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->hw.mac.max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (em_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		em_stop(adapter);
		return;
	}
	em_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			em_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
		}
	}

	/* Don't lose promiscuous settings */
	em_set_promisc(adapter);

	/* Set the interface as ACTIVE */
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	/* MSI/X configuration for 82574 */
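	/*
	 * On the 82574, PBA_CLR requests that the MSI-X pending-bit
	 * array be cleared along with ICR, and the IVAR value (built
	 * when the MSI-X vectors were allocated) routes the rx/tx/link
	 * causes to their vectors.
	 */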
	if (adapter->hw.mac.type == e1000_82574) {
		int tmp;
		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
		/* Set the IVAR - interrupt vector routing. */
		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
	}

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling; make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		em_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		em_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		em_get_hw_control(adapter);
}

static void
em_init(void *arg)
{
	struct adapter *adapter = arg;

	EM_CORE_LOCK(adapter);
	em_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: note this only works with a single queue
 *
 *********************************************************************/
static int
em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	u32		reg_icr;
	int		rx_done;

	EM_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (0);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			em_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    em_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	em_rxeof(rxr, count, &rx_done);

	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	EM_TX_UNLOCK(txr);

	return (rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  Fast Legacy/MSI Combined Interrupt Service routine
 *
 *********************************************************************/
static int
em_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	u32		reg_icr;

	ifp = adapter->ifp;

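	/*
	 * Reading ICR both returns and clears the pending interrupt
	 * causes, so the register is sampled once here and the copy
	 * tested below.
	 */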
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	/*
	 * Starting with the 82571 chip, bit 31 should be used to
	 * determine whether the interrupt belongs to us.
	 */
	if (adapter->hw.mac.type >= e1000_82571 &&
	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	em_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

/* Combined RX/TX handler, used by Legacy and MSI */
static void
em_handle_que(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(txr);
		em_txeof(txr);
#ifdef EM_MULTIQUEUE
		if (!drbr_empty(ifp, txr->br))
			em_mq_start_locked(ifp, txr, NULL);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			em_start_locked(ifp, txr);
#endif
		EM_TX_UNLOCK(txr);
		if (more) {
			taskqueue_enqueue(adapter->tq, &adapter->que_task);
			return;
		}
	}

	em_enable_intr(adapter);
	return;
}

/*********************************************************************
 *
 *  MSIX Interrupt Service Routines
 *
 **********************************************************************/
static void
em_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	EM_TX_LOCK(txr);
	em_txeof(txr);
#ifdef EM_MULTIQUEUE
	if (!drbr_empty(ifp, txr->br))
		em_mq_start_locked(ifp, txr, NULL);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		em_start_locked(ifp, txr);
#endif
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
	EM_TX_UNLOCK(txr);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
em_msix_rx(void *arg)
{
	struct rx_ring	*rxr = arg;
	struct adapter	*adapter = rxr->adapter;
	bool		more;

	++rxr->rx_irq;
	if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;
	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
	if (more)
		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Fast Interrupt Service routine
 *
 **********************************************************************/
static void
em_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		reg_icr;

	++adapter->link_irq;
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

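	/*
	 * For a link-state change, defer to em_handle_link(), which
	 * re-arms EM_MSIX_LINK | E1000_IMS_LSC itself; any other cause
	 * on this vector is re-armed here directly.
	 */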
1623	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1624		adapter->hw.mac.get_link_status = 1;
1625		em_handle_link(adapter, 0);
1626	} else
1627		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1628		    EM_MSIX_LINK | E1000_IMS_LSC);
1629	return;
1630}
1631
1632static void
1633em_handle_rx(void *context, int pending)
1634{
1635	struct rx_ring	*rxr = context;
1636	struct adapter	*adapter = rxr->adapter;
1637        bool            more;
1638
1639	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1640	if (more)
1641		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1642	else
1643		/* Reenable this interrupt */
1644		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1645}
1646
1647static void
1648em_handle_tx(void *context, int pending)
1649{
1650	struct tx_ring	*txr = context;
1651	struct adapter	*adapter = txr->adapter;
1652	struct ifnet	*ifp = adapter->ifp;
1653
1654	EM_TX_LOCK(txr);
1655	em_txeof(txr);
1656#ifdef EM_MULTIQUEUE
1657	if (!drbr_empty(ifp, txr->br))
1658		em_mq_start_locked(ifp, txr, NULL);
1659#else
1660	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1661		em_start_locked(ifp, txr);
1662#endif
1663	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1664	EM_TX_UNLOCK(txr);
1665}
1666
1667static void
1668em_handle_link(void *context, int pending)
1669{
1670	struct adapter	*adapter = context;
1671	struct tx_ring	*txr = adapter->tx_rings;
1672	struct ifnet *ifp = adapter->ifp;
1673
1674	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1675		return;
1676
1677	EM_CORE_LOCK(adapter);
1678	callout_stop(&adapter->timer);
1679	em_update_link_status(adapter);
1680	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1681	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1682	    EM_MSIX_LINK | E1000_IMS_LSC);
1683	if (adapter->link_active) {
1684		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1685			EM_TX_LOCK(txr);
1686#ifdef EM_MULTIQUEUE
1687			if (!drbr_empty(ifp, txr->br))
1688				em_mq_start_locked(ifp, txr, NULL);
1689#else
1690			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1691				em_start_locked(ifp, txr);
1692#endif
1693			EM_TX_UNLOCK(txr);
1694		}
1695	}
1696	EM_CORE_UNLOCK(adapter);
1697}
1698
1699
1700/*********************************************************************
1701 *
1702 *  Media Ioctl callback
1703 *
1704 *  This routine is called whenever the user queries the status of
1705 *  the interface using ifconfig.
1706 *
1707 **********************************************************************/
1708static void
1709em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1710{
1711	struct adapter *adapter = ifp->if_softc;
1712	u_char fiber_type = IFM_1000_SX;
1713
1714	INIT_DEBUGOUT("em_media_status: begin");
1715
1716	EM_CORE_LOCK(adapter);
1717	em_update_link_status(adapter);
1718
1719	ifmr->ifm_status = IFM_AVALID;
1720	ifmr->ifm_active = IFM_ETHER;
1721
1722	if (!adapter->link_active) {
1723		EM_CORE_UNLOCK(adapter);
1724		return;
1725	}
1726
1727	ifmr->ifm_status |= IFM_ACTIVE;
1728
1729	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1730	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1731		ifmr->ifm_active |= fiber_type | IFM_FDX;
1732	} else {
1733		switch (adapter->link_speed) {
1734		case 10:
1735			ifmr->ifm_active |= IFM_10_T;
1736			break;
1737		case 100:
1738			ifmr->ifm_active |= IFM_100_TX;
1739			break;
1740		case 1000:
1741			ifmr->ifm_active |= IFM_1000_T;
1742			break;
1743		}
1744		if (adapter->link_duplex == FULL_DUPLEX)
1745			ifmr->ifm_active |= IFM_FDX;
1746		else
1747			ifmr->ifm_active |= IFM_HDX;
1748	}
1749	EM_CORE_UNLOCK(adapter);
1750}
1751
1752/*********************************************************************
1753 *
1754 *  Media Ioctl callback
1755 *
1756 *  This routine is called when the user changes speed/duplex using
1757 *  the media/mediaopt options with ifconfig.
1758 *
1759 **********************************************************************/
1760static int
1761em_media_change(struct ifnet *ifp)
1762{
1763	struct adapter *adapter = ifp->if_softc;
1764	struct ifmedia  *ifm = &adapter->media;
1765
1766	INIT_DEBUGOUT("em_media_change: begin");
1767
1768	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1769		return (EINVAL);
1770
1771	EM_CORE_LOCK(adapter);
1772	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1773	case IFM_AUTO:
1774		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1775		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1776		break;
1777	case IFM_1000_LX:
1778	case IFM_1000_SX:
1779	case IFM_1000_T:
1780		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1781		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1782		break;
1783	case IFM_100_TX:
1784		adapter->hw.mac.autoneg = FALSE;
1785		adapter->hw.phy.autoneg_advertised = 0;
1786		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1787			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1788		else
1789			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1790		break;
1791	case IFM_10_T:
1792		adapter->hw.mac.autoneg = FALSE;
1793		adapter->hw.phy.autoneg_advertised = 0;
1794		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1795			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1796		else
1797			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1798		break;
1799	default:
1800		device_printf(adapter->dev, "Unsupported media type\n");
1801	}
1802
1803	em_init_locked(adapter);
1804	EM_CORE_UNLOCK(adapter);
1805
1806	return (0);
1807}
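
/*
 * Illustrative mapping (not compiled) from typical ifconfig media
 * requests to the IFM subtypes handled above; "em0" is just an example
 * interface name:
 *
 *	ifconfig em0 media autoselect
 *		-> IFM_AUTO:   autoneg on, advertise AUTONEG_ADV_DEFAULT
 *	ifconfig em0 media 1000baseT mediaopt full-duplex
 *		-> IFM_1000_T: autoneg on, advertise ADVERTISE_1000_FULL
 *	ifconfig em0 media 100baseTX mediaopt full-duplex
 *		-> IFM_100_TX: autoneg off, force ADVERTISE_100_FULL
 *	ifconfig em0 media 10baseT/UTP
 *		-> IFM_10_T:   autoneg off, force ADVERTISE_10_HALF
 */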
1808
1809/*********************************************************************
1810 *
1811 *  This routine maps the mbufs to tx descriptors.
1812 *
1813 *  return 0 on success, positive on failure
1814 **********************************************************************/
1815
1816static int
1817em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1818{
1819	struct adapter		*adapter = txr->adapter;
1820	bus_dma_segment_t	segs[EM_MAX_SCATTER];
1821	bus_dmamap_t		map;
1822	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
1823	struct e1000_tx_desc	*ctxd = NULL;
1824	struct mbuf		*m_head;
1825	struct ether_header	*eh;
1826	struct ip		*ip = NULL;
1827	struct tcphdr		*tp = NULL;
1828	u32			txd_upper, txd_lower, txd_used, txd_saved;
1829	int			ip_off, poff;
1830	int			nsegs, i, j, first, last = 0;
1831	int			error, do_tso, tso_desc = 0, remap = 1;
1832
1833	m_head = *m_headp;
1834	txd_upper = txd_lower = txd_used = txd_saved = 0;
1835	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1836	ip_off = poff = 0;
1837
1838	/*
1839	 * Intel recommends entire IP/TCP header length reside in a single
1840	 * buffer. If multiple descriptors are used to describe the IP and
1841	 * TCP header, each descriptor should describe one or more
1842	 * complete headers; descriptors referencing only parts of headers
1843	 * are not supported. If all layer headers are not coalesced into
1844	 * a single buffer, each buffer should not cross a 4KB boundary,
1845	 * or be larger than the maximum read request size.
1846	 * Controller also requires modifing IP/TCP header to make TSO work
1847	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1848	 * IP/TCP header into a single buffer to meet the requirement of
1849	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1850	 * which also has similiar restrictions.
1851	 */
1852	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1853		if (do_tso || (m_head->m_next != NULL &&
1854		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1855			if (M_WRITABLE(*m_headp) == 0) {
1856				m_head = m_dup(*m_headp, M_NOWAIT);
1857				m_freem(*m_headp);
1858				if (m_head == NULL) {
1859					*m_headp = NULL;
1860					return (ENOBUFS);
1861				}
1862				*m_headp = m_head;
1863			}
1864		}
1865		/*
1866		 * XXX
1867		 * Assume IPv4, we don't have TSO/checksum offload support
1868		 * for IPv6 yet.
1869		 */
1870		ip_off = sizeof(struct ether_header);
1871		m_head = m_pullup(m_head, ip_off);
1872		if (m_head == NULL) {
1873			*m_headp = NULL;
1874			return (ENOBUFS);
1875		}
1876		eh = mtod(m_head, struct ether_header *);
1877		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1878			ip_off = sizeof(struct ether_vlan_header);
1879			m_head = m_pullup(m_head, ip_off);
1880			if (m_head == NULL) {
1881				*m_headp = NULL;
1882				return (ENOBUFS);
1883			}
1884		}
1885		m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1886		if (m_head == NULL) {
1887			*m_headp = NULL;
1888			return (ENOBUFS);
1889		}
1890		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1891		poff = ip_off + (ip->ip_hl << 2);
1892		if (do_tso) {
1893			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1894			if (m_head == NULL) {
1895				*m_headp = NULL;
1896				return (ENOBUFS);
1897			}
1898			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1899			/*
1900			 * TSO workaround:
1901			 *   pull 4 more bytes of payload into the first mbuf.
1902			 */
1903			m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4);
1904			if (m_head == NULL) {
1905				*m_headp = NULL;
1906				return (ENOBUFS);
1907			}
1908			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1909			ip->ip_len = 0;
1910			ip->ip_sum = 0;
1911			/*
1912			 * The TCP pseudo-header checksum does not include the
1913			 * TCP payload length, so the driver must recompute the
1914			 * checksum here to match what the hardware expects to
1915			 * see. This follows Microsoft's Large Send specification.
1916			 */
1917			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1918			tp->th_sum = in_pseudo(ip->ip_src.s_addr,
1919			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
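			/*
			 * Example (addresses illustrative only): for src
			 * 10.0.0.1 and dst 10.0.0.2 this seeds th_sum with
			 * the ones-complement sum of the two addresses and
			 * IPPROTO_TCP; the hardware then folds the TCP
			 * length of each segment it generates into the sum.
			 */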
1920		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1921			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1922			if (m_head == NULL) {
1923				*m_headp = NULL;
1924				return (ENOBUFS);
1925			}
1926			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1927			m_head = m_pullup(m_head, poff + (tp->th_off << 2));
1928			if (m_head == NULL) {
1929				*m_headp = NULL;
1930				return (ENOBUFS);
1931			}
1932			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1933			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
1934		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1935			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1936			if (m_head == NULL) {
1937				*m_headp = NULL;
1938				return (ENOBUFS);
1939			}
1940			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1941		}
1942		*m_headp = m_head;
1943	}
1944
1945	/*
1946	 * Map the packet for DMA
1947	 *
1948	 * Capture the first descriptor index,
1949	 * this descriptor will have the index
1950	 * of the EOP which is the only one that
1951	 * now gets a DONE bit writeback.
1952	 */
1953	first = txr->next_avail_desc;
1954	tx_buffer = &txr->tx_buffers[first];
1955	tx_buffer_mapped = tx_buffer;
1956	map = tx_buffer->map;
1957
1958retry:
1959	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1960	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1961
1962	/*
1963	 * There are two types of errors we can (try) to handle:
1964	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1965	 *   out of segments.  Defragment the mbuf chain and try again.
1966	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1967	 *   at this point in time.  Defer sending and try again later.
1968	 * All other errors, in particular EINVAL, are fatal and prevent the
1969	 * mbuf chain from ever going through.  Drop it and report error.
1970	 */
1971	if (error == EFBIG && remap) {
1972		struct mbuf *m;
1973
1974		m = m_defrag(*m_headp, M_NOWAIT);
1975		if (m == NULL) {
1976			adapter->mbuf_alloc_failed++;
1977			m_freem(*m_headp);
1978			*m_headp = NULL;
1979			return (ENOBUFS);
1980		}
1981		*m_headp = m;
1982
1983		/* Try it again, but only once */
1984		remap = 0;
1985		goto retry;
1986	} else if (error == ENOMEM) {
1987		adapter->no_tx_dma_setup++;
1988		return (error);
1989	} else if (error != 0) {
1990		adapter->no_tx_dma_setup++;
1991		m_freem(*m_headp);
1992		*m_headp = NULL;
1993		return (error);
1994	}
1995
1996	/*
1997	 * TSO Hardware workaround, if this packet is not
1998	 * TSO, and is only a single descriptor long, and
1999	 * it follows a TSO burst, then we need to add a
2000	 * sentinel descriptor to prevent premature writeback.
2001	 */
2002	if ((do_tso == 0) && (txr->tx_tso == TRUE)) {
2003		if (nsegs == 1)
2004			tso_desc = TRUE;
2005		txr->tx_tso = FALSE;
2006	}
2007
2008	if (nsegs > (txr->tx_avail - 2)) {
2009		txr->no_desc_avail++;
2010		bus_dmamap_unload(txr->txtag, map);
2011		return (ENOBUFS);
2012	}
2013	m_head = *m_headp;
2014
2015	/* Do hardware assists */
2016	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
2017		em_tso_setup(txr, m_head, ip_off, ip, tp,
2018		    &txd_upper, &txd_lower);
2019		/* we need to make a final sentinel transmit desc */
2020		tso_desc = TRUE;
2021	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2022		em_transmit_checksum_setup(txr, m_head,
2023		    ip_off, ip, &txd_upper, &txd_lower);
2024
2025	if (m_head->m_flags & M_VLANTAG) {
2026		/* Set the vlan id. */
2027		txd_upper |=
2028		    (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2029		/* Tell hardware to add tag */
2030		txd_lower |= htole32(E1000_TXD_CMD_VLE);
2031	}
2032
2033	i = txr->next_avail_desc;
2034
2035	/* Set up our transmit descriptors */
2036	for (j = 0; j < nsegs; j++) {
2037		bus_size_t seg_len;
2038		bus_addr_t seg_addr;
2039
2040		tx_buffer = &txr->tx_buffers[i];
2041		ctxd = &txr->tx_base[i];
2042		seg_addr = segs[j].ds_addr;
2043		seg_len  = segs[j].ds_len;
2044		/*
2045		** TSO Workaround:
2046		** If this is the last descriptor, we want to
2047		** split it so we have a small final sentinel
2048		*/
2049		if (tso_desc && (j == (nsegs -1)) && (seg_len > 8)) {
2050			seg_len -= 4;
2051			ctxd->buffer_addr = htole64(seg_addr);
2052			ctxd->lower.data = htole32(
2053			adapter->txd_cmd | txd_lower | seg_len);
2054			ctxd->upper.data =
2055			    htole32(txd_upper);
2056			if (++i == adapter->num_tx_desc)
2057				i = 0;
2058			/* Now make the sentinel */
2059			++txd_used; /* using an extra txd */
2060			ctxd = &txr->tx_base[i];
2061			tx_buffer = &txr->tx_buffers[i];
2062			ctxd->buffer_addr =
2063			    htole64(seg_addr + seg_len);
2064			ctxd->lower.data = htole32(
2065			adapter->txd_cmd | txd_lower | 4);
2066			ctxd->upper.data =
2067			    htole32(txd_upper);
2068			last = i;
2069			if (++i == adapter->num_tx_desc)
2070				i = 0;
2071		} else {
2072			ctxd->buffer_addr = htole64(seg_addr);
2073			ctxd->lower.data = htole32(
2074			adapter->txd_cmd | txd_lower | seg_len);
2075			ctxd->upper.data =
2076			    htole32(txd_upper);
2077			last = i;
2078			if (++i == adapter->num_tx_desc)
2079				i = 0;
2080		}
2081		tx_buffer->m_head = NULL;
2082		tx_buffer->next_eop = -1;
2083	}
2084
2085	txr->next_avail_desc = i;
2086	txr->tx_avail -= nsegs;
2087	if (tso_desc) /* TSO used an extra for sentinel */
2088		txr->tx_avail -= txd_used;
2089
2090	tx_buffer->m_head = m_head;
2091	/*
2092	** Here we swap the map so the last descriptor,
2093	** which gets the completion interrupt, has the
2094	** real map, and the first descriptor gets the
2095	** unused map from this descriptor.
2096	*/
2097	tx_buffer_mapped->map = tx_buffer->map;
2098	tx_buffer->map = map;
2099	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2100
2101	/*
2102	 * Last Descriptor of Packet
2103	 * needs End Of Packet (EOP)
2104	 * and Report Status (RS)
2105	 */
2106	ctxd->lower.data |=
2107	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2108	/*
2109	 * Keep track in the first buffer which
2110	 * descriptor will be written back
2111	 */
2112	tx_buffer = &txr->tx_buffers[first];
2113	tx_buffer->next_eop = last;
2114	/* Update the watchdog time early and often */
2115	txr->watchdog_time = ticks;
2116
2117	/*
2118	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2119	 * that this frame is available to transmit.
2120	 */
2121	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2122	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2123	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2124
2125	return (0);
2126}
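
/*
 * Minimal sketch (not compiled) of how a start routine drives em_xmit():
 * dequeue a frame, try to map and queue it, and on failure either requeue
 * it or let em_xmit()'s cleanup stand. The real entry points are
 * em_start_locked()/em_mq_start_locked() elsewhere in this file; this is
 * a simplified outline, not their actual code.
 */
#if 0
static void
example_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct mbuf *m_head;

	EM_TX_LOCK_ASSERT(txr);
	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		if (em_xmit(txr, &m_head) != 0) {
			/* em_xmit() frees the mbuf on fatal errors */
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		/* Let BPF listeners see the outgoing frame */
		ETHER_BPF_MTAP(ifp, m_head);
	}
}
#endif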
2127
2128static void
2129em_set_promisc(struct adapter *adapter)
2130{
2131	struct ifnet	*ifp = adapter->ifp;
2132	u32		reg_rctl;
2133
2134	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2135
2136	if (ifp->if_flags & IFF_PROMISC) {
2137		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2138		/* Turn this on if you want to see bad packets */
2139		if (em_debug_sbp)
2140			reg_rctl |= E1000_RCTL_SBP;
2141		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2142	} else if (ifp->if_flags & IFF_ALLMULTI) {
2143		reg_rctl |= E1000_RCTL_MPE;
2144		reg_rctl &= ~E1000_RCTL_UPE;
2145		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2146	}
2147}
2148
2149static void
2150em_disable_promisc(struct adapter *adapter)
2151{
2152	struct ifnet	*ifp = adapter->ifp;
2153	u32		reg_rctl;
2154	int		mcnt = 0;
2155
2156	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2157	reg_rctl &=  (~E1000_RCTL_UPE);
2158	if (ifp->if_flags & IFF_ALLMULTI)
2159		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2160	else {
2161		struct  ifmultiaddr *ifma;
2162#if __FreeBSD_version < 800000
2163		IF_ADDR_LOCK(ifp);
2164#else
2165		if_maddr_rlock(ifp);
2166#endif
2167		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2168			if (ifma->ifma_addr->sa_family != AF_LINK)
2169				continue;
2170			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2171				break;
2172			mcnt++;
2173		}
2174#if __FreeBSD_version < 800000
2175		IF_ADDR_UNLOCK(ifp);
2176#else
2177		if_maddr_runlock(ifp);
2178#endif
2179	}
2180	/* Don't disable if in MAX groups */
2181	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2182		reg_rctl &=  (~E1000_RCTL_MPE);
2183	reg_rctl &=  (~E1000_RCTL_SBP);
2184	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2185}
2186
2187
2188/*********************************************************************
2189 *  Multicast Update
2190 *
2191 *  This routine is called whenever the multicast address list is updated.
2192 *
2193 **********************************************************************/
2194
2195static void
2196em_set_multi(struct adapter *adapter)
2197{
2198	struct ifnet	*ifp = adapter->ifp;
2199	struct ifmultiaddr *ifma;
2200	u32 reg_rctl = 0;
2201	u8  *mta; /* Multicast array memory */
2202	int mcnt = 0;
2203
2204	IOCTL_DEBUGOUT("em_set_multi: begin");
2205
2206	mta = adapter->mta;
2207	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2208
2209	if (adapter->hw.mac.type == e1000_82542 &&
2210	    adapter->hw.revision_id == E1000_REVISION_2) {
2211		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2212		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2213			e1000_pci_clear_mwi(&adapter->hw);
2214		reg_rctl |= E1000_RCTL_RST;
2215		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2216		msec_delay(5);
2217	}
2218
2219#if __FreeBSD_version < 800000
2220	IF_ADDR_LOCK(ifp);
2221#else
2222	if_maddr_rlock(ifp);
2223#endif
2224	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2225		if (ifma->ifma_addr->sa_family != AF_LINK)
2226			continue;
2227
2228		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2229			break;
2230
2231		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2232		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2233		mcnt++;
2234	}
2235#if __FreeBSD_version < 800000
2236	IF_ADDR_UNLOCK(ifp);
2237#else
2238	if_maddr_runlock(ifp);
2239#endif
2240	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2241		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2242		reg_rctl |= E1000_RCTL_MPE;
2243		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244	} else
2245		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2246
2247	if (adapter->hw.mac.type == e1000_82542 &&
2248	    adapter->hw.revision_id == E1000_REVISION_2) {
2249		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2250		reg_rctl &= ~E1000_RCTL_RST;
2251		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2252		msec_delay(5);
2253		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2254			e1000_pci_set_mwi(&adapter->hw);
2255	}
2256}
2257
2258
2259/*********************************************************************
2260 *  Timer routine
2261 *
2262 *  This routine checks for link status and updates statistics.
2263 *
2264 **********************************************************************/
2265
2266static void
2267em_local_timer(void *arg)
2268{
2269	struct adapter	*adapter = arg;
2270	struct ifnet	*ifp = adapter->ifp;
2271	struct tx_ring	*txr = adapter->tx_rings;
2272	struct rx_ring	*rxr = adapter->rx_rings;
2273	u32		trigger;
2274
2275	EM_CORE_LOCK_ASSERT(adapter);
2276
2277	em_update_link_status(adapter);
2278	em_update_stats_counters(adapter);
2279
2280	/* Reset LAA into RAR[0] on 82571 */
2281	if ((adapter->hw.mac.type == e1000_82571) &&
2282	    e1000_get_laa_state_82571(&adapter->hw))
2283		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2284
2285	/* Mask to use in the irq trigger */
2286	if (adapter->msix_mem)
2287		trigger = rxr->ims;
2288	else
2289		trigger = E1000_ICS_RXDMT0;
2290
2291	/*
2292	** Check the state of the TX queue(s); this can
2293	** be done without the lock because it is read-only
2294	** and the HUNG state will be static if set.
2295	*/
2296	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2297		if ((txr->queue_status == EM_QUEUE_HUNG) &&
2298		    (adapter->pause_frames == 0))
2299			goto hung;
2300		/* Schedule a TX tasklet if needed */
2301		if (txr->tx_avail <= EM_MAX_SCATTER)
2302			taskqueue_enqueue(txr->tq, &txr->tx_task);
2303	}
2304
2305	adapter->pause_frames = 0;
2306	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2307#ifndef DEVICE_POLLING
2308	/* Trigger an RX interrupt to guarantee mbuf refresh */
2309	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2310#endif
2311	return;
2312hung:
2313	/* Looks like we're hung */
2314	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2315	device_printf(adapter->dev,
2316	    "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2317	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2318	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2319	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
2320	    "Next TX to Clean = %d\n",
2321	    txr->me, txr->tx_avail, txr->next_to_clean);
2322	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2323	adapter->watchdog_events++;
2324	adapter->pause_frames = 0;
2325	em_init_locked(adapter);
2326}
2327
2328
2329static void
2330em_update_link_status(struct adapter *adapter)
2331{
2332	struct e1000_hw *hw = &adapter->hw;
2333	struct ifnet *ifp = adapter->ifp;
2334	device_t dev = adapter->dev;
2335	struct tx_ring *txr = adapter->tx_rings;
2336	u32 link_check = 0;
2337
2338	/* Get the cached link value or read phy for real */
2339	switch (hw->phy.media_type) {
2340	case e1000_media_type_copper:
2341		if (hw->mac.get_link_status) {
2342			/* Do the work to read phy */
2343			e1000_check_for_link(hw);
2344			link_check = !hw->mac.get_link_status;
2345			if (link_check) /* ESB2 fix */
2346				e1000_cfg_on_link_up(hw);
2347		} else
2348			link_check = TRUE;
2349		break;
2350	case e1000_media_type_fiber:
2351		e1000_check_for_link(hw);
2352		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2353		    E1000_STATUS_LU);
2354		break;
2355	case e1000_media_type_internal_serdes:
2356		e1000_check_for_link(hw);
2357		link_check = adapter->hw.mac.serdes_has_link;
2358		break;
2359	default:
2360	case e1000_media_type_unknown:
2361		break;
2362	}
2363
2364	/* Now check for a transition */
2365	if (link_check && (adapter->link_active == 0)) {
2366		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2367		    &adapter->link_duplex);
2368		/* Check if we must disable SPEED_MODE bit on PCI-E */
2369		if ((adapter->link_speed != SPEED_1000) &&
2370		    ((hw->mac.type == e1000_82571) ||
2371		    (hw->mac.type == e1000_82572))) {
2372			int tarc0;
2373			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2374			tarc0 &= ~SPEED_MODE_BIT;
2375			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2376		}
2377		if (bootverbose)
2378			device_printf(dev, "Link is up %d Mbps %s\n",
2379			    adapter->link_speed,
2380			    ((adapter->link_duplex == FULL_DUPLEX) ?
2381			    "Full Duplex" : "Half Duplex"));
2382		adapter->link_active = 1;
2383		adapter->smartspeed = 0;
2384		ifp->if_baudrate = adapter->link_speed * 1000000;
2385		if_link_state_change(ifp, LINK_STATE_UP);
2386	} else if (!link_check && (adapter->link_active == 1)) {
2387		ifp->if_baudrate = adapter->link_speed = 0;
2388		adapter->link_duplex = 0;
2389		if (bootverbose)
2390			device_printf(dev, "Link is Down\n");
2391		adapter->link_active = 0;
2392		/* Link down, disable watchdog */
2393		for (int i = 0; i < adapter->num_queues; i++, txr++)
2394			txr->queue_status = EM_QUEUE_IDLE;
2395		if_link_state_change(ifp, LINK_STATE_DOWN);
2396	}
2397}
2398
2399/*********************************************************************
2400 *
2401 *  This routine disables all traffic on the adapter by issuing a
2402 *  global reset on the MAC and deallocates TX/RX buffers.
2403 *
2404 *  This routine should always be called with BOTH the CORE
2405 *  and TX locks.
2406 **********************************************************************/
2407
2408static void
2409em_stop(void *arg)
2410{
2411	struct adapter	*adapter = arg;
2412	struct ifnet	*ifp = adapter->ifp;
2413	struct tx_ring	*txr = adapter->tx_rings;
2414
2415	EM_CORE_LOCK_ASSERT(adapter);
2416
2417	INIT_DEBUGOUT("em_stop: begin");
2418
2419	em_disable_intr(adapter);
2420	callout_stop(&adapter->timer);
2421
2422	/* Tell the stack that the interface is no longer active */
2423	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2424	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2425
2426	/* Unarm watchdog timer. */
2427	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2428		EM_TX_LOCK(txr);
2429		txr->queue_status = EM_QUEUE_IDLE;
2430		EM_TX_UNLOCK(txr);
2431	}
2432
2433	e1000_reset_hw(&adapter->hw);
2434	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2435
2436	e1000_led_off(&adapter->hw);
2437	e1000_cleanup_led(&adapter->hw);
2438}
2439
2440
2441/*********************************************************************
2442 *
2443 *  Determine hardware revision.
2444 *
2445 **********************************************************************/
2446static void
2447em_identify_hardware(struct adapter *adapter)
2448{
2449	device_t dev = adapter->dev;
2450
2451	/* Make sure bus mastering is enabled in PCI config space */
2452	pci_enable_busmaster(dev);
2453	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2454
2455	/* Save off the information about this board */
2456	adapter->hw.vendor_id = pci_get_vendor(dev);
2457	adapter->hw.device_id = pci_get_device(dev);
2458	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2459	adapter->hw.subsystem_vendor_id =
2460	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2461	adapter->hw.subsystem_device_id =
2462	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2463
2464	/* Do Shared Code Init and Setup */
2465	if (e1000_set_mac_type(&adapter->hw)) {
2466		device_printf(dev, "Setup init failure\n");
2467		return;
2468	}
2469}
2470
2471static int
2472em_allocate_pci_resources(struct adapter *adapter)
2473{
2474	device_t	dev = adapter->dev;
2475	int		rid;
2476
2477	rid = PCIR_BAR(0);
2478	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2479	    &rid, RF_ACTIVE);
2480	if (adapter->memory == NULL) {
2481		device_printf(dev, "Unable to allocate bus resource: memory\n");
2482		return (ENXIO);
2483	}
2484	adapter->osdep.mem_bus_space_tag =
2485	    rman_get_bustag(adapter->memory);
2486	adapter->osdep.mem_bus_space_handle =
2487	    rman_get_bushandle(adapter->memory);
2488	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2489
2490	/* Default to a single queue */
2491	adapter->num_queues = 1;
2492
2493	/*
2494	 * Setup MSI/X or MSI if PCI Express
2495	 */
2496	adapter->msix = em_setup_msix(adapter);
2497
2498	adapter->hw.back = &adapter->osdep;
2499
2500	return (0);
2501}
2502
2503/*********************************************************************
2504 *
2505 *  Setup the Legacy or MSI Interrupt handler
2506 *
2507 **********************************************************************/
2508int
2509em_allocate_legacy(struct adapter *adapter)
2510{
2511	device_t dev = adapter->dev;
2512	struct tx_ring	*txr = adapter->tx_rings;
2513	int error, rid = 0;
2514
2515	/* Manually turn off all interrupts */
2516	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2517
2518	if (adapter->msix == 1) /* using MSI */
2519		rid = 1;
2520	/* We allocate a single interrupt resource */
2521	adapter->res = bus_alloc_resource_any(dev,
2522	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2523	if (adapter->res == NULL) {
2524		device_printf(dev, "Unable to allocate bus resource: "
2525		    "interrupt\n");
2526		return (ENXIO);
2527	}
2528
2529	/*
2530	 * Allocate a fast interrupt and the associated
2531	 * deferred processing contexts.
2532	 */
2533	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2534	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2535	    taskqueue_thread_enqueue, &adapter->tq);
2536	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2537	    device_get_nameunit(adapter->dev));
2538	/* Use a TX only tasklet for local timer */
2539	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2540	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2541	    taskqueue_thread_enqueue, &txr->tq);
2542	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2543	    device_get_nameunit(adapter->dev));
2544	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2545	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2546	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2547		device_printf(dev, "Failed to register fast interrupt "
2548			    "handler: %d\n", error);
2549		taskqueue_free(adapter->tq);
2550		adapter->tq = NULL;
2551		return (error);
2552	}
2553
2554	return (0);
2555}
2556
2557/*********************************************************************
2558 *
2559 *  Setup the MSIX Interrupt handlers
2560 *   This is not really Multiqueue, rather
2561 *   it is just separate interrupt vectors
2562 *   for TX, RX, and Link.
2563 *
2564 **********************************************************************/
2565int
2566em_allocate_msix(struct adapter *adapter)
2567{
2568	device_t	dev = adapter->dev;
2569	struct		tx_ring *txr = adapter->tx_rings;
2570	struct		rx_ring *rxr = adapter->rx_rings;
2571	int		error, rid, vector = 0;
2572
2573
2574	/* Make sure all interrupts are disabled */
2575	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2576
2577	/* First set up ring resources */
2578	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
2579
2580		/* RX ring */
2581		rid = vector + 1;
2582
2583		rxr->res = bus_alloc_resource_any(dev,
2584		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2585		if (rxr->res == NULL) {
2586			device_printf(dev,
2587			    "Unable to allocate bus resource: "
2588			    "RX MSIX Interrupt %d\n", i);
2589			return (ENXIO);
2590		}
2591		if ((error = bus_setup_intr(dev, rxr->res,
2592		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2593		    rxr, &rxr->tag)) != 0) {
2594			device_printf(dev, "Failed to register RX handler");
2595			return (error);
2596		}
2597#if __FreeBSD_version >= 800504
2598		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
2599#endif
2600		rxr->msix = vector++; /* NOTE increment vector for TX */
2601		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2602		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2603		    taskqueue_thread_enqueue, &rxr->tq);
2604		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq",
2605		    device_get_nameunit(adapter->dev));
2606		/*
2607		** Set the bit to enable interrupt
2608		** in E1000_IMS -- bits 20 and 21
2609		** are for RX0 and RX1, note this has
2610		** NOTHING to do with the MSIX vector
2611		*/
2612		rxr->ims = 1 << (20 + i);
2613		adapter->ivars |= (8 | rxr->msix) << (i * 4);
2614
2615		/* TX ring */
2616		rid = vector + 1;
2617		txr->res = bus_alloc_resource_any(dev,
2618		    SYS_RES_IRQ, &rid, RF_ACTIVE);
2619		if (txr->res == NULL) {
2620			device_printf(dev,
2621			    "Unable to allocate bus resource: "
2622			    "TX MSIX Interrupt %d\n", i);
2623			return (ENXIO);
2624		}
2625		if ((error = bus_setup_intr(dev, txr->res,
2626		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2627		    txr, &txr->tag)) != 0) {
2628			device_printf(dev, "Failed to register TX handler");
2629			return (error);
2630		}
2631#if __FreeBSD_version >= 800504
2632		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
2633#endif
2634		txr->msix = vector++; /* Increment vector for next pass */
2635		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2636		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2637		    taskqueue_thread_enqueue, &txr->tq);
2638		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2639		    device_get_nameunit(adapter->dev));
2640		/*
2641		** Set the bit to enable interrupt
2642		** in E1000_IMS -- bits 22 and 23
2643		** are for TX0 and TX1, note this has
2644		** NOTHING to do with the MSIX vector
2645		*/
2646		txr->ims = 1 << (22 + i);
2647		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2648	}
2649
2650	/* Link interrupt */
2651	++rid;
2652	adapter->res = bus_alloc_resource_any(dev,
2653	    SYS_RES_IRQ, &rid, RF_ACTIVE);
2654	if (!adapter->res) {
2655		device_printf(dev,"Unable to allocate "
2656		    "bus resource: Link interrupt [%d]\n", rid);
2657		return (ENXIO);
2658	}
2659	/* Set the link handler function */
2660	error = bus_setup_intr(dev, adapter->res,
2661	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2662	    em_msix_link, adapter, &adapter->tag);
2663	if (error) {
2664		adapter->res = NULL;
2665		device_printf(dev, "Failed to register LINK handler");
2666		return (error);
2667	}
2668#if __FreeBSD_version >= 800504
2669	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2670#endif
2671	adapter->linkvec = vector;
2672	adapter->ivars |=  (8 | vector) << 16;
2673	adapter->ivars |= 0x80000000;
2674
2675	return (0);
2676}
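
/*
 * Worked example (illustrative): with the default single queue, the loop
 * above assigns MSI-X vector 0 to RX, vector 1 to TX, and vector 2 to the
 * link interrupt, so the 82574 IVAR image built here is
 *
 *	(8 | 0) << 0	RX0  -> vector 0 (valid bit 0x8 set)
 *	(8 | 1) << 8	TX0  -> vector 1 (valid bit 0x8 set)
 *	(8 | 2) << 16	link -> vector 2 (valid bit 0x8 set)
 *	0x80000000	bit 31, also set when programming IVAR
 *
 * which ORs together to 0x800A0908.
 */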
2677
2678
2679static void
2680em_free_pci_resources(struct adapter *adapter)
2681{
2682	device_t	dev = adapter->dev;
2683	struct tx_ring	*txr;
2684	struct rx_ring	*rxr;
2685	int		rid;
2686
2687
2688	/*
2689	** Release all the queue interrupt resources:
2690	*/
2691	for (int i = 0; i < adapter->num_queues; i++) {
2692		txr = &adapter->tx_rings[i];
2693		rxr = &adapter->rx_rings[i];
2694		/* an early abort? */
2695		if ((txr == NULL) || (rxr == NULL))
2696			break;
2697		rid = txr->msix +1;
2698		if (txr->tag != NULL) {
2699			bus_teardown_intr(dev, txr->res, txr->tag);
2700			txr->tag = NULL;
2701		}
2702		if (txr->res != NULL)
2703			bus_release_resource(dev, SYS_RES_IRQ,
2704			    rid, txr->res);
2705		rid = rxr->msix +1;
2706		if (rxr->tag != NULL) {
2707			bus_teardown_intr(dev, rxr->res, rxr->tag);
2708			rxr->tag = NULL;
2709		}
2710		if (rxr->res != NULL)
2711			bus_release_resource(dev, SYS_RES_IRQ,
2712			    rid, rxr->res);
2713	}
2714
2715	if (adapter->linkvec) /* we are doing MSIX */
2716		rid = adapter->linkvec + 1;
2717	else
2718		rid = (adapter->msix != 0) ? 1 : 0;
2719
2720	if (adapter->tag != NULL) {
2721		bus_teardown_intr(dev, adapter->res, adapter->tag);
2722		adapter->tag = NULL;
2723	}
2724
2725	if (adapter->res != NULL)
2726		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2727
2728
2729	if (adapter->msix)
2730		pci_release_msi(dev);
2731
2732	if (adapter->msix_mem != NULL)
2733		bus_release_resource(dev, SYS_RES_MEMORY,
2734		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2735
2736	if (adapter->memory != NULL)
2737		bus_release_resource(dev, SYS_RES_MEMORY,
2738		    PCIR_BAR(0), adapter->memory);
2739
2740	if (adapter->flash != NULL)
2741		bus_release_resource(dev, SYS_RES_MEMORY,
2742		    EM_FLASH, adapter->flash);
2743}
2744
2745/*
2746 * Setup MSI or MSI/X
2747 */
2748static int
2749em_setup_msix(struct adapter *adapter)
2750{
2751	device_t dev = adapter->dev;
2752	int val;
2753
2754	/*
2755	** Setup MSI/X for Hartwell: tests have shown
2756	** use of two queues to be unstable, and to
2757	** provide no great gain anyway, so we simply
2758	** separate the interrupts and use a single queue.
2759	*/
2760	if ((adapter->hw.mac.type == e1000_82574) &&
2761	    (em_enable_msix == TRUE)) {
2762		/* Map the MSIX BAR */
2763		int rid = PCIR_BAR(EM_MSIX_BAR);
2764		adapter->msix_mem = bus_alloc_resource_any(dev,
2765		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2766		if (adapter->msix_mem == NULL) {
2767			/* May not be enabled */
2768			device_printf(adapter->dev,
2769			    "Unable to map MSIX table\n");
2770			goto msi;
2771		}
2772		val = pci_msix_count(dev);
2773		/* We only need/want 3 vectors */
2774		if (val >= 3)
2775			val = 3;
2776		else {
2777			device_printf(adapter->dev,
2778			    "MSIX: insufficient vectors, using MSI\n");
2779			goto msi;
2780		}
2781
2782		if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) {
2783			device_printf(adapter->dev,
2784			    "Using MSIX interrupts "
2785			    "with %d vectors\n", val);
2786			return (val);
2787		}
2788
2789		/*
2790		** If MSIX alloc failed or provided us with
2791		** less than needed, free and fall through to MSI
2792		*/
2793		pci_release_msi(dev);
2794	}
2795msi:
2796	if (adapter->msix_mem != NULL) {
2797		bus_release_resource(dev, SYS_RES_MEMORY,
2798		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
2799		adapter->msix_mem = NULL;
2800	}
2801	val = 1;
2802	if (pci_alloc_msi(dev, &val) == 0) {
2803		device_printf(adapter->dev, "Using an MSI interrupt\n");
2804		return (val);
2805	}
2806	/* Should only happen due to manual configuration */
2807	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2808	return (0);
2809}
2810
2811
2812/*********************************************************************
2813 *
2814 *  Initialize the hardware to a configuration
2815 *  as specified by the adapter structure.
2816 *
2817 **********************************************************************/
2818static void
2819em_reset(struct adapter *adapter)
2820{
2821	device_t	dev = adapter->dev;
2822	struct ifnet	*ifp = adapter->ifp;
2823	struct e1000_hw	*hw = &adapter->hw;
2824	u16		rx_buffer_size;
2825	u32		pba;
2826
2827	INIT_DEBUGOUT("em_reset: begin");
2828
2829	/* Set up smart power down as default off on newer adapters. */
2830	if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
2831	    hw->mac.type == e1000_82572)) {
2832		u16 phy_tmp = 0;
2833
2834		/* Speed up time to link by disabling smart power down. */
2835		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
2836		phy_tmp &= ~IGP02E1000_PM_SPD;
2837		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
2838	}
2839
2840	/*
2841	 * Packet Buffer Allocation (PBA)
2842	 * Writing PBA sets the receive portion of the buffer
2843	 * the remainder is used for the transmit buffer.
2844	 */
2845	switch (hw->mac.type) {
2846	/* Total Packet Buffer on these is 48K */
2847	case e1000_82571:
2848	case e1000_82572:
2849	case e1000_80003es2lan:
2850			pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
2851		break;
2852	case e1000_82573: /* 82573: Total Packet Buffer is 32K */
2853			pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
2854		break;
2855	case e1000_82574:
2856	case e1000_82583:
2857			pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
2858		break;
2859	case e1000_ich8lan:
2860		pba = E1000_PBA_8K;
2861		break;
2862	case e1000_ich9lan:
2863	case e1000_ich10lan:
2864		/* Boost Receive side for jumbo frames */
2865		if (adapter->hw.mac.max_frame_size > 4096)
2866			pba = E1000_PBA_14K;
2867		else
2868			pba = E1000_PBA_10K;
2869		break;
2870	case e1000_pchlan:
2871	case e1000_pch2lan:
2872	case e1000_pch_lpt:
2873		pba = E1000_PBA_26K;
2874		break;
2875	default:
2876		if (adapter->hw.mac.max_frame_size > 8192)
2877			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
2878		else
2879			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
2880	}
2881	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
2882
2883	/*
2884	 * These parameters control the automatic generation (Tx) and
2885	 * response (Rx) to Ethernet PAUSE frames.
2886	 * - High water mark should allow for at least two frames to be
2887	 *   received after sending an XOFF.
2888	 * - Low water mark works best when it is very near the high water mark.
2889	 *   This allows the receiver to restart by sending XON when it has
2890	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2891	 *   restart after one full frame is pulled from the buffer. There
2892	 *   could be several smaller frames in the buffer and if so they will
2893	 *   not trigger the XON until their total number reduces the buffer
2894	 *   by 1500.
2895	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2896	 */
2897	rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
2898	hw->fc.high_water = rx_buffer_size -
2899	    roundup2(adapter->hw.mac.max_frame_size, 1024);
2900	hw->fc.low_water = hw->fc.high_water - 1500;
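	/*
	 * Worked example (assuming a 32KB receive allocation and a
	 * standard 1522-byte max frame): rx_buffer_size = 32768 and
	 * roundup2(1522, 1024) = 2048, giving high_water = 30720 and
	 * low_water = 29220 bytes.
	 */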
2901
2902	if (adapter->fc) /* locally set flow control value? */
2903		hw->fc.requested_mode = adapter->fc;
2904	else
2905		hw->fc.requested_mode = e1000_fc_full;
2906
2907	if (hw->mac.type == e1000_80003es2lan)
2908		hw->fc.pause_time = 0xFFFF;
2909	else
2910		hw->fc.pause_time = EM_FC_PAUSE_TIME;
2911
2912	hw->fc.send_xon = TRUE;
2913
2914	/* Device specific overrides/settings */
2915	switch (hw->mac.type) {
2916	case e1000_pchlan:
2917		/* Workaround: no TX flow ctrl for PCH */
2918		hw->fc.requested_mode = e1000_fc_rx_pause;
2919		hw->fc.pause_time = 0xFFFF; /* override */
2920		if (ifp->if_mtu > ETHERMTU) {
2921			hw->fc.high_water = 0x3500;
2922			hw->fc.low_water = 0x1500;
2923		} else {
2924			hw->fc.high_water = 0x5000;
2925			hw->fc.low_water = 0x3000;
2926		}
2927		hw->fc.refresh_time = 0x1000;
2928		break;
2929	case e1000_pch2lan:
2930	case e1000_pch_lpt:
2931		hw->fc.high_water = 0x5C20;
2932		hw->fc.low_water = 0x5048;
2933		hw->fc.pause_time = 0x0650;
2934		hw->fc.refresh_time = 0x0400;
2935		/* Jumbos need adjusted PBA */
2936		if (ifp->if_mtu > ETHERMTU)
2937			E1000_WRITE_REG(hw, E1000_PBA, 12);
2938		else
2939			E1000_WRITE_REG(hw, E1000_PBA, 26);
2940		break;
2941	case e1000_ich9lan:
2942	case e1000_ich10lan:
2943		if (ifp->if_mtu > ETHERMTU) {
2944			hw->fc.high_water = 0x2800;
2945			hw->fc.low_water = hw->fc.high_water - 8;
2946			break;
2947		}
2948		/* else fall thru */
2949	default:
2950		if (hw->mac.type == e1000_80003es2lan)
2951			hw->fc.pause_time = 0xFFFF;
2952		break;
2953	}
2954
2955	/* Issue a global reset */
2956	e1000_reset_hw(hw);
2957	E1000_WRITE_REG(hw, E1000_WUC, 0);
2958	em_disable_aspm(adapter);
2959	/* and a re-init */
2960	if (e1000_init_hw(hw) < 0) {
2961		device_printf(dev, "Hardware Initialization Failed\n");
2962		return;
2963	}
2964
2965	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
2966	e1000_get_phy_info(hw);
2967	e1000_check_for_link(hw);
2968	return;
2969}
2970
2971/*********************************************************************
2972 *
2973 *  Setup networking device structure and register an interface.
2974 *
2975 **********************************************************************/
2976static int
2977em_setup_interface(device_t dev, struct adapter *adapter)
2978{
2979	struct ifnet   *ifp;
2980
2981	INIT_DEBUGOUT("em_setup_interface: begin");
2982
2983	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2984	if (ifp == NULL) {
2985		device_printf(dev, "can not allocate ifnet structure\n");
2986		return (-1);
2987	}
2988	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2989	ifp->if_init =  em_init;
2990	ifp->if_softc = adapter;
2991	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2992	ifp->if_ioctl = em_ioctl;
2993#ifdef EM_MULTIQUEUE
2994	/* Multiqueue stack interface */
2995	ifp->if_transmit = em_mq_start;
2996	ifp->if_qflush = em_qflush;
2997#else
2998	ifp->if_start = em_start;
2999	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3000	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3001	IFQ_SET_READY(&ifp->if_snd);
3002#endif
3003
3004	ether_ifattach(ifp, adapter->hw.mac.addr);
3005
3006	ifp->if_capabilities = ifp->if_capenable = 0;
3007
3008
3009	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3010	ifp->if_capabilities |= IFCAP_TSO4;
3011	/*
3012	 * Tell the upper layer(s) we
3013	 * support full VLAN capability
3014	 */
3015	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3016	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3017			     |  IFCAP_VLAN_HWTSO
3018			     |  IFCAP_VLAN_MTU;
3019	ifp->if_capenable = ifp->if_capabilities;
3020
3021	/*
3022	** Don't turn this on by default: if vlans are
3023	** created on another pseudo device (e.g. lagg),
3024	** then vlan events are not passed through, breaking
3025	** operation, but with HW FILTER off it works. If
3026	** using vlans directly on the em driver you can
3027	** enable this and get full hardware tag filtering.
3028	*/
3029	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3030
3031#ifdef DEVICE_POLLING
3032	ifp->if_capabilities |= IFCAP_POLLING;
3033#endif
3034
3035	/* Enable only WOL MAGIC by default */
3036	if (adapter->wol) {
3037		ifp->if_capabilities |= IFCAP_WOL;
3038		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3039	}
3040
3041	/*
3042	 * Specify the media types supported by this adapter and register
3043	 * callbacks to update media and link information
3044	 */
3045	ifmedia_init(&adapter->media, IFM_IMASK,
3046	    em_media_change, em_media_status);
3047	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3048	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3049		u_char fiber_type = IFM_1000_SX;	/* default type */
3050
3051		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3052			    0, NULL);
3053		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3054	} else {
3055		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3056		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3057			    0, NULL);
3058		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3059			    0, NULL);
3060		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3061			    0, NULL);
3062		if (adapter->hw.phy.type != e1000_phy_ife) {
3063			ifmedia_add(&adapter->media,
3064				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3065			ifmedia_add(&adapter->media,
3066				IFM_ETHER | IFM_1000_T, 0, NULL);
3067		}
3068	}
3069	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3070	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3071	return (0);
3072}
3073
3074
3075/*
3076 * Manage DMA'able memory.
3077 */
3078static void
3079em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3080{
3081	if (error)
3082		return;
3083	*(bus_addr_t *) arg = segs[0].ds_addr;
3084}
3085
3086static int
3087em_dma_malloc(struct adapter *adapter, bus_size_t size,
3088        struct em_dma_alloc *dma, int mapflags)
3089{
3090	int error;
3091
3092	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3093				EM_DBA_ALIGN, 0,	/* alignment, bounds */
3094				BUS_SPACE_MAXADDR,	/* lowaddr */
3095				BUS_SPACE_MAXADDR,	/* highaddr */
3096				NULL, NULL,		/* filter, filterarg */
3097				size,			/* maxsize */
3098				1,			/* nsegments */
3099				size,			/* maxsegsize */
3100				0,			/* flags */
3101				NULL,			/* lockfunc */
3102				NULL,			/* lockarg */
3103				&dma->dma_tag);
3104	if (error) {
3105		device_printf(adapter->dev,
3106		    "%s: bus_dma_tag_create failed: %d\n",
3107		    __func__, error);
3108		goto fail_0;
3109	}
3110
3111	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3112	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3113	if (error) {
3114		device_printf(adapter->dev,
3115		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3116		    __func__, (uintmax_t)size, error);
3117		goto fail_2;
3118	}
3119
3120	dma->dma_paddr = 0;
3121	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3122	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3123	if (error || dma->dma_paddr == 0) {
3124		device_printf(adapter->dev,
3125		    "%s: bus_dmamap_load failed: %d\n",
3126		    __func__, error);
3127		goto fail_3;
3128	}
3129
3130	return (0);
3131
3132fail_3:
3133	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3134fail_2:
3135	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3136	bus_dma_tag_destroy(dma->dma_tag);
3137fail_0:
3138	dma->dma_map = NULL;
3139	dma->dma_tag = NULL;
3140
3141	return (error);
3142}
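
/*
 * Usage sketch (not compiled): allocating a DMA-able descriptor ring
 * with em_dma_malloc() and releasing it with em_dma_free(). The helper
 * name and the local "ring" variable are illustrative only.
 */
#if 0
static void
example_ring_alloc(struct adapter *adapter)
{
	struct em_dma_alloc ring;
	bus_size_t tsize;

	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
	if (em_dma_malloc(adapter, tsize, &ring, BUS_DMA_NOWAIT) != 0)
		return;
	/* ring.dma_vaddr is the KVA; ring.dma_paddr the bus address */
	bzero(ring.dma_vaddr, tsize);
	em_dma_free(adapter, &ring);
}
#endif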
3143
3144static void
3145em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3146{
3147	if (dma->dma_tag == NULL)
3148		return;
3149	if (dma->dma_map != NULL) {
3150		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3151		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3152		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3153		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3154		dma->dma_map = NULL;
3155	}
3156	bus_dma_tag_destroy(dma->dma_tag);
3157	dma->dma_tag = NULL;
3158}
3159
3160
3161/*********************************************************************
3162 *
3163 *  Allocate memory for the transmit and receive rings, and then
3164 *  the descriptors associated with each, called only once at attach.
3165 *
3166 **********************************************************************/
3167static int
3168em_allocate_queues(struct adapter *adapter)
3169{
3170	device_t		dev = adapter->dev;
3171	struct tx_ring		*txr = NULL;
3172	struct rx_ring		*rxr = NULL;
3173	int rsize, tsize, error = E1000_SUCCESS;
3174	int txconf = 0, rxconf = 0;
3175
3176
3177	/* Allocate the TX ring struct memory */
3178	if (!(adapter->tx_rings =
3179	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3180	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3181		device_printf(dev, "Unable to allocate TX ring memory\n");
3182		error = ENOMEM;
3183		goto fail;
3184	}
3185
3186	/* Now allocate the RX */
3187	if (!(adapter->rx_rings =
3188	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3189	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3190		device_printf(dev, "Unable to allocate RX ring memory\n");
3191		error = ENOMEM;
3192		goto rx_fail;
3193	}
3194
3195	tsize = roundup2(adapter->num_tx_desc *
3196	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3197	/*
3198	 * Now set up the TX queues, txconf is needed to handle the
3199	 * possibility that things fail midcourse and we need to
3200	 * undo memory gracefully
3201	 */
3202	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3203		/* Set up some basics */
3204		txr = &adapter->tx_rings[i];
3205		txr->adapter = adapter;
3206		txr->me = i;
3207
3208		/* Initialize the TX lock */
3209		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3210		    device_get_nameunit(dev), txr->me);
3211		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3212
3213		if (em_dma_malloc(adapter, tsize,
3214			&txr->txdma, BUS_DMA_NOWAIT)) {
3215			device_printf(dev,
3216			    "Unable to allocate TX Descriptor memory\n");
3217			error = ENOMEM;
3218			goto err_tx_desc;
3219		}
3220		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3221		bzero((void *)txr->tx_base, tsize);
3222
3223		if (em_allocate_transmit_buffers(txr)) {
3224			device_printf(dev,
3225			    "Critical Failure setting up transmit buffers\n");
3226			error = ENOMEM;
3227			goto err_tx_desc;
3228		}
3229#if __FreeBSD_version >= 800000
3230		/* Allocate a buf ring */
3231		txr->br = buf_ring_alloc(4096, M_DEVBUF,
3232		    M_WAITOK, &txr->tx_mtx);
3233#endif
3234	}
3235
3236	/*
3237	 * Next the RX queues...
3238	 */
3239	rsize = roundup2(adapter->num_rx_desc *
3240	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
3241	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3242		rxr = &adapter->rx_rings[i];
3243		rxr->adapter = adapter;
3244		rxr->me = i;
3245
3246		/* Initialize the RX lock */
3247		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3248		    device_get_nameunit(dev), rxr->me);
3249		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3250
3251		if (em_dma_malloc(adapter, rsize,
3252			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3253			device_printf(dev,
3254			    "Unable to allocate RxDescriptor memory\n");
3255			error = ENOMEM;
3256			goto err_rx_desc;
3257		}
3258		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
3259		bzero((void *)rxr->rx_base, rsize);
3260
3261		/* Allocate receive buffers for the ring */
3262		if (em_allocate_receive_buffers(rxr)) {
3263			device_printf(dev,
3264			    "Critical Failure setting up receive buffers\n");
3265			error = ENOMEM;
3266			goto err_rx_desc;
3267		}
3268	}
3269
3270	return (0);
3271
3272err_rx_desc:
3273	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3274		em_dma_free(adapter, &rxr->rxdma);
3275err_tx_desc:
3276	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3277		em_dma_free(adapter, &txr->txdma);
3278	free(adapter->rx_rings, M_DEVBUF);
3279rx_fail:
3280#if __FreeBSD_version >= 800000
3281	buf_ring_free(txr->br, M_DEVBUF);
3282#endif
3283	free(adapter->tx_rings, M_DEVBUF);
3284fail:
3285	return (error);
3286}
3287
3288
3289/*********************************************************************
3290 *
3291 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3292 *  the information needed to transmit a packet on the wire. This is
3293 *  called only once at attach, setup is done every reset.
3294 *
3295 **********************************************************************/
3296static int
3297em_allocate_transmit_buffers(struct tx_ring *txr)
3298{
3299	struct adapter *adapter = txr->adapter;
3300	device_t dev = adapter->dev;
3301	struct em_buffer *txbuf;
3302	int error, i;
3303
3304	/*
3305	 * Setup DMA descriptor areas.
3306	 */
3307	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3308			       1, 0,			/* alignment, bounds */
3309			       BUS_SPACE_MAXADDR,	/* lowaddr */
3310			       BUS_SPACE_MAXADDR,	/* highaddr */
3311			       NULL, NULL,		/* filter, filterarg */
3312			       EM_TSO_SIZE,		/* maxsize */
3313			       EM_MAX_SCATTER,		/* nsegments */
3314			       PAGE_SIZE,		/* maxsegsize */
3315			       0,			/* flags */
3316			       NULL,			/* lockfunc */
3317			       NULL,			/* lockfuncarg */
3318			       &txr->txtag))) {
3319		device_printf(dev,"Unable to allocate TX DMA tag\n");
3320		goto fail;
3321	}
3322
3323	if (!(txr->tx_buffers =
3324	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
3325	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3326		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3327		error = ENOMEM;
3328		goto fail;
3329	}
3330
3331	/* Create the descriptor buffer dma maps */
3332	txbuf = txr->tx_buffers;
3333	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3334		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3335		if (error != 0) {
3336			device_printf(dev, "Unable to create TX DMA map\n");
3337			goto fail;
3338		}
3339	}
3340
3341	return (0);
3342fail:
3343	/* Free everything; this handles the case where setup failed partway through */
3344	em_free_transmit_structures(adapter);
3345	return (error);
3346}
3347
3348/*********************************************************************
3349 *
3350 *  Initialize a transmit ring.
3351 *
3352 **********************************************************************/
3353static void
3354em_setup_transmit_ring(struct tx_ring *txr)
3355{
3356	struct adapter *adapter = txr->adapter;
3357	struct em_buffer *txbuf;
3358	int i;
3359#ifdef DEV_NETMAP
3360	struct netmap_adapter *na = NA(adapter->ifp);
3361	struct netmap_slot *slot;
3362#endif /* DEV_NETMAP */
3363
3364	/* Clear the old descriptor contents */
3365	EM_TX_LOCK(txr);
3366#ifdef DEV_NETMAP
3367	slot = netmap_reset(na, NR_TX, txr->me, 0);
3368#endif /* DEV_NETMAP */
3369
3370	bzero((void *)txr->tx_base,
3371	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3372	/* Reset indices */
3373	txr->next_avail_desc = 0;
3374	txr->next_to_clean = 0;
3375
3376	/* Free any existing tx buffers. */
3377	txbuf = txr->tx_buffers;
3378	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3379		if (txbuf->m_head != NULL) {
3380			bus_dmamap_sync(txr->txtag, txbuf->map,
3381			    BUS_DMASYNC_POSTWRITE);
3382			bus_dmamap_unload(txr->txtag, txbuf->map);
3383			m_freem(txbuf->m_head);
3384			txbuf->m_head = NULL;
3385		}
3386#ifdef DEV_NETMAP
3387		if (slot) {
3388			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3389			uint64_t paddr;
3390			void *addr;
3391
3392			addr = PNMB(na, slot + si, &paddr);
3393			txr->tx_base[i].buffer_addr = htole64(paddr);
3394			/* reload the map for netmap mode */
3395			netmap_load_map(na, txr->txtag, txbuf->map, addr);
3396		}
3397#endif /* DEV_NETMAP */
3398
3399		/* clear the watch index */
3400		txbuf->next_eop = -1;
3401	}
3402
3403	/* Set number of descriptors available */
3404	txr->tx_avail = adapter->num_tx_desc;
3405	txr->queue_status = EM_QUEUE_IDLE;
3406
3407	/* Clear checksum offload context. */
3408	txr->last_hw_offload = 0;
3409	txr->last_hw_ipcss = 0;
3410	txr->last_hw_ipcso = 0;
3411	txr->last_hw_tucss = 0;
3412	txr->last_hw_tucso = 0;
3413
3414	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3415	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3416	EM_TX_UNLOCK(txr);
3417}
3418
3419/*********************************************************************
3420 *
3421 *  Initialize all transmit rings.
3422 *
3423 **********************************************************************/
3424static void
3425em_setup_transmit_structures(struct adapter *adapter)
3426{
3427	struct tx_ring *txr = adapter->tx_rings;
3428
3429	for (int i = 0; i < adapter->num_queues; i++, txr++)
3430		em_setup_transmit_ring(txr);
3431
3432	return;
3433}
3434
3435/*********************************************************************
3436 *
3437 *  Enable transmit unit.
3438 *
3439 **********************************************************************/
3440static void
3441em_initialize_transmit_unit(struct adapter *adapter)
3442{
3443	struct tx_ring	*txr = adapter->tx_rings;
3444	struct e1000_hw	*hw = &adapter->hw;
3445	u32	tctl, tarc, tipg = 0;
3446
3447	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3448
3449	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3450		u64 bus_addr = txr->txdma.dma_paddr;
3451		/* Base and Len of TX Ring */
3452		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3453	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3454		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3455	    	    (u32)(bus_addr >> 32));
3456		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3457	    	    (u32)bus_addr);
3458		/* Init the HEAD/TAIL indices */
3459		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3460		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3461
3462		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3463		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3464		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3465
3466		txr->queue_status = EM_QUEUE_IDLE;
3467	}
3468
3469	/* Set the default values for the Tx Inter Packet Gap timer */
3470	switch (adapter->hw.mac.type) {
3471	case e1000_80003es2lan:
3472		tipg = DEFAULT_82543_TIPG_IPGR1;
3473		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3474		    E1000_TIPG_IPGR2_SHIFT;
3475		break;
3476	default:
3477		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3478		    (adapter->hw.phy.media_type ==
3479		    e1000_media_type_internal_serdes))
3480			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3481		else
3482			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3483		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3484		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3485	}
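	/*
	 * A sketch of how the three gap fields pack into TIPG, assuming
	 * the usual shift values (10 and 20) from e1000_defines.h:
	 *
	 *	tipg  =  ipgt				(bits  9:0)
	 *	      |  ipgr1 << E1000_TIPG_IPGR1_SHIFT	(bits 19:10)
	 *	      |  ipgr2 << E1000_TIPG_IPGR2_SHIFT	(bits 29:20)
	 */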
3486
3487	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3488	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3489
3490	if (adapter->hw.mac.type >= e1000_82540)
3491		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3492		    adapter->tx_abs_int_delay.value);
3493
3494	if ((adapter->hw.mac.type == e1000_82571) ||
3495	    (adapter->hw.mac.type == e1000_82572)) {
3496		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3497		tarc |= SPEED_MODE_BIT;
3498		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3499	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3500		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3501		tarc |= 1;
3502		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3503		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3504		tarc |= 1;
3505		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3506	}
3507
3508	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3509	if (adapter->tx_int_delay.value > 0)
3510		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3511
3512	/* Program the Transmit Control Register */
3513	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3514	tctl &= ~E1000_TCTL_CT;
3515	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3516		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3517
3518	if (adapter->hw.mac.type >= e1000_82571)
3519		tctl |= E1000_TCTL_MULR;
3520
3521	/* This write will effectively turn on the transmit unit. */
3522	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3523
3524}
3525
3526
3527/*********************************************************************
3528 *
3529 *  Free all transmit rings.
3530 *
3531 **********************************************************************/
3532static void
3533em_free_transmit_structures(struct adapter *adapter)
3534{
3535	struct tx_ring *txr = adapter->tx_rings;
3536
3537	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3538		EM_TX_LOCK(txr);
3539		em_free_transmit_buffers(txr);
3540		em_dma_free(adapter, &txr->txdma);
3541		EM_TX_UNLOCK(txr);
3542		EM_TX_LOCK_DESTROY(txr);
3543	}
3544
3545	free(adapter->tx_rings, M_DEVBUF);
3546}
3547
3548/*********************************************************************
3549 *
3550 *  Free transmit ring related data structures.
3551 *
3552 **********************************************************************/
3553static void
3554em_free_transmit_buffers(struct tx_ring *txr)
3555{
3556	struct adapter		*adapter = txr->adapter;
3557	struct em_buffer	*txbuf;
3558
3559	INIT_DEBUGOUT("free_transmit_ring: begin");
3560
3561	if (txr->tx_buffers == NULL)
3562		return;
3563
3564	for (int i = 0; i < adapter->num_tx_desc; i++) {
3565		txbuf = &txr->tx_buffers[i];
3566		if (txbuf->m_head != NULL) {
3567			bus_dmamap_sync(txr->txtag, txbuf->map,
3568			    BUS_DMASYNC_POSTWRITE);
3569			bus_dmamap_unload(txr->txtag,
3570			    txbuf->map);
3571			m_freem(txbuf->m_head);
3572			txbuf->m_head = NULL;
3573			if (txbuf->map != NULL) {
3574				bus_dmamap_destroy(txr->txtag,
3575				    txbuf->map);
3576				txbuf->map = NULL;
3577			}
3578		} else if (txbuf->map != NULL) {
3579			bus_dmamap_unload(txr->txtag,
3580			    txbuf->map);
3581			bus_dmamap_destroy(txr->txtag,
3582			    txbuf->map);
3583			txbuf->map = NULL;
3584		}
3585	}
3586#if __FreeBSD_version >= 800000
3587	if (txr->br != NULL)
3588		buf_ring_free(txr->br, M_DEVBUF);
3589#endif
3590	if (txr->tx_buffers != NULL) {
3591		free(txr->tx_buffers, M_DEVBUF);
3592		txr->tx_buffers = NULL;
3593	}
3594	if (txr->txtag != NULL) {
3595		bus_dma_tag_destroy(txr->txtag);
3596		txr->txtag = NULL;
3597	}
3598	return;
3599}
3600
3601
3602/*********************************************************************
3603 *  The offload context is protocol specific (TCP/UDP) and thus
3604 *  only needs to be set when the protocol changes. A context
3605 *  change can be a performance detriment, and offload might be
3606 *  better just disabled. The reason lies in the way the
3607 *  controller supports pipelined requests from the Tx data DMA:
3608 *  up to four requests can be pipelined, and they may belong to
3609 *  the same packet or to multiple packets. However, all requests
3610 *  for one packet are issued before a request is issued for a
3611 *  subsequent packet, and if a request for the next packet
3612 *  requires a context change, that request will be stalled
3613 *  until the previous request completes. This means setting up
3614 *  a new context effectively disables pipelined Tx data DMA,
3615 *  which in turn greatly slows down performance when sending
3616 *  small frames.
3617 **********************************************************************/
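/*
 * A minimal sketch of the reuse test implemented below with the cached
 * last_hw_* fields: a new context descriptor is emitted only when the
 * checksum offsets actually change, e.g. for the TCP/UDP-only case:
 *
 *	if (txr->last_hw_offload == offload &&
 *	    txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso)
 *		return;		(context already programmed; reuse it)
 */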
3618static void
3619em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3620    struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3621{
3622	struct adapter			*adapter = txr->adapter;
3623	struct e1000_context_desc	*TXD = NULL;
3624	struct em_buffer		*tx_buffer;
3625	int				cur, hdr_len;
3626	u32				cmd = 0;
3627	u16				offload = 0;
3628	u8				ipcso, ipcss, tucso, tucss;
3629
3630	ipcss = ipcso = tucss = tucso = 0;
3631	hdr_len = ip_off + (ip->ip_hl << 2);
3632	cur = txr->next_avail_desc;
3633
3634	/* Setup of IP header checksum. */
3635	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3636		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3637		offload |= CSUM_IP;
3638		ipcss = ip_off;
3639		ipcso = ip_off + offsetof(struct ip, ip_sum);
3640		/*
3641		 * Start offset for header checksum calculation.
3642		 * End offset for header checksum calculation.
3643		 * Offset of place to put the checksum.
3644		 */
3645		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3646		TXD->lower_setup.ip_fields.ipcss = ipcss;
3647		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3648		TXD->lower_setup.ip_fields.ipcso = ipcso;
3649		cmd |= E1000_TXD_CMD_IP;
3650	}
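	/*
	 * Worked example for the block above, assuming an untagged
	 * Ethernet frame (ip_off = 14) and a 5-word IP header:
	 * ipcss = 14, ipcso = 14 + 10 = 24 (ip_sum sits at byte offset
	 * 10 of struct ip), and hdr_len = 14 + 20 = 34.
	 */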
3651
3652	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3653 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3654 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3655 		offload |= CSUM_TCP;
3656 		tucss = hdr_len;
3657 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3658 		/*
3659 		 * Setting up a new checksum offload context for every frame
3660 		 * takes a lot of processing time in hardware. This also
3661 		 * hurts performance for small frames, so avoid it if the
3662 		 * driver can reuse the previously configured checksum
3663 		 * offload context.
3664 		 */
3665 		if (txr->last_hw_offload == offload) {
3666 			if (offload & CSUM_IP) {
3667 				if (txr->last_hw_ipcss == ipcss &&
3668 				    txr->last_hw_ipcso == ipcso &&
3669 				    txr->last_hw_tucss == tucss &&
3670 				    txr->last_hw_tucso == tucso)
3671 					return;
3672 			} else {
3673 				if (txr->last_hw_tucss == tucss &&
3674 				    txr->last_hw_tucso == tucso)
3675 					return;
3676 			}
3677  		}
3678 		txr->last_hw_offload = offload;
3679 		txr->last_hw_tucss = tucss;
3680 		txr->last_hw_tucso = tucso;
3681 		/*
3682 		 * Start offset for payload checksum calculation.
3683 		 * End offset for payload checksum calculation.
3684 		 * Offset of place to put the checksum.
3685 		 */
3686		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3687 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
3688 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3689 		TXD->upper_setup.tcp_fields.tucso = tucso;
3690 		cmd |= E1000_TXD_CMD_TCP;
3691 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3692 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3693 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
 		offload |= CSUM_UDP;
3694 		tucss = hdr_len;
3695 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
3696 		/*
3697 		 * Setting up a new checksum offload context for every frame
3698 		 * takes a lot of processing time in hardware. This also
3699 		 * hurts performance for small frames, so avoid it if the
3700 		 * driver can reuse the previously configured checksum
3701 		 * offload context.
3702 		 */
3703 		if (txr->last_hw_offload == offload) {
3704 			if (offload & CSUM_IP) {
3705 				if (txr->last_hw_ipcss == ipcss &&
3706 				    txr->last_hw_ipcso == ipcso &&
3707 				    txr->last_hw_tucss == tucss &&
3708 				    txr->last_hw_tucso == tucso)
3709 					return;
3710 			} else {
3711 				if (txr->last_hw_tucss == tucss &&
3712 				    txr->last_hw_tucso == tucso)
3713 					return;
3714 			}
3715 		}
3716 		txr->last_hw_offload = offload;
3717 		txr->last_hw_tucss = tucss;
3718 		txr->last_hw_tucso = tucso;
3719 		/*
3720 		 * Start offset for header checksum calculation.
3721 		 * End offset for header checksum calculation.
3722 		 * Offset of place to put the checksum.
3723 		 */
3724		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3725 		TXD->upper_setup.tcp_fields.tucss = tucss;
3726 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
3727 		TXD->upper_setup.tcp_fields.tucso = tucso;
3728  	}
3729
3730 	if (offload & CSUM_IP) {
3731 		txr->last_hw_ipcss = ipcss;
3732 		txr->last_hw_ipcso = ipcso;
3733  	}
3734
3735	TXD->tcp_seg_setup.data = htole32(0);
3736	TXD->cmd_and_length =
3737	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3738	tx_buffer = &txr->tx_buffers[cur];
3739	tx_buffer->m_head = NULL;
3740	tx_buffer->next_eop = -1;
3741
3742	if (++cur == adapter->num_tx_desc)
3743		cur = 0;
3744
3745	txr->tx_avail--;
3746	txr->next_avail_desc = cur;
3747}
3748
3749
3750/**********************************************************************
3751 *
3752 *  Setup work for hardware segmentation offload (TSO)
3753 *
3754 **********************************************************************/
3755static void
3756em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3757    struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
3758{
3759	struct adapter			*adapter = txr->adapter;
3760	struct e1000_context_desc	*TXD;
3761	struct em_buffer		*tx_buffer;
3762	int cur, hdr_len;
3763
3764	/*
3765	 * In theory we can use the same TSO context if and only if
3766	 * the frame is the same type (IP/TCP) and has the same MSS. However,
3767	 * checking whether a frame has the same IP/TCP structure is a
3768	 * hard thing, so just ignore that and always re-establish a
3769	 * new TSO context.
3770	 */
3771	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
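	/*
	 * E.g. for an untagged frame with option-less IP and TCP headers:
	 * ip_off = 14, ip_hl = 5 (20 bytes), th_off = 5 (20 bytes), so
	 * hdr_len = 54; each segment then carries tso_segsz payload bytes.
	 */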
3772	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
3773		      E1000_TXD_DTYP_D |	/* Data descr type */
3774		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
3775
3776	/* IP and/or TCP header checksum calculation and insertion. */
3777	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
3778
3779	cur = txr->next_avail_desc;
3780	tx_buffer = &txr->tx_buffers[cur];
3781	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
3782
3783	/*
3784	 * Start offset for header checksum calculation.
3785	 * End offset for header checksum calculation.
3786	 * Offset of place to put the checksum.
3787	 */
3788	TXD->lower_setup.ip_fields.ipcss = ip_off;
3789	TXD->lower_setup.ip_fields.ipcse =
3790	    htole16(ip_off + (ip->ip_hl << 2) - 1);
3791	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
3792	/*
3793	 * Start offset for payload checksum calculation.
3794	 * End offset for payload checksum calculation.
3795	 * Offset of place to put the checksum.
3796	 */
3797	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
3798	TXD->upper_setup.tcp_fields.tucse = 0;
3799	TXD->upper_setup.tcp_fields.tucso =
3800	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
3801	/*
3802	 * Payload size per packet w/o any headers.
3803	 * Length of all headers up to payload.
3804	 */
3805	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
3806	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
3807
3808	TXD->cmd_and_length = htole32(adapter->txd_cmd |
3809				E1000_TXD_CMD_DEXT |	/* Extended descr */
3810				E1000_TXD_CMD_TSE |	/* TSE context */
3811				E1000_TXD_CMD_IP |	/* Do IP csum */
3812				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
3813				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
3814
3815	tx_buffer->m_head = NULL;
3816	tx_buffer->next_eop = -1;
3817
3818	if (++cur == adapter->num_tx_desc)
3819		cur = 0;
3820
3821	txr->tx_avail--;
3822	txr->next_avail_desc = cur;
3823	txr->tx_tso = TRUE;
3824}
3825
3826
3827/**********************************************************************
3828 *
3829 *  Examine each tx_buffer in the used queue. If the hardware is done
3830 *  processing the packet then free associated resources. The
3831 *  tx_buffer is put back on the free queue.
3832 *
3833 **********************************************************************/
3834static void
3835em_txeof(struct tx_ring *txr)
3836{
3837	struct adapter	*adapter = txr->adapter;
3838	int			first, last, done, processed;
3839	struct em_buffer	*tx_buffer;
3840	struct e1000_tx_desc	*tx_desc, *eop_desc;
3841	struct ifnet   *ifp = adapter->ifp;
3842
3843	EM_TX_LOCK_ASSERT(txr);
3844#ifdef DEV_NETMAP
3845	if (netmap_tx_irq(ifp, txr->me))
3846		return;
3847#endif /* DEV_NETMAP */
3848
3849	/* No work, make sure watchdog is off */
3850	if (txr->tx_avail == adapter->num_tx_desc) {
3851		txr->queue_status = EM_QUEUE_IDLE;
3852		return;
3853	}
3854
3855	processed = 0;
3856	first = txr->next_to_clean;
3857	tx_desc = &txr->tx_base[first];
3858	tx_buffer = &txr->tx_buffers[first];
3859	last = tx_buffer->next_eop;
3860	eop_desc = &txr->tx_base[last];
3861
3862	/*
3863	 * What this does is get the index of the
3864	 * first descriptor AFTER the EOP of the
3865	 * first packet; that way we can do a
3866	 * simple comparison in the inner while loop.
3867	 */
3868	if (++last == adapter->num_tx_desc)
3869 		last = 0;
3870	done = last;
3871
3872	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3873	    BUS_DMASYNC_POSTREAD);
3874
3875	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3876		/* We clean the range of the packet */
3877		while (first != done) {
3878			tx_desc->upper.data = 0;
3879			tx_desc->lower.data = 0;
3880			tx_desc->buffer_addr = 0;
3881			++txr->tx_avail;
3882			++processed;
3883
3884			if (tx_buffer->m_head) {
3885				bus_dmamap_sync(txr->txtag,
3886				    tx_buffer->map,
3887				    BUS_DMASYNC_POSTWRITE);
3888				bus_dmamap_unload(txr->txtag,
3889				    tx_buffer->map);
3890				m_freem(tx_buffer->m_head);
3891				tx_buffer->m_head = NULL;
3892			}
3893			tx_buffer->next_eop = -1;
3894			txr->watchdog_time = ticks;
3895
3896			if (++first == adapter->num_tx_desc)
3897				first = 0;
3898
3899			tx_buffer = &txr->tx_buffers[first];
3900			tx_desc = &txr->tx_base[first];
3901		}
3902		++ifp->if_opackets;
3903		/* See if we can continue to the next packet */
3904		last = tx_buffer->next_eop;
3905		if (last != -1) {
3906        		eop_desc = &txr->tx_base[last];
3907			/* Get new done point */
3908			if (++last == adapter->num_tx_desc)
				last = 0;
3909			done = last;
3910		} else
3911			break;
3912	}
3913	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3914	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3915
3916	txr->next_to_clean = first;
3917
3918	/*
3919	** Watchdog calculation: we know there's
3920	** work outstanding or the first return
3921	** would have been taken, so nothing processed
3922	** for too long indicates a hang. The local timer
3923	** will examine this and do a reset if needed.
3924	*/
3925	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
3926		txr->queue_status = EM_QUEUE_HUNG;
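	/*
	 * E.g., assuming if_em.h defines EM_WATCHDOG as (10 * hz): a ring
	 * with work outstanding that completes nothing for ten seconds is
	 * flagged EM_QUEUE_HUNG, and the local timer will reset the port.
	 */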
3927
3928	/*
3929	 * If we have a minimum free, clear IFF_DRV_OACTIVE
3930	 * to tell the stack that it is OK to send packets.
3931	 * Notice that all writes of OACTIVE happen under the
3932	 * TX lock which, with a single queue, guarantees
3933	 * sanity.
3934	 */
3935	if (txr->tx_avail >= EM_MAX_SCATTER)
3936		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3937
3938	/* Disable watchdog if all clean */
3939	if (txr->tx_avail == adapter->num_tx_desc) {
3940		txr->queue_status = EM_QUEUE_IDLE;
3941	}
3942}
3943
3944
3945/*********************************************************************
3946 *
3947 *  Refresh RX descriptor mbufs from system mbuf buffer pool.
3948 *
3949 **********************************************************************/
3950static void
3951em_refresh_mbufs(struct rx_ring *rxr, int limit)
3952{
3953	struct adapter		*adapter = rxr->adapter;
3954	struct mbuf		*m;
3955	bus_dma_segment_t	segs[1];
3956	struct em_buffer	*rxbuf;
3957	int			i, j, error, nsegs;
3958	bool			cleaned = FALSE;
3959
3960	i = j = rxr->next_to_refresh;
3961	/*
3962	** Get one descriptor beyond
3963	** our work mark to control
3964	** the loop.
3965	*/
3966	if (++j == adapter->num_rx_desc)
3967		j = 0;
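	/*
	 * E.g. with num_rx_desc = 1024 and next_to_refresh = 1023: the
	 * work index i stays at 1023 while the control index j wraps to
	 * 0, and the loop below stops once j reaches the caller's limit.
	 */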
3968
3969	while (j != limit) {
3970		rxbuf = &rxr->rx_buffers[i];
3971		if (rxbuf->m_head == NULL) {
3972			m = m_getjcl(M_NOWAIT, MT_DATA,
3973			    M_PKTHDR, adapter->rx_mbuf_sz);
3974			/*
3975			** If we have a temporary resource shortage
3976			** that causes a failure, just abort refresh
3977			** for now; we will return to this point when
3978			** reinvoked from em_rxeof.
3979			*/
3980			if (m == NULL)
3981				goto update;
3982		} else
3983			m = rxbuf->m_head;
3984
3985		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
3986		m->m_flags |= M_PKTHDR;
3987		m->m_data = m->m_ext.ext_buf;
3988
3989		/* Use bus_dma machinery to setup the memory mapping  */
3990		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
3991		    m, segs, &nsegs, BUS_DMA_NOWAIT);
3992		if (error != 0) {
3993			printf("Refresh mbufs: hdr dmamap load"
3994			    " failure - %d\n", error);
3995			m_free(m);
3996			rxbuf->m_head = NULL;
3997			goto update;
3998		}
3999		rxbuf->m_head = m;
4000		bus_dmamap_sync(rxr->rxtag,
4001		    rxbuf->map, BUS_DMASYNC_PREREAD);
4002		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
4003		cleaned = TRUE;
4004
4005		i = j; /* Next is precalculated for us */
4006		rxr->next_to_refresh = i;
4007		/* Calculate next controlling index */
4008		if (++j == adapter->num_rx_desc)
4009			j = 0;
4010	}
4011update:
4012	/*
4013	** Update the tail pointer only if,
4014	** and only as far as, we have refreshed.
4015	*/
4016	if (cleaned)
4017		E1000_WRITE_REG(&adapter->hw,
4018		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4019
4020	return;
4021}
4022
4023
4024/*********************************************************************
4025 *
4026 *  Allocate memory for rx_buffer structures. Since we use one
4027 *  rx_buffer per received packet, the maximum number of rx_buffers
4028 *  that we'll need is equal to the number of receive descriptors
4029 *  that we've allocated.
4030 *
4031 **********************************************************************/
4032static int
4033em_allocate_receive_buffers(struct rx_ring *rxr)
4034{
4035	struct adapter		*adapter = rxr->adapter;
4036	device_t		dev = adapter->dev;
4037	struct em_buffer	*rxbuf;
4038	int			error;
4039
4040	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
4041	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4042	if (rxr->rx_buffers == NULL) {
4043		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4044		return (ENOMEM);
4045	}
4046
4047	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4048				1, 0,			/* alignment, bounds */
4049				BUS_SPACE_MAXADDR,	/* lowaddr */
4050				BUS_SPACE_MAXADDR,	/* highaddr */
4051				NULL, NULL,		/* filter, filterarg */
4052				MJUM9BYTES,		/* maxsize */
4053				1,			/* nsegments */
4054				MJUM9BYTES,		/* maxsegsize */
4055				0,			/* flags */
4056				NULL,			/* lockfunc */
4057				NULL,			/* lockarg */
4058				&rxr->rxtag);
4059	if (error) {
4060		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4061		    __func__, error);
4062		goto fail;
4063	}
4064
4065	rxbuf = rxr->rx_buffers;
4066	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4067		rxbuf = &rxr->rx_buffers[i];
4068		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4069		if (error) {
4070			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4071			    __func__, error);
4072			goto fail;
4073		}
4074	}
4075
4076	return (0);
4077
4078fail:
4079	em_free_receive_structures(adapter);
4080	return (error);
4081}
4082
4083
4084/*********************************************************************
4085 *
4086 *  Initialize a receive ring and its buffers.
4087 *
4088 **********************************************************************/
4089static int
4090em_setup_receive_ring(struct rx_ring *rxr)
4091{
4092	struct	adapter 	*adapter = rxr->adapter;
4093	struct em_buffer	*rxbuf;
4094	bus_dma_segment_t	seg[1];
4095	int			rsize, nsegs, error = 0;
4096#ifdef DEV_NETMAP
4097	struct netmap_adapter *na = NA(adapter->ifp);
4098	struct netmap_slot *slot;
4099#endif
4100
4101
4102	/* Clear the ring contents */
4103	EM_RX_LOCK(rxr);
4104	rsize = roundup2(adapter->num_rx_desc *
4105	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
4106	bzero((void *)rxr->rx_base, rsize);
4107#ifdef DEV_NETMAP
4108	slot = netmap_reset(na, NR_RX, 0, 0);
4109#endif
4110
4111	/*
4112	** Free current RX buffer structs and their mbufs
4113	*/
4114	for (int i = 0; i < adapter->num_rx_desc; i++) {
4115		rxbuf = &rxr->rx_buffers[i];
4116		if (rxbuf->m_head != NULL) {
4117			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4118			    BUS_DMASYNC_POSTREAD);
4119			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4120			m_freem(rxbuf->m_head);
4121			rxbuf->m_head = NULL; /* mark as freed */
4122		}
4123	}
4124
4125	/* Now replenish the mbufs */
4126	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4127		rxbuf = &rxr->rx_buffers[j];
4128#ifdef DEV_NETMAP
4129		if (slot) {
4130			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4131			uint64_t paddr;
4132			void *addr;
4133
4134			addr = PNMB(na, slot + si, &paddr);
4135			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4136			/* Update descriptor */
4137			rxr->rx_base[j].buffer_addr = htole64(paddr);
4138			continue;
4139		}
4140#endif /* DEV_NETMAP */
4141		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4142		    M_PKTHDR, adapter->rx_mbuf_sz);
4143		if (rxbuf->m_head == NULL) {
4144			error = ENOBUFS;
4145			goto fail;
4146		}
4147		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4148		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4149		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4150
4151		/* Get the memory mapping */
4152		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4153		    rxbuf->map, rxbuf->m_head, seg,
4154		    &nsegs, BUS_DMA_NOWAIT);
4155		if (error != 0) {
4156			m_freem(rxbuf->m_head);
4157			rxbuf->m_head = NULL;
4158			goto fail;
4159		}
4160		bus_dmamap_sync(rxr->rxtag,
4161		    rxbuf->map, BUS_DMASYNC_PREREAD);
4162
4163		/* Update descriptor */
4164		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
4165	}
4166	rxr->next_to_check = 0;
4167	rxr->next_to_refresh = 0;
4168	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4169	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4170
4171fail:
4172	EM_RX_UNLOCK(rxr);
4173	return (error);
4174}
4175
4176/*********************************************************************
4177 *
4178 *  Initialize all receive rings.
4179 *
4180 **********************************************************************/
4181static int
4182em_setup_receive_structures(struct adapter *adapter)
4183{
4184	struct rx_ring *rxr = adapter->rx_rings;
4185	int q;
4186
4187	for (q = 0; q < adapter->num_queues; q++, rxr++)
4188		if (em_setup_receive_ring(rxr))
4189			goto fail;
4190
4191	return (0);
4192fail:
4193	/*
4194	 * Free RX buffers allocated so far; we will only handle
4195	 * the rings that completed, as the failing case will have
4196	 * cleaned up for itself. 'q' failed, so it's the terminus.
4197	 */
4198	for (int i = 0; i < q; ++i) {
4199		rxr = &adapter->rx_rings[i];
4200		for (int n = 0; n < adapter->num_rx_desc; n++) {
4201			struct em_buffer *rxbuf;
4202			rxbuf = &rxr->rx_buffers[n];
4203			if (rxbuf->m_head != NULL) {
4204				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4205				    BUS_DMASYNC_POSTREAD);
4206				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4207				m_freem(rxbuf->m_head);
4208				rxbuf->m_head = NULL;
4209			}
4210		}
4211		rxr->next_to_check = 0;
4212		rxr->next_to_refresh = 0;
4213	}
4214
4215	return (ENOBUFS);
4216}
4217
4218/*********************************************************************
4219 *
4220 *  Free all receive rings.
4221 *
4222 **********************************************************************/
4223static void
4224em_free_receive_structures(struct adapter *adapter)
4225{
4226	struct rx_ring *rxr = adapter->rx_rings;
4227
4228	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4229		em_free_receive_buffers(rxr);
4230		/* Free the ring memory as well */
4231		em_dma_free(adapter, &rxr->rxdma);
4232		EM_RX_LOCK_DESTROY(rxr);
4233	}
4234
4235	free(adapter->rx_rings, M_DEVBUF);
4236}
4237
4238
4239/*********************************************************************
4240 *
4241 *  Free receive ring data structures
4242 *
4243 **********************************************************************/
4244static void
4245em_free_receive_buffers(struct rx_ring *rxr)
4246{
4247	struct adapter		*adapter = rxr->adapter;
4248	struct em_buffer	*rxbuf = NULL;
4249
4250	INIT_DEBUGOUT("free_receive_buffers: begin");
4251
4252	if (rxr->rx_buffers != NULL) {
4253		for (int i = 0; i < adapter->num_rx_desc; i++) {
4254			rxbuf = &rxr->rx_buffers[i];
4255			if (rxbuf->map != NULL) {
4256				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4257				    BUS_DMASYNC_POSTREAD);
4258				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4259				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4260			}
4261			if (rxbuf->m_head != NULL) {
4262				m_freem(rxbuf->m_head);
4263				rxbuf->m_head = NULL;
4264			}
4265		}
4266		free(rxr->rx_buffers, M_DEVBUF);
4267		rxr->rx_buffers = NULL;
4268		rxr->next_to_check = 0;
4269		rxr->next_to_refresh = 0;
4270	}
4271
4272	if (rxr->rxtag != NULL) {
4273		bus_dma_tag_destroy(rxr->rxtag);
4274		rxr->rxtag = NULL;
4275	}
4276
4277	return;
4278}
4279
4280
4281/*********************************************************************
4282 *
4283 *  Enable receive unit.
4284 *
4285 **********************************************************************/
4286
4287static void
4288em_initialize_receive_unit(struct adapter *adapter)
4289{
4290	struct rx_ring	*rxr = adapter->rx_rings;
4291	struct ifnet	*ifp = adapter->ifp;
4292	struct e1000_hw	*hw = &adapter->hw;
4293	u64	bus_addr;
4294	u32	rctl, rxcsum;
4295
4296	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4297
4298	/*
4299	 * Make sure receives are disabled while setting
4300	 * up the descriptor ring
4301	 */
4302	rctl = E1000_READ_REG(hw, E1000_RCTL);
4303	/* Do not disable if ever enabled on this hardware */
4304	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4305		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4306
4307	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4308	    adapter->rx_abs_int_delay.value);
4309	/*
4310	 * Set the interrupt throttling rate. Value is calculated
4311	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4312	 */
4313	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
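	/*
	 * Worked example, assuming the usual if_em.h definitions: with
	 * MAX_INTS_PER_SEC = 8000, DEFAULT_ITR = 10^9 / (8000 * 256),
	 * about 488 units of 256ns, i.e. at most ~8000 interrupts/sec.
	 */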
4314
4315	/*
4316	** When using MSIX interrupts we need to throttle
4317	** using the EITR register (82574 only)
4318	*/
4319	if (hw->mac.type == e1000_82574) {
4320		for (int i = 0; i < 4; i++)
4321			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4322			    DEFAULT_ITR);
4323		/* Disable accelerated acknowledge */
4324		E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS);
4325	}
4326
4327	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4328	if (ifp->if_capenable & IFCAP_RXCSUM)
4329		rxcsum |= E1000_RXCSUM_TUOFL;
4330	else
4331		rxcsum &= ~E1000_RXCSUM_TUOFL;
4332	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4333
4334	/*
4335	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4336	** long latencies are observed, like Lenovo X60. This
4337	** change eliminates the problem, but since having positive
4338	** values in RDTR is a known source of problems on other
4339	** platforms another solution is being sought.
4340	*/
4341	if (hw->mac.type == e1000_82573)
4342		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4343
4344	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4345		/* Setup the Base and Length of the Rx Descriptor Ring */
4346		u32 rdt = adapter->num_rx_desc - 1; /* default */
4347
4348		bus_addr = rxr->rxdma.dma_paddr;
4349		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4350		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4351		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4352		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4353		/* Setup the Head and Tail Descriptor Pointers */
4354		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4355#ifdef DEV_NETMAP
4356		/*
4357		 * An init() while a netmap client is active must
4358		 * preserve the rx buffers passed to userspace.
4359		 */
4360		if (ifp->if_capenable & IFCAP_NETMAP)
4361			rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4362#endif /* DEV_NETMAP */
4363		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4364	}
4365
4366	/* Set PTHRESH for improved jumbo performance */
4367	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4368	    (adapter->hw.mac.type == e1000_pch2lan) ||
4369	    (adapter->hw.mac.type == e1000_ich10lan)) &&
4370	    (ifp->if_mtu > ETHERMTU)) {
4371		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4372		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4373	}
4374
4375	if (adapter->hw.mac.type >= e1000_pch2lan) {
4376		if (ifp->if_mtu > ETHERMTU)
4377			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4378		else
4379			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4380	}
4381
4382	/* Setup the Receive Control Register */
4383	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4384	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4385	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4386	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4387
4388	/* Strip the CRC */
4389	rctl |= E1000_RCTL_SECRC;
4390
4391	/* Make sure VLAN Filters are off */
4392	rctl &= ~E1000_RCTL_VFE;
4393	rctl &= ~E1000_RCTL_SBP;
4394
4395	if (adapter->rx_mbuf_sz == MCLBYTES)
4396		rctl |= E1000_RCTL_SZ_2048;
4397	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4398		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4399	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4400		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
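	/*
	 * Note: with E1000_RCTL_BSEX set the two buffer-size bits are
	 * scaled by 16, which is how the 4096 and 8192 byte receive
	 * buffer sizes are encoded in RCTL.
	 */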
4401
4402	if (ifp->if_mtu > ETHERMTU)
4403		rctl |= E1000_RCTL_LPE;
4404	else
4405		rctl &= ~E1000_RCTL_LPE;
4406
4407	/* Write out the settings */
4408	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4409
4410	return;
4411}
4412
4413
4414/*********************************************************************
4415 *
4416 *  This routine executes in interrupt context. It replenishes
4417 *  the mbufs in the descriptor ring and sends data which has been
4418 *  dma'ed into host memory to the upper layer.
4419 *
4420 *  We loop at most count times if count is > 0, or until done if
4421 *  count < 0.
4422 *
4423 *  For polling we also now return the number of cleaned packets
4424 *********************************************************************/
4425static bool
4426em_rxeof(struct rx_ring *rxr, int count, int *done)
4427{
4428	struct adapter		*adapter = rxr->adapter;
4429	struct ifnet		*ifp = adapter->ifp;
4430	struct mbuf		*mp, *sendmp;
4431	u8			status = 0;
4432	u16 			len;
4433	int			i, processed, rxdone = 0;
4434	bool			eop;
4435	struct e1000_rx_desc	*cur;
4436
4437	EM_RX_LOCK(rxr);
4438
4439#ifdef DEV_NETMAP
4440	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4441		EM_RX_UNLOCK(rxr);
4442		return (FALSE);
4443	}
4444#endif /* DEV_NETMAP */
4445
4446	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4447
4448		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4449			break;
4450
4451		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4452		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4453
4454		cur = &rxr->rx_base[i];
4455		status = cur->status;
4456		mp = sendmp = NULL;
4457
4458		if ((status & E1000_RXD_STAT_DD) == 0)
4459			break;
4460
4461		len = le16toh(cur->length);
4462		eop = (status & E1000_RXD_STAT_EOP) != 0;
4463
4464		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
4465		    (rxr->discard == TRUE)) {
4466			adapter->dropped_pkts++;
4467			++rxr->rx_discarded;
4468			if (!eop) /* Catch subsequent segs */
4469				rxr->discard = TRUE;
4470			else
4471				rxr->discard = FALSE;
4472			em_rx_discard(rxr, i);
4473			goto next_desc;
4474		}
4475		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4476
4477		/* Assign correct length to the current fragment */
4478		mp = rxr->rx_buffers[i].m_head;
4479		mp->m_len = len;
4480
4481		/* Trigger for refresh */
4482		rxr->rx_buffers[i].m_head = NULL;
4483
4484		/* First segment? */
4485		if (rxr->fmp == NULL) {
4486			mp->m_pkthdr.len = len;
4487			rxr->fmp = rxr->lmp = mp;
4488		} else {
4489			/* Chain mbuf's together */
4490			mp->m_flags &= ~M_PKTHDR;
4491			rxr->lmp->m_next = mp;
4492			rxr->lmp = mp;
4493			rxr->fmp->m_pkthdr.len += len;
4494		}
4495
4496		if (eop) {
4497			--count;
4498			sendmp = rxr->fmp;
4499			sendmp->m_pkthdr.rcvif = ifp;
4500			ifp->if_ipackets++;
4501			em_receive_checksum(cur, sendmp);
4502#ifndef __NO_STRICT_ALIGNMENT
4503			if (adapter->hw.mac.max_frame_size >
4504			    (MCLBYTES - ETHER_ALIGN) &&
4505			    em_fixup_rx(rxr) != 0)
4506				goto skip;
4507#endif
4508			if (status & E1000_RXD_STAT_VP) {
4509				sendmp->m_pkthdr.ether_vtag =
4510				    le16toh(cur->special);
4511				sendmp->m_flags |= M_VLANTAG;
4512			}
4513#ifndef __NO_STRICT_ALIGNMENT
4514skip:
4515#endif
4516			rxr->fmp = rxr->lmp = NULL;
4517		}
4518next_desc:
4519		/* Zero out the receive descriptors status. */
4520		cur->status = 0;
4521		++rxdone;	/* cumulative for POLL */
4522		++processed;
4523
4524		/* Advance our pointers to the next descriptor. */
4525		if (++i == adapter->num_rx_desc)
4526			i = 0;
4527
4528		/* Send to the stack */
4529		if (sendmp != NULL) {
4530			rxr->next_to_check = i;
4531			EM_RX_UNLOCK(rxr);
4532			(*ifp->if_input)(ifp, sendmp);
4533			EM_RX_LOCK(rxr);
4534			i = rxr->next_to_check;
4535		}
4536
4537		/* Only refresh mbufs every 8 descriptors */
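		/*
		 * Batching the refresh amortizes the RDT tail write done
		 * in em_refresh_mbufs() over several descriptors instead
		 * of paying the MMIO cost once per received packet.
		 */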
4538		if (processed == 8) {
4539			em_refresh_mbufs(rxr, i);
4540			processed = 0;
4541		}
4542	}
4543
4544	/* Catch any remaining refresh work */
4545	if (e1000_rx_unrefreshed(rxr))
4546		em_refresh_mbufs(rxr, i);
4547
4548	rxr->next_to_check = i;
4549	if (done != NULL)
4550		*done = rxdone;
4551	EM_RX_UNLOCK(rxr);
4552
4553	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4554}
4555
4556static __inline void
4557em_rx_discard(struct rx_ring *rxr, int i)
4558{
4559	struct em_buffer	*rbuf;
4560
4561	rbuf = &rxr->rx_buffers[i];
4562	bus_dmamap_unload(rxr->rxtag, rbuf->map);
4563
4564	/* Free any previous pieces */
4565	if (rxr->fmp != NULL) {
4566		rxr->fmp->m_flags |= M_PKTHDR;
4567		m_freem(rxr->fmp);
4568		rxr->fmp = NULL;
4569		rxr->lmp = NULL;
4570	}
4571	/*
4572	** Free the buffer and allow em_refresh_mbufs()
4573	** to clean up and recharge the buffer.
4574	*/
4575	if (rbuf->m_head) {
4576		m_free(rbuf->m_head);
4577		rbuf->m_head = NULL;
4578	}
4579	return;
4580}
4581
4582#ifndef __NO_STRICT_ALIGNMENT
4583/*
4584 * When jumbo frames are enabled we should realign the entire payload on
4585 * architectures with strict alignment. This is a serious design mistake of
4586 * the 8254x, as it nullifies the benefit of DMA operations. The 8254x only
4587 * allows the RX buffer size to be 2048/4096/8192/16384; what we really want
4588 * is 2048 - ETHER_ALIGN, to align the payload. On architectures without
4589 * strict alignment restrictions the 8254x still performs unaligned memory
4590 * accesses, which reduce performance as well. To avoid copying an entire
4591 * frame to realign it, we allocate a new mbuf and copy the ethernet header
4592 * into it; the new mbuf is then prepended onto the existing mbuf chain.
4593 *
4594 * Be aware that the best performance of the 8254x is achieved only when
4595 * jumbo frames are not used at all on architectures with strict alignment.
4596 */
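/*
 * A sketch of the transformation done below for the large-frame case,
 * where n is the newly allocated header mbuf:
 *
 *	before:	m: [ether|ip|payload ...]
 *	after:	n: [ether] -> m: [ip|payload ...]
 *
 * Frames that still fit are instead shifted up by ETHER_HDR_LEN within
 * the same mbuf.
 */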
4597static int
4598em_fixup_rx(struct rx_ring *rxr)
4599{
4600	struct adapter *adapter = rxr->adapter;
4601	struct mbuf *m, *n;
4602	int error;
4603
4604	error = 0;
4605	m = rxr->fmp;
4606	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4607		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4608		m->m_data += ETHER_HDR_LEN;
4609	} else {
4610		MGETHDR(n, M_NOWAIT, MT_DATA);
4611		if (n != NULL) {
4612			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4613			m->m_data += ETHER_HDR_LEN;
4614			m->m_len -= ETHER_HDR_LEN;
4615			n->m_len = ETHER_HDR_LEN;
4616			M_MOVE_PKTHDR(n, m);
4617			n->m_next = m;
4618			rxr->fmp = n;
4619		} else {
4620			adapter->dropped_pkts++;
4621			m_freem(rxr->fmp);
4622			rxr->fmp = NULL;
4623			error = ENOMEM;
4624		}
4625	}
4626
4627	return (error);
4628}
4629#endif
4630
4631/*********************************************************************
4632 *
4633 *  Verify that the hardware indicated that the checksum is valid.
4634 *  Inform the stack about the status of checksum so that stack
4635 *  doesn't spend time verifying the checksum.
4636 *
4637 *********************************************************************/
4638static void
4639em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
4640{
4641	mp->m_pkthdr.csum_flags = 0;
4642
4643	/* Ignore Checksum bit is set */
4644	if (rx_desc->status & E1000_RXD_STAT_IXSM)
4645		return;
4646
4647	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
4648		return;
4649
4650	/* IP Checksum Good? */
4651	if (rx_desc->status & E1000_RXD_STAT_IPCS)
4652		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
4653
4654	/* TCP or UDP checksum */
4655	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
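		/*
		 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data of
		 * 0xffff is the mbuf convention for "hardware verified
		 * the full TCP/UDP checksum"; the stack can then skip
		 * its own verification pass.
		 */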
4656		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4657		mp->m_pkthdr.csum_data = htons(0xffff);
4658	}
4659}
4660
4661/*
4662 * This routine is run via a vlan
4663 * config EVENT
4664 */
4665static void
4666em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4667{
4668	struct adapter	*adapter = ifp->if_softc;
4669	u32		index, bit;
4670
4671	if (ifp->if_softc !=  arg)   /* Not our event */
4672		return;
4673
4674	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
4675		return;
4676
4677	EM_CORE_LOCK(adapter);
4678	index = (vtag >> 5) & 0x7F;
4679	bit = vtag & 0x1F;
4680	adapter->shadow_vfta[index] |= (1 << bit);
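	/*
	 * Worked example: vtag 100 lands in VFTA word 100 >> 5 = 3 at
	 * bit 100 & 0x1F = 4; each of the 128 shadow words covers 32
	 * VLAN IDs, 4096 in total.
	 */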
4681	++adapter->num_vlans;
4682	/* Re-init to load the changes */
4683	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4684		em_init_locked(adapter);
4685	EM_CORE_UNLOCK(adapter);
4686}
4687
4688/*
4689 * This routine is run via a vlan
4690 * unconfig EVENT
4691 */
4692static void
4693em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4694{
4695	struct adapter	*adapter = ifp->if_softc;
4696	u32		index, bit;
4697
4698	if (ifp->if_softc !=  arg)
4699		return;
4700
4701	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4702		return;
4703
4704	EM_CORE_LOCK(adapter);
4705	index = (vtag >> 5) & 0x7F;
4706	bit = vtag & 0x1F;
4707	adapter->shadow_vfta[index] &= ~(1 << bit);
4708	--adapter->num_vlans;
4709	/* Re-init to load the changes */
4710	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4711		em_init_locked(adapter);
4712	EM_CORE_UNLOCK(adapter);
4713}
4714
4715static void
4716em_setup_vlan_hw_support(struct adapter *adapter)
4717{
4718	struct e1000_hw *hw = &adapter->hw;
4719	u32             reg;
4720
4721	/*
4722	** We get here thru init_locked, meaning
4723	** a soft reset; this has already cleared
4724	** the VFTA and other state, so if there
4725	** have been no vlans registered, do nothing.
4726	*/
4727	if (adapter->num_vlans == 0)
4728		return;
4729
4730	/*
4731	** A soft reset zeroes out the VFTA, so
4732	** we need to repopulate it now.
4733	*/
4734	for (int i = 0; i < EM_VFTA_SIZE; i++)
4735		if (adapter->shadow_vfta[i] != 0)
4736			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4737			    i, adapter->shadow_vfta[i]);
4738
4739	reg = E1000_READ_REG(hw, E1000_CTRL);
4740	reg |= E1000_CTRL_VME;
4741	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4742
4743	/* Enable the Filter Table */
4744	reg = E1000_READ_REG(hw, E1000_RCTL);
4745	reg &= ~E1000_RCTL_CFIEN;
4746	reg |= E1000_RCTL_VFE;
4747	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4748}
4749
4750static void
4751em_enable_intr(struct adapter *adapter)
4752{
4753	struct e1000_hw *hw = &adapter->hw;
4754	u32 ims_mask = IMS_ENABLE_MASK;
4755
4756	if (hw->mac.type == e1000_82574) {
4757		E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
4758		ims_mask |= EM_MSIX_MASK;
4759	}
4760	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4761}
4762
4763static void
4764em_disable_intr(struct adapter *adapter)
4765{
4766	struct e1000_hw *hw = &adapter->hw;
4767
4768	if (hw->mac.type == e1000_82574)
4769		E1000_WRITE_REG(hw, EM_EIAC, 0);
4770	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
4771}
4772
4773/*
4774 * Bit of a misnomer: what this really means is
4775 * to enable OS management of the system, i.e.
4776 * to disable special hardware management features.
4777 */
4778static void
4779em_init_manageability(struct adapter *adapter)
4780{
4781	/* A shared code workaround */
4782#define E1000_82542_MANC2H E1000_MANC2H
4783	if (adapter->has_manage) {
4784		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4785		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4786
4787		/* disable hardware interception of ARP */
4788		manc &= ~(E1000_MANC_ARP_EN);
4789
4790		/* enable receiving management packets to the host */
4791		manc |= E1000_MANC_EN_MNG2HOST;
4792#define E1000_MNG2HOST_PORT_623 (1 << 5)
4793#define E1000_MNG2HOST_PORT_664 (1 << 6)
4794		manc2h |= E1000_MNG2HOST_PORT_623;
4795		manc2h |= E1000_MNG2HOST_PORT_664;
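		/*
		 * Ports 623 (RMCP/ASF) and 664 (secure RMCP) carry
		 * remote-management traffic; setting these MANC2H bits
		 * forwards such packets to the host as well, rather
		 * than leaving them solely to the management firmware.
		 */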
4796		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4797		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4798	}
4799}
4800
4801/*
4802 * Give control back to hardware management
4803 * controller if there is one.
4804 */
4805static void
4806em_release_manageability(struct adapter *adapter)
4807{
4808	if (adapter->has_manage) {
4809		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4810
4811		/* re-enable hardware interception of ARP */
4812		manc |= E1000_MANC_ARP_EN;
4813		manc &= ~E1000_MANC_EN_MNG2HOST;
4814
4815		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4816	}
4817}
4818
4819/*
4820 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4821 * For ASF and Pass Through versions of f/w this means
4822 * that the driver is loaded. For AMT version type f/w
4823 * this means that the network i/f is open.
4824 */
4825static void
4826em_get_hw_control(struct adapter *adapter)
4827{
4828	u32 ctrl_ext, swsm;
4829
4830	if (adapter->hw.mac.type == e1000_82573) {
4831		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4832		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4833		    swsm | E1000_SWSM_DRV_LOAD);
4834		return;
4835	}
4836	/* else */
4837	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4838	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4839	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4840	return;
4841}
4842
4843/*
4844 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4845 * For ASF and Pass Through versions of f/w this means that
4846 * the driver is no longer loaded. For AMT versions of the
4847 * f/w this means that the network i/f is closed.
4848 */
4849static void
4850em_release_hw_control(struct adapter *adapter)
4851{
4852	u32 ctrl_ext, swsm;
4853
4854	if (!adapter->has_manage)
4855		return;
4856
4857	if (adapter->hw.mac.type == e1000_82573) {
4858		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
4859		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
4860		    swsm & ~E1000_SWSM_DRV_LOAD);
4861		return;
4862	}
4863	/* else */
4864	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4865	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4866	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4867	return;
4868}
4869
4870static int
4871em_is_valid_ether_addr(u8 *addr)
4872{
4873	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
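	/*
	 * addr[0] & 1 tests the multicast/group bit of the first octet;
	 * neither a group address nor the all-zero address is a valid
	 * station address.
	 */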
4874
4875	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4876		return (FALSE);
4877	}
4878
4879	return (TRUE);
4880}
4881
4882/*
4883** Parse the interface capabilities with regard
4884** to both system management and wake-on-lan for
4885** later use.
4886*/
4887static void
4888em_get_wakeup(device_t dev)
4889{
4890	struct adapter	*adapter = device_get_softc(dev);
4891	u16		eeprom_data = 0, device_id, apme_mask;
4892
4893	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4894	apme_mask = EM_EEPROM_APME;
4895
4896	switch (adapter->hw.mac.type) {
4897	case e1000_82573:
4898	case e1000_82583:
4899		adapter->has_amt = TRUE;
4900		/* Falls thru */
4901	case e1000_82571:
4902	case e1000_82572:
4903	case e1000_80003es2lan:
4904		if (adapter->hw.bus.func == 1) {
4905			e1000_read_nvm(&adapter->hw,
4906			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4907			break;
4908		} else
4909			e1000_read_nvm(&adapter->hw,
4910			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4911		break;
4912	case e1000_ich8lan:
4913	case e1000_ich9lan:
4914	case e1000_ich10lan:
4915	case e1000_pchlan:
4916	case e1000_pch2lan:
4917		apme_mask = E1000_WUC_APME;
4918		adapter->has_amt = TRUE;
4919		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
4920		break;
4921	default:
4922		e1000_read_nvm(&adapter->hw,
4923		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4924		break;
4925	}
4926	if (eeprom_data & apme_mask)
4927		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4928	/*
4929	 * We have the eeprom settings; now apply the special cases
4930	 * where the eeprom may be wrong or the board won't support
4931	 * wake on lan on a particular port.
4932	 */
4933	device_id = pci_get_device(dev);
4934	switch (device_id) {
4935	case E1000_DEV_ID_82571EB_FIBER:
4936		/* Wake events only supported on port A for dual fiber
4937		 * regardless of eeprom setting */
4938		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4939		    E1000_STATUS_FUNC_1)
4940			adapter->wol = 0;
4941		break;
4942	case E1000_DEV_ID_82571EB_QUAD_COPPER:
4943	case E1000_DEV_ID_82571EB_QUAD_FIBER:
4944	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
4945		/* if quad port adapter, disable WoL on all but port A */
4946		if (global_quad_port_a != 0)
4947			adapter->wol = 0;
4948		/* Reset for multiple quad port adapters */
4949		if (++global_quad_port_a == 4)
4950			global_quad_port_a = 0;
4951		break;
4952	}
4953	return;
4954}
4955
4956
4957/*
4958 * Enable PCI Wake On Lan capability
4959 */
4960static void
4961em_enable_wakeup(device_t dev)
4962{
4963	struct adapter	*adapter = device_get_softc(dev);
4964	struct ifnet	*ifp = adapter->ifp;
4965	u32		pmc, ctrl, ctrl_ext, rctl;
4966	u16     	status;
4967
4968	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4969		return;
4970
4971	/* Advertise the wakeup capability */
4972	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4973	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4974	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4975	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4976
4977	if ((adapter->hw.mac.type == e1000_ich8lan) ||
4978	    (adapter->hw.mac.type == e1000_pchlan) ||
4979	    (adapter->hw.mac.type == e1000_ich9lan) ||
4980	    (adapter->hw.mac.type == e1000_ich10lan))
4981		e1000_suspend_workarounds_ich8lan(&adapter->hw);
4982
4983	/* Keep the laser running on Fiber adapters */
4984	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4985	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4986		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4987		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4988		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4989	}
4990
4991	/*
4992	** Determine type of Wakeup: note that wol
4993	** is set with all bits on by default.
4994	*/
4995	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4996		adapter->wol &= ~E1000_WUFC_MAG;
4997
4998	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4999		adapter->wol &= ~E1000_WUFC_MC;
5000	else {
5001		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5002		rctl |= E1000_RCTL_MPE;
5003		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5004	}
5005
5006	if ((adapter->hw.mac.type == e1000_pchlan) ||
5007	    (adapter->hw.mac.type == e1000_pch2lan)) {
5008		if (em_enable_phy_wakeup(adapter))
5009			return;
5010	} else {
5011		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5012		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5013	}
5014
5015	if (adapter->hw.phy.type == e1000_phy_igp_3)
5016		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5017
5018	/* Request PME */
5019	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5020	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5021	if (ifp->if_capenable & IFCAP_WOL)
5022		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5023	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5024
5025	return;
5026}
5027
5028/*
5029** WOL in the newer chipset interfaces (pchlan)
5030 ** requires things to be copied into the PHY
5031*/
5032static int
5033em_enable_phy_wakeup(struct adapter *adapter)
5034{
5035	struct e1000_hw *hw = &adapter->hw;
5036	u32 mreg, ret = 0;
5037	u16 preg;
5038
5039	/* copy MAC RARs to PHY RARs */
5040	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5041
5042	/* copy MAC MTA to PHY MTA */
5043	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5044		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5045		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5046		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5047		    (u16)((mreg >> 16) & 0xFFFF));
5048	}
5049
5050	/* configure PHY Rx Control register */
5051	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5052	mreg = E1000_READ_REG(hw, E1000_RCTL);
5053	if (mreg & E1000_RCTL_UPE)
5054		preg |= BM_RCTL_UPE;
5055	if (mreg & E1000_RCTL_MPE)
5056		preg |= BM_RCTL_MPE;
5057	preg &= ~(BM_RCTL_MO_MASK);
5058	if (mreg & E1000_RCTL_MO_3)
5059		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5060				<< BM_RCTL_MO_SHIFT);
5061	if (mreg & E1000_RCTL_BAM)
5062		preg |= BM_RCTL_BAM;
5063	if (mreg & E1000_RCTL_PMCF)
5064		preg |= BM_RCTL_PMCF;
5065	mreg = E1000_READ_REG(hw, E1000_CTRL);
5066	if (mreg & E1000_CTRL_RFCE)
5067		preg |= BM_RCTL_RFCE;
5068	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5069
5070	/* enable PHY wakeup in MAC register */
5071	E1000_WRITE_REG(hw, E1000_WUC,
5072	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5073	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5074
5075	/* configure and enable PHY wakeup in PHY registers */
5076	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5077	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5078
5079	/* activate PHY wakeup */
5080	ret = hw->phy.ops.acquire(hw);
5081	if (ret) {
5082		printf("Could not acquire PHY\n");
5083		return ret;
5084	}
5085	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5086	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5087	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5088	if (ret) {
5089		printf("Could not read PHY page 769\n");
5090		goto out;
5091	}
5092	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5093	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5094	if (ret)
5095		printf("Could not set PHY Host Wakeup bit\n");
5096out:
5097	hw->phy.ops.release(hw);
5098
5099	return ret;
5100}
5101
5102static void
5103em_led_func(void *arg, int onoff)
5104{
5105	struct adapter	*adapter = arg;
5106
5107	EM_CORE_LOCK(adapter);
5108	if (onoff) {
5109		e1000_setup_led(&adapter->hw);
5110		e1000_led_on(&adapter->hw);
5111	} else {
5112		e1000_led_off(&adapter->hw);
5113		e1000_cleanup_led(&adapter->hw);
5114	}
5115	EM_CORE_UNLOCK(adapter);
5116}
5117
5118/*
5119** Disable the L0S and L1 LINK states
5120*/
5121static void
5122em_disable_aspm(struct adapter *adapter)
5123{
5124	int		base, reg;
5125	u16		link_cap, link_ctrl;
5126	device_t	dev = adapter->dev;
5127
5128	switch (adapter->hw.mac.type) {
5129	case e1000_82573:
5130	case e1000_82574:
5131	case e1000_82583:
5132		break;
5133	default:
5134		return;
5135	}
5136	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5137		return;
5138	reg = base + PCIER_LINK_CAP;
5139	link_cap = pci_read_config(dev, reg, 2);
5140	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5141		return;
5142	reg = base + PCIER_LINK_CTL;
5143	link_ctrl = pci_read_config(dev, reg, 2);
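	/*
	 * PCIEM_LINK_CTL_ASPMC masks the two ASPM Control bits of the
	 * PCIe Link Control register (bit 0 = L0s enable, bit 1 = L1
	 * enable); clearing both disables the L0s and L1 link states.
	 */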
5144	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5145	pci_write_config(dev, reg, link_ctrl, 2);
5146	return;
5147}
5148
5149/**********************************************************************
5150 *
5151 *  Update the board statistics counters.
5152 *
5153 **********************************************************************/
5154static void
5155em_update_stats_counters(struct adapter *adapter)
5156{
5157	struct ifnet   *ifp;
5158
5159	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5160	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5161		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5162		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5163	}
5164	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5165	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5166	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5167	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5168
5169	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5170	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5171	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5172	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5173	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5174	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5175	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5176	/*
5177	** For watchdog management we need to know if we have been
5178	** paused during the last interval, so capture that here.
5179	*/
5180	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5181	adapter->stats.xoffrxc += adapter->pause_frames;
5182	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5183	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5184	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5185	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5186	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5187	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5188	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5189	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5190	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5191	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5192	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5193	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5194
5195	/* For the 64-bit byte counters the low dword must be read first. */
5196	/* Both registers clear on the read of the high dword */
5197
5198	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5199	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5200	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5201	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5202
5203	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5204	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5205	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5206	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5207	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5208
5209	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
5210	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
5211
5212	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5213	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5214	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5215	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5216	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5217	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5218	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5219	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5220	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5221	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5222
5223	/* Interrupt Counts */
5224
5225	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5226	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5227	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5228	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5229	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5230	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5231	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5232	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5233	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5234
5235	if (adapter->hw.mac.type >= e1000_82543) {
5236		adapter->stats.algnerrc +=
5237		    E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5238		adapter->stats.rxerrc +=
5239		    E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5240		adapter->stats.tncrs +=
5241		    E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5242		adapter->stats.cexterr +=
5243		    E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5244		adapter->stats.tsctc +=
5245		    E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5246		adapter->stats.tsctfc +=
5247		    E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5248	}
5249	ifp = adapter->ifp;
5250
5251	ifp->if_collisions = adapter->stats.colc;
5252
5253	/* Rx Errors */
5254	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5255	    adapter->stats.crcerrs + adapter->stats.algnerrc +
5256	    adapter->stats.ruc + adapter->stats.roc +
5257	    adapter->stats.mpc + adapter->stats.cexterr;
5258
5259	/* Tx Errors */
5260	ifp->if_oerrors = adapter->stats.ecol +
5261	    adapter->stats.latecol + adapter->watchdog_events;
5262}
5263
5264/* Export a single 32-bit register via a read-only sysctl. */
5265static int
5266em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5267{
5268	struct adapter *adapter;
5269	u_int val;
5270
5271	adapter = oidp->oid_arg1;
5272	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5273	return (sysctl_handle_int(oidp, &val, 0, req));
5274}
5275
5276/*
5277 * Add sysctl variables, one per statistic, to the system.
5278 */
5279static void
5280em_add_hw_stats(struct adapter *adapter)
5281{
5282	device_t dev = adapter->dev;
5283
5284	struct tx_ring *txr = adapter->tx_rings;
5285	struct rx_ring *rxr = adapter->rx_rings;
5286
5287	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5288	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5289	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5290	struct e1000_hw_stats *stats = &adapter->stats;
5291
5292	struct sysctl_oid *stat_node, *queue_node, *int_node;
5293	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5294
5295#define QUEUE_NAME_LEN 32
5296	char namebuf[QUEUE_NAME_LEN];
5297
5298	/* Driver Statistics */
5299	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5300			CTLFLAG_RD, &adapter->link_irq,
5301			"Link MSIX IRQ Handled");
5302	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail",
5303			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
5304			 "Std mbuf failed");
5305	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
5306			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
5307			 "Std mbuf cluster failed");
5308	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5309			CTLFLAG_RD, &adapter->dropped_pkts,
5310			"Driver dropped packets");
5311	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5312			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5313			"Driver tx dma failure in xmit");
5314	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5315			CTLFLAG_RD, &adapter->rx_overruns,
5316			"RX overruns");
5317	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5318			CTLFLAG_RD, &adapter->watchdog_events,
5319			"Watchdog timeouts");
5320
5321	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5322			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5323			em_sysctl_reg_handler, "IU",
5324			"Device Control Register");
5325	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5326			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5327			em_sysctl_reg_handler, "IU",
5328			"Receiver Control Register");
5329	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5330			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5331			"Flow Control High Watermark");
5332	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5333			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5334			"Flow Control Low Watermark");
5335
5336	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5337		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5338		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5339					    CTLFLAG_RD, NULL, "Queue Name");
5340		queue_list = SYSCTL_CHILDREN(queue_node);
5341
5342		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5343				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5344				E1000_TDH(txr->me),
5345				em_sysctl_reg_handler, "IU",
5346 				"Transmit Descriptor Head");
5347		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5348				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5349				E1000_TDT(txr->me),
5350				em_sysctl_reg_handler, "IU",
5351 				"Transmit Descriptor Tail");
5352		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5353				CTLFLAG_RD, &txr->tx_irq,
5354				"Queue MSI-X Transmit Interrupts");
5355		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5356				CTLFLAG_RD, &txr->no_desc_avail,
5357				"Queue No Descriptor Available");
5358
5359		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5360				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5361				E1000_RDH(rxr->me),
5362				em_sysctl_reg_handler, "IU",
5363				"Receive Descriptor Head");
5364		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5365				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5366				E1000_RDT(rxr->me),
5367				em_sysctl_reg_handler, "IU",
5368				"Receive Descriptor Tail");
5369		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5370				CTLFLAG_RD, &rxr->rx_irq,
5371				"Queue MSI-X Receive Interrupts");
5372	}
5373
5374	/* MAC stats get their own sub node */
5375
5376	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5377				    CTLFLAG_RD, NULL, "Statistics");
5378	stat_list = SYSCTL_CHILDREN(stat_node);
5379
5380	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5381			CTLFLAG_RD, &stats->ecol,
5382			"Excessive collisions");
5383	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5384			CTLFLAG_RD, &stats->scc,
5385			"Single collisions");
5386	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5387			CTLFLAG_RD, &stats->mcc,
5388			"Multiple collisions");
5389	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5390			CTLFLAG_RD, &stats->latecol,
5391			"Late collisions");
5392	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5393			CTLFLAG_RD, &stats->colc,
5394			"Collision Count");
5395	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5396			CTLFLAG_RD, &adapter->stats.symerrs,
5397			"Symbol Errors");
5398	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5399			CTLFLAG_RD, &adapter->stats.sec,
5400			"Sequence Errors");
5401	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5402			CTLFLAG_RD, &adapter->stats.dc,
5403			"Defer Count");
5404	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5405			CTLFLAG_RD, &adapter->stats.mpc,
5406			"Missed Packets");
5407	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5408			CTLFLAG_RD, &adapter->stats.rnbc,
5409			"Receive No Buffers");
5410	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5411			CTLFLAG_RD, &adapter->stats.ruc,
5412			"Receive Undersize");
5413	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5414			CTLFLAG_RD, &adapter->stats.rfc,
5415			"Fragmented Packets Received ");
5416	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5417			CTLFLAG_RD, &adapter->stats.roc,
5418			"Oversized Packets Received");
5419	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5420			CTLFLAG_RD, &adapter->stats.rjc,
5421			"Recevied Jabber");
5422	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5423			CTLFLAG_RD, &adapter->stats.rxerrc,
5424			"Receive Errors");
5425	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5426			CTLFLAG_RD, &adapter->stats.crcerrs,
5427			"CRC errors");
5428	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5429			CTLFLAG_RD, &adapter->stats.algnerrc,
5430			"Alignment Errors");
5431	/* On 82575 these are collision counts */
5432	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5433			CTLFLAG_RD, &adapter->stats.cexterr,
5434			"Collision/Carrier extension errors");
5435	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5436			CTLFLAG_RD, &adapter->stats.xonrxc,
5437			"XON Received");
5438	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5439			CTLFLAG_RD, &adapter->stats.xontxc,
5440			"XON Transmitted");
5441	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5442			CTLFLAG_RD, &adapter->stats.xoffrxc,
5443			"XOFF Received");
5444	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5445			CTLFLAG_RD, &adapter->stats.xofftxc,
5446			"XOFF Transmitted");
5447
5448	/* Packet Reception Stats */
5449	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5450			CTLFLAG_RD, &adapter->stats.tpr,
5451			"Total Packets Received ");
5452	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5453			CTLFLAG_RD, &adapter->stats.gprc,
5454			"Good Packets Received");
5455	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5456			CTLFLAG_RD, &adapter->stats.bprc,
5457			"Broadcast Packets Received");
5458	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5459			CTLFLAG_RD, &adapter->stats.mprc,
5460			"Multicast Packets Received");
5461	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5462			CTLFLAG_RD, &adapter->stats.prc64,
5463			"64 byte frames received ");
5464	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5465			CTLFLAG_RD, &adapter->stats.prc127,
5466			"65-127 byte frames received");
5467	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5468			CTLFLAG_RD, &adapter->stats.prc255,
5469			"128-255 byte frames received");
5470	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5471			CTLFLAG_RD, &adapter->stats.prc511,
5472			"256-511 byte frames received");
5473	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5474			CTLFLAG_RD, &adapter->stats.prc1023,
5475			"512-1023 byte frames received");
5476	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5477			CTLFLAG_RD, &adapter->stats.prc1522,
5478			"1023-1522 byte frames received");
5479 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5480 			CTLFLAG_RD, &adapter->stats.gorc,
5481 			"Good Octets Received");
5482
5483	/* Packet Transmission Stats */
5484	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5485			CTLFLAG_RD, &adapter->stats.gotc,
5486			"Good Octets Transmitted");
5487	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5488			CTLFLAG_RD, &adapter->stats.tpt,
5489			"Total Packets Transmitted");
5490	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5491			CTLFLAG_RD, &adapter->stats.gptc,
5492			"Good Packets Transmitted");
5493	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5494			CTLFLAG_RD, &adapter->stats.bptc,
5495			"Broadcast Packets Transmitted");
5496	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5497			CTLFLAG_RD, &adapter->stats.mptc,
5498			"Multicast Packets Transmitted");
5499	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5500			CTLFLAG_RD, &adapter->stats.ptc64,
5501			"64 byte frames transmitted ");
5502	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5503			CTLFLAG_RD, &adapter->stats.ptc127,
5504			"65-127 byte frames transmitted");
5505	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5506			CTLFLAG_RD, &adapter->stats.ptc255,
5507			"128-255 byte frames transmitted");
5508	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5509			CTLFLAG_RD, &adapter->stats.ptc511,
5510			"256-511 byte frames transmitted");
5511	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5512			CTLFLAG_RD, &adapter->stats.ptc1023,
5513			"512-1023 byte frames transmitted");
5514	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5515			CTLFLAG_RD, &adapter->stats.ptc1522,
5516			"1024-1522 byte frames transmitted");
5517	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5518			CTLFLAG_RD, &adapter->stats.tsctc,
5519			"TSO Contexts Transmitted");
5520	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5521			CTLFLAG_RD, &adapter->stats.tsctfc,
5522			"TSO Contexts Failed");
5523
5524
5525	/* Interrupt Stats */
5526
5527	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5528				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5529	int_list = SYSCTL_CHILDREN(int_node);
5530
5531	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5532			CTLFLAG_RD, &adapter->stats.iac,
5533			"Interrupt Assertion Count");
5534
5535	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5536			CTLFLAG_RD, &adapter->stats.icrxptc,
5537			"Interrupt Cause Rx Pkt Timer Expire Count");
5538
5539	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5540			CTLFLAG_RD, &adapter->stats.icrxatc,
5541			"Interrupt Cause Rx Abs Timer Expire Count");
5542
5543	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5544			CTLFLAG_RD, &adapter->stats.ictxptc,
5545			"Interrupt Cause Tx Pkt Timer Expire Count");
5546
5547	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5548			CTLFLAG_RD, &adapter->stats.ictxatc,
5549			"Interrupt Cause Tx Abs Timer Expire Count");
5550
5551	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5552			CTLFLAG_RD, &adapter->stats.ictxqec,
5553			"Interrupt Cause Tx Queue Empty Count");
5554
5555	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5556			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5557			"Interrupt Cause Tx Queue Min Thresh Count");
5558
5559	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5560			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5561			"Interrupt Cause Rx Desc Min Thresh Count");
5562
5563	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5564			CTLFLAG_RD, &adapter->stats.icrxoc,
5565			"Interrupt Cause Receiver Overrun Count");
5566}
5567
5568/**********************************************************************
5569 *
5570 *  This routine provides a way to dump out the adapter eeprom,
5571 *  often a useful debug/service tool. This only dumps the first
5572 *  32 words, stuff that matters is in that extent.
5573 *
5574 **********************************************************************/
5575static int
5576em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5577{
5578	struct adapter *adapter = (struct adapter *)arg1;
5579	int error;
5580	int result;
5581
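	/* Write-to-trigger idiom: reads return -1; writing 1 dumps the EEPROM. */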
5582	result = -1;
5583	error = sysctl_handle_int(oidp, &result, 0, req);
5584
5585	if (error || !req->newptr)
5586		return (error);
5587
5588	/*
5589	 * This value will cause a hex dump of the
5590	 * first 32 16-bit words of the EEPROM to
5591	 * the screen.
5592	 */
5593	if (result == 1)
5594		em_print_nvm_info(adapter);
5595
5596	return (error);
5597}
5598
5599static void
5600em_print_nvm_info(struct adapter *adapter)
5601{
5602	u16	eeprom_data;
5603	int	i, j, row = 0;
5604
5605	/* It's a bit crude, but it gets the job done */
5606	printf("\nInterface EEPROM Dump:\n");
5607	printf("Offset\n0x0000  ");
5608	for (i = 0, j = 0; i < 32; i++, j++) {
5609		if (j == 8) { /* Make the offset block */
5610			j = 0; ++row;
5611			printf("\n0x00%x0  ",row);
5612		}
5613		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5614		printf("%04x ", eeprom_data);
5615	}
5616	printf("\n");
5617}
5618
5619static int
5620em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
5621{
5622	struct em_int_delay_info *info;
5623	struct adapter *adapter;
5624	u32 regval;
5625	int error, usecs, ticks;
5626
5627	info = (struct em_int_delay_info *)arg1;
5628	usecs = info->value;
5629	error = sysctl_handle_int(oidp, &usecs, 0, req);
5630	if (error != 0 || req->newptr == NULL)
5631		return (error);
5632	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
5633		return (EINVAL);
5634	info->value = usecs;
5635	ticks = EM_USECS_TO_TICKS(usecs);
5636	if (info->offset == E1000_ITR)	/* ITR counts in 256ns units, not 1.024us ticks */
5637		ticks *= 4;
5638
5639	adapter = info->adapter;
5640
5641	EM_CORE_LOCK(adapter);
5642	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
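	/* The delay value lives in the low 16 bits; preserve the upper bits. */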
5643	regval = (regval & ~0xffff) | (ticks & 0xffff);
5644	/* Handle a few special cases. */
5645	switch (info->offset) {
5646	case E1000_RDTR:
5647		break;
5648	case E1000_TIDV:
5649		if (ticks == 0) {
5650			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
5651			/* Don't write 0 into the TIDV register. */
5652			regval++;
5653		} else
5654			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
5655		break;
5656	}
5657	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
5658	EM_CORE_UNLOCK(adapter);
5659	return (0);
5660}
5661
5662static void
5663em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
5664	const char *description, struct em_int_delay_info *info,
5665	int offset, int value)
5666{
5667	info->adapter = adapter;
5668	info->offset = offset;
5669	info->value = value;
5670	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
5671	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5672	    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
5673	    info, 0, em_sysctl_int_delay, "I", description);
5674}
5675
5676static void
5677em_set_sysctl_value(struct adapter *adapter, const char *name,
5678	const char *description, int *limit, int value)
5679{
5680	*limit = value;
5681	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5682	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5683	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5684}
5685
5686
5687/*
5688** Set flow control using sysctl:
5689** Flow control values:
5690**      0 - off
5691**      1 - rx pause
5692**      2 - tx pause
5693**      3 - full
5694*/
5695static int
5696em_set_flowcntl(SYSCTL_HANDLER_ARGS)
5697{
5698	int		error;
5699	static int	input = 3; /* default is full */
5700	struct adapter	*adapter = (struct adapter *) arg1;
5701
5702	error = sysctl_handle_int(oidp, &input, 0, req);
5703
5704	if ((error) || (req->newptr == NULL))
5705		return (error);
5706
5707	if (input == adapter->fc) /* no change? */
5708		return (error);
5709
5710	switch (input) {
5711	case e1000_fc_rx_pause:
5712	case e1000_fc_tx_pause:
5713	case e1000_fc_full:
5714	case e1000_fc_none:
5715		adapter->hw.fc.requested_mode = input;
5716		adapter->fc = input;
5717		break;
5718	default:
5719		/* Do nothing */
5720		return (error);
5721	}
5722
5723	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5724	e1000_force_mac_fc(&adapter->hw);
5725	return (error);
5726}
5727
5728/*
5729** Manage Energy Efficient Ethernet:
5730** Control values:
5731**     0 - enabled, 1 - disabled (the value sets eee_disable)
5732*/
5733static int
5734em_sysctl_eee(SYSCTL_HANDLER_ARGS)
5735{
5736	struct adapter *adapter = (struct adapter *) arg1;
5737	int		error, value;
5738
5739	value = adapter->hw.dev_spec.ich8lan.eee_disable;
5740	error = sysctl_handle_int(oidp, &value, 0, req);
5741	if (error || req->newptr == NULL)
5742		return (error);
5743	EM_CORE_LOCK(adapter);
5744	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
5745	em_init_locked(adapter);
5746	EM_CORE_UNLOCK(adapter);
5747	return (0);
5748}
5749
5750static int
5751em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
5752{
5753	struct adapter *adapter;
5754	int error;
5755	int result;
5756
5757	result = -1;
5758	error = sysctl_handle_int(oidp, &result, 0, req);
5759
5760	if (error || !req->newptr)
5761		return (error);
5762
5763	if (result == 1) {
5764		adapter = (struct adapter *)arg1;
5765		em_print_debug_info(adapter);
5766	}
5767
5768	return (error);
5769}
5770
5771/*
5772** This routine is meant to be fluid; add whatever is
5773** needed for debugging a problem.  -jfv
5774*/
5775static void
5776em_print_debug_info(struct adapter *adapter)
5777{
5778	device_t dev = adapter->dev;
5779	struct tx_ring *txr = adapter->tx_rings;
5780	struct rx_ring *rxr = adapter->rx_rings;
5781
5782	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
5783		printf("Interface is RUNNING ");
5784	else
5785		printf("Interface is NOT RUNNING\n");
5786
5787	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
5788		printf("and INACTIVE\n");
5789	else
5790		printf("and ACTIVE\n");
5791
5792	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
5793	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
5794	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
5795	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
5796	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
5797	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
5798	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
5799	device_printf(dev, "TX descriptors avail = %d\n",
5800	    txr->tx_avail);
5801	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
5802	    txr->no_desc_avail);
5803	device_printf(dev, "RX discarded packets = %ld\n",
5804	    rxr->rx_discarded);
5805	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
5806	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
5807}
5808