sfxge.c revision 284555
1/*-
2 * Copyright (c) 2010-2015 Solarflare Communications Inc.
3 * All rights reserved.
4 *
5 * This software was developed in part by Philip Paeps under contract for
6 * Solarflare Communications, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice,
12 *    this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 *    this list of conditions and the following disclaimer in the documentation
15 *    and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
21 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * The views and conclusions contained in the software and documentation are
30 * those of the authors and should not be interpreted as representing official
31 * policies, either expressed or implied, of the FreeBSD Project.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/dev/sfxge/sfxge.c 284555 2015-06-18 15:46:39Z arybchik $");
36
37#include <sys/param.h>
38#include <sys/kernel.h>
39#include <sys/bus.h>
40#include <sys/rman.h>
41#include <sys/lock.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/smp.h>
45#include <sys/socket.h>
46#include <sys/taskqueue.h>
47#include <sys/sockio.h>
48#include <sys/sysctl.h>
49#include <sys/priv.h>
50#include <sys/syslog.h>
51
52#include <dev/pci/pcireg.h>
53#include <dev/pci/pcivar.h>
54
55#include <net/ethernet.h>
56#include <net/if.h>
57#include <net/if_media.h>
58#include <net/if_types.h>
59
60#include "common/efx.h"
61
62#include "sfxge.h"
63#include "sfxge_rx.h"
64#include "sfxge_ioc.h"
65#include "sfxge_version.h"
66
/*
 * Interface capability sets:
 *  - SFXGE_CAP_FIXED:  capabilities SIOCSIFCAP is not allowed to toggle
 *  - SFXGE_CAP:        everything the driver advertises to the stack
 *  - SFXGE_CAP_ENABLE: what is switched on by default (all of SFXGE_CAP)
 */
#define	SFXGE_CAP_FIXED							\
	(IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU | IFCAP_LINKSTATE)
#define	SFXGE_CAP							\
	(SFXGE_CAP_FIXED |						\
	 IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |				\
	 IFCAP_RXCSUM | IFCAP_TXCSUM |					\
	 IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 |			\
	 IFCAP_TSO4 | IFCAP_TSO6)
#define	SFXGE_CAP_ENABLE	SFXGE_CAP
76
77MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver");
78
79
80SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0,
81	    "SFXGE driver parameters");
82
83#define	SFXGE_PARAM_RX_RING	SFXGE_PARAM(rx_ring)
84static int sfxge_rx_ring_entries = SFXGE_NDESCS;
85TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries);
86SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN,
87	   &sfxge_rx_ring_entries, 0,
88	   "Maximum number of descriptors in a receive ring");
89
90#define	SFXGE_PARAM_TX_RING	SFXGE_PARAM(tx_ring)
91static int sfxge_tx_ring_entries = SFXGE_NDESCS;
92TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries);
93SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN,
94	   &sfxge_tx_ring_entries, 0,
95	   "Maximum number of descriptors in a transmit ring");
96
97static void
98sfxge_reset(void *arg, int npending);
99
100static int
101sfxge_estimate_rsrc_limits(struct sfxge_softc *sc)
102{
103	efx_drv_limits_t limits;
104	int rc;
105	unsigned int evq_max;
106	uint32_t evq_allocated;
107	uint32_t rxq_allocated;
108	uint32_t txq_allocated;
109
110	/*
111	 * Limit the number of event queues to:
112	 *  - number of CPUs
113	 *  - hardwire maximum RSS channels
114	 *  - administratively specified maximum RSS channels
115	 */
116	evq_max = MIN(mp_ncpus, EFX_MAXRSS);
117	if (sc->max_rss_channels > 0)
118		evq_max = MIN(evq_max, sc->max_rss_channels);
119
120	memset(&limits, 0, sizeof(limits));
121
122	limits.edl_min_evq_count = 1;
123	limits.edl_max_evq_count = evq_max;
124	limits.edl_min_txq_count = SFXGE_TXQ_NTYPES;
125	limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1;
126	limits.edl_min_rxq_count = 1;
127	limits.edl_max_rxq_count = evq_max;
128
129	efx_nic_set_drv_limits(sc->enp, &limits);
130
131	if ((rc = efx_nic_init(sc->enp)) != 0)
132		return (rc);
133
134	rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated,
135				 &txq_allocated);
136	if (rc != 0) {
137		efx_nic_fini(sc->enp);
138		return (rc);
139	}
140
141	KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES,
142		("txq_allocated < SFXGE_TXQ_NTYPES"));
143
144	sc->evq_max = MIN(evq_allocated, evq_max);
145	sc->evq_max = MIN(rxq_allocated, sc->evq_max);
146	sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1),
147			  sc->evq_max);
148
149	KASSERT(sc->evq_max <= evq_max,
150		("allocated more than maximum requested"));
151
152	/*
153	 * NIC is kept initialized in the case of success to be able to
154	 * initialize port to find out media types.
155	 */
156	return (0);
157}
158
159static int
160sfxge_set_drv_limits(struct sfxge_softc *sc)
161{
162	efx_drv_limits_t limits;
163
164	memset(&limits, 0, sizeof(limits));
165
166	/* Limits are strict since take into account initial estimation */
167	limits.edl_min_evq_count = limits.edl_max_evq_count =
168	    sc->intr.n_alloc;
169	limits.edl_min_txq_count = limits.edl_max_txq_count =
170	    sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1;
171	limits.edl_min_rxq_count = limits.edl_max_rxq_count =
172	    sc->intr.n_alloc;
173
174	return (efx_nic_set_drv_limits(sc->enp, &limits));
175}
176
177static int
178sfxge_start(struct sfxge_softc *sc)
179{
180	int rc;
181
182	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
183
184	if (sc->init_state == SFXGE_STARTED)
185		return (0);
186
187	if (sc->init_state != SFXGE_REGISTERED) {
188		rc = EINVAL;
189		goto fail;
190	}
191
192	/* Set required resource limits */
193	if ((rc = sfxge_set_drv_limits(sc)) != 0)
194		goto fail;
195
196	if ((rc = efx_nic_init(sc->enp)) != 0)
197		goto fail;
198
199	/* Start processing interrupts. */
200	if ((rc = sfxge_intr_start(sc)) != 0)
201		goto fail2;
202
203	/* Start processing events. */
204	if ((rc = sfxge_ev_start(sc)) != 0)
205		goto fail3;
206
207	/* Fire up the port. */
208	if ((rc = sfxge_port_start(sc)) != 0)
209		goto fail4;
210
211	/* Start the receiver side. */
212	if ((rc = sfxge_rx_start(sc)) != 0)
213		goto fail5;
214
215	/* Start the transmitter side. */
216	if ((rc = sfxge_tx_start(sc)) != 0)
217		goto fail6;
218
219	sc->init_state = SFXGE_STARTED;
220
221	/* Tell the stack we're running. */
222	sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING;
223	sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE;
224
225	return (0);
226
227fail6:
228	sfxge_rx_stop(sc);
229
230fail5:
231	sfxge_port_stop(sc);
232
233fail4:
234	sfxge_ev_stop(sc);
235
236fail3:
237	sfxge_intr_stop(sc);
238
239fail2:
240	efx_nic_fini(sc->enp);
241
242fail:
243	device_printf(sc->dev, "sfxge_start: %d\n", rc);
244
245	return (rc);
246}
247
/*
 * if_init callback: start the adapter under the adapter lock.  The
 * callback returns nothing, so the result of sfxge_start() is dropped.
 */
static void
sfxge_if_init(void *arg)
{
	struct sfxge_softc *sc = (struct sfxge_softc *)arg;

	SFXGE_ADAPTER_LOCK(sc);
	(void)sfxge_start(sc);
	SFXGE_ADAPTER_UNLOCK(sc);
}
259
260static void
261sfxge_stop(struct sfxge_softc *sc)
262{
263	SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc);
264
265	if (sc->init_state != SFXGE_STARTED)
266		return;
267
268	sc->init_state = SFXGE_REGISTERED;
269
270	/* Stop the transmitter. */
271	sfxge_tx_stop(sc);
272
273	/* Stop the receiver. */
274	sfxge_rx_stop(sc);
275
276	/* Stop the port. */
277	sfxge_port_stop(sc);
278
279	/* Stop processing events. */
280	sfxge_ev_stop(sc);
281
282	/* Stop processing interrupts. */
283	sfxge_intr_stop(sc);
284
285	efx_nic_fini(sc->enp);
286
287	sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING;
288}
289
290
291static int
292sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
293{
294	efx_vpd_value_t value;
295	int rc = 0;
296
297	switch (ioc->u.vpd.op) {
298	case SFXGE_VPD_OP_GET_KEYWORD:
299		value.evv_tag = ioc->u.vpd.tag;
300		value.evv_keyword = ioc->u.vpd.keyword;
301		rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value);
302		if (rc != 0)
303			break;
304		ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length);
305		if (ioc->u.vpd.payload != 0) {
306			rc = copyout(value.evv_value, ioc->u.vpd.payload,
307				     ioc->u.vpd.len);
308		}
309		break;
310	case SFXGE_VPD_OP_SET_KEYWORD:
311		if (ioc->u.vpd.len > sizeof(value.evv_value))
312			return (EINVAL);
313		value.evv_tag = ioc->u.vpd.tag;
314		value.evv_keyword = ioc->u.vpd.keyword;
315		value.evv_length = ioc->u.vpd.len;
316		rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length);
317		if (rc != 0)
318			break;
319		rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value);
320		if (rc != 0)
321			break;
322		rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size);
323		if (rc != 0)
324			break;
325		rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size);
326		break;
327	default:
328		rc = EOPNOTSUPP;
329		break;
330	}
331
332	return (rc);
333}
334
335static int
336sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc)
337{
338	switch (ioc->op) {
339	case SFXGE_MCDI_IOC:
340		return (sfxge_mcdi_ioctl(sc, ioc));
341	case SFXGE_NVRAM_IOC:
342		return (sfxge_nvram_ioctl(sc, ioc));
343	case SFXGE_VPD_IOC:
344		return (sfxge_vpd_ioctl(sc, ioc));
345	default:
346		return (EOPNOTSUPP);
347	}
348}
349
350
351static int
352sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
353{
354	struct sfxge_softc *sc;
355	struct ifreq *ifr;
356	sfxge_ioc_t ioc;
357	int error;
358
359	ifr = (struct ifreq *)data;
360	sc = ifp->if_softc;
361	error = 0;
362
363	switch (command) {
364	case SIOCSIFFLAGS:
365		SFXGE_ADAPTER_LOCK(sc);
366		if (ifp->if_flags & IFF_UP) {
367			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
368				if ((ifp->if_flags ^ sc->if_flags) &
369				    (IFF_PROMISC | IFF_ALLMULTI)) {
370					sfxge_mac_filter_set(sc);
371				}
372			} else
373				sfxge_start(sc);
374		} else
375			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
376				sfxge_stop(sc);
377		sc->if_flags = ifp->if_flags;
378		SFXGE_ADAPTER_UNLOCK(sc);
379		break;
380	case SIOCSIFMTU:
381		if (ifr->ifr_mtu == ifp->if_mtu) {
382			/* Nothing to do */
383			error = 0;
384		} else if (ifr->ifr_mtu > SFXGE_MAX_MTU) {
385			error = EINVAL;
386		} else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
387			ifp->if_mtu = ifr->ifr_mtu;
388			error = 0;
389		} else {
390			/* Restart required */
391			SFXGE_ADAPTER_LOCK(sc);
392			sfxge_stop(sc);
393			ifp->if_mtu = ifr->ifr_mtu;
394			error = sfxge_start(sc);
395			SFXGE_ADAPTER_UNLOCK(sc);
396			if (error != 0) {
397				ifp->if_flags &= ~IFF_UP;
398				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
399				if_down(ifp);
400			}
401		}
402		break;
403	case SIOCADDMULTI:
404	case SIOCDELMULTI:
405		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
406			sfxge_mac_filter_set(sc);
407		break;
408	case SIOCSIFCAP:
409	{
410		int reqcap = ifr->ifr_reqcap;
411		int capchg_mask;
412
413		SFXGE_ADAPTER_LOCK(sc);
414
415		/* Capabilities to be changed in accordance with request */
416		capchg_mask = ifp->if_capenable ^ reqcap;
417
418		/*
419		 * The networking core already rejects attempts to
420		 * enable capabilities we don't have.  We still have
421		 * to reject attempts to disable capabilities that we
422		 * can't (yet) disable.
423		 */
424		KASSERT((reqcap & ~ifp->if_capabilities) == 0,
425		    ("Unsupported capabilities 0x%x requested 0x%x vs "
426		     "supported 0x%x",
427		     reqcap & ~ifp->if_capabilities,
428		     reqcap , ifp->if_capabilities));
429		if (capchg_mask & SFXGE_CAP_FIXED) {
430			error = EINVAL;
431			SFXGE_ADAPTER_UNLOCK(sc);
432			break;
433		}
434
435		/* Check request before any changes */
436		if ((capchg_mask & IFCAP_TSO4) &&
437		    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) {
438			error = EAGAIN;
439			SFXGE_ADAPTER_UNLOCK(sc);
440			if_printf(ifp, "enable txcsum before tso4\n");
441			break;
442		}
443		if ((capchg_mask & IFCAP_TSO6) &&
444		    (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) {
445			error = EAGAIN;
446			SFXGE_ADAPTER_UNLOCK(sc);
447			if_printf(ifp, "enable txcsum6 before tso6\n");
448			break;
449		}
450
451		if (reqcap & IFCAP_TXCSUM) {
452			ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
453		} else {
454			ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP);
455			if (reqcap & IFCAP_TSO4) {
456				reqcap &= ~IFCAP_TSO4;
457				if_printf(ifp,
458				    "tso4 disabled due to -txcsum\n");
459			}
460		}
461		if (reqcap & IFCAP_TXCSUM_IPV6) {
462			ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
463		} else {
464			ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6);
465			if (reqcap & IFCAP_TSO6) {
466				reqcap &= ~IFCAP_TSO6;
467				if_printf(ifp,
468				    "tso6 disabled due to -txcsum6\n");
469			}
470		}
471
472		/*
473		 * The kernel takes both IFCAP_TSOx and CSUM_TSO into
474		 * account before using TSO. So, we do not touch
475		 * checksum flags when IFCAP_TSOx is modified.
476		 * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO),
477		 * but both bits are set in IPv4 and IPv6 mbufs.
478		 */
479
480		ifp->if_capenable = reqcap;
481
482		SFXGE_ADAPTER_UNLOCK(sc);
483		break;
484	}
485	case SIOCSIFMEDIA:
486	case SIOCGIFMEDIA:
487		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
488		break;
489	case SIOCGPRIVATE_0:
490		error = priv_check(curthread, PRIV_DRIVER);
491		if (error != 0)
492			break;
493		error = copyin(ifr->ifr_data, &ioc, sizeof(ioc));
494		if (error != 0)
495			return (error);
496		error = sfxge_private_ioctl(sc, &ioc);
497		if (error == 0) {
498			error = copyout(&ioc, ifr->ifr_data, sizeof(ioc));
499		}
500		break;
501	default:
502		error = ether_ioctl(ifp, command, data);
503	}
504
505	return (error);
506}
507
508static void
509sfxge_ifnet_fini(struct ifnet *ifp)
510{
511	struct sfxge_softc *sc = ifp->if_softc;
512
513	SFXGE_ADAPTER_LOCK(sc);
514	sfxge_stop(sc);
515	SFXGE_ADAPTER_UNLOCK(sc);
516
517	ifmedia_removeall(&sc->media);
518	ether_ifdetach(ifp);
519	if_free(ifp);
520}
521
522static int
523sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc)
524{
525	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp);
526	device_t dev;
527	int rc;
528
529	dev = sc->dev;
530	sc->ifnet = ifp;
531
532	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
533	ifp->if_init = sfxge_if_init;
534	ifp->if_softc = sc;
535	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
536	ifp->if_ioctl = sfxge_if_ioctl;
537
538	ifp->if_capabilities = SFXGE_CAP;
539	ifp->if_capenable = SFXGE_CAP_ENABLE;
540
541#ifdef SFXGE_LRO
542	ifp->if_capabilities |= IFCAP_LRO;
543	ifp->if_capenable |= IFCAP_LRO;
544#endif
545
546	if (encp->enc_hw_tx_insert_vlan_enabled) {
547		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
548		ifp->if_capenable |= IFCAP_VLAN_HWTAGGING;
549	}
550	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
551			   CSUM_TCP_IPV6 | CSUM_UDP_IPV6;
552
553	ether_ifattach(ifp, encp->enc_mac_addr);
554
555	ifp->if_transmit = sfxge_if_transmit;
556	ifp->if_qflush = sfxge_if_qflush;
557
558	DBGPRINT(sc->dev, "ifmedia_init");
559	if ((rc = sfxge_port_ifmedia_init(sc)) != 0)
560		goto fail;
561
562	return (0);
563
564fail:
565	ether_ifdetach(sc->ifnet);
566	return (rc);
567}
568
569void
570sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp)
571{
572	KASSERT(sc->buffer_table_next + n <=
573		efx_nic_cfg_get(sc->enp)->enc_buftbl_limit,
574		("buffer table full"));
575
576	*idp = sc->buffer_table_next;
577	sc->buffer_table_next += n;
578}
579
580static int
581sfxge_bar_init(struct sfxge_softc *sc)
582{
583	efsys_bar_t *esbp = &sc->bar;
584
585	esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR);
586	if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
587	    &esbp->esb_rid, RF_ACTIVE)) == NULL) {
588		device_printf(sc->dev, "Cannot allocate BAR region %d\n",
589		    EFX_MEM_BAR);
590		return (ENXIO);
591	}
592	esbp->esb_tag = rman_get_bustag(esbp->esb_res);
593	esbp->esb_handle = rman_get_bushandle(esbp->esb_res);
594
595	SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev));
596
597	return (0);
598}
599
600static void
601sfxge_bar_fini(struct sfxge_softc *sc)
602{
603	efsys_bar_t *esbp = &sc->bar;
604
605	bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid,
606	    esbp->esb_res);
607	SFXGE_BAR_LOCK_DESTROY(esbp);
608}
609
610static int
611sfxge_create(struct sfxge_softc *sc)
612{
613	device_t dev;
614	efx_nic_t *enp;
615	int error;
616	char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))];
617
618	dev = sc->dev;
619
620	SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev));
621
622	sc->max_rss_channels = 0;
623	snprintf(rss_param_name, sizeof(rss_param_name),
624		 SFXGE_PARAM(%d.max_rss_channels),
625		 (int)device_get_unit(dev));
626	TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels);
627
628	sc->stats_node = SYSCTL_ADD_NODE(
629		device_get_sysctl_ctx(dev),
630		SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
631		OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics");
632	if (sc->stats_node == NULL) {
633		error = ENOMEM;
634		goto fail;
635	}
636
637	TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc);
638
639	(void) pci_enable_busmaster(dev);
640
641	/* Initialize DMA mappings. */
642	DBGPRINT(sc->dev, "dma_init...");
643	if ((error = sfxge_dma_init(sc)) != 0)
644		goto fail;
645
646	/* Map the device registers. */
647	DBGPRINT(sc->dev, "bar_init...");
648	if ((error = sfxge_bar_init(sc)) != 0)
649		goto fail;
650
651	error = efx_family(pci_get_vendor(dev), pci_get_device(dev),
652	    &sc->family);
653	KASSERT(error == 0, ("Family should be filtered by sfxge_probe()"));
654
655	DBGPRINT(sc->dev, "nic_create...");
656
657	/* Create the common code nic object. */
658	SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock,
659			      device_get_nameunit(sc->dev), "nic");
660	if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc,
661	    &sc->bar, &sc->enp_lock, &enp)) != 0)
662		goto fail3;
663	sc->enp = enp;
664
665	if (!ISP2(sfxge_rx_ring_entries) ||
666	    (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) ||
667	    (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) {
668		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
669		    SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries,
670		    EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS);
671		error = EINVAL;
672		goto fail_rx_ring_entries;
673	}
674	sc->rxq_entries = sfxge_rx_ring_entries;
675
676	if (!ISP2(sfxge_tx_ring_entries) ||
677	    (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) ||
678	    (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) {
679		log(LOG_ERR, "%s=%d must be power of 2 from %u to %u",
680		    SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries,
681		    EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)));
682		error = EINVAL;
683		goto fail_tx_ring_entries;
684	}
685	sc->txq_entries = sfxge_tx_ring_entries;
686
687	/* Initialize MCDI to talk to the microcontroller. */
688	DBGPRINT(sc->dev, "mcdi_init...");
689	if ((error = sfxge_mcdi_init(sc)) != 0)
690		goto fail4;
691
692	/* Probe the NIC and build the configuration data area. */
693	DBGPRINT(sc->dev, "nic_probe...");
694	if ((error = efx_nic_probe(enp)) != 0)
695		goto fail5;
696
697	SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev),
698			  SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
699			  OID_AUTO, "version", CTLFLAG_RD,
700			  SFXGE_VERSION_STRING, 0,
701			  "Driver version");
702
703	SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev),
704			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
705			OID_AUTO, "phy_type", CTLFLAG_RD,
706			NULL, efx_nic_cfg_get(enp)->enc_phy_type,
707			"PHY type");
708
709	/* Initialize the NVRAM. */
710	DBGPRINT(sc->dev, "nvram_init...");
711	if ((error = efx_nvram_init(enp)) != 0)
712		goto fail6;
713
714	/* Initialize the VPD. */
715	DBGPRINT(sc->dev, "vpd_init...");
716	if ((error = efx_vpd_init(enp)) != 0)
717		goto fail7;
718
719	efx_mcdi_new_epoch(enp);
720
721	/* Reset the NIC. */
722	DBGPRINT(sc->dev, "nic_reset...");
723	if ((error = efx_nic_reset(enp)) != 0)
724		goto fail8;
725
726	/* Initialize buffer table allocation. */
727	sc->buffer_table_next = 0;
728
729	/*
730	 * Guarantee minimum and estimate maximum number of event queues
731	 * to take it into account when MSI-X interrupts are allocated.
732	 * It initializes NIC and keeps it initialized on success.
733	 */
734	if ((error = sfxge_estimate_rsrc_limits(sc)) != 0)
735		goto fail8;
736
737	/* Set up interrupts. */
738	DBGPRINT(sc->dev, "intr_init...");
739	if ((error = sfxge_intr_init(sc)) != 0)
740		goto fail9;
741
742	/* Initialize event processing state. */
743	DBGPRINT(sc->dev, "ev_init...");
744	if ((error = sfxge_ev_init(sc)) != 0)
745		goto fail11;
746
747	/* Initialize port state. */
748	DBGPRINT(sc->dev, "port_init...");
749	if ((error = sfxge_port_init(sc)) != 0)
750		goto fail12;
751
752	/* Initialize receive state. */
753	DBGPRINT(sc->dev, "rx_init...");
754	if ((error = sfxge_rx_init(sc)) != 0)
755		goto fail13;
756
757	/* Initialize transmit state. */
758	DBGPRINT(sc->dev, "tx_init...");
759	if ((error = sfxge_tx_init(sc)) != 0)
760		goto fail14;
761
762	sc->init_state = SFXGE_INITIALIZED;
763
764	DBGPRINT(sc->dev, "success");
765	return (0);
766
767fail14:
768	sfxge_rx_fini(sc);
769
770fail13:
771	sfxge_port_fini(sc);
772
773fail12:
774	sfxge_ev_fini(sc);
775
776fail11:
777	sfxge_intr_fini(sc);
778
779fail9:
780	efx_nic_fini(sc->enp);
781
782fail8:
783	efx_vpd_fini(enp);
784
785fail7:
786	efx_nvram_fini(enp);
787
788fail6:
789	efx_nic_unprobe(enp);
790
791fail5:
792	sfxge_mcdi_fini(sc);
793
794fail4:
795fail_tx_ring_entries:
796fail_rx_ring_entries:
797	sc->enp = NULL;
798	efx_nic_destroy(enp);
799	SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock);
800
801fail3:
802	sfxge_bar_fini(sc);
803	(void) pci_disable_busmaster(sc->dev);
804
805fail:
806	DBGPRINT(sc->dev, "failed %d", error);
807	sc->dev = NULL;
808	SFXGE_ADAPTER_LOCK_DESTROY(sc);
809	return (error);
810}
811
812static void
813sfxge_destroy(struct sfxge_softc *sc)
814{
815	efx_nic_t *enp;
816
817	/* Clean up transmit state. */
818	sfxge_tx_fini(sc);
819
820	/* Clean up receive state. */
821	sfxge_rx_fini(sc);
822
823	/* Clean up port state. */
824	sfxge_port_fini(sc);
825
826	/* Clean up event processing state. */
827	sfxge_ev_fini(sc);
828
829	/* Clean up interrupts. */
830	sfxge_intr_fini(sc);
831
832	/* Tear down common code subsystems. */
833	efx_nic_reset(sc->enp);
834	efx_vpd_fini(sc->enp);
835	efx_nvram_fini(sc->enp);
836	efx_nic_unprobe(sc->enp);
837
838	/* Tear down MCDI. */
839	sfxge_mcdi_fini(sc);
840
841	/* Destroy common code context. */
842	enp = sc->enp;
843	sc->enp = NULL;
844	efx_nic_destroy(enp);
845
846	/* Free DMA memory. */
847	sfxge_dma_fini(sc);
848
849	/* Free mapped BARs. */
850	sfxge_bar_fini(sc);
851
852	(void) pci_disable_busmaster(sc->dev);
853
854	taskqueue_drain(taskqueue_thread, &sc->task_reset);
855
856	/* Destroy the softc lock. */
857	SFXGE_ADAPTER_LOCK_DESTROY(sc);
858}
859
860static int
861sfxge_vpd_handler(SYSCTL_HANDLER_ARGS)
862{
863	struct sfxge_softc *sc = arg1;
864	efx_vpd_value_t value;
865	int rc;
866
867	value.evv_tag = arg2 >> 16;
868	value.evv_keyword = arg2 & 0xffff;
869	if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value))
870	    != 0)
871		return (rc);
872
873	return (SYSCTL_OUT(req, value.evv_value, value.evv_length));
874}
875
876static void
877sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list,
878		  efx_vpd_tag_t tag, const char *keyword)
879{
880	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
881	efx_vpd_value_t value;
882
883	/* Check whether VPD tag/keyword is present */
884	value.evv_tag = tag;
885	value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]);
886	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0)
887		return;
888
889	SYSCTL_ADD_PROC(
890		ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD,
891		sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]),
892		sfxge_vpd_handler, "A", "");
893}
894
895static int
896sfxge_vpd_init(struct sfxge_softc *sc)
897{
898	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
899	struct sysctl_oid *vpd_node;
900	struct sysctl_oid_list *vpd_list;
901	char keyword[3];
902	efx_vpd_value_t value;
903	int rc;
904
905	if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0)
906		goto fail;
907	sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK);
908	if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0)
909		goto fail2;
910
911	/* Copy ID (product name) into device description, and log it. */
912	value.evv_tag = EFX_VPD_ID;
913	if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) {
914		value.evv_value[value.evv_length] = 0;
915		device_set_desc_copy(sc->dev, value.evv_value);
916		device_printf(sc->dev, "%s\n", value.evv_value);
917	}
918
919	vpd_node = SYSCTL_ADD_NODE(
920		ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
921		OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data");
922	vpd_list = SYSCTL_CHILDREN(vpd_node);
923
924	/* Add sysctls for all expected and any vendor-defined keywords. */
925	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN");
926	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC");
927	sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN");
928	keyword[0] = 'V';
929	keyword[2] = 0;
930	for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++)
931		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
932	for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++)
933		sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword);
934
935	return (0);
936
937fail2:
938	free(sc->vpd_data, M_SFXGE);
939fail:
940	return (rc);
941}
942
943static void
944sfxge_vpd_fini(struct sfxge_softc *sc)
945{
946	free(sc->vpd_data, M_SFXGE);
947}
948
949static void
950sfxge_reset(void *arg, int npending)
951{
952	struct sfxge_softc *sc;
953	int rc;
954	unsigned attempt;
955
956	(void)npending;
957
958	sc = (struct sfxge_softc *)arg;
959
960	SFXGE_ADAPTER_LOCK(sc);
961
962	if (sc->init_state != SFXGE_STARTED)
963		goto done;
964
965	sfxge_stop(sc);
966	efx_nic_reset(sc->enp);
967	for (attempt = 0; attempt < 3; ++attempt) {
968		if ((rc = sfxge_start(sc)) == 0)
969			goto done;
970
971		device_printf(sc->dev, "start on reset failed (%d)\n", rc);
972		DELAY(100000);
973	}
974
975	device_printf(sc->dev, "reset failed; interface is now stopped\n");
976
977done:
978	SFXGE_ADAPTER_UNLOCK(sc);
979}
980
981void
982sfxge_schedule_reset(struct sfxge_softc *sc)
983{
984	taskqueue_enqueue(taskqueue_thread, &sc->task_reset);
985}
986
987static int
988sfxge_attach(device_t dev)
989{
990	struct sfxge_softc *sc;
991	struct ifnet *ifp;
992	int error;
993
994	sc = device_get_softc(dev);
995	sc->dev = dev;
996
997	/* Allocate ifnet. */
998	ifp = if_alloc(IFT_ETHER);
999	if (ifp == NULL) {
1000		device_printf(dev, "Couldn't allocate ifnet\n");
1001		error = ENOMEM;
1002		goto fail;
1003	}
1004	sc->ifnet = ifp;
1005
1006	/* Initialize hardware. */
1007	DBGPRINT(sc->dev, "create nic");
1008	if ((error = sfxge_create(sc)) != 0)
1009		goto fail2;
1010
1011	/* Create the ifnet for the port. */
1012	DBGPRINT(sc->dev, "init ifnet");
1013	if ((error = sfxge_ifnet_init(ifp, sc)) != 0)
1014		goto fail3;
1015
1016	DBGPRINT(sc->dev, "init vpd");
1017	if ((error = sfxge_vpd_init(sc)) != 0)
1018		goto fail4;
1019
1020	/*
1021	 * NIC is initialized inside sfxge_create() and kept inialized
1022	 * to be able to initialize port to discover media types in
1023	 * sfxge_ifnet_init().
1024	 */
1025	efx_nic_fini(sc->enp);
1026
1027	sc->init_state = SFXGE_REGISTERED;
1028
1029	DBGPRINT(sc->dev, "success");
1030	return (0);
1031
1032fail4:
1033	sfxge_ifnet_fini(ifp);
1034fail3:
1035	efx_nic_fini(sc->enp);
1036	sfxge_destroy(sc);
1037
1038fail2:
1039	if_free(sc->ifnet);
1040
1041fail:
1042	DBGPRINT(sc->dev, "failed %d", error);
1043	return (error);
1044}
1045
1046static int
1047sfxge_detach(device_t dev)
1048{
1049	struct sfxge_softc *sc;
1050
1051	sc = device_get_softc(dev);
1052
1053	sfxge_vpd_fini(sc);
1054
1055	/* Destroy the ifnet. */
1056	sfxge_ifnet_fini(sc->ifnet);
1057
1058	/* Tear down hardware. */
1059	sfxge_destroy(sc);
1060
1061	return (0);
1062}
1063
1064static int
1065sfxge_probe(device_t dev)
1066{
1067	uint16_t pci_vendor_id;
1068	uint16_t pci_device_id;
1069	efx_family_t family;
1070	int rc;
1071
1072	pci_vendor_id = pci_get_vendor(dev);
1073	pci_device_id = pci_get_device(dev);
1074
1075	DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id);
1076	rc = efx_family(pci_vendor_id, pci_device_id, &family);
1077	if (rc != 0) {
1078		DBGPRINT(dev, "efx_family fail %d", rc);
1079		return (ENXIO);
1080	}
1081
1082	if (family == EFX_FAMILY_SIENA) {
1083		device_set_desc(dev, "Solarflare SFC9000 family");
1084		return (0);
1085	}
1086
1087	if (family == EFX_FAMILY_HUNTINGTON) {
1088		device_set_desc(dev, "Solarflare SFC9100 family");
1089		return (0);
1090	}
1091
1092	DBGPRINT(dev, "impossible controller family %d", family);
1093	return (ENXIO);
1094}
1095
1096static device_method_t sfxge_methods[] = {
1097	DEVMETHOD(device_probe,		sfxge_probe),
1098	DEVMETHOD(device_attach,	sfxge_attach),
1099	DEVMETHOD(device_detach,	sfxge_detach),
1100
1101	DEVMETHOD_END
1102};
1103
1104static devclass_t sfxge_devclass;
1105
1106static driver_t sfxge_driver = {
1107	"sfxge",
1108	sfxge_methods,
1109	sizeof(struct sfxge_softc)
1110};
1111
1112DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0);
1113