1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18 */
19
20/* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22#include <sys/cdefs.h>
23__FBSDID("$FreeBSD: stable/10/sys/dev/vmware/vmxnet3/if_vmx.c 274043 2014-11-03 12:38:29Z hselasky $");
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/kernel.h>
28#include <sys/endian.h>
29#include <sys/sockio.h>
30#include <sys/mbuf.h>
31#include <sys/malloc.h>
32#include <sys/module.h>
33#include <sys/socket.h>
34#include <sys/sysctl.h>
35#include <sys/smp.h>
36#include <sys/taskqueue.h>
37#include <vm/vm.h>
38#include <vm/pmap.h>
39
40#include <net/ethernet.h>
41#include <net/if.h>
42#include <net/if_arp.h>
43#include <net/if_dl.h>
44#include <net/if_types.h>
45#include <net/if_media.h>
46#include <net/if_vlan_var.h>
47
48#include <net/bpf.h>
49
50#include <netinet/in_systm.h>
51#include <netinet/in.h>
52#include <netinet/ip.h>
53#include <netinet/ip6.h>
54#include <netinet6/ip6_var.h>
55#include <netinet/udp.h>
56#include <netinet/tcp.h>
57
58#include <machine/in_cksum.h>
59
60#include <machine/bus.h>
61#include <machine/resource.h>
62#include <sys/bus.h>
63#include <sys/rman.h>
64
65#include <dev/pci/pcireg.h>
66#include <dev/pci/pcivar.h>
67
68#include "if_vmxreg.h"
69#include "if_vmxvar.h"
70
71#include "opt_inet.h"
72#include "opt_inet6.h"
73
74#ifdef VMXNET3_FAILPOINTS
75#include <sys/fail.h>
76static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
77    "vmxnet3 fail points");
78#define VMXNET3_FP	_debug_fail_point_vmxnet3
79#endif
80
81static int	vmxnet3_probe(device_t);
82static int	vmxnet3_attach(device_t);
83static int	vmxnet3_detach(device_t);
84static int	vmxnet3_shutdown(device_t);
85
86static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
87static void	vmxnet3_free_resources(struct vmxnet3_softc *);
88static int	vmxnet3_check_version(struct vmxnet3_softc *);
89static void	vmxnet3_initial_config(struct vmxnet3_softc *);
90static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
91
92static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
93static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
94static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
95static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
96		    struct vmxnet3_interrupt *);
97static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
98static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
99static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
100static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
101static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
102
103static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
104		    struct vmxnet3_interrupt *);
105static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
106
107#ifndef VMXNET3_LEGACY_TX
108static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
109static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
110static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
111static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
112#endif
113
114static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
115static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
116static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
117static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
118static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
119static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
120
121static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
122static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
123static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
124static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
125static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
126static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
127static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
128static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
129static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
130static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
131static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
132static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
133static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
134static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
135static void	vmxnet3_free_data(struct vmxnet3_softc *);
136static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
137
138static void	vmxnet3_evintr(struct vmxnet3_softc *);
139static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
140static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
141static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
142static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
143		    struct vmxnet3_rxring *, int);
144static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
145static void	vmxnet3_legacy_intr(void *);
146static void	vmxnet3_txq_intr(void *);
147static void	vmxnet3_rxq_intr(void *);
148static void	vmxnet3_event_intr(void *);
149
150static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
151static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
152static void	vmxnet3_stop(struct vmxnet3_softc *);
153
154static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
155static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
156static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
157static int	vmxnet3_enable_device(struct vmxnet3_softc *);
158static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
159static int	vmxnet3_reinit(struct vmxnet3_softc *);
160static void	vmxnet3_init_locked(struct vmxnet3_softc *);
161static void	vmxnet3_init(void *);
162
163static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *,struct mbuf *,
164		    int *, int *, int *);
165static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
166		    bus_dmamap_t, bus_dma_segment_t [], int *);
167static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
168static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
169#ifdef VMXNET3_LEGACY_TX
170static void	vmxnet3_start_locked(struct ifnet *);
171static void	vmxnet3_start(struct ifnet *);
172#else
173static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
174		    struct mbuf *);
175static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
176static void	vmxnet3_txq_tq_deferred(void *, int);
177#endif
178static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
179static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
180
181static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
182		    uint16_t);
183static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
184static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
185static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
186static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
187static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
188
189#ifndef VMXNET3_LEGACY_TX
190static void	vmxnet3_qflush(struct ifnet *);
191#endif
192
193static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
194static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
195static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
196		    struct vmxnet3_txq_stats *);
197static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
198		    struct vmxnet3_rxq_stats *);
199static void	vmxnet3_tick(void *);
200static void	vmxnet3_link_status(struct vmxnet3_softc *);
201static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
202static int	vmxnet3_media_change(struct ifnet *);
203static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
204static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
205
206static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
207		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
208static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
209		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
210static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
211		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
212static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
213
214static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
215		    uint32_t);
216static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
217static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
218		    uint32_t);
219static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
220static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
221
222static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
223static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
224static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
225static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
226
227static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
228		    bus_size_t, struct vmxnet3_dma_alloc *);
229static void	vmxnet3_dma_free(struct vmxnet3_softc *,
230		    struct vmxnet3_dma_alloc *);
231static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
232		    const char *, int);
233
234typedef enum {
235	VMXNET3_BARRIER_RD,
236	VMXNET3_BARRIER_WR,
237	VMXNET3_BARRIER_RDWR,
238} vmxnet3_barrier_t;
239
240static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
241
242/* Tunables. */
243static int vmxnet3_mq_disable = 0;
244TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
245static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
246TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
247static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
248TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
249static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
250TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
251static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
252TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
253
254static device_method_t vmxnet3_methods[] = {
255	/* Device interface. */
256	DEVMETHOD(device_probe,		vmxnet3_probe),
257	DEVMETHOD(device_attach,	vmxnet3_attach),
258	DEVMETHOD(device_detach,	vmxnet3_detach),
259	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
260
261	DEVMETHOD_END
262};
263
264static driver_t vmxnet3_driver = {
265	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
266};
267
268static devclass_t vmxnet3_devclass;
269DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
270
271MODULE_DEPEND(vmx, pci, 1, 1, 1);
272MODULE_DEPEND(vmx, ether, 1, 1, 1);
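
/*
 * When built as a module, the driver can be loaded at boot with a
 * loader.conf(5) entry such as if_vmx_load="YES" (module name assumed
 * here from the usual if_<driver> convention).
 */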
273
274#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
275#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
276
277static int
278vmxnet3_probe(device_t dev)
279{
280
281	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
282	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
283		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
284		return (BUS_PROBE_DEFAULT);
285	}
286
287	return (ENXIO);
288}
289
290static int
291vmxnet3_attach(device_t dev)
292{
293	struct vmxnet3_softc *sc;
294	int error;
295
296	sc = device_get_softc(dev);
297	sc->vmx_dev = dev;
298
299	pci_enable_busmaster(dev);
300
301	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
302	callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
303
304	vmxnet3_initial_config(sc);
305
306	error = vmxnet3_alloc_resources(sc);
307	if (error)
308		goto fail;
309
310	error = vmxnet3_check_version(sc);
311	if (error)
312		goto fail;
313
314	error = vmxnet3_alloc_rxtx_queues(sc);
315	if (error)
316		goto fail;
317
318#ifndef VMXNET3_LEGACY_TX
319	error = vmxnet3_alloc_taskqueue(sc);
320	if (error)
321		goto fail;
322#endif
323
324	error = vmxnet3_alloc_interrupts(sc);
325	if (error)
326		goto fail;
327
328	vmxnet3_check_multiqueue(sc);
329
330	error = vmxnet3_alloc_data(sc);
331	if (error)
332		goto fail;
333
334	error = vmxnet3_setup_interface(sc);
335	if (error)
336		goto fail;
337
338	error = vmxnet3_setup_interrupts(sc);
339	if (error) {
340		ether_ifdetach(sc->vmx_ifp);
341		device_printf(dev, "could not set up interrupt\n");
342		goto fail;
343	}
344
345	vmxnet3_setup_sysctl(sc);
346#ifndef VMXNET3_LEGACY_TX
347	vmxnet3_start_taskqueue(sc);
348#endif
349
350fail:
351	if (error)
352		vmxnet3_detach(dev);
353
354	return (error);
355}
356
357static int
358vmxnet3_detach(device_t dev)
359{
360	struct vmxnet3_softc *sc;
361	struct ifnet *ifp;
362
363	sc = device_get_softc(dev);
364	ifp = sc->vmx_ifp;
365
366	if (device_is_attached(dev)) {
367		VMXNET3_CORE_LOCK(sc);
368		vmxnet3_stop(sc);
369		VMXNET3_CORE_UNLOCK(sc);
370
371		callout_drain(&sc->vmx_tick);
372#ifndef VMXNET3_LEGACY_TX
373		vmxnet3_drain_taskqueue(sc);
374#endif
375
376		ether_ifdetach(ifp);
377	}
378
379	if (sc->vmx_vlan_attach != NULL) {
380		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
381		sc->vmx_vlan_attach = NULL;
382	}
383	if (sc->vmx_vlan_detach != NULL) {
384		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
385		sc->vmx_vlan_detach = NULL;
386	}
387
388#ifndef VMXNET3_LEGACY_TX
389	vmxnet3_free_taskqueue(sc);
390#endif
391	vmxnet3_free_interrupts(sc);
392
393	if (ifp != NULL) {
394		if_free(ifp);
395		sc->vmx_ifp = NULL;
396	}
397
398	ifmedia_removeall(&sc->vmx_media);
399
400	vmxnet3_free_data(sc);
401	vmxnet3_free_resources(sc);
402	vmxnet3_free_rxtx_queues(sc);
403
404	VMXNET3_CORE_LOCK_DESTROY(sc);
405
406	return (0);
407}
408
409static int
410vmxnet3_shutdown(device_t dev)
411{
412
413	return (0);
414}
415
416static int
417vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
418{
419	device_t dev;
420	int rid;
421
422	dev = sc->vmx_dev;
423
424	rid = PCIR_BAR(0);
425	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
426	    RF_ACTIVE);
427	if (sc->vmx_res0 == NULL) {
428		device_printf(dev,
429		    "could not map BAR0 memory\n");
430		return (ENXIO);
431	}
432
433	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
434	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
435
436	rid = PCIR_BAR(1);
437	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
438	    RF_ACTIVE);
439	if (sc->vmx_res1 == NULL) {
440		device_printf(dev,
441		    "could not map BAR1 memory\n");
442		return (ENXIO);
443	}
444
445	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
446	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
447
448	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
449		rid = PCIR_BAR(2);
450		sc->vmx_msix_res = bus_alloc_resource_any(dev,
451		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
452	}
453
454	if (sc->vmx_msix_res == NULL)
455		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
456
457	return (0);
458}
459
460static void
461vmxnet3_free_resources(struct vmxnet3_softc *sc)
462{
463	device_t dev;
464	int rid;
465
466	dev = sc->vmx_dev;
467
468	if (sc->vmx_res0 != NULL) {
469		rid = PCIR_BAR(0);
470		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
471		sc->vmx_res0 = NULL;
472	}
473
474	if (sc->vmx_res1 != NULL) {
475		rid = PCIR_BAR(1);
476		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
477		sc->vmx_res1 = NULL;
478	}
479
480	if (sc->vmx_msix_res != NULL) {
481		rid = PCIR_BAR(2);
482		bus_release_resource(dev, SYS_RES_MEMORY, rid,
483		    sc->vmx_msix_res);
484		sc->vmx_msix_res = NULL;
485	}
486}
487
488static int
489vmxnet3_check_version(struct vmxnet3_softc *sc)
490{
491	device_t dev;
492	uint32_t version;
493
494	dev = sc->vmx_dev;
495
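	/*
	 * The VRRS and UVRS registers report the device and UPT revisions
	 * supported by the host as a bit mask; the driver requires revision 1
	 * (bit 0) of each and selects it by writing 1 back.
	 */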
496	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
497	if ((version & 0x01) == 0) {
498		device_printf(dev, "unsupported hardware version %#x\n",
499		    version);
500		return (ENOTSUP);
501	}
502	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
503
504	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
505	if ((version & 0x01) == 0) {
506		device_printf(dev, "unsupported UPT version %#x\n", version);
507		return (ENOTSUP);
508	}
509	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
510
511	return (0);
512}
513
514static void
515vmxnet3_initial_config(struct vmxnet3_softc *sc)
516{
517	int nqueue, ndesc;
518
519	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
520	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
521		nqueue = VMXNET3_DEF_TX_QUEUES;
522	if (nqueue > mp_ncpus)
523		nqueue = mp_ncpus;
524	sc->vmx_max_ntxqueues = nqueue;
525
526	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
527	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
528		nqueue = VMXNET3_DEF_RX_QUEUES;
529	if (nqueue > mp_ncpus)
530		nqueue = mp_ncpus;
531	sc->vmx_max_nrxqueues = nqueue;
532
533	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
534		sc->vmx_max_nrxqueues = 1;
535		sc->vmx_max_ntxqueues = 1;
536	}
537
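	/*
	 * Descriptor counts outside the supported range fall back to the
	 * defaults; the masking below then rounds the count down so it
	 * satisfies the ring's alignment requirement.
	 */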
538	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
539	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
540		ndesc = VMXNET3_DEF_TX_NDESC;
541	if (ndesc & VMXNET3_MASK_TX_NDESC)
542		ndesc &= ~VMXNET3_MASK_TX_NDESC;
543	sc->vmx_ntxdescs = ndesc;
544
545	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
546	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
547		ndesc = VMXNET3_DEF_RX_NDESC;
548	if (ndesc & VMXNET3_MASK_RX_NDESC)
549		ndesc &= ~VMXNET3_MASK_RX_NDESC;
550	sc->vmx_nrxdescs = ndesc;
551	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
552}
553
554static void
555vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
556{
557
558	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
559		goto out;
560
561	/* BMV: Just use the maximum configured for now. */
562	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
563	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
564
565	if (sc->vmx_nrxqueues > 1)
566		sc->vmx_flags |= VMXNET3_FLAG_RSS;
567
568	return;
569
570out:
571	sc->vmx_ntxqueues = 1;
572	sc->vmx_nrxqueues = 1;
573}
574
575static int
576vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
577{
578	device_t dev;
579	int nmsix, cnt, required;
580
581	dev = sc->vmx_dev;
582
583	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
584		return (1);
585
586	/* Allocate an additional vector for the events interrupt. */
587	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
588
589	nmsix = pci_msix_count(dev);
590	if (nmsix < required)
591		return (1);
592
593	cnt = required;
594	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
595		sc->vmx_nintrs = required;
596		return (0);
597	} else
598		pci_release_msi(dev);
599
600	/* BMV TODO Fallback to sharing MSIX vectors if possible. */
601
602	return (1);
603}
604
605static int
606vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
607{
608	device_t dev;
609	int nmsi, cnt, required;
610
611	dev = sc->vmx_dev;
612	required = 1;
613
614	nmsi = pci_msi_count(dev);
615	if (nmsi < required)
616		return (1);
617
618	cnt = required;
619	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
620		sc->vmx_nintrs = 1;
621		return (0);
622	} else
623		pci_release_msi(dev);
624
625	return (1);
626}
627
628static int
629vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
630{
631
632	sc->vmx_nintrs = 1;
633	return (0);
634}
635
636static int
637vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
638    struct vmxnet3_interrupt *intr)
639{
640	struct resource *irq;
641
642	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
643	if (irq == NULL)
644		return (ENXIO);
645
646	intr->vmxi_irq = irq;
647	intr->vmxi_rid = rid;
648
649	return (0);
650}
651
652static int
653vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
654{
655	int i, rid, flags, error;
656
657	rid = 0;
658	flags = RF_ACTIVE;
659
660	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
661		flags |= RF_SHAREABLE;
662	else
663		rid = 1;
664
665	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
666		error = vmxnet3_alloc_interrupt(sc, rid, flags,
667		    &sc->vmx_intrs[i]);
668		if (error)
669			return (error);
670	}
671
672	return (0);
673}
674
675static int
676vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
677{
678	device_t dev;
679	struct vmxnet3_txqueue *txq;
680	struct vmxnet3_rxqueue *rxq;
681	struct vmxnet3_interrupt *intr;
682	enum intr_type type;
683	int i, error;
684
685	dev = sc->vmx_dev;
686	intr = &sc->vmx_intrs[0];
687	type = INTR_TYPE_NET | INTR_MPSAFE;
688
689	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
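	/*
	 * Vectors were allocated as one per Tx queue, one per Rx queue and a
	 * final one for events; each queue's intr_idx is derived from the
	 * resource id (rid - 1) assigned in that same order.
	 */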
690		txq = &sc->vmx_txq[i];
691		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
692		     vmxnet3_txq_intr, txq, &intr->vmxi_handler);
693		if (error)
694			return (error);
695		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
696		    "tq%d", i);
697		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
698	}
699
700	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
701		rxq = &sc->vmx_rxq[i];
702		error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
703		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
704		if (error)
705			return (error);
706		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
707		    "rq%d", i);
708		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
709	}
710
711	error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
712	    vmxnet3_event_intr, sc, &intr->vmxi_handler);
713	if (error)
714		return (error);
715	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
716	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
717
718	return (0);
719}
720
721static int
722vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
723{
724	struct vmxnet3_interrupt *intr;
725	int i, error;
726
727	intr = &sc->vmx_intrs[0];
728	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
729	    INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
730	    &intr->vmxi_handler);
731
732	for (i = 0; i < sc->vmx_ntxqueues; i++)
733		sc->vmx_txq[i].vxtxq_intr_idx = 0;
734	for (i = 0; i < sc->vmx_nrxqueues; i++)
735		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
736	sc->vmx_event_intr_idx = 0;
737
738	return (error);
739}
740
741static void
742vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
743{
744	struct vmxnet3_txqueue *txq;
745	struct vmxnet3_txq_shared *txs;
746	struct vmxnet3_rxqueue *rxq;
747	struct vmxnet3_rxq_shared *rxs;
748	int i;
749
750	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
751
752	for (i = 0; i < sc->vmx_ntxqueues; i++) {
753		txq = &sc->vmx_txq[i];
754		txs = txq->vxtxq_ts;
755		txs->intr_idx = txq->vxtxq_intr_idx;
756	}
757
758	for (i = 0; i < sc->vmx_nrxqueues; i++) {
759		rxq = &sc->vmx_rxq[i];
760		rxs = rxq->vxrxq_rs;
761		rxs->intr_idx = rxq->vxrxq_intr_idx;
762	}
763}
764
765static int
766vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
767{
768	int error;
769
770	error = vmxnet3_alloc_intr_resources(sc);
771	if (error)
772		return (error);
773
774	switch (sc->vmx_intr_type) {
775	case VMXNET3_IT_MSIX:
776		error = vmxnet3_setup_msix_interrupts(sc);
777		break;
778	case VMXNET3_IT_MSI:
779	case VMXNET3_IT_LEGACY:
780		error = vmxnet3_setup_legacy_interrupt(sc);
781		break;
782	default:
783		panic("%s: invalid interrupt type %d", __func__,
784		    sc->vmx_intr_type);
785	}
786
787	if (error == 0)
788		vmxnet3_set_interrupt_idx(sc);
789
790	return (error);
791}
792
793static int
794vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
795{
796	device_t dev;
797	uint32_t config;
798	int error;
799
800	dev = sc->vmx_dev;
801	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
802
803	sc->vmx_intr_type = config & 0x03;
804	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
805
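	/*
	 * Fall back through the interrupt types in order of preference:
	 * MSIX, then MSI, then the legacy INTx interrupt.
	 */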
806	switch (sc->vmx_intr_type) {
807	case VMXNET3_IT_AUTO:
808		sc->vmx_intr_type = VMXNET3_IT_MSIX;
809		/* FALLTHROUGH */
810	case VMXNET3_IT_MSIX:
811		error = vmxnet3_alloc_msix_interrupts(sc);
812		if (error == 0)
813			break;
814		sc->vmx_intr_type = VMXNET3_IT_MSI;
815		/* FALLTHROUGH */
816	case VMXNET3_IT_MSI:
817		error = vmxnet3_alloc_msi_interrupts(sc);
818		if (error == 0)
819			break;
820		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
821		/* FALLTHROUGH */
822	case VMXNET3_IT_LEGACY:
823		error = vmxnet3_alloc_legacy_interrupts(sc);
824		if (error == 0)
825			break;
826		/* FALLTHROUGH */
827	default:
828		sc->vmx_intr_type = -1;
829		device_printf(dev, "cannot allocate any interrupt resources\n");
830		return (ENXIO);
831	}
832
833	return (error);
834}
835
836static void
837vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
838    struct vmxnet3_interrupt *intr)
839{
840	device_t dev;
841
842	dev = sc->vmx_dev;
843
844	if (intr->vmxi_handler != NULL) {
845		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
846		intr->vmxi_handler = NULL;
847	}
848
849	if (intr->vmxi_irq != NULL) {
850		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
851		    intr->vmxi_irq);
852		intr->vmxi_irq = NULL;
853		intr->vmxi_rid = -1;
854	}
855}
856
857static void
858vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
859{
860	int i;
861
862	for (i = 0; i < sc->vmx_nintrs; i++)
863		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
864
865	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
866	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
867		pci_release_msi(sc->vmx_dev);
868}
869
870#ifndef VMXNET3_LEGACY_TX
871static int
872vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
873{
874	device_t dev;
875
876	dev = sc->vmx_dev;
877
878	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
879	    taskqueue_thread_enqueue, &sc->vmx_tq);
880	if (sc->vmx_tq == NULL)
881		return (ENOMEM);
882
883	return (0);
884}
885
886static void
887vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
888{
889	device_t dev;
890	int nthreads, error;
891
892	dev = sc->vmx_dev;
893
894	/*
895	 * The taskqueue is typically not frequently used, so a dedicated
896	 * thread for each queue is unnecessary.
897	 */
898	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
899
900	/*
901	 * Most drivers just ignore the return value; it only fails
902	 * with ENOMEM, so an error is not likely, and it is hard for us
903	 * to recover from an error here.
904	 */
905	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
906	    "%s taskq", device_get_nameunit(dev));
907	if (error)
908		device_printf(dev, "failed to start taskqueue: %d\n", error);
909}
910
911static void
912vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
913{
914	struct vmxnet3_txqueue *txq;
915	int i;
916
917	if (sc->vmx_tq != NULL) {
918		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
919			txq = &sc->vmx_txq[i];
920			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
921		}
922	}
923}
924
925static void
926vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
927{
928	if (sc->vmx_tq != NULL) {
929		taskqueue_free(sc->vmx_tq);
930		sc->vmx_tq = NULL;
931	}
932}
933#endif
934
935static int
936vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
937{
938	struct vmxnet3_rxqueue *rxq;
939	struct vmxnet3_rxring *rxr;
940	int i;
941
942	rxq = &sc->vmx_rxq[q];
943
944	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
945	    device_get_nameunit(sc->vmx_dev), q);
946	mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
947
948	rxq->vxrxq_sc = sc;
949	rxq->vxrxq_id = q;
950
951	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
952		rxr = &rxq->vxrxq_cmd_ring[i];
953		rxr->vxrxr_rid = i;
954		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
955		rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
956		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
957		if (rxr->vxrxr_rxbuf == NULL)
958			return (ENOMEM);
959
960		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
961	}
962
963	return (0);
964}
965
966static int
967vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
968{
969	struct vmxnet3_txqueue *txq;
970	struct vmxnet3_txring *txr;
971
972	txq = &sc->vmx_txq[q];
973	txr = &txq->vxtxq_cmd_ring;
974
975	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
976	    device_get_nameunit(sc->vmx_dev), q);
977	mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
978
979	txq->vxtxq_sc = sc;
980	txq->vxtxq_id = q;
981
982	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
983	txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
984	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
985	if (txr->vxtxr_txbuf == NULL)
986		return (ENOMEM);
987
988	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
989
990#ifndef VMXNET3_LEGACY_TX
991	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
992
993	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
994	    M_NOWAIT, &txq->vxtxq_mtx);
995	if (txq->vxtxq_br == NULL)
996		return (ENOMEM);
997#endif
998
999	return (0);
1000}
1001
1002static int
1003vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1004{
1005	int i, error;
1006
1007	/*
1008	 * Only attempt to create multiple queues if MSIX is available. MSIX is
1009	 * disabled by default because it's apparently broken for devices passed
1010	 * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
1011	 * must be set to zero for MSIX. This check prevents us from allocating
1012	 * queue structures that we will not use.
1013	 */
1014	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1015		sc->vmx_max_nrxqueues = 1;
1016		sc->vmx_max_ntxqueues = 1;
1017	}
1018
1019	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1020	    sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1021	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1022	    sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1023	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1024		return (ENOMEM);
1025
1026	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1027		error = vmxnet3_init_rxq(sc, i);
1028		if (error)
1029			return (error);
1030	}
1031
1032	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1033		error = vmxnet3_init_txq(sc, i);
1034		if (error)
1035			return (error);
1036	}
1037
1038	return (0);
1039}
1040
1041static void
1042vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1043{
1044	struct vmxnet3_rxring *rxr;
1045	int i;
1046
1047	rxq->vxrxq_sc = NULL;
1048	rxq->vxrxq_id = -1;
1049
1050	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1051		rxr = &rxq->vxrxq_cmd_ring[i];
1052
1053		if (rxr->vxrxr_rxbuf != NULL) {
1054			free(rxr->vxrxr_rxbuf, M_DEVBUF);
1055			rxr->vxrxr_rxbuf = NULL;
1056		}
1057	}
1058
1059	if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1060		mtx_destroy(&rxq->vxrxq_mtx);
1061}
1062
1063static void
1064vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1065{
1066	struct vmxnet3_txring *txr;
1067
1068	txr = &txq->vxtxq_cmd_ring;
1069
1070	txq->vxtxq_sc = NULL;
1071	txq->vxtxq_id = -1;
1072
1073#ifndef VMXNET3_LEGACY_TX
1074	if (txq->vxtxq_br != NULL) {
1075		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1076		txq->vxtxq_br = NULL;
1077	}
1078#endif
1079
1080	if (txr->vxtxr_txbuf != NULL) {
1081		free(txr->vxtxr_txbuf, M_DEVBUF);
1082		txr->vxtxr_txbuf = NULL;
1083	}
1084
1085	if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1086		mtx_destroy(&txq->vxtxq_mtx);
1087}
1088
1089static void
1090vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1091{
1092	int i;
1093
1094	if (sc->vmx_rxq != NULL) {
1095		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1096			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1097		free(sc->vmx_rxq, M_DEVBUF);
1098		sc->vmx_rxq = NULL;
1099	}
1100
1101	if (sc->vmx_txq != NULL) {
1102		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1103			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1104		free(sc->vmx_txq, M_DEVBUF);
1105		sc->vmx_txq = NULL;
1106	}
1107}
1108
1109static int
1110vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1111{
1112	device_t dev;
1113	uint8_t *kva;
1114	size_t size;
1115	int i, error;
1116
1117	dev = sc->vmx_dev;
1118
1119	size = sizeof(struct vmxnet3_driver_shared);
1120	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1121	if (error) {
1122		device_printf(dev, "cannot alloc shared memory\n");
1123		return (error);
1124	}
1125	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1126
1127	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1128	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1129	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1130	if (error) {
1131		device_printf(dev, "cannot alloc queue shared memory\n");
1132		return (error);
1133	}
1134	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1135	kva = sc->vmx_qs;
1136
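	/*
	 * The per-queue shared areas were allocated as one contiguous DMA
	 * block: all Tx queue shared structures first, followed by the Rx
	 * queue shared structures.
	 */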
1137	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1138		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1139		kva += sizeof(struct vmxnet3_txq_shared);
1140	}
1141	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1142		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1143		kva += sizeof(struct vmxnet3_rxq_shared);
1144	}
1145
1146	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1147		size = sizeof(struct vmxnet3_rss_shared);
1148		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1149		if (error) {
1150			device_printf(dev, "cannot alloc rss shared memory\n");
1151			return (error);
1152		}
1153		sc->vmx_rss =
1154		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1155	}
1156
1157	return (0);
1158}
1159
1160static void
1161vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1162{
1163
1164	if (sc->vmx_rss != NULL) {
1165		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1166		sc->vmx_rss = NULL;
1167	}
1168
1169	if (sc->vmx_qs != NULL) {
1170		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1171		sc->vmx_qs = NULL;
1172	}
1173
1174	if (sc->vmx_ds != NULL) {
1175		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1176		sc->vmx_ds = NULL;
1177	}
1178}
1179
1180static int
1181vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1182{
1183	device_t dev;
1184	struct vmxnet3_txqueue *txq;
1185	struct vmxnet3_txring *txr;
1186	struct vmxnet3_comp_ring *txc;
1187	size_t descsz, compsz;
1188	int i, q, error;
1189
1190	dev = sc->vmx_dev;
1191
1192	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1193		txq = &sc->vmx_txq[q];
1194		txr = &txq->vxtxq_cmd_ring;
1195		txc = &txq->vxtxq_comp_ring;
1196
1197		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1198		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1199
1200		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1201		    1, 0,			/* alignment, boundary */
1202		    BUS_SPACE_MAXADDR,		/* lowaddr */
1203		    BUS_SPACE_MAXADDR,		/* highaddr */
1204		    NULL, NULL,			/* filter, filterarg */
1205		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1206		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1207		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1208		    0,				/* flags */
1209		    NULL, NULL,			/* lockfunc, lockarg */
1210		    &txr->vxtxr_txtag);
1211		if (error) {
1212			device_printf(dev,
1213			    "unable to create Tx buffer tag for queue %d\n", q);
1214			return (error);
1215		}
1216
1217		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1218		if (error) {
1219			device_printf(dev, "cannot alloc Tx descriptors for "
1220			    "queue %d error %d\n", q, error);
1221			return (error);
1222		}
1223		txr->vxtxr_txd =
1224		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1225
1226		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1227		if (error) {
1228			device_printf(dev, "cannot alloc Tx comp descriptors "
1229			   "for queue %d error %d\n", q, error);
1230			return (error);
1231		}
1232		txc->vxcr_u.txcd =
1233		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1234
1235		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1236			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1237			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1238			if (error) {
1239				device_printf(dev, "unable to create Tx buf "
1240				    "dmamap for queue %d idx %d\n", q, i);
1241				return (error);
1242			}
1243		}
1244	}
1245
1246	return (0);
1247}
1248
1249static void
1250vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1251{
1252	device_t dev;
1253	struct vmxnet3_txqueue *txq;
1254	struct vmxnet3_txring *txr;
1255	struct vmxnet3_comp_ring *txc;
1256	struct vmxnet3_txbuf *txb;
1257	int i, q;
1258
1259	dev = sc->vmx_dev;
1260
1261	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1262		txq = &sc->vmx_txq[q];
1263		txr = &txq->vxtxq_cmd_ring;
1264		txc = &txq->vxtxq_comp_ring;
1265
1266		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1267			txb = &txr->vxtxr_txbuf[i];
1268			if (txb->vtxb_dmamap != NULL) {
1269				bus_dmamap_destroy(txr->vxtxr_txtag,
1270				    txb->vtxb_dmamap);
1271				txb->vtxb_dmamap = NULL;
1272			}
1273		}
1274
1275		if (txc->vxcr_u.txcd != NULL) {
1276			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1277			txc->vxcr_u.txcd = NULL;
1278		}
1279
1280		if (txr->vxtxr_txd != NULL) {
1281			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1282			txr->vxtxr_txd = NULL;
1283		}
1284
1285		if (txr->vxtxr_txtag != NULL) {
1286			bus_dma_tag_destroy(txr->vxtxr_txtag);
1287			txr->vxtxr_txtag = NULL;
1288		}
1289	}
1290}
1291
1292static int
1293vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1294{
1295	device_t dev;
1296	struct vmxnet3_rxqueue *rxq;
1297	struct vmxnet3_rxring *rxr;
1298	struct vmxnet3_comp_ring *rxc;
1299	int descsz, compsz;
1300	int i, j, q, error;
1301
1302	dev = sc->vmx_dev;
1303
1304	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1305		rxq = &sc->vmx_rxq[q];
1306		rxc = &rxq->vxrxq_comp_ring;
1307		compsz = 0;
1308
1309		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1310			rxr = &rxq->vxrxq_cmd_ring[i];
1311
1312			descsz = rxr->vxrxr_ndesc *
1313			    sizeof(struct vmxnet3_rxdesc);
1314			compsz += rxr->vxrxr_ndesc *
1315			    sizeof(struct vmxnet3_rxcompdesc);
1316
1317			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1318			    1, 0,		/* alignment, boundary */
1319			    BUS_SPACE_MAXADDR,	/* lowaddr */
1320			    BUS_SPACE_MAXADDR,	/* highaddr */
1321			    NULL, NULL,		/* filter, filterarg */
1322			    MJUMPAGESIZE,	/* maxsize */
1323			    1,			/* nsegments */
1324			    MJUMPAGESIZE,	/* maxsegsize */
1325			    0,			/* flags */
1326			    NULL, NULL,		/* lockfunc, lockarg */
1327			    &rxr->vxrxr_rxtag);
1328			if (error) {
1329				device_printf(dev,
1330				    "unable to create Rx buffer tag for "
1331				    "queue %d\n", q);
1332				return (error);
1333			}
1334
1335			error = vmxnet3_dma_malloc(sc, descsz, 512,
1336			    &rxr->vxrxr_dma);
1337			if (error) {
1338				device_printf(dev, "cannot allocate Rx "
1339				    "descriptors for queue %d/%d error %d\n",
1340				    i, q, error);
1341				return (error);
1342			}
1343			rxr->vxrxr_rxd =
1344			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1345		}
1346
1347		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1348		if (error) {
1349			device_printf(dev, "cannot alloc Rx comp descriptors "
1350			    "for queue %d error %d\n", q, error);
1351			return (error);
1352		}
1353		rxc->vxcr_u.rxcd =
1354		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1355
1356		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1357			rxr = &rxq->vxrxq_cmd_ring[i];
1358
1359			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1360			    &rxr->vxrxr_spare_dmap);
1361			if (error) {
1362				device_printf(dev, "unable to create spare "
1363				    "dmamap for queue %d/%d error %d\n",
1364				    q, i, error);
1365				return (error);
1366			}
1367
1368			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1369				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1370				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1371				if (error) {
1372					device_printf(dev, "unable to create "
1373					    "dmamap for queue %d/%d slot %d "
1374					    "error %d\n",
1375					    q, i, j, error);
1376					return (error);
1377				}
1378			}
1379		}
1380	}
1381
1382	return (0);
1383}
1384
1385static void
1386vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1387{
1388	device_t dev;
1389	struct vmxnet3_rxqueue *rxq;
1390	struct vmxnet3_rxring *rxr;
1391	struct vmxnet3_comp_ring *rxc;
1392	struct vmxnet3_rxbuf *rxb;
1393	int i, j, q;
1394
1395	dev = sc->vmx_dev;
1396
1397	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1398		rxq = &sc->vmx_rxq[q];
1399		rxc = &rxq->vxrxq_comp_ring;
1400
1401		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1402			rxr = &rxq->vxrxq_cmd_ring[i];
1403
1404			if (rxr->vxrxr_spare_dmap != NULL) {
1405				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1406				    rxr->vxrxr_spare_dmap);
1407				rxr->vxrxr_spare_dmap = NULL;
1408			}
1409
1410			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1411				rxb = &rxr->vxrxr_rxbuf[j];
1412				if (rxb->vrxb_dmamap != NULL) {
1413					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1414					    rxb->vrxb_dmamap);
1415					rxb->vrxb_dmamap = NULL;
1416				}
1417			}
1418		}
1419
1420		if (rxc->vxcr_u.rxcd != NULL) {
1421			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1422			rxc->vxcr_u.rxcd = NULL;
1423		}
1424
1425		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1426			rxr = &rxq->vxrxq_cmd_ring[i];
1427
1428			if (rxr->vxrxr_rxd != NULL) {
1429				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1430				rxr->vxrxr_rxd = NULL;
1431			}
1432
1433			if (rxr->vxrxr_rxtag != NULL) {
1434				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1435				rxr->vxrxr_rxtag = NULL;
1436			}
1437		}
1438	}
1439}
1440
1441static int
1442vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1443{
1444	int error;
1445
1446	error = vmxnet3_alloc_txq_data(sc);
1447	if (error)
1448		return (error);
1449
1450	error = vmxnet3_alloc_rxq_data(sc);
1451	if (error)
1452		return (error);
1453
1454	return (0);
1455}
1456
1457static void
1458vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1459{
1460
1461	if (sc->vmx_rxq != NULL)
1462		vmxnet3_free_rxq_data(sc);
1463
1464	if (sc->vmx_txq != NULL)
1465		vmxnet3_free_txq_data(sc);
1466}
1467
1468static int
1469vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1470{
1471	int error;
1472
1473	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1474	    32, &sc->vmx_mcast_dma);
1475	if (error)
1476		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1477	else
1478		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1479
1480	return (error);
1481}
1482
1483static void
1484vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1485{
1486
1487	if (sc->vmx_mcast != NULL) {
1488		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1489		sc->vmx_mcast = NULL;
1490	}
1491}
1492
1493static void
1494vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1495{
1496	struct vmxnet3_driver_shared *ds;
1497	struct vmxnet3_txqueue *txq;
1498	struct vmxnet3_txq_shared *txs;
1499	struct vmxnet3_rxqueue *rxq;
1500	struct vmxnet3_rxq_shared *rxs;
1501	int i;
1502
1503	ds = sc->vmx_ds;
1504
1505	/*
1506	 * Initialize fields of the shared data that remain the same across
1507	 * reinits. Note the shared data is zeroed when allocated.
1508	 */
1509
1510	ds->magic = VMXNET3_REV1_MAGIC;
1511
1512	/* DriverInfo */
1513	ds->version = VMXNET3_DRIVER_VERSION;
1514	ds->guest = VMXNET3_GOS_FREEBSD |
1515#ifdef __LP64__
1516	    VMXNET3_GOS_64BIT;
1517#else
1518	    VMXNET3_GOS_32BIT;
1519#endif
1520	ds->vmxnet3_revision = 1;
1521	ds->upt_version = 1;
1522
1523	/* Misc. conf */
1524	ds->driver_data = vtophys(sc);
1525	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1526	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1527	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1528	ds->nrxsg_max = sc->vmx_max_rxsegs;
1529
1530	/* RSS conf */
1531	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1532		ds->rss.version = 1;
1533		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1534		ds->rss.len = sc->vmx_rss_dma.dma_size;
1535	}
1536
1537	/* Interrupt control. */
1538	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1539	ds->nintr = sc->vmx_nintrs;
1540	ds->evintr = sc->vmx_event_intr_idx;
1541	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1542
1543	for (i = 0; i < sc->vmx_nintrs; i++)
1544		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1545
1546	/* Receive filter. */
1547	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1548	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1549
1550	/* Tx queues */
1551	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1552		txq = &sc->vmx_txq[i];
1553		txs = txq->vxtxq_ts;
1554
1555		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1556		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1557		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1558		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1559		txs->driver_data = vtophys(txq);
1560		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1561	}
1562
1563	/* Rx queues */
1564	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1565		rxq = &sc->vmx_rxq[i];
1566		rxs = rxq->vxrxq_rs;
1567
1568		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1569		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1570		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1571		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1572		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1573		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1574		rxs->driver_data = vtophys(rxq);
1575		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1576	}
1577}
1578
1579static void
1580vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1581{
1582	struct ifnet *ifp;
1583
1584	ifp = sc->vmx_ifp;
1585
1586	/* Use the current MAC address. */
1587	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1588	vmxnet3_set_lladdr(sc);
1589
1590	ifp->if_hwassist = 0;
1591	if (ifp->if_capenable & IFCAP_TXCSUM)
1592		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1593	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1594		ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1595	if (ifp->if_capenable & IFCAP_TSO4)
1596		ifp->if_hwassist |= CSUM_IP_TSO;
1597	if (ifp->if_capenable & IFCAP_TSO6)
1598		ifp->if_hwassist |= CSUM_IP6_TSO;
1599}
1600
1601static void
1602vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1603{
1604	/*
1605	 * Use the same key as the Linux driver until FreeBSD can do
1606	 * RSS (presumably Toeplitz) in software.
1607	 */
1608	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1609	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1610	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1611	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1612	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1613	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1614	};
1615
1616	struct vmxnet3_driver_shared *ds;
1617	struct vmxnet3_rss_shared *rss;
1618	int i;
1619
1620	ds = sc->vmx_ds;
1621	rss = sc->vmx_rss;
1622
1623	rss->hash_type =
1624	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1625	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1626	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1627	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1628	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1629	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1630
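	/*
	 * Populate the indirection table so that incoming hash buckets are
	 * spread round-robin across the enabled Rx queues.
	 */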
1631	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1632		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1633}
1634
1635static void
1636vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1637{
1638	struct ifnet *ifp;
1639	struct vmxnet3_driver_shared *ds;
1640
1641	ifp = sc->vmx_ifp;
1642	ds = sc->vmx_ds;
1643
1644	ds->mtu = ifp->if_mtu;
1645	ds->ntxqueue = sc->vmx_ntxqueues;
1646	ds->nrxqueue = sc->vmx_nrxqueues;
1647
1648	ds->upt_features = 0;
1649	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1650		ds->upt_features |= UPT1_F_CSUM;
1651	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1652		ds->upt_features |= UPT1_F_VLAN;
1653	if (ifp->if_capenable & IFCAP_LRO)
1654		ds->upt_features |= UPT1_F_LRO;
1655
1656	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1657		ds->upt_features |= UPT1_F_RSS;
1658		vmxnet3_reinit_rss_shared_data(sc);
1659	}
1660
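	/*
	 * Hand the host the 64-bit physical address of the driver shared
	 * area as two 32-bit halves (low word first).
	 */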
1661	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1662	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1663	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1664}
1665
1666static int
1667vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1668{
1669	int error;
1670
1671	error = vmxnet3_alloc_shared_data(sc);
1672	if (error)
1673		return (error);
1674
1675	error = vmxnet3_alloc_queue_data(sc);
1676	if (error)
1677		return (error);
1678
1679	error = vmxnet3_alloc_mcast_table(sc);
1680	if (error)
1681		return (error);
1682
1683	vmxnet3_init_shared_data(sc);
1684
1685	return (0);
1686}
1687
1688static void
1689vmxnet3_free_data(struct vmxnet3_softc *sc)
1690{
1691
1692	vmxnet3_free_mcast_table(sc);
1693	vmxnet3_free_queue_data(sc);
1694	vmxnet3_free_shared_data(sc);
1695}
1696
1697static int
1698vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1699{
1700	device_t dev;
1701	struct ifnet *ifp;
1702
1703	dev = sc->vmx_dev;
1704
1705	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1706	if (ifp == NULL) {
1707		device_printf(dev, "cannot allocate ifnet structure\n");
1708		return (ENOSPC);
1709	}
1710
1711	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1712#if __FreeBSD_version < 1000025
1713	ifp->if_baudrate = 1000000000;
1714#elif __FreeBSD_version < 1100011
1715	if_initbaudrate(ifp, IF_Gbps(10));
1716#else
1717	ifp->if_baudrate = IF_Gbps(10);
1718#endif
1719	ifp->if_softc = sc;
1720	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1721	ifp->if_init = vmxnet3_init;
1722	ifp->if_ioctl = vmxnet3_ioctl;
1723	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1724	ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1725	ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1726
1727#ifdef VMXNET3_LEGACY_TX
1728	ifp->if_start = vmxnet3_start;
1729	ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1730	IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1731	IFQ_SET_READY(&ifp->if_snd);
1732#else
1733	ifp->if_transmit = vmxnet3_txq_mq_start;
1734	ifp->if_qflush = vmxnet3_qflush;
1735#endif
1736
1737	vmxnet3_get_lladdr(sc);
1738	ether_ifattach(ifp, sc->vmx_lladdr);
1739
1740	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1741	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1742	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1743	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1744	    IFCAP_VLAN_HWCSUM;
1745	ifp->if_capenable = ifp->if_capabilities;
1746
1747	/* These capabilities are not enabled by default. */
1748	ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1749
1750	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1751	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1752	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1753	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1754
1755	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1756	    vmxnet3_media_status);
1757	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1758	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1759
1760	return (0);
1761}
1762
1763static void
1764vmxnet3_evintr(struct vmxnet3_softc *sc)
1765{
1766	device_t dev;
1767	struct ifnet *ifp;
1768	struct vmxnet3_txq_shared *ts;
1769	struct vmxnet3_rxq_shared *rs;
1770	uint32_t event;
1771	int reset;
1772
1773	dev = sc->vmx_dev;
1774	ifp = sc->vmx_ifp;
1775	reset = 0;
1776
1777	VMXNET3_CORE_LOCK(sc);
1778
1779	/* Clear events. */
1780	event = sc->vmx_ds->event;
1781	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1782
1783	if (event & VMXNET3_EVENT_LINK) {
1784		vmxnet3_link_status(sc);
1785		if (sc->vmx_link_active != 0)
1786			vmxnet3_tx_start_all(sc);
1787	}
1788
1789	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1790		reset = 1;
1791		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1792		ts = sc->vmx_txq[0].vxtxq_ts;
1793		if (ts->stopped != 0)
1794			device_printf(dev, "Tx queue error %#x\n", ts->error);
1795		rs = sc->vmx_rxq[0].vxrxq_rs;
1796		if (rs->stopped != 0)
1797			device_printf(dev, "Rx queue error %#x\n", rs->error);
1798		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1799	}
1800
1801	if (event & VMXNET3_EVENT_DIC)
1802		device_printf(dev, "device implementation change event\n");
1803	if (event & VMXNET3_EVENT_DEBUG)
1804		device_printf(dev, "debug event\n");
1805
1806	if (reset != 0) {
1807		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1808		vmxnet3_init_locked(sc);
1809	}
1810
1811	VMXNET3_CORE_UNLOCK(sc);
1812}
1813
1814static void
1815vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1816{
1817	struct vmxnet3_softc *sc;
1818	struct ifnet *ifp;
1819	struct vmxnet3_txring *txr;
1820	struct vmxnet3_comp_ring *txc;
1821	struct vmxnet3_txcompdesc *txcd;
1822	struct vmxnet3_txbuf *txb;
1823	struct mbuf *m;
1824	u_int sop;
1825
1826	sc = txq->vxtxq_sc;
1827	ifp = sc->vmx_ifp;
1828	txr = &txq->vxtxq_cmd_ring;
1829	txc = &txq->vxtxq_comp_ring;
1830
1831	VMXNET3_TXQ_LOCK_ASSERT(txq);
1832
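	/*
	 * Process completion descriptors while their generation bit matches
	 * the ring's current generation; the bit is toggled each time the
	 * completion ring wraps, so a mismatch means the entry is not yet
	 * valid.
	 */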
1833	for (;;) {
1834		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1835		if (txcd->gen != txc->vxcr_gen)
1836			break;
1837		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1838
1839		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1840			txc->vxcr_next = 0;
1841			txc->vxcr_gen ^= 1;
1842		}
1843
1844		sop = txr->vxtxr_next;
1845		txb = &txr->vxtxr_txbuf[sop];
1846
1847		if ((m = txb->vtxb_m) != NULL) {
1848			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1849			    BUS_DMASYNC_POSTWRITE);
1850			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1851
1852			txq->vxtxq_stats.vmtxs_opackets++;
1853			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1854			if (m->m_flags & M_MCAST)
1855				txq->vxtxq_stats.vmtxs_omcasts++;
1856
1857			m_freem(m);
1858			txb->vtxb_m = NULL;
1859		}
1860
1861		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1862	}
1863
1864	if (txr->vxtxr_head == txr->vxtxr_next)
1865		txq->vxtxq_watchdog = 0;
1866}
1867
1868static int
1869vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1870{
1871	struct ifnet *ifp;
1872	struct mbuf *m;
1873	struct vmxnet3_rxdesc *rxd;
1874	struct vmxnet3_rxbuf *rxb;
1875	bus_dma_tag_t tag;
1876	bus_dmamap_t dmap;
1877	bus_dma_segment_t segs[1];
1878	int idx, clsize, btype, flags, nsegs, error;
1879
1880	ifp = sc->vmx_ifp;
1881	tag = rxr->vxrxr_rxtag;
1882	dmap = rxr->vxrxr_spare_dmap;
1883	idx = rxr->vxrxr_fill;
1884	rxd = &rxr->vxrxr_rxd[idx];
1885	rxb = &rxr->vxrxr_rxbuf[idx];
1886
1887#ifdef VMXNET3_FAILPOINTS
1888	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1889	if (rxr->vxrxr_rid != 0)
1890		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1891#endif
1892
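	/*
	 * Ring 0 supplies the head buffers that begin a frame (standard
	 * clusters, ETHER_ALIGN'ed); the remaining slots and ring 1 supply
	 * page-sized body buffers used to chain larger frames.
	 */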
1893	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1894		flags = M_PKTHDR;
1895		clsize = MCLBYTES;
1896		btype = VMXNET3_BTYPE_HEAD;
1897	} else {
1898#if __FreeBSD_version < 902001
1899		/*
1900		 * These mbufs will never be used for the start of a frame.
1901		 * Roughly prior to branching releng/9.2, bus_dmamap_load_mbuf_sg()
1902		 * required the mbuf to always be a packet header. Avoid
1903		 * unnecessary mbuf initialization in newer versions where
1904		 * that is not the case.
1905		 */
1906		flags = M_PKTHDR;
1907#else
1908		flags = 0;
1909#endif
1910		clsize = MJUMPAGESIZE;
1911		btype = VMXNET3_BTYPE_BODY;
1912	}
1913
1914	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1915	if (m == NULL) {
1916		sc->vmx_stats.vmst_mgetcl_failed++;
1917		return (ENOBUFS);
1918	}
1919
1920	if (btype == VMXNET3_BTYPE_HEAD) {
1921		m->m_len = m->m_pkthdr.len = clsize;
1922		m_adj(m, ETHER_ALIGN);
1923	} else
1924		m->m_len = clsize;
1925
1926	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1927	    BUS_DMA_NOWAIT);
1928	if (error) {
1929		m_freem(m);
1930		sc->vmx_stats.vmst_mbuf_load_failed++;
1931		return (error);
1932	}
1933	KASSERT(nsegs == 1,
1934	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1935#if __FreeBSD_version < 902001
1936	if (btype == VMXNET3_BTYPE_BODY)
1937		m->m_flags &= ~M_PKTHDR;
1938#endif
1939
1940	if (rxb->vrxb_m != NULL) {
1941		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1942		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1943	}
1944
1945	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1946	rxb->vrxb_dmamap = dmap;
1947	rxb->vrxb_m = m;
1948
1949	rxd->addr = segs[0].ds_addr;
1950	rxd->len = segs[0].ds_len;
1951	rxd->btype = btype;
1952	rxd->gen = rxr->vxrxr_gen;
1953
1954	vmxnet3_rxr_increment_fill(rxr);
1955	return (0);
1956}
1957
1958static void
1959vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1960    struct vmxnet3_rxring *rxr, int idx)
1961{
1962	struct vmxnet3_rxdesc *rxd;
1963
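	/* Return the descriptor to the device; keep its current mbuf. */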
1964	rxd = &rxr->vxrxr_rxd[idx];
1965	rxd->gen = rxr->vxrxr_gen;
1966	vmxnet3_rxr_increment_fill(rxr);
1967}
1968
1969static void
1970vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1971{
1972	struct vmxnet3_softc *sc;
1973	struct vmxnet3_rxring *rxr;
1974	struct vmxnet3_comp_ring *rxc;
1975	struct vmxnet3_rxcompdesc *rxcd;
1976	int idx, eof;
1977
1978	sc = rxq->vxrxq_sc;
1979	rxc = &rxq->vxrxq_comp_ring;
1980
1981	do {
1982		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1983		if (rxcd->gen != rxc->vxcr_gen)
1984			break;		/* Not expected. */
1985		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1986
1987		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1988			rxc->vxcr_next = 0;
1989			rxc->vxcr_gen ^= 1;
1990		}
1991
1992		idx = rxcd->rxd_idx;
1993		eof = rxcd->eop;
1994		if (rxcd->qid < sc->vmx_nrxqueues)
1995			rxr = &rxq->vxrxq_cmd_ring[0];
1996		else
1997			rxr = &rxq->vxrxq_cmd_ring[1];
1998		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1999	} while (!eof);
2000}
2001
2002static void
2003vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2004{
2005
2006	if (rxcd->ipv4) {
2007		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2008		if (rxcd->ipcsum_ok)
2009			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2010	}
2011
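	/*
	 * Only report a validated L4 checksum for unfragmented TCP and
	 * UDP; csum_data of 0xFFFF tells the stack the pseudo-header
	 * checksum is already accounted for.
	 */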
2012	if (!rxcd->fragment) {
2013		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2014			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2015			    CSUM_PSEUDO_HDR;
2016			m->m_pkthdr.csum_data = 0xFFFF;
2017		}
2018	}
2019}
2020
2021static void
2022vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2023    struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2024{
2025	struct vmxnet3_softc *sc;
2026	struct ifnet *ifp;
2027
2028	sc = rxq->vxrxq_sc;
2029	ifp = sc->vmx_ifp;
2030
2031	if (rxcd->error) {
2032		rxq->vxrxq_stats.vmrxs_ierrors++;
2033		m_freem(m);
2034		return;
2035	}
2036
2037#ifdef notyet
2038	switch (rxcd->rss_type) {
2039	case VMXNET3_RCD_RSS_TYPE_IPV4:
2040		m->m_pkthdr.flowid = rxcd->rss_hash;
2041		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2042		break;
2043	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2044		m->m_pkthdr.flowid = rxcd->rss_hash;
2045		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2046		break;
2047	case VMXNET3_RCD_RSS_TYPE_IPV6:
2048		m->m_pkthdr.flowid = rxcd->rss_hash;
2049		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2050		break;
2051	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2052		m->m_pkthdr.flowid = rxcd->rss_hash;
2053		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2054		break;
2055	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2056		m->m_pkthdr.flowid = rxq->vxrxq_id;
2057		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2058		break;
2059	}
2060#else
2061	m->m_pkthdr.flowid = rxq->vxrxq_id;
2062	m->m_flags |= M_FLOWID;
2063#endif
2064
2065	if (!rxcd->no_csum)
2066		vmxnet3_rx_csum(rxcd, m);
2067	if (rxcd->vlan) {
2068		m->m_flags |= M_VLANTAG;
2069		m->m_pkthdr.ether_vtag = rxcd->vtag;
2070	}
2071
2072	rxq->vxrxq_stats.vmrxs_ipackets++;
2073	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2074
2075	VMXNET3_RXQ_UNLOCK(rxq);
2076	(*ifp->if_input)(ifp, m);
2077	VMXNET3_RXQ_LOCK(rxq);
2078}
2079
2080static void
2081vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2082{
2083	struct vmxnet3_softc *sc;
2084	struct ifnet *ifp;
2085	struct vmxnet3_rxring *rxr;
2086	struct vmxnet3_comp_ring *rxc;
2087	struct vmxnet3_rxdesc *rxd;
2088	struct vmxnet3_rxcompdesc *rxcd;
2089	struct mbuf *m, *m_head, *m_tail;
2090	int idx, length;
2091
2092	sc = rxq->vxrxq_sc;
2093	ifp = sc->vmx_ifp;
2094	rxc = &rxq->vxrxq_comp_ring;
2095
2096	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2097
2098	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2099		return;
2100
2101	m_head = rxq->vxrxq_mhead;
2102	rxq->vxrxq_mhead = NULL;
2103	m_tail = rxq->vxrxq_mtail;
2104	rxq->vxrxq_mtail = NULL;
2105	MPASS(m_head == NULL || m_tail != NULL);
2106
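	/*
	 * A frame may span several descriptors and several calls here;
	 * a partially assembled chain is carried across invocations in
	 * vxrxq_mhead and vxrxq_mtail.
	 */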
2107	for (;;) {
2108		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2109		if (rxcd->gen != rxc->vxcr_gen) {
2110			rxq->vxrxq_mhead = m_head;
2111			rxq->vxrxq_mtail = m_tail;
2112			break;
2113		}
2114		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2115
2116		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2117			rxc->vxcr_next = 0;
2118			rxc->vxcr_gen ^= 1;
2119		}
2120
2121		idx = rxcd->rxd_idx;
2122		length = rxcd->len;
2123		if (rxcd->qid < sc->vmx_nrxqueues)
2124			rxr = &rxq->vxrxq_cmd_ring[0];
2125		else
2126			rxr = &rxq->vxrxq_cmd_ring[1];
2127		rxd = &rxr->vxrxr_rxd[idx];
2128
2129		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2130		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2131		    __func__, rxcd->qid, idx));
2132
2133		/*
2134		 * The host may skip descriptors. We detect this when the
2135		 * completed descriptor's index does not match our next fill
2136		 * index. Catch up with the host now.
2137		 */
2138		if (__predict_false(rxr->vxrxr_fill != idx)) {
2139			while (rxr->vxrxr_fill != idx) {
2140				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2141				    rxr->vxrxr_gen;
2142				vmxnet3_rxr_increment_fill(rxr);
2143			}
2144		}
2145
2146		if (rxcd->sop) {
2147			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2148			    ("%s: start of frame w/o head buffer", __func__));
2149			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2150			    ("%s: start of frame not in ring 0", __func__));
2151			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2152			    ("%s: start of frame at unexpected index %d (%d)",
2153			     __func__, idx, sc->vmx_rx_max_chain));
2154			KASSERT(m_head == NULL,
2155			    ("%s: duplicate start of frame?", __func__));
2156
2157			if (length == 0) {
2158				/* Just ignore this descriptor. */
2159				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2160				goto nextp;
2161			}
2162
2163			if (vmxnet3_newbuf(sc, rxr) != 0) {
2164				rxq->vxrxq_stats.vmrxs_iqdrops++;
2165				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2166				if (!rxcd->eop)
2167					vmxnet3_rxq_discard_chain(rxq);
2168				goto nextp;
2169			}
2170
2171			m->m_pkthdr.rcvif = ifp;
2172			m->m_pkthdr.len = m->m_len = length;
2173			m->m_pkthdr.csum_flags = 0;
2174			m_head = m_tail = m;
2175
2176		} else {
2177			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2178			    ("%s: non start of frame w/o body buffer", __func__));
2179			KASSERT(m_head != NULL,
2180			    ("%s: frame not started?", __func__));
2181
2182			if (vmxnet3_newbuf(sc, rxr) != 0) {
2183				rxq->vxrxq_stats.vmrxs_iqdrops++;
2184				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2185				if (!rxcd->eop)
2186					vmxnet3_rxq_discard_chain(rxq);
2187				m_freem(m_head);
2188				m_head = m_tail = NULL;
2189				goto nextp;
2190			}
2191
2192			m->m_len = length;
2193			m_head->m_pkthdr.len += length;
2194			m_tail->m_next = m;
2195			m_tail = m;
2196		}
2197
2198		if (rxcd->eop) {
2199			vmxnet3_rxq_input(rxq, rxcd, m_head);
2200			m_head = m_tail = NULL;
2201
2202			/* Must recheck after dropping the Rx lock. */
2203			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2204				break;
2205		}
2206
2207nextp:
2208		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2209			int qid = rxcd->qid;
2210			bus_size_t r;
2211
2212			idx = (idx + 1) % rxr->vxrxr_ndesc;
2213			if (qid >= sc->vmx_nrxqueues) {
2214				qid -= sc->vmx_nrxqueues;
2215				r = VMXNET3_BAR0_RXH2(qid);
2216			} else
2217				r = VMXNET3_BAR0_RXH1(qid);
2218			vmxnet3_write_bar0(sc, r, idx);
2219		}
2220	}
2221}
2222
2223static void
2224vmxnet3_legacy_intr(void *xsc)
2225{
2226	struct vmxnet3_softc *sc;
2227	struct vmxnet3_rxqueue *rxq;
2228	struct vmxnet3_txqueue *txq;
2229
2230	sc = xsc;
2231	rxq = &sc->vmx_rxq[0];
2232	txq = &sc->vmx_txq[0];
2233
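	/*
	 * For a true (possibly shared) legacy interrupt, the status
	 * register indicates whether this device raised it; zero means
	 * the interrupt was not ours.
	 */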
2234	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2235		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2236			return;
2237	}
2238	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2239		vmxnet3_disable_all_intrs(sc);
2240
2241	if (sc->vmx_ds->event != 0)
2242		vmxnet3_evintr(sc);
2243
2244	VMXNET3_RXQ_LOCK(rxq);
2245	vmxnet3_rxq_eof(rxq);
2246	VMXNET3_RXQ_UNLOCK(rxq);
2247
2248	VMXNET3_TXQ_LOCK(txq);
2249	vmxnet3_txq_eof(txq);
2250	vmxnet3_txq_start(txq);
2251	VMXNET3_TXQ_UNLOCK(txq);
2252
2253	vmxnet3_enable_all_intrs(sc);
2254}
2255
2256static void
2257vmxnet3_txq_intr(void *xtxq)
2258{
2259	struct vmxnet3_softc *sc;
2260	struct vmxnet3_txqueue *txq;
2261
2262	txq = xtxq;
2263	sc = txq->vxtxq_sc;
2264
2265	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2266		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2267
2268	VMXNET3_TXQ_LOCK(txq);
2269	vmxnet3_txq_eof(txq);
2270	vmxnet3_txq_start(txq);
2271	VMXNET3_TXQ_UNLOCK(txq);
2272
2273	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2274}
2275
2276static void
2277vmxnet3_rxq_intr(void *xrxq)
2278{
2279	struct vmxnet3_softc *sc;
2280	struct vmxnet3_rxqueue *rxq;
2281
2282	rxq = xrxq;
2283	sc = rxq->vxrxq_sc;
2284
2285	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2286		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2287
2288	VMXNET3_RXQ_LOCK(rxq);
2289	vmxnet3_rxq_eof(rxq);
2290	VMXNET3_RXQ_UNLOCK(rxq);
2291
2292	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2293}
2294
2295static void
2296vmxnet3_event_intr(void *xsc)
2297{
2298	struct vmxnet3_softc *sc;
2299
2300	sc = xsc;
2301
2302	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2303		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2304
2305	if (sc->vmx_ds->event != 0)
2306		vmxnet3_evintr(sc);
2307
2308	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2309}
2310
2311static void
2312vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2313{
2314	struct vmxnet3_txring *txr;
2315	struct vmxnet3_txbuf *txb;
2316	int i;
2317
2318	txr = &txq->vxtxq_cmd_ring;
2319
2320	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2321		txb = &txr->vxtxr_txbuf[i];
2322
2323		if (txb->vtxb_m == NULL)
2324			continue;
2325
2326		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2327		    BUS_DMASYNC_POSTWRITE);
2328		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2329		m_freem(txb->vtxb_m);
2330		txb->vtxb_m = NULL;
2331	}
2332}
2333
2334static void
2335vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2336{
2337	struct vmxnet3_rxring *rxr;
2338	struct vmxnet3_rxbuf *rxb;
2339	int i, j;
2340
2341	if (rxq->vxrxq_mhead != NULL) {
2342		m_freem(rxq->vxrxq_mhead);
2343		rxq->vxrxq_mhead = NULL;
2344		rxq->vxrxq_mtail = NULL;
2345	}
2346
2347	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2348		rxr = &rxq->vxrxq_cmd_ring[i];
2349
2350		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2351			rxb = &rxr->vxrxr_rxbuf[j];
2352
2353			if (rxb->vrxb_m == NULL)
2354				continue;
2355
2356			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2357			    BUS_DMASYNC_POSTREAD);
2358			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2359			m_freem(rxb->vrxb_m);
2360			rxb->vrxb_m = NULL;
2361		}
2362	}
2363}
2364
2365static void
2366vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2367{
2368	struct vmxnet3_rxqueue *rxq;
2369	struct vmxnet3_txqueue *txq;
2370	int i;
2371
2372	for (i = 0; i < sc->vmx_nrxqueues; i++) {
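	/*
	 * Briefly taking and dropping each queue lock acts as a
	 * rendezvous: once a lock is acquired, any handler processing
	 * that queue has finished and will see the interface is no
	 * longer running.
	 */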
2373		rxq = &sc->vmx_rxq[i];
2374		VMXNET3_RXQ_LOCK(rxq);
2375		VMXNET3_RXQ_UNLOCK(rxq);
2376	}
2377
2378	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2379		txq = &sc->vmx_txq[i];
2380		VMXNET3_TXQ_LOCK(txq);
2381		VMXNET3_TXQ_UNLOCK(txq);
2382	}
2383}
2384
2385static void
2386vmxnet3_stop(struct vmxnet3_softc *sc)
2387{
2388	struct ifnet *ifp;
2389	int q;
2390
2391	ifp = sc->vmx_ifp;
2392	VMXNET3_CORE_LOCK_ASSERT(sc);
2393
2394	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2395	sc->vmx_link_active = 0;
2396	callout_stop(&sc->vmx_tick);
2397
2398	/* Disable interrupts. */
2399	vmxnet3_disable_all_intrs(sc);
2400	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2401
2402	vmxnet3_stop_rendezvous(sc);
2403
2404	for (q = 0; q < sc->vmx_ntxqueues; q++)
2405		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2406	for (q = 0; q < sc->vmx_nrxqueues; q++)
2407		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2408
2409	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2410}
2411
2412static void
2413vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2414{
2415	struct vmxnet3_txring *txr;
2416	struct vmxnet3_comp_ring *txc;
2417
2418	txr = &txq->vxtxq_cmd_ring;
2419	txr->vxtxr_head = 0;
2420	txr->vxtxr_next = 0;
2421	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2422	bzero(txr->vxtxr_txd,
2423	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2424
2425	txc = &txq->vxtxq_comp_ring;
2426	txc->vxcr_next = 0;
2427	txc->vxcr_gen = VMXNET3_INIT_GEN;
2428	bzero(txc->vxcr_u.txcd,
2429	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2430}
2431
2432static int
2433vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2434{
2435	struct ifnet *ifp;
2436	struct vmxnet3_rxring *rxr;
2437	struct vmxnet3_comp_ring *rxc;
2438	int i, populate, idx, frame_size, error;
2439
2440	ifp = sc->vmx_ifp;
2441	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2442	    ifp->if_mtu;
2443
2444	/*
2445	 * If the MTU causes us to exceed what a regular sized cluster can
2446	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2447	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2448	 *
2449	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2450	 * our life easier. We do not support changing the ring size after
2451	 * the attach.
2452	 */
2453	if (frame_size <= MCLBYTES)
2454		sc->vmx_rx_max_chain = 1;
2455	else
2456		sc->vmx_rx_max_chain = 2;
2457
2458	/*
2459	 * Only populate ring 1 if the configuration will take advantage
2460	 * of it. That is either when LRO is enabled or the frame size
2461	 * exceeds what ring 0 can contain.
2462	 */
2463	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2464	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2465		populate = 1;
2466	else
2467		populate = VMXNET3_RXRINGS_PERQ;
2468
2469	for (i = 0; i < populate; i++) {
2470		rxr = &rxq->vxrxq_cmd_ring[i];
2471		rxr->vxrxr_fill = 0;
2472		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2473		bzero(rxr->vxrxr_rxd,
2474		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2475
2476		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2477			error = vmxnet3_newbuf(sc, rxr);
2478			if (error)
2479				return (error);
2480		}
2481	}
2482
2483	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2484		rxr = &rxq->vxrxq_cmd_ring[i];
2485		rxr->vxrxr_fill = 0;
2486		rxr->vxrxr_gen = 0;
2487		bzero(rxr->vxrxr_rxd,
2488		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2489	}
2490
2491	rxc = &rxq->vxrxq_comp_ring;
2492	rxc->vxcr_next = 0;
2493	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2494	bzero(rxc->vxcr_u.rxcd,
2495	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2496
2497	return (0);
2498}
2499
2500static int
2501vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2502{
2503	device_t dev;
2504	int q, error;
2505
2506	dev = sc->vmx_dev;
2507
2508	for (q = 0; q < sc->vmx_ntxqueues; q++)
2509		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2510
2511	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2512		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2513		if (error) {
2514			device_printf(dev, "cannot populate Rx queue %d\n", q);
2515			return (error);
2516		}
2517	}
2518
2519	return (0);
2520}
2521
2522static int
2523vmxnet3_enable_device(struct vmxnet3_softc *sc)
2524{
2525	int q;
2526
2527	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2528		device_printf(sc->vmx_dev, "device enable command failed!\n");
2529		return (1);
2530	}
2531
2532	/* Reset the Rx queue heads. */
2533	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2534		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2535		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2536	}
2537
2538	return (0);
2539}
2540
2541static void
2542vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2543{
2544	struct ifnet *ifp;
2545
2546	ifp = sc->vmx_ifp;
2547
2548	vmxnet3_set_rxfilter(sc);
2549
2550	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2551		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2552		    sizeof(sc->vmx_ds->vlan_filter));
2553	else
2554		bzero(sc->vmx_ds->vlan_filter,
2555		    sizeof(sc->vmx_ds->vlan_filter));
2556	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2557}
2558
2559static int
2560vmxnet3_reinit(struct vmxnet3_softc *sc)
2561{
2562
2563	vmxnet3_reinit_interface(sc);
2564	vmxnet3_reinit_shared_data(sc);
2565
2566	if (vmxnet3_reinit_queues(sc) != 0)
2567		return (ENXIO);
2568
2569	if (vmxnet3_enable_device(sc) != 0)
2570		return (ENXIO);
2571
2572	vmxnet3_reinit_rxfilters(sc);
2573
2574	return (0);
2575}
2576
2577static void
2578vmxnet3_init_locked(struct vmxnet3_softc *sc)
2579{
2580	struct ifnet *ifp;
2581
2582	ifp = sc->vmx_ifp;
2583
2584	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2585		return;
2586
2587	vmxnet3_stop(sc);
2588
2589	if (vmxnet3_reinit(sc) != 0) {
2590		vmxnet3_stop(sc);
2591		return;
2592	}
2593
2594	ifp->if_drv_flags |= IFF_DRV_RUNNING;
2595	vmxnet3_link_status(sc);
2596
2597	vmxnet3_enable_all_intrs(sc);
2598	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2599}
2600
2601static void
2602vmxnet3_init(void *xsc)
2603{
2604	struct vmxnet3_softc *sc;
2605
2606	sc = xsc;
2607
2608	VMXNET3_CORE_LOCK(sc);
2609	vmxnet3_init_locked(sc);
2610	VMXNET3_CORE_UNLOCK(sc);
2611}
2612
2613/*
2614 * BMV: Much of this can go away once we finally have offsets in
2615 * the mbuf packet header. Bug andre@.
2616 */
2617static int
2618vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2619    int *etype, int *proto, int *start)
2620{
2621	struct ether_vlan_header *evh;
2622	int offset;
2623#if defined(INET)
2624	struct ip *ip = NULL;
2625	struct ip iphdr;
2626#endif
2627#if defined(INET6)
2628	struct ip6_hdr *ip6 = NULL;
2629	struct ip6_hdr ip6hdr;
2630#endif
2631
2632	evh = mtod(m, struct ether_vlan_header *);
2633	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2634		/* BMV: We should handle nested VLAN tags too. */
2635		*etype = ntohs(evh->evl_proto);
2636		offset = sizeof(struct ether_vlan_header);
2637	} else {
2638		*etype = ntohs(evh->evl_encap_proto);
2639		offset = sizeof(struct ether_header);
2640	}
2641
2642	switch (*etype) {
2643#if defined(INET)
2644	case ETHERTYPE_IP:
2645		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2646			m_copydata(m, offset, sizeof(struct ip),
2647			    (caddr_t) &iphdr);
2648			ip = &iphdr;
2649		} else
2650			ip = mtodo(m, offset);
2651		*proto = ip->ip_p;
2652		*start = offset + (ip->ip_hl << 2);
2653		break;
2654#endif
2655#if defined(INET6)
2656	case ETHERTYPE_IPV6:
2657		if (__predict_false(m->m_len <
2658		    offset + sizeof(struct ip6_hdr))) {
2659			m_copydata(m, offset, sizeof(struct ip6_hdr),
2660			    (caddr_t) &ip6hdr);
2661			ip6 = &ip6hdr;
2662		} else
2663			ip6 = mtodo(m, offset);
2664		*proto = -1;
2665		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2666		/* Assert the network stack sent us a valid packet. */
2667		KASSERT(*start > offset,
2668		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2669		    *start, offset, *proto));
2670		break;
2671#endif
2672	default:
2673		return (EINVAL);
2674	}
2675
2676	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2677		struct tcphdr *tcp, tcphdr;
2678		uint16_t sum;
2679
2680		if (__predict_false(*proto != IPPROTO_TCP)) {
2681			/* Likely failed to correctly parse the mbuf. */
2682			return (EINVAL);
2683		}
2684
2685		txq->vxtxq_stats.vmtxs_tso++;
2686
2687		switch (*etype) {
2688#if defined(INET)
2689		case ETHERTYPE_IP:
2690			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2691			    htons(IPPROTO_TCP));
2692			break;
2693#endif
2694#if defined(INET6)
2695		case ETHERTYPE_IPV6:
2696			sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2697			break;
2698#endif
2699		default:
2700			sum = 0;
2701			break;
2702		}
2703
2704		if (m->m_len < *start + sizeof(struct tcphdr)) {
2705			m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2706			    sizeof(uint16_t), (caddr_t) &sum);
2707			m_copydata(m, *start, sizeof(struct tcphdr),
2708			    (caddr_t) &tcphdr);
2709			tcp = &tcphdr;
2710		} else {
2711			tcp = mtodo(m, *start);
2712			tcp->th_sum = sum;
2713		}
2714
2715		/*
2716		 * For TSO, the size of the protocol header is also
2717		 * included in the descriptor header size.
2718		 */
2719		*start += (tcp->th_off << 2);
2720	} else
2721		txq->vxtxq_stats.vmtxs_csum++;
2722
2723	return (0);
2724}
2725
2726static int
2727vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2728    bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2729{
2730	struct vmxnet3_txring *txr;
2731	struct mbuf *m;
2732	bus_dma_tag_t tag;
2733	int error;
2734
2735	txr = &txq->vxtxq_cmd_ring;
2736	m = *m0;
2737	tag = txr->vxtxr_txtag;
2738
2739	error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2740	if (error == 0 || error != EFBIG)
2741		return (error);
2742
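	/* EFBIG means too many segments; defrag and retry once. */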
2743	m = m_defrag(m, M_NOWAIT);
2744	if (m != NULL) {
2745		*m0 = m;
2746		error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2747	} else
2748		error = ENOBUFS;
2749
2750	if (error) {
2751		m_freem(*m0);
2752		*m0 = NULL;
2753		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2754	} else
2755		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2756
2757	return (error);
2758}
2759
2760static void
2761vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2762{
2763	struct vmxnet3_txring *txr;
2764
2765	txr = &txq->vxtxq_cmd_ring;
2766	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2767}
2768
2769static int
2770vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2771{
2772	struct vmxnet3_softc *sc;
2773	struct vmxnet3_txring *txr;
2774	struct vmxnet3_txdesc *txd, *sop;
2775	struct mbuf *m;
2776	bus_dmamap_t dmap;
2777	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2778	int i, gen, nsegs, etype, proto, start, error;
2779
2780	sc = txq->vxtxq_sc;
2781	start = 0;
2782	txd = NULL;
2783	txr = &txq->vxtxq_cmd_ring;
2784	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2785
2786	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2787	if (error)
2788		return (error);
2789
2790	m = *m0;
2791	M_ASSERTPKTHDR(m);
2792	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2793	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2794
2795	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2796		txq->vxtxq_stats.vmtxs_full++;
2797		vmxnet3_txq_unload_mbuf(txq, dmap);
2798		return (ENOSPC);
2799	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2800		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2801		if (error) {
2802			txq->vxtxq_stats.vmtxs_offload_failed++;
2803			vmxnet3_txq_unload_mbuf(txq, dmap);
2804			m_freem(m);
2805			*m0 = NULL;
2806			return (error);
2807		}
2808	}
2809
2810	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2811	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2812	gen = txr->vxtxr_gen ^ 1;	/* SOP stays owned by the cpu until flipped below */
2813
2814	for (i = 0; i < nsegs; i++) {
2815		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2816
2817		txd->addr = segs[i].ds_addr;
2818		txd->len = segs[i].ds_len;
2819		txd->gen = gen;
2820		txd->dtype = 0;
2821		txd->offload_mode = VMXNET3_OM_NONE;
2822		txd->offload_pos = 0;
2823		txd->hlen = 0;
2824		txd->eop = 0;
2825		txd->compreq = 0;
2826		txd->vtag_mode = 0;
2827		txd->vtag = 0;
2828
2829		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2830			txr->vxtxr_head = 0;
2831			txr->vxtxr_gen ^= 1;
2832		}
2833		gen = txr->vxtxr_gen;
2834	}
2835	txd->eop = 1;
2836	txd->compreq = 1;
2837
2838	if (m->m_flags & M_VLANTAG) {
2839		sop->vtag_mode = 1;
2840		sop->vtag = m->m_pkthdr.ether_vtag;
2841	}
2842
2843	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2844		sop->offload_mode = VMXNET3_OM_TSO;
2845		sop->hlen = start;
2846		sop->offload_pos = m->m_pkthdr.tso_segsz;
2847	} else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2848	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
2849		sop->offload_mode = VMXNET3_OM_CSUM;
2850		sop->hlen = start;
2851		sop->offload_pos = start + m->m_pkthdr.csum_data;
2852	}
2853
2854	/* Finally, change the ownership. */
2855	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2856	sop->gen ^= 1;
2857
2858	txq->vxtxq_ts->npending += nsegs;
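	/*
	 * Coalesce doorbell writes: only tell the device about the new
	 * ring head once enough descriptors have been posted.
	 */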
2859	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2860		txq->vxtxq_ts->npending = 0;
2861		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2862		    txr->vxtxr_head);
2863	}
2864
2865	return (0);
2866}
2867
2868#ifdef VMXNET3_LEGACY_TX
2869
2870static void
2871vmxnet3_start_locked(struct ifnet *ifp)
2872{
2873	struct vmxnet3_softc *sc;
2874	struct vmxnet3_txqueue *txq;
2875	struct vmxnet3_txring *txr;
2876	struct mbuf *m_head;
2877	int tx, avail;
2878
2879	sc = ifp->if_softc;
2880	txq = &sc->vmx_txq[0];
2881	txr = &txq->vxtxq_cmd_ring;
2882	tx = 0;
2883
2884	VMXNET3_TXQ_LOCK_ASSERT(txq);
2885
2886	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2887	    sc->vmx_link_active == 0)
2888		return;
2889
2890	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2891		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2892			break;
2893
2894		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2895		if (m_head == NULL)
2896			break;
2897
2898		/* Assume the worst case if this mbuf is the head of a chain. */
2899		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2900			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2901			break;
2902		}
2903
2904		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2905			if (m_head != NULL)
2906				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2907			break;
2908		}
2909
2910		tx++;
2911		ETHER_BPF_MTAP(ifp, m_head);
2912	}
2913
2914	if (tx > 0)
2915		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2916}
2917
2918static void
2919vmxnet3_start(struct ifnet *ifp)
2920{
2921	struct vmxnet3_softc *sc;
2922	struct vmxnet3_txqueue *txq;
2923
2924	sc = ifp->if_softc;
2925	txq = &sc->vmx_txq[0];
2926
2927	VMXNET3_TXQ_LOCK(txq);
2928	vmxnet3_start_locked(ifp);
2929	VMXNET3_TXQ_UNLOCK(txq);
2930}
2931
2932#else /* !VMXNET3_LEGACY_TX */
2933
2934static int
2935vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2936{
2937	struct vmxnet3_softc *sc;
2938	struct vmxnet3_txring *txr;
2939	struct buf_ring *br;
2940	struct ifnet *ifp;
2941	int tx, avail, error;
2942
2943	sc = txq->vxtxq_sc;
2944	br = txq->vxtxq_br;
2945	ifp = sc->vmx_ifp;
2946	txr = &txq->vxtxq_cmd_ring;
2947	tx = 0;
2948	error = 0;
2949
2950	VMXNET3_TXQ_LOCK_ASSERT(txq);
2951
2952	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2953	    sc->vmx_link_active == 0) {
2954		if (m != NULL)
2955			error = drbr_enqueue(ifp, br, m);
2956		return (error);
2957	}
2958
2959	if (m != NULL) {
2960		error = drbr_enqueue(ifp, br, m);
2961		if (error)
2962			return (error);
2963	}
2964
2965	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2966		m = drbr_peek(ifp, br);
2967		if (m == NULL)
2968			break;
2969
2970		/* Assume the worst case if this mbuf is the head of a chain. */
2971		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2972			drbr_putback(ifp, br, m);
2973			break;
2974		}
2975
2976		if (vmxnet3_txq_encap(txq, &m) != 0) {
2977			if (m != NULL)
2978				drbr_putback(ifp, br, m);
2979			else
2980				drbr_advance(ifp, br);
2981			break;
2982		}
2983		drbr_advance(ifp, br);
2984
2985		tx++;
2986		ETHER_BPF_MTAP(ifp, m);
2987	}
2988
2989	if (tx > 0)
2990		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2991
2992	return (0);
2993}
2994
2995static int
2996vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2997{
2998	struct vmxnet3_softc *sc;
2999	struct vmxnet3_txqueue *txq;
3000	int i, ntxq, error;
3001
3002	sc = ifp->if_softc;
3003	ntxq = sc->vmx_ntxqueues;
3004
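	/* Select a queue by flow id when available, else by CPU. */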
3005	if (m->m_flags & M_FLOWID)
3006		i = m->m_pkthdr.flowid % ntxq;
3007	else
3008		i = curcpu % ntxq;
3009
3010	txq = &sc->vmx_txq[i];
3011
3012	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3013		error = vmxnet3_txq_mq_start_locked(txq, m);
3014		VMXNET3_TXQ_UNLOCK(txq);
3015	} else {
3016		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3017		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3018	}
3019
3020	return (error);
3021}
3022
3023static void
3024vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3025{
3026	struct vmxnet3_softc *sc;
3027	struct vmxnet3_txqueue *txq;
3028
3029	txq = xtxq;
3030	sc = txq->vxtxq_sc;
3031
3032	VMXNET3_TXQ_LOCK(txq);
3033	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3034		vmxnet3_txq_mq_start_locked(txq, NULL);
3035	VMXNET3_TXQ_UNLOCK(txq);
3036}
3037
3038#endif /* VMXNET3_LEGACY_TX */
3039
3040static void
3041vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3042{
3043	struct vmxnet3_softc *sc;
3044	struct ifnet *ifp;
3045
3046	sc = txq->vxtxq_sc;
3047	ifp = sc->vmx_ifp;
3048
3049#ifdef VMXNET3_LEGACY_TX
3050	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3051		vmxnet3_start_locked(ifp);
3052#else
3053	if (!drbr_empty(ifp, txq->vxtxq_br))
3054		vmxnet3_txq_mq_start_locked(txq, NULL);
3055#endif
3056}
3057
3058static void
3059vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3060{
3061	struct vmxnet3_txqueue *txq;
3062	int i;
3063
3064	VMXNET3_CORE_LOCK_ASSERT(sc);
3065
3066	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3067		txq = &sc->vmx_txq[i];
3068
3069		VMXNET3_TXQ_LOCK(txq);
3070		vmxnet3_txq_start(txq);
3071		VMXNET3_TXQ_UNLOCK(txq);
3072	}
3073}
3074
3075static void
3076vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3077{
3078	struct ifnet *ifp;
3079	int idx, bit;
3080
3081	ifp = sc->vmx_ifp;
3082	idx = (tag >> 5) & 0x7F;	/* Word index into the 4096-bit VLAN table. */
3083	bit = tag & 0x1F;		/* Bit within that 32-bit word. */
3084
3085	if (tag == 0 || tag > 4095)
3086		return;
3087
3088	VMXNET3_CORE_LOCK(sc);
3089
3090	/* Update our private VLAN bitvector. */
3091	if (add)
3092		sc->vmx_vlan_filter[idx] |= (1 << bit);
3093	else
3094		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3095
3096	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3097		if (add)
3098			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3099		else
3100			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3101		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3102	}
3103
3104	VMXNET3_CORE_UNLOCK(sc);
3105}
3106
3107static void
3108vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3109{
3110
3111	if (ifp->if_softc == arg)
3112		vmxnet3_update_vlan_filter(arg, 1, tag);
3113}
3114
3115static void
3116vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3117{
3118
3119	if (ifp->if_softc == arg)
3120		vmxnet3_update_vlan_filter(arg, 0, tag);
3121}
3122
3123static void
3124vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3125{
3126	struct ifnet *ifp;
3127	struct vmxnet3_driver_shared *ds;
3128	struct ifmultiaddr *ifma;
3129	u_int mode;
3130
3131	ifp = sc->vmx_ifp;
3132	ds = sc->vmx_ds;
3133
3134	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3135	if (ifp->if_flags & IFF_PROMISC)
3136		mode |= VMXNET3_RXMODE_PROMISC;
3137	if (ifp->if_flags & IFF_ALLMULTI)
3138		mode |= VMXNET3_RXMODE_ALLMULTI;
3139	else {
3140		int cnt = 0, overflow = 0;
3141
3142		if_maddr_rlock(ifp);
3143		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3144			if (ifma->ifma_addr->sa_family != AF_LINK)
3145				continue;
3146			else if (cnt == VMXNET3_MULTICAST_MAX) {
3147				overflow = 1;
3148				break;
3149			}
3150
3151			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3152			    &sc->vmx_mcast[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3153			cnt++;
3154		}
3155		if_maddr_runlock(ifp);
3156
3157		if (overflow != 0) {
3158			cnt = 0;
3159			mode |= VMXNET3_RXMODE_ALLMULTI;
3160		} else if (cnt > 0)
3161			mode |= VMXNET3_RXMODE_MCAST;
3162		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3163	}
3164
3165	ds->rxmode = mode;
3166
3167	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3168	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3169}
3170
3171static int
3172vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3173{
3174	struct ifnet *ifp;
3175
3176	ifp = sc->vmx_ifp;
3177
3178	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3179		return (EINVAL);
3180
3181	ifp->if_mtu = mtu;
3182
3183	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
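		/* Force a full reinit with the new MTU. */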
3184		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3185		vmxnet3_init_locked(sc);
3186	}
3187
3188	return (0);
3189}
3190
3191static int
3192vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3193{
3194	struct vmxnet3_softc *sc;
3195	struct ifreq *ifr;
3196	int reinit, mask, error;
3197
3198	sc = ifp->if_softc;
3199	ifr = (struct ifreq *) data;
3200	error = 0;
3201
3202	switch (cmd) {
3203	case SIOCSIFMTU:
3204		if (ifp->if_mtu != ifr->ifr_mtu) {
3205			VMXNET3_CORE_LOCK(sc);
3206			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3207			VMXNET3_CORE_UNLOCK(sc);
3208		}
3209		break;
3210
3211	case SIOCSIFFLAGS:
3212		VMXNET3_CORE_LOCK(sc);
3213		if (ifp->if_flags & IFF_UP) {
3214			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3215				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3216				    (IFF_PROMISC | IFF_ALLMULTI)) {
3217					vmxnet3_set_rxfilter(sc);
3218				}
3219			} else
3220				vmxnet3_init_locked(sc);
3221		} else {
3222			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3223				vmxnet3_stop(sc);
3224		}
3225		sc->vmx_if_flags = ifp->if_flags;
3226		VMXNET3_CORE_UNLOCK(sc);
3227		break;
3228
3229	case SIOCADDMULTI:
3230	case SIOCDELMULTI:
3231		VMXNET3_CORE_LOCK(sc);
3232		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3233			vmxnet3_set_rxfilter(sc);
3234		VMXNET3_CORE_UNLOCK(sc);
3235		break;
3236
3237	case SIOCSIFMEDIA:
3238	case SIOCGIFMEDIA:
3239		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3240		break;
3241
3242	case SIOCSIFCAP:
3243		VMXNET3_CORE_LOCK(sc);
3244		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3245
3246		if (mask & IFCAP_TXCSUM)
3247			ifp->if_capenable ^= IFCAP_TXCSUM;
3248		if (mask & IFCAP_TXCSUM_IPV6)
3249			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3250		if (mask & IFCAP_TSO4)
3251			ifp->if_capenable ^= IFCAP_TSO4;
3252		if (mask & IFCAP_TSO6)
3253			ifp->if_capenable ^= IFCAP_TSO6;
3254
3255		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3256		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3257			/* Changing these features requires us to reinit. */
3258			reinit = 1;
3259
3260			if (mask & IFCAP_RXCSUM)
3261				ifp->if_capenable ^= IFCAP_RXCSUM;
3262			if (mask & IFCAP_RXCSUM_IPV6)
3263				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3264			if (mask & IFCAP_LRO)
3265				ifp->if_capenable ^= IFCAP_LRO;
3266			if (mask & IFCAP_VLAN_HWTAGGING)
3267				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3268			if (mask & IFCAP_VLAN_HWFILTER)
3269				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3270		} else
3271			reinit = 0;
3272
3273		if (mask & IFCAP_VLAN_HWTSO)
3274			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3275
3276		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3277			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3278			vmxnet3_init_locked(sc);
3279		}
3280
3281		VMXNET3_CORE_UNLOCK(sc);
3282		VLAN_CAPABILITIES(ifp);
3283		break;
3284
3285	default:
3286		error = ether_ioctl(ifp, cmd, data);
3287		break;
3288	}
3289
3290	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3291
3292	return (error);
3293}
3294
3295#ifndef VMXNET3_LEGACY_TX
3296static void
3297vmxnet3_qflush(struct ifnet *ifp)
3298{
3299	struct vmxnet3_softc *sc;
3300	struct vmxnet3_txqueue *txq;
3301	struct mbuf *m;
3302	int i;
3303
3304	sc = ifp->if_softc;
3305
3306	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3307		txq = &sc->vmx_txq[i];
3308
3309		VMXNET3_TXQ_LOCK(txq);
3310		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3311			m_freem(m);
3312		VMXNET3_TXQ_UNLOCK(txq);
3313	}
3314
3315	if_qflush(ifp);
3316}
3317#endif
3318
3319static int
3320vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3321{
3322	struct vmxnet3_softc *sc;
3323
3324	sc = txq->vxtxq_sc;
3325
3326	VMXNET3_TXQ_LOCK(txq);
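	/*
	 * The watchdog is armed by the transmit path and cleared when
	 * the ring drains; a timeout is declared only once it counts
	 * down to zero here.
	 */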
3327	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3328		VMXNET3_TXQ_UNLOCK(txq);
3329		return (0);
3330	}
3331	VMXNET3_TXQ_UNLOCK(txq);
3332
3333	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3334	    txq->vxtxq_id);
3335	return (1);
3336}
3337
3338static void
3339vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3340{
3341
3342	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3343}
3344
3345static void
3346vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3347    struct vmxnet3_txq_stats *accum)
3348{
3349	struct vmxnet3_txq_stats *st;
3350
3351	st = &txq->vxtxq_stats;
3352
3353	accum->vmtxs_opackets += st->vmtxs_opackets;
3354	accum->vmtxs_obytes += st->vmtxs_obytes;
3355	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3356	accum->vmtxs_csum += st->vmtxs_csum;
3357	accum->vmtxs_tso += st->vmtxs_tso;
3358	accum->vmtxs_full += st->vmtxs_full;
3359	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3360}
3361
3362static void
3363vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3364    struct vmxnet3_rxq_stats *accum)
3365{
3366	struct vmxnet3_rxq_stats *st;
3367
3368	st = &rxq->vxrxq_stats;
3369
3370	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3371	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3372	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3373	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3374}
3375
3376static void
3377vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3378{
3379	struct ifnet *ifp;
3380	struct vmxnet3_statistics *st;
3381	struct vmxnet3_txq_stats txaccum;
3382	struct vmxnet3_rxq_stats rxaccum;
3383	int i;
3384
3385	ifp = sc->vmx_ifp;
3386	st = &sc->vmx_stats;
3387
3388	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3389	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3390
3391	for (i = 0; i < sc->vmx_ntxqueues; i++)
3392		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3393	for (i = 0; i < sc->vmx_nrxqueues; i++)
3394		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3395
3396	/*
3397	 * With the exception of if_ierrors, these ifnet statistics are
3398	 * only updated in the driver, so just set them to our accumulated
3399	 * values. if_ierrors is updated in ether_input() for malformed
3400	 * frames that we should have already discarded.
3401	 */
3402	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3403	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3404	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3405	ifp->if_opackets = txaccum.vmtxs_opackets;
3406#ifndef VMXNET3_LEGACY_TX
3407	ifp->if_obytes = txaccum.vmtxs_obytes;
3408	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3409#endif
3410}
3411
3412static void
3413vmxnet3_tick(void *xsc)
3414{
3415	struct vmxnet3_softc *sc;
3416	struct ifnet *ifp;
3417	int i, timedout;
3418
3419	sc = xsc;
3420	ifp = sc->vmx_ifp;
3421	timedout = 0;
3422
3423	VMXNET3_CORE_LOCK_ASSERT(sc);
3424
3425	vmxnet3_accumulate_stats(sc);
3426	vmxnet3_refresh_host_stats(sc);
3427
3428	for (i = 0; i < sc->vmx_ntxqueues; i++)
3429		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3430
3431	if (timedout != 0) {
3432		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3433		vmxnet3_init_locked(sc);
3434	} else
3435		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3436}
3437
3438static int
3439vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3440{
3441	uint32_t status;
3442
3443	/* Also update the link speed while here. */
3444	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3445	sc->vmx_link_speed = status >> 16;
3446	return !!(status & 0x1);
3447}
3448
3449static void
3450vmxnet3_link_status(struct vmxnet3_softc *sc)
3451{
3452	struct ifnet *ifp;
3453	int link;
3454
3455	ifp = sc->vmx_ifp;
3456	link = vmxnet3_link_is_up(sc);
3457
3458	if (link != 0 && sc->vmx_link_active == 0) {
3459		sc->vmx_link_active = 1;
3460		if_link_state_change(ifp, LINK_STATE_UP);
3461	} else if (link == 0 && sc->vmx_link_active != 0) {
3462		sc->vmx_link_active = 0;
3463		if_link_state_change(ifp, LINK_STATE_DOWN);
3464	}
3465}
3466
3467static void
3468vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3469{
3470	struct vmxnet3_softc *sc;
3471
3472	sc = ifp->if_softc;
3473
3474	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3475	ifmr->ifm_status = IFM_AVALID;
3476
3477	VMXNET3_CORE_LOCK(sc);
3478	if (vmxnet3_link_is_up(sc) != 0)
3479		ifmr->ifm_status |= IFM_ACTIVE;
3480	else
3481		ifmr->ifm_status |= IFM_NONE;
3482	VMXNET3_CORE_UNLOCK(sc);
3483}
3484
3485static int
3486vmxnet3_media_change(struct ifnet *ifp)
3487{
3488
3489	/* Ignore. */
3490	return (0);
3491}
3492
3493static void
3494vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3495{
3496	uint32_t ml, mh;
3497
3498	ml  = sc->vmx_lladdr[0];
3499	ml |= sc->vmx_lladdr[1] << 8;
3500	ml |= sc->vmx_lladdr[2] << 16;
3501	ml |= sc->vmx_lladdr[3] << 24;
3502	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3503
3504	mh  = sc->vmx_lladdr[4];
3505	mh |= sc->vmx_lladdr[5] << 8;
3506	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3507}
3508
3509static void
3510vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3511{
3512	uint32_t ml, mh;
3513
3514	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3515	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3516
3517	sc->vmx_lladdr[0] = ml;
3518	sc->vmx_lladdr[1] = ml >> 8;
3519	sc->vmx_lladdr[2] = ml >> 16;
3520	sc->vmx_lladdr[3] = ml >> 24;
3521	sc->vmx_lladdr[4] = mh;
3522	sc->vmx_lladdr[5] = mh >> 8;
3523}
3524
3525static void
3526vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3527    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3528{
3529	struct sysctl_oid *node, *txsnode;
3530	struct sysctl_oid_list *list, *txslist;
3531	struct vmxnet3_txq_stats *stats;
3532	struct UPT1_TxStats *txstats;
3533	char namebuf[16];
3534
3535	stats = &txq->vxtxq_stats;
3536	txstats = &txq->vxtxq_ts->stats;
3537
3538	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3539	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3540	    NULL, "Transmit Queue");
3541	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3542
3543	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3544	    &stats->vmtxs_opackets, "Transmit packets");
3545	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3546	    &stats->vmtxs_obytes, "Transmit bytes");
3547	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3548	    &stats->vmtxs_omcasts, "Transmit multicasts");
3549	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3550	    &stats->vmtxs_csum, "Transmit checksum offloaded");
3551	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3552	    &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3553	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3554	    &stats->vmtxs_full, "Transmit ring full");
3555	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3556	    &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3557
3558	/*
3559	 * Add statistics reported by the host. These are updated once
3560	 * per second.
3561	 */
3562	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3563	    NULL, "Host Statistics");
3564	txslist = SYSCTL_CHILDREN(txsnode);
3565	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3566	    &txstats->TSO_packets, "TSO packets");
3567	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3568	    &txstats->TSO_bytes, "TSO bytes");
3569	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3570	    &txstats->ucast_packets, "Unicast packets");
3571	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3572	    &txstats->ucast_bytes, "Unicast bytes");
3573	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3574	    &txstats->mcast_packets, "Multicast packets");
3575	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3576	    &txstats->mcast_bytes, "Multicast bytes");
3577	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3578	    &txstats->error, "Errors");
3579	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3580	    &txstats->discard, "Discards");
3581}
3582
3583static void
3584vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3585    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3586{
3587	struct sysctl_oid *node, *rxsnode;
3588	struct sysctl_oid_list *list, *rxslist;
3589	struct vmxnet3_rxq_stats *stats;
3590	struct UPT1_RxStats *rxstats;
3591	char namebuf[16];
3592
3593	stats = &rxq->vxrxq_stats;
3594	rxstats = &rxq->vxrxq_rs->stats;
3595
3596	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3597	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3598	    NULL, "Receive Queue");
3599	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3600
3601	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3602	    &stats->vmrxs_ipackets, "Receive packets");
3603	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3604	    &stats->vmrxs_ibytes, "Receive bytes");
3605	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3606	    &stats->vmrxs_iqdrops, "Receive drops");
3607	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3608	    &stats->vmrxs_ierrors, "Receive errors");
3609
3610	/*
3611	 * Add statistics reported by the host. These are updated once
3612	 * per second.
3613	 */
3614	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3615	    NULL, "Host Statistics");
3616	rxslist = SYSCTL_CHILDREN(rxsnode);
3617	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3618	    &rxstats->LRO_packets, "LRO packets");
3619	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3620	    &rxstats->LRO_bytes, "LRO bytes");
3621	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3622	    &rxstats->ucast_packets, "Unicast packets");
3623	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3624	    &rxstats->ucast_bytes, "Unicast bytes");
3625	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3626	    &rxstats->mcast_packets, "Multicast packets");
3627	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3628	    &rxstats->mcast_bytes, "Multicast bytes");
3629	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3630	    &rxstats->bcast_packets, "Broadcast packets");
3631	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3632	    &rxstats->bcast_bytes, "Broadcast bytes");
3633	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3634	    &rxstats->nobuffer, "No buffer");
3635	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3636	    &rxstats->error, "Errors");
3637}
3638
3639static void
3640vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3641    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3642{
3643	struct sysctl_oid *node;
3644	struct sysctl_oid_list *list;
3645	int i;
3646
3647	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3648		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3649
3650		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3651		    "debug", CTLFLAG_RD, NULL, "");
3652		list = SYSCTL_CHILDREN(node);
3653
3654		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3655		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3656		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3657		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3658		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3659		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3660		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3661		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3662		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3663		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3664		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3665		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3666		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3667		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3668	}
3669
3670	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3671		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3672
3673		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3674		    "debug", CTLFLAG_RD, NULL, "");
3675		list = SYSCTL_CHILDREN(node);
3676
3677		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3678		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3679		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3680		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3681		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3682		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3683		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3684		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3685		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3686		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3687		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3688		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3689		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3690		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3691		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3692		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3693		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3694		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3695	}
3696}
3697
3698static void
3699vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3700    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3701{
3702	int i;
3703
3704	for (i = 0; i < sc->vmx_ntxqueues; i++)
3705		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3706	for (i = 0; i < sc->vmx_nrxqueues; i++)
3707		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3708
3709	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3710}
3711
3712static void
3713vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3714{
3715	device_t dev;
3716	struct vmxnet3_statistics *stats;
3717	struct sysctl_ctx_list *ctx;
3718	struct sysctl_oid *tree;
3719	struct sysctl_oid_list *child;
3720
3721	dev = sc->vmx_dev;
3722	ctx = device_get_sysctl_ctx(dev);
3723	tree = device_get_sysctl_tree(dev);
3724	child = SYSCTL_CHILDREN(tree);
3725
3726	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3727	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3728	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3729	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3730	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3731	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3732	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3733	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3734
3735	stats = &sc->vmx_stats;
3736	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3737	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3738	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3739	    &stats->vmst_defrag_failed, 0,
3740	    "Tx mbuf dropped because defrag failed");
3741	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3742	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3743	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3744	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3745
3746	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3747}
3748
3749static void
3750vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3751{
3752
3753	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3754}
3755
3756static uint32_t
3757vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3758{
3759
3760	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3761}
3762
3763static void
3764vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3765{
3766
3767	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3768}
3769
3770static void
3771vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3772{
3773
3774	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3775}
3776
3777static uint32_t
3778vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3779{
3780
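	/*
	 * Commands are issued by writing to the BAR1 command register;
	 * the result is read back from the same register.
	 */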
3781	vmxnet3_write_cmd(sc, cmd);
3782	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3783	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3784	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3785}
3786
3787static void
3788vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3789{
3790
3791	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3792}
3793
3794static void
3795vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3796{
3797
3798	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3799}
3800
3801static void
3802vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3803{
3804	int i;
3805
3806	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3807	for (i = 0; i < sc->vmx_nintrs; i++)
3808		vmxnet3_enable_intr(sc, i);
3809}
3810
3811static void
3812vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3813{
3814	int i;
3815
3816	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3817	for (i = 0; i < sc->vmx_nintrs; i++)
3818		vmxnet3_disable_intr(sc, i);
3819}
3820
3821static void
3822vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3823{
3824	bus_addr_t *baddr = arg;
3825
3826	if (error == 0)
3827		*baddr = segs->ds_addr;
3828}
3829
3830static int
3831vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3832    struct vmxnet3_dma_alloc *dma)
3833{
3834	device_t dev;
3835	int error;
3836
3837	dev = sc->vmx_dev;
3838	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3839
3840	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3841	    align, 0,		/* alignment, bounds */
3842	    BUS_SPACE_MAXADDR,	/* lowaddr */
3843	    BUS_SPACE_MAXADDR,	/* highaddr */
3844	    NULL, NULL,		/* filter, filterarg */
3845	    size,		/* maxsize */
3846	    1,			/* nsegments */
3847	    size,		/* maxsegsize */
3848	    BUS_DMA_ALLOCNOW,	/* flags */
3849	    NULL,		/* lockfunc */
3850	    NULL,		/* lockfuncarg */
3851	    &dma->dma_tag);
3852	if (error) {
3853		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3854		goto fail;
3855	}
3856
3857	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3858	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3859	if (error) {
3860		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3861		goto fail;
3862	}
3863
3864	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3865	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3866	if (error) {
3867		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3868		goto fail;
3869	}
3870
3871	dma->dma_size = size;
3872
3873fail:
3874	if (error)
3875		vmxnet3_dma_free(sc, dma);
3876
3877	return (error);
3878}
3879
3880static void
3881vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3882{
3883
3884	if (dma->dma_tag != NULL) {
3885		if (dma->dma_map != NULL) {
3886			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3887			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3888			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3889		}
3890
3891		if (dma->dma_vaddr != NULL) {
3892			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3893			    dma->dma_map);
3894		}
3895
3896		bus_dma_tag_destroy(dma->dma_tag);
3897	}
3898	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3899}
3900
3901static int
3902vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3903{
3904	char path[64];
3905
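	/* Per-device override: hw.vmx.<unit>.<knob>; falls back to def. */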
3906	snprintf(path, sizeof(path),
3907	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3908	TUNABLE_INT_FETCH(path, &def);
3909
3910	return (def);
3911}
3912
3913/*
3914 * Since this is a purely paravirtualized device, we do not have
3915 * to worry about DMA coherency. But at times, we must make sure
3916 * both the compiler and CPU do not reorder memory operations.
3917 */
3918static inline void
3919vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3920{
3921
3922	switch (type) {
3923	case VMXNET3_BARRIER_RD:
3924		rmb();
3925		break;
3926	case VMXNET3_BARRIER_WR:
3927		wmb();
3928		break;
3929	case VMXNET3_BARRIER_RDWR:
3930		mb();
3931		break;
3932	default:
3933		panic("%s: bad barrier type %d", __func__, type);
3934	}
3935}
3936