1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/* Driver for VirtIO network devices. */
30
31#include <sys/param.h>
32#include <sys/eventhandler.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/sockio.h>
36#include <sys/malloc.h>
37#include <sys/mbuf.h>
38#include <sys/module.h>
39#include <sys/msan.h>
40#include <sys/socket.h>
41#include <sys/sysctl.h>
42#include <sys/random.h>
43#include <sys/sglist.h>
44#include <sys/lock.h>
45#include <sys/mutex.h>
46#include <sys/taskqueue.h>
47#include <sys/smp.h>
48#include <machine/smp.h>
49
50#include <vm/uma.h>
51
52#include <net/debugnet.h>
53#include <net/ethernet.h>
54#include <net/pfil.h>
55#include <net/if.h>
56#include <net/if_var.h>
57#include <net/if_arp.h>
58#include <net/if_dl.h>
59#include <net/if_types.h>
60#include <net/if_media.h>
61#include <net/if_vlan_var.h>
62
63#include <net/bpf.h>
64
65#include <netinet/in_systm.h>
66#include <netinet/in.h>
67#include <netinet/ip.h>
68#include <netinet/ip6.h>
69#include <netinet6/ip6_var.h>
70#include <netinet/udp.h>
71#include <netinet/tcp.h>
72#include <netinet/tcp_lro.h>
73
74#include <machine/bus.h>
75#include <machine/resource.h>
76#include <sys/bus.h>
77#include <sys/rman.h>
78
79#include <dev/virtio/virtio.h>
80#include <dev/virtio/virtqueue.h>
81#include <dev/virtio/network/virtio_net.h>
82#include <dev/virtio/network/if_vtnetvar.h>
83#include "virtio_if.h"
84
85#include "opt_inet.h"
86#include "opt_inet6.h"
87
88#if defined(INET) || defined(INET6)
89#include <machine/in_cksum.h>
90#endif
91
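/*
 * On strict-alignment platforms, receive buffers are offset by ETHER_ALIGN
 * (2 bytes) so that the IP header following the 14-byte Ethernet header
 * lands on a 4-byte boundary; elsewhere the offset is unnecessary.
 */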
92#ifdef __NO_STRICT_ALIGNMENT
93#define VTNET_ETHER_ALIGN 0
94#else /* Strict alignment */
95#define VTNET_ETHER_ALIGN ETHER_ALIGN
96#endif
97
98static int	vtnet_modevent(module_t, int, void *);
99
100static int	vtnet_probe(device_t);
101static int	vtnet_attach(device_t);
102static int	vtnet_detach(device_t);
103static int	vtnet_suspend(device_t);
104static int	vtnet_resume(device_t);
105static int	vtnet_shutdown(device_t);
106static int	vtnet_attach_completed(device_t);
107static int	vtnet_config_change(device_t);
108
109static int	vtnet_negotiate_features(struct vtnet_softc *);
110static int	vtnet_setup_features(struct vtnet_softc *);
111static int	vtnet_init_rxq(struct vtnet_softc *, int);
112static int	vtnet_init_txq(struct vtnet_softc *, int);
113static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
114static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
115static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
116static void	vtnet_free_rx_filters(struct vtnet_softc *);
117static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
118static int	vtnet_alloc_interface(struct vtnet_softc *);
119static int	vtnet_setup_interface(struct vtnet_softc *);
120static int	vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
121static int	vtnet_ioctl_ifflags(struct vtnet_softc *);
122static int	vtnet_ioctl_multi(struct vtnet_softc *);
123static int	vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
124static int	vtnet_ioctl(if_t, u_long, caddr_t);
125static uint64_t	vtnet_get_counter(if_t, ift_counter);
126
127static int	vtnet_rxq_populate(struct vtnet_rxq *);
128static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
129static struct mbuf *
		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
131static int	vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
132		    struct mbuf *, int);
133static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
134static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
135static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
136static int	vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
137		     uint16_t, int, struct virtio_net_hdr *);
138static int	vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
139		     uint16_t, int, struct virtio_net_hdr *);
140static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
141		     struct virtio_net_hdr *);
142static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
143static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
144static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
145static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
146		    struct virtio_net_hdr *);
147static int	vtnet_rxq_eof(struct vtnet_rxq *);
148static void	vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
149static void	vtnet_rx_vq_intr(void *);
150static void	vtnet_rxq_tq_intr(void *, int);
151
152static int	vtnet_txq_intr_threshold(struct vtnet_txq *);
153static int	vtnet_txq_below_threshold(struct vtnet_txq *);
154static int	vtnet_txq_notify(struct vtnet_txq *);
155static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
156static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
157		    int *, int *, int *);
158static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
159		    int, struct virtio_net_hdr *);
160static struct mbuf *
161		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
162		    struct virtio_net_hdr *);
163static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
164		    struct vtnet_tx_header *);
165static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
166#ifdef VTNET_LEGACY_TX
167static void	vtnet_start_locked(struct vtnet_txq *, if_t);
168static void	vtnet_start(if_t);
169#else
170static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
171static int	vtnet_txq_mq_start(if_t, struct mbuf *);
172static void	vtnet_txq_tq_deferred(void *, int);
173#endif
174static void	vtnet_txq_start(struct vtnet_txq *);
175static void	vtnet_txq_tq_intr(void *, int);
176static int	vtnet_txq_eof(struct vtnet_txq *);
177static void	vtnet_tx_vq_intr(void *);
178static void	vtnet_tx_start_all(struct vtnet_softc *);
179
180#ifndef VTNET_LEGACY_TX
181static void	vtnet_qflush(if_t);
182#endif
183
184static int	vtnet_watchdog(struct vtnet_txq *);
185static void	vtnet_accum_stats(struct vtnet_softc *,
186		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
187static void	vtnet_tick(void *);
188
189static void	vtnet_start_taskqueues(struct vtnet_softc *);
190static void	vtnet_free_taskqueues(struct vtnet_softc *);
191static void	vtnet_drain_taskqueues(struct vtnet_softc *);
192
193static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
194static void	vtnet_stop_rendezvous(struct vtnet_softc *);
195static void	vtnet_stop(struct vtnet_softc *);
196static int	vtnet_virtio_reinit(struct vtnet_softc *);
197static void	vtnet_init_rx_filters(struct vtnet_softc *);
198static int	vtnet_init_rx_queues(struct vtnet_softc *);
199static int	vtnet_init_tx_queues(struct vtnet_softc *);
200static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
201static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
202static void	vtnet_update_rx_offloads(struct vtnet_softc *);
203static int	vtnet_reinit(struct vtnet_softc *);
204static void	vtnet_init_locked(struct vtnet_softc *, int);
205static void	vtnet_init(void *);
206
207static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
208static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
209		    struct sglist *, int, int);
210static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
211static int	vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
212static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
213static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
214static int	vtnet_set_promisc(struct vtnet_softc *, bool);
215static int	vtnet_set_allmulti(struct vtnet_softc *, bool);
216static void	vtnet_rx_filter(struct vtnet_softc *);
217static void	vtnet_rx_filter_mac(struct vtnet_softc *);
218static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
219static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
220static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
221static void	vtnet_register_vlan(void *, if_t, uint16_t);
222static void	vtnet_unregister_vlan(void *, if_t, uint16_t);
223
224static void	vtnet_update_speed_duplex(struct vtnet_softc *);
225static int	vtnet_is_link_up(struct vtnet_softc *);
226static void	vtnet_update_link_status(struct vtnet_softc *);
227static int	vtnet_ifmedia_upd(if_t);
228static void	vtnet_ifmedia_sts(if_t, struct ifmediareq *);
229static void	vtnet_get_macaddr(struct vtnet_softc *);
230static void	vtnet_set_macaddr(struct vtnet_softc *);
231static void	vtnet_attached_set_macaddr(struct vtnet_softc *);
232static void	vtnet_vlan_tag_remove(struct mbuf *);
233static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
234
235static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
236		    struct sysctl_oid_list *, struct vtnet_rxq *);
237static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
238		    struct sysctl_oid_list *, struct vtnet_txq *);
239static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
240static void	vtnet_load_tunables(struct vtnet_softc *);
241static void	vtnet_setup_sysctl(struct vtnet_softc *);
242
243static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
244static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
245static int	vtnet_txq_enable_intr(struct vtnet_txq *);
246static void	vtnet_txq_disable_intr(struct vtnet_txq *);
247static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
248static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
249static void	vtnet_enable_interrupts(struct vtnet_softc *);
250static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
251static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
252static void	vtnet_disable_interrupts(struct vtnet_softc *);
253
254static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);
255
256DEBUGNET_DEFINE(vtnet);
257
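/*
 * Modern (VIRTIO_F_VERSION_1) devices use little-endian fields in the
 * virtio-net header and config space, while legacy devices use the guest's
 * native byte order; these wrappers select the proper conversion per device.
 */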
258#define vtnet_htog16(_sc, _val)	virtio_htog16(vtnet_modern(_sc), _val)
259#define vtnet_htog32(_sc, _val)	virtio_htog32(vtnet_modern(_sc), _val)
260#define vtnet_htog64(_sc, _val)	virtio_htog64(vtnet_modern(_sc), _val)
261#define vtnet_gtoh16(_sc, _val)	virtio_gtoh16(vtnet_modern(_sc), _val)
262#define vtnet_gtoh32(_sc, _val)	virtio_gtoh32(vtnet_modern(_sc), _val)
263#define vtnet_gtoh64(_sc, _val)	virtio_gtoh64(vtnet_modern(_sc), _val)
264
265/* Tunables. */
266static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
267    "VirtIO Net driver parameters");
268
269static int vtnet_csum_disable = 0;
270SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
271    &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
272
273static int vtnet_fixup_needs_csum = 0;
274SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
275    &vtnet_fixup_needs_csum, 0,
276    "Calculate valid checksum for NEEDS_CSUM packets");
277
278static int vtnet_tso_disable = 0;
279SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
280    &vtnet_tso_disable, 0, "Disables TSO");
281
282static int vtnet_lro_disable = 0;
283SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
284    &vtnet_lro_disable, 0, "Disables hardware LRO");
285
286static int vtnet_mq_disable = 0;
287SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
288    &vtnet_mq_disable, 0, "Disables multiqueue support");
289
290static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
291SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
292    &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
293
294static int vtnet_tso_maxlen = IP_MAXPACKET;
295SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
296    &vtnet_tso_maxlen, 0, "TSO burst limit");
297
298static int vtnet_rx_process_limit = 1024;
299SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
300    &vtnet_rx_process_limit, 0,
301    "Number of RX segments processed in one pass");
302
303static int vtnet_lro_entry_count = 128;
304SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
305    &vtnet_lro_entry_count, 0, "Software LRO entry count");
306
/* Depth of the software LRO mbuf queue; a nonzero value enables sorted LRO. */
static int vtnet_lro_mbufq_depth = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
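/*
 * These knobs are boot-time tunables, set for example in loader.conf(5):
 *
 *	hw.vtnet.csum_disable=1
 *	hw.vtnet.mq_max_pairs=4
 *
 * vtnet_tunable_int() also checks for a per-interface override of the form
 * hw.vtnet.<unit>.<name>.
 */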
311
312static uma_zone_t vtnet_tx_header_zone;
313
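/*
 * Human-readable names for the feature bits; the table is registered with
 * virtio_set_feature_desc() in vtnet_attach() so the negotiated features
 * can be reported in a readable form.
 */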
314static struct virtio_feature_desc vtnet_feature_desc[] = {
315	{ VIRTIO_NET_F_CSUM,			"TxChecksum"		},
316	{ VIRTIO_NET_F_GUEST_CSUM,		"RxChecksum"		},
317	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlRxOffloads"	},
318	{ VIRTIO_NET_F_MAC,			"MAC"			},
319	{ VIRTIO_NET_F_GSO,			"TxGSO"			},
320	{ VIRTIO_NET_F_GUEST_TSO4,		"RxLROv4"		},
321	{ VIRTIO_NET_F_GUEST_TSO6,		"RxLROv6"		},
322	{ VIRTIO_NET_F_GUEST_ECN,		"RxLROECN"		},
323	{ VIRTIO_NET_F_GUEST_UFO,		"RxUFO"			},
324	{ VIRTIO_NET_F_HOST_TSO4,		"TxTSOv4"		},
325	{ VIRTIO_NET_F_HOST_TSO6,		"TxTSOv6"		},
326	{ VIRTIO_NET_F_HOST_ECN,		"TxTSOECN"		},
327	{ VIRTIO_NET_F_HOST_UFO,		"TxUFO"			},
328	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRxBuf"		},
329	{ VIRTIO_NET_F_STATUS,			"Status"		},
330	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVq"		},
331	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRxMode"		},
332	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLANFilter"	},
333	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRxModeExtra"	},
334	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce"		},
335	{ VIRTIO_NET_F_MQ,			"Multiqueue"		},
336	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMacAddr"		},
337	{ VIRTIO_NET_F_SPEED_DUPLEX,		"SpeedDuplex"		},
338
339	{ 0, NULL }
340};
341
342static device_method_t vtnet_methods[] = {
343	/* Device methods. */
344	DEVMETHOD(device_probe,			vtnet_probe),
345	DEVMETHOD(device_attach,		vtnet_attach),
346	DEVMETHOD(device_detach,		vtnet_detach),
347	DEVMETHOD(device_suspend,		vtnet_suspend),
348	DEVMETHOD(device_resume,		vtnet_resume),
349	DEVMETHOD(device_shutdown,		vtnet_shutdown),
350
351	/* VirtIO methods. */
352	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
353	DEVMETHOD(virtio_config_change,		vtnet_config_change),
354
355	DEVMETHOD_END
356};
357
358#ifdef DEV_NETMAP
359#include <dev/netmap/if_vtnet_netmap.h>
360#endif
361
362static driver_t vtnet_driver = {
363    .name = "vtnet",
364    .methods = vtnet_methods,
365    .size = sizeof(struct vtnet_softc)
366};
367VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
368MODULE_VERSION(vtnet, 1);
369MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
370#ifdef DEV_NETMAP
371MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
372#endif
373
374VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
375
376static int
377vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
378{
379	int error = 0;
380	static int loaded = 0;
381
382	switch (type) {
383	case MOD_LOAD:
384		if (loaded++ == 0) {
385			vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
386				sizeof(struct vtnet_tx_header),
387				NULL, NULL, NULL, NULL, 0, 0);
388#ifdef DEBUGNET
			/*
			 * The transmit path allocates headers from this
			 * zone, so reserve at least one item per in-flight
			 * packet to guarantee forward progress during a dump.
			 * XXX Add a separate zone like we do for mbufs?
			 * Otherwise we may allocate whole buckets here.
			 */
395			uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
396			uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
397#endif
398		}
399		break;
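	/*
	 * Refuse to quiesce while any vtnet_tx_header allocations are
	 * outstanding; each in-flight transmit holds one item from the zone.
	 */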
400	case MOD_QUIESCE:
401		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
402			error = EBUSY;
403		break;
404	case MOD_UNLOAD:
405		if (--loaded == 0) {
406			uma_zdestroy(vtnet_tx_header_zone);
407			vtnet_tx_header_zone = NULL;
408		}
409		break;
410	case MOD_SHUTDOWN:
411		break;
412	default:
413		error = EOPNOTSUPP;
414		break;
415	}
416
417	return (error);
418}
419
420static int
421vtnet_probe(device_t dev)
422{
423	return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
424}
425
426static int
427vtnet_attach(device_t dev)
428{
429	struct vtnet_softc *sc;
430	int error;
431
432	sc = device_get_softc(dev);
433	sc->vtnet_dev = dev;
434	virtio_set_feature_desc(dev, vtnet_feature_desc);
435
436	VTNET_CORE_LOCK_INIT(sc);
437	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
438	vtnet_load_tunables(sc);
439
440	error = vtnet_alloc_interface(sc);
441	if (error) {
442		device_printf(dev, "cannot allocate interface\n");
443		goto fail;
444	}
445
446	vtnet_setup_sysctl(sc);
447
448	error = vtnet_setup_features(sc);
449	if (error) {
450		device_printf(dev, "cannot setup features\n");
451		goto fail;
452	}
453
454	error = vtnet_alloc_rx_filters(sc);
455	if (error) {
456		device_printf(dev, "cannot allocate Rx filters\n");
457		goto fail;
458	}
459
460	error = vtnet_alloc_rxtx_queues(sc);
461	if (error) {
462		device_printf(dev, "cannot allocate queues\n");
463		goto fail;
464	}
465
466	error = vtnet_alloc_virtqueues(sc);
467	if (error) {
468		device_printf(dev, "cannot allocate virtqueues\n");
469		goto fail;
470	}
471
472	error = vtnet_setup_interface(sc);
473	if (error) {
474		device_printf(dev, "cannot setup interface\n");
475		goto fail;
476	}
477
478	error = virtio_setup_intr(dev, INTR_TYPE_NET);
479	if (error) {
480		device_printf(dev, "cannot setup interrupts\n");
481		ether_ifdetach(sc->vtnet_ifp);
482		goto fail;
483	}
484
485#ifdef DEV_NETMAP
486	vtnet_netmap_attach(sc);
487#endif
488	vtnet_start_taskqueues(sc);
489
490fail:
491	if (error)
492		vtnet_detach(dev);
493
494	return (error);
495}
496
497static int
498vtnet_detach(device_t dev)
499{
500	struct vtnet_softc *sc;
501	if_t ifp;
502
503	sc = device_get_softc(dev);
504	ifp = sc->vtnet_ifp;
505
506	if (device_is_attached(dev)) {
507		VTNET_CORE_LOCK(sc);
508		vtnet_stop(sc);
509		VTNET_CORE_UNLOCK(sc);
510
511		callout_drain(&sc->vtnet_tick_ch);
512		vtnet_drain_taskqueues(sc);
513
514		ether_ifdetach(ifp);
515	}
516
517#ifdef DEV_NETMAP
518	netmap_detach(ifp);
519#endif
520
521	if (sc->vtnet_pfil != NULL) {
522		pfil_head_unregister(sc->vtnet_pfil);
523		sc->vtnet_pfil = NULL;
524	}
525
526	vtnet_free_taskqueues(sc);
527
528	if (sc->vtnet_vlan_attach != NULL) {
529		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
530		sc->vtnet_vlan_attach = NULL;
531	}
532	if (sc->vtnet_vlan_detach != NULL) {
533		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
534		sc->vtnet_vlan_detach = NULL;
535	}
536
537	ifmedia_removeall(&sc->vtnet_media);
538
539	if (ifp != NULL) {
540		if_free(ifp);
541		sc->vtnet_ifp = NULL;
542	}
543
544	vtnet_free_rxtx_queues(sc);
545	vtnet_free_rx_filters(sc);
546
547	if (sc->vtnet_ctrl_vq != NULL)
548		vtnet_free_ctrl_vq(sc);
549
550	VTNET_CORE_LOCK_DESTROY(sc);
551
552	return (0);
553}
554
555static int
556vtnet_suspend(device_t dev)
557{
558	struct vtnet_softc *sc;
559
560	sc = device_get_softc(dev);
561
562	VTNET_CORE_LOCK(sc);
563	vtnet_stop(sc);
564	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
565	VTNET_CORE_UNLOCK(sc);
566
567	return (0);
568}
569
570static int
571vtnet_resume(device_t dev)
572{
573	struct vtnet_softc *sc;
574	if_t ifp;
575
576	sc = device_get_softc(dev);
577	ifp = sc->vtnet_ifp;
578
579	VTNET_CORE_LOCK(sc);
580	if (if_getflags(ifp) & IFF_UP)
581		vtnet_init_locked(sc, 0);
582	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
583	VTNET_CORE_UNLOCK(sc);
584
585	return (0);
586}
587
588static int
589vtnet_shutdown(device_t dev)
590{
591	/*
592	 * Suspend already does all of what we need to
593	 * do here; we just never expect to be resumed.
594	 */
595	return (vtnet_suspend(dev));
596}
597
598static int
599vtnet_attach_completed(device_t dev)
600{
601	struct vtnet_softc *sc;
602
603	sc = device_get_softc(dev);
604
605	VTNET_CORE_LOCK(sc);
606	vtnet_attached_set_macaddr(sc);
607	VTNET_CORE_UNLOCK(sc);
608
609	return (0);
610}
611
612static int
613vtnet_config_change(device_t dev)
614{
615	struct vtnet_softc *sc;
616
617	sc = device_get_softc(dev);
618
619	VTNET_CORE_LOCK(sc);
620	vtnet_update_link_status(sc);
621	if (sc->vtnet_link_active != 0)
622		vtnet_tx_start_all(sc);
623	VTNET_CORE_UNLOCK(sc);
624
625	return (0);
626}
627
628static int
629vtnet_negotiate_features(struct vtnet_softc *sc)
630{
631	device_t dev;
632	uint64_t features, negotiated_features;
633	int no_csum;
634
635	dev = sc->vtnet_dev;
636	features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
637	    VTNET_LEGACY_FEATURES;
638
639	/*
640	 * TSO and LRO are only available when their corresponding checksum
641	 * offload feature is also negotiated.
642	 */
643	no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
644	if (no_csum)
645		features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
646	if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
647		features &= ~VTNET_TSO_FEATURES;
648	if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
649		features &= ~VTNET_LRO_FEATURES;
650
651#ifndef VTNET_LEGACY_TX
652	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
653		features &= ~VIRTIO_NET_F_MQ;
654#else
655	features &= ~VIRTIO_NET_F_MQ;
656#endif
657
658	negotiated_features = virtio_negotiate_features(dev, features);
659
660	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
661		uint16_t mtu;
662
663		mtu = virtio_read_dev_config_2(dev,
664		    offsetof(struct virtio_net_config, mtu));
665		if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) {
666			device_printf(dev, "Invalid MTU value: %d. "
667			    "MTU feature disabled.\n", mtu);
668			features &= ~VIRTIO_NET_F_MTU;
669			negotiated_features =
670			    virtio_negotiate_features(dev, features);
671		}
672	}
673
674	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
675		uint16_t npairs;
676
677		npairs = virtio_read_dev_config_2(dev,
678		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
679		if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
680		    npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
681			device_printf(dev, "Invalid max_virtqueue_pairs value: "
682			    "%d. Multiqueue feature disabled.\n", npairs);
683			features &= ~VIRTIO_NET_F_MQ;
684			negotiated_features =
685			    virtio_negotiate_features(dev, features);
686		}
687	}
688
689	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
690	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
691		/*
692		 * LRO without mergeable buffers requires special care. This
693		 * is not ideal because every receive buffer must be large
		 * enough to hold the maximum TCP packet, the Ethernet header,
		 * and the virtio-net header. This requires up to 34
		 * descriptors with MCLBYTES clusters. If we do not have
		 * indirect descriptors, LRO is disabled since the virtqueue
		 * will not contain very many receive buffers.
699		 */
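		/*
		 * (Roughly: a 64KB TCP packet split into MCLBYTES clusters
		 * is 33 segments, plus one more for the separate header.)
		 */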
700		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
701			device_printf(dev,
702			    "Host LRO disabled since both mergeable buffers "
703			    "and indirect descriptors were not negotiated\n");
704			features &= ~VTNET_LRO_FEATURES;
705			negotiated_features =
706			    virtio_negotiate_features(dev, features);
707		} else
708			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
709	}
710
711	sc->vtnet_features = negotiated_features;
712	sc->vtnet_negotiated_features = negotiated_features;
713
714	return (virtio_finalize_features(dev));
715}
716
717static int
718vtnet_setup_features(struct vtnet_softc *sc)
719{
720	device_t dev;
721	int error;
722
723	dev = sc->vtnet_dev;
724
725	error = vtnet_negotiate_features(sc);
726	if (error)
727		return (error);
728
729	if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
730		sc->vtnet_flags |= VTNET_FLAG_MODERN;
731	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
732		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
733	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
734		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
735
736	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
737		/* This feature should always be negotiated. */
738		sc->vtnet_flags |= VTNET_FLAG_MAC;
739	}
740
741	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
742		sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
743		    offsetof(struct virtio_net_config, mtu));
744	} else
745		sc->vtnet_max_mtu = VTNET_MAX_MTU;
746
747	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
748		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
749		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
750	} else if (vtnet_modern(sc)) {
751		/* This is identical to the mergeable header. */
752		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
753	} else
754		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
755
756	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
757		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
758	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
759		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
760	else
761		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
762
	/*
	 * Favor "hardware" LRO if negotiated, but fall back to software LRO
	 * otherwise; enabling both at once usually provides little benefit
	 * and can even hurt performance.
	 */
767	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
768	    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
769		sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
770
771	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
772	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
773	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
774		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
775	else
776		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
777
778	sc->vtnet_req_vq_pairs = 1;
779	sc->vtnet_max_vq_pairs = 1;
780
781	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
782		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
783
784		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
785			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
786		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
787			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
788		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
789			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
790
791		if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
792			sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
793			    offsetof(struct virtio_net_config,
794			    max_virtqueue_pairs));
795		}
796	}
797
798	if (sc->vtnet_max_vq_pairs > 1) {
799		int req;
800
801		/*
802		 * Limit the maximum number of requested queue pairs to the
803		 * number of CPUs and the configured maximum.
804		 */
805		req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
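		/* Zero selects one pair per CPU; negative forces one pair. */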
806		if (req < 0)
807			req = 1;
808		if (req == 0)
809			req = mp_ncpus;
810		if (req > sc->vtnet_max_vq_pairs)
811			req = sc->vtnet_max_vq_pairs;
812		if (req > mp_ncpus)
813			req = mp_ncpus;
814		if (req > 1) {
815			sc->vtnet_req_vq_pairs = req;
816			sc->vtnet_flags |= VTNET_FLAG_MQ;
817		}
818	}
819
820	return (0);
821}
822
823static int
824vtnet_init_rxq(struct vtnet_softc *sc, int id)
825{
826	struct vtnet_rxq *rxq;
827
828	rxq = &sc->vtnet_rxqs[id];
829
830	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
831	    device_get_nameunit(sc->vtnet_dev), id);
832	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
833
834	rxq->vtnrx_sc = sc;
835	rxq->vtnrx_id = id;
836
837	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
838	if (rxq->vtnrx_sg == NULL)
839		return (ENOMEM);
840
841#if defined(INET) || defined(INET6)
842	if (vtnet_software_lro(sc)) {
843		if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
844		    sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
845			return (ENOMEM);
846	}
847#endif
848
849	NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
850	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
851	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);
852
853	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
854}
855
856static int
857vtnet_init_txq(struct vtnet_softc *sc, int id)
858{
859	struct vtnet_txq *txq;
860
861	txq = &sc->vtnet_txqs[id];
862
863	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
864	    device_get_nameunit(sc->vtnet_dev), id);
865	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
866
867	txq->vtntx_sc = sc;
868	txq->vtntx_id = id;
869
870	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
871	if (txq->vtntx_sg == NULL)
872		return (ENOMEM);
873
874#ifndef VTNET_LEGACY_TX
875	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
876	    M_NOWAIT, &txq->vtntx_mtx);
877	if (txq->vtntx_br == NULL)
878		return (ENOMEM);
879
880	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
881#endif
882	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
883	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
884	    taskqueue_thread_enqueue, &txq->vtntx_tq);
885	if (txq->vtntx_tq == NULL)
886		return (ENOMEM);
887
888	return (0);
889}
890
891static int
892vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
893{
894	int i, npairs, error;
895
896	npairs = sc->vtnet_max_vq_pairs;
897
898	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
899	    M_NOWAIT | M_ZERO);
900	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
901	    M_NOWAIT | M_ZERO);
902	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
903		return (ENOMEM);
904
905	for (i = 0; i < npairs; i++) {
906		error = vtnet_init_rxq(sc, i);
907		if (error)
908			return (error);
909		error = vtnet_init_txq(sc, i);
910		if (error)
911			return (error);
912	}
913
914	vtnet_set_rx_process_limit(sc);
915	vtnet_setup_queue_sysctl(sc);
916
917	return (0);
918}
919
920static void
921vtnet_destroy_rxq(struct vtnet_rxq *rxq)
922{
923
924	rxq->vtnrx_sc = NULL;
925	rxq->vtnrx_id = -1;
926
927#if defined(INET) || defined(INET6)
928	tcp_lro_free(&rxq->vtnrx_lro);
929#endif
930
931	if (rxq->vtnrx_sg != NULL) {
932		sglist_free(rxq->vtnrx_sg);
933		rxq->vtnrx_sg = NULL;
934	}
935
936	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
937		mtx_destroy(&rxq->vtnrx_mtx);
938}
939
940static void
941vtnet_destroy_txq(struct vtnet_txq *txq)
942{
943
944	txq->vtntx_sc = NULL;
945	txq->vtntx_id = -1;
946
947	if (txq->vtntx_sg != NULL) {
948		sglist_free(txq->vtntx_sg);
949		txq->vtntx_sg = NULL;
950	}
951
952#ifndef VTNET_LEGACY_TX
953	if (txq->vtntx_br != NULL) {
954		buf_ring_free(txq->vtntx_br, M_DEVBUF);
955		txq->vtntx_br = NULL;
956	}
957#endif
958
959	if (mtx_initialized(&txq->vtntx_mtx) != 0)
960		mtx_destroy(&txq->vtntx_mtx);
961}
962
963static void
964vtnet_free_rxtx_queues(struct vtnet_softc *sc)
965{
966	int i;
967
968	if (sc->vtnet_rxqs != NULL) {
969		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
970			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
971		free(sc->vtnet_rxqs, M_DEVBUF);
972		sc->vtnet_rxqs = NULL;
973	}
974
975	if (sc->vtnet_txqs != NULL) {
976		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
977			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
978		free(sc->vtnet_txqs, M_DEVBUF);
979		sc->vtnet_txqs = NULL;
980	}
981}
982
983static int
984vtnet_alloc_rx_filters(struct vtnet_softc *sc)
985{
986
987	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
988		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
989		    M_DEVBUF, M_NOWAIT | M_ZERO);
990		if (sc->vtnet_mac_filter == NULL)
991			return (ENOMEM);
992	}
993
994	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
995		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
996		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
997		if (sc->vtnet_vlan_filter == NULL)
998			return (ENOMEM);
999	}
1000
1001	return (0);
1002}
1003
1004static void
1005vtnet_free_rx_filters(struct vtnet_softc *sc)
1006{
1007
1008	if (sc->vtnet_mac_filter != NULL) {
1009		free(sc->vtnet_mac_filter, M_DEVBUF);
1010		sc->vtnet_mac_filter = NULL;
1011	}
1012
1013	if (sc->vtnet_vlan_filter != NULL) {
1014		free(sc->vtnet_vlan_filter, M_DEVBUF);
1015		sc->vtnet_vlan_filter = NULL;
1016	}
1017}
1018
1019static int
1020vtnet_alloc_virtqueues(struct vtnet_softc *sc)
1021{
1022	device_t dev;
1023	struct vq_alloc_info *info;
1024	struct vtnet_rxq *rxq;
1025	struct vtnet_txq *txq;
1026	int i, idx, nvqs, error;
1027
1028	dev = sc->vtnet_dev;
1029
1030	nvqs = sc->vtnet_max_vq_pairs * 2;
1031	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
1032		nvqs++;
1033
1034	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
1035	if (info == NULL)
1036		return (ENOMEM);
1037
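	/*
	 * The virtqueue order is fixed by the VirtIO specification: the RX
	 * and TX queues of each pair come first, then the control queue.
	 */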
1038	for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
1039		rxq = &sc->vtnet_rxqs[i];
1040		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
1041		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
1042		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1043
1044		txq = &sc->vtnet_txqs[i];
1045		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
1046		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
1047		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1048	}
1049
1050	/* These queues will not be used so allocate the minimum resources. */
1051	for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
1052		rxq = &sc->vtnet_rxqs[i];
1053		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
1054		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1055
1056		txq = &sc->vtnet_txqs[i];
1057		VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq,
1058		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1059	}
1060
1061	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
1062		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
1063		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
1064	}
1065
1066	error = virtio_alloc_virtqueues(dev, nvqs, info);
1067	free(info, M_TEMP);
1068
1069	return (error);
1070}
1071
1072static int
1073vtnet_alloc_interface(struct vtnet_softc *sc)
1074{
1075	device_t dev;
1076	if_t ifp;
1077
1078	dev = sc->vtnet_dev;
1079
1080	ifp = if_alloc(IFT_ETHER);
1081	if (ifp == NULL)
1082		return (ENOMEM);
1083
1084	sc->vtnet_ifp = ifp;
1085	if_setsoftc(ifp, sc);
1086	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1087
1088	return (0);
1089}
1090
1091static int
1092vtnet_setup_interface(struct vtnet_softc *sc)
1093{
1094	device_t dev;
1095	struct pfil_head_args pa;
1096	if_t ifp;
1097
1098	dev = sc->vtnet_dev;
1099	ifp = sc->vtnet_ifp;
1100
1101	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1102	if_setbaudrate(ifp, IF_Gbps(10));
1103	if_setinitfn(ifp, vtnet_init);
1104	if_setioctlfn(ifp, vtnet_ioctl);
1105	if_setgetcounterfn(ifp, vtnet_get_counter);
1106#ifndef VTNET_LEGACY_TX
1107	if_settransmitfn(ifp, vtnet_txq_mq_start);
1108	if_setqflushfn(ifp, vtnet_qflush);
1109#else
1110	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1111	if_setstartfn(ifp, vtnet_start);
1112	if_setsendqlen(ifp, virtqueue_size(vq) - 1);
1113	if_setsendqready(ifp);
1114#endif
1115
1116	vtnet_get_macaddr(sc);
1117
1118	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1119		if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
1120
1121	ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1122	ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1123	ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1124
1125	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1126		int gso;
1127
1128		if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);
1129
1130		gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1131		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1132			if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
1133		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1134			if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
1135		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1136			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1137
1138		if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
1139			int tso_maxlen;
1140
1141			if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
1142
1143			tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
1144			    vtnet_tso_maxlen);
1145			if_sethwtsomax(ifp, tso_maxlen -
1146			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
1147			if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
1148			if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
1149		}
1150	}
1151
1152	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1153		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
1154#ifdef notyet
1155		/* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
1156		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
1157#endif
1158
1159		if (vtnet_tunable_int(sc, "fixup_needs_csum",
1160		    vtnet_fixup_needs_csum) != 0)
1161			sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
1162
1163		/* Support either "hardware" or software LRO. */
1164		if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
1165	}
1166
1167	if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
1168		/*
1169		 * VirtIO does not support VLAN tagging, but we can fake
1170		 * it by inserting and removing the 802.1Q header during
1171		 * transmit and receive. We are then able to do checksum
1172		 * offloading of VLAN frames.
1173		 */
1174		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
1175	}
1176
1177	if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
1178		if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
1179	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
1180
1181	/*
1182	 * Capabilities after here are not enabled by default.
1183	 */
1184	if_setcapenable(ifp, if_getcapabilities(ifp));
1185
1186	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1187		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
1188
1189		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1190		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1191		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1192		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1193	}
1194
1195	ether_ifattach(ifp, sc->vtnet_hwaddr);
1196
1197	/* Tell the upper layer(s) we support long frames. */
1198	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
1199
1200	DEBUGNET_SET(ifp, vtnet);
1201
1202	pa.pa_version = PFIL_VERSION;
1203	pa.pa_flags = PFIL_IN;
1204	pa.pa_type = PFIL_TYPE_ETHERNET;
1205	pa.pa_headname = if_name(ifp);
1206	sc->vtnet_pfil = pfil_head_register(&pa);
1207
1208	return (0);
1209}
1210
1211static int
1212vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1213{
1214	int framesz;
1215
1216	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1217		return (MJUMPAGESIZE);
1218	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1219		return (MCLBYTES);
1220
1221	/*
1222	 * Try to scale the receive mbuf cluster size from the MTU. We
1223	 * could also use the VQ size to influence the selected size,
1224	 * but that would only matter for very small queues.
1225	 */
1226	if (vtnet_modern(sc)) {
1227		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1228		framesz = sizeof(struct virtio_net_hdr_v1);
1229	} else
1230		framesz = sizeof(struct vtnet_rx_header);
1231	framesz += sizeof(struct ether_vlan_header) + mtu;
	/*
	 * Account for the alignment offset applied when the buffer is
	 * allocated so the chosen cluster size still covers the full MTU.
	 */
1236	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1237		framesz += VTNET_ETHER_ALIGN;
1238	}
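	/*
	 * For example, a modern device at the default 1500 byte MTU needs
	 * 12 (virtio_net_hdr_v1) + 18 (ether_vlan_header) + 1500 = 1530
	 * bytes, which fits in a standard MCLBYTES cluster.
	 */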
1239
1240	if (framesz <= MCLBYTES)
1241		return (MCLBYTES);
1242	else if (framesz <= MJUMPAGESIZE)
1243		return (MJUMPAGESIZE);
1244	else if (framesz <= MJUM9BYTES)
1245		return (MJUM9BYTES);
1246
1247	/* Sane default; avoid 16KB clusters. */
1248	return (MCLBYTES);
1249}
1250
1251static int
1252vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
1253{
1254	if_t ifp;
1255	int clustersz;
1256
1257	ifp = sc->vtnet_ifp;
1258	VTNET_CORE_LOCK_ASSERT(sc);
1259
1260	if (if_getmtu(ifp) == mtu)
1261		return (0);
1262	else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1263		return (EINVAL);
1264
1265	if_setmtu(ifp, mtu);
1266	clustersz = vtnet_rx_cluster_size(sc, mtu);
1267
1268	if (clustersz != sc->vtnet_rx_clustersz &&
1269	    if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1270		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1271		vtnet_init_locked(sc, 0);
1272	}
1273
1274	return (0);
1275}
1276
1277static int
1278vtnet_ioctl_ifflags(struct vtnet_softc *sc)
1279{
1280	if_t ifp;
1281	int drv_running;
1282
1283	ifp = sc->vtnet_ifp;
1284	drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
1285
1286	VTNET_CORE_LOCK_ASSERT(sc);
1287
1288	if ((if_getflags(ifp) & IFF_UP) == 0) {
1289		if (drv_running)
1290			vtnet_stop(sc);
1291		goto out;
1292	}
1293
1294	if (!drv_running) {
1295		vtnet_init_locked(sc, 0);
1296		goto out;
1297	}
1298
1299	if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
1300	    (IFF_PROMISC | IFF_ALLMULTI)) {
1301		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1302			vtnet_rx_filter(sc);
1303		else {
1304			/*
1305			 * We don't support filtering out multicast, so
1306			 * ALLMULTI is always set.
1307			 */
1308			if_setflagbits(ifp, IFF_ALLMULTI, 0);
1309			if_setflagbits(ifp, IFF_PROMISC, 0);
1310		}
1311	}
1312
1313out:
1314	sc->vtnet_if_flags = if_getflags(ifp);
1315	return (0);
1316}
1317
1318static int
1319vtnet_ioctl_multi(struct vtnet_softc *sc)
1320{
1321	if_t ifp;
1322
1323	ifp = sc->vtnet_ifp;
1324
1325	VTNET_CORE_LOCK_ASSERT(sc);
1326
1327	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1328	    if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1329		vtnet_rx_filter_mac(sc);
1330
1331	return (0);
1332}
1333
1334static int
1335vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
1336{
1337	if_t ifp;
1338	int mask, reinit, update;
1339
1340	ifp = sc->vtnet_ifp;
1341	mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
1342	reinit = update = 0;
1343
1344	VTNET_CORE_LOCK_ASSERT(sc);
1345
1346	if (mask & IFCAP_TXCSUM)
1347		if_togglecapenable(ifp, IFCAP_TXCSUM);
1348	if (mask & IFCAP_TXCSUM_IPV6)
1349		if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
1350	if (mask & IFCAP_TSO4)
1351		if_togglecapenable(ifp, IFCAP_TSO4);
1352	if (mask & IFCAP_TSO6)
1353		if_togglecapenable(ifp, IFCAP_TSO6);
1354
1355	if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
1356		/*
1357		 * These Rx features require the negotiated features to
1358		 * be updated. Avoid a full reinit if possible.
1359		 */
1360		if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
1361			update = 1;
1362		else
1363			reinit = 1;
1364
1365		/* BMV: Avoid needless renegotiation for just software LRO. */
1366		if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
1367		    IFCAP_LRO && vtnet_software_lro(sc))
1368			reinit = update = 0;
1369
1370		if (mask & IFCAP_RXCSUM)
1371			if_togglecapenable(ifp, IFCAP_RXCSUM);
1372		if (mask & IFCAP_RXCSUM_IPV6)
1373			if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
1374		if (mask & IFCAP_LRO)
1375			if_togglecapenable(ifp, IFCAP_LRO);
1376
1377		/*
1378		 * VirtIO does not distinguish between IPv4 and IPv6 checksums
1379		 * so treat them as a pair. Guest TSO (LRO) requires receive
1380		 * checksums.
1381		 */
1382		if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1383			if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
1384#ifdef notyet
1385			if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
1386#endif
1387		} else
1388			if_setcapenablebit(ifp, 0,
1389			    (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO));
1390	}
1391
	if (mask & IFCAP_VLAN_HWFILTER) {
		/* This Rx feature requires renegotiation. */
		reinit = 1;
		if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
	}
1399
1400	if (mask & IFCAP_VLAN_HWTSO)
1401		if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1402	if (mask & IFCAP_VLAN_HWTAGGING)
1403		if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
1404
1405	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1406		if (reinit) {
1407			if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1408			vtnet_init_locked(sc, 0);
1409		} else if (update)
1410			vtnet_update_rx_offloads(sc);
1411	}
1412
1413	return (0);
1414}
1415
1416static int
1417vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
1418{
1419	struct vtnet_softc *sc;
1420	struct ifreq *ifr;
1421	int error;
1422
1423	sc = if_getsoftc(ifp);
1424	ifr = (struct ifreq *) data;
1425	error = 0;
1426
1427	switch (cmd) {
1428	case SIOCSIFMTU:
1429		VTNET_CORE_LOCK(sc);
1430		error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1431		VTNET_CORE_UNLOCK(sc);
1432		break;
1433
1434	case SIOCSIFFLAGS:
1435		VTNET_CORE_LOCK(sc);
1436		error = vtnet_ioctl_ifflags(sc);
1437		VTNET_CORE_UNLOCK(sc);
1438		break;
1439
1440	case SIOCADDMULTI:
1441	case SIOCDELMULTI:
1442		VTNET_CORE_LOCK(sc);
1443		error = vtnet_ioctl_multi(sc);
1444		VTNET_CORE_UNLOCK(sc);
1445		break;
1446
1447	case SIOCSIFMEDIA:
1448	case SIOCGIFMEDIA:
1449		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1450		break;
1451
1452	case SIOCSIFCAP:
1453		VTNET_CORE_LOCK(sc);
1454		error = vtnet_ioctl_ifcap(sc, ifr);
1455		VTNET_CORE_UNLOCK(sc);
1456		VLAN_CAPABILITIES(ifp);
1457		break;
1458
1459	default:
1460		error = ether_ioctl(ifp, cmd, data);
1461		break;
1462	}
1463
1464	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1465
1466	return (error);
1467}
1468
1469static int
1470vtnet_rxq_populate(struct vtnet_rxq *rxq)
1471{
1472	struct virtqueue *vq;
1473	int nbufs, error;
1474
1475#ifdef DEV_NETMAP
1476	error = vtnet_netmap_rxq_populate(rxq);
1477	if (error >= 0)
1478		return (error);
1479#endif  /* DEV_NETMAP */
1480
1481	vq = rxq->vtnrx_vq;
1482	error = ENOSPC;
1483
1484	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1485		error = vtnet_rxq_new_buf(rxq);
1486		if (error)
1487			break;
1488	}
1489
1490	if (nbufs > 0) {
1491		virtqueue_notify(vq);
1492		/*
1493		 * EMSGSIZE signifies the virtqueue did not have enough
1494		 * entries available to hold the last mbuf. This is not
1495		 * an error.
1496		 */
1497		if (error == EMSGSIZE)
1498			error = 0;
1499	}
1500
1501	return (error);
1502}
1503
1504static void
1505vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1506{
1507	struct virtqueue *vq;
1508	struct mbuf *m;
1509	int last;
1510#ifdef DEV_NETMAP
1511	struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1512							rxq->vtnrx_id, NR_RX);
1513#else  /* !DEV_NETMAP */
1514	void *kring = NULL;
1515#endif /* !DEV_NETMAP */
1516
1517	vq = rxq->vtnrx_vq;
1518	last = 0;
1519
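	/*
	 * When netmap is active on this ring, the enqueued cookies refer to
	 * netmap-owned buffers rather than mbufs, so only drain the ring.
	 */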
1520	while ((m = virtqueue_drain(vq, &last)) != NULL) {
1521		if (kring == NULL)
1522			m_freem(m);
1523	}
1524
1525	KASSERT(virtqueue_empty(vq),
1526	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1527}
1528
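/*
 * Allocate an mbuf chain of nbufs clusters, each of vtnet_rx_clustersz
 * bytes. Chains longer than one mbuf are only used when LRO without
 * mergeable buffers was negotiated.
 */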
1529static struct mbuf *
1530vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1531{
1532	struct mbuf *m_head, *m_tail, *m;
1533	int i, size;
1534
1535	m_head = NULL;
1536	size = sc->vtnet_rx_clustersz;
1537
1538	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1539	    ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1540
1541	for (i = 0; i < nbufs; i++) {
1542		m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1543		if (m == NULL) {
1544			sc->vtnet_stats.mbuf_alloc_failed++;
1545			m_freem(m_head);
1546			return (NULL);
1547		}
1548
1549		m->m_len = size;
		/*
		 * On strict alignment platforms, offset the data so the IP
		 * header ends up 4-byte aligned once the inlined virtio-net
		 * header and the Ethernet header are accounted for.
		 */
1554		if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1555			m_adj(m, VTNET_ETHER_ALIGN);
1556		}
1557		if (m_head != NULL) {
1558			m_tail->m_next = m;
1559			m_tail = m;
1560		} else
1561			m_head = m_tail = m;
1562	}
1563
1564	if (m_tailp != NULL)
1565		*m_tailp = m_tail;
1566
1567	return (m_head);
1568}
1569
1570/*
1571 * Slow path for when LRO without mergeable buffers is negotiated.
1572 */
1573static int
1574vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1575    int len0)
1576{
1577	struct vtnet_softc *sc;
1578	struct mbuf *m, *m_prev, *m_new, *m_tail;
1579	int len, clustersz, nreplace, error;
1580
1581	sc = rxq->vtnrx_sc;
1582	clustersz = sc->vtnet_rx_clustersz;
	/*
	 * The receive mbufs were offset for strict alignment when they were
	 * allocated, so shrink the expected cluster size accordingly.
	 */
1587	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
1588		clustersz -= VTNET_ETHER_ALIGN;
1589
1590	m_prev = NULL;
1591	m_tail = NULL;
1592	nreplace = 0;
1593
1594	m = m0;
1595	len = len0;
1596
1597	/*
1598	 * Since these mbuf chains are so large, avoid allocating a complete
1599	 * replacement when the received frame did not consume the entire
1600	 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
1601	 */
1602	while (len > 0) {
1603		if (m == NULL) {
1604			sc->vtnet_stats.rx_frame_too_large++;
1605			return (EMSGSIZE);
1606		}
1607
1608		/*
1609		 * Every mbuf should have the expected cluster size since that
1610		 * is also used to allocate the replacements.
1611		 */
1612		KASSERT(m->m_len == clustersz,
1613		    ("%s: mbuf size %d not expected cluster size %d", __func__,
1614		    m->m_len, clustersz));
1615
1616		m->m_len = MIN(m->m_len, len);
1617		len -= m->m_len;
1618
1619		m_prev = m;
1620		m = m->m_next;
1621		nreplace++;
1622	}
1623
1624	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1625	    ("%s: invalid replacement mbuf count %d max %d", __func__,
1626	    nreplace, sc->vtnet_rx_nmbufs));
1627
1628	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1629	if (m_new == NULL) {
1630		m_prev->m_len = clustersz;
1631		return (ENOBUFS);
1632	}
1633
1634	/*
1635	 * Move any unused mbufs from the received mbuf chain onto the
1636	 * end of the replacement chain.
1637	 */
1638	if (m_prev->m_next != NULL) {
1639		m_tail->m_next = m_prev->m_next;
1640		m_prev->m_next = NULL;
1641	}
1642
1643	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1644	if (error) {
1645		/*
		 * The replacement is supposed to be a copy of the one just
		 * dequeued, so this is a very unexpected error.
1648		 *
1649		 * Restore the m0 chain to the original state if it was
1650		 * modified so we can then discard it.
1651		 */
1652		if (m_tail->m_next != NULL) {
1653			m_prev->m_next = m_tail->m_next;
1654			m_tail->m_next = NULL;
1655		}
1656		m_prev->m_len = clustersz;
1657		sc->vtnet_stats.rx_enq_replacement_failed++;
1658		m_freem(m_new);
1659	}
1660
1661	return (error);
1662}
1663
1664static int
1665vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1666{
1667	struct vtnet_softc *sc;
1668	struct mbuf *m_new;
1669	int error;
1670
1671	sc = rxq->vtnrx_sc;
1672
1673	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1674		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1675
1676	MPASS(m->m_next == NULL);
1677	if (m->m_len < len)
1678		return (EMSGSIZE);
1679
1680	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1681	if (m_new == NULL)
1682		return (ENOBUFS);
1683
1684	error = vtnet_rxq_enqueue_buf(rxq, m_new);
1685	if (error) {
1686		sc->vtnet_stats.rx_enq_replacement_failed++;
1687		m_freem(m_new);
1688	} else
1689		m->m_len = len;
1690
1691	return (error);
1692}
1693
1694static int
1695vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1696{
1697	struct vtnet_softc *sc;
1698	struct sglist *sg;
1699	int header_inlined, error;
1700
1701	sc = rxq->vtnrx_sc;
1702	sg = rxq->vtnrx_sg;
1703
1704	KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1705	    ("%s: mbuf chain without LRO_NOMRG", __func__));
1706	VTNET_RXQ_LOCK_ASSERT(rxq);
1707
1708	sglist_reset(sg);
1709	header_inlined = vtnet_modern(sc) ||
1710	    (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
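	/*
	 * Modern devices and legacy devices with mergeable buffers accept
	 * the virtio-net header inline at the start of the data buffer;
	 * otherwise the legacy layout needs the header in its own segment.
	 */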
1711
	/*
	 * Note: the mbuf was already offset for strict alignment when it was
	 * allocated.
	 */
1716	if (header_inlined)
1717		error = sglist_append_mbuf(sg, m);
1718	else {
1719		struct vtnet_rx_header *rxhdr =
1720		    mtod(m, struct vtnet_rx_header *);
1721		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1722
1723		/* Append the header and remaining mbuf data. */
1724		error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1725		if (error)
1726			return (error);
1727		error = sglist_append(sg, &rxhdr[1],
1728		    m->m_len - sizeof(struct vtnet_rx_header));
1729		if (error)
1730			return (error);
1731
1732		if (m->m_next != NULL)
1733			error = sglist_append_mbuf(sg, m->m_next);
1734	}
1735
1736	if (error)
1737		return (error);
1738
1739	return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1740}
1741
1742static int
1743vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1744{
1745	struct vtnet_softc *sc;
1746	struct mbuf *m;
1747	int error;
1748
1749	sc = rxq->vtnrx_sc;
1750
1751	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1752	if (m == NULL)
1753		return (ENOBUFS);
1754
1755	error = vtnet_rxq_enqueue_buf(rxq, m);
1756	if (error)
1757		m_freem(m);
1758
1759	return (error);
1760}
1761
1762static int
1763vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
1764    int hoff, struct virtio_net_hdr *hdr)
1765{
1766	struct vtnet_softc *sc;
1767	int error;
1768
1769	sc = rxq->vtnrx_sc;
1770
1771	/*
1772	 * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
1773	 * not have an analogous CSUM flag. The checksum has been validated,
1774	 * but is incomplete (TCP/UDP pseudo header).
1775	 *
1776	 * The packet is likely from another VM on the same host that itself
1777	 * performed checksum offloading so Tx/Rx is basically a memcpy and
1778	 * the checksum has little value.
1779	 *
1780	 * Default to receiving the packet as-is for performance reasons, but
1781	 * this can cause issues if the packet is to be forwarded because it
1782	 * does not contain a valid checksum. This patch may be helpful:
1783	 * https://reviews.freebsd.org/D6611. In the meantime, have the driver
1784	 * compute the checksum if requested.
1785	 *
	 * BMV: Need to add a CSUM_PARTIAL flag?
1787	 */
1788	if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
1789		error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
1790		return (error);
1791	}
1792
1793	/*
1794	 * Compute the checksum in the driver so the packet will contain a
1795	 * valid checksum. The checksum is at csum_offset from csum_start.
1796	 */
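	/*
	 * For example, for TCP over IPv4 in an untagged frame, csum_start is
	 * 34 (the Ethernet header plus a basic IPv4 header) and csum_offset
	 * is 16, the offset of the checksum field within the TCP header.
	 */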
1797	switch (etype) {
1798#if defined(INET) || defined(INET6)
1799	case ETHERTYPE_IP:
1800	case ETHERTYPE_IPV6: {
1801		int csum_off, csum_end;
1802		uint16_t csum;
1803
1804		csum_off = hdr->csum_start + hdr->csum_offset;
1805		csum_end = csum_off + sizeof(uint16_t);
1806
1807		/* Assume checksum will be in the first mbuf. */
1808		if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
1809			return (1);
1810
1811		/*
1812		 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
1813		 * checksum and write it at the specified offset. We could
1814		 * try to verify the packet: csum_start should probably
1815		 * correspond to the start of the TCP/UDP header.
1816		 *
1817		 * BMV: Need to properly handle UDP with zero checksum. Is
1818		 * the IPv4 header checksum implicitly validated?
1819		 */
1820		csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
1821		*(uint16_t *)(mtodo(m, csum_off)) = csum;
1822		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1823		m->m_pkthdr.csum_data = 0xFFFF;
1824		break;
1825	}
1826#endif
1827	default:
1828		sc->vtnet_stats.rx_csum_bad_ethtype++;
1829		return (1);
1830	}
1831
1832	return (0);
1833}
1834
1835static int
1836vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
1837    uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
1838{
1839#if 0
1840	struct vtnet_softc *sc;
1841#endif
1842	int protocol;
1843
1844#if 0
1845	sc = rxq->vtnrx_sc;
1846#endif
1847
1848	switch (etype) {
1849#if defined(INET)
1850	case ETHERTYPE_IP:
1851		if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
1852			protocol = IPPROTO_DONE;
1853		else {
1854			struct ip *ip = (struct ip *)(m->m_data + hoff);
1855			protocol = ip->ip_p;
1856		}
1857		break;
1858#endif
1859#if defined(INET6)
1860	case ETHERTYPE_IPV6:
1861		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
1862		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
1863			protocol = IPPROTO_DONE;
1864		break;
1865#endif
1866	default:
1867		protocol = IPPROTO_DONE;
1868		break;
1869	}
1870
1871	switch (protocol) {
1872	case IPPROTO_TCP:
1873	case IPPROTO_UDP:
1874		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1875		m->m_pkthdr.csum_data = 0xFFFF;
1876		break;
1877	default:
1878		/*
1879		 * FreeBSD does not support checksum offloading for this
1880		 * protocol. Leave the mbuf unmarked so the stack verifies
1881		 * the checksum in software if it supports the protocol.
1882		 */
1883#if 0
1884		if_printf(sc->vtnet_ifp,
1885		    "%s: checksum offload of unsupported protocol "
1886		    "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
1887		    __func__, etype, protocol, hdr->csum_start,
1888		    hdr->csum_offset);
1889#endif
1890		break;
1891	}
1892
1893	return (0);
1894}
1895
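/*
 * Parse the Ethernet (and any single VLAN) header to find the ethertype
 * and header offset, then dispatch on the checksum flags the host set in
 * the virtio_net header.
 */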
1896static int
1897vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1898    struct virtio_net_hdr *hdr)
1899{
1900	const struct ether_header *eh;
1901	int hoff;
1902	uint16_t etype;
1903
1904	eh = mtod(m, const struct ether_header *);
1905	etype = ntohs(eh->ether_type);
1906	if (etype == ETHERTYPE_VLAN) {
1907		/* TODO BMV: Handle QinQ. */
1908		const struct ether_vlan_header *evh =
1909		    mtod(m, const struct ether_vlan_header *);
1910		etype = ntohs(evh->evl_proto);
1911		hoff = sizeof(struct ether_vlan_header);
1912	} else
1913		hoff = sizeof(struct ether_header);
1914
1915	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1916		return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
1917	else /* VIRTIO_NET_HDR_F_DATA_VALID */
1918		return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
1919}
1920
1921static void
1922vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1923{
1924	struct mbuf *m;
1925
1926	while (--nbufs > 0) {
1927		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1928		if (m == NULL)
1929			break;
1930		vtnet_rxq_discard_buf(rxq, m);
1931	}
1932}
1933
1934static void
1935vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1936{
1937	int error __diagused;
1938
1939	/*
1940	 * Requeue the discarded mbuf. This should always be successful
1941	 * since it was just dequeued.
1942	 */
1943	error = vtnet_rxq_enqueue_buf(rxq, m);
1944	KASSERT(error == 0,
1945	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
1946}
1947
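/*
 * Dequeue the remaining buffers of a mergeable receive frame, posting a
 * replacement buffer for each one, and append them to the chain headed by
 * m_head. The entire chain is freed on any failure.
 */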
1948static int
1949vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1950{
1951	struct vtnet_softc *sc;
1952	struct virtqueue *vq;
1953	struct mbuf *m_tail;
1954
1955	sc = rxq->vtnrx_sc;
1956	vq = rxq->vtnrx_vq;
1957	m_tail = m_head;
1958
1959	while (--nbufs > 0) {
1960		struct mbuf *m;
1961		uint32_t len;
1962
1963		m = virtqueue_dequeue(vq, &len);
1964		if (m == NULL) {
1965			rxq->vtnrx_stats.vrxs_ierrors++;
1966			goto fail;
1967		}
1968
1969		if (vtnet_rxq_new_buf(rxq) != 0) {
1970			rxq->vtnrx_stats.vrxs_iqdrops++;
1971			vtnet_rxq_discard_buf(rxq, m);
1972			if (nbufs > 1)
1973				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1974			goto fail;
1975		}
1976
1977		if (m->m_len < len)
1978			len = m->m_len;
1979
1980		m->m_len = len;
1981		m->m_flags &= ~M_PKTHDR;
1982
1983		m_head->m_pkthdr.len += len;
1984		m_tail->m_next = m;
1985		m_tail = m;
1986	}
1987
1988	return (0);
1989
1990fail:
1991	sc->vtnet_stats.rx_mergeable_failed++;
1992	m_freem(m_head);
1993
1994	return (1);
1995}
1996
1997#if defined(INET) || defined(INET6)
1998static int
1999vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
2000{
2001	struct lro_ctrl *lro;
2002
2003	lro = &rxq->vtnrx_lro;
2004
2005	if (lro->lro_mbuf_max != 0) {
2006		tcp_lro_queue_mbuf(lro, m);
2007		return (0);
2008	}
2009
2010	return (tcp_lro_rx(lro, m, 0));
2011}
2012#endif
2013
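/*
 * Deliver a received frame to the stack: strip the VLAN tag when hardware
 * tagging is enabled, record checksum and LRO information from the host's
 * header, update statistics, and pass the mbuf to if_input() or software
 * LRO.
 */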
2014static void
2015vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
2016    struct virtio_net_hdr *hdr)
2017{
2018	struct vtnet_softc *sc;
2019	if_t ifp;
2020
2021	sc = rxq->vtnrx_sc;
2022	ifp = sc->vtnet_ifp;
2023
2024	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
2025		struct ether_header *eh = mtod(m, struct ether_header *);
2026		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2027			vtnet_vlan_tag_remove(m);
2028			/*
2029			 * With the 802.1Q header removed, update the
2030			 * checksum starting location accordingly.
2031			 */
2032			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2033				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
2034		}
2035	}
2036
2037	m->m_pkthdr.flowid = rxq->vtnrx_id;
2038	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2039
2040	if (hdr->flags &
2041	    (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
2042		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
2043			rxq->vtnrx_stats.vrxs_csum++;
2044		else
2045			rxq->vtnrx_stats.vrxs_csum_failed++;
2046	}
2047
2048	if (hdr->gso_size != 0) {
2049		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2050		case VIRTIO_NET_HDR_GSO_TCPV4:
2051		case VIRTIO_NET_HDR_GSO_TCPV6:
2052			m->m_pkthdr.lro_nsegs =
2053			    howmany(m->m_pkthdr.len, hdr->gso_size);
2054			rxq->vtnrx_stats.vrxs_host_lro++;
2055			break;
2056		}
2057	}
2058
2059	rxq->vtnrx_stats.vrxs_ipackets++;
2060	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
2061
2062#if defined(INET) || defined(INET6)
2063	if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) {
2064		if (vtnet_lro_rx(rxq, m) == 0)
2065			return;
2066	}
2067#endif
2068
2069	if_input(ifp, m);
2070}
2071
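/*
 * Process completed receive descriptors, up to the Rx process limit: post
 * a replacement buffer, gather mergeable chains, strip the virtio_net
 * header, and hand each frame to pfil and the stack. Returns EAGAIN if
 * the process limit was reached.
 */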
2072static int
2073vtnet_rxq_eof(struct vtnet_rxq *rxq)
2074{
2075	struct virtio_net_hdr lhdr, *hdr;
2076	struct vtnet_softc *sc;
2077	if_t ifp;
2078	struct virtqueue *vq;
2079	int deq, count;
2080
2081	sc = rxq->vtnrx_sc;
2082	vq = rxq->vtnrx_vq;
2083	ifp = sc->vtnet_ifp;
2084	deq = 0;
2085	count = sc->vtnet_rx_process_limit;
2086
2087	VTNET_RXQ_LOCK_ASSERT(rxq);
2088
2089	CURVNET_SET(if_getvnet(ifp));
2090	while (count-- > 0) {
2091		struct mbuf *m;
2092		uint32_t len, nbufs, adjsz;
2093
2094		m = virtqueue_dequeue(vq, &len);
2095		if (m == NULL)
2096			break;
2097		deq++;
2098
2099		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
2100			rxq->vtnrx_stats.vrxs_ierrors++;
2101			vtnet_rxq_discard_buf(rxq, m);
2102			continue;
2103		}
2104
2105		if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
2106			struct virtio_net_hdr_mrg_rxbuf *mhdr =
2107			    mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
2108			kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
2109			nbufs = vtnet_htog16(sc, mhdr->num_buffers);
2110			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2111		} else if (vtnet_modern(sc)) {
2112			nbufs = 1; /* num_buffers is always 1 */
2113			adjsz = sizeof(struct virtio_net_hdr_v1);
2114		} else {
2115			nbufs = 1;
2116			adjsz = sizeof(struct vtnet_rx_header);
2117			/*
2118			 * Account for our gap between the header and start of
2119			 * data to keep the segments separated.
2120			 */
2121			len += VTNET_RX_HEADER_PAD;
2122		}
2123
2124		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
2125			rxq->vtnrx_stats.vrxs_iqdrops++;
2126			vtnet_rxq_discard_buf(rxq, m);
2127			if (nbufs > 1)
2128				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
2129			continue;
2130		}
2131
2132		m->m_pkthdr.len = len;
2133		m->m_pkthdr.rcvif = ifp;
2134		m->m_pkthdr.csum_flags = 0;
2135
2136		if (nbufs > 1) {
2137			/* Dequeue the rest of the chain. */
2138			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
2139				continue;
2140		}
2141
2142		kmsan_mark_mbuf(m, KMSAN_STATE_INITED);
2143
2144		/*
2145		 * Save a copy of the header, converted to host byte order,
2146		 * before it is stripped. The header is always at the start of
2147		 * the mbuf data. num_buffers was already saved (and is not
2148		 * needed here), so use the standard header.
2149		 */
2150		hdr = mtod(m, struct virtio_net_hdr *);
2151		lhdr.flags = hdr->flags;
2152		lhdr.gso_type = hdr->gso_type;
2153		lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
2154		lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
2155		lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
2156		lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
2157		m_adj(m, adjsz);
2158
2159		if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
2160			pfil_return_t pfil;
2161
2162			pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
2163			switch (pfil) {
2164			case PFIL_DROPPED:
2165			case PFIL_CONSUMED:
2166				continue;
2167			default:
2168				KASSERT(pfil == PFIL_PASS,
2169				    ("Filter returned %d!", pfil));
2170			}
2171		}
2172
2173		vtnet_rxq_input(rxq, m, &lhdr);
2174	}
2175
2176	if (deq > 0) {
2177#if defined(INET) || defined(INET6)
2178		if (vtnet_software_lro(sc))
2179			tcp_lro_flush_all(&rxq->vtnrx_lro);
2180#endif
2181		virtqueue_notify(vq);
2182	}
2183	CURVNET_RESTORE();
2184
2185	return (count > 0 ? 0 : EAGAIN);
2186}
2187
2188static void
2189vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
2190{
2191	struct vtnet_softc *sc;
2192	if_t ifp;
2193	u_int more;
2194#ifdef DEV_NETMAP
2195	int nmirq;
2196#endif /* DEV_NETMAP */
2197
2198	sc = rxq->vtnrx_sc;
2199	ifp = sc->vtnet_ifp;
2200
2201	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
2202		/*
2203		 * Ignore this interrupt. Either it is spurious, or this is
2204		 * multiqueue without per-VQ MSIX, so every queue needs to
2205		 * be polled (a brain-dead configuration we could try harder
2206		 * to avoid).
2207		 */
2208		vtnet_rxq_disable_intr(rxq);
2209		return;
2210	}
2211
2212	VTNET_RXQ_LOCK(rxq);
2213
2214#ifdef DEV_NETMAP
2215	/*
2216	 * We call netmap_rx_irq() under lock to prevent concurrent calls.
2217	 * This is not necessary to serialize the access to the RX vq, but
2218	 * rather to avoid races that may happen if this interface is
2219	 * attached to a VALE switch, which would cause received packets
2220	 * to stall in the RX queue (nm_kr_tryget() could find the kring
2221	 * busy when called from netmap_bwrap_intr_notify()).
2222	 */
2223	nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
2224	if (nmirq != NM_IRQ_PASS) {
2225		VTNET_RXQ_UNLOCK(rxq);
2226		if (nmirq == NM_IRQ_RESCHED) {
2227			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2228		}
2229		return;
2230	}
2231#endif /* DEV_NETMAP */
2232
2233again:
2234	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2235		VTNET_RXQ_UNLOCK(rxq);
2236		return;
2237	}
2238
2239	more = vtnet_rxq_eof(rxq);
2240	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
2241		if (!more)
2242			vtnet_rxq_disable_intr(rxq);
2243		/*
2244		 * This is an occasional condition or race (when !more),
2245		 * so retry a few times before scheduling the taskqueue.
2246		 */
2247		if (tries-- > 0)
2248			goto again;
2249
2250		rxq->vtnrx_stats.vrxs_rescheduled++;
2251		VTNET_RXQ_UNLOCK(rxq);
2252		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2253	} else
2254		VTNET_RXQ_UNLOCK(rxq);
2255}
2256
2257static void
2258vtnet_rx_vq_intr(void *xrxq)
2259{
2260	struct vtnet_rxq *rxq;
2261
2262	rxq = xrxq;
2263	vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2264}
2265
2266static void
2267vtnet_rxq_tq_intr(void *xrxq, int pending __unused)
2268{
2269	struct vtnet_rxq *rxq;
2270
2271	rxq = xrxq;
2272	vtnet_rx_vq_process(rxq, 0);
2273}
2274
2275static int
2276vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2277{
2278	struct vtnet_softc *sc;
2279	int threshold;
2280
2281	sc = txq->vtntx_sc;
2282
2283	/*
2284	 * The Tx interrupt is disabled until the queue free count falls
2285	 * below our threshold. Completed frames are drained from the Tx
2286	 * virtqueue before transmitting new frames and in the watchdog
2287	 * callout, so the frequency of Tx interrupts is greatly reduced,
2288	 * at the cost of not freeing mbufs as quickly as they otherwise
2289	 * would be.
2290	 */
2291	threshold = virtqueue_size(txq->vtntx_vq) / 4;
2292
2293	/*
2294	 * Without indirect descriptors, leave enough room for the maximum
2295	 * number of segments we handle.
2296	 */
2297	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2298	    threshold < sc->vtnet_tx_nsegs)
2299		threshold = sc->vtnet_tx_nsegs;
2300
2301	return (threshold);
2302}
2303
2304static int
2305vtnet_txq_below_threshold(struct vtnet_txq *txq)
2306{
2307	struct virtqueue *vq;
2308
2309	vq = txq->vtntx_vq;
2310
2311	return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2312}
2313
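/*
 * Notify the host of new transmit buffers and re-enable the Tx interrupt.
 * Returns nonzero when completed frames were drained and the free count
 * is back above the interrupt threshold, in which case the interrupt is
 * left disabled and the caller should continue transmitting.
 */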
2314static int
2315vtnet_txq_notify(struct vtnet_txq *txq)
2316{
2317	struct virtqueue *vq;
2318
2319	vq = txq->vtntx_vq;
2320
2321	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2322	virtqueue_notify(vq);
2323
2324	if (vtnet_txq_enable_intr(txq) == 0)
2325		return (0);
2326
2327	/*
2328	 * Drain frames that were completed since we last checked. If this
2329	 * causes the queue to go above the threshold, the caller should
2330	 * continue transmitting.
2331	 */
2332	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2333		virtqueue_disable_intr(vq);
2334		return (1);
2335	}
2336
2337	return (0);
2338}
2339
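/*
 * Drain any remaining cookies from the Tx virtqueue. Outside of netmap
 * mode, each cookie is a vtnet_tx_header whose mbuf and zone allocation
 * are freed here.
 */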
2340static void
2341vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2342{
2343	struct virtqueue *vq;
2344	struct vtnet_tx_header *txhdr;
2345	int last;
2346#ifdef DEV_NETMAP
2347	struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2348							txq->vtntx_id, NR_TX);
2349#else  /* !DEV_NETMAP */
2350	void *kring = NULL;
2351#endif /* !DEV_NETMAP */
2352
2353	vq = txq->vtntx_vq;
2354	last = 0;
2355
2356	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2357		if (kring == NULL) {
2358			m_freem(txhdr->vth_mbuf);
2359			uma_zfree(vtnet_tx_header_zone, txhdr);
2360		}
2361	}
2362
2363	KASSERT(virtqueue_empty(vq),
2364	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
2365}
2366
2367/*
2368 * BMV: This can go away once we finally have offsets in the mbuf header.
2369 */
2370static int
2371vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2372    int *proto, int *start)
2373{
2374	struct vtnet_softc *sc;
2375	struct ether_vlan_header *evh;
2376#if defined(INET) || defined(INET6)
2377	int offset;
2378#endif
2379
2380	sc = txq->vtntx_sc;
2381
2382	evh = mtod(m, struct ether_vlan_header *);
2383	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2384		/* BMV: We should handle nested VLAN tags too. */
2385		*etype = ntohs(evh->evl_proto);
2386#if defined(INET) || defined(INET6)
2387		offset = sizeof(struct ether_vlan_header);
2388#endif
2389	} else {
2390		*etype = ntohs(evh->evl_encap_proto);
2391#if defined(INET) || defined(INET6)
2392		offset = sizeof(struct ether_header);
2393#endif
2394	}
2395
2396	switch (*etype) {
2397#if defined(INET)
2398	case ETHERTYPE_IP: {
2399		struct ip *ip, iphdr;
2400		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2401			m_copydata(m, offset, sizeof(struct ip),
2402			    (caddr_t) &iphdr);
2403			ip = &iphdr;
2404		} else
2405			ip = (struct ip *)(m->m_data + offset);
2406		*proto = ip->ip_p;
2407		*start = offset + (ip->ip_hl << 2);
2408		break;
2409	}
2410#endif
2411#if defined(INET6)
2412	case ETHERTYPE_IPV6:
2413		*proto = -1;
2414		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2415		/* Assert the network stack sent us a valid packet. */
2416		KASSERT(*start > offset,
2417		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2418		    *start, offset, *proto));
2419		break;
2420#endif
2421	default:
2422		sc->vtnet_stats.tx_csum_unknown_ethtype++;
2423		return (EINVAL);
2424	}
2425
2426	return (0);
2427}
2428
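/*
 * Fill in the TSO fields (hdr_len, gso_size, gso_type) of the virtio_net
 * header from the packet's TCP header, returning ENOTSUP if the frame
 * requires ECN handling the host did not negotiate.
 */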
2429static int
2430vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2431    int offset, struct virtio_net_hdr *hdr)
2432{
2433	static struct timeval lastecn;
2434	static int curecn;
2435	struct vtnet_softc *sc;
2436	struct tcphdr *tcp, tcphdr;
2437
2438	sc = txq->vtntx_sc;
2439
2440	if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2441		m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2442		tcp = &tcphdr;
2443	} else
2444		tcp = (struct tcphdr *)(m->m_data + offset);
2445
2446	hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2447	hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2448	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2449	    VIRTIO_NET_HDR_GSO_TCPV6;
2450
2451	if (__predict_false(tcp->th_flags & TH_CWR)) {
2452		/*
2453		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
2454		 * FreeBSD, ECN support is not on a per-interface basis,
2455		 * but globally via the net.inet.tcp.ecn.enable sysctl
2456		 * knob. The default is off.
2457		 */
2458		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2459			if (ppsratecheck(&lastecn, &curecn, 1))
2460				if_printf(sc->vtnet_ifp,
2461				    "TSO with ECN not negotiated with host\n");
2462			return (ENOTSUP);
2463		}
2464		hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2465	}
2466
2467	txq->vtntx_stats.vtxs_tso++;
2468
2469	return (0);
2470}
2471
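/*
 * Translate the mbuf's checksum and TSO offload requests into the
 * virtio_net header, validating them against the parsed headers. On any
 * failure the mbuf is freed and NULL is returned.
 */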
2472static struct mbuf *
2473vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2474    struct virtio_net_hdr *hdr)
2475{
2476	struct vtnet_softc *sc;
2477	int flags, etype, csum_start, proto, error;
2478
2479	sc = txq->vtntx_sc;
2480	flags = m->m_pkthdr.csum_flags;
2481
2482	error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2483	if (error)
2484		goto drop;
2485
2486	if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
2487		/* Sanity check that the parsed mbuf matches the offload flags. */
2488		if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
2489		    etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
2490		    && etype != ETHERTYPE_IPV6))) {
2491			sc->vtnet_stats.tx_csum_proto_mismatch++;
2492			goto drop;
2493		}
2494
2495		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2496		hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2497		hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2498		txq->vtntx_stats.vtxs_csum++;
2499	}
2500
2501	if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
2502		/*
2503		 * Sanity check that the parsed mbuf's IP protocol is TCP;
2504		 * VirtIO TSO requires the checksum offloading above.
2505		 */
2506		if (__predict_false(proto != IPPROTO_TCP)) {
2507			sc->vtnet_stats.tx_tso_not_tcp++;
2508			goto drop;
2509		} else if (__predict_false((hdr->flags &
2510		    VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
2511			sc->vtnet_stats.tx_tso_without_csum++;
2512			goto drop;
2513		}
2514
2515		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2516		if (error)
2517			goto drop;
2518	}
2519
2520	return (m);
2521
2522drop:
2523	m_freem(m);
2524	return (NULL);
2525}
2526
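/*
 * Build the scatter/gather list, the virtio_net header followed by the
 * mbuf chain, and enqueue it. The chain is defragmented once if it has
 * too many segments; on failure the mbuf is freed and ENOBUFS returned.
 */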
2527static int
2528vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2529    struct vtnet_tx_header *txhdr)
2530{
2531	struct vtnet_softc *sc;
2532	struct virtqueue *vq;
2533	struct sglist *sg;
2534	struct mbuf *m;
2535	int error;
2536
2537	sc = txq->vtntx_sc;
2538	vq = txq->vtntx_vq;
2539	sg = txq->vtntx_sg;
2540	m = *m_head;
2541
2542	sglist_reset(sg);
2543	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2544	if (error != 0 || sg->sg_nseg != 1) {
2545		KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2546		    __func__, error, sg->sg_nseg));
2547		goto fail;
2548	}
2549
2550	error = sglist_append_mbuf(sg, m);
2551	if (error) {
2552		m = m_defrag(m, M_NOWAIT);
2553		if (m == NULL)
2554			goto fail;
2555
2556		*m_head = m;
2557		sc->vtnet_stats.tx_defragged++;
2558
2559		error = sglist_append_mbuf(sg, m);
2560		if (error)
2561			goto fail;
2562	}
2563
2564	txhdr->vth_mbuf = m;
2565	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2566
2567	return (error);
2568
2569fail:
2570	sc->vtnet_stats.tx_defrag_failed++;
2571	m_freem(*m_head);
2572	*m_head = NULL;
2573
2574	return (ENOBUFS);
2575}
2576
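/*
 * Prepare one frame for transmission: allocate a Tx header, software
 * encapsulate any VLAN tag, apply checksum/TSO offload if requested, and
 * enqueue the result on the virtqueue.
 */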
2577static int
2578vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2579{
2580	struct vtnet_tx_header *txhdr;
2581	struct virtio_net_hdr *hdr;
2582	struct mbuf *m;
2583	int error;
2584
2585	m = *m_head;
2586	M_ASSERTPKTHDR(m);
2587
2588	txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2589	if (txhdr == NULL) {
2590		m_freem(m);
2591		*m_head = NULL;
2592		return (ENOMEM);
2593	}
2594
2595	/*
2596	 * Always use the non-mergeable header, regardless of whether mergeable
2597	 * headers were negotiated, because num_buffers is always zero for
2598	 * transmit. vtnet_hdr_size is used to enqueue the correct header size.
2599	 */
2600	hdr = &txhdr->vth_uhdr.hdr;
2601
2602	if (m->m_flags & M_VLANTAG) {
2603		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2604		if ((*m_head = m) == NULL) {
2605			error = ENOBUFS;
2606			goto fail;
2607		}
2608		m->m_flags &= ~M_VLANTAG;
2609	}
2610
2611	if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2612		m = vtnet_txq_offload(txq, m, hdr);
2613		if ((*m_head = m) == NULL) {
2614			error = ENOBUFS;
2615			goto fail;
2616		}
2617	}
2618
2619	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2620fail:
2621	if (error)
2622		uma_zfree(vtnet_tx_header_zone, txhdr);
2623
2624	return (error);
2625}
2626
2627#ifdef VTNET_LEGACY_TX
2628
2629static void
2630vtnet_start_locked(struct vtnet_txq *txq, if_t ifp)
2631{
2632	struct vtnet_softc *sc;
2633	struct virtqueue *vq;
2634	struct mbuf *m0;
2635	int tries, enq;
2636
2637	sc = txq->vtntx_sc;
2638	vq = txq->vtntx_vq;
2639	tries = 0;
2640
2641	VTNET_TXQ_LOCK_ASSERT(txq);
2642
2643	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2644	    sc->vtnet_link_active == 0)
2645		return;
2646
2647	vtnet_txq_eof(txq);
2648
2649again:
2650	enq = 0;
2651
2652	while (!if_sendq_empty(ifp)) {
2653		if (virtqueue_full(vq))
2654			break;
2655
2656		m0 = if_dequeue(ifp);
2657		if (m0 == NULL)
2658			break;
2659
2660		if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2661			if (m0 != NULL)
2662				if_sendq_prepend(ifp, m0);
2663			break;
2664		}
2665
2666		enq++;
2667		ETHER_BPF_MTAP(ifp, m0);
2668	}
2669
2670	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2671		if (tries++ < VTNET_NOTIFY_RETRIES)
2672			goto again;
2673
2674		txq->vtntx_stats.vtxs_rescheduled++;
2675		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2676	}
2677}
2678
2679static void
2680vtnet_start(if_t ifp)
2681{
2682	struct vtnet_softc *sc;
2683	struct vtnet_txq *txq;
2684
2685	sc = if_getsoftc(ifp);
2686	txq = &sc->vtnet_txqs[0];
2687
2688	VTNET_TXQ_LOCK(txq);
2689	vtnet_start_locked(txq, ifp);
2690	VTNET_TXQ_UNLOCK(txq);
2691}
2692
2693#else /* !VTNET_LEGACY_TX */
2694
2695static int
2696vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2697{
2698	struct vtnet_softc *sc;
2699	struct virtqueue *vq;
2700	struct buf_ring *br;
2701	if_t ifp;
2702	int enq, tries, error;
2703
2704	sc = txq->vtntx_sc;
2705	vq = txq->vtntx_vq;
2706	br = txq->vtntx_br;
2707	ifp = sc->vtnet_ifp;
2708	tries = 0;
2709	error = 0;
2710
2711	VTNET_TXQ_LOCK_ASSERT(txq);
2712
2713	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2714	    sc->vtnet_link_active == 0) {
2715		if (m != NULL)
2716			error = drbr_enqueue(ifp, br, m);
2717		return (error);
2718	}
2719
2720	if (m != NULL) {
2721		error = drbr_enqueue(ifp, br, m);
2722		if (error)
2723			return (error);
2724	}
2725
2726	vtnet_txq_eof(txq);
2727
2728again:
2729	enq = 0;
2730
2731	while ((m = drbr_peek(ifp, br)) != NULL) {
2732		if (virtqueue_full(vq)) {
2733			drbr_putback(ifp, br, m);
2734			break;
2735		}
2736
2737		if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2738			if (m != NULL)
2739				drbr_putback(ifp, br, m);
2740			else
2741				drbr_advance(ifp, br);
2742			break;
2743		}
2744		drbr_advance(ifp, br);
2745
2746		enq++;
2747		ETHER_BPF_MTAP(ifp, m);
2748	}
2749
2750	if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2751		if (tries++ < VTNET_NOTIFY_RETRIES)
2752			goto again;
2753
2754		txq->vtntx_stats.vtxs_rescheduled++;
2755		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2756	}
2757
2758	return (0);
2759}
2760
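/*
 * Select a transmit queue from the mbuf's flow ID (or the current CPU
 * when no hash is present), then either transmit directly or, if the
 * queue lock is contended, enqueue on the buf_ring and defer to the
 * queue's taskqueue.
 */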
2761static int
2762vtnet_txq_mq_start(if_t ifp, struct mbuf *m)
2763{
2764	struct vtnet_softc *sc;
2765	struct vtnet_txq *txq;
2766	int i, npairs, error;
2767
2768	sc = if_getsoftc(ifp);
2769	npairs = sc->vtnet_act_vq_pairs;
2770
2771	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2772		i = m->m_pkthdr.flowid % npairs;
2773	else
2774		i = curcpu % npairs;
2775
2776	txq = &sc->vtnet_txqs[i];
2777
2778	if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2779		error = vtnet_txq_mq_start_locked(txq, m);
2780		VTNET_TXQ_UNLOCK(txq);
2781	} else {
2782		error = drbr_enqueue(ifp, txq->vtntx_br, m);
2783		taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2784	}
2785
2786	return (error);
2787}
2788
2789static void
2790vtnet_txq_tq_deferred(void *xtxq, int pending __unused)
2791{
2792	struct vtnet_softc *sc;
2793	struct vtnet_txq *txq;
2794
2795	txq = xtxq;
2796	sc = txq->vtntx_sc;
2797
2798	VTNET_TXQ_LOCK(txq);
2799	if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2800		vtnet_txq_mq_start_locked(txq, NULL);
2801	VTNET_TXQ_UNLOCK(txq);
2802}
2803
2804#endif /* VTNET_LEGACY_TX */
2805
2806static void
2807vtnet_txq_start(struct vtnet_txq *txq)
2808{
2809	struct vtnet_softc *sc;
2810	if_t ifp;
2811
2812	sc = txq->vtntx_sc;
2813	ifp = sc->vtnet_ifp;
2814
2815#ifdef VTNET_LEGACY_TX
2816	if (!if_sendq_empty(ifp))
2817		vtnet_start_locked(txq, ifp);
2818#else
2819	if (!drbr_empty(ifp, txq->vtntx_br))
2820		vtnet_txq_mq_start_locked(txq, NULL);
2821#endif
2822}
2823
2824static void
2825vtnet_txq_tq_intr(void *xtxq, int pending __unused)
2826{
2827	struct vtnet_softc *sc;
2828	struct vtnet_txq *txq;
2829	if_t ifp;
2830
2831	txq = xtxq;
2832	sc = txq->vtntx_sc;
2833	ifp = sc->vtnet_ifp;
2834
2835	VTNET_TXQ_LOCK(txq);
2836
2837	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2838		VTNET_TXQ_UNLOCK(txq);
2839		return;
2840	}
2841
2842	vtnet_txq_eof(txq);
2843	vtnet_txq_start(txq);
2844
2845	VTNET_TXQ_UNLOCK(txq);
2846}
2847
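/*
 * Reclaim completed transmit descriptors: update statistics, free the
 * mbufs and headers, and clear the watchdog once the virtqueue is empty.
 * Returns the number of descriptors reclaimed.
 */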
2848static int
2849vtnet_txq_eof(struct vtnet_txq *txq)
2850{
2851	struct virtqueue *vq;
2852	struct vtnet_tx_header *txhdr;
2853	struct mbuf *m;
2854	int deq;
2855
2856	vq = txq->vtntx_vq;
2857	deq = 0;
2858	VTNET_TXQ_LOCK_ASSERT(txq);
2859
2860	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2861		m = txhdr->vth_mbuf;
2862		deq++;
2863
2864		txq->vtntx_stats.vtxs_opackets++;
2865		txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2866		if (m->m_flags & M_MCAST)
2867			txq->vtntx_stats.vtxs_omcasts++;
2868
2869		m_freem(m);
2870		uma_zfree(vtnet_tx_header_zone, txhdr);
2871	}
2872
2873	if (virtqueue_empty(vq))
2874		txq->vtntx_watchdog = 0;
2875
2876	return (deq);
2877}
2878
2879static void
2880vtnet_tx_vq_intr(void *xtxq)
2881{
2882	struct vtnet_softc *sc;
2883	struct vtnet_txq *txq;
2884	if_t ifp;
2885
2886	txq = xtxq;
2887	sc = txq->vtntx_sc;
2888	ifp = sc->vtnet_ifp;
2889
2890	if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2891		/*
2892		 * Ignore this interrupt. Either it is spurious, or this is
2893		 * multiqueue without per-VQ MSIX, so every queue needs to
2894		 * be polled (a brain-dead configuration we could try harder
2895		 * to avoid).
2896		 */
2897		vtnet_txq_disable_intr(txq);
2898		return;
2899	}
2900
2901#ifdef DEV_NETMAP
2902	if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2903		return;
2904#endif /* DEV_NETMAP */
2905
2906	VTNET_TXQ_LOCK(txq);
2907
2908	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2909		VTNET_TXQ_UNLOCK(txq);
2910		return;
2911	}
2912
2913	vtnet_txq_eof(txq);
2914	vtnet_txq_start(txq);
2915
2916	VTNET_TXQ_UNLOCK(txq);
2917}
2918
2919static void
2920vtnet_tx_start_all(struct vtnet_softc *sc)
2921{
2922	struct vtnet_txq *txq;
2923	int i;
2924
2925	VTNET_CORE_LOCK_ASSERT(sc);
2926
2927	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2928		txq = &sc->vtnet_txqs[i];
2929
2930		VTNET_TXQ_LOCK(txq);
2931		vtnet_txq_start(txq);
2932		VTNET_TXQ_UNLOCK(txq);
2933	}
2934}
2935
2936#ifndef VTNET_LEGACY_TX
2937static void
2938vtnet_qflush(if_t ifp)
2939{
2940	struct vtnet_softc *sc;
2941	struct vtnet_txq *txq;
2942	struct mbuf *m;
2943	int i;
2944
2945	sc = if_getsoftc(ifp);
2946
2947	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2948		txq = &sc->vtnet_txqs[i];
2949
2950		VTNET_TXQ_LOCK(txq);
2951		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2952			m_freem(m);
2953		VTNET_TXQ_UNLOCK(txq);
2954	}
2955
2956	if_qflush(ifp);
2957}
2958#endif
2959
2960static int
2961vtnet_watchdog(struct vtnet_txq *txq)
2962{
2963	if_t ifp;
2964
2965	ifp = txq->vtntx_sc->vtnet_ifp;
2966
2967	VTNET_TXQ_LOCK(txq);
2968	if (txq->vtntx_watchdog == 1) {
2969		/*
2970		 * Only drain completed frames if the watchdog is about to
2971		 * expire. If any frames were drained, there may be enough
2972		 * free descriptors now available to transmit queued frames.
2973		 * In that case, the timer will immediately be decremented
2974		 * below, but the timeout is generous enough that this should
2975		 * not be a problem.
2976		 */
2977		if (vtnet_txq_eof(txq) != 0)
2978			vtnet_txq_start(txq);
2979	}
2980
2981	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2982		VTNET_TXQ_UNLOCK(txq);
2983		return (0);
2984	}
2985	VTNET_TXQ_UNLOCK(txq);
2986
2987	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2988	return (1);
2989}
2990
2991static void
2992vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
2993    struct vtnet_txq_stats *txacc)
2994{
2995
2996	bzero(rxacc, sizeof(struct vtnet_rxq_stats));
2997	bzero(txacc, sizeof(struct vtnet_txq_stats));
2998
2999	for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3000		struct vtnet_rxq_stats *rxst;
3001		struct vtnet_txq_stats *txst;
3002
3003		rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
3004		rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
3005		rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
3006		rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
3007		rxacc->vrxs_csum += rxst->vrxs_csum;
3008		rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
3009		rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
3010
3011		txst = &sc->vtnet_txqs[i].vtntx_stats;
3012		txacc->vtxs_opackets += txst->vtxs_opackets;
3013		txacc->vtxs_obytes += txst->vtxs_obytes;
3014		txacc->vtxs_csum += txst->vtxs_csum;
3015		txacc->vtxs_tso += txst->vtxs_tso;
3016		txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
3017	}
3018}
3019
3020static uint64_t
3021vtnet_get_counter(if_t ifp, ift_counter cnt)
3022{
3023	struct vtnet_softc *sc;
3024	struct vtnet_rxq_stats rxaccum;
3025	struct vtnet_txq_stats txaccum;
3026
3027	sc = if_getsoftc(ifp);
3028	vtnet_accum_stats(sc, &rxaccum, &txaccum);
3029
3030	switch (cnt) {
3031	case IFCOUNTER_IPACKETS:
3032		return (rxaccum.vrxs_ipackets);
3033	case IFCOUNTER_IQDROPS:
3034		return (rxaccum.vrxs_iqdrops);
3035	case IFCOUNTER_IERRORS:
3036		return (rxaccum.vrxs_ierrors);
3037	case IFCOUNTER_OPACKETS:
3038		return (txaccum.vtxs_opackets);
3039#ifndef VTNET_LEGACY_TX
3040	case IFCOUNTER_OBYTES:
3041		return (txaccum.vtxs_obytes);
3042	case IFCOUNTER_OMCASTS:
3043		return (txaccum.vtxs_omcasts);
3044#endif
3045	default:
3046		return (if_get_counter_default(ifp, cnt));
3047	}
3048}
3049
3050static void
3051vtnet_tick(void *xsc)
3052{
3053	struct vtnet_softc *sc;
3054	if_t ifp;
3055	int i, timedout;
3056
3057	sc = xsc;
3058	ifp = sc->vtnet_ifp;
3059	timedout = 0;
3060
3061	VTNET_CORE_LOCK_ASSERT(sc);
3062
3063	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3064		timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
3065
3066	if (timedout != 0) {
3067		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3068		vtnet_init_locked(sc, 0);
3069	} else
3070		callout_schedule(&sc->vtnet_tick_ch, hz);
3071}
3072
3073static void
3074vtnet_start_taskqueues(struct vtnet_softc *sc)
3075{
3076	device_t dev;
3077	struct vtnet_rxq *rxq;
3078	struct vtnet_txq *txq;
3079	int i, error;
3080
3081	dev = sc->vtnet_dev;
3082
3083	/*
3084	 * Errors here are very difficult to recover from: we cannot
3085	 * easily fail because, if this happens during boot, we will hang
3086	 * when freeing any successfully started taskqueues since the
3087	 * scheduler isn't up yet.
3088	 *
3089	 * Most drivers just ignore the return value; it only fails
3090	 * with ENOMEM, so an error is not likely.
3091	 */
3092	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
3093		rxq = &sc->vtnet_rxqs[i];
3094		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
3095		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
3096		if (error) {
3097			device_printf(dev, "failed to start rx taskq %d\n",
3098			    rxq->vtnrx_id);
3099		}
3100
3101		txq = &sc->vtnet_txqs[i];
3102		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
3103		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
3104		if (error) {
3105			device_printf(dev, "failed to start tx taskq %d\n",
3106			    txq->vtntx_id);
3107		}
3108	}
3109}
3110
3111static void
3112vtnet_free_taskqueues(struct vtnet_softc *sc)
3113{
3114	struct vtnet_rxq *rxq;
3115	struct vtnet_txq *txq;
3116	int i;
3117
3118	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3119		rxq = &sc->vtnet_rxqs[i];
3120		if (rxq->vtnrx_tq != NULL) {
3121			taskqueue_free(rxq->vtnrx_tq);
3122			rxq->vtnrx_tq = NULL;
3123		}
3124
3125		txq = &sc->vtnet_txqs[i];
3126		if (txq->vtntx_tq != NULL) {
3127			taskqueue_free(txq->vtntx_tq);
3128			txq->vtntx_tq = NULL;
3129		}
3130	}
3131}
3132
3133static void
3134vtnet_drain_taskqueues(struct vtnet_softc *sc)
3135{
3136	struct vtnet_rxq *rxq;
3137	struct vtnet_txq *txq;
3138	int i;
3139
3140	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3141		rxq = &sc->vtnet_rxqs[i];
3142		if (rxq->vtnrx_tq != NULL)
3143			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
3144
3145		txq = &sc->vtnet_txqs[i];
3146		if (txq->vtntx_tq != NULL) {
3147			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
3148#ifndef VTNET_LEGACY_TX
3149			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
3150#endif
3151		}
3152	}
3153}
3154
3155static void
3156vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
3157{
3158	struct vtnet_rxq *rxq;
3159	struct vtnet_txq *txq;
3160	int i;
3161
3162	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3163		rxq = &sc->vtnet_rxqs[i];
3164		vtnet_rxq_free_mbufs(rxq);
3165
3166		txq = &sc->vtnet_txqs[i];
3167		vtnet_txq_free_mbufs(txq);
3168	}
3169}
3170
3171static void
3172vtnet_stop_rendezvous(struct vtnet_softc *sc)
3173{
3174	struct vtnet_rxq *rxq;
3175	struct vtnet_txq *txq;
3176	int i;
3177
3178	VTNET_CORE_LOCK_ASSERT(sc);
3179
3180	/*
3181	 * Lock and unlock the per-queue mutex so we know the stop
3182	 * state is visible. Doing only the active queues should be
3183	 * sufficient, but it does not cost much extra to do all the
3184	 * queues.
3185	 */
3186	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3187		rxq = &sc->vtnet_rxqs[i];
3188		VTNET_RXQ_LOCK(rxq);
3189		VTNET_RXQ_UNLOCK(rxq);
3190
3191		txq = &sc->vtnet_txqs[i];
3192		VTNET_TXQ_LOCK(txq);
3193		VTNET_TXQ_UNLOCK(txq);
3194	}
3195}
3196
3197static void
3198vtnet_stop(struct vtnet_softc *sc)
3199{
3200	device_t dev;
3201	if_t ifp;
3202
3203	dev = sc->vtnet_dev;
3204	ifp = sc->vtnet_ifp;
3205
3206	VTNET_CORE_LOCK_ASSERT(sc);
3207
3208	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3209	sc->vtnet_link_active = 0;
3210	callout_stop(&sc->vtnet_tick_ch);
3211
3212	/* Only advisory. */
3213	vtnet_disable_interrupts(sc);
3214
3215#ifdef DEV_NETMAP
3216	/* Stop any pending txsync/rxsync and disable them. */
3217	netmap_disable_all_rings(ifp);
3218#endif /* DEV_NETMAP */
3219
3220	/*
3221	 * Stop the host adapter. This resets it to the pre-initialized
3222	 * state. It will not generate any interrupts until after it is
3223	 * reinitialized.
3224	 */
3225	virtio_stop(dev);
3226	vtnet_stop_rendezvous(sc);
3227
3228	vtnet_drain_rxtx_queues(sc);
3229	sc->vtnet_act_vq_pairs = 1;
3230}
3231
3232static int
3233vtnet_virtio_reinit(struct vtnet_softc *sc)
3234{
3235	device_t dev;
3236	if_t ifp;
3237	uint64_t features;
3238	int error;
3239
3240	dev = sc->vtnet_dev;
3241	ifp = sc->vtnet_ifp;
3242	features = sc->vtnet_negotiated_features;
3243
3244	/*
3245	 * Re-negotiate with the host, removing any disabled receive
3246	 * features. Transmit features are disabled only on our side
3247	 * via if_capenable and if_hwassist.
3248	 */
3249
3250	if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
3251		features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
3252
3253	if ((if_getcapenable(ifp) & IFCAP_LRO) == 0)
3254		features &= ~VTNET_LRO_FEATURES;
3255
3256	if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0)
3257		features &= ~VIRTIO_NET_F_CTRL_VLAN;
3258
3259	error = virtio_reinit(dev, features);
3260	if (error) {
3261		device_printf(dev, "virtio reinit error %d\n", error);
3262		return (error);
3263	}
3264
3265	sc->vtnet_features = features;
3266	virtio_reinit_complete(dev);
3267
3268	return (0);
3269}
3270
3271static void
3272vtnet_init_rx_filters(struct vtnet_softc *sc)
3273{
3274	if_t ifp;
3275
3276	ifp = sc->vtnet_ifp;
3277
3278	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3279		vtnet_rx_filter(sc);
3280		vtnet_rx_filter_mac(sc);
3281	}
3282
3283	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3284		vtnet_rx_filter_vlan(sc);
3285}
3286
3287static int
3288vtnet_init_rx_queues(struct vtnet_softc *sc)
3289{
3290	device_t dev;
3291	if_t ifp;
3292	struct vtnet_rxq *rxq;
3293	int i, clustersz, error;
3294
3295	dev = sc->vtnet_dev;
3296	ifp = sc->vtnet_ifp;
3297
3298	clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp));
3299	sc->vtnet_rx_clustersz = clustersz;
3300
3301	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3302		sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3303		    VTNET_MAX_RX_SIZE, clustersz);
3304		KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3305		    ("%s: too many rx mbufs %d for %d segments", __func__,
3306		    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3307	} else
3308		sc->vtnet_rx_nmbufs = 1;
3309
3310	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3311		rxq = &sc->vtnet_rxqs[i];
3312
3313		/* Hold the lock to satisfy asserts. */
3314		VTNET_RXQ_LOCK(rxq);
3315		error = vtnet_rxq_populate(rxq);
3316		VTNET_RXQ_UNLOCK(rxq);
3317
3318		if (error) {
3319			device_printf(dev, "cannot populate Rx queue %d\n", i);
3320			return (error);
3321		}
3322	}
3323
3324	return (0);
3325}
3326
3327static int
3328vtnet_init_tx_queues(struct vtnet_softc *sc)
3329{
3330	struct vtnet_txq *txq;
3331	int i;
3332
3333	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3334		txq = &sc->vtnet_txqs[i];
3335		txq->vtntx_watchdog = 0;
3336		txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3337#ifdef DEV_NETMAP
3338		netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3339#endif /* DEV_NETMAP */
3340	}
3341
3342	return (0);
3343}
3344
3345static int
3346vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3347{
3348	int error;
3349
3350	error = vtnet_init_rx_queues(sc);
3351	if (error)
3352		return (error);
3353
3354	error = vtnet_init_tx_queues(sc);
3355	if (error)
3356		return (error);
3357
3358	return (0);
3359}
3360
3361static void
3362vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3363{
3364	device_t dev;
3365	int npairs;
3366
3367	dev = sc->vtnet_dev;
3368
3369	if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3370		sc->vtnet_act_vq_pairs = 1;
3371		return;
3372	}
3373
3374	npairs = sc->vtnet_req_vq_pairs;
3375
3376	if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3377		device_printf(dev, "cannot set active queue pairs to %d, "
3378		    "falling back to 1 queue pair\n", npairs);
3379		npairs = 1;
3380	}
3381
3382	sc->vtnet_act_vq_pairs = npairs;
3383}
3384
3385static void
3386vtnet_update_rx_offloads(struct vtnet_softc *sc)
3387{
3388	if_t ifp;
3389	uint64_t features;
3390	int error;
3391
3392	ifp = sc->vtnet_ifp;
3393	features = sc->vtnet_features;
3394
3395	VTNET_CORE_LOCK_ASSERT(sc);
3396
3397	if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
3398		if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
3399			features |= VIRTIO_NET_F_GUEST_CSUM;
3400		else
3401			features &= ~VIRTIO_NET_F_GUEST_CSUM;
3402	}
3403
3404	if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) {
3405		if (if_getcapenable(ifp) & IFCAP_LRO)
3406			features |= VTNET_LRO_FEATURES;
3407		else
3408			features &= ~VTNET_LRO_FEATURES;
3409	}
3410
3411	error = vtnet_ctrl_guest_offloads(sc,
3412	    features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
3413		        VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN  |
3414			VIRTIO_NET_F_GUEST_UFO));
3415	if (error) {
3416		device_printf(sc->vtnet_dev,
3417		    "%s: cannot update Rx features\n", __func__);
3418		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3419			if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3420			vtnet_init_locked(sc, 0);
3421		}
3422	} else
3423		sc->vtnet_features = features;
3424}
3425
3426static int
3427vtnet_reinit(struct vtnet_softc *sc)
3428{
3429	if_t ifp;
3430	int error;
3431
3432	ifp = sc->vtnet_ifp;
3433
3434	bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3435
3436	error = vtnet_virtio_reinit(sc);
3437	if (error)
3438		return (error);
3439
3440	vtnet_set_macaddr(sc);
3441	vtnet_set_active_vq_pairs(sc);
3442
3443	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3444		vtnet_init_rx_filters(sc);
3445
3446	if_sethwassist(ifp, 0);
3447	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
3448		if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0);
3449	if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
3450		if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0);
3451	if (if_getcapenable(ifp) & IFCAP_TSO4)
3452		if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
3453	if (if_getcapenable(ifp) & IFCAP_TSO6)
3454		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
3455
3456	error = vtnet_init_rxtx_queues(sc);
3457	if (error)
3458		return (error);
3459
3460	return (0);
3461}
3462
3463static void
3464vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3465{
3466	if_t ifp;
3467
3468	ifp = sc->vtnet_ifp;
3469
3470	VTNET_CORE_LOCK_ASSERT(sc);
3471
3472	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
3473		return;
3474
3475	vtnet_stop(sc);
3476
3477#ifdef DEV_NETMAP
3478	/* Once stopped we can update the netmap flags, if necessary. */
3479	switch (init_mode) {
3480	case VTNET_INIT_NETMAP_ENTER:
3481		nm_set_native_flags(NA(ifp));
3482		break;
3483	case VTNET_INIT_NETMAP_EXIT:
3484		nm_clear_native_flags(NA(ifp));
3485		break;
3486	}
3487#endif /* DEV_NETMAP */
3488
3489	if (vtnet_reinit(sc) != 0) {
3490		vtnet_stop(sc);
3491		return;
3492	}
3493
3494	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
3495	vtnet_update_link_status(sc);
3496	vtnet_enable_interrupts(sc);
3497	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3498
3499#ifdef DEV_NETMAP
3500	/* Re-enable txsync/rxsync. */
3501	netmap_enable_all_rings(ifp);
3502#endif /* DEV_NETMAP */
3503}
3504
3505static void
3506vtnet_init(void *xsc)
3507{
3508	struct vtnet_softc *sc;
3509
3510	sc = xsc;
3511
3512	VTNET_CORE_LOCK(sc);
3513	vtnet_init_locked(sc, 0);
3514	VTNET_CORE_UNLOCK(sc);
3515}
3516
3517static void
3518vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3519{
3520
3521	/*
3522	 * The control virtqueue is only polled and therefore it should
3523	 * already be empty.
3524	 */
3525	KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
3526	    ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
3527}
3528
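/*
 * Submit a control virtqueue command and poll for the host to complete
 * it. The caller inspects the command's ack byte for the result.
 */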
3529static void
3530vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3531    struct sglist *sg, int readable, int writable)
3532{
3533	struct virtqueue *vq;
3534
3535	vq = sc->vtnet_ctrl_vq;
3536
3537	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3538	VTNET_CORE_LOCK_ASSERT(sc);
3539
3540	if (!virtqueue_empty(vq))
3541		return;
3542
3543	/*
3544	 * Poll for the response; the command has likely already completed
3545	 * by the time the notify returns.
3546	 */
3547	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0)  {
3548		virtqueue_notify(vq);
3549		virtqueue_poll(vq, NULL);
3550	}
3551}
3552
3553static int
3554vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3555{
3556	struct sglist_seg segs[3];
3557	struct sglist sg;
3558	struct {
3559		struct virtio_net_ctrl_hdr hdr __aligned(2);
3560		uint8_t pad1;
3561		uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3562		uint8_t pad2;
3563		uint8_t ack;
3564	} s;
3565	int error;
3566
3567	error = 0;
3568	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3569
3570	s.hdr.class = VIRTIO_NET_CTRL_MAC;
3571	s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3572	bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3573	s.ack = VIRTIO_NET_ERR;
3574
3575	sglist_init(&sg, nitems(segs), segs);
3576	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3577	error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
3578	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3579	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3580
3581	if (error == 0)
3582		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3583
3584	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3585}
3586
3587static int
3588vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
3589{
3590	struct sglist_seg segs[3];
3591	struct sglist sg;
3592	struct {
3593		struct virtio_net_ctrl_hdr hdr __aligned(2);
3594		uint8_t pad1;
3595		uint64_t offloads __aligned(8);
3596		uint8_t pad2;
3597		uint8_t ack;
3598	} s;
3599	int error;
3600
3601	error = 0;
3602	MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3603
3604	s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
3605	s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
3606	s.offloads = vtnet_gtoh64(sc, offloads);
3607	s.ack = VIRTIO_NET_ERR;
3608
3609	sglist_init(&sg, nitems(segs), segs);
3610	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3611	error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t));
3612	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3613	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3614
3615	if (error == 0)
3616		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3617
3618	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3619}
3620
3621static int
3622vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3623{
3624	struct sglist_seg segs[3];
3625	struct sglist sg;
3626	struct {
3627		struct virtio_net_ctrl_hdr hdr __aligned(2);
3628		uint8_t pad1;
3629		struct virtio_net_ctrl_mq mq __aligned(2);
3630		uint8_t pad2;
3631		uint8_t ack;
3632	} s;
3633	int error;
3634
3635	error = 0;
3636	MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
3637
3638	s.hdr.class = VIRTIO_NET_CTRL_MQ;
3639	s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3640	s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
3641	s.ack = VIRTIO_NET_ERR;
3642
3643	sglist_init(&sg, nitems(segs), segs);
3644	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3645	error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3646	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3647	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3648
3649	if (error == 0)
3650		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3651
3652	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3653}
3654
3655static int
3656vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on)
3657{
3658	struct sglist_seg segs[3];
3659	struct sglist sg;
3660	struct {
3661		struct virtio_net_ctrl_hdr hdr __aligned(2);
3662		uint8_t pad1;
3663		uint8_t onoff;
3664		uint8_t pad2;
3665		uint8_t ack;
3666	} s;
3667	int error;
3668
3669	error = 0;
3670	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3671
3672	s.hdr.class = VIRTIO_NET_CTRL_RX;
3673	s.hdr.cmd = cmd;
3674	s.onoff = on;
3675	s.ack = VIRTIO_NET_ERR;
3676
3677	sglist_init(&sg, nitems(segs), segs);
3678	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3679	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3680	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3681	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3682
3683	if (error == 0)
3684		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3685
3686	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3687}
3688
3689static int
3690vtnet_set_promisc(struct vtnet_softc *sc, bool on)
3691{
3692	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3693}
3694
3695static int
3696vtnet_set_allmulti(struct vtnet_softc *sc, bool on)
3697{
3698	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3699}
3700
3701static void
3702vtnet_rx_filter(struct vtnet_softc *sc)
3703{
3704	device_t dev;
3705	if_t ifp;
3706
3707	dev = sc->vtnet_dev;
3708	ifp = sc->vtnet_ifp;
3709
3710	VTNET_CORE_LOCK_ASSERT(sc);
3711
3712	if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) {
3713		device_printf(dev, "cannot %s promiscuous mode\n",
3714		    if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable");
3715	}
3716
3717	if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) {
3718		device_printf(dev, "cannot %s all-multicast mode\n",
3719		    if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable");
3720	}
3721}
3722
3723static u_int
3724vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3725{
3726	struct vtnet_softc *sc = arg;
3727
3728	if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3729		return (0);
3730
3731	if (ucnt < VTNET_MAX_MAC_ENTRIES)
3732		bcopy(LLADDR(sdl),
3733		    &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3734		    ETHER_ADDR_LEN);
3735
3736	return (1);
3737}
3738
3739static u_int
3740vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3741{
3742	struct vtnet_mac_filter *filter = arg;
3743
3744	if (mcnt < VTNET_MAX_MAC_ENTRIES)
3745		bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3746		    ETHER_ADDR_LEN);
3747
3748	return (1);
3749}
3750
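/*
 * Program the host's unicast and multicast MAC filter tables from the
 * interface's address lists. If either table would overflow, fall back
 * to promiscuous or all-multicast mode instead.
 */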
3751static void
3752vtnet_rx_filter_mac(struct vtnet_softc *sc)
3753{
3754	struct virtio_net_ctrl_hdr hdr __aligned(2);
3755	struct vtnet_mac_filter *filter;
3756	struct sglist_seg segs[4];
3757	struct sglist sg;
3758	if_t ifp;
3759	bool promisc, allmulti;
3760	u_int ucnt, mcnt;
3761	int error;
3762	uint8_t ack;
3763
3764	ifp = sc->vtnet_ifp;
3765	filter = sc->vtnet_mac_filter;
3766	error = 0;
3767
3768	MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3769	VTNET_CORE_LOCK_ASSERT(sc);
3770
3771	/* Unicast MAC addresses: */
3772	ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3773	promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3774
3775	if (promisc) {
3776		ucnt = 0;
3777		if_printf(ifp, "more than %d MAC addresses assigned, "
3778		    "falling back to promiscuous mode\n",
3779		    VTNET_MAX_MAC_ENTRIES);
3780	}
3781
3782	/* Multicast MAC addresses: */
3783	mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3784	allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3785
3786	if (allmulti) {
3787		mcnt = 0;
3788		if_printf(ifp, "more than %d multicast MAC addresses "
3789		    "assigned, falling back to all-multicast mode\n",
3790		    VTNET_MAX_MAC_ENTRIES);
3791	}
3792
3793	if (promisc && allmulti)
3794		goto out;
3795
3796	filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
3797	filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
3798
3799	hdr.class = VIRTIO_NET_CTRL_MAC;
3800	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3801	ack = VIRTIO_NET_ERR;
3802
3803	sglist_init(&sg, nitems(segs), segs);
3804	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3805	error |= sglist_append(&sg, &filter->vmf_unicast,
3806	    sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
3807	error |= sglist_append(&sg, &filter->vmf_multicast,
3808	    sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
3809	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3810	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3811
3812	if (error == 0)
3813		vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3814	if (ack != VIRTIO_NET_OK)
3815		if_printf(ifp, "error setting host MAC filter table\n");
3816
3817out:
3818	if (promisc != 0 && vtnet_set_promisc(sc, true) != 0)
3819		if_printf(ifp, "cannot enable promiscuous mode\n");
3820	if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0)
3821		if_printf(ifp, "cannot enable all-multicast mode\n");
3822}
3823
3824static int
3825vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3826{
3827	struct sglist_seg segs[3];
3828	struct sglist sg;
3829	struct {
3830		struct virtio_net_ctrl_hdr hdr __aligned(2);
3831		uint8_t pad1;
3832		uint16_t tag __aligned(2);
3833		uint8_t pad2;
3834		uint8_t ack;
3835	} s;
3836	int error;
3837
3838	error = 0;
3839	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3840
3841	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3842	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3843	s.tag = vtnet_gtoh16(sc, tag);
3844	s.ack = VIRTIO_NET_ERR;
3845
3846	sglist_init(&sg, nitems(segs), segs);
3847	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3848	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3849	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3850	MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3851
3852	if (error == 0)
3853		vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3854
3855	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3856}
3857
3858static void
3859vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3860{
3861	int i, bit;
3862	uint32_t w;
3863	uint16_t tag;
3864
3865	MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3866	VTNET_CORE_LOCK_ASSERT(sc);
3867
3868	/* Enable the filter for each configured VLAN. */
3869	for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3870		w = sc->vtnet_vlan_filter[i];
3871
3872		while ((bit = ffs(w) - 1) != -1) {
3873			w &= ~(1 << bit);
3874			tag = sizeof(w) * CHAR_BIT * i + bit;
3875
3876			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3877				device_printf(sc->vtnet_dev,
3878				    "cannot enable VLAN %d filter\n", tag);
3879			}
3880		}
3881	}
3882}
3883
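/*
 * Track the VLAN tag in the driver's shadow bitmap, 32 tags per word
 * (e.g., tag 100 maps to word 3, bit 4), and update the host's VLAN
 * filter table when the filter is enabled and the interface is running.
 */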
3884static void
3885vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3886{
3887	if_t ifp;
3888	int idx, bit;
3889
3890	ifp = sc->vtnet_ifp;
3891	idx = (tag >> 5) & 0x7F;
3892	bit = tag & 0x1F;
3893
3894	if (tag == 0 || tag > 4095)
3895		return;
3896
3897	VTNET_CORE_LOCK(sc);
3898
3899	if (add)
3900		sc->vtnet_vlan_filter[idx] |= (1 << bit);
3901	else
3902		sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3903
3904	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER &&
3905	    if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
3906	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3907		device_printf(sc->vtnet_dev,
3908		    "cannot %s VLAN %d %s the host filter table\n",
3909		    add ? "add" : "remove", tag, add ? "to" : "from");
3910	}
3911
3912	VTNET_CORE_UNLOCK(sc);
3913}
3914
3915static void
3916vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag)
3917{
3918
3919	if (if_getsoftc(ifp) != arg)
3920		return;
3921
3922	vtnet_update_vlan_filter(arg, 1, tag);
3923}
3924
3925static void
3926vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag)
3927{
3928
3929	if (if_getsoftc(ifp) != arg)
3930		return;
3931
3932	vtnet_update_vlan_filter(arg, 0, tag);
3933}
3934
3935static void
3936vtnet_update_speed_duplex(struct vtnet_softc *sc)
3937{
3938	if_t ifp;
3939	uint32_t speed;
3940
3941	ifp = sc->vtnet_ifp;
3942
3943	if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
3944		return;
3945
3946	/* BMV: Ignore duplex. */
3947	speed = virtio_read_dev_config_4(sc->vtnet_dev,
3948	    offsetof(struct virtio_net_config, speed));
3949	if (speed != UINT32_MAX)
3950		if_setbaudrate(ifp, IF_Mbps(speed));
3951}
3952
3953static int
3954vtnet_is_link_up(struct vtnet_softc *sc)
3955{
3956	uint16_t status;
3957
3958	if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
3959		return (1);
3960
3961	status = virtio_read_dev_config_2(sc->vtnet_dev,
3962	    offsetof(struct virtio_net_config, status));
3963
3964	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3965}
3966
3967static void
3968vtnet_update_link_status(struct vtnet_softc *sc)
3969{
3970	if_t ifp;
3971	int link;
3972
3973	ifp = sc->vtnet_ifp;
3974	VTNET_CORE_LOCK_ASSERT(sc);
3975	link = vtnet_is_link_up(sc);
3976
3977	/* Notify if the link status has changed. */
3978	if (link != 0 && sc->vtnet_link_active == 0) {
3979		vtnet_update_speed_duplex(sc);
3980		sc->vtnet_link_active = 1;
3981		if_link_state_change(ifp, LINK_STATE_UP);
3982	} else if (link == 0 && sc->vtnet_link_active != 0) {
3983		sc->vtnet_link_active = 0;
3984		if_link_state_change(ifp, LINK_STATE_DOWN);
3985	}
3986}
3987
3988static int
3989vtnet_ifmedia_upd(if_t ifp __unused)
3990{
3991	return (EOPNOTSUPP);
3992}
3993
3994static void
3995vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
3996{
3997	struct vtnet_softc *sc;
3998
3999	sc = if_getsoftc(ifp);
4000
4001	ifmr->ifm_status = IFM_AVALID;
4002	ifmr->ifm_active = IFM_ETHER;
4003
4004	VTNET_CORE_LOCK(sc);
4005	if (vtnet_is_link_up(sc) != 0) {
4006		ifmr->ifm_status |= IFM_ACTIVE;
4007		ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
4008	} else
4009		ifmr->ifm_active |= IFM_NONE;
4010	VTNET_CORE_UNLOCK(sc);
4011}
4012
static void
vtnet_get_macaddr(struct vtnet_softc *sc)
{

	if (sc->vtnet_flags & VTNET_FLAG_MAC) {
		virtio_read_device_config_array(sc->vtnet_dev,
		    offsetof(struct virtio_net_config, mac),
		    &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
	} else {
		/* Generate a random locally administered unicast address. */
		sc->vtnet_hwaddr[0] = 0xB2;
		arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
	}
}

static void
vtnet_set_macaddr(struct vtnet_softc *sc)
{
	device_t dev;
	int error;

	dev = sc->vtnet_dev;

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
		error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
		if (error)
			device_printf(dev, "unable to set MAC address\n");
		return;
	}

	/* MAC in config is read-only in modern VirtIO. */
	if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
		for (int i = 0; i < ETHER_ADDR_LEN; i++) {
			virtio_write_dev_config_1(dev,
			    offsetof(struct virtio_net_config, mac) + i,
			    sc->vtnet_hwaddr[i]);
		}
	}
}

static void
vtnet_attached_set_macaddr(struct vtnet_softc *sc)
{

	/* Assign MAC address if it was generated. */
	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
		vtnet_set_macaddr(sc);
}

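/*
 * Strip the 802.1Q header in place: record the VLAN ID in the mbuf packet
 * header, shift the destination/source MAC addresses up by the 4-byte
 * encapsulation length, and trim the now-unused leading bytes so the frame
 * continues with the inner EtherType.
 */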
static void
vtnet_vlan_tag_remove(struct mbuf *m)
{
	struct ether_vlan_header *evh;

	evh = mtod(m, struct ether_vlan_header *);
	m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
	m->m_flags |= M_VLANTAG;

	/* Strip the 802.1Q header. */
	bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}

static void
vtnet_set_rx_process_limit(struct vtnet_softc *sc)
{
	int limit;

	limit = vtnet_tunable_int(sc, "rx_process_limit",
	    vtnet_rx_process_limit);
	if (limit < 0)
		limit = INT_MAX;
	sc->vtnet_rx_process_limit = limit;
}

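/*
 * Per-queue statistics are exported as read-only sysctl nodes under the
 * device's sysctl tree, e.g. dev.vtnet.<unit>.rxq<N> and
 * dev.vtnet.<unit>.txq<N>.
 */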
static void
vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
{
	struct sysctl_oid *node;
	struct sysctl_oid_list *list;
	struct vtnet_rxq_stats *stats;
	char namebuf[16];

	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
	list = SYSCTL_CHILDREN(node);

	stats = &rxq->vtnrx_stats;

	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
	    &stats->vrxs_ipackets, "Receive packets");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
	    &stats->vrxs_ibytes, "Receive bytes");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
	    &stats->vrxs_iqdrops, "Receive drops");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
	    &stats->vrxs_ierrors, "Receive errors");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
	    &stats->vrxs_csum, "Receive checksum offloaded");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
	    &stats->vrxs_csum_failed, "Receive checksum offload failed");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD,
	    &stats->vrxs_host_lro, "Receive host segmentation offloaded");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
	    &stats->vrxs_rescheduled,
	    "Receive interrupt handler rescheduled");
}

static void
vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct vtnet_txq *txq)
{
	struct sysctl_oid *node;
	struct sysctl_oid_list *list;
	struct vtnet_txq_stats *stats;
	char namebuf[16];

	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
	list = SYSCTL_CHILDREN(node);

	stats = &txq->vtntx_stats;

	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
	    &stats->vtxs_opackets, "Transmit packets");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
	    &stats->vtxs_obytes, "Transmit bytes");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
	    &stats->vtxs_omcasts, "Transmit multicasts");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
	    &stats->vtxs_csum, "Transmit checksum offloaded");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
	    &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
	    &stats->vtxs_rescheduled,
	    "Transmit interrupt handler rescheduled");
}

static void
vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;
	int i;

	dev = sc->vtnet_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
		vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
		vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
	}
}

static void
vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct vtnet_softc *sc)
{
	struct vtnet_statistics *stats;
	struct vtnet_rxq_stats rxaccum;
	struct vtnet_txq_stats txaccum;

	vtnet_accum_stats(sc, &rxaccum, &txaccum);

	stats = &sc->vtnet_stats;
	stats->rx_csum_offloaded = rxaccum.vrxs_csum;
	stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
	stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
	stats->tx_csum_offloaded = txaccum.vtxs_csum;
	stats->tx_tso_offloaded = txaccum.vtxs_tso;
	stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;

	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
	    "Mbuf cluster allocation failures");

	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
	    CTLFLAG_RD, &stats->rx_frame_too_large,
	    "Received frame larger than the mbuf chain");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
	    "Enqueuing the replacement receive mbuf failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
	    CTLFLAG_RD, &stats->rx_mergeable_failed,
	    "Mergeable buffers receive failures");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
	    "Received checksum offloaded buffer with unsupported "
	    "Ethernet type");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
	    "Received checksum offloaded buffer with incorrect IP protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
	    "Received checksum offloaded buffer with incorrect offset");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
	    "Received checksum offloaded buffer with incorrect protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
	    CTLFLAG_RD, &stats->rx_csum_failed,
	    "Received buffer checksum offload failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
	    CTLFLAG_RD, &stats->rx_csum_offloaded,
	    "Received buffer checksum offload succeeded");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
	    CTLFLAG_RD, &stats->rx_task_rescheduled,
	    "Times the receive interrupt task rescheduled itself");

	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
	    CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
	    "Aborted transmit of checksum offloaded buffer with unknown "
	    "Ethernet type");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
	    CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
	    "Aborted transmit of checksum offloaded buffer because mismatched "
	    "protocols");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
	    "Aborted transmit of TSO buffer with non TCP protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
	    CTLFLAG_RD, &stats->tx_tso_without_csum,
	    "Aborted transmit of TSO buffer without TCP checksum offload");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
	    CTLFLAG_RD, &stats->tx_defragged,
	    "Transmit mbufs defragged");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
	    CTLFLAG_RD, &stats->tx_defrag_failed,
	    "Aborted transmit of buffer because defrag failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
	    CTLFLAG_RD, &stats->tx_csum_offloaded,
	    "Offloaded checksum of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
	    CTLFLAG_RD, &stats->tx_tso_offloaded,
	    "Segmentation offload of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
	    CTLFLAG_RD, &stats->tx_task_rescheduled,
	    "Times the transmit interrupt task rescheduled itself");
}

static void
vtnet_setup_sysctl(struct vtnet_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtnet_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
	    "Number of maximum supported virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
	    "Number of requested virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
	    "Number of active virtqueue pairs");

	vtnet_setup_stat_sysctl(ctx, child, sc);
}

static void
vtnet_load_tunables(struct vtnet_softc *sc)
{

	sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
	    "lro_entry_count", vtnet_lro_entry_count);
	if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
		sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;

	sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
	    "lro_mbufq_depth", vtnet_lro_mbufq_depth);
}

static int
vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
{

	return (virtqueue_enable_intr(rxq->vtnrx_vq));
}

static void
vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
{

	virtqueue_disable_intr(rxq->vtnrx_vq);
}

static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
	struct virtqueue *vq;

	vq = txq->vtntx_vq;

	if (vtnet_txq_below_threshold(txq) != 0)
		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));

	/*
	 * The free count is above our threshold. Keep the Tx interrupt
	 * disabled until the queue is fuller.
	 */
	return (0);
}

static void
vtnet_txq_disable_intr(struct vtnet_txq *txq)
{

	virtqueue_disable_intr(txq->vtntx_vq);
}

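/*
 * If enabling a receive interrupt reports that more buffers were completed
 * in the meantime, reschedule the queue's taskqueue so those packets are
 * processed rather than sitting until the next interrupt.
 */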
static void
vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
{
	struct vtnet_rxq *rxq;
	int i;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		if (vtnet_rxq_enable_intr(rxq) != 0)
			taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	}
}

static void
vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
{
	int i;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
}

static void
vtnet_enable_interrupts(struct vtnet_softc *sc)
{

	vtnet_enable_rx_interrupts(sc);
	vtnet_enable_tx_interrupts(sc);
}

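/*
 * Note that interrupts are disabled on all allocated queue pairs (up to
 * the maximum), not just the currently active ones.
 */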
static void
vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
{
	int i;

	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
}

static void
vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
{
	int i;

	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
}

static void
vtnet_disable_interrupts(struct vtnet_softc *sc)
{

	vtnet_disable_rx_interrupts(sc);
	vtnet_disable_tx_interrupts(sc);
}

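/*
 * Fetch a per-device tunable of the form hw.vtnet.<unit>.<knob>, falling
 * back to the supplied default (typically the global hw.vtnet.<knob>
 * value) when the per-device knob is not set.
 */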
static int
vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
{
	char path[64];

	snprintf(path, sizeof(path),
	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}

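/*
 * Debugnet support: a minimal polled transmit/receive path used by the
 * kernel's netdump/netgdb facilities after a panic, when interrupts and
 * the normal network stack context cannot be relied upon.
 */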
#ifdef DEBUGNET
static void
vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
{
	struct vtnet_softc *sc;

	sc = if_getsoftc(ifp);

	VTNET_CORE_LOCK(sc);
	*nrxr = sc->vtnet_req_vq_pairs;
	*ncl = DEBUGNET_MAX_IN_FLIGHT;
	*clsize = sc->vtnet_rx_clustersz;
	VTNET_CORE_UNLOCK(sc);
}

static void
vtnet_debugnet_event(if_t ifp, enum debugnet_ev event)
{
	struct vtnet_softc *sc;
	static bool sw_lro_enabled = false;

	/*
	 * Disable software LRO, since it would require entering the network
	 * epoch when calling vtnet_rxq_eof() in vtnet_debugnet_poll().
	 */
	sc = if_getsoftc(ifp);
	switch (event) {
	case DEBUGNET_START:
		sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0;
		if (sw_lro_enabled)
			sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO;
		break;
	case DEBUGNET_END:
		if (sw_lro_enabled)
			sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
		break;
	}
}

static int
vtnet_debugnet_transmit(if_t ifp, struct mbuf *m)
{
	struct vtnet_softc *sc;
	struct vtnet_txq *txq;
	int error;

	sc = if_getsoftc(ifp);
	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return (EBUSY);

	txq = &sc->vtnet_txqs[0];
	error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
	if (error == 0)
		(void)vtnet_txq_notify(txq);
	return (error);
}

static int
vtnet_debugnet_poll(if_t ifp, int count)
{
	struct vtnet_softc *sc;
	int i;

	sc = if_getsoftc(ifp);
	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return (EBUSY);

	(void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
		(void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
	return (0);
}
#endif /* DEBUGNET */
