if_hn.c revision 308511
/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
53 */ 54 55#include <sys/cdefs.h> 56__FBSDID("$FreeBSD: stable/10/sys/dev/hyperv/netvsc/if_hn.c 308511 2016-11-11 07:25:14Z sephe $"); 57 58#include "opt_inet6.h" 59#include "opt_inet.h" 60 61#include <sys/param.h> 62#include <sys/bus.h> 63#include <sys/kernel.h> 64#include <sys/limits.h> 65#include <sys/malloc.h> 66#include <sys/mbuf.h> 67#include <sys/module.h> 68#include <sys/proc.h> 69#include <sys/queue.h> 70#include <sys/lock.h> 71#include <sys/smp.h> 72#include <sys/socket.h> 73#include <sys/sockio.h> 74#include <sys/sx.h> 75#include <sys/sysctl.h> 76#include <sys/systm.h> 77#include <sys/taskqueue.h> 78#include <sys/buf_ring.h> 79 80#include <machine/atomic.h> 81#include <machine/in_cksum.h> 82 83#include <net/bpf.h> 84#include <net/ethernet.h> 85#include <net/if.h> 86#include <net/if_arp.h> 87#include <net/if_media.h> 88#include <net/if_types.h> 89#include <net/if_var.h> 90#include <net/if_vlan_var.h> 91#include <net/rndis.h> 92 93#include <netinet/in_systm.h> 94#include <netinet/in.h> 95#include <netinet/ip.h> 96#include <netinet/ip6.h> 97#include <netinet/tcp.h> 98#include <netinet/tcp_lro.h> 99#include <netinet/udp.h> 100 101#include <dev/hyperv/include/hyperv.h> 102#include <dev/hyperv/include/hyperv_busdma.h> 103#include <dev/hyperv/include/vmbus.h> 104#include <dev/hyperv/include/vmbus_xact.h> 105 106#include <dev/hyperv/netvsc/ndis.h> 107#include <dev/hyperv/netvsc/if_hnreg.h> 108#include <dev/hyperv/netvsc/if_hnvar.h> 109#include <dev/hyperv/netvsc/hn_nvs.h> 110#include <dev/hyperv/netvsc/hn_rndis.h> 111 112#include "vmbus_if.h" 113 114#define HN_RING_CNT_DEF_MAX 8 115 116/* YYY should get it from the underlying channel */ 117#define HN_TX_DESC_CNT 512 118 119#define HN_RNDIS_PKT_LEN \ 120 (sizeof(struct rndis_packet_msg) + \ 121 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ 122 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ 123 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ 124 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) 125#define 
HN_RNDIS_PKT_BOUNDARY PAGE_SIZE 126#define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE 127 128#define HN_TX_DATA_BOUNDARY PAGE_SIZE 129#define HN_TX_DATA_MAXSIZE IP_MAXPACKET 130#define HN_TX_DATA_SEGSIZE PAGE_SIZE 131/* -1 for RNDIS packet message */ 132#define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) 133 134#define HN_DIRECT_TX_SIZE_DEF 128 135 136#define HN_EARLY_TXEOF_THRESH 8 137 138#define HN_PKTBUF_LEN_DEF (16 * 1024) 139 140#define HN_LROENT_CNT_DEF 128 141 142#define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) 143#define HN_LRO_LENLIM_DEF (25 * ETHERMTU) 144/* YYY 2*MTU is a bit rough, but should be good enough. */ 145#define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) 146 147#define HN_LRO_ACKCNT_DEF 1 148 149#define HN_LOCK_INIT(sc) \ 150 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) 151#define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) 152#define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) 153#define HN_LOCK(sc) sx_xlock(&(sc)->hn_lock) 154#define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) 155 156#define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) 157#define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) 158#define HN_CSUM_IP_HWASSIST(sc) \ 159 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) 160#define HN_CSUM_IP6_HWASSIST(sc) \ 161 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) 162 163struct hn_txdesc { 164#ifndef HN_USE_TXDESC_BUFRING 165 SLIST_ENTRY(hn_txdesc) link; 166#endif 167 struct mbuf *m; 168 struct hn_tx_ring *txr; 169 int refs; 170 uint32_t flags; /* HN_TXD_FLAG_ */ 171 struct hn_nvs_sendctx send_ctx; 172 uint32_t chim_index; 173 int chim_size; 174 175 bus_dmamap_t data_dmap; 176 177 bus_addr_t rndis_pkt_paddr; 178 struct rndis_packet_msg *rndis_pkt; 179 bus_dmamap_t rndis_pkt_dmap; 180}; 181 182#define HN_TXD_FLAG_ONLIST 0x0001 183#define HN_TXD_FLAG_DMAMAP 0x0002 184 185struct hn_rxinfo { 186 uint32_t vlan_info; 187 uint32_t csum_info; 188 uint32_t hash_info; 189 uint32_t hash_value; 190}; 191 
192#define HN_RXINFO_VLAN 0x0001 193#define HN_RXINFO_CSUM 0x0002 194#define HN_RXINFO_HASHINF 0x0004 195#define HN_RXINFO_HASHVAL 0x0008 196#define HN_RXINFO_ALL \ 197 (HN_RXINFO_VLAN | \ 198 HN_RXINFO_CSUM | \ 199 HN_RXINFO_HASHINF | \ 200 HN_RXINFO_HASHVAL) 201 202#define HN_NDIS_VLAN_INFO_INVALID 0xffffffff 203#define HN_NDIS_RXCSUM_INFO_INVALID 0 204#define HN_NDIS_HASH_INFO_INVALID 0 205 206static int hn_probe(device_t); 207static int hn_attach(device_t); 208static int hn_detach(device_t); 209static int hn_shutdown(device_t); 210static void hn_chan_callback(struct vmbus_channel *, 211 void *); 212 213static void hn_init(void *); 214static int hn_ioctl(struct ifnet *, u_long, caddr_t); 215static void hn_start(struct ifnet *); 216static int hn_transmit(struct ifnet *, struct mbuf *); 217static void hn_xmit_qflush(struct ifnet *); 218static int hn_ifmedia_upd(struct ifnet *); 219static void hn_ifmedia_sts(struct ifnet *, 220 struct ifmediareq *); 221 222static int hn_rndis_rxinfo(const void *, int, 223 struct hn_rxinfo *); 224static void hn_rndis_rx_data(struct hn_rx_ring *, 225 const void *, int); 226static void hn_rndis_rx_status(struct hn_softc *, 227 const void *, int); 228 229static void hn_nvs_handle_notify(struct hn_softc *, 230 const struct vmbus_chanpkt_hdr *); 231static void hn_nvs_handle_comp(struct hn_softc *, 232 struct vmbus_channel *, 233 const struct vmbus_chanpkt_hdr *); 234static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, 235 struct vmbus_channel *, 236 const struct vmbus_chanpkt_hdr *); 237static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, 238 struct vmbus_channel *, uint64_t); 239 240#if __FreeBSD_version >= 1100099 241static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); 242static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); 243#endif 244static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); 245static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); 246#if __FreeBSD_version < 1100095 247static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); 
248#else 249static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); 250#endif 251static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 252static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); 253static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); 254static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); 255static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); 256static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); 257static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); 258static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); 259static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); 260static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); 261 262static void hn_stop(struct hn_softc *); 263static void hn_init_locked(struct hn_softc *); 264static int hn_chan_attach(struct hn_softc *, 265 struct vmbus_channel *); 266static void hn_chan_detach(struct hn_softc *, 267 struct vmbus_channel *); 268static int hn_attach_subchans(struct hn_softc *); 269static void hn_detach_allchans(struct hn_softc *); 270static void hn_chan_rollup(struct hn_rx_ring *, 271 struct hn_tx_ring *); 272static void hn_set_ring_inuse(struct hn_softc *, int); 273static int hn_synth_attach(struct hn_softc *, int); 274static void hn_synth_detach(struct hn_softc *); 275static int hn_synth_alloc_subchans(struct hn_softc *, 276 int *); 277static void hn_suspend(struct hn_softc *); 278static void hn_suspend_data(struct hn_softc *); 279static void hn_suspend_mgmt(struct hn_softc *); 280static void hn_resume(struct hn_softc *); 281static void hn_resume_data(struct hn_softc *); 282static void hn_resume_mgmt(struct hn_softc *); 283static void hn_suspend_mgmt_taskfunc(void *, int); 284static void hn_chan_drain(struct vmbus_channel *); 285 286static void hn_update_link_status(struct hn_softc *); 287static void hn_change_network(struct hn_softc *); 288static void hn_link_taskfunc(void *, int); 289static void hn_netchg_init_taskfunc(void *, int); 290static void hn_netchg_status_taskfunc(void *, int); 291static void 
hn_link_status(struct hn_softc *); 292 293static int hn_create_rx_data(struct hn_softc *, int); 294static void hn_destroy_rx_data(struct hn_softc *); 295static int hn_check_iplen(const struct mbuf *, int); 296static int hn_set_rxfilter(struct hn_softc *); 297static int hn_rss_reconfig(struct hn_softc *); 298static void hn_rss_ind_fixup(struct hn_softc *, int); 299static int hn_rxpkt(struct hn_rx_ring *, const void *, 300 int, const struct hn_rxinfo *); 301 302static int hn_tx_ring_create(struct hn_softc *, int); 303static void hn_tx_ring_destroy(struct hn_tx_ring *); 304static int hn_create_tx_data(struct hn_softc *, int); 305static void hn_fixup_tx_data(struct hn_softc *); 306static void hn_destroy_tx_data(struct hn_softc *); 307static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); 308static int hn_encap(struct hn_tx_ring *, 309 struct hn_txdesc *, struct mbuf **); 310static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, 311 struct hn_txdesc *); 312static void hn_set_chim_size(struct hn_softc *, int); 313static void hn_set_tso_maxsize(struct hn_softc *, int, int); 314static bool hn_tx_ring_pending(struct hn_tx_ring *); 315static void hn_tx_ring_qflush(struct hn_tx_ring *); 316static void hn_resume_tx(struct hn_softc *, int); 317static int hn_get_txswq_depth(const struct hn_tx_ring *); 318static void hn_txpkt_done(struct hn_nvs_sendctx *, 319 struct hn_softc *, struct vmbus_channel *, 320 const void *, int); 321static int hn_txpkt_sglist(struct hn_tx_ring *, 322 struct hn_txdesc *); 323static int hn_txpkt_chim(struct hn_tx_ring *, 324 struct hn_txdesc *); 325static int hn_xmit(struct hn_tx_ring *, int); 326static void hn_xmit_taskfunc(void *, int); 327static void hn_xmit_txeof(struct hn_tx_ring *); 328static void hn_xmit_txeof_taskfunc(void *, int); 329static int hn_start_locked(struct hn_tx_ring *, int); 330static void hn_start_taskfunc(void *, int); 331static void hn_start_txeof(struct hn_tx_ring *); 332static void hn_start_txeof_taskfunc(void *, int); 333 
334SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 335 "Hyper-V network interface"); 336 337/* Trust tcp segements verification on host side. */ 338static int hn_trust_hosttcp = 1; 339SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, 340 &hn_trust_hosttcp, 0, 341 "Trust tcp segement verification on host side, " 342 "when csum info is missing (global setting)"); 343 344/* Trust udp datagrams verification on host side. */ 345static int hn_trust_hostudp = 1; 346SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, 347 &hn_trust_hostudp, 0, 348 "Trust udp datagram verification on host side, " 349 "when csum info is missing (global setting)"); 350 351/* Trust ip packets verification on host side. */ 352static int hn_trust_hostip = 1; 353SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, 354 &hn_trust_hostip, 0, 355 "Trust ip packet verification on host side, " 356 "when csum info is missing (global setting)"); 357 358/* Limit TSO burst size */ 359static int hn_tso_maxlen = IP_MAXPACKET; 360SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, 361 &hn_tso_maxlen, 0, "TSO burst limit"); 362 363/* Limit chimney send size */ 364static int hn_tx_chimney_size = 0; 365SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, 366 &hn_tx_chimney_size, 0, "Chimney send packet size limit"); 367 368/* Limit the size of packet for direct transmission */ 369static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; 370SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, 371 &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); 372 373/* # of LRO entries per RX ring */ 374#if defined(INET) || defined(INET6) 375#if __FreeBSD_version >= 1100095 376static int hn_lro_entry_count = HN_LROENT_CNT_DEF; 377SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, 378 &hn_lro_entry_count, 0, "LRO entry count"); 379#endif 380#endif 381 382/* Use shared TX taskqueue */ 383static int hn_share_tx_taskq = 0; 384SYSCTL_INT(_hw_hn, 
OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, 385 &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); 386 387#ifndef HN_USE_TXDESC_BUFRING 388static int hn_use_txdesc_bufring = 0; 389#else 390static int hn_use_txdesc_bufring = 1; 391#endif 392SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, 393 &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); 394 395/* Bind TX taskqueue to the target CPU */ 396static int hn_bind_tx_taskq = -1; 397SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, 398 &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); 399 400/* Use ifnet.if_start instead of ifnet.if_transmit */ 401static int hn_use_if_start = 0; 402SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, 403 &hn_use_if_start, 0, "Use if_start TX method"); 404 405/* # of channels to use */ 406static int hn_chan_cnt = 0; 407SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, 408 &hn_chan_cnt, 0, 409 "# of channels to use; each channel has one RX ring and one TX ring"); 410 411/* # of transmit rings to use */ 412static int hn_tx_ring_cnt = 0; 413SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, 414 &hn_tx_ring_cnt, 0, "# of TX rings to use"); 415 416/* Software TX ring deptch */ 417static int hn_tx_swq_depth = 0; 418SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, 419 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); 420 421/* Enable sorted LRO, and the depth of the per-channel mbuf queue */ 422#if __FreeBSD_version >= 1100095 423static u_int hn_lro_mbufq_depth = 0; 424SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, 425 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); 426#endif 427 428static u_int hn_cpu_index; /* next CPU for channel */ 429static struct taskqueue *hn_tx_taskq; /* shared TX taskqueue */ 430 431static const uint8_t 432hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 433 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 434 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 435 0xd0, 0xca, 0x2b, 
0xcb, 0xae, 0x7b, 0x30, 0xb4, 436 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 437 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa 438}; 439 440static device_method_t hn_methods[] = { 441 /* Device interface */ 442 DEVMETHOD(device_probe, hn_probe), 443 DEVMETHOD(device_attach, hn_attach), 444 DEVMETHOD(device_detach, hn_detach), 445 DEVMETHOD(device_shutdown, hn_shutdown), 446 DEVMETHOD_END 447}; 448 449static driver_t hn_driver = { 450 "hn", 451 hn_methods, 452 sizeof(struct hn_softc) 453}; 454 455static devclass_t hn_devclass; 456 457DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); 458MODULE_VERSION(hn, 1); 459MODULE_DEPEND(hn, vmbus, 1, 1, 1); 460 461#if __FreeBSD_version >= 1100099 462static void 463hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) 464{ 465 int i; 466 467 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) 468 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; 469} 470#endif 471 472static int 473hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) 474{ 475 476 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 477 txd->chim_size == 0, ("invalid rndis sglist txd")); 478 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, 479 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); 480} 481 482static int 483hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) 484{ 485 struct hn_nvs_rndis rndis; 486 487 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && 488 txd->chim_size > 0, ("invalid rndis chim txd")); 489 490 rndis.nvs_type = HN_NVS_TYPE_RNDIS; 491 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; 492 rndis.nvs_chim_idx = txd->chim_index; 493 rndis.nvs_chim_sz = txd->chim_size; 494 495 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, 496 &rndis, sizeof(rndis), &txd->send_ctx)); 497} 498 499static __inline uint32_t 500hn_chim_alloc(struct hn_softc *sc) 501{ 502 int i, bmap_cnt = sc->hn_chim_bmap_cnt; 503 u_long *bmap = sc->hn_chim_bmap; 504 uint32_t ret = HN_NVS_CHIM_IDX_INVALID; 505 506 for (i = 0; i 
< bmap_cnt; ++i) { 507 int idx; 508 509 idx = ffsl(~bmap[i]); 510 if (idx == 0) 511 continue; 512 513 --idx; /* ffsl is 1-based */ 514 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, 515 ("invalid i %d and idx %d", i, idx)); 516 517 if (atomic_testandset_long(&bmap[i], idx)) 518 continue; 519 520 ret = i * LONG_BIT + idx; 521 break; 522 } 523 return (ret); 524} 525 526static __inline void 527hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) 528{ 529 u_long mask; 530 uint32_t idx; 531 532 idx = chim_idx / LONG_BIT; 533 KASSERT(idx < sc->hn_chim_bmap_cnt, 534 ("invalid chimney index 0x%x", chim_idx)); 535 536 mask = 1UL << (chim_idx % LONG_BIT); 537 KASSERT(sc->hn_chim_bmap[idx] & mask, 538 ("index bitmap 0x%lx, chimney index %u, " 539 "bitmap idx %d, bitmask 0x%lx", 540 sc->hn_chim_bmap[idx], chim_idx, idx, mask)); 541 542 atomic_clear_long(&sc->hn_chim_bmap[idx], mask); 543} 544 545static int 546hn_set_rxfilter(struct hn_softc *sc) 547{ 548 struct ifnet *ifp = sc->hn_ifp; 549 uint32_t filter; 550 int error = 0; 551 552 HN_LOCK_ASSERT(sc); 553 554 if (ifp->if_flags & IFF_PROMISC) { 555 filter = NDIS_PACKET_TYPE_PROMISCUOUS; 556 } else { 557 filter = NDIS_PACKET_TYPE_DIRECTED; 558 if (ifp->if_flags & IFF_BROADCAST) 559 filter |= NDIS_PACKET_TYPE_BROADCAST; 560#ifdef notyet 561 /* 562 * See the comment in SIOCADDMULTI/SIOCDELMULTI. 
563 */ 564 /* TODO: support multicast list */ 565 if ((ifp->if_flags & IFF_ALLMULTI) || 566 !TAILQ_EMPTY(&ifp->if_multiaddrs)) 567 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 568#else 569 /* Always enable ALLMULTI */ 570 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; 571#endif 572 } 573 574 if (sc->hn_rx_filter != filter) { 575 error = hn_rndis_set_rxfilter(sc, filter); 576 if (!error) 577 sc->hn_rx_filter = filter; 578 } 579 return (error); 580} 581 582static int 583hn_get_txswq_depth(const struct hn_tx_ring *txr) 584{ 585 586 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); 587 if (hn_tx_swq_depth < txr->hn_txdesc_cnt) 588 return txr->hn_txdesc_cnt; 589 return hn_tx_swq_depth; 590} 591 592static int 593hn_rss_reconfig(struct hn_softc *sc) 594{ 595 int error; 596 597 HN_LOCK_ASSERT(sc); 598 599 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 600 return (ENXIO); 601 602 /* 603 * Disable RSS first. 604 * 605 * NOTE: 606 * Direct reconfiguration by setting the UNCHG flags does 607 * _not_ work properly. 608 */ 609 if (bootverbose) 610 if_printf(sc->hn_ifp, "disable RSS\n"); 611 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); 612 if (error) { 613 if_printf(sc->hn_ifp, "RSS disable failed\n"); 614 return (error); 615 } 616 617 /* 618 * Reenable the RSS w/ the updated RSS key or indirect 619 * table. 620 */ 621 if (bootverbose) 622 if_printf(sc->hn_ifp, "reconfig RSS\n"); 623 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 624 if (error) { 625 if_printf(sc->hn_ifp, "RSS reconfig failed\n"); 626 return (error); 627 } 628 return (0); 629} 630 631static void 632hn_rss_ind_fixup(struct hn_softc *sc, int nchan) 633{ 634 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 635 int i; 636 637 KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); 638 639 /* 640 * Check indirect table to make sure that all channels in it 641 * can be used. 
642 */ 643 for (i = 0; i < NDIS_HASH_INDCNT; ++i) { 644 if (rss->rss_ind[i] >= nchan) { 645 if_printf(sc->hn_ifp, 646 "RSS indirect table %d fixup: %u -> %d\n", 647 i, rss->rss_ind[i], nchan - 1); 648 rss->rss_ind[i] = nchan - 1; 649 } 650 } 651} 652 653static int 654hn_ifmedia_upd(struct ifnet *ifp __unused) 655{ 656 657 return EOPNOTSUPP; 658} 659 660static void 661hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 662{ 663 struct hn_softc *sc = ifp->if_softc; 664 665 ifmr->ifm_status = IFM_AVALID; 666 ifmr->ifm_active = IFM_ETHER; 667 668 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { 669 ifmr->ifm_active |= IFM_NONE; 670 return; 671 } 672 ifmr->ifm_status |= IFM_ACTIVE; 673 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 674} 675 676/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ 677static const struct hyperv_guid g_net_vsc_device_type = { 678 .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 679 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} 680}; 681 682static int 683hn_probe(device_t dev) 684{ 685 686 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, 687 &g_net_vsc_device_type) == 0) { 688 device_set_desc(dev, "Hyper-V Network Interface"); 689 return BUS_PROBE_DEFAULT; 690 } 691 return ENXIO; 692} 693 694static void 695hn_cpuset_setthread_task(void *xmask, int pending __unused) 696{ 697 cpuset_t *mask = xmask; 698 int error; 699 700 error = cpuset_setthread(curthread->td_tid, mask); 701 if (error) { 702 panic("curthread=%ju: can't pin; error=%d", 703 (uintmax_t)curthread->td_tid, error); 704 } 705} 706 707static int 708hn_attach(device_t dev) 709{ 710 struct hn_softc *sc = device_get_softc(dev); 711 struct sysctl_oid_list *child; 712 struct sysctl_ctx_list *ctx; 713 uint8_t eaddr[ETHER_ADDR_LEN]; 714 struct ifnet *ifp = NULL; 715 int error, ring_cnt, tx_ring_cnt; 716 717 sc->hn_dev = dev; 718 sc->hn_prichan = vmbus_get_channel(dev); 719 HN_LOCK_INIT(sc); 720 721 /* 722 * Setup taskqueue for transmission. 
723 */ 724 if (hn_tx_taskq == NULL) { 725 sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, 726 taskqueue_thread_enqueue, &sc->hn_tx_taskq); 727 taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", 728 device_get_nameunit(dev)); 729 if (hn_bind_tx_taskq >= 0) { 730 int cpu = hn_bind_tx_taskq; 731 struct task cpuset_task; 732 cpuset_t cpu_set; 733 734 if (cpu > mp_ncpus - 1) 735 cpu = mp_ncpus - 1; 736 CPU_SETOF(cpu, &cpu_set); 737 TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, 738 &cpu_set); 739 taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task); 740 taskqueue_drain(sc->hn_tx_taskq, &cpuset_task); 741 } 742 } else { 743 sc->hn_tx_taskq = hn_tx_taskq; 744 } 745 746 /* 747 * Setup taskqueue for mangement tasks, e.g. link status. 748 */ 749 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, 750 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); 751 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", 752 device_get_nameunit(dev)); 753 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); 754 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); 755 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, 756 hn_netchg_status_taskfunc, sc); 757 758 /* 759 * Allocate ifnet and setup its name earlier, so that if_printf 760 * can be used by functions, which will be called after 761 * ether_ifattach(). 762 */ 763 ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER); 764 ifp->if_softc = sc; 765 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 766 767 /* 768 * Initialize ifmedia earlier so that it can be unconditionally 769 * destroyed, if error happened later on. 770 */ 771 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); 772 773 /* 774 * Figure out the # of RX rings (ring_cnt) and the # of TX rings 775 * to use (tx_ring_cnt). 776 * 777 * NOTE: 778 * The # of RX rings to use is same as the # of channels to use. 
779 */ 780 ring_cnt = hn_chan_cnt; 781 if (ring_cnt <= 0) { 782 /* Default */ 783 ring_cnt = mp_ncpus; 784 if (ring_cnt > HN_RING_CNT_DEF_MAX) 785 ring_cnt = HN_RING_CNT_DEF_MAX; 786 } else if (ring_cnt > mp_ncpus) { 787 ring_cnt = mp_ncpus; 788 } 789 790 tx_ring_cnt = hn_tx_ring_cnt; 791 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) 792 tx_ring_cnt = ring_cnt; 793 if (hn_use_if_start) { 794 /* ifnet.if_start only needs one TX ring. */ 795 tx_ring_cnt = 1; 796 } 797 798 /* 799 * Set the leader CPU for channels. 800 */ 801 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; 802 803 /* 804 * Create enough TX/RX rings, even if only limited number of 805 * channels can be allocated. 806 */ 807 error = hn_create_tx_data(sc, tx_ring_cnt); 808 if (error) 809 goto failed; 810 error = hn_create_rx_data(sc, ring_cnt); 811 if (error) 812 goto failed; 813 814 /* 815 * Create transaction context for NVS and RNDIS transactions. 816 */ 817 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), 818 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); 819 if (sc->hn_xact == NULL) 820 goto failed; 821 822 /* 823 * Attach the synthetic parts, i.e. NVS and RNDIS. 824 */ 825 error = hn_synth_attach(sc, ETHERMTU); 826 if (error) 827 goto failed; 828 829 error = hn_rndis_get_eaddr(sc, eaddr); 830 if (error) 831 goto failed; 832 833#if __FreeBSD_version >= 1100099 834 if (sc->hn_rx_ring_inuse > 1) { 835 /* 836 * Reduce TCP segment aggregation limit for multiple 837 * RX rings to increase ACK timeliness. 838 */ 839 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); 840 } 841#endif 842 843 /* 844 * Fixup TX stuffs after synthetic parts are attached. 
845 */ 846 hn_fixup_tx_data(sc); 847 848 ctx = device_get_sysctl_ctx(dev); 849 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 850 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, 851 &sc->hn_nvs_ver, 0, "NVS version"); 852 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", 853 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 854 hn_ndis_version_sysctl, "A", "NDIS version"); 855 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", 856 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 857 hn_caps_sysctl, "A", "capabilities"); 858 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", 859 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 860 hn_hwassist_sysctl, "A", "hwassist"); 861 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", 862 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 863 hn_rxfilter_sysctl, "A", "rxfilter"); 864 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", 865 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 866 hn_rss_hash_sysctl, "A", "RSS hash"); 867 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", 868 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); 869 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", 870 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 871 hn_rss_key_sysctl, "IU", "RSS key"); 872 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", 873 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 874 hn_rss_ind_sysctl, "IU", "RSS indirect table"); 875 876 /* 877 * Setup the ifmedia, which has been initialized earlier. 878 */ 879 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); 880 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); 881 /* XXX ifmedia_set really should do this for us */ 882 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; 883 884 /* 885 * Setup the ifnet for this interface. 886 */ 887 888#ifdef __LP64__ 889 ifp->if_baudrate = IF_Gbps(10); 890#else 891 /* if_baudrate is 32bits on 32bit system. 
*/ 892 ifp->if_baudrate = IF_Gbps(1); 893#endif 894 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 895 ifp->if_ioctl = hn_ioctl; 896 ifp->if_init = hn_init; 897 if (hn_use_if_start) { 898 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); 899 900 ifp->if_start = hn_start; 901 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); 902 ifp->if_snd.ifq_drv_maxlen = qdepth - 1; 903 IFQ_SET_READY(&ifp->if_snd); 904 } else { 905 ifp->if_transmit = hn_transmit; 906 ifp->if_qflush = hn_xmit_qflush; 907 } 908 909 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO; 910#ifdef foo 911 /* We can't diff IPv6 packets from IPv4 packets on RX path. */ 912 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; 913#endif 914 if (sc->hn_caps & HN_CAP_VLAN) { 915 /* XXX not sure about VLAN_MTU. */ 916 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 917 } 918 919 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; 920 if (ifp->if_hwassist & HN_CSUM_IP_MASK) 921 ifp->if_capabilities |= IFCAP_TXCSUM; 922 if (ifp->if_hwassist & HN_CSUM_IP6_MASK) 923 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; 924 if (sc->hn_caps & HN_CAP_TSO4) { 925 ifp->if_capabilities |= IFCAP_TSO4; 926 ifp->if_hwassist |= CSUM_IP_TSO; 927 } 928 if (sc->hn_caps & HN_CAP_TSO6) { 929 ifp->if_capabilities |= IFCAP_TSO6; 930 ifp->if_hwassist |= CSUM_IP6_TSO; 931 } 932 933 /* Enable all available capabilities by default. */ 934 ifp->if_capenable = ifp->if_capabilities; 935 936 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { 937 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); 938 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; 939 ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 940 } 941 942 ether_ifattach(ifp, eaddr); 943 944 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { 945 if_printf(ifp, "TSO segcnt %u segsz %u\n", 946 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); 947 } 948 949 /* Inform the upper layer about the long frame support. 
*/ 950 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 951 952 /* 953 * Kick off link status check. 954 */ 955 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 956 hn_update_link_status(sc); 957 958 return (0); 959failed: 960 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) 961 hn_synth_detach(sc); 962 hn_detach(dev); 963 return (error); 964} 965 966static int 967hn_detach(device_t dev) 968{ 969 struct hn_softc *sc = device_get_softc(dev); 970 struct ifnet *ifp = sc->hn_ifp; 971 972 if (device_is_attached(dev)) { 973 HN_LOCK(sc); 974 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { 975 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 976 hn_stop(sc); 977 /* 978 * NOTE: 979 * hn_stop() only suspends data, so managment 980 * stuffs have to be suspended manually here. 981 */ 982 hn_suspend_mgmt(sc); 983 hn_synth_detach(sc); 984 } 985 HN_UNLOCK(sc); 986 ether_ifdetach(ifp); 987 } 988 989 ifmedia_removeall(&sc->hn_media); 990 hn_destroy_rx_data(sc); 991 hn_destroy_tx_data(sc); 992 993 if (sc->hn_tx_taskq != hn_tx_taskq) 994 taskqueue_free(sc->hn_tx_taskq); 995 taskqueue_free(sc->hn_mgmt_taskq0); 996 997 if (sc->hn_xact != NULL) 998 vmbus_xact_ctx_destroy(sc->hn_xact); 999 1000 if_free(ifp); 1001 1002 HN_LOCK_DESTROY(sc); 1003 return (0); 1004} 1005 1006static int 1007hn_shutdown(device_t dev) 1008{ 1009 1010 return (0); 1011} 1012 1013static void 1014hn_link_status(struct hn_softc *sc) 1015{ 1016 uint32_t link_status; 1017 int error; 1018 1019 error = hn_rndis_get_linkstatus(sc, &link_status); 1020 if (error) { 1021 /* XXX what to do? */ 1022 return; 1023 } 1024 1025 if (link_status == NDIS_MEDIA_STATE_CONNECTED) 1026 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP; 1027 else 1028 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 1029 if_link_state_change(sc->hn_ifp, 1030 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ? 
1031 LINK_STATE_UP : LINK_STATE_DOWN); 1032} 1033 1034static void 1035hn_link_taskfunc(void *xsc, int pending __unused) 1036{ 1037 struct hn_softc *sc = xsc; 1038 1039 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 1040 return; 1041 hn_link_status(sc); 1042} 1043 1044static void 1045hn_netchg_init_taskfunc(void *xsc, int pending __unused) 1046{ 1047 struct hn_softc *sc = xsc; 1048 1049 /* Prevent any link status checks from running. */ 1050 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG; 1051 1052 /* 1053 * Fake up a [link down --> link up] state change; 5 seconds 1054 * delay is used, which closely simulates miibus reaction 1055 * upon link down event. 1056 */ 1057 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP; 1058 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN); 1059 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0, 1060 &sc->hn_netchg_status, 5 * hz); 1061} 1062 1063static void 1064hn_netchg_status_taskfunc(void *xsc, int pending __unused) 1065{ 1066 struct hn_softc *sc = xsc; 1067 1068 /* Re-allow link status checks. 
*/ 1069 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG; 1070 hn_link_status(sc); 1071} 1072 1073static void 1074hn_update_link_status(struct hn_softc *sc) 1075{ 1076 1077 if (sc->hn_mgmt_taskq != NULL) 1078 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task); 1079} 1080 1081static void 1082hn_change_network(struct hn_softc *sc) 1083{ 1084 1085 if (sc->hn_mgmt_taskq != NULL) 1086 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init); 1087} 1088 1089static __inline int 1090hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, 1091 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) 1092{ 1093 struct mbuf *m = *m_head; 1094 int error; 1095 1096 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim")); 1097 1098 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, 1099 m, segs, nsegs, BUS_DMA_NOWAIT); 1100 if (error == EFBIG) { 1101 struct mbuf *m_new; 1102 1103 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); 1104 if (m_new == NULL) 1105 return ENOBUFS; 1106 else 1107 *m_head = m = m_new; 1108 txr->hn_tx_collapsed++; 1109 1110 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, 1111 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); 1112 } 1113 if (!error) { 1114 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, 1115 BUS_DMASYNC_PREWRITE); 1116 txd->flags |= HN_TXD_FLAG_DMAMAP; 1117 } 1118 return error; 1119} 1120 1121static __inline int 1122hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) 1123{ 1124 1125 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, 1126 ("put an onlist txd %#x", txd->flags)); 1127 1128 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); 1129 if (atomic_fetchadd_int(&txd->refs, -1) != 1) 1130 return 0; 1131 1132 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 1133 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, 1134 ("chim txd uses dmamap")); 1135 hn_chim_free(txr->hn_sc, txd->chim_index); 1136 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 1137 } else if 
(txd->flags & HN_TXD_FLAG_DMAMAP) { 1138 bus_dmamap_sync(txr->hn_tx_data_dtag, 1139 txd->data_dmap, BUS_DMASYNC_POSTWRITE); 1140 bus_dmamap_unload(txr->hn_tx_data_dtag, 1141 txd->data_dmap); 1142 txd->flags &= ~HN_TXD_FLAG_DMAMAP; 1143 } 1144 1145 if (txd->m != NULL) { 1146 m_freem(txd->m); 1147 txd->m = NULL; 1148 } 1149 1150 txd->flags |= HN_TXD_FLAG_ONLIST; 1151#ifndef HN_USE_TXDESC_BUFRING 1152 mtx_lock_spin(&txr->hn_txlist_spin); 1153 KASSERT(txr->hn_txdesc_avail >= 0 && 1154 txr->hn_txdesc_avail < txr->hn_txdesc_cnt, 1155 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); 1156 txr->hn_txdesc_avail++; 1157 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 1158 mtx_unlock_spin(&txr->hn_txlist_spin); 1159#else 1160 atomic_add_int(&txr->hn_txdesc_avail, 1); 1161 buf_ring_enqueue(txr->hn_txdesc_br, txd); 1162#endif 1163 1164 return 1; 1165} 1166 1167static __inline struct hn_txdesc * 1168hn_txdesc_get(struct hn_tx_ring *txr) 1169{ 1170 struct hn_txdesc *txd; 1171 1172#ifndef HN_USE_TXDESC_BUFRING 1173 mtx_lock_spin(&txr->hn_txlist_spin); 1174 txd = SLIST_FIRST(&txr->hn_txlist); 1175 if (txd != NULL) { 1176 KASSERT(txr->hn_txdesc_avail > 0, 1177 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); 1178 txr->hn_txdesc_avail--; 1179 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 1180 } 1181 mtx_unlock_spin(&txr->hn_txlist_spin); 1182#else 1183 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); 1184#endif 1185 1186 if (txd != NULL) { 1187#ifdef HN_USE_TXDESC_BUFRING 1188 atomic_subtract_int(&txr->hn_txdesc_avail, 1); 1189#endif 1190 KASSERT(txd->m == NULL && txd->refs == 0 && 1191 txd->chim_index == HN_NVS_CHIM_IDX_INVALID && 1192 (txd->flags & HN_TXD_FLAG_ONLIST) && 1193 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd")); 1194 txd->flags &= ~HN_TXD_FLAG_ONLIST; 1195 txd->refs = 1; 1196 } 1197 return txd; 1198} 1199 1200static __inline void 1201hn_txdesc_hold(struct hn_txdesc *txd) 1202{ 1203 1204 /* 0->1 transition will never work */ 1205 KASSERT(txd->refs 
> 0, ("invalid refs %d", txd->refs)); 1206 atomic_add_int(&txd->refs, 1); 1207} 1208 1209static bool 1210hn_tx_ring_pending(struct hn_tx_ring *txr) 1211{ 1212 bool pending = false; 1213 1214#ifndef HN_USE_TXDESC_BUFRING 1215 mtx_lock_spin(&txr->hn_txlist_spin); 1216 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt) 1217 pending = true; 1218 mtx_unlock_spin(&txr->hn_txlist_spin); 1219#else 1220 if (!buf_ring_full(txr->hn_txdesc_br)) 1221 pending = true; 1222#endif 1223 return (pending); 1224} 1225 1226static __inline void 1227hn_txeof(struct hn_tx_ring *txr) 1228{ 1229 txr->hn_has_txeof = 0; 1230 txr->hn_txeof(txr); 1231} 1232 1233static void 1234hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc, 1235 struct vmbus_channel *chan, const void *data __unused, int dlen __unused) 1236{ 1237 struct hn_txdesc *txd = sndc->hn_cbarg; 1238 struct hn_tx_ring *txr; 1239 1240 txr = txd->txr; 1241 KASSERT(txr->hn_chan == chan, 1242 ("channel mismatch, on chan%u, should be chan%u", 1243 vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); 1244 1245 txr->hn_has_txeof = 1; 1246 hn_txdesc_put(txr, txd); 1247 1248 ++txr->hn_txdone_cnt; 1249 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { 1250 txr->hn_txdone_cnt = 0; 1251 if (txr->hn_oactive) 1252 hn_txeof(txr); 1253 } 1254} 1255 1256static void 1257hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) 1258{ 1259#if defined(INET) || defined(INET6) 1260 struct lro_ctrl *lro = &rxr->hn_lro; 1261 struct lro_entry *queued; 1262 1263 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { 1264 SLIST_REMOVE_HEAD(&lro->lro_active, next); 1265 tcp_lro_flush(lro, queued); 1266 } 1267#endif 1268 1269 /* 1270 * NOTE: 1271 * 'txr' could be NULL, if multiple channels and 1272 * ifnet.if_start method are enabled. 
1273 */ 1274 if (txr == NULL || !txr->hn_has_txeof) 1275 return; 1276 1277 txr->hn_txdone_cnt = 0; 1278 hn_txeof(txr); 1279} 1280 1281static __inline uint32_t 1282hn_rndis_pktmsg_offset(uint32_t ofs) 1283{ 1284 1285 KASSERT(ofs >= sizeof(struct rndis_packet_msg), 1286 ("invalid RNDIS packet msg offset %u", ofs)); 1287 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); 1288} 1289 1290static __inline void * 1291hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, 1292 size_t pi_dlen, uint32_t pi_type) 1293{ 1294 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); 1295 struct rndis_pktinfo *pi; 1296 1297 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, 1298 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); 1299 1300 /* 1301 * Per-packet-info does not move; it only grows. 1302 * 1303 * NOTE: 1304 * rm_pktinfooffset in this phase counts from the beginning 1305 * of rndis_packet_msg. 1306 */ 1307 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, 1308 ("%u pktinfo overflows RNDIS packet msg", pi_type)); 1309 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + 1310 pkt->rm_pktinfolen); 1311 pkt->rm_pktinfolen += pi_size; 1312 1313 pi->rm_size = pi_size; 1314 pi->rm_type = pi_type; 1315 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; 1316 1317 /* Data immediately follow per-packet-info. */ 1318 pkt->rm_dataoffset += pi_size; 1319 1320 /* Update RNDIS packet msg length */ 1321 pkt->rm_len += pi_size; 1322 1323 return (pi->rm_data); 1324} 1325 1326/* 1327 * NOTE: 1328 * If this function fails, then both txd and m_head0 will be freed. 
1329 */ 1330static int 1331hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) 1332{ 1333 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; 1334 int error, nsegs, i; 1335 struct mbuf *m_head = *m_head0; 1336 struct rndis_packet_msg *pkt; 1337 uint32_t *pi_data; 1338 int pktlen; 1339 1340 /* 1341 * extension points to the area reserved for the 1342 * rndis_filter_packet, which is placed just after 1343 * the netvsc_packet (and rppi struct, if present; 1344 * length is updated later). 1345 */ 1346 pkt = txd->rndis_pkt; 1347 pkt->rm_type = REMOTE_NDIS_PACKET_MSG; 1348 pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len; 1349 pkt->rm_dataoffset = sizeof(*pkt); 1350 pkt->rm_datalen = m_head->m_pkthdr.len; 1351 pkt->rm_pktinfooffset = sizeof(*pkt); 1352 pkt->rm_pktinfolen = 0; 1353 1354 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) { 1355 /* 1356 * Set the hash value for this packet, so that the host could 1357 * dispatch the TX done event for this packet back to this TX 1358 * ring's channel. 
1359 */ 1360 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1361 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); 1362 *pi_data = txr->hn_tx_idx; 1363 } 1364 1365 if (m_head->m_flags & M_VLANTAG) { 1366 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1367 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); 1368 *pi_data = NDIS_VLAN_INFO_MAKE( 1369 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), 1370 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), 1371 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); 1372 } 1373 1374 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 1375#if defined(INET6) || defined(INET) 1376 struct ether_vlan_header *eh; 1377 int ether_len; 1378 1379 /* 1380 * XXX need m_pullup and use mtodo 1381 */ 1382 eh = mtod(m_head, struct ether_vlan_header*); 1383 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) 1384 ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1385 else 1386 ether_len = ETHER_HDR_LEN; 1387 1388 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1389 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); 1390#ifdef INET 1391 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { 1392 struct ip *ip = 1393 (struct ip *)(m_head->m_data + ether_len); 1394 unsigned long iph_len = ip->ip_hl << 2; 1395 struct tcphdr *th = 1396 (struct tcphdr *)((caddr_t)ip + iph_len); 1397 1398 ip->ip_len = 0; 1399 ip->ip_sum = 0; 1400 th->th_sum = in_pseudo(ip->ip_src.s_addr, 1401 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 1402 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, 1403 m_head->m_pkthdr.tso_segsz); 1404 } 1405#endif 1406#if defined(INET6) && defined(INET) 1407 else 1408#endif 1409#ifdef INET6 1410 { 1411 struct ip6_hdr *ip6 = (struct ip6_hdr *) 1412 (m_head->m_data + ether_len); 1413 struct tcphdr *th = (struct tcphdr *)(ip6 + 1); 1414 1415 ip6->ip6_plen = 0; 1416 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 1417 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, 1418 m_head->m_pkthdr.tso_segsz); 1419 } 1420#endif 1421#endif /* INET6 || INET */ 1422 } else if 
(m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { 1423 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, 1424 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); 1425 if (m_head->m_pkthdr.csum_flags & 1426 (CSUM_IP6_TCP | CSUM_IP6_UDP)) { 1427 *pi_data = NDIS_TXCSUM_INFO_IPV6; 1428 } else { 1429 *pi_data = NDIS_TXCSUM_INFO_IPV4; 1430 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 1431 *pi_data |= NDIS_TXCSUM_INFO_IPCS; 1432 } 1433 1434 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) 1435 *pi_data |= NDIS_TXCSUM_INFO_TCPCS; 1436 else if (m_head->m_pkthdr.csum_flags & 1437 (CSUM_IP_UDP | CSUM_IP6_UDP)) 1438 *pi_data |= NDIS_TXCSUM_INFO_UDPCS; 1439 } 1440 1441 pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; 1442 /* Convert RNDIS packet message offsets */ 1443 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset); 1444 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); 1445 1446 /* 1447 * Chimney send, if the packet could fit into one chimney buffer. 1448 */ 1449 if (pkt->rm_len < txr->hn_chim_size) { 1450 txr->hn_tx_chimney_tried++; 1451 txd->chim_index = hn_chim_alloc(txr->hn_sc); 1452 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) { 1453 uint8_t *dest = txr->hn_sc->hn_chim + 1454 (txd->chim_index * txr->hn_sc->hn_chim_szmax); 1455 1456 memcpy(dest, pkt, pktlen); 1457 dest += pktlen; 1458 m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); 1459 1460 txd->chim_size = pkt->rm_len; 1461 txr->hn_gpa_cnt = 0; 1462 txr->hn_tx_chimney++; 1463 txr->hn_sendpkt = hn_txpkt_chim; 1464 goto done; 1465 } 1466 } 1467 1468 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); 1469 if (error) { 1470 int freed; 1471 1472 /* 1473 * This mbuf is not linked w/ the txd yet, so free it now. 
1474 */ 1475 m_freem(m_head); 1476 *m_head0 = NULL; 1477 1478 freed = hn_txdesc_put(txr, txd); 1479 KASSERT(freed != 0, 1480 ("fail to free txd upon txdma error")); 1481 1482 txr->hn_txdma_failed++; 1483 if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); 1484 return error; 1485 } 1486 *m_head0 = m_head; 1487 1488 /* +1 RNDIS packet message */ 1489 txr->hn_gpa_cnt = nsegs + 1; 1490 1491 /* send packet with page buffer */ 1492 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); 1493 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; 1494 txr->hn_gpa[0].gpa_len = pktlen; 1495 1496 /* 1497 * Fill the page buffers with mbuf info after the page 1498 * buffer for RNDIS packet message. 1499 */ 1500 for (i = 0; i < nsegs; ++i) { 1501 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; 1502 1503 gpa->gpa_page = atop(segs[i].ds_addr); 1504 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; 1505 gpa->gpa_len = segs[i].ds_len; 1506 } 1507 1508 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 1509 txd->chim_size = 0; 1510 txr->hn_sendpkt = hn_txpkt_sglist; 1511done: 1512 txd->m = m_head; 1513 1514 /* Set the completion routine */ 1515 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd); 1516 1517 return 0; 1518} 1519 1520/* 1521 * NOTE: 1522 * If this function fails, then txd will be freed, but the mbuf 1523 * associated w/ the txd will _not_ be freed. 1524 */ 1525static int 1526hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) 1527{ 1528 int error, send_failed = 0; 1529 1530again: 1531 /* 1532 * Make sure that txd is not freed before ETHER_BPF_MTAP. 
1533 */ 1534 hn_txdesc_hold(txd); 1535 error = txr->hn_sendpkt(txr, txd); 1536 if (!error) { 1537 ETHER_BPF_MTAP(ifp, txd->m); 1538 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); 1539 if (!hn_use_if_start) { 1540 if_inc_counter(ifp, IFCOUNTER_OBYTES, 1541 txd->m->m_pkthdr.len); 1542 if (txd->m->m_flags & M_MCAST) 1543 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 1544 } 1545 txr->hn_pkts++; 1546 } 1547 hn_txdesc_put(txr, txd); 1548 1549 if (__predict_false(error)) { 1550 int freed; 1551 1552 /* 1553 * This should "really rarely" happen. 1554 * 1555 * XXX Too many RX to be acked or too many sideband 1556 * commands to run? Ask netvsc_channel_rollup() 1557 * to kick start later. 1558 */ 1559 txr->hn_has_txeof = 1; 1560 if (!send_failed) { 1561 txr->hn_send_failed++; 1562 send_failed = 1; 1563 /* 1564 * Try sending again after set hn_has_txeof; 1565 * in case that we missed the last 1566 * netvsc_channel_rollup(). 1567 */ 1568 goto again; 1569 } 1570 if_printf(ifp, "send failed\n"); 1571 1572 /* 1573 * Caller will perform further processing on the 1574 * associated mbuf, so don't free it in hn_txdesc_put(); 1575 * only unload it from the DMA map in hn_txdesc_put(), 1576 * if it was loaded. 
1577 */ 1578 txd->m = NULL; 1579 freed = hn_txdesc_put(txr, txd); 1580 KASSERT(freed != 0, 1581 ("fail to free txd upon send error")); 1582 1583 txr->hn_send_failed++; 1584 } 1585 return error; 1586} 1587 1588/* 1589 * Start a transmit of one or more packets 1590 */ 1591static int 1592hn_start_locked(struct hn_tx_ring *txr, int len) 1593{ 1594 struct hn_softc *sc = txr->hn_sc; 1595 struct ifnet *ifp = sc->hn_ifp; 1596 1597 KASSERT(hn_use_if_start, 1598 ("hn_start_locked is called, when if_start is disabled")); 1599 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 1600 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 1601 1602 if (__predict_false(txr->hn_suspended)) 1603 return 0; 1604 1605 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 1606 IFF_DRV_RUNNING) 1607 return 0; 1608 1609 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 1610 struct hn_txdesc *txd; 1611 struct mbuf *m_head; 1612 int error; 1613 1614 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); 1615 if (m_head == NULL) 1616 break; 1617 1618 if (len > 0 && m_head->m_pkthdr.len > len) { 1619 /* 1620 * This sending could be time consuming; let callers 1621 * dispatch this packet sending (and sending of any 1622 * following up packets) to tx taskqueue. 
1623 */ 1624 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 1625 return 1; 1626 } 1627 1628 txd = hn_txdesc_get(txr); 1629 if (txd == NULL) { 1630 txr->hn_no_txdescs++; 1631 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 1632 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 1633 break; 1634 } 1635 1636 error = hn_encap(txr, txd, &m_head); 1637 if (error) { 1638 /* Both txd and m_head are freed */ 1639 continue; 1640 } 1641 1642 error = hn_txpkt(ifp, txr, txd); 1643 if (__predict_false(error)) { 1644 /* txd is freed, but m_head is not */ 1645 IFQ_DRV_PREPEND(&ifp->if_snd, m_head); 1646 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 1647 break; 1648 } 1649 } 1650 return 0; 1651} 1652 1653/* 1654 * Append the specified data to the indicated mbuf chain, 1655 * Extend the mbuf chain if the new data does not fit in 1656 * existing space. 1657 * 1658 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. 1659 * There should be an equivalent in the kernel mbuf code, 1660 * but there does not appear to be one yet. 1661 * 1662 * Differs from m_append() in that additional mbufs are 1663 * allocated with cluster size MJUMPAGESIZE, and filled 1664 * accordingly. 1665 * 1666 * Return 1 if able to complete the job; otherwise 0. 1667 */ 1668static int 1669hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) 1670{ 1671 struct mbuf *m, *n; 1672 int remainder, space; 1673 1674 for (m = m0; m->m_next != NULL; m = m->m_next) 1675 ; 1676 remainder = len; 1677 space = M_TRAILINGSPACE(m); 1678 if (space > 0) { 1679 /* 1680 * Copy into available space. 1681 */ 1682 if (space > remainder) 1683 space = remainder; 1684 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 1685 m->m_len += space; 1686 cp += space; 1687 remainder -= space; 1688 } 1689 while (remainder > 0) { 1690 /* 1691 * Allocate a new mbuf; could check space 1692 * and allocate a cluster instead. 
1693 */ 1694 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE); 1695 if (n == NULL) 1696 break; 1697 n->m_len = min(MJUMPAGESIZE, remainder); 1698 bcopy(cp, mtod(n, caddr_t), n->m_len); 1699 cp += n->m_len; 1700 remainder -= n->m_len; 1701 m->m_next = n; 1702 m = n; 1703 } 1704 if (m0->m_flags & M_PKTHDR) 1705 m0->m_pkthdr.len += len - remainder; 1706 1707 return (remainder == 0); 1708} 1709 1710#if defined(INET) || defined(INET6) 1711static __inline int 1712hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) 1713{ 1714#if __FreeBSD_version >= 1100095 1715 if (hn_lro_mbufq_depth) { 1716 tcp_lro_queue_mbuf(lc, m); 1717 return 0; 1718 } 1719#endif 1720 return tcp_lro_rx(lc, m, 0); 1721} 1722#endif 1723 1724static int 1725hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, 1726 const struct hn_rxinfo *info) 1727{ 1728 struct ifnet *ifp = rxr->hn_ifp; 1729 struct mbuf *m_new; 1730 int size, do_lro = 0, do_csum = 1; 1731 int hash_type = M_HASHTYPE_OPAQUE; 1732 1733 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) 1734 return (0); 1735 1736 /* 1737 * Bail out if packet contains more data than configured MTU. 1738 */ 1739 if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) { 1740 return (0); 1741 } else if (dlen <= MHLEN) { 1742 m_new = m_gethdr(M_NOWAIT, MT_DATA); 1743 if (m_new == NULL) { 1744 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1745 return (0); 1746 } 1747 memcpy(mtod(m_new, void *), data, dlen); 1748 m_new->m_pkthdr.len = m_new->m_len = dlen; 1749 rxr->hn_small_pkts++; 1750 } else { 1751 /* 1752 * Get an mbuf with a cluster. For packets 2K or less, 1753 * get a standard 2K cluster. For anything larger, get a 1754 * 4K cluster. Any buffers larger than 4K can cause problems 1755 * if looped around to the Hyper-V TX channel, so avoid them. 
1756 */ 1757 size = MCLBYTES; 1758 if (dlen > MCLBYTES) { 1759 /* 4096 */ 1760 size = MJUMPAGESIZE; 1761 } 1762 1763 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); 1764 if (m_new == NULL) { 1765 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); 1766 return (0); 1767 } 1768 1769 hv_m_append(m_new, dlen, data); 1770 } 1771 m_new->m_pkthdr.rcvif = ifp; 1772 1773 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) 1774 do_csum = 0; 1775 1776 /* receive side checksum offload */ 1777 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { 1778 /* IP csum offload */ 1779 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { 1780 m_new->m_pkthdr.csum_flags |= 1781 (CSUM_IP_CHECKED | CSUM_IP_VALID); 1782 rxr->hn_csum_ip++; 1783 } 1784 1785 /* TCP/UDP csum offload */ 1786 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | 1787 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { 1788 m_new->m_pkthdr.csum_flags |= 1789 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1790 m_new->m_pkthdr.csum_data = 0xffff; 1791 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) 1792 rxr->hn_csum_tcp++; 1793 else 1794 rxr->hn_csum_udp++; 1795 } 1796 1797 /* 1798 * XXX 1799 * As of this write (Oct 28th, 2016), host side will turn 1800 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so 1801 * the do_lro setting here is actually _not_ accurate. We 1802 * depend on the RSS hash type check to reset do_lro. 
1803 */ 1804 if ((info->csum_info & 1805 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == 1806 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) 1807 do_lro = 1; 1808 } else { 1809 const struct ether_header *eh; 1810 uint16_t etype; 1811 int hoff; 1812 1813 hoff = sizeof(*eh); 1814 if (m_new->m_len < hoff) 1815 goto skip; 1816 eh = mtod(m_new, struct ether_header *); 1817 etype = ntohs(eh->ether_type); 1818 if (etype == ETHERTYPE_VLAN) { 1819 const struct ether_vlan_header *evl; 1820 1821 hoff = sizeof(*evl); 1822 if (m_new->m_len < hoff) 1823 goto skip; 1824 evl = mtod(m_new, struct ether_vlan_header *); 1825 etype = ntohs(evl->evl_proto); 1826 } 1827 1828 if (etype == ETHERTYPE_IP) { 1829 int pr; 1830 1831 pr = hn_check_iplen(m_new, hoff); 1832 if (pr == IPPROTO_TCP) { 1833 if (do_csum && 1834 (rxr->hn_trust_hcsum & 1835 HN_TRUST_HCSUM_TCP)) { 1836 rxr->hn_csum_trusted++; 1837 m_new->m_pkthdr.csum_flags |= 1838 (CSUM_IP_CHECKED | CSUM_IP_VALID | 1839 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1840 m_new->m_pkthdr.csum_data = 0xffff; 1841 } 1842 do_lro = 1; 1843 } else if (pr == IPPROTO_UDP) { 1844 if (do_csum && 1845 (rxr->hn_trust_hcsum & 1846 HN_TRUST_HCSUM_UDP)) { 1847 rxr->hn_csum_trusted++; 1848 m_new->m_pkthdr.csum_flags |= 1849 (CSUM_IP_CHECKED | CSUM_IP_VALID | 1850 CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1851 m_new->m_pkthdr.csum_data = 0xffff; 1852 } 1853 } else if (pr != IPPROTO_DONE && do_csum && 1854 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { 1855 rxr->hn_csum_trusted++; 1856 m_new->m_pkthdr.csum_flags |= 1857 (CSUM_IP_CHECKED | CSUM_IP_VALID); 1858 } 1859 } 1860 } 1861skip: 1862 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { 1863 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( 1864 NDIS_VLAN_INFO_ID(info->vlan_info), 1865 NDIS_VLAN_INFO_PRI(info->vlan_info), 1866 NDIS_VLAN_INFO_CFI(info->vlan_info)); 1867 m_new->m_flags |= M_VLANTAG; 1868 } 1869 1870 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { 1871 rxr->hn_rss_pkts++; 1872 
m_new->m_pkthdr.flowid = info->hash_value; 1873 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == 1874 NDIS_HASH_FUNCTION_TOEPLITZ) { 1875 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); 1876 1877 /* 1878 * NOTE: 1879 * do_lro is resetted, if the hash types are not TCP 1880 * related. See the comment in the above csum_flags 1881 * setup section. 1882 */ 1883 switch (type) { 1884 case NDIS_HASH_IPV4: 1885 hash_type = M_HASHTYPE_RSS_IPV4; 1886 do_lro = 0; 1887 break; 1888 1889 case NDIS_HASH_TCP_IPV4: 1890 hash_type = M_HASHTYPE_RSS_TCP_IPV4; 1891 break; 1892 1893 case NDIS_HASH_IPV6: 1894 hash_type = M_HASHTYPE_RSS_IPV6; 1895 do_lro = 0; 1896 break; 1897 1898 case NDIS_HASH_IPV6_EX: 1899 hash_type = M_HASHTYPE_RSS_IPV6_EX; 1900 do_lro = 0; 1901 break; 1902 1903 case NDIS_HASH_TCP_IPV6: 1904 hash_type = M_HASHTYPE_RSS_TCP_IPV6; 1905 break; 1906 1907 case NDIS_HASH_TCP_IPV6_EX: 1908 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; 1909 break; 1910 } 1911 } 1912 } else { 1913 m_new->m_pkthdr.flowid = rxr->hn_rx_idx; 1914 } 1915 M_HASHTYPE_SET(m_new, hash_type); 1916 1917 /* 1918 * Note: Moved RX completion back to hv_nv_on_receive() so all 1919 * messages (not just data messages) will trigger a response. 1920 */ 1921 1922 ifp->if_ipackets++; 1923 rxr->hn_pkts++; 1924 1925 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { 1926#if defined(INET) || defined(INET6) 1927 struct lro_ctrl *lro = &rxr->hn_lro; 1928 1929 if (lro->lro_cnt) { 1930 rxr->hn_lro_tried++; 1931 if (hn_lro_rx(lro, m_new) == 0) { 1932 /* DONE! 
*/ 1933 return 0; 1934 } 1935 } 1936#endif 1937 } 1938 1939 /* We're not holding the lock here, so don't release it */ 1940 (*ifp->if_input)(ifp, m_new); 1941 1942 return (0); 1943} 1944 1945static int 1946hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1947{ 1948 struct hn_softc *sc = ifp->if_softc; 1949 struct ifreq *ifr = (struct ifreq *)data; 1950 int mask, error = 0; 1951 1952 switch (cmd) { 1953 case SIOCSIFMTU: 1954 if (ifr->ifr_mtu > HN_MTU_MAX) { 1955 error = EINVAL; 1956 break; 1957 } 1958 1959 HN_LOCK(sc); 1960 1961 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 1962 HN_UNLOCK(sc); 1963 break; 1964 } 1965 1966 if ((sc->hn_caps & HN_CAP_MTU) == 0) { 1967 /* Can't change MTU */ 1968 HN_UNLOCK(sc); 1969 error = EOPNOTSUPP; 1970 break; 1971 } 1972 1973 if (ifp->if_mtu == ifr->ifr_mtu) { 1974 HN_UNLOCK(sc); 1975 break; 1976 } 1977 1978 /* 1979 * Suspend this interface before the synthetic parts 1980 * are ripped. 1981 */ 1982 hn_suspend(sc); 1983 1984 /* 1985 * Detach the synthetics parts, i.e. NVS and RNDIS. 1986 */ 1987 hn_synth_detach(sc); 1988 1989 /* 1990 * Reattach the synthetic parts, i.e. NVS and RNDIS, 1991 * with the new MTU setting. 1992 */ 1993 error = hn_synth_attach(sc, ifr->ifr_mtu); 1994 if (error) { 1995 HN_UNLOCK(sc); 1996 break; 1997 } 1998 1999 /* 2000 * Commit the requested MTU, after the synthetic parts 2001 * have been successfully attached. 2002 */ 2003 ifp->if_mtu = ifr->ifr_mtu; 2004 2005 /* 2006 * Make sure that various parameters based on MTU are 2007 * still valid, after the MTU change. 2008 */ 2009 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) 2010 hn_set_chim_size(sc, sc->hn_chim_szmax); 2011 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); 2012#if __FreeBSD_version >= 1100099 2013 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < 2014 HN_LRO_LENLIM_MIN(ifp)) 2015 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); 2016#endif 2017 2018 /* 2019 * All done! Resume the interface now. 
2020 */ 2021 hn_resume(sc); 2022 2023 HN_UNLOCK(sc); 2024 break; 2025 2026 case SIOCSIFFLAGS: 2027 HN_LOCK(sc); 2028 2029 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2030 HN_UNLOCK(sc); 2031 break; 2032 } 2033 2034 if (ifp->if_flags & IFF_UP) { 2035 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2036 hn_set_rxfilter(sc); 2037 else 2038 hn_init_locked(sc); 2039 } else { 2040 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2041 hn_stop(sc); 2042 } 2043 sc->hn_if_flags = ifp->if_flags; 2044 2045 HN_UNLOCK(sc); 2046 break; 2047 2048 case SIOCSIFCAP: 2049 HN_LOCK(sc); 2050 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 2051 2052 if (mask & IFCAP_TXCSUM) { 2053 ifp->if_capenable ^= IFCAP_TXCSUM; 2054 if (ifp->if_capenable & IFCAP_TXCSUM) 2055 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc); 2056 else 2057 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc); 2058 } 2059 if (mask & IFCAP_TXCSUM_IPV6) { 2060 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 2061 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 2062 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc); 2063 else 2064 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc); 2065 } 2066 2067 /* TODO: flip RNDIS offload parameters for RXCSUM. */ 2068 if (mask & IFCAP_RXCSUM) 2069 ifp->if_capenable ^= IFCAP_RXCSUM; 2070#ifdef foo 2071 /* We can't diff IPv6 packets from IPv4 packets on RX path. 
*/ 2072 if (mask & IFCAP_RXCSUM_IPV6) 2073 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 2074#endif 2075 2076 if (mask & IFCAP_LRO) 2077 ifp->if_capenable ^= IFCAP_LRO; 2078 2079 if (mask & IFCAP_TSO4) { 2080 ifp->if_capenable ^= IFCAP_TSO4; 2081 if (ifp->if_capenable & IFCAP_TSO4) 2082 ifp->if_hwassist |= CSUM_IP_TSO; 2083 else 2084 ifp->if_hwassist &= ~CSUM_IP_TSO; 2085 } 2086 if (mask & IFCAP_TSO6) { 2087 ifp->if_capenable ^= IFCAP_TSO6; 2088 if (ifp->if_capenable & IFCAP_TSO6) 2089 ifp->if_hwassist |= CSUM_IP6_TSO; 2090 else 2091 ifp->if_hwassist &= ~CSUM_IP6_TSO; 2092 } 2093 2094 HN_UNLOCK(sc); 2095 break; 2096 2097 case SIOCADDMULTI: 2098 case SIOCDELMULTI: 2099#ifdef notyet 2100 /* 2101 * XXX 2102 * Multicast uses mutex, while RNDIS RX filter setting 2103 * sleeps. We workaround this by always enabling 2104 * ALLMULTI. ALLMULTI would actually always be on, even 2105 * if we supported the SIOCADDMULTI/SIOCDELMULTI, since 2106 * we don't support multicast address list configuration 2107 * for this driver. 2108 */ 2109 HN_LOCK(sc); 2110 2111 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) { 2112 HN_UNLOCK(sc); 2113 break; 2114 } 2115 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2116 hn_set_rxfilter(sc); 2117 2118 HN_UNLOCK(sc); 2119#endif 2120 break; 2121 2122 case SIOCSIFMEDIA: 2123 case SIOCGIFMEDIA: 2124 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); 2125 break; 2126 2127 default: 2128 error = ether_ioctl(ifp, cmd, data); 2129 break; 2130 } 2131 return (error); 2132} 2133 2134static void 2135hn_stop(struct hn_softc *sc) 2136{ 2137 struct ifnet *ifp = sc->hn_ifp; 2138 int i; 2139 2140 HN_LOCK_ASSERT(sc); 2141 2142 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 2143 ("synthetic parts were not attached")); 2144 2145 /* Clear RUNNING bit _before_ hn_suspend_data() */ 2146 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2147 hn_suspend_data(sc); 2148 2149 /* Clear OACTIVE bit. 
*/ 2150 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2151 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2152 sc->hn_tx_ring[i].hn_oactive = 0; 2153} 2154 2155static void 2156hn_start(struct ifnet *ifp) 2157{ 2158 struct hn_softc *sc = ifp->if_softc; 2159 struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; 2160 2161 if (txr->hn_sched_tx) 2162 goto do_sched; 2163 2164 if (mtx_trylock(&txr->hn_tx_lock)) { 2165 int sched; 2166 2167 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 2168 mtx_unlock(&txr->hn_tx_lock); 2169 if (!sched) 2170 return; 2171 } 2172do_sched: 2173 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 2174} 2175 2176static void 2177hn_start_txeof(struct hn_tx_ring *txr) 2178{ 2179 struct hn_softc *sc = txr->hn_sc; 2180 struct ifnet *ifp = sc->hn_ifp; 2181 2182 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); 2183 2184 if (txr->hn_sched_tx) 2185 goto do_sched; 2186 2187 if (mtx_trylock(&txr->hn_tx_lock)) { 2188 int sched; 2189 2190 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2191 sched = hn_start_locked(txr, txr->hn_direct_tx_size); 2192 mtx_unlock(&txr->hn_tx_lock); 2193 if (sched) { 2194 taskqueue_enqueue(txr->hn_tx_taskq, 2195 &txr->hn_tx_task); 2196 } 2197 } else { 2198do_sched: 2199 /* 2200 * Release the OACTIVE earlier, with the hope, that 2201 * others could catch up. The task will clear the 2202 * flag again with the hn_tx_lock to avoid possible 2203 * races. 2204 */ 2205 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2206 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 2207 } 2208} 2209 2210static void 2211hn_init_locked(struct hn_softc *sc) 2212{ 2213 struct ifnet *ifp = sc->hn_ifp; 2214 int i; 2215 2216 HN_LOCK_ASSERT(sc); 2217 2218 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) 2219 return; 2220 2221 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 2222 return; 2223 2224 /* Configure RX filter */ 2225 hn_set_rxfilter(sc); 2226 2227 /* Clear OACTIVE bit. 
*/ 2228 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); 2229 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 2230 sc->hn_tx_ring[i].hn_oactive = 0; 2231 2232 /* Clear TX 'suspended' bit. */ 2233 hn_resume_tx(sc, sc->hn_tx_ring_inuse); 2234 2235 /* Everything is ready; unleash! */ 2236 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); 2237} 2238 2239static void 2240hn_init(void *xsc) 2241{ 2242 struct hn_softc *sc = xsc; 2243 2244 HN_LOCK(sc); 2245 hn_init_locked(sc); 2246 HN_UNLOCK(sc); 2247} 2248 2249#if __FreeBSD_version >= 1100099 2250 2251static int 2252hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) 2253{ 2254 struct hn_softc *sc = arg1; 2255 unsigned int lenlim; 2256 int error; 2257 2258 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; 2259 error = sysctl_handle_int(oidp, &lenlim, 0, req); 2260 if (error || req->newptr == NULL) 2261 return error; 2262 2263 HN_LOCK(sc); 2264 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || 2265 lenlim > TCP_LRO_LENGTH_MAX) { 2266 HN_UNLOCK(sc); 2267 return EINVAL; 2268 } 2269 hn_set_lro_lenlim(sc, lenlim); 2270 HN_UNLOCK(sc); 2271 2272 return 0; 2273} 2274 2275static int 2276hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) 2277{ 2278 struct hn_softc *sc = arg1; 2279 int ackcnt, error, i; 2280 2281 /* 2282 * lro_ackcnt_lim is append count limit, 2283 * +1 to turn it into aggregation limit. 2284 */ 2285 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; 2286 error = sysctl_handle_int(oidp, &ackcnt, 0, req); 2287 if (error || req->newptr == NULL) 2288 return error; 2289 2290 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) 2291 return EINVAL; 2292 2293 /* 2294 * Convert aggregation limit back to append 2295 * count limit. 
2296 */ 2297 --ackcnt; 2298 HN_LOCK(sc); 2299 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) 2300 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; 2301 HN_UNLOCK(sc); 2302 return 0; 2303} 2304 2305#endif 2306 2307static int 2308hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) 2309{ 2310 struct hn_softc *sc = arg1; 2311 int hcsum = arg2; 2312 int on, error, i; 2313 2314 on = 0; 2315 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) 2316 on = 1; 2317 2318 error = sysctl_handle_int(oidp, &on, 0, req); 2319 if (error || req->newptr == NULL) 2320 return error; 2321 2322 HN_LOCK(sc); 2323 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2324 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2325 2326 if (on) 2327 rxr->hn_trust_hcsum |= hcsum; 2328 else 2329 rxr->hn_trust_hcsum &= ~hcsum; 2330 } 2331 HN_UNLOCK(sc); 2332 return 0; 2333} 2334 2335static int 2336hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) 2337{ 2338 struct hn_softc *sc = arg1; 2339 int chim_size, error; 2340 2341 chim_size = sc->hn_tx_ring[0].hn_chim_size; 2342 error = sysctl_handle_int(oidp, &chim_size, 0, req); 2343 if (error || req->newptr == NULL) 2344 return error; 2345 2346 if (chim_size > sc->hn_chim_szmax || chim_size <= 0) 2347 return EINVAL; 2348 2349 HN_LOCK(sc); 2350 hn_set_chim_size(sc, chim_size); 2351 HN_UNLOCK(sc); 2352 return 0; 2353} 2354 2355#if __FreeBSD_version < 1100095 2356static int 2357hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) 2358{ 2359 struct hn_softc *sc = arg1; 2360 int ofs = arg2, i, error; 2361 struct hn_rx_ring *rxr; 2362 uint64_t stat; 2363 2364 stat = 0; 2365 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2366 rxr = &sc->hn_rx_ring[i]; 2367 stat += *((int *)((uint8_t *)rxr + ofs)); 2368 } 2369 2370 error = sysctl_handle_64(oidp, &stat, 0, req); 2371 if (error || req->newptr == NULL) 2372 return error; 2373 2374 /* Zero out this stat. 
*/ 2375 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2376 rxr = &sc->hn_rx_ring[i]; 2377 *((int *)((uint8_t *)rxr + ofs)) = 0; 2378 } 2379 return 0; 2380} 2381#else 2382static int 2383hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) 2384{ 2385 struct hn_softc *sc = arg1; 2386 int ofs = arg2, i, error; 2387 struct hn_rx_ring *rxr; 2388 uint64_t stat; 2389 2390 stat = 0; 2391 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2392 rxr = &sc->hn_rx_ring[i]; 2393 stat += *((uint64_t *)((uint8_t *)rxr + ofs)); 2394 } 2395 2396 error = sysctl_handle_64(oidp, &stat, 0, req); 2397 if (error || req->newptr == NULL) 2398 return error; 2399 2400 /* Zero out this stat. */ 2401 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { 2402 rxr = &sc->hn_rx_ring[i]; 2403 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; 2404 } 2405 return 0; 2406} 2407 2408#endif 2409 2410static int 2411hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 2412{ 2413 struct hn_softc *sc = arg1; 2414 int ofs = arg2, i, error; 2415 struct hn_rx_ring *rxr; 2416 u_long stat; 2417 2418 stat = 0; 2419 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2420 rxr = &sc->hn_rx_ring[i]; 2421 stat += *((u_long *)((uint8_t *)rxr + ofs)); 2422 } 2423 2424 error = sysctl_handle_long(oidp, &stat, 0, req); 2425 if (error || req->newptr == NULL) 2426 return error; 2427 2428 /* Zero out this stat. */ 2429 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2430 rxr = &sc->hn_rx_ring[i]; 2431 *((u_long *)((uint8_t *)rxr + ofs)) = 0; 2432 } 2433 return 0; 2434} 2435 2436static int 2437hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) 2438{ 2439 struct hn_softc *sc = arg1; 2440 int ofs = arg2, i, error; 2441 struct hn_tx_ring *txr; 2442 u_long stat; 2443 2444 stat = 0; 2445 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 2446 txr = &sc->hn_tx_ring[i]; 2447 stat += *((u_long *)((uint8_t *)txr + ofs)); 2448 } 2449 2450 error = sysctl_handle_long(oidp, &stat, 0, req); 2451 if (error || req->newptr == NULL) 2452 return error; 2453 2454 /* Zero out this stat. 
*/ 2455 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 2456 txr = &sc->hn_tx_ring[i]; 2457 *((u_long *)((uint8_t *)txr + ofs)) = 0; 2458 } 2459 return 0; 2460} 2461 2462static int 2463hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) 2464{ 2465 struct hn_softc *sc = arg1; 2466 int ofs = arg2, i, error, conf; 2467 struct hn_tx_ring *txr; 2468 2469 txr = &sc->hn_tx_ring[0]; 2470 conf = *((int *)((uint8_t *)txr + ofs)); 2471 2472 error = sysctl_handle_int(oidp, &conf, 0, req); 2473 if (error || req->newptr == NULL) 2474 return error; 2475 2476 HN_LOCK(sc); 2477 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 2478 txr = &sc->hn_tx_ring[i]; 2479 *((int *)((uint8_t *)txr + ofs)) = conf; 2480 } 2481 HN_UNLOCK(sc); 2482 2483 return 0; 2484} 2485 2486static int 2487hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) 2488{ 2489 struct hn_softc *sc = arg1; 2490 char verstr[16]; 2491 2492 snprintf(verstr, sizeof(verstr), "%u.%u", 2493 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver), 2494 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver)); 2495 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); 2496} 2497 2498static int 2499hn_caps_sysctl(SYSCTL_HANDLER_ARGS) 2500{ 2501 struct hn_softc *sc = arg1; 2502 char caps_str[128]; 2503 uint32_t caps; 2504 2505 HN_LOCK(sc); 2506 caps = sc->hn_caps; 2507 HN_UNLOCK(sc); 2508 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS); 2509 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req); 2510} 2511 2512static int 2513hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS) 2514{ 2515 struct hn_softc *sc = arg1; 2516 char assist_str[128]; 2517 uint32_t hwassist; 2518 2519 HN_LOCK(sc); 2520 hwassist = sc->hn_ifp->if_hwassist; 2521 HN_UNLOCK(sc); 2522 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS); 2523 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req); 2524} 2525 2526static int 2527hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS) 2528{ 2529 struct hn_softc *sc = arg1; 2530 char filter_str[128]; 2531 uint32_t filter; 2532 2533 
HN_LOCK(sc); 2534 filter = sc->hn_rx_filter; 2535 HN_UNLOCK(sc); 2536 snprintf(filter_str, sizeof(filter_str), "%b", filter, 2537 NDIS_PACKET_TYPES); 2538 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req); 2539} 2540 2541static int 2542hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS) 2543{ 2544 struct hn_softc *sc = arg1; 2545 int error; 2546 2547 HN_LOCK(sc); 2548 2549 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 2550 if (error || req->newptr == NULL) 2551 goto back; 2552 2553 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key)); 2554 if (error) 2555 goto back; 2556 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 2557 2558 if (sc->hn_rx_ring_inuse > 1) { 2559 error = hn_rss_reconfig(sc); 2560 } else { 2561 /* Not RSS capable, at least for now; just save the RSS key. */ 2562 error = 0; 2563 } 2564back: 2565 HN_UNLOCK(sc); 2566 return (error); 2567} 2568 2569static int 2570hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS) 2571{ 2572 struct hn_softc *sc = arg1; 2573 int error; 2574 2575 HN_LOCK(sc); 2576 2577 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 2578 if (error || req->newptr == NULL) 2579 goto back; 2580 2581 /* 2582 * Don't allow RSS indirect table change, if this interface is not 2583 * RSS capable currently. 
2584 */ 2585 if (sc->hn_rx_ring_inuse == 1) { 2586 error = EOPNOTSUPP; 2587 goto back; 2588 } 2589 2590 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind)); 2591 if (error) 2592 goto back; 2593 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 2594 2595 hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse); 2596 error = hn_rss_reconfig(sc); 2597back: 2598 HN_UNLOCK(sc); 2599 return (error); 2600} 2601 2602static int 2603hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS) 2604{ 2605 struct hn_softc *sc = arg1; 2606 char hash_str[128]; 2607 uint32_t hash; 2608 2609 HN_LOCK(sc); 2610 hash = sc->hn_rss_hash; 2611 HN_UNLOCK(sc); 2612 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS); 2613 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req); 2614} 2615 2616static int 2617hn_check_iplen(const struct mbuf *m, int hoff) 2618{ 2619 const struct ip *ip; 2620 int len, iphlen, iplen; 2621 const struct tcphdr *th; 2622 int thoff; /* TCP data offset */ 2623 2624 len = hoff + sizeof(struct ip); 2625 2626 /* The packet must be at least the size of an IP header. */ 2627 if (m->m_pkthdr.len < len) 2628 return IPPROTO_DONE; 2629 2630 /* The fixed IP header must reside completely in the first mbuf. */ 2631 if (m->m_len < len) 2632 return IPPROTO_DONE; 2633 2634 ip = mtodo(m, hoff); 2635 2636 /* Bound check the packet's stated IP header length. */ 2637 iphlen = ip->ip_hl << 2; 2638 if (iphlen < sizeof(struct ip)) /* minimum header length */ 2639 return IPPROTO_DONE; 2640 2641 /* The full IP header must reside completely in the one mbuf. */ 2642 if (m->m_len < hoff + iphlen) 2643 return IPPROTO_DONE; 2644 2645 iplen = ntohs(ip->ip_len); 2646 2647 /* 2648 * Check that the amount of data in the buffers is as 2649 * at least much as the IP header would have us expect. 2650 */ 2651 if (m->m_pkthdr.len < hoff + iplen) 2652 return IPPROTO_DONE; 2653 2654 /* 2655 * Ignore IP fragments. 
2656 */ 2657 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) 2658 return IPPROTO_DONE; 2659 2660 /* 2661 * The TCP/IP or UDP/IP header must be entirely contained within 2662 * the first fragment of a packet. 2663 */ 2664 switch (ip->ip_p) { 2665 case IPPROTO_TCP: 2666 if (iplen < iphlen + sizeof(struct tcphdr)) 2667 return IPPROTO_DONE; 2668 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) 2669 return IPPROTO_DONE; 2670 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); 2671 thoff = th->th_off << 2; 2672 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) 2673 return IPPROTO_DONE; 2674 if (m->m_len < hoff + iphlen + thoff) 2675 return IPPROTO_DONE; 2676 break; 2677 case IPPROTO_UDP: 2678 if (iplen < iphlen + sizeof(struct udphdr)) 2679 return IPPROTO_DONE; 2680 if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) 2681 return IPPROTO_DONE; 2682 break; 2683 default: 2684 if (iplen < iphlen) 2685 return IPPROTO_DONE; 2686 break; 2687 } 2688 return ip->ip_p; 2689} 2690 2691static int 2692hn_create_rx_data(struct hn_softc *sc, int ring_cnt) 2693{ 2694 struct sysctl_oid_list *child; 2695 struct sysctl_ctx_list *ctx; 2696 device_t dev = sc->hn_dev; 2697#if defined(INET) || defined(INET6) 2698#if __FreeBSD_version >= 1100095 2699 int lroent_cnt; 2700#endif 2701#endif 2702 int i; 2703 2704 /* 2705 * Create RXBUF for reception. 2706 * 2707 * NOTE: 2708 * - It is shared by all channels. 2709 * - A large enough buffer is allocated, certain version of NVSes 2710 * may further limit the usable space. 
2711 */ 2712 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 2713 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma, 2714 BUS_DMA_WAITOK | BUS_DMA_ZERO); 2715 if (sc->hn_rxbuf == NULL) { 2716 device_printf(sc->hn_dev, "allocate rxbuf failed\n"); 2717 return (ENOMEM); 2718 } 2719 2720 sc->hn_rx_ring_cnt = ring_cnt; 2721 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; 2722 2723 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, 2724 M_DEVBUF, M_WAITOK | M_ZERO); 2725 2726#if defined(INET) || defined(INET6) 2727#if __FreeBSD_version >= 1100095 2728 lroent_cnt = hn_lro_entry_count; 2729 if (lroent_cnt < TCP_LRO_ENTRIES) 2730 lroent_cnt = TCP_LRO_ENTRIES; 2731 if (bootverbose) 2732 device_printf(dev, "LRO: entry count %d\n", lroent_cnt); 2733#endif 2734#endif /* INET || INET6 */ 2735 2736 ctx = device_get_sysctl_ctx(dev); 2737 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 2738 2739 /* Create dev.hn.UNIT.rx sysctl tree */ 2740 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", 2741 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 2742 2743 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2744 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2745 2746 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev), 2747 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE, 2748 &rxr->hn_br_dma, BUS_DMA_WAITOK); 2749 if (rxr->hn_br == NULL) { 2750 device_printf(dev, "allocate bufring failed\n"); 2751 return (ENOMEM); 2752 } 2753 2754 if (hn_trust_hosttcp) 2755 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; 2756 if (hn_trust_hostudp) 2757 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; 2758 if (hn_trust_hostip) 2759 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; 2760 rxr->hn_ifp = sc->hn_ifp; 2761 if (i < sc->hn_tx_ring_cnt) 2762 rxr->hn_txr = &sc->hn_tx_ring[i]; 2763 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF; 2764 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK); 2765 rxr->hn_rx_idx = i; 2766 rxr->hn_rxbuf = sc->hn_rxbuf; 2767 2768 /* 2769 * Initialize LRO. 
2770 */ 2771#if defined(INET) || defined(INET6) 2772#if __FreeBSD_version >= 1100095 2773 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 2774 hn_lro_mbufq_depth); 2775#else 2776 tcp_lro_init(&rxr->hn_lro); 2777 rxr->hn_lro.ifp = sc->hn_ifp; 2778#endif 2779#if __FreeBSD_version >= 1100099 2780 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; 2781 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; 2782#endif 2783#endif /* INET || INET6 */ 2784 2785 if (sc->hn_rx_sysctl_tree != NULL) { 2786 char name[16]; 2787 2788 /* 2789 * Create per RX ring sysctl tree: 2790 * dev.hn.UNIT.rx.RINGID 2791 */ 2792 snprintf(name, sizeof(name), "%d", i); 2793 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, 2794 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), 2795 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 2796 2797 if (rxr->hn_rx_sysctl_tree != NULL) { 2798 SYSCTL_ADD_ULONG(ctx, 2799 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 2800 OID_AUTO, "packets", CTLFLAG_RW, 2801 &rxr->hn_pkts, "# of packets received"); 2802 SYSCTL_ADD_ULONG(ctx, 2803 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 2804 OID_AUTO, "rss_pkts", CTLFLAG_RW, 2805 &rxr->hn_rss_pkts, 2806 "# of packets w/ RSS info received"); 2807 SYSCTL_ADD_INT(ctx, 2808 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), 2809 OID_AUTO, "pktbuf_len", CTLFLAG_RD, 2810 &rxr->hn_pktbuf_len, 0, 2811 "Temporary channel packet buffer length"); 2812 } 2813 } 2814 } 2815 2816 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", 2817 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2818 __offsetof(struct hn_rx_ring, hn_lro.lro_queued), 2819#if __FreeBSD_version < 1100095 2820 hn_rx_stat_int_sysctl, 2821#else 2822 hn_rx_stat_u64_sysctl, 2823#endif 2824 "LU", "LRO queued"); 2825 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", 2826 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2827 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), 2828#if __FreeBSD_version < 1100095 2829 hn_rx_stat_int_sysctl, 2830#else 2831 hn_rx_stat_u64_sysctl, 2832#endif 2833 "LU", "LRO 
flushed"); 2834 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", 2835 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2836 __offsetof(struct hn_rx_ring, hn_lro_tried), 2837 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); 2838#if __FreeBSD_version >= 1100099 2839 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", 2840 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2841 hn_lro_lenlim_sysctl, "IU", 2842 "Max # of data bytes to be aggregated by LRO"); 2843 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", 2844 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 2845 hn_lro_ackcnt_sysctl, "I", 2846 "Max # of ACKs to be aggregated by LRO"); 2847#endif 2848 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", 2849 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 2850 hn_trust_hcsum_sysctl, "I", 2851 "Trust tcp segement verification on host side, " 2852 "when csum info is missing"); 2853 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", 2854 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, 2855 hn_trust_hcsum_sysctl, "I", 2856 "Trust udp datagram verification on host side, " 2857 "when csum info is missing"); 2858 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", 2859 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, 2860 hn_trust_hcsum_sysctl, "I", 2861 "Trust ip packet verification on host side, " 2862 "when csum info is missing"); 2863 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", 2864 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2865 __offsetof(struct hn_rx_ring, hn_csum_ip), 2866 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); 2867 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", 2868 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2869 __offsetof(struct hn_rx_ring, hn_csum_tcp), 2870 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); 2871 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", 2872 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2873 __offsetof(struct hn_rx_ring, 
hn_csum_udp), 2874 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); 2875 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", 2876 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2877 __offsetof(struct hn_rx_ring, hn_csum_trusted), 2878 hn_rx_stat_ulong_sysctl, "LU", 2879 "# of packets that we trust host's csum verification"); 2880 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", 2881 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2882 __offsetof(struct hn_rx_ring, hn_small_pkts), 2883 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); 2884 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed", 2885 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 2886 __offsetof(struct hn_rx_ring, hn_ack_failed), 2887 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures"); 2888 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", 2889 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); 2890 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", 2891 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); 2892 2893 return (0); 2894} 2895 2896static void 2897hn_destroy_rx_data(struct hn_softc *sc) 2898{ 2899 int i; 2900 2901 if (sc->hn_rxbuf != NULL) { 2902 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); 2903 sc->hn_rxbuf = NULL; 2904 } 2905 2906 if (sc->hn_rx_ring_cnt == 0) 2907 return; 2908 2909 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 2910 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; 2911 2912 if (rxr->hn_br == NULL) 2913 continue; 2914 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br); 2915 rxr->hn_br = NULL; 2916 2917#if defined(INET) || defined(INET6) 2918 tcp_lro_free(&rxr->hn_lro); 2919#endif 2920 free(rxr->hn_pktbuf, M_DEVBUF); 2921 } 2922 free(sc->hn_rx_ring, M_DEVBUF); 2923 sc->hn_rx_ring = NULL; 2924 2925 sc->hn_rx_ring_cnt = 0; 2926 sc->hn_rx_ring_inuse = 0; 2927} 2928 2929static int 2930hn_tx_ring_create(struct hn_softc *sc, int id) 2931{ 2932 struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; 2933 device_t dev = sc->hn_dev; 2934 bus_dma_tag_t 
parent_dtag; 2935 int error, i; 2936 2937 txr->hn_sc = sc; 2938 txr->hn_tx_idx = id; 2939 2940#ifndef HN_USE_TXDESC_BUFRING 2941 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); 2942#endif 2943 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 2944 2945 txr->hn_txdesc_cnt = HN_TX_DESC_CNT; 2946 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, 2947 M_DEVBUF, M_WAITOK | M_ZERO); 2948#ifndef HN_USE_TXDESC_BUFRING 2949 SLIST_INIT(&txr->hn_txlist); 2950#else 2951 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF, 2952 M_WAITOK, &txr->hn_tx_lock); 2953#endif 2954 2955 txr->hn_tx_taskq = sc->hn_tx_taskq; 2956 2957 if (hn_use_if_start) { 2958 txr->hn_txeof = hn_start_txeof; 2959 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); 2960 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); 2961 } else { 2962 int br_depth; 2963 2964 txr->hn_txeof = hn_xmit_txeof; 2965 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); 2966 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); 2967 2968 br_depth = hn_get_txswq_depth(txr); 2969 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF, 2970 M_WAITOK, &txr->hn_tx_lock); 2971 } 2972 2973 txr->hn_direct_tx_size = hn_direct_tx_size; 2974 2975 /* 2976 * Always schedule transmission instead of trying to do direct 2977 * transmission. This one gives the best performance so far. 2978 */ 2979 txr->hn_sched_tx = 1; 2980 2981 parent_dtag = bus_get_dma_tag(dev); 2982 2983 /* DMA tag for RNDIS packet messages. 
*/ 2984 error = bus_dma_tag_create(parent_dtag, /* parent */ 2985 HN_RNDIS_PKT_ALIGN, /* alignment */ 2986 HN_RNDIS_PKT_BOUNDARY, /* boundary */ 2987 BUS_SPACE_MAXADDR, /* lowaddr */ 2988 BUS_SPACE_MAXADDR, /* highaddr */ 2989 NULL, NULL, /* filter, filterarg */ 2990 HN_RNDIS_PKT_LEN, /* maxsize */ 2991 1, /* nsegments */ 2992 HN_RNDIS_PKT_LEN, /* maxsegsize */ 2993 0, /* flags */ 2994 NULL, /* lockfunc */ 2995 NULL, /* lockfuncarg */ 2996 &txr->hn_tx_rndis_dtag); 2997 if (error) { 2998 device_printf(dev, "failed to create rndis dmatag\n"); 2999 return error; 3000 } 3001 3002 /* DMA tag for data. */ 3003 error = bus_dma_tag_create(parent_dtag, /* parent */ 3004 1, /* alignment */ 3005 HN_TX_DATA_BOUNDARY, /* boundary */ 3006 BUS_SPACE_MAXADDR, /* lowaddr */ 3007 BUS_SPACE_MAXADDR, /* highaddr */ 3008 NULL, NULL, /* filter, filterarg */ 3009 HN_TX_DATA_MAXSIZE, /* maxsize */ 3010 HN_TX_DATA_SEGCNT_MAX, /* nsegments */ 3011 HN_TX_DATA_SEGSIZE, /* maxsegsize */ 3012 0, /* flags */ 3013 NULL, /* lockfunc */ 3014 NULL, /* lockfuncarg */ 3015 &txr->hn_tx_data_dtag); 3016 if (error) { 3017 device_printf(dev, "failed to create data dmatag\n"); 3018 return error; 3019 } 3020 3021 for (i = 0; i < txr->hn_txdesc_cnt; ++i) { 3022 struct hn_txdesc *txd = &txr->hn_txdesc[i]; 3023 3024 txd->txr = txr; 3025 txd->chim_index = HN_NVS_CHIM_IDX_INVALID; 3026 3027 /* 3028 * Allocate and load RNDIS packet message. 
3029 */ 3030 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, 3031 (void **)&txd->rndis_pkt, 3032 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, 3033 &txd->rndis_pkt_dmap); 3034 if (error) { 3035 device_printf(dev, 3036 "failed to allocate rndis_packet_msg, %d\n", i); 3037 return error; 3038 } 3039 3040 error = bus_dmamap_load(txr->hn_tx_rndis_dtag, 3041 txd->rndis_pkt_dmap, 3042 txd->rndis_pkt, HN_RNDIS_PKT_LEN, 3043 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, 3044 BUS_DMA_NOWAIT); 3045 if (error) { 3046 device_printf(dev, 3047 "failed to load rndis_packet_msg, %d\n", i); 3048 bus_dmamem_free(txr->hn_tx_rndis_dtag, 3049 txd->rndis_pkt, txd->rndis_pkt_dmap); 3050 return error; 3051 } 3052 3053 /* DMA map for TX data. */ 3054 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, 3055 &txd->data_dmap); 3056 if (error) { 3057 device_printf(dev, 3058 "failed to allocate tx data dmamap\n"); 3059 bus_dmamap_unload(txr->hn_tx_rndis_dtag, 3060 txd->rndis_pkt_dmap); 3061 bus_dmamem_free(txr->hn_tx_rndis_dtag, 3062 txd->rndis_pkt, txd->rndis_pkt_dmap); 3063 return error; 3064 } 3065 3066 /* All set, put it to list */ 3067 txd->flags |= HN_TXD_FLAG_ONLIST; 3068#ifndef HN_USE_TXDESC_BUFRING 3069 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); 3070#else 3071 buf_ring_enqueue(txr->hn_txdesc_br, txd); 3072#endif 3073 } 3074 txr->hn_txdesc_avail = txr->hn_txdesc_cnt; 3075 3076 if (sc->hn_tx_sysctl_tree != NULL) { 3077 struct sysctl_oid_list *child; 3078 struct sysctl_ctx_list *ctx; 3079 char name[16]; 3080 3081 /* 3082 * Create per TX ring sysctl tree: 3083 * dev.hn.UNIT.tx.RINGID 3084 */ 3085 ctx = device_get_sysctl_ctx(dev); 3086 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); 3087 3088 snprintf(name, sizeof(name), "%d", id); 3089 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 3090 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3091 3092 if (txr->hn_tx_sysctl_tree != NULL) { 3093 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); 3094 3095 SYSCTL_ADD_INT(ctx, child, 
OID_AUTO, "txdesc_avail", 3096 CTLFLAG_RD, &txr->hn_txdesc_avail, 0, 3097 "# of available TX descs"); 3098 if (!hn_use_if_start) { 3099 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", 3100 CTLFLAG_RD, &txr->hn_oactive, 0, 3101 "over active"); 3102 } 3103 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", 3104 CTLFLAG_RW, &txr->hn_pkts, 3105 "# of packets transmitted"); 3106 } 3107 } 3108 3109 return 0; 3110} 3111 3112static void 3113hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) 3114{ 3115 struct hn_tx_ring *txr = txd->txr; 3116 3117 KASSERT(txd->m == NULL, ("still has mbuf installed")); 3118 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); 3119 3120 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); 3121 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, 3122 txd->rndis_pkt_dmap); 3123 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); 3124} 3125 3126static void 3127hn_tx_ring_destroy(struct hn_tx_ring *txr) 3128{ 3129 struct hn_txdesc *txd; 3130 3131 if (txr->hn_txdesc == NULL) 3132 return; 3133 3134#ifndef HN_USE_TXDESC_BUFRING 3135 while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { 3136 SLIST_REMOVE_HEAD(&txr->hn_txlist, link); 3137 hn_txdesc_dmamap_destroy(txd); 3138 } 3139#else 3140 mtx_lock(&txr->hn_tx_lock); 3141 while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) 3142 hn_txdesc_dmamap_destroy(txd); 3143 mtx_unlock(&txr->hn_tx_lock); 3144#endif 3145 3146 if (txr->hn_tx_data_dtag != NULL) 3147 bus_dma_tag_destroy(txr->hn_tx_data_dtag); 3148 if (txr->hn_tx_rndis_dtag != NULL) 3149 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); 3150 3151#ifdef HN_USE_TXDESC_BUFRING 3152 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF); 3153#endif 3154 3155 free(txr->hn_txdesc, M_DEVBUF); 3156 txr->hn_txdesc = NULL; 3157 3158 if (txr->hn_mbuf_br != NULL) 3159 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF); 3160 3161#ifndef HN_USE_TXDESC_BUFRING 3162 mtx_destroy(&txr->hn_txlist_spin); 3163#endif 3164 mtx_destroy(&txr->hn_tx_lock); 
3165} 3166 3167static int 3168hn_create_tx_data(struct hn_softc *sc, int ring_cnt) 3169{ 3170 struct sysctl_oid_list *child; 3171 struct sysctl_ctx_list *ctx; 3172 int i; 3173 3174 /* 3175 * Create TXBUF for chimney sending. 3176 * 3177 * NOTE: It is shared by all channels. 3178 */ 3179 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), 3180 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma, 3181 BUS_DMA_WAITOK | BUS_DMA_ZERO); 3182 if (sc->hn_chim == NULL) { 3183 device_printf(sc->hn_dev, "allocate txbuf failed\n"); 3184 return (ENOMEM); 3185 } 3186 3187 sc->hn_tx_ring_cnt = ring_cnt; 3188 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 3189 3190 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, 3191 M_DEVBUF, M_WAITOK | M_ZERO); 3192 3193 ctx = device_get_sysctl_ctx(sc->hn_dev); 3194 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); 3195 3196 /* Create dev.hn.UNIT.tx sysctl tree */ 3197 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", 3198 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 3199 3200 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3201 int error; 3202 3203 error = hn_tx_ring_create(sc, i); 3204 if (error) 3205 return error; 3206 } 3207 3208 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", 3209 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3210 __offsetof(struct hn_tx_ring, hn_no_txdescs), 3211 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); 3212 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", 3213 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3214 __offsetof(struct hn_tx_ring, hn_send_failed), 3215 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); 3216 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", 3217 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3218 __offsetof(struct hn_tx_ring, hn_txdma_failed), 3219 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); 3220 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", 3221 CTLTYPE_ULONG | CTLFLAG_RW | 
CTLFLAG_MPSAFE, sc, 3222 __offsetof(struct hn_tx_ring, hn_tx_collapsed), 3223 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); 3224 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", 3225 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3226 __offsetof(struct hn_tx_ring, hn_tx_chimney), 3227 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); 3228 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", 3229 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3230 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), 3231 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); 3232 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", 3233 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, 3234 "# of total TX descs"); 3235 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", 3236 CTLFLAG_RD, &sc->hn_chim_szmax, 0, 3237 "Chimney send packet size upper boundary"); 3238 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", 3239 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, 3240 hn_chim_size_sysctl, "I", "Chimney send packet size limit"); 3241 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", 3242 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3243 __offsetof(struct hn_tx_ring, hn_direct_tx_size), 3244 hn_tx_conf_int_sysctl, "I", 3245 "Size of the packet for direct transmission"); 3246 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", 3247 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 3248 __offsetof(struct hn_tx_ring, hn_sched_tx), 3249 hn_tx_conf_int_sysctl, "I", 3250 "Always schedule transmission " 3251 "instead of doing direct transmission"); 3252 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", 3253 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); 3254 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", 3255 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); 3256 3257 return 0; 3258} 3259 3260static void 3261hn_set_chim_size(struct hn_softc *sc, int chim_size) 3262{ 3263 int i; 3264 3265 for (i = 0; i < 
sc->hn_tx_ring_inuse; ++i) 3266 sc->hn_tx_ring[i].hn_chim_size = chim_size; 3267} 3268 3269static void 3270hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) 3271{ 3272 struct ifnet *ifp = sc->hn_ifp; 3273 int tso_minlen; 3274 3275 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) 3276 return; 3277 3278 KASSERT(sc->hn_ndis_tso_sgmin >= 2, 3279 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin)); 3280 tso_minlen = sc->hn_ndis_tso_sgmin * mtu; 3281 3282 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen && 3283 sc->hn_ndis_tso_szmax <= IP_MAXPACKET, 3284 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax)); 3285 3286 if (tso_maxlen < tso_minlen) 3287 tso_maxlen = tso_minlen; 3288 else if (tso_maxlen > IP_MAXPACKET) 3289 tso_maxlen = IP_MAXPACKET; 3290 if (tso_maxlen > sc->hn_ndis_tso_szmax) 3291 tso_maxlen = sc->hn_ndis_tso_szmax; 3292 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); 3293 if (bootverbose) 3294 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); 3295} 3296 3297static void 3298hn_fixup_tx_data(struct hn_softc *sc) 3299{ 3300 uint64_t csum_assist; 3301 int i; 3302 3303 hn_set_chim_size(sc, sc->hn_chim_szmax); 3304 if (hn_tx_chimney_size > 0 && 3305 hn_tx_chimney_size < sc->hn_chim_szmax) 3306 hn_set_chim_size(sc, hn_tx_chimney_size); 3307 3308 csum_assist = 0; 3309 if (sc->hn_caps & HN_CAP_IPCS) 3310 csum_assist |= CSUM_IP; 3311 if (sc->hn_caps & HN_CAP_TCP4CS) 3312 csum_assist |= CSUM_IP_TCP; 3313 if (sc->hn_caps & HN_CAP_UDP4CS) 3314 csum_assist |= CSUM_IP_UDP; 3315#ifdef notyet 3316 if (sc->hn_caps & HN_CAP_TCP6CS) 3317 csum_assist |= CSUM_IP6_TCP; 3318 if (sc->hn_caps & HN_CAP_UDP6CS) 3319 csum_assist |= CSUM_IP6_UDP; 3320#endif 3321 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3322 sc->hn_tx_ring[i].hn_csum_assist = csum_assist; 3323 3324 if (sc->hn_caps & HN_CAP_HASHVAL) { 3325 /* 3326 * Support HASHVAL pktinfo on TX path. 
3327 */ 3328 if (bootverbose) 3329 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n"); 3330 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3331 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL; 3332 } 3333} 3334 3335static void 3336hn_destroy_tx_data(struct hn_softc *sc) 3337{ 3338 int i; 3339 3340 if (sc->hn_chim != NULL) { 3341 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); 3342 sc->hn_chim = NULL; 3343 } 3344 3345 if (sc->hn_tx_ring_cnt == 0) 3346 return; 3347 3348 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) 3349 hn_tx_ring_destroy(&sc->hn_tx_ring[i]); 3350 3351 free(sc->hn_tx_ring, M_DEVBUF); 3352 sc->hn_tx_ring = NULL; 3353 3354 sc->hn_tx_ring_cnt = 0; 3355 sc->hn_tx_ring_inuse = 0; 3356} 3357 3358static void 3359hn_start_taskfunc(void *xtxr, int pending __unused) 3360{ 3361 struct hn_tx_ring *txr = xtxr; 3362 3363 mtx_lock(&txr->hn_tx_lock); 3364 hn_start_locked(txr, 0); 3365 mtx_unlock(&txr->hn_tx_lock); 3366} 3367 3368static void 3369hn_start_txeof_taskfunc(void *xtxr, int pending __unused) 3370{ 3371 struct hn_tx_ring *txr = xtxr; 3372 3373 mtx_lock(&txr->hn_tx_lock); 3374 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); 3375 hn_start_locked(txr, 0); 3376 mtx_unlock(&txr->hn_tx_lock); 3377} 3378 3379static int 3380hn_xmit(struct hn_tx_ring *txr, int len) 3381{ 3382 struct hn_softc *sc = txr->hn_sc; 3383 struct ifnet *ifp = sc->hn_ifp; 3384 struct mbuf *m_head; 3385 3386 mtx_assert(&txr->hn_tx_lock, MA_OWNED); 3387 KASSERT(hn_use_if_start == 0, 3388 ("hn_xmit is called, when if_start is enabled")); 3389 3390 if (__predict_false(txr->hn_suspended)) 3391 return 0; 3392 3393 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) 3394 return 0; 3395 3396 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { 3397 struct hn_txdesc *txd; 3398 int error; 3399 3400 if (len > 0 && m_head->m_pkthdr.len > len) { 3401 /* 3402 * This sending could be time consuming; let callers 3403 * dispatch this packet sending (and sending of any 
3404 * following up packets) to tx taskqueue. 3405 */ 3406 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3407 return 1; 3408 } 3409 3410 txd = hn_txdesc_get(txr); 3411 if (txd == NULL) { 3412 txr->hn_no_txdescs++; 3413 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3414 txr->hn_oactive = 1; 3415 break; 3416 } 3417 3418 error = hn_encap(txr, txd, &m_head); 3419 if (error) { 3420 /* Both txd and m_head are freed; discard */ 3421 drbr_advance(ifp, txr->hn_mbuf_br); 3422 continue; 3423 } 3424 3425 error = hn_txpkt(ifp, txr, txd); 3426 if (__predict_false(error)) { 3427 /* txd is freed, but m_head is not */ 3428 drbr_putback(ifp, txr->hn_mbuf_br, m_head); 3429 txr->hn_oactive = 1; 3430 break; 3431 } 3432 3433 /* Sent */ 3434 drbr_advance(ifp, txr->hn_mbuf_br); 3435 } 3436 return 0; 3437} 3438 3439static int 3440hn_transmit(struct ifnet *ifp, struct mbuf *m) 3441{ 3442 struct hn_softc *sc = ifp->if_softc; 3443 struct hn_tx_ring *txr; 3444 int error, idx = 0; 3445 3446 /* 3447 * Select the TX ring based on flowid 3448 */ 3449 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 3450 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; 3451 txr = &sc->hn_tx_ring[idx]; 3452 3453 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); 3454 if (error) { 3455 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); 3456 return error; 3457 } 3458 3459 if (txr->hn_oactive) 3460 return 0; 3461 3462 if (txr->hn_sched_tx) 3463 goto do_sched; 3464 3465 if (mtx_trylock(&txr->hn_tx_lock)) { 3466 int sched; 3467 3468 sched = hn_xmit(txr, txr->hn_direct_tx_size); 3469 mtx_unlock(&txr->hn_tx_lock); 3470 if (!sched) 3471 return 0; 3472 } 3473do_sched: 3474 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); 3475 return 0; 3476} 3477 3478static void 3479hn_tx_ring_qflush(struct hn_tx_ring *txr) 3480{ 3481 struct mbuf *m; 3482 3483 mtx_lock(&txr->hn_tx_lock); 3484 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) 3485 m_freem(m); 3486 mtx_unlock(&txr->hn_tx_lock); 3487} 3488 3489static void 3490hn_xmit_qflush(struct 
ifnet *ifp) 3491{ 3492 struct hn_softc *sc = ifp->if_softc; 3493 int i; 3494 3495 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) 3496 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 3497 if_qflush(ifp); 3498} 3499 3500static void 3501hn_xmit_txeof(struct hn_tx_ring *txr) 3502{ 3503 3504 if (txr->hn_sched_tx) 3505 goto do_sched; 3506 3507 if (mtx_trylock(&txr->hn_tx_lock)) { 3508 int sched; 3509 3510 txr->hn_oactive = 0; 3511 sched = hn_xmit(txr, txr->hn_direct_tx_size); 3512 mtx_unlock(&txr->hn_tx_lock); 3513 if (sched) { 3514 taskqueue_enqueue(txr->hn_tx_taskq, 3515 &txr->hn_tx_task); 3516 } 3517 } else { 3518do_sched: 3519 /* 3520 * Release the oactive earlier, with the hope, that 3521 * others could catch up. The task will clear the 3522 * oactive again with the hn_tx_lock to avoid possible 3523 * races. 3524 */ 3525 txr->hn_oactive = 0; 3526 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 3527 } 3528} 3529 3530static void 3531hn_xmit_taskfunc(void *xtxr, int pending __unused) 3532{ 3533 struct hn_tx_ring *txr = xtxr; 3534 3535 mtx_lock(&txr->hn_tx_lock); 3536 hn_xmit(txr, 0); 3537 mtx_unlock(&txr->hn_tx_lock); 3538} 3539 3540static void 3541hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) 3542{ 3543 struct hn_tx_ring *txr = xtxr; 3544 3545 mtx_lock(&txr->hn_tx_lock); 3546 txr->hn_oactive = 0; 3547 hn_xmit(txr, 0); 3548 mtx_unlock(&txr->hn_tx_lock); 3549} 3550 3551static int 3552hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan) 3553{ 3554 struct vmbus_chan_br cbr; 3555 struct hn_rx_ring *rxr; 3556 struct hn_tx_ring *txr = NULL; 3557 int idx, error; 3558 3559 idx = vmbus_chan_subidx(chan); 3560 3561 /* 3562 * Link this channel to RX/TX ring. 
3563 */ 3564 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 3565 ("invalid channel index %d, should > 0 && < %d", 3566 idx, sc->hn_rx_ring_inuse)); 3567 rxr = &sc->hn_rx_ring[idx]; 3568 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, 3569 ("RX ring %d already attached", idx)); 3570 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; 3571 3572 if (bootverbose) { 3573 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n", 3574 idx, vmbus_chan_id(chan)); 3575 } 3576 3577 if (idx < sc->hn_tx_ring_inuse) { 3578 txr = &sc->hn_tx_ring[idx]; 3579 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, 3580 ("TX ring %d already attached", idx)); 3581 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; 3582 3583 txr->hn_chan = chan; 3584 if (bootverbose) { 3585 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n", 3586 idx, vmbus_chan_id(chan)); 3587 } 3588 } 3589 3590 /* Bind this channel to a proper CPU. */ 3591 vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); 3592 3593 /* 3594 * Open this channel 3595 */ 3596 cbr.cbr = rxr->hn_br; 3597 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr; 3598 cbr.cbr_txsz = HN_TXBR_SIZE; 3599 cbr.cbr_rxsz = HN_RXBR_SIZE; 3600 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr); 3601 if (error) { 3602 if_printf(sc->hn_ifp, "open chan%u failed: %d\n", 3603 vmbus_chan_id(chan), error); 3604 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 3605 if (txr != NULL) 3606 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 3607 } 3608 return (error); 3609} 3610 3611static void 3612hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan) 3613{ 3614 struct hn_rx_ring *rxr; 3615 int idx; 3616 3617 idx = vmbus_chan_subidx(chan); 3618 3619 /* 3620 * Link this channel to RX/TX ring. 
3621 */ 3622 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, 3623 ("invalid channel index %d, should > 0 && < %d", 3624 idx, sc->hn_rx_ring_inuse)); 3625 rxr = &sc->hn_rx_ring[idx]; 3626 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED), 3627 ("RX ring %d is not attached", idx)); 3628 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; 3629 3630 if (idx < sc->hn_tx_ring_inuse) { 3631 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; 3632 3633 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED), 3634 ("TX ring %d is not attached attached", idx)); 3635 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; 3636 } 3637 3638 /* 3639 * Close this channel. 3640 * 3641 * NOTE: 3642 * Channel closing does _not_ destroy the target channel. 3643 */ 3644 vmbus_chan_close(chan); 3645} 3646 3647static int 3648hn_attach_subchans(struct hn_softc *sc) 3649{ 3650 struct vmbus_channel **subchans; 3651 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 3652 int i, error = 0; 3653 3654 if (subchan_cnt == 0) 3655 return (0); 3656 3657 /* Attach the sub-channels. */ 3658 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 3659 for (i = 0; i < subchan_cnt; ++i) { 3660 error = hn_chan_attach(sc, subchans[i]); 3661 if (error) 3662 break; 3663 } 3664 vmbus_subchan_rel(subchans, subchan_cnt); 3665 3666 if (error) { 3667 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error); 3668 } else { 3669 if (bootverbose) { 3670 if_printf(sc->hn_ifp, "%d sub-channels attached\n", 3671 subchan_cnt); 3672 } 3673 } 3674 return (error); 3675} 3676 3677static void 3678hn_detach_allchans(struct hn_softc *sc) 3679{ 3680 struct vmbus_channel **subchans; 3681 int subchan_cnt = sc->hn_rx_ring_inuse - 1; 3682 int i; 3683 3684 if (subchan_cnt == 0) 3685 goto back; 3686 3687 /* Detach the sub-channels. 
*/ 3688 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); 3689 for (i = 0; i < subchan_cnt; ++i) 3690 hn_chan_detach(sc, subchans[i]); 3691 vmbus_subchan_rel(subchans, subchan_cnt); 3692 3693back: 3694 /* 3695 * Detach the primary channel, _after_ all sub-channels 3696 * are detached. 3697 */ 3698 hn_chan_detach(sc, sc->hn_prichan); 3699 3700 /* Wait for sub-channels to be destroyed, if any. */ 3701 vmbus_subchan_drain(sc->hn_prichan); 3702 3703#ifdef INVARIANTS 3704 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { 3705 KASSERT((sc->hn_rx_ring[i].hn_rx_flags & 3706 HN_RX_FLAG_ATTACHED) == 0, 3707 ("%dth RX ring is still attached", i)); 3708 } 3709 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { 3710 KASSERT((sc->hn_tx_ring[i].hn_tx_flags & 3711 HN_TX_FLAG_ATTACHED) == 0, 3712 ("%dth TX ring is still attached", i)); 3713 } 3714#endif 3715} 3716 3717static int 3718hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch) 3719{ 3720 struct vmbus_channel **subchans; 3721 int nchan, rxr_cnt, error; 3722 3723 nchan = *nsubch + 1; 3724 if (nchan == 1) { 3725 /* 3726 * Multiple RX/TX rings are not requested. 3727 */ 3728 *nsubch = 0; 3729 return (0); 3730 } 3731 3732 /* 3733 * Query RSS capabilities, e.g. # of RX rings, and # of indirect 3734 * table entries. 3735 */ 3736 error = hn_rndis_query_rsscaps(sc, &rxr_cnt); 3737 if (error) { 3738 /* No RSS; this is benign. */ 3739 *nsubch = 0; 3740 return (0); 3741 } 3742 if (bootverbose) { 3743 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", 3744 rxr_cnt, nchan); 3745 } 3746 3747 if (nchan > rxr_cnt) 3748 nchan = rxr_cnt; 3749 if (nchan == 1) { 3750 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n"); 3751 *nsubch = 0; 3752 return (0); 3753 } 3754 3755 /* 3756 * Allocate sub-channels from NVS. 3757 */ 3758 *nsubch = nchan - 1; 3759 error = hn_nvs_alloc_subchans(sc, nsubch); 3760 if (error || *nsubch == 0) { 3761 /* Failed to allocate sub-channels. 
*/ 3762 *nsubch = 0; 3763 return (0); 3764 } 3765 3766 /* 3767 * Wait for all sub-channels to become ready before moving on. 3768 */ 3769 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch); 3770 vmbus_subchan_rel(subchans, *nsubch); 3771 return (0); 3772} 3773 3774static int 3775hn_synth_attach(struct hn_softc *sc, int mtu) 3776{ 3777 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; 3778 int error, nsubch, nchan, i; 3779 uint32_t old_caps; 3780 3781 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0, 3782 ("synthetic parts were attached")); 3783 3784 /* Save capabilities for later verification. */ 3785 old_caps = sc->hn_caps; 3786 sc->hn_caps = 0; 3787 3788 /* Clear RSS stuffs. */ 3789 sc->hn_rss_ind_size = 0; 3790 sc->hn_rss_hash = 0; 3791 3792 /* 3793 * Attach the primary channel _before_ attaching NVS and RNDIS. 3794 */ 3795 error = hn_chan_attach(sc, sc->hn_prichan); 3796 if (error) 3797 return (error); 3798 3799 /* 3800 * Attach NVS. 3801 */ 3802 error = hn_nvs_attach(sc, mtu); 3803 if (error) 3804 return (error); 3805 3806 /* 3807 * Attach RNDIS _after_ NVS is attached. 3808 */ 3809 error = hn_rndis_attach(sc, mtu); 3810 if (error) 3811 return (error); 3812 3813 /* 3814 * Make sure capabilities are not changed. 3815 */ 3816 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) { 3817 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n", 3818 old_caps, sc->hn_caps); 3819 /* Restore old capabilities and abort. */ 3820 sc->hn_caps = old_caps; 3821 return ENXIO; 3822 } 3823 3824 /* 3825 * Allocate sub-channels for multi-TX/RX rings. 3826 * 3827 * NOTE: 3828 * The # of RX rings that can be used is equivalent to the # of 3829 * channels to be requested. 
3830 */ 3831 nsubch = sc->hn_rx_ring_cnt - 1; 3832 error = hn_synth_alloc_subchans(sc, &nsubch); 3833 if (error) 3834 return (error); 3835 3836 nchan = nsubch + 1; 3837 if (nchan == 1) { 3838 /* Only the primary channel can be used; done */ 3839 goto back; 3840 } 3841 3842 /* 3843 * Configure RSS key and indirect table _after_ all sub-channels 3844 * are allocated. 3845 */ 3846 3847 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) { 3848 /* 3849 * RSS key is not set yet; set it to the default RSS key. 3850 */ 3851 if (bootverbose) 3852 if_printf(sc->hn_ifp, "setup default RSS key\n"); 3853 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key)); 3854 sc->hn_flags |= HN_FLAG_HAS_RSSKEY; 3855 } 3856 3857 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) { 3858 /* 3859 * RSS indirect table is not set yet; set it up in round- 3860 * robin fashion. 3861 */ 3862 if (bootverbose) { 3863 if_printf(sc->hn_ifp, "setup default RSS indirect " 3864 "table\n"); 3865 } 3866 for (i = 0; i < NDIS_HASH_INDCNT; ++i) 3867 rss->rss_ind[i] = i % nchan; 3868 sc->hn_flags |= HN_FLAG_HAS_RSSIND; 3869 } else { 3870 /* 3871 * # of usable channels may be changed, so we have to 3872 * make sure that all entries in RSS indirect table 3873 * are valid. 3874 */ 3875 hn_rss_ind_fixup(sc, nchan); 3876 } 3877 3878 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); 3879 if (error) { 3880 /* 3881 * Failed to configure RSS key or indirect table; only 3882 * the primary channel can be used. 3883 */ 3884 nchan = 1; 3885 } 3886back: 3887 /* 3888 * Set the # of TX/RX rings that could be used according to 3889 * the # of channels that NVS offered. 3890 */ 3891 hn_set_ring_inuse(sc, nchan); 3892 3893 /* 3894 * Attach the sub-channels, if any. 
3895 */ 3896 error = hn_attach_subchans(sc); 3897 if (error) 3898 return (error); 3899 3900 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED; 3901 return (0); 3902} 3903 3904/* 3905 * NOTE: 3906 * The interface must have been suspended though hn_suspend(), before 3907 * this function get called. 3908 */ 3909static void 3910hn_synth_detach(struct hn_softc *sc) 3911{ 3912 HN_LOCK_ASSERT(sc); 3913 3914 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, 3915 ("synthetic parts were not attached")); 3916 3917 /* Detach the RNDIS first. */ 3918 hn_rndis_detach(sc); 3919 3920 /* Detach NVS. */ 3921 hn_nvs_detach(sc); 3922 3923 /* Detach all of the channels. */ 3924 hn_detach_allchans(sc); 3925 3926 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED; 3927} 3928 3929static void 3930hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt) 3931{ 3932 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt, 3933 ("invalid ring count %d", ring_cnt)); 3934 3935 if (sc->hn_tx_ring_cnt > ring_cnt) 3936 sc->hn_tx_ring_inuse = ring_cnt; 3937 else 3938 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; 3939 sc->hn_rx_ring_inuse = ring_cnt; 3940 3941 if (bootverbose) { 3942 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n", 3943 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); 3944 } 3945} 3946 3947static void 3948hn_chan_drain(struct vmbus_channel *chan) 3949{ 3950 3951 while (!vmbus_chan_rx_empty(chan) || !vmbus_chan_tx_empty(chan)) 3952 pause("waitch", 1); 3953 vmbus_chan_intr_drain(chan); 3954} 3955 3956static void 3957hn_suspend_data(struct hn_softc *sc) 3958{ 3959 struct vmbus_channel **subch = NULL; 3960 int i, nsubch; 3961 3962 HN_LOCK_ASSERT(sc); 3963 3964 /* 3965 * Suspend TX. 3966 */ 3967 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 3968 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 3969 3970 mtx_lock(&txr->hn_tx_lock); 3971 txr->hn_suspended = 1; 3972 mtx_unlock(&txr->hn_tx_lock); 3973 /* No one is able send more packets now. */ 3974 3975 /* Wait for all pending sends to finish. 
*/ 3976 while (hn_tx_ring_pending(txr)) 3977 pause("hnwtx", 1 /* 1 tick */); 3978 3979 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); 3980 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); 3981 } 3982 3983 /* 3984 * Disable RX by clearing RX filter. 3985 */ 3986 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE; 3987 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); 3988 3989 /* 3990 * Give RNDIS enough time to flush all pending data packets. 3991 */ 3992 pause("waitrx", (200 * hz) / 1000); 3993 3994 /* 3995 * Drain RX/TX bufrings and interrupts. 3996 */ 3997 nsubch = sc->hn_rx_ring_inuse - 1; 3998 if (nsubch > 0) 3999 subch = vmbus_subchan_get(sc->hn_prichan, nsubch); 4000 4001 if (subch != NULL) { 4002 for (i = 0; i < nsubch; ++i) 4003 hn_chan_drain(subch[i]); 4004 } 4005 hn_chan_drain(sc->hn_prichan); 4006 4007 if (subch != NULL) 4008 vmbus_subchan_rel(subch, nsubch); 4009} 4010 4011static void 4012hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused) 4013{ 4014 4015 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL; 4016} 4017 4018static void 4019hn_suspend_mgmt(struct hn_softc *sc) 4020{ 4021 struct task task; 4022 4023 HN_LOCK_ASSERT(sc); 4024 4025 /* 4026 * Make sure that hn_mgmt_taskq0 can nolonger be accessed 4027 * through hn_mgmt_taskq. 4028 */ 4029 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc); 4030 vmbus_chan_run_task(sc->hn_prichan, &task); 4031 4032 /* 4033 * Make sure that all pending management tasks are completed. 
4034 */ 4035 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init); 4036 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status); 4037 taskqueue_drain_all(sc->hn_mgmt_taskq0); 4038} 4039 4040static void 4041hn_suspend(struct hn_softc *sc) 4042{ 4043 4044 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 4045 hn_suspend_data(sc); 4046 hn_suspend_mgmt(sc); 4047} 4048 4049static void 4050hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt) 4051{ 4052 int i; 4053 4054 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt, 4055 ("invalid TX ring count %d", tx_ring_cnt)); 4056 4057 for (i = 0; i < tx_ring_cnt; ++i) { 4058 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 4059 4060 mtx_lock(&txr->hn_tx_lock); 4061 txr->hn_suspended = 0; 4062 mtx_unlock(&txr->hn_tx_lock); 4063 } 4064} 4065 4066static void 4067hn_resume_data(struct hn_softc *sc) 4068{ 4069 int i; 4070 4071 HN_LOCK_ASSERT(sc); 4072 4073 /* 4074 * Re-enable RX. 4075 */ 4076 hn_set_rxfilter(sc); 4077 4078 /* 4079 * Make sure to clear suspend status on "all" TX rings, 4080 * since hn_tx_ring_inuse can be changed after 4081 * hn_suspend_data(). 4082 */ 4083 hn_resume_tx(sc, sc->hn_tx_ring_cnt); 4084 4085 if (!hn_use_if_start) { 4086 /* 4087 * Flush unused drbrs, since hn_tx_ring_inuse may be 4088 * reduced. 4089 */ 4090 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i) 4091 hn_tx_ring_qflush(&sc->hn_tx_ring[i]); 4092 } 4093 4094 /* 4095 * Kick start TX. 4096 */ 4097 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { 4098 struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 4099 4100 /* 4101 * Use txeof task, so that any pending oactive can be 4102 * cleared properly. 4103 */ 4104 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); 4105 } 4106} 4107 4108static void 4109hn_resume_mgmt(struct hn_softc *sc) 4110{ 4111 4112 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; 4113 4114 /* 4115 * Kick off network change detection, if it was pending. 
4116 * If no network change was pending, start link status 4117 * checks, which is more lightweight than network change 4118 * detection. 4119 */ 4120 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG) 4121 hn_change_network(sc); 4122 else 4123 hn_update_link_status(sc); 4124} 4125 4126static void 4127hn_resume(struct hn_softc *sc) 4128{ 4129 4130 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) 4131 hn_resume_data(sc); 4132 hn_resume_mgmt(sc); 4133} 4134 4135static void 4136hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen) 4137{ 4138 const struct rndis_status_msg *msg; 4139 int ofs; 4140 4141 if (dlen < sizeof(*msg)) { 4142 if_printf(sc->hn_ifp, "invalid RNDIS status\n"); 4143 return; 4144 } 4145 msg = data; 4146 4147 switch (msg->rm_status) { 4148 case RNDIS_STATUS_MEDIA_CONNECT: 4149 case RNDIS_STATUS_MEDIA_DISCONNECT: 4150 hn_update_link_status(sc); 4151 break; 4152 4153 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: 4154 /* Not really useful; ignore. */ 4155 break; 4156 4157 case RNDIS_STATUS_NETWORK_CHANGE: 4158 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset); 4159 if (dlen < ofs + msg->rm_stbuflen || 4160 msg->rm_stbuflen < sizeof(uint32_t)) { 4161 if_printf(sc->hn_ifp, "network changed\n"); 4162 } else { 4163 uint32_t change; 4164 4165 memcpy(&change, ((const uint8_t *)msg) + ofs, 4166 sizeof(change)); 4167 if_printf(sc->hn_ifp, "network changed, change %u\n", 4168 change); 4169 } 4170 hn_change_network(sc); 4171 break; 4172 4173 default: 4174 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", 4175 msg->rm_status); 4176 break; 4177 } 4178} 4179 4180static int 4181hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) 4182{ 4183 const struct rndis_pktinfo *pi = info_data; 4184 uint32_t mask = 0; 4185 4186 while (info_dlen != 0) { 4187 const void *data; 4188 uint32_t dlen; 4189 4190 if (__predict_false(info_dlen < sizeof(*pi))) 4191 return (EINVAL); 4192 if (__predict_false(info_dlen < pi->rm_size)) 4193 return (EINVAL); 
4194 info_dlen -= pi->rm_size; 4195 4196 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) 4197 return (EINVAL); 4198 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) 4199 return (EINVAL); 4200 dlen = pi->rm_size - pi->rm_pktinfooffset; 4201 data = pi->rm_data; 4202 4203 switch (pi->rm_type) { 4204 case NDIS_PKTINFO_TYPE_VLAN: 4205 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) 4206 return (EINVAL); 4207 info->vlan_info = *((const uint32_t *)data); 4208 mask |= HN_RXINFO_VLAN; 4209 break; 4210 4211 case NDIS_PKTINFO_TYPE_CSUM: 4212 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) 4213 return (EINVAL); 4214 info->csum_info = *((const uint32_t *)data); 4215 mask |= HN_RXINFO_CSUM; 4216 break; 4217 4218 case HN_NDIS_PKTINFO_TYPE_HASHVAL: 4219 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) 4220 return (EINVAL); 4221 info->hash_value = *((const uint32_t *)data); 4222 mask |= HN_RXINFO_HASHVAL; 4223 break; 4224 4225 case HN_NDIS_PKTINFO_TYPE_HASHINF: 4226 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) 4227 return (EINVAL); 4228 info->hash_info = *((const uint32_t *)data); 4229 mask |= HN_RXINFO_HASHINF; 4230 break; 4231 4232 default: 4233 goto next; 4234 } 4235 4236 if (mask == HN_RXINFO_ALL) { 4237 /* All found; done */ 4238 break; 4239 } 4240next: 4241 pi = (const struct rndis_pktinfo *) 4242 ((const uint8_t *)pi + pi->rm_size); 4243 } 4244 4245 /* 4246 * Final fixup. 4247 * - If there is no hash value, invalidate the hash info. 
4248 */ 4249 if ((mask & HN_RXINFO_HASHVAL) == 0) 4250 info->hash_info = HN_NDIS_HASH_INFO_INVALID; 4251 return (0); 4252} 4253 4254static __inline bool 4255hn_rndis_check_overlap(int off, int len, int check_off, int check_len) 4256{ 4257 4258 if (off < check_off) { 4259 if (__predict_true(off + len <= check_off)) 4260 return (false); 4261 } else if (off > check_off) { 4262 if (__predict_true(check_off + check_len <= off)) 4263 return (false); 4264 } 4265 return (true); 4266} 4267 4268static void 4269hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) 4270{ 4271 const struct rndis_packet_msg *pkt; 4272 struct hn_rxinfo info; 4273 int data_off, pktinfo_off, data_len, pktinfo_len; 4274 4275 /* 4276 * Check length. 4277 */ 4278 if (__predict_false(dlen < sizeof(*pkt))) { 4279 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); 4280 return; 4281 } 4282 pkt = data; 4283 4284 if (__predict_false(dlen < pkt->rm_len)) { 4285 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " 4286 "dlen %d, msglen %u\n", dlen, pkt->rm_len); 4287 return; 4288 } 4289 if (__predict_false(pkt->rm_len < 4290 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { 4291 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " 4292 "msglen %u, data %u, oob %u, pktinfo %u\n", 4293 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, 4294 pkt->rm_pktinfolen); 4295 return; 4296 } 4297 if (__predict_false(pkt->rm_datalen == 0)) { 4298 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); 4299 return; 4300 } 4301 4302 /* 4303 * Check offests. 
4304 */ 4305#define IS_OFFSET_INVALID(ofs) \ 4306 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ 4307 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) 4308 4309 /* XXX Hyper-V does not meet data offset alignment requirement */ 4310 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { 4311 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4312 "data offset %u\n", pkt->rm_dataoffset); 4313 return; 4314 } 4315 if (__predict_false(pkt->rm_oobdataoffset > 0 && 4316 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { 4317 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4318 "oob offset %u\n", pkt->rm_oobdataoffset); 4319 return; 4320 } 4321 if (__predict_true(pkt->rm_pktinfooffset > 0) && 4322 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { 4323 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4324 "pktinfo offset %u\n", pkt->rm_pktinfooffset); 4325 return; 4326 } 4327 4328#undef IS_OFFSET_INVALID 4329 4330 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); 4331 data_len = pkt->rm_datalen; 4332 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); 4333 pktinfo_len = pkt->rm_pktinfolen; 4334 4335 /* 4336 * Check OOB coverage. 4337 */ 4338 if (__predict_false(pkt->rm_oobdatalen != 0)) { 4339 int oob_off, oob_len; 4340 4341 if_printf(rxr->hn_ifp, "got oobdata\n"); 4342 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); 4343 oob_len = pkt->rm_oobdatalen; 4344 4345 if (__predict_false(oob_off + oob_len > pkt->rm_len)) { 4346 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4347 "oob overflow, msglen %u, oob abs %d len %d\n", 4348 pkt->rm_len, oob_off, oob_len); 4349 return; 4350 } 4351 4352 /* 4353 * Check against data. 
4354 */ 4355 if (hn_rndis_check_overlap(oob_off, oob_len, 4356 data_off, data_len)) { 4357 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4358 "oob overlaps data, oob abs %d len %d, " 4359 "data abs %d len %d\n", 4360 oob_off, oob_len, data_off, data_len); 4361 return; 4362 } 4363 4364 /* 4365 * Check against pktinfo. 4366 */ 4367 if (pktinfo_len != 0 && 4368 hn_rndis_check_overlap(oob_off, oob_len, 4369 pktinfo_off, pktinfo_len)) { 4370 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4371 "oob overlaps pktinfo, oob abs %d len %d, " 4372 "pktinfo abs %d len %d\n", 4373 oob_off, oob_len, pktinfo_off, pktinfo_len); 4374 return; 4375 } 4376 } 4377 4378 /* 4379 * Check per-packet-info coverage and find useful per-packet-info. 4380 */ 4381 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; 4382 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; 4383 info.hash_info = HN_NDIS_HASH_INFO_INVALID; 4384 if (__predict_true(pktinfo_len != 0)) { 4385 bool overlap; 4386 int error; 4387 4388 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { 4389 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4390 "pktinfo overflow, msglen %u, " 4391 "pktinfo abs %d len %d\n", 4392 pkt->rm_len, pktinfo_off, pktinfo_len); 4393 return; 4394 } 4395 4396 /* 4397 * Check packet info coverage. 4398 */ 4399 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, 4400 data_off, data_len); 4401 if (__predict_false(overlap)) { 4402 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4403 "pktinfo overlap data, pktinfo abs %d len %d, " 4404 "data abs %d len %d\n", 4405 pktinfo_off, pktinfo_len, data_off, data_len); 4406 return; 4407 } 4408 4409 /* 4410 * Find useful per-packet-info. 
4411 */ 4412 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, 4413 pktinfo_len, &info); 4414 if (__predict_false(error)) { 4415 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " 4416 "pktinfo\n"); 4417 return; 4418 } 4419 } 4420 4421 if (__predict_false(data_off + data_len > pkt->rm_len)) { 4422 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " 4423 "data overflow, msglen %u, data abs %d len %d\n", 4424 pkt->rm_len, data_off, data_len); 4425 return; 4426 } 4427 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); 4428} 4429 4430static __inline void 4431hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen) 4432{ 4433 const struct rndis_msghdr *hdr; 4434 4435 if (__predict_false(dlen < sizeof(*hdr))) { 4436 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); 4437 return; 4438 } 4439 hdr = data; 4440 4441 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) { 4442 /* Hot data path. */ 4443 hn_rndis_rx_data(rxr, data, dlen); 4444 /* Done! */ 4445 return; 4446 } 4447 4448 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG) 4449 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen); 4450 else 4451 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen); 4452} 4453 4454static void 4455hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) 4456{ 4457 const struct hn_nvs_hdr *hdr; 4458 4459 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) { 4460 if_printf(sc->hn_ifp, "invalid nvs notify\n"); 4461 return; 4462 } 4463 hdr = VMBUS_CHANPKT_CONST_DATA(pkt); 4464 4465 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) { 4466 /* Useless; ignore */ 4467 return; 4468 } 4469 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type); 4470} 4471 4472static void 4473hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan, 4474 const struct vmbus_chanpkt_hdr *pkt) 4475{ 4476 struct hn_nvs_sendctx *sndc; 4477 4478 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid; 4479 sndc->hn_cb(sndc, sc, chan, 
VMBUS_CHANPKT_CONST_DATA(pkt), 4480 VMBUS_CHANPKT_DATALEN(pkt)); 4481 /* 4482 * NOTE: 4483 * 'sndc' CAN NOT be accessed anymore, since it can be freed by 4484 * its callback. 4485 */ 4486} 4487 4488static void 4489hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 4490 const struct vmbus_chanpkt_hdr *pkthdr) 4491{ 4492 const struct vmbus_chanpkt_rxbuf *pkt; 4493 const struct hn_nvs_hdr *nvs_hdr; 4494 int count, i, hlen; 4495 4496 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) { 4497 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n"); 4498 return; 4499 } 4500 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr); 4501 4502 /* Make sure that this is a RNDIS message. */ 4503 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) { 4504 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n", 4505 nvs_hdr->nvs_type); 4506 return; 4507 } 4508 4509 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen); 4510 if (__predict_false(hlen < sizeof(*pkt))) { 4511 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n"); 4512 return; 4513 } 4514 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; 4515 4516 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) { 4517 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n", 4518 pkt->cp_rxbuf_id); 4519 return; 4520 } 4521 4522 count = pkt->cp_rxbuf_cnt; 4523 if (__predict_false(hlen < 4524 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) { 4525 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count); 4526 return; 4527 } 4528 4529 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ 4530 for (i = 0; i < count; ++i) { 4531 int ofs, len; 4532 4533 ofs = pkt->cp_rxbuf[i].rb_ofs; 4534 len = pkt->cp_rxbuf[i].rb_len; 4535 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) { 4536 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, " 4537 "ofs %d, len %d\n", i, ofs, len); 4538 continue; 4539 } 4540 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); 4541 } 4542 4543 /* 4544 * Ack the consumed RXBUF 
associated w/ this channel packet, 4545 * so that this RXBUF can be recycled by the hypervisor. 4546 */ 4547 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid); 4548} 4549 4550static void 4551hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, 4552 uint64_t tid) 4553{ 4554 struct hn_nvs_rndis_ack ack; 4555 int retries, error; 4556 4557 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK; 4558 ack.nvs_status = HN_NVS_STATUS_OK; 4559 4560 retries = 0; 4561again: 4562 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 4563 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid); 4564 if (__predict_false(error == EAGAIN)) { 4565 /* 4566 * NOTE: 4567 * This should _not_ happen in real world, since the 4568 * consumption of the TX bufring from the TX path is 4569 * controlled. 4570 */ 4571 if (rxr->hn_ack_failed == 0) 4572 if_printf(rxr->hn_ifp, "RXBUF ack retry\n"); 4573 rxr->hn_ack_failed++; 4574 retries++; 4575 if (retries < 10) { 4576 DELAY(100); 4577 goto again; 4578 } 4579 /* RXBUF leaks! */ 4580 if_printf(rxr->hn_ifp, "RXBUF ack failed\n"); 4581 } 4582} 4583 4584static void 4585hn_chan_callback(struct vmbus_channel *chan, void *xrxr) 4586{ 4587 struct hn_rx_ring *rxr = xrxr; 4588 struct hn_softc *sc = rxr->hn_ifp->if_softc; 4589 4590 for (;;) { 4591 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf; 4592 int error, pktlen; 4593 4594 pktlen = rxr->hn_pktbuf_len; 4595 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen); 4596 if (__predict_false(error == ENOBUFS)) { 4597 void *nbuf; 4598 int nlen; 4599 4600 /* 4601 * Expand channel packet buffer. 4602 * 4603 * XXX 4604 * Use M_WAITOK here, since allocation failure 4605 * is fatal. 4606 */ 4607 nlen = rxr->hn_pktbuf_len * 2; 4608 while (nlen < pktlen) 4609 nlen *= 2; 4610 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK); 4611 4612 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n", 4613 rxr->hn_pktbuf_len, nlen); 4614 4615 free(rxr->hn_pktbuf, M_DEVBUF); 4616 rxr->hn_pktbuf = nbuf; 4617 rxr->hn_pktbuf_len = nlen; 4618 /* Retry! 
*/ 4619 continue; 4620 } else if (__predict_false(error == EAGAIN)) { 4621 /* No more channel packets; done! */ 4622 break; 4623 } 4624 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error)); 4625 4626 switch (pkt->cph_type) { 4627 case VMBUS_CHANPKT_TYPE_COMP: 4628 hn_nvs_handle_comp(sc, chan, pkt); 4629 break; 4630 4631 case VMBUS_CHANPKT_TYPE_RXBUF: 4632 hn_nvs_handle_rxbuf(rxr, chan, pkt); 4633 break; 4634 4635 case VMBUS_CHANPKT_TYPE_INBAND: 4636 hn_nvs_handle_notify(sc, pkt); 4637 break; 4638 4639 default: 4640 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n", 4641 pkt->cph_type); 4642 break; 4643 } 4644 } 4645 hn_chan_rollup(rxr, rxr->hn_txr); 4646} 4647 4648static void 4649hn_tx_taskq_create(void *arg __unused) 4650{ 4651 4652 if (vm_guest != VM_GUEST_HV) 4653 return; 4654 4655 if (!hn_share_tx_taskq) 4656 return; 4657 4658 hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, 4659 taskqueue_thread_enqueue, &hn_tx_taskq); 4660 taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); 4661 if (hn_bind_tx_taskq >= 0) { 4662 int cpu = hn_bind_tx_taskq; 4663 struct task cpuset_task; 4664 cpuset_t cpu_set; 4665 4666 if (cpu > mp_ncpus - 1) 4667 cpu = mp_ncpus - 1; 4668 CPU_SETOF(cpu, &cpu_set); 4669 TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set); 4670 taskqueue_enqueue(hn_tx_taskq, &cpuset_task); 4671 taskqueue_drain(hn_tx_taskq, &cpuset_task); 4672 } 4673} 4674SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND, 4675 hn_tx_taskq_create, NULL); 4676 4677static void 4678hn_tx_taskq_destroy(void *arg __unused) 4679{ 4680 4681 if (hn_tx_taskq != NULL) 4682 taskqueue_free(hn_tx_taskq); 4683} 4684SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND, 4685 hn_tx_taskq_destroy, NULL); 4686