/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * $FreeBSD: stable/11/sys/dev/netmap/if_lem_netmap.h 330897 2018-03-14 03:19:51Z eadler $
 *
 * netmap support for: lem
 *
 * For details on netmap support please see ixgbe_netmap.h
 */


#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>	/* vtophys ? */
#include <dev/netmap/netmap_kern.h>

extern int netmap_adaptive_io;

/*
 * Register/unregister. We are already under netmap lock.
 */
static int
lem_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	EM_CORE_LOCK(adapter);

	lem_disable_intr(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

#ifndef EM_LEGACY_IRQ	// XXX do we need this ?
	taskqueue_block(adapter->tq);
	taskqueue_drain(adapter->tq, &adapter->rxtx_task);
	taskqueue_drain(adapter->tq, &adapter->link_task);
#endif /* !EM_LEGACY_IRQ */

	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	lem_init_locked(adapter);	/* also enable intr */

#ifndef EM_LEGACY_IRQ
	taskqueue_unblock(adapter->tq);	// XXX do we need this ?
#endif /* !EM_LEGACY_IRQ */

	EM_CORE_UNLOCK(adapter);

	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}

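/*
 * NIC_PARAVIRT notes, inferred from the accesses in the txsync/rxsync
 * routines below (the authoritative layout lives with the paravirtual
 * e1000 host, so read this as an aid, not a spec):
 *
 * - csb is the Communication Status Block shared with the hypervisor;
 *   the guest_* fields are written by this driver, the host_* fields
 *   by the host.
 * - csbd is a uint64_t statistics area that apparently sits right past
 *   the CSB (csb + 1). Indices touched below:
 *     [0]       timestamp (rdtsc) of the previous txsync prologue
 *     [3]       a host-written timestamp used to estimate kick latency
 *     [8..10]   minimum observed latency delta and related counters
 *     [15]      "ll", the adaptive reclaim lookahead (netmap_adaptive_io)
 *     [16..21]  totals/counts labelled Wg, wp and Np in the comments
 *     [19]      also bumped once per reclaim attempt (see XXX below)
 *     [24..26]  failed reclaims and which reclaim branch was taken
 *     [27..]    counters indexed by host_need_txkick when reclaim stops
 */
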
/*
 * Reconcile kernel and user view of the transmit ring.
 */
static int
lem_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/* generate an interrupt approximately every half ring */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
	struct paravirt_csb *csb = adapter->csb;
	uint64_t *csbd = (uint64_t *)(csb + 1);
#endif /* NIC_PARAVIRT */

	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
			BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
#ifdef NIC_PARAVIRT
		int do_kick = 0;
		uint64_t t = 0;	// timestamp
		int n = head - nm_i;
		if (n < 0)
			n += lim + 1;
		if (csb) {
			t = rdtsc();	/* last timestamp */
			csbd[16] += t - csbd[0];	/* total Wg */
			csbd[17] += n;			/* Wg count */
			csbd[0] = t;
		}
#endif /* NIC_PARAVIRT */
		nic_i = netmap_idx_k2n(kring, nm_i);
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			/* device-specific */
			struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
			struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i];
			int flags = (slot->flags & NS_REPORT ||
				nic_i == 0 || nic_i == report_frequency) ?
				E1000_TXD_CMD_RS : 0;

			NM_CHECK_ADDR_LEN(na, addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				curr->buffer_addr = htole64(paddr);
				netmap_reload_map(na, adapter->txtag, txbuf->map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			/* Fill the slot in the NIC ring. */
			curr->upper.data = 0;
			curr->lower.data = htole32(adapter->txd_cmd | len |
				(E1000_TXD_CMD_EOP | flags));
			bus_dmamap_sync(adapter->txtag, txbuf->map,
				BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
			// XXX might try an early kick
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

#ifdef NIC_PARAVIRT
		/* set unconditionally, then also kick if needed */
		if (csb) {
			t = rdtsc();
			if (csb->host_need_txkick == 2) {
				/* can compute an update of delta */
				int64_t delta = t - csbd[3];
				if (delta < 0)
					delta = -delta;
				if (csbd[8] == 0 || delta < csbd[8]) {
					csbd[8] = delta;
					csbd[9]++;
				}
				csbd[10]++;
			}
			csb->guest_tdt = nic_i;
			csbd[18] += t - csbd[0];	// total wp
			csbd[19] += n;
		}
		if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1))
			do_kick = 1;
		if (do_kick)
#endif /* NIC_PARAVIRT */
		/* (re)start the tx unit up to slot nic_i (excluded) */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
#ifdef NIC_PARAVIRT
		if (do_kick) {
			uint64_t t1 = rdtsc();
			csbd[20] += t1 - t;	// total Np
			csbd[21]++;
		}
#endif /* NIC_PARAVIRT */
	}

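	/*
	 * A note on the loop above: the RS (Report Status) bit is requested
	 * only for slots carrying NS_REPORT and when nic_i crosses slot 0 or
	 * report_frequency. For example, with a 1024-slot ring lim = 1023 and
	 * report_frequency = 512, so at most two descriptors per traversal
	 * generate a status writeback; everything in between is reclaimed
	 * lazily in the second part below. The TDT write follows the e1000
	 * convention that the tail points one past the last descriptor the
	 * NIC may transmit.
	 */
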
	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
		kring->last_reclaim = ticks;
		/* record completed transmissions using TDH */
#ifdef NIC_PARAVIRT
		/* host updates tdh unconditionally, and we have
		 * no side effects on reads, so we can read from there
		 * instead of exiting.
		 */
		if (csb) {
			static int drain = 0, nodrain = 0, good = 0, bad = 0, fail = 0;
			u_int x = adapter->next_tx_to_clean;
			csbd[19]++;	// XXX count reclaims
			nic_i = csb->host_tdh;
			if (csb->guest_csb_on) {
				if (nic_i == x) {
					bad++;
					csbd[24]++;	// failed reclaims
					/* no progress, request kick and retry */
					csb->guest_need_txkick = 1;
					mb();	// XXX barrier
					nic_i = csb->host_tdh;
				} else {
					good++;
				}
				if (nic_i != x) {
					csb->guest_need_txkick = 2;
					if (nic_i == csb->guest_tdt)
						drain++;
					else
						nodrain++;
#if 1
					if (netmap_adaptive_io) {
						/* new mechanism: last half ring (or so)
						 * released one slot at a time.
						 * This effectively makes the system spin.
						 *
						 * Take next_to_clean + 1 as a reference.
						 * tdh must be ahead or equal
						 * On entry, the logical order is
						 *	x < tdh = nic_i
						 * We first push tdh up to avoid wraps.
						 * The limit is tdh-ll (half ring).
						 * if tdh-256 < x we report x;
						 * else we report tdh-256
						 */
						u_int tdh = nic_i;
						u_int ll = csbd[15];
						u_int delta = lim/8;
						if (netmap_adaptive_io == 2 || ll > delta)
							csbd[15] = ll = delta;
						else if (netmap_adaptive_io == 1 && ll > 1) {
							csbd[15]--;
						}

						if (nic_i >= kring->nkr_num_slots) {
							RD(5, "bad nic_i %d on input", nic_i);
						}
						x = nm_next(x, lim);
						if (tdh < x)
							tdh += lim + 1;
						if (tdh <= x + ll) {
							nic_i = x;
							csbd[25]++;	// report n + 1;
						} else {
							tdh = nic_i;
							if (tdh < ll)
								tdh += lim + 1;
							nic_i = tdh - ll;
							csbd[26]++;	// report tdh - ll
						}
					}
#endif
				} else {
					/* we stop, count whether we are idle or not */
					int bh_active = csb->host_need_txkick & 2 ? 4 : 0;
					csbd[27 + csb->host_need_txkick]++;
					if (netmap_adaptive_io == 1) {
						if (bh_active && csbd[15] > 1)
							csbd[15]--;
						else if (!bh_active && csbd[15] < lim/2)
							csbd[15]++;
					}
					bad--;
					fail++;
				}
			}
			RD(1, "drain %d nodrain %d good %d retry %d fail %d",
				drain, nodrain, good, bad, fail);
		} else
#endif /* NIC_PARAVIRT */
		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
		if (nic_i >= kring->nkr_num_slots) {	/* XXX can it happen ? */
			D("TDH wrap %d", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		adapter->next_tx_to_clean = nic_i;
		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
	}

	return 0;
}

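/*
 * A worked example of the reclaim step above, assuming nkr_hwofs is zero
 * so that netmap_idx_n2k() is the identity: with 1024 slots (lim = 1023),
 * a TDH of 5 means descriptors 0..4 completed, so next_tx_to_clean becomes
 * 5 and nr_hwtail = nm_prev(5, 1023) = 4. Keeping hwtail one slot behind
 * the NIC head preserves the netmap convention that one slot always stays
 * unused, so a full ring can be distinguished from an empty one.
 */
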
/*
 * Reconcile kernel and user view of the receive ring.
 */
static int
lem_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
	struct paravirt_csb *csb = adapter->csb;
	uint32_t csb_mode = csb && csb->guest_csb_on;
	uint32_t do_host_rxkick = 0;
#endif /* NIC_PARAVIRT */

	if (head > lim)
		return netmap_ring_reinit(kring);

#ifdef NIC_PARAVIRT
	if (csb_mode) {
		force_update = 1;
		csb->guest_need_rxkick = 0;
	}
#endif /* NIC_PARAVIRT */
	/* XXX check sync modes */
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = adapter->next_rx_desc_to_check;
		nm_i = netmap_idx_n2k(kring, nic_i);

		for (n = 0; ; n++) {
			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
			uint32_t staterr = le32toh(curr->status);
			int len;

#ifdef NIC_PARAVIRT
			if (csb_mode) {
				if ((staterr & E1000_RXD_STAT_DD) == 0) {
					/* don't bother to retry if more than 1 pkt */
					if (n > 1)
						break;
					csb->guest_need_rxkick = 1;
					wmb();
					staterr = le32toh(curr->status);
					if ((staterr & E1000_RXD_STAT_DD) == 0) {
						break;
					} else { /* we are good */
						csb->guest_need_rxkick = 0;
					}
				}
			} else
#endif /* NIC_PARAVIRT */
			if ((staterr & E1000_RXD_STAT_DD) == 0)
				break;
			len = le16toh(curr->length) - 4;	// CRC
			if (len < 0) {
				RD(5, "bogus pkt (%d) size %d nic idx %d", n, len, nic_i);
				len = 0;
			}
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(adapter->rxtag,
				adapter->rx_buffer_area[nic_i].map,
				BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
#ifdef NIC_PARAVIRT
			if (csb_mode) {
				if (n > 1) {
					/* leave one spare buffer so we avoid rxkicks */
					nm_i = nm_prev(nm_i, lim);
					nic_i = nm_prev(nic_i, lim);
					n--;
				} else {
					csb->guest_need_rxkick = 1;
				}
			}
#endif /* NIC_PARAVIRT */
			ND("%d new packets at nic %d nm %d tail %d",
				n,
				adapter->next_rx_desc_to_check,
				netmap_idx_n2k(kring, adapter->next_rx_desc_to_check),
				kring->nr_hwtail);
			adapter->next_rx_desc_to_check = nic_i;
			// if_inc_counter(ifp, IFCOUNTER_IPACKETS, n);
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

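	/*
	 * The csb_mode double check above closes a race with the host: the
	 * guest first advertises guest_need_rxkick = 1, issues a write
	 * barrier so the host is guaranteed to see it, and only then re-reads
	 * the descriptor status. If DD is still clear, the host will kick us
	 * when the next packet lands; if DD appeared in the window, the
	 * request is withdrawn and polling continues. Holding back one
	 * "spare" completed buffer likewise seems intended to let the next
	 * rxsync find a DD descriptor without needing a kick.
	 */
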
	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
			struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];

			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				curr->buffer_addr = htole64(paddr);
				netmap_reload_map(na, adapter->rxtag, rxbuf->map, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			curr->status = 0;
			bus_dmamap_sync(adapter->rxtag, rxbuf->map,
				BUS_DMASYNC_PREREAD);
#ifdef NIC_PARAVIRT
			if (csb_mode && csb->host_rxkick_at == nic_i)
				do_host_rxkick = 1;
#endif /* NIC_PARAVIRT */
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit
		 */
		nic_i = nm_prev(nic_i, lim);
#ifdef NIC_PARAVIRT
		/* set unconditionally, then also kick if needed */
		if (csb)
			csb->guest_rdt = nic_i;
		if (!csb_mode || do_host_rxkick)
#endif /* NIC_PARAVIRT */
		E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
	}

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


static void
lem_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = lem_netmap_txsync;
	na.nm_rxsync = lem_netmap_rxsync;
	na.nm_register = lem_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = 1;
	netmap_attach(&na);
}

/* end of file */