/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * $FreeBSD: stable/11/sys/dev/netmap/if_lem_netmap.h 330897 2018-03-14 03:19:51Z eadler $
 *
 * netmap support for: lem
 *
 * For details on netmap support please see ixgbe_netmap.h
 */


#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>    /* vtophys ? */
#include <dev/netmap/netmap_kern.h>

extern int netmap_adaptive_io;
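/*
 * Added note (an assumption: the variable lives in the netmap core,
 * presumably as a dev.netmap sysctl on this branch): netmap_adaptive_io
 * selects the reclaim heuristics in the NIC_PARAVIRT paths below
 * (0 = off, 1 = adaptive threshold, 2 = fixed threshold of lim/8).
 */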

/*
 * Register/unregister. We are already under netmap lock.
 */
static int
lem_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct adapter *adapter = ifp->if_softc;

	EM_CORE_LOCK(adapter);

	lem_disable_intr(adapter);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

#ifndef EM_LEGACY_IRQ // XXX do we need this ?
	taskqueue_block(adapter->tq);
	taskqueue_drain(adapter->tq, &adapter->rxtx_task);
	taskqueue_drain(adapter->tq, &adapter->link_task);
#endif /* !EM_LEGACY_IRQ */

	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	lem_init_locked(adapter);	/* also enables interrupts */

#ifndef EM_LEGACY_IRQ
	taskqueue_unblock(adapter->tq); // XXX do we need this ?
#endif /* !EM_LEGACY_IRQ */

	EM_CORE_UNLOCK(adapter);

	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
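
/*
 * Added sketch (illustrative, not part of the driver): nm_register is
 * invoked when a userspace client binds or releases the interface.
 * Assuming the standard <net/netmap_user.h> helpers, a minimal client
 * looks like:
 *
 *	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
 *	if (d != NULL) {
 *		ioctl(d->fd, NIOCTXSYNC, NULL);	// calls lem_netmap_txsync
 *		nm_close(d);			// back to normal operation
 *	}
 *
 * nm_open() issues NIOCREGIF, which reaches this function with
 * onoff=1; closing the descriptor ends up here with onoff=0.
 */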


/*
 * Reconcile kernel and user view of the transmit ring.
 */
static int
lem_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	/* generate an interrupt approximately every half ring */
	u_int report_frequency = kring->nkr_num_slots >> 1;

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
	struct paravirt_csb *csb = adapter->csb;
	uint64_t *csbd = (uint64_t *)(csb + 1);
#endif /* NIC_PARAVIRT */
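
	/*
	 * Added note (an inference from the code below): csbd points just
	 * past the shared CSB and is used as an array of 64-bit ad-hoc
	 * statistics/tuning slots; the indices (csbd[0], csbd[8..10],
	 * csbd[15..21], csbd[24..29]) have no symbolic names here.
	 */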

	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
			BUS_DMASYNC_POSTREAD);

	/*
	 * First part: process new packets to send.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
#ifdef NIC_PARAVIRT
		int do_kick = 0;
		uint64_t t = 0; // timestamp
		int n = head - nm_i;
		if (n < 0)
			n += lim + 1;
		if (csb) {
			t = rdtsc(); /* last timestamp */
			csbd[16] += t - csbd[0]; /* total Wg */
			csbd[17] += n;		/* Wg count */
			csbd[0] = t;
		}
#endif /* NIC_PARAVIRT */
		nic_i = netmap_idx_k2n(kring, nm_i);
		while (nm_i != head) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			/* device-specific */
			struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
			struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i];
			int flags = (slot->flags & NS_REPORT ||
				nic_i == 0 || nic_i == report_frequency) ?
				E1000_TXD_CMD_RS : 0;
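			/*
			 * Added note: E1000_TXD_CMD_RS requests a status
			 * write-back for this descriptor; setting it on
			 * NS_REPORT and about twice per ring bounds how
			 * long completions can go unreported.
			 */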

			NM_CHECK_ADDR_LEN(na, addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				curr->buffer_addr = htole64(paddr);
				netmap_reload_map(na, adapter->txtag, txbuf->map, addr);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

			/* Fill the slot in the NIC ring. */
			curr->upper.data = 0;
			curr->lower.data = htole32(adapter->txd_cmd | len |
				(E1000_TXD_CMD_EOP | flags));
			bus_dmamap_sync(adapter->txtag, txbuf->map,
				BUS_DMASYNC_PREWRITE);

			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
			// XXX might try an early kick
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

#ifdef NIC_PARAVIRT
		/* set unconditionally, then also kick if needed */
		if (csb) {
			t = rdtsc();
			if (csb->host_need_txkick == 2) {
				/* can compute an update of delta */
				int64_t delta = t - csbd[3];
				if (delta < 0)
					delta = -delta;
				if (csbd[8] == 0 || delta < csbd[8]) {
					csbd[8] = delta;
					csbd[9]++;
				}
				csbd[10]++;
			}
			csb->guest_tdt = nic_i;
			csbd[18] += t - csbd[0]; // total Wp
			csbd[19] += n;
		}
		if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1))
			do_kick = 1;
		if (do_kick)
#endif /* NIC_PARAVIRT */
		/* (re)start the tx unit up to slot nic_i (excluded) */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
#ifdef NIC_PARAVIRT
		if (do_kick) {
			uint64_t t1 = rdtsc();
			csbd[20] += t1 - t; // total Np
			csbd[21]++;
		}
#endif /* NIC_PARAVIRT */
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
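	/*
	 * Added note: reclaims are rate-limited to one per clock tick
	 * unless explicitly requested (NAF_FORCE_RECLAIM) or the ring
	 * has run out of slots (nm_kr_txempty()).
	 */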
	if (ticks != kring->last_reclaim || (flags & NAF_FORCE_RECLAIM) ||
	    nm_kr_txempty(kring)) {
		kring->last_reclaim = ticks;
		/* record completed transmissions using TDH */
#ifdef NIC_PARAVIRT
		/* host updates tdh unconditionally, and we have
		 * no side effects on reads, so we can read from there
		 * instead of exiting.
		 */
		if (csb) {
		    static int drain = 0, nodrain = 0, good = 0, bad = 0, fail = 0;
		    u_int x = adapter->next_tx_to_clean;
		    csbd[19]++; // XXX count reclaims
		    nic_i = csb->host_tdh;
		    if (csb->guest_csb_on) {
			if (nic_i == x) {
			    bad++;
			    csbd[24]++; // failed reclaims
			    /* no progress, request kick and retry */
			    csb->guest_need_txkick = 1;
			    mb(); // XXX barrier
			    nic_i = csb->host_tdh;
			} else {
			    good++;
			}
			if (nic_i != x) {
			    csb->guest_need_txkick = 2;
			    if (nic_i == csb->guest_tdt)
				drain++;
			    else
				nodrain++;
#if 1
			    if (netmap_adaptive_io) {
				/* new mechanism: the last half ring (or so)
				 * is released one slot at a time.
				 * This effectively makes the system spin.
				 *
				 * Take next_to_clean + 1 as a reference;
				 * tdh must be ahead or equal.
				 * On entry, the logical order is
				 *		x < tdh = nic_i
				 * We first push tdh up to avoid wraps.
				 * The limit is tdh - ll (half ring):
				 * if tdh - ll <= x we report x,
				 * else we report tdh - ll.
				 */
				u_int tdh = nic_i;
				u_int ll = csbd[15];
				u_int delta = lim/8;
				if (netmap_adaptive_io == 2 || ll > delta)
				    csbd[15] = ll = delta;
				else if (netmap_adaptive_io == 1 && ll > 1) {
				    csbd[15]--;
				}

				if (nic_i >= kring->nkr_num_slots) {
				    RD(5, "bad nic_i %d on input", nic_i);
				}
				x = nm_next(x, lim);
				if (tdh < x)
				    tdh += lim + 1;
				if (tdh <= x + ll) {
				    nic_i = x;
				    csbd[25]++; // report n + 1
				} else {
				    tdh = nic_i;
				    if (tdh < ll)
					tdh += lim + 1;
				    nic_i = tdh - ll;
				    csbd[26]++; // report tdh - ll
				}
			    }
#endif
			} else {
			    /* we stop, count whether we are idle or not */
			    int bh_active = csb->host_need_txkick & 2 ? 4 : 0;
			    csbd[27 + csb->host_need_txkick]++;
			    if (netmap_adaptive_io == 1) {
				if (bh_active && csbd[15] > 1)
				    csbd[15]--;
				else if (!bh_active && csbd[15] < lim/2)
				    csbd[15]++;
			    }
			    bad--;
			    fail++;
			}
		    }
		    RD(1, "drain %d nodrain %d good %d retry %d fail %d",
			drain, nodrain, good, bad, fail);
		} else
#endif /* !NIC_PARAVIRT */
		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
			D("TDH wrap %d", nic_i);
			nic_i -= kring->nkr_num_slots;
		}
		adapter->next_tx_to_clean = nic_i;
		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
	}

	return 0;
}
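
/*
 * Added note (netmap core semantics): userspace may fill TX slots from
 * head up to tail-1, and the kernel advances nr_hwtail as the NIC
 * completes transmissions. The nm_prev() above keeps the tail one slot
 * short of next_tx_to_clean so that a full ring is never confused with
 * an empty one.
 */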


/*
 * Reconcile kernel and user view of the receive ring.
 */
static int
lem_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	int force_update = (flags & NAF_FORCE_READ) ||
	    (kring->nr_kflags & NKR_PENDINTR);

	/* device-specific */
	struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
	struct paravirt_csb *csb = adapter->csb;
	uint32_t csb_mode = csb && csb->guest_csb_on;
	uint32_t do_host_rxkick = 0;
#endif /* NIC_PARAVIRT */

	if (head > lim)
		return netmap_ring_reinit(kring);

#ifdef NIC_PARAVIRT
	if (csb_mode) {
		force_update = 1;
		csb->guest_need_rxkick = 0;
	}
#endif /* NIC_PARAVIRT */
	/* XXX check sync modes */
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = adapter->next_rx_desc_to_check;
		nm_i = netmap_idx_n2k(kring, nic_i);

		for (n = 0; ; n++) {
			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
			uint32_t staterr = le32toh(curr->status);
			int len;

#ifdef NIC_PARAVIRT
			if (csb_mode) {
			    if ((staterr & E1000_RXD_STAT_DD) == 0) {
				/* don't bother to retry if more than 1 pkt */
				if (n > 1)
				    break;
				csb->guest_need_rxkick = 1;
				wmb();
				staterr = le32toh(curr->status);
				if ((staterr & E1000_RXD_STAT_DD) == 0) {
				    break;
				} else { /* we are good */
				    csb->guest_need_rxkick = 0;
				}
			    }
			} else
#endif /* NIC_PARAVIRT */
			if ((staterr & E1000_RXD_STAT_DD) == 0)
				break;
			len = le16toh(curr->length) - 4; // strip the 4-byte CRC
			if (len < 0) {
				RD(5, "bogus pkt (%d) size %d nic idx %d", n, len, nic_i);
				len = 0;
			}
			ring->slot[nm_i].len = len;
			ring->slot[nm_i].flags = slot_flags;
			bus_dmamap_sync(adapter->rxtag,
				adapter->rx_buffer_area[nic_i].map,
				BUS_DMASYNC_POSTREAD);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n) { /* update the state variables */
#ifdef NIC_PARAVIRT
			if (csb_mode) {
			    if (n > 1) {
				/* leave one spare buffer so we avoid rxkicks */
				nm_i = nm_prev(nm_i, lim);
				nic_i = nm_prev(nic_i, lim);
				n--;
			    } else {
				csb->guest_need_rxkick = 1;
			    }
			}
#endif /* NIC_PARAVIRT */
			ND("%d new packets at nic %d nm %d tail %d",
				n,
				adapter->next_rx_desc_to_check,
				netmap_idx_n2k(kring, adapter->next_rx_desc_to_check),
				kring->nr_hwtail);
			adapter->next_rx_desc_to_check = nic_i;
			// if_inc_counter(ifp, IFCOUNTER_IPACKETS, n);
			kring->nr_hwtail = nm_i;
		}
		kring->nr_kflags &= ~NKR_PENDINTR;
	}
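
	/*
	 * Added note: NKR_PENDINTR is set on this kring by the interrupt
	 * path (netmap_rx_irq()) to signal that new packets may be
	 * pending; clearing it above marks the ring as scanned.
	 */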

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != head) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
			struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];

			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
				goto ring_reset;

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				curr->buffer_addr = htole64(paddr);
				netmap_reload_map(na, adapter->rxtag, rxbuf->map, addr);
				slot->flags &= ~NS_BUF_CHANGED;
			}
			curr->status = 0;
			bus_dmamap_sync(adapter->rxtag, rxbuf->map,
			    BUS_DMASYNC_PREREAD);
#ifdef NIC_PARAVIRT
			if (csb_mode && csb->host_rxkick_at == nic_i)
				do_host_rxkick = 1;
#endif /* NIC_PARAVIRT */
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/*
		 * IMPORTANT: we must leave one free slot in the ring,
		 * so move nic_i back by one unit
		 */
		nic_i = nm_prev(nic_i, lim);
#ifdef NIC_PARAVIRT
		/* set unconditionally, then also kick if needed */
		if (csb)
			csb->guest_rdt = nic_i;
		if (!csb_mode || do_host_rxkick)
#endif /* NIC_PARAVIRT */
		E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
	}

	return 0;

ring_reset:
	return netmap_ring_reinit(kring);
}


static void
lem_netmap_attach(struct adapter *adapter)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));

	na.ifp = adapter->ifp;
	na.na_flags = NAF_BDG_MAYSLEEP;
	na.num_tx_desc = adapter->num_tx_desc;
	na.num_rx_desc = adapter->num_rx_desc;
	na.nm_txsync = lem_netmap_txsync;
	na.nm_rxsync = lem_netmap_rxsync;
	na.nm_register = lem_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = 1;
	netmap_attach(&na);
}
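
/*
 * Added note (an assumption based on the usual netmap integration):
 * the driver calls lem_netmap_attach() at the end of lem_attach() in
 * if_lem.c when DEV_NETMAP is defined, and netmap_detach(ifp) on the
 * detach path.
 */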

/* end of file */