ixl_txrx.c revision 281955
1/******************************************************************************
2
3  Copyright (c) 2013-2014, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/ixl/ixl_txrx.c 281955 2015-04-24 23:26:44Z hiren $*/
34
35/*
36**	IXL driver TX/RX Routines:
37**	    This was separated to allow usage by
38** 	    both the BASE and the VF drivers.
39*/
40
41#include "opt_inet.h"
42#include "opt_inet6.h"
43#include "ixl.h"
44
45/* Local Prototypes */
46static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
47static void	ixl_refresh_mbufs(struct ixl_queue *, int);
48static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
49static int	ixl_tx_setup_offload(struct ixl_queue *,
50		    struct mbuf *, u32 *, u32 *);
51static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
52
53static __inline void ixl_rx_discard(struct rx_ring *, int);
54static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
55		    struct mbuf *, u8);
56
57/*
58** Multiqueue Transmit driver
59**
60*/
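/*
** Queue selection (descriptive note): when the mbuf carries a valid
** flowid hash the packet is mapped onto a queue with a simple modulo,
** otherwise the current CPU index is used; if the chosen queue is no
** longer marked in vsi->active_queues (e.g. it has been flagged as
** hung) the frame is redirected to another active queue. The frame is
** then enqueued on that ring's buf_ring and drained either inline, if
** the TX lock can be taken, or from the queue's taskqueue.
*/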
61int
62ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
63{
64	struct ixl_vsi		*vsi = ifp->if_softc;
65	struct ixl_queue	*que;
66	struct tx_ring		*txr;
67	int 			err, i;
68
69	/* check if flowid is set */
70	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
71		i = m->m_pkthdr.flowid % vsi->num_queues;
72	else
73		i = curcpu % vsi->num_queues;
74
75	/* Check for a hung queue and pick alternative */
76	if (((1 << i) & vsi->active_queues) == 0)
77		i = ffsl(vsi->active_queues);
78
79	que = &vsi->queues[i];
80	txr = &que->txr;
81
82	err = drbr_enqueue(ifp, txr->br, m);
83	if (err)
84		return(err);
85	if (IXL_TX_TRYLOCK(txr)) {
86		ixl_mq_start_locked(ifp, txr);
87		IXL_TX_UNLOCK(txr);
88	} else
89		taskqueue_enqueue(que->tq, &que->tx_task);
90
91	return (0);
92}
93
94int
95ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
96{
97	struct ixl_queue	*que = txr->que;
98	struct ixl_vsi		*vsi = que->vsi;
99        struct mbuf		*next;
100        int			err = 0;
101
102
103	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
104	    vsi->link_active == 0)
105		return (ENETDOWN);
106
107	/* Process the transmit queue */
108	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
109		if ((err = ixl_xmit(que, &next)) != 0) {
110			if (next == NULL)
111				drbr_advance(ifp, txr->br);
112			else
113				drbr_putback(ifp, txr->br, next);
114			break;
115		}
116		drbr_advance(ifp, txr->br);
117		/* Send a copy of the frame to the BPF listener */
118		ETHER_BPF_MTAP(ifp, next);
119		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
120			break;
121	}
122
123	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
124		ixl_txeof(que);
125
126	return (err);
127}
128
129/*
130 * Called from a taskqueue to drain queued transmit packets.
131 */
132void
133ixl_deferred_mq_start(void *arg, int pending)
134{
135	struct ixl_queue	*que = arg;
136        struct tx_ring		*txr = &que->txr;
137	struct ixl_vsi		*vsi = que->vsi;
138        struct ifnet		*ifp = vsi->ifp;
139
140	IXL_TX_LOCK(txr);
141	if (!drbr_empty(ifp, txr->br))
142		ixl_mq_start_locked(ifp, txr);
143	IXL_TX_UNLOCK(txr);
144}
145
146/*
147** Flush all queue ring buffers
148*/
149void
150ixl_qflush(struct ifnet *ifp)
151{
152	struct ixl_vsi	*vsi = ifp->if_softc;
153
154        for (int i = 0; i < vsi->num_queues; i++) {
155		struct ixl_queue *que = &vsi->queues[i];
156		struct tx_ring	*txr = &que->txr;
157		struct mbuf	*m;
158		IXL_TX_LOCK(txr);
159		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
160			m_freem(m);
161		IXL_TX_UNLOCK(txr);
162	}
163	if_qflush(ifp);
164}
165
166/*
167** Find mbuf chains passed to the driver
168** that are 'sparse', using more than 8
169** mbufs to deliver an MSS-sized chunk of data
170*/
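/*
** Note: the caller (ixl_xmit) uses this check to decide whether a TSO
** packet must be defragmented with m_defrag() before it is DMA-mapped;
** chains needing more than IXL_SPARSE_CHAIN mbufs per MSS are assumed
** to exceed what the hardware will accept for a single segment.
*/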
171static inline bool
172ixl_tso_detect_sparse(struct mbuf *mp)
173{
174	struct mbuf	*m;
175	int		num = 0, mss;
176	bool		ret = FALSE;
177
178	mss = mp->m_pkthdr.tso_segsz;
179	for (m = mp->m_next; m != NULL; m = m->m_next) {
180		num++;
181		mss -= m->m_len;
182		if (mss < 1)
183			break;
184		if (m->m_next == NULL)
185			break;
186	}
187	if (num > IXL_SPARSE_CHAIN)
188		ret = TRUE;
189
190	return (ret);
191}
192
193
194/*********************************************************************
195 *
196 *  This routine maps the mbufs to tx descriptors, allowing the
197 *  TX engine to transmit the packets.
198 *  	- return 0 on success, positive on failure
199 *
200 **********************************************************************/
201#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
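/*
** EOP marks the final descriptor of a frame; RS asks the hardware to
** report completion, which this driver then observes through the head
** write-back location read by ixl_get_tx_head().
*/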
202
203static int
204ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
205{
206	struct ixl_vsi		*vsi = que->vsi;
207	struct i40e_hw		*hw = vsi->hw;
208	struct tx_ring		*txr = &que->txr;
209	struct ixl_tx_buf	*buf;
210	struct i40e_tx_desc	*txd = NULL;
211	struct mbuf		*m_head, *m;
212	int             	i, j, error, nsegs, maxsegs;
213	int			first, last = 0;
214	u16			vtag = 0;
215	u32			cmd, off;
216	bus_dmamap_t		map;
217	bus_dma_tag_t		tag;
218	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
219
220
221	cmd = off = 0;
222	m_head = *m_headp;
223
224        /*
225         * Important to capture the first descriptor
226         * used because it will contain the index of
227         * the one we tell the hardware to report back
228         */
229        first = txr->next_avail;
230	buf = &txr->buffers[first];
231	map = buf->map;
232	tag = txr->tx_tag;
233	maxsegs = IXL_MAX_TX_SEGS;
234
235	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
236		/* Use larger mapping for TSO */
237		tag = txr->tso_tag;
238		maxsegs = IXL_MAX_TSO_SEGS;
239		if (ixl_tso_detect_sparse(m_head)) {
240			m = m_defrag(m_head, M_NOWAIT);
241			if (m == NULL) {
242				m_freem(*m_headp);
243				*m_headp = NULL;
244				return (ENOBUFS);
245			}
246			*m_headp = m;
247		}
248	}
249
250	/*
251	 * Map the packet for DMA.
252	 */
253	error = bus_dmamap_load_mbuf_sg(tag, map,
254	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
255
256	if (error == EFBIG) {
257		struct mbuf *m;
258
259		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
260		if (m == NULL) {
261			que->mbuf_defrag_failed++;
262			m_freem(*m_headp);
263			*m_headp = NULL;
264			return (ENOBUFS);
265		}
266		*m_headp = m;
267
268		/* Try it again */
269		error = bus_dmamap_load_mbuf_sg(tag, map,
270		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
271
272		if (error == ENOMEM) {
273			que->tx_dma_setup++;
274			return (error);
275		} else if (error != 0) {
276			que->tx_dma_setup++;
277			m_freem(*m_headp);
278			*m_headp = NULL;
279			return (error);
280		}
281	} else if (error == ENOMEM) {
282		que->tx_dma_setup++;
283		return (error);
284	} else if (error != 0) {
285		que->tx_dma_setup++;
286		m_freem(*m_headp);
287		*m_headp = NULL;
288		return (error);
289	}
290
291	/* Make certain there are enough descriptors */
292	if (nsegs > txr->avail - 2) {
293		txr->no_desc++;
294		error = ENOBUFS;
295		goto xmit_fail;
296	}
297	m_head = *m_headp;
298
299	/* Set up the TSO/CSUM offload */
300	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
301		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
302		if (error)
303			goto xmit_fail;
304	}
305
306	cmd |= I40E_TX_DESC_CMD_ICRC;
307	/* Grab the VLAN tag */
308	if (m_head->m_flags & M_VLANTAG) {
309		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
310		vtag = htole16(m_head->m_pkthdr.ether_vtag);
311	}
312
313	i = txr->next_avail;
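	/*
	 * Each data descriptor packs, into a single 64-bit word: the
	 * descriptor type, the command flags (cmd), the header-length
	 * offsets computed by ixl_tx_setup_offload() (MACLEN in 2-byte
	 * units, IPLEN/L4LEN in 4-byte units), the segment length, and
	 * the VLAN tag.
	 */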
314	for (j = 0; j < nsegs; j++) {
315		bus_size_t seglen;
316
317		buf = &txr->buffers[i];
318		buf->tag = tag; /* Keep track of the type tag */
319		txd = &txr->base[i];
320		seglen = segs[j].ds_len;
321
322		txd->buffer_addr = htole64(segs[j].ds_addr);
323		txd->cmd_type_offset_bsz =
324		    htole64(I40E_TX_DESC_DTYPE_DATA
325		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
326		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
327		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
328		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
329
330		last = i; /* descriptor that will get completion IRQ */
331
332		if (++i == que->num_desc)
333			i = 0;
334
335		buf->m_head = NULL;
336		buf->eop_index = -1;
337	}
338	/* Set the last descriptor for report */
339	txd->cmd_type_offset_bsz |=
340	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
341	txr->avail -= nsegs;
342	txr->next_avail = i;
343
344	buf->m_head = m_head;
345	/* Swap the dma map between the first and last descriptor */
346	txr->buffers[first].map = buf->map;
347	buf->map = map;
348	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
349
350        /* Set the index of the descriptor that will be marked done */
351        buf = &txr->buffers[first];
352	buf->eop_index = last;
353
354        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
355            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
356	/*
357	 * Advance the Transmit Descriptor Tail (TDT); this tells the
358	 * hardware that this frame is available to transmit.
359	 */
360	++txr->total_packets;
361	wr32(hw, txr->tail, i);
362
363	ixl_flush(hw);
364	/* Mark outstanding work */
365	if (que->busy == 0)
366		que->busy = 1;
367	return (0);
368
369xmit_fail:
370	bus_dmamap_unload(tag, buf->map);
371	return (error);
372}
373
374
375/*********************************************************************
376 *
377 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
378 *  the information needed to transmit a packet on the wire. This is
379 *  called only once at attach, setup is done every reset.
380 *
381 **********************************************************************/
382int
383ixl_allocate_tx_data(struct ixl_queue *que)
384{
385	struct tx_ring		*txr = &que->txr;
386	struct ixl_vsi		*vsi = que->vsi;
387	device_t		dev = vsi->dev;
388	struct ixl_tx_buf	*buf;
389	int			error = 0;
390
391	/*
392	 * Setup DMA descriptor areas.
393	 */
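	/*
	 * The default TX tag allows a packet of up to IXL_TSO_SIZE bytes
	 * spread over at most IXL_MAX_TX_SEGS segments of PAGE_SIZE each;
	 * the TSO tag created below is identical except for its larger
	 * IXL_MAX_TSO_SEGS segment count.
	 */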
394	if ((error = bus_dma_tag_create(NULL,		/* parent */
395			       1, 0,			/* alignment, bounds */
396			       BUS_SPACE_MAXADDR,	/* lowaddr */
397			       BUS_SPACE_MAXADDR,	/* highaddr */
398			       NULL, NULL,		/* filter, filterarg */
399			       IXL_TSO_SIZE,		/* maxsize */
400			       IXL_MAX_TX_SEGS,		/* nsegments */
401			       PAGE_SIZE,		/* maxsegsize */
402			       0,			/* flags */
403			       NULL,			/* lockfunc */
404			       NULL,			/* lockfuncarg */
405			       &txr->tx_tag))) {
406		device_printf(dev,"Unable to allocate TX DMA tag\n");
407		goto fail;
408	}
409
410	/* Make a special tag for TSO */
411	if ((error = bus_dma_tag_create(NULL,		/* parent */
412			       1, 0,			/* alignment, bounds */
413			       BUS_SPACE_MAXADDR,	/* lowaddr */
414			       BUS_SPACE_MAXADDR,	/* highaddr */
415			       NULL, NULL,		/* filter, filterarg */
416			       IXL_TSO_SIZE,		/* maxsize */
417			       IXL_MAX_TSO_SEGS,	/* nsegments */
418			       PAGE_SIZE,		/* maxsegsize */
419			       0,			/* flags */
420			       NULL,			/* lockfunc */
421			       NULL,			/* lockfuncarg */
422			       &txr->tso_tag))) {
423		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
424		goto fail;
425	}
426
427	if (!(txr->buffers =
428	    (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
429	    que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
430		device_printf(dev, "Unable to allocate tx_buffer memory\n");
431		error = ENOMEM;
432		goto fail;
433	}
434
435        /* Create the descriptor buffer default dma maps */
436	buf = txr->buffers;
437	for (int i = 0; i < que->num_desc; i++, buf++) {
438		buf->tag = txr->tx_tag;
439		error = bus_dmamap_create(buf->tag, 0, &buf->map);
440		if (error != 0) {
441			device_printf(dev, "Unable to create TX DMA map\n");
442			goto fail;
443		}
444	}
445fail:
446	return (error);
447}
448
449
450/*********************************************************************
451 *
452 *  (Re)Initialize a queue transmit ring.
453 *	- called by init, it clears the descriptor ring,
454 *	  and frees any stale mbufs
455 *
456 **********************************************************************/
457void
458ixl_init_tx_ring(struct ixl_queue *que)
459{
460	struct tx_ring *txr = &que->txr;
461	struct ixl_tx_buf *buf;
462
463	/* Clear the old ring contents */
464	IXL_TX_LOCK(txr);
465	bzero((void *)txr->base,
466	      (sizeof(struct i40e_tx_desc)) * que->num_desc);
467
468	/* Reset indices */
469	txr->next_avail = 0;
470	txr->next_to_clean = 0;
471
472#ifdef IXL_FDIR
473	/* Initialize flow director */
474	txr->atr_rate = ixl_atr_rate;
475	txr->atr_count = 0;
476#endif
477
478	/* Free any existing tx mbufs. */
479        buf = txr->buffers;
480	for (int i = 0; i < que->num_desc; i++, buf++) {
481		if (buf->m_head != NULL) {
482			bus_dmamap_sync(buf->tag, buf->map,
483			    BUS_DMASYNC_POSTWRITE);
484			bus_dmamap_unload(buf->tag, buf->map);
485			m_freem(buf->m_head);
486			buf->m_head = NULL;
487		}
488		/* Clear the EOP index */
489		buf->eop_index = -1;
490        }
491
492	/* Set number of descriptors available */
493	txr->avail = que->num_desc;
494
495	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
496	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
497	IXL_TX_UNLOCK(txr);
498}
499
500
501/*********************************************************************
502 *
503 *  Free transmit ring related data structures.
504 *
505 **********************************************************************/
506void
507ixl_free_que_tx(struct ixl_queue *que)
508{
509	struct tx_ring *txr = &que->txr;
510	struct ixl_tx_buf *buf;
511
512	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
513
514	for (int i = 0; i < que->num_desc; i++) {
515		buf = &txr->buffers[i];
516		if (buf->m_head != NULL) {
517			bus_dmamap_sync(buf->tag, buf->map,
518			    BUS_DMASYNC_POSTWRITE);
519			bus_dmamap_unload(buf->tag,
520			    buf->map);
521			m_freem(buf->m_head);
522			buf->m_head = NULL;
523			if (buf->map != NULL) {
524				bus_dmamap_destroy(buf->tag,
525				    buf->map);
526				buf->map = NULL;
527			}
528		} else if (buf->map != NULL) {
529			bus_dmamap_unload(buf->tag,
530			    buf->map);
531			bus_dmamap_destroy(buf->tag,
532			    buf->map);
533			buf->map = NULL;
534		}
535	}
536	if (txr->br != NULL)
537		buf_ring_free(txr->br, M_DEVBUF);
538	if (txr->buffers != NULL) {
539		free(txr->buffers, M_DEVBUF);
540		txr->buffers = NULL;
541	}
542	if (txr->tx_tag != NULL) {
543		bus_dma_tag_destroy(txr->tx_tag);
544		txr->tx_tag = NULL;
545	}
546	if (txr->tso_tag != NULL) {
547		bus_dma_tag_destroy(txr->tso_tag);
548		txr->tso_tag = NULL;
549	}
550
551	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
552	return;
553}
554
555/*********************************************************************
556 *
557 *  Setup descriptor for hw offloads
558 *
559 **********************************************************************/
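/*
** Parses the L2/L3/L4 headers to fill in the descriptor command bits
** and the MACLEN/IPLEN/L4LEN offsets used by ixl_xmit(); when CSUM_TSO
** is set a separate TSO context descriptor is queued first via
** ixl_tso_setup().
*/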
560
561static int
562ixl_tx_setup_offload(struct ixl_queue *que,
563    struct mbuf *mp, u32 *cmd, u32 *off)
564{
565	struct ether_vlan_header	*eh;
566#ifdef INET
567	struct ip			*ip = NULL;
568#endif
569	struct tcphdr			*th = NULL;
570#ifdef INET6
571	struct ip6_hdr			*ip6;
572#endif
573	int				elen, ip_hlen = 0, tcp_hlen;
574	u16				etype;
575	u8				ipproto = 0;
576	bool				tso = FALSE;
577
578
579	/* Set up the TSO context descriptor if required */
580	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
581		tso = ixl_tso_setup(que, mp);
582		if (tso)
583			++que->tso;
584		else
585			return (ENXIO);
586	}
587
588	/*
589	 * Determine where frame payload starts.
590	 * Jump over vlan headers if already present,
591	 * helpful for QinQ too.
592	 */
593	eh = mtod(mp, struct ether_vlan_header *);
594	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
595		etype = ntohs(eh->evl_proto);
596		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
597	} else {
598		etype = ntohs(eh->evl_encap_proto);
599		elen = ETHER_HDR_LEN;
600	}
601
602	switch (etype) {
603#ifdef INET
604		case ETHERTYPE_IP:
605			ip = (struct ip *)(mp->m_data + elen);
606			ip_hlen = ip->ip_hl << 2;
607			ipproto = ip->ip_p;
608			th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
609			/* The IP checksum must be recalculated with TSO */
610			if (tso)
611				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
612			else
613				*cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
614			break;
615#endif
616#ifdef INET6
617		case ETHERTYPE_IPV6:
618			ip6 = (struct ip6_hdr *)(mp->m_data + elen);
619			ip_hlen = sizeof(struct ip6_hdr);
620			ipproto = ip6->ip6_nxt;
621			th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
622			*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
623			break;
624#endif
625		default:
626			break;
627	}
628
629	*off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
630	*off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
631
632	switch (ipproto) {
633		case IPPROTO_TCP:
634			tcp_hlen = th->th_off << 2;
635			if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
636				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
637				*off |= (tcp_hlen >> 2) <<
638				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
639			}
640#ifdef IXL_FDIR
641			ixl_atr(que, th, etype);
642#endif
643			break;
644		case IPPROTO_UDP:
645			if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
646				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
647				*off |= (sizeof(struct udphdr) >> 2) <<
648				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
649			}
650			break;
651
652		case IPPROTO_SCTP:
653			if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
654				*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
655				*off |= (sizeof(struct sctphdr) >> 2) <<
656				    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
657			}
658			/* Fall Thru */
659		default:
660			break;
661	}
662
663        return (0);
664}
665
666
667/**********************************************************************
668 *
669 *  Setup context for hardware segmentation offload (TSO)
670 *
671 **********************************************************************/
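/*
** Builds a single context descriptor carrying the TSO length (payload
** bytes after the L2/L3/L4 headers) and the MSS, and seeds the TCP
** pseudo-header checksum in the mbuf so the hardware can complete it;
** one ring descriptor is consumed here in addition to the data
** descriptors filled in by ixl_xmit().
*/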
672static bool
673ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
674{
675	struct tx_ring			*txr = &que->txr;
676	struct i40e_tx_context_desc	*TXD;
677	struct ixl_tx_buf		*buf;
678	u32				cmd, mss, type, tsolen;
679	u16				etype;
680	int				idx, elen, ip_hlen, tcp_hlen;
681	struct ether_vlan_header	*eh;
682#ifdef INET
683	struct ip			*ip;
684#endif
685#ifdef INET6
686	struct ip6_hdr			*ip6;
687#endif
688#if defined(INET6) || defined(INET)
689	struct tcphdr			*th;
690#endif
691	u64				type_cmd_tso_mss;
692
693	/*
694	 * Determine where frame payload starts.
695	 * Jump over vlan headers if already present
696	 */
697	eh = mtod(mp, struct ether_vlan_header *);
698	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
699		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
700		etype = eh->evl_proto;
701	} else {
702		elen = ETHER_HDR_LEN;
703		etype = eh->evl_encap_proto;
704	}
705
706        switch (ntohs(etype)) {
707#ifdef INET6
708	case ETHERTYPE_IPV6:
709		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
710		if (ip6->ip6_nxt != IPPROTO_TCP)
711			return (FALSE);
712		ip_hlen = sizeof(struct ip6_hdr);
713		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
714		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
715		tcp_hlen = th->th_off << 2;
716		break;
717#endif
718#ifdef INET
719	case ETHERTYPE_IP:
720		ip = (struct ip *)(mp->m_data + elen);
721		if (ip->ip_p != IPPROTO_TCP)
722			return (FALSE);
723		ip->ip_sum = 0;
724		ip_hlen = ip->ip_hl << 2;
725		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
726		th->th_sum = in_pseudo(ip->ip_src.s_addr,
727		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
728		tcp_hlen = th->th_off << 2;
729		break;
730#endif
731	default:
732		printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
733		    __func__, ntohs(etype));
734		return FALSE;
735        }
736
737        /* Ensure we have at least the IP+TCP header in the first mbuf. */
738        if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
739		return FALSE;
740
741	idx = txr->next_avail;
742	buf = &txr->buffers[idx];
743	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
744	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
745
746	type = I40E_TX_DESC_DTYPE_CONTEXT;
747	cmd = I40E_TX_CTX_DESC_TSO;
748	mss = mp->m_pkthdr.tso_segsz;
749
750	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
751	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
752	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
753	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
754	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
755
756	TXD->tunneling_params = htole32(0);
757	buf->m_head = NULL;
758	buf->eop_index = -1;
759
760	if (++idx == que->num_desc)
761		idx = 0;
762
763	txr->avail--;
764	txr->next_avail = idx;
765
766	return TRUE;
767}
768
769/*
770** ixl_get_tx_head - Retrieve the value from the
771**    location where the HW records its HEAD index
772*/
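/*
** The head write-back slot lives one entry past the end of the ring
** (txr->base[que->num_desc]); the hardware updates it with the index
** of the next descriptor it will process, and ixl_txeof() cleans up
** to that point rather than polling per-descriptor done bits.
*/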
773static inline u32
774ixl_get_tx_head(struct ixl_queue *que)
775{
776	struct tx_ring  *txr = &que->txr;
777	void *head = &txr->base[que->num_desc];
778	return LE32_TO_CPU(*(volatile __le32 *)head);
779}
780
781/**********************************************************************
782 *
783 *  Examine each tx_buffer in the used queue. If the hardware is done
784 *  processing the packet then free associated resources. The
785 *  tx_buffer is put back on the free queue.
786 *
787 **********************************************************************/
788bool
789ixl_txeof(struct ixl_queue *que)
790{
791	struct tx_ring		*txr = &que->txr;
792	u32			first, last, head, done, processed;
793	struct ixl_tx_buf	*buf;
794	struct i40e_tx_desc	*tx_desc, *eop_desc;
795
796
797	mtx_assert(&txr->mtx, MA_OWNED);
798
799
800	/* These are not the descriptors you seek, move along :) */
801	if (txr->avail == que->num_desc) {
802		que->busy = 0;
803		return FALSE;
804	}
805
806	processed = 0;
807	first = txr->next_to_clean;
808	buf = &txr->buffers[first];
809	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
810	last = buf->eop_index;
811	if (last == -1)
812		return FALSE;
813	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
814
815	/* Get the Head WB value */
816	head = ixl_get_tx_head(que);
817
818	/*
819	** Get the index of the first descriptor
820	** BEYOND the EOP and call that 'done'.
821	** I do this so the comparison in the
822	** inner while loop below can be simple
823	*/
824	if (++last == que->num_desc) last = 0;
825	done = last;
826
827        bus_dmamap_sync(txr->dma.tag, txr->dma.map,
828            BUS_DMASYNC_POSTREAD);
829	/*
830	** The HEAD index of the ring is written in a
831	** defined location, this rather than a done bit
832	** is what is used to keep track of what must be
833	** 'cleaned'.
834	*/
835	while (first != head) {
836		/* We clean the range of the packet */
837		while (first != done) {
838			++txr->avail;
839			++processed;
840
841			if (buf->m_head) {
842				txr->bytes += /* for ITR adjustment */
843				    buf->m_head->m_pkthdr.len;
844				txr->tx_bytes += /* for TX stats */
845				    buf->m_head->m_pkthdr.len;
846				bus_dmamap_sync(buf->tag,
847				    buf->map,
848				    BUS_DMASYNC_POSTWRITE);
849				bus_dmamap_unload(buf->tag,
850				    buf->map);
851				m_freem(buf->m_head);
852				buf->m_head = NULL;
853				buf->map = NULL;
854			}
855			buf->eop_index = -1;
856
857			if (++first == que->num_desc)
858				first = 0;
859
860			buf = &txr->buffers[first];
861			tx_desc = &txr->base[first];
862		}
863		++txr->packets;
864		/* See if there is more work now */
865		last = buf->eop_index;
866		if (last != -1) {
867			eop_desc = &txr->base[last];
868			/* Get next done point */
869			if (++last == que->num_desc) last = 0;
870			done = last;
871		} else
872			break;
873	}
874	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
875	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
876
877	txr->next_to_clean = first;
878
879
880	/*
881	** Hang detection: we know there's work
882	** outstanding, or the first return above
883	** would have been taken, so indicate an
884	** unsuccessful pass; in local_timer, if
885	** the value grows too great the queue will
886	** be considered hung. If anything has been
887	** cleaned then reset the state.
888	*/
889	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
890		++que->busy;
891
892	if (processed)
893		que->busy = 1; /* Note this turns off HUNG */
894
895	/*
896	 * If there are no pending descriptors, clear the timeout.
897	 */
898	if (txr->avail == que->num_desc) {
899		que->busy = 0;
900		return FALSE;
901	}
902
903	return TRUE;
904}
905
906/*********************************************************************
907 *
908 *  Refresh mbuf buffers for RX descriptor rings
909 *   - now keeps its own state so discards due to resource
910 *     exhaustion are unnecessary; if an mbuf cannot be obtained
911 *     it just returns, keeping its placeholder, so it can simply
912 *     be called again later to retry.
913 *
914 **********************************************************************/
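/*
** 'limit' is the ring index at which ixl_rxeof() stopped; buffers are
** restocked starting at rxr->next_refresh, the walk stops one entry
** short of 'limit', and the RX tail register is only written when at
** least one descriptor was successfully refreshed.
*/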
915static void
916ixl_refresh_mbufs(struct ixl_queue *que, int limit)
917{
918	struct ixl_vsi		*vsi = que->vsi;
919	struct rx_ring		*rxr = &que->rxr;
920	bus_dma_segment_t	hseg[1];
921	bus_dma_segment_t	pseg[1];
922	struct ixl_rx_buf	*buf;
923	struct mbuf		*mh, *mp;
924	int			i, j, nsegs, error;
925	bool			refreshed = FALSE;
926
927	i = j = rxr->next_refresh;
928	/* Control the loop with one beyond */
929	if (++j == que->num_desc)
930		j = 0;
931
932	while (j != limit) {
933		buf = &rxr->buffers[i];
934		if (rxr->hdr_split == FALSE)
935			goto no_split;
936
937		if (buf->m_head == NULL) {
938			mh = m_gethdr(M_NOWAIT, MT_DATA);
939			if (mh == NULL)
940				goto update;
941		} else
942			mh = buf->m_head;
943
944		mh->m_pkthdr.len = mh->m_len = MHLEN;
945		mh->m_len = MHLEN;
946		mh->m_flags |= M_PKTHDR;
947		/* Get the memory mapping */
948		error = bus_dmamap_load_mbuf_sg(rxr->htag,
949		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
950		if (error != 0) {
951			printf("Refresh mbufs: hdr dmamap load"
952			    " failure - %d\n", error);
953			m_free(mh);
954			buf->m_head = NULL;
955			goto update;
956		}
957		buf->m_head = mh;
958		bus_dmamap_sync(rxr->htag, buf->hmap,
959		    BUS_DMASYNC_PREREAD);
960		rxr->base[i].read.hdr_addr =
961		   htole64(hseg[0].ds_addr);
962
963no_split:
964		if (buf->m_pack == NULL) {
965			mp = m_getjcl(M_NOWAIT, MT_DATA,
966			    M_PKTHDR, rxr->mbuf_sz);
967			if (mp == NULL)
968				goto update;
969		} else
970			mp = buf->m_pack;
971
972		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
973		/* Get the memory mapping */
974		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
975		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
976		if (error != 0) {
977			printf("Refresh mbufs: payload dmamap load"
978			    " failure - %d\n", error);
979			m_free(mp);
980			buf->m_pack = NULL;
981			goto update;
982		}
983		buf->m_pack = mp;
984		bus_dmamap_sync(rxr->ptag, buf->pmap,
985		    BUS_DMASYNC_PREREAD);
986		rxr->base[i].read.pkt_addr =
987		   htole64(pseg[0].ds_addr);
988		/* Used only when doing header split */
989		rxr->base[i].read.hdr_addr = 0;
990
991		refreshed = TRUE;
992		/* Next is precalculated */
993		i = j;
994		rxr->next_refresh = i;
995		if (++j == que->num_desc)
996			j = 0;
997	}
998update:
999	if (refreshed) /* Update hardware tail index */
1000		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
1001	return;
1002}
1003
1004
1005/*********************************************************************
1006 *
1007 *  Allocate memory for rx_buffer structures. Since we use one
1008 *  rx_buffer per descriptor, the maximum number of rx_buffer's
1009 *  that we'll need is equal to the number of receive descriptors
1010 *  that we've defined.
1011 *
1012 **********************************************************************/
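/*
** Two DMA tags are used per RX ring: a small header tag (MSIZE) for
** the header-split mbufs and a payload tag sized for clusters of up
** to MJUM16BYTES, each mapping a single contiguous segment.
*/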
1013int
1014ixl_allocate_rx_data(struct ixl_queue *que)
1015{
1016	struct rx_ring		*rxr = &que->rxr;
1017	struct ixl_vsi		*vsi = que->vsi;
1018	device_t 		dev = vsi->dev;
1019	struct ixl_rx_buf 	*buf;
1020	int             	i, bsize, error;
1021
1022	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1023	if (!(rxr->buffers =
1024	    (struct ixl_rx_buf *) malloc(bsize,
1025	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
1026		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1027		error = ENOMEM;
1028		return (error);
1029	}
1030
1031	if ((error = bus_dma_tag_create(NULL,	/* parent */
1032				   1, 0,	/* alignment, bounds */
1033				   BUS_SPACE_MAXADDR,	/* lowaddr */
1034				   BUS_SPACE_MAXADDR,	/* highaddr */
1035				   NULL, NULL,		/* filter, filterarg */
1036				   MSIZE,		/* maxsize */
1037				   1,			/* nsegments */
1038				   MSIZE,		/* maxsegsize */
1039				   0,			/* flags */
1040				   NULL,		/* lockfunc */
1041				   NULL,		/* lockfuncarg */
1042				   &rxr->htag))) {
1043		device_printf(dev, "Unable to create RX DMA htag\n");
1044		return (error);
1045	}
1046
1047	if ((error = bus_dma_tag_create(NULL,	/* parent */
1048				   1, 0,	/* alignment, bounds */
1049				   BUS_SPACE_MAXADDR,	/* lowaddr */
1050				   BUS_SPACE_MAXADDR,	/* highaddr */
1051				   NULL, NULL,		/* filter, filterarg */
1052				   MJUM16BYTES,		/* maxsize */
1053				   1,			/* nsegments */
1054				   MJUM16BYTES,		/* maxsegsize */
1055				   0,			/* flags */
1056				   NULL,		/* lockfunc */
1057				   NULL,		/* lockfuncarg */
1058				   &rxr->ptag))) {
1059		device_printf(dev, "Unable to create RX DMA ptag\n");
1060		return (error);
1061	}
1062
1063	for (i = 0; i < que->num_desc; i++) {
1064		buf = &rxr->buffers[i];
1065		error = bus_dmamap_create(rxr->htag,
1066		    BUS_DMA_NOWAIT, &buf->hmap);
1067		if (error) {
1068			device_printf(dev, "Unable to create RX head map\n");
1069			break;
1070		}
1071		error = bus_dmamap_create(rxr->ptag,
1072		    BUS_DMA_NOWAIT, &buf->pmap);
1073		if (error) {
1074			device_printf(dev, "Unable to create RX pkt map\n");
1075			break;
1076		}
1077	}
1078
1079	return (error);
1080}
1081
1082
1083/*********************************************************************
1084 *
1085 *  (Re)Initialize the queue receive ring and its buffers.
1086 *
1087 **********************************************************************/
1088int
1089ixl_init_rx_ring(struct ixl_queue *que)
1090{
1091	struct	rx_ring 	*rxr = &que->rxr;
1092#if defined(INET6) || defined(INET)
1093	struct ixl_vsi		*vsi = que->vsi;
1094	struct ifnet		*ifp = vsi->ifp;
1095	struct lro_ctrl		*lro = &rxr->lro;
1096#endif
1097	struct ixl_rx_buf	*buf;
1098	bus_dma_segment_t	pseg[1], hseg[1];
1099	int			rsize, nsegs, error = 0;
1100
1101	IXL_RX_LOCK(rxr);
1102	/* Clear the ring contents */
1103	rsize = roundup2(que->num_desc *
1104	    sizeof(union i40e_rx_desc), DBA_ALIGN);
1105	bzero((void *)rxr->base, rsize);
1106	/* Cleanup any existing buffers */
1107	for (int i = 0; i < que->num_desc; i++) {
1108		buf = &rxr->buffers[i];
1109		if (buf->m_head != NULL) {
1110			bus_dmamap_sync(rxr->htag, buf->hmap,
1111			    BUS_DMASYNC_POSTREAD);
1112			bus_dmamap_unload(rxr->htag, buf->hmap);
1113			buf->m_head->m_flags |= M_PKTHDR;
1114			m_freem(buf->m_head);
1115		}
1116		if (buf->m_pack != NULL) {
1117			bus_dmamap_sync(rxr->ptag, buf->pmap,
1118			    BUS_DMASYNC_POSTREAD);
1119			bus_dmamap_unload(rxr->ptag, buf->pmap);
1120			buf->m_pack->m_flags |= M_PKTHDR;
1121			m_freem(buf->m_pack);
1122		}
1123		buf->m_head = NULL;
1124		buf->m_pack = NULL;
1125	}
1126
1127	/* header split is off */
1128	rxr->hdr_split = FALSE;
1129
1130	/* Now replenish the mbufs */
1131	for (int j = 0; j != que->num_desc; ++j) {
1132		struct mbuf	*mh, *mp;
1133
1134		buf = &rxr->buffers[j];
1135		/*
1136		** Don't allocate mbufs if not
1137		** doing header split; it's wasteful
1138		*/
1139		if (rxr->hdr_split == FALSE)
1140			goto skip_head;
1141
1142		/* First the header */
1143		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1144		if (buf->m_head == NULL) {
1145			error = ENOBUFS;
1146			goto fail;
1147		}
1148		m_adj(buf->m_head, ETHER_ALIGN);
1149		mh = buf->m_head;
1150		mh->m_len = mh->m_pkthdr.len = MHLEN;
1151		mh->m_flags |= M_PKTHDR;
1152		/* Get the memory mapping */
1153		error = bus_dmamap_load_mbuf_sg(rxr->htag,
1154		    buf->hmap, buf->m_head, hseg,
1155		    &nsegs, BUS_DMA_NOWAIT);
1156		if (error != 0) /* Nothing elegant to do here */
1157			goto fail;
1158		bus_dmamap_sync(rxr->htag,
1159		    buf->hmap, BUS_DMASYNC_PREREAD);
1160		/* Update descriptor */
1161		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1162
1163skip_head:
1164		/* Now the payload cluster */
1165		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1166		    M_PKTHDR, rxr->mbuf_sz);
1167		if (buf->m_pack == NULL) {
1168			error = ENOBUFS;
1169                        goto fail;
1170		}
1171		mp = buf->m_pack;
1172		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1173		/* Get the memory mapping */
1174		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1175		    buf->pmap, mp, pseg,
1176		    &nsegs, BUS_DMA_NOWAIT);
1177		if (error != 0)
1178                        goto fail;
1179		bus_dmamap_sync(rxr->ptag,
1180		    buf->pmap, BUS_DMASYNC_PREREAD);
1181		/* Update descriptor */
1182		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1183		rxr->base[j].read.hdr_addr = 0;
1184	}
1185
1186
1187	/* Setup our descriptor indices */
1188	rxr->next_check = 0;
1189	rxr->next_refresh = 0;
1190	rxr->lro_enabled = FALSE;
1191	rxr->split = 0;
1192	rxr->bytes = 0;
1193	rxr->discard = FALSE;
1194
1195	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1196	ixl_flush(vsi->hw);
1197
1198#if defined(INET6) || defined(INET)
1199	/*
1200	** Now set up the LRO interface:
1201	*/
1202	if (ifp->if_capenable & IFCAP_LRO) {
1203		int err = tcp_lro_init(lro);
1204		if (err) {
1205			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1206			goto fail;
1207		}
1208		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1209		rxr->lro_enabled = TRUE;
1210		lro->ifp = vsi->ifp;
1211	}
1212#endif
1213
1214	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1215	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1216
1217fail:
1218	IXL_RX_UNLOCK(rxr);
1219	return (error);
1220}
1221
1222
1223/*********************************************************************
1224 *
1225 *  Free station receive ring data structures
1226 *
1227 **********************************************************************/
1228void
1229ixl_free_que_rx(struct ixl_queue *que)
1230{
1231	struct rx_ring		*rxr = &que->rxr;
1232	struct ixl_rx_buf	*buf;
1233
1234	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1235
1236	/* Cleanup any existing buffers */
1237	if (rxr->buffers != NULL) {
1238		for (int i = 0; i < que->num_desc; i++) {
1239			buf = &rxr->buffers[i];
1240			if (buf->m_head != NULL) {
1241				bus_dmamap_sync(rxr->htag, buf->hmap,
1242				    BUS_DMASYNC_POSTREAD);
1243				bus_dmamap_unload(rxr->htag, buf->hmap);
1244				buf->m_head->m_flags |= M_PKTHDR;
1245				m_freem(buf->m_head);
1246			}
1247			if (buf->m_pack != NULL) {
1248				bus_dmamap_sync(rxr->ptag, buf->pmap,
1249				    BUS_DMASYNC_POSTREAD);
1250				bus_dmamap_unload(rxr->ptag, buf->pmap);
1251				buf->m_pack->m_flags |= M_PKTHDR;
1252				m_freem(buf->m_pack);
1253			}
1254			buf->m_head = NULL;
1255			buf->m_pack = NULL;
1256			if (buf->hmap != NULL) {
1257				bus_dmamap_destroy(rxr->htag, buf->hmap);
1258				buf->hmap = NULL;
1259			}
1260			if (buf->pmap != NULL) {
1261				bus_dmamap_destroy(rxr->ptag, buf->pmap);
1262				buf->pmap = NULL;
1263			}
1264		}
1265		if (rxr->buffers != NULL) {
1266			free(rxr->buffers, M_DEVBUF);
1267			rxr->buffers = NULL;
1268		}
1269	}
1270
1271	if (rxr->htag != NULL) {
1272		bus_dma_tag_destroy(rxr->htag);
1273		rxr->htag = NULL;
1274	}
1275	if (rxr->ptag != NULL) {
1276		bus_dma_tag_destroy(rxr->ptag);
1277		rxr->ptag = NULL;
1278	}
1279
1280	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1281	return;
1282}
1283
1284static __inline void
1285ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1286{
1287
1288#if defined(INET6) || defined(INET)
1289        /*
1290         * At the moment LRO is only done for IPv4/TCP packets whose TCP
1291         * checksum has been verified by hardware, and which carry no VLAN
1292         * tag in the ethernet header.
1293         */
1294        if (rxr->lro_enabled &&
1295            (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1296            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1297            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1298                /*
1299                 * Send to the stack if:
1300                 **  - LRO not enabled, or
1301                 **  - no LRO resources, or
1302                 **  - lro enqueue fails
1303                 */
1304                if (rxr->lro.lro_cnt != 0)
1305                        if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1306                                return;
1307        }
1308#endif
1309	IXL_RX_UNLOCK(rxr);
1310        (*ifp->if_input)(ifp, m);
1311	IXL_RX_LOCK(rxr);
1312}
1313
1314
1315static __inline void
1316ixl_rx_discard(struct rx_ring *rxr, int i)
1317{
1318	struct ixl_rx_buf	*rbuf;
1319
1320	rbuf = &rxr->buffers[i];
1321
1322        if (rbuf->fmp != NULL) {/* Partial chain ? */
1323		rbuf->fmp->m_flags |= M_PKTHDR;
1324                m_freem(rbuf->fmp);
1325                rbuf->fmp = NULL;
1326	}
1327
1328	/*
1329	** With advanced descriptors the writeback
1330	** clobbers the buffer addrs, so it's easier
1331	** to just free the existing mbufs and take
1332	** the normal refresh path to get new buffers
1333	** and mapping.
1334	*/
1335	if (rbuf->m_head) {
1336		m_free(rbuf->m_head);
1337		rbuf->m_head = NULL;
1338	}
1339
1340	if (rbuf->m_pack) {
1341		m_free(rbuf->m_pack);
1342		rbuf->m_pack = NULL;
1343	}
1344
1345	return;
1346}
1347
1348
1349/*********************************************************************
1350 *
1351 *  This routine executes in interrupt context. It replenishes
1352 *  the mbufs in the descriptor ring and sends data which has been
1353 *  DMA'd into host memory to the upper layer.
1354 *
1355 *  We loop at most count times if count is > 0, or until done if
1356 *  count < 0.
1357 *
1358 *  Return TRUE for more work, FALSE for all clean.
1359 *********************************************************************/
1360bool
1361ixl_rxeof(struct ixl_queue *que, int count)
1362{
1363	struct ixl_vsi		*vsi = que->vsi;
1364	struct rx_ring		*rxr = &que->rxr;
1365	struct ifnet		*ifp = vsi->ifp;
1366#if defined(INET6) || defined(INET)
1367	struct lro_ctrl		*lro = &rxr->lro;
1368	struct lro_entry	*queued;
1369#endif
1370	int			i, nextp, processed = 0;
1371	union i40e_rx_desc	*cur;
1372	struct ixl_rx_buf	*rbuf, *nbuf;
1373
1374
1375	IXL_RX_LOCK(rxr);
1376
1377
1378	for (i = rxr->next_check; count != 0;) {
1379		struct mbuf	*sendmp, *mh, *mp;
1380		u32		rsc, status, error;
1381		u16		hlen, plen, vtag;
1382		u64		qword;
1383		u8		ptype;
1384		bool		eop;
1385
1386		/* Sync the ring. */
1387		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1388		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1389
1390		cur = &rxr->base[i];
1391		qword = le64toh(cur->wb.qword1.status_error_len);
1392		status = (qword & I40E_RXD_QW1_STATUS_MASK)
1393		    >> I40E_RXD_QW1_STATUS_SHIFT;
1394		error = (qword & I40E_RXD_QW1_ERROR_MASK)
1395		    >> I40E_RXD_QW1_ERROR_SHIFT;
1396		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1397		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1398		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1399		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1400		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1401		    >> I40E_RXD_QW1_PTYPE_SHIFT;
1402
1403		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1404			++rxr->not_done;
1405			break;
1406		}
1407		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1408			break;
1409
1410		count--;
1411		sendmp = NULL;
1412		nbuf = NULL;
1413		rsc = 0;
1414		cur->wb.qword1.status_error_len = 0;
1415		rbuf = &rxr->buffers[i];
1416		mh = rbuf->m_head;
1417		mp = rbuf->m_pack;
1418		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1419		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1420			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1421		else
1422			vtag = 0;
1423
1424		/*
1425		** Make sure bad packets are discarded,
1426		** note that only EOP descriptor has valid
1427		** error results.
1428		*/
1429                if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1430			rxr->discarded++;
1431			ixl_rx_discard(rxr, i);
1432			goto next_desc;
1433		}
1434
1435		/* Prefetch the next buffer */
1436		if (!eop) {
1437			nextp = i + 1;
1438			if (nextp == que->num_desc)
1439				nextp = 0;
1440			nbuf = &rxr->buffers[nextp];
1441			prefetch(nbuf);
1442		}
1443
1444		/*
1445		** The header mbuf is ONLY used when header
1446		** split is enabled, otherwise we get normal
1447		** behavior, ie, both header and payload
1448		** are DMA'd into the payload buffer.
1449		**
1450		** Rather than using the fmp/lmp global pointers
1451		** we now keep the head of a packet chain in the
1452		** buffer struct and pass this along from one
1453		** descriptor to the next, until we get EOP.
1454		*/
1455		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1456			if (hlen > IXL_RX_HDR)
1457				hlen = IXL_RX_HDR;
1458			mh->m_len = hlen;
1459			mh->m_flags |= M_PKTHDR;
1460			mh->m_next = NULL;
1461			mh->m_pkthdr.len = mh->m_len;
1462			/* Null buf pointer so it is refreshed */
1463			rbuf->m_head = NULL;
1464			/*
1465			** Check the payload length, this
1466			** could be zero if its a small
1467		** could be zero if it's a small
1468			*/
1469			if (plen > 0) {
1470				mp->m_len = plen;
1471				mp->m_next = NULL;
1472				mp->m_flags &= ~M_PKTHDR;
1473				mh->m_next = mp;
1474				mh->m_pkthdr.len += mp->m_len;
1475				/* Null buf pointer so it is refreshed */
1476				rbuf->m_pack = NULL;
1477				rxr->split++;
1478			}
1479			/*
1480			** Now create the forward
1481			** chain so when complete
1482			** we wont have to.
1483			** we won't have to.
1484                        if (eop == 0) {
1485				/* stash the chain head */
1486                                nbuf->fmp = mh;
1487				/* Make forward chain */
1488                                if (plen)
1489                                        mp->m_next = nbuf->m_pack;
1490                                else
1491                                        mh->m_next = nbuf->m_pack;
1492                        } else {
1493				/* Singlet, prepare to send */
1494                                sendmp = mh;
1495                                if (vtag) {
1496                                        sendmp->m_pkthdr.ether_vtag = vtag;
1497                                        sendmp->m_flags |= M_VLANTAG;
1498                                }
1499                        }
1500		} else {
1501			/*
1502			** Either no header split, or a
1503			** secondary piece of a fragmented
1504			** split packet.
1505			*/
1506			mp->m_len = plen;
1507			/*
1508			** See if there is a stored head
1509			** that determines what we are
1510			*/
1511			sendmp = rbuf->fmp;
1512			rbuf->m_pack = rbuf->fmp = NULL;
1513
1514			if (sendmp != NULL) /* secondary frag */
1515				sendmp->m_pkthdr.len += mp->m_len;
1516			else {
1517				/* first desc of a non-ps chain */
1518				sendmp = mp;
1519				sendmp->m_flags |= M_PKTHDR;
1520				sendmp->m_pkthdr.len = mp->m_len;
1521				if (vtag) {
1522					sendmp->m_pkthdr.ether_vtag = vtag;
1523					sendmp->m_flags |= M_VLANTAG;
1524				}
1525                        }
1526			/* Pass the head pointer on */
1527			if (eop == 0) {
1528				nbuf->fmp = sendmp;
1529				sendmp = NULL;
1530				mp->m_next = nbuf->m_pack;
1531			}
1532		}
1533		++processed;
1534		/* Sending this frame? */
1535		if (eop) {
1536			sendmp->m_pkthdr.rcvif = ifp;
1537			/* gather stats */
1538			rxr->rx_packets++;
1539			rxr->rx_bytes += sendmp->m_pkthdr.len;
1540			/* capture data for dynamic ITR adjustment */
1541			rxr->packets++;
1542			rxr->bytes += sendmp->m_pkthdr.len;
1543			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1544				ixl_rx_checksum(sendmp, status, error, ptype);
1545			sendmp->m_pkthdr.flowid = que->msix;
1546			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1547		}
1548next_desc:
1549		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1550		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1551
1552		/* Advance our pointers to the next descriptor. */
1553		if (++i == que->num_desc)
1554			i = 0;
1555
1556		/* Now send to the stack or do LRO */
1557		if (sendmp != NULL) {
1558			rxr->next_check = i;
1559			ixl_rx_input(rxr, ifp, sendmp, ptype);
1560			i = rxr->next_check;
1561		}
1562
1563               /* Every 8 descriptors we go to refresh mbufs */
1564		if (processed == 8) {
1565			ixl_refresh_mbufs(que, i);
1566			processed = 0;
1567		}
1568	}
1569
1570	/* Refresh any remaining buf structs */
1571	if (ixl_rx_unrefreshed(que))
1572		ixl_refresh_mbufs(que, i);
1573
1574	rxr->next_check = i;
1575
1576#if defined(INET6) || defined(INET)
1577	/*
1578	 * Flush any outstanding LRO work
1579	 */
1580	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1581		SLIST_REMOVE_HEAD(&lro->lro_active, next);
1582		tcp_lro_flush(lro, queued);
1583	}
1584#endif
1585
1586	IXL_RX_UNLOCK(rxr);
1587	return (FALSE);
1588}
1589
1590
1591/*********************************************************************
1592 *
1593 *  Verify that the hardware indicated that the checksum is valid.
1594 *  Inform the stack of the checksum status so that the stack
1595 *  doesn't spend time re-verifying it.
1596 *
1597 *********************************************************************/
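/*
** Mapping of hardware status to mbuf csum flags: any IPE/L4E error
** clears the flags, IPv6 frames with extension headers are left
** unverified, otherwise CSUM_IP_CHECKED|CSUM_IP_VALID is set and, when
** the L3L4P bit indicates the L4 checksum was processed,
** CSUM_DATA_VALID|CSUM_PSEUDO_HDR with a csum_data of 0xffff.
*/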
1598static void
1599ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1600{
1601	struct i40e_rx_ptype_decoded decoded;
1602
1603	decoded = decode_rx_desc_ptype(ptype);
1604
1605	/* Errors? */
1606 	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1607	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1608		mp->m_pkthdr.csum_flags = 0;
1609		return;
1610	}
1611
1612	/* IPv6 with extension headers likely have bad csum */
1613	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1614	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1615		if (status &
1616		    (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1617			mp->m_pkthdr.csum_flags = 0;
1618			return;
1619		}
1620
1621
1622	/* IP Checksum Good */
1623	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1624	mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1625
1626	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1627		mp->m_pkthdr.csum_flags |=
1628		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1629		mp->m_pkthdr.csum_data |= htons(0xffff);
1630	}
1631	return;
1632}
1633
1634#if __FreeBSD_version >= 1100000
1635uint64_t
1636ixl_get_counter(if_t ifp, ift_counter cnt)
1637{
1638	struct ixl_vsi *vsi;
1639
1640	vsi = if_getsoftc(ifp);
1641
1642	switch (cnt) {
1643	case IFCOUNTER_IPACKETS:
1644		return (vsi->ipackets);
1645	case IFCOUNTER_IERRORS:
1646		return (vsi->ierrors);
1647	case IFCOUNTER_OPACKETS:
1648		return (vsi->opackets);
1649	case IFCOUNTER_OERRORS:
1650		return (vsi->oerrors);
1651	case IFCOUNTER_COLLISIONS:
1652		/* Collisions are by standard impossible in 40G/10G Ethernet */
1653		return (0);
1654	case IFCOUNTER_IBYTES:
1655		return (vsi->ibytes);
1656	case IFCOUNTER_OBYTES:
1657		return (vsi->obytes);
1658	case IFCOUNTER_IMCASTS:
1659		return (vsi->imcasts);
1660	case IFCOUNTER_OMCASTS:
1661		return (vsi->omcasts);
1662	case IFCOUNTER_IQDROPS:
1663		return (vsi->iqdrops);
1664	case IFCOUNTER_OQDROPS:
1665		return (vsi->oqdrops);
1666	case IFCOUNTER_NOPROTO:
1667		return (vsi->noproto);
1668	default:
1669		return (if_get_counter_default(ifp, cnt));
1670	}
1671}
1672#endif
1673
1674