/******************************************************************************

  Copyright (c) 2001-2017, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/

#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

/************************************************************************
 * Local Function prototypes
 ************************************************************************/
static int ixgbe_isc_txd_encap(void *, if_pkt_info_t);
static void ixgbe_isc_txd_flush(void *, uint16_t, qidx_t);
static int ixgbe_isc_txd_credits_update(void *, uint16_t, bool);

static void ixgbe_isc_rxd_refill(void *, if_rxd_update_t);
static void ixgbe_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
static int ixgbe_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
static int ixgbe_isc_rxd_pkt_get(void *, if_rxd_info_t);

static void ixgbe_rx_checksum(uint32_t, if_rxd_info_t, uint32_t);
static int ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *,
    if_pkt_info_t);

extern void ixgbe_if_enable_intr(if_ctx_t ctx);
static int ixgbe_determine_rsstype(uint16_t pkt_info);

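/*
 * Dispatch table consumed by iflib; its generic TX/RX paths call these
 * routines to encapsulate packets, reclaim completed descriptors, and
 * refill and harvest the receive rings.
 */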
struct if_txrx ixgbe_txrx = {
	.ift_txd_encap = ixgbe_isc_txd_encap,
	.ift_txd_flush = ixgbe_isc_txd_flush,
	.ift_txd_credits_update = ixgbe_isc_txd_credits_update,
	.ift_rxd_available = ixgbe_isc_rxd_available,
	.ift_rxd_pkt_get = ixgbe_isc_rxd_pkt_get,
	.ift_rxd_refill = ixgbe_isc_rxd_refill,
	.ift_rxd_flush = ixgbe_isc_rxd_flush,
	.ift_legacy_intr = NULL
};

/************************************************************************
 * ixgbe_tx_ctx_setup
 *
 *   Advanced context descriptor setup for VLAN, CSUM or TSO.
 *   Returns the olinfo_status bits the caller merges into the
 *   packet's data descriptors.
 ************************************************************************/
static int
ixgbe_tx_ctx_setup(struct ixgbe_adv_tx_context_desc *TXD, if_pkt_info_t pi)
{
	uint32_t vlan_macip_lens, type_tucmd_mlhl;
	uint32_t olinfo_status, mss_l4len_idx, pktlen;
	u8 ehdrlen;
	bool offload;

	offload = true;
	olinfo_status = mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0;

	/* VLAN MACLEN IPLEN */
	vlan_macip_lens |= (htole16(pi->ipi_vtag) << IXGBE_ADVTXD_VLAN_SHIFT);

	/*
	 * Some of our VF devices need a context descriptor for every
	 * packet, so ehdrlen must be non-zero or the host driver will
	 * flag a malicious event.  The stack will already have populated
	 * ipi_ehdrlen in the other cases for which this function is
	 * called.
	 */
	if (pi->ipi_ehdrlen == 0) {
		ehdrlen = ETHER_HDR_LEN;
		ehdrlen += (pi->ipi_vtag != 0) ? ETHER_VLAN_ENCAP_LEN : 0;
	} else
		ehdrlen = pi->ipi_ehdrlen;
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	pktlen = pi->ipi_len;
	/* First check if TSO is to be used */
	if (pi->ipi_csum_flags & CSUM_TSO) {
		/* The TSO payload length excludes the headers; encap
		 * places this value in the data descriptors. */
		pktlen = pi->ipi_len - ehdrlen - pi->ipi_ip_hlen -
		    pi->ipi_tcp_hlen;
		mss_l4len_idx |= (pi->ipi_tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
		mss_l4len_idx |= (pi->ipi_tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	}

	olinfo_status |= pktlen << IXGBE_ADVTXD_PAYLEN_SHIFT;

	if (pi->ipi_flags & IPI_TX_IPV4) {
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		if (pi->ipi_csum_flags & (CSUM_IP | CSUM_TSO))
			olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
	} else if (pi->ipi_flags & IPI_TX_IPV6)
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
	else
		offload = false;

	vlan_macip_lens |= pi->ipi_ip_hlen;

	switch (pi->ipi_ipproto) {
	case IPPROTO_TCP:
		if (pi->ipi_csum_flags &
		    (CSUM_IP_TCP | CSUM_IP6_TCP | CSUM_TSO))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		else
			offload = false;
		break;
	case IPPROTO_UDP:
		if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		else
			offload = false;
		break;
	case IPPROTO_SCTP:
		if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
		else
			offload = false;
		break;
	default:
		offload = false;
		break;
	}
	/* Insert L4 checksum into data descriptors */
	if (offload)
		olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	return (olinfo_status);
} /* ixgbe_tx_ctx_setup */

/************************************************************************
 * ixgbe_isc_txd_encap
 ************************************************************************/
static int
ixgbe_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct ixgbe_softc               *sc = arg;
	if_softc_ctx_t                   scctx = sc->shared;
	struct ix_tx_queue               *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring                   *txr = &que->txr;
	int                              nsegs = pi->ipi_nsegs;
	bus_dma_segment_t                *segs = pi->ipi_segs;
	union ixgbe_adv_tx_desc          *txd = NULL;
	struct ixgbe_adv_tx_context_desc *TXD;
	int                              i, j, first, pidx_last;
	uint32_t                         olinfo_status, cmd, flags;
	qidx_t                           ntxd;

	cmd = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (pi->ipi_mflags & M_VLANTAG)
		cmd |= IXGBE_ADVTXD_DCMD_VLE;

	i = first = pi->ipi_pidx;
	flags = (pi->ipi_flags & IPI_TX_INTR) ? IXGBE_TXD_CMD_RS : 0;
	ntxd = scctx->isc_ntxd[0];

	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[first];
	if ((pi->ipi_csum_flags & CSUM_OFFLOAD) ||
	    (sc->feat_en & IXGBE_FEATURE_NEEDS_CTXD) ||
	    pi->ipi_vtag) {
		/*********************************************
		 * Set up the appropriate offload context;
		 * this will consume the first descriptor
		 *********************************************/
		olinfo_status = ixgbe_tx_ctx_setup(TXD, pi);
		if (pi->ipi_csum_flags & CSUM_TSO) {
			cmd |= IXGBE_ADVTXD_DCMD_TSE;
			++txr->tso_tx;
		}

		if (++i == ntxd)
			i = 0;
	} else {
		/* Indicate the whole packet as payload when not doing TSO */
		olinfo_status = pi->ipi_len << IXGBE_ADVTXD_PAYLEN_SHIFT;
	}

	olinfo_status |= IXGBE_ADVTXD_CC;
	pidx_last = 0;
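	/*
	 * One data descriptor per DMA segment; EOP (and RS, when an
	 * interrupt was requested) is set on the last one below.
	 */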
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;

		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;

		txd->read.buffer_addr = htole64(segs[j].ds_addr);
		txd->read.cmd_type_len = htole32(cmd | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		pidx_last = i;
		if (++i == ntxd)
			i = 0;
	}

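	/*
	 * If this packet requested a completion interrupt, remember where
	 * its final descriptor sits: txd_credits_update() walks tx_rsq and
	 * checks the DD bit at each recorded position to reclaim work.
	 */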
	if (flags) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		txr->tx_rs_pidx = (txr->tx_rs_pidx + 1) & (ntxd - 1);
	}
	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | flags);

	txr->bytes += pi->ipi_len;
	pi->ipi_new_pidx = i;

	++txr->total_packets;

	return (0);
} /* ixgbe_isc_txd_encap */

/************************************************************************
 * ixgbe_isc_txd_flush
 ************************************************************************/
static void
ixgbe_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct ixgbe_softc *sc = arg;
	struct ix_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring     *txr = &que->txr;

	IXGBE_WRITE_REG(&sc->hw, txr->tail, pidx);
} /* ixgbe_isc_txd_flush */

/************************************************************************
 * ixgbe_isc_txd_credits_update
 ************************************************************************/
static int
ixgbe_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct ixgbe_softc *sc = arg;
	if_softc_ctx_t     scctx = sc->shared;
	struct ix_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring     *txr = &que->txr;
	qidx_t             processed = 0;
	int                updated;
	qidx_t             cur, prev, ntxd, rs_cidx;
	int32_t            delta;
	uint8_t            status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);

	cur = txr->tx_rsq[rs_cidx];
	status = txr->tx_base[cur].wb.status;
	updated = !!(status & IXGBE_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
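	/*
	 * Each tx_rsq entry marks the last descriptor of a packet whose
	 * RS bit was set, so a DD writeback there means every descriptor
	 * up to and including it has completed.  Count those descriptors
	 * as reclaimed credits.
	 */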
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);

		processed += delta;
		prev = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd - 1);
		if (rs_cidx == txr->tx_rs_pidx)
			break;

		cur = txr->tx_rsq[rs_cidx];
		status = txr->tx_base[cur].wb.status;
	} while (status & IXGBE_TXD_STAT_DD);

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;

	return (processed);
} /* ixgbe_isc_txd_credits_update */

/************************************************************************
 * ixgbe_isc_rxd_refill
 ************************************************************************/
static void
ixgbe_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct ixgbe_softc *sc = arg;
	struct ix_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring     *rxr = &que->rxr;
	uint64_t           *paddrs;
	int                i;
	uint32_t           next_pidx, pidx;
	uint16_t           count;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

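	/*
	 * Write the new buffer physical addresses into the descriptor
	 * ring; hardware does not own the descriptors until the tail
	 * register is bumped in ixgbe_isc_rxd_flush().
	 */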
	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxr->rx_base[next_pidx].read.pkt_addr = htole64(paddrs[i]);
		if (++next_pidx == sc->shared->isc_nrxd[0])
			next_pidx = 0;
	}
} /* ixgbe_isc_rxd_refill */

/************************************************************************
 * ixgbe_isc_rxd_flush
 ************************************************************************/
static void
ixgbe_isc_rxd_flush(void *arg, uint16_t qsidx, uint8_t flidx __unused,
    qidx_t pidx)
{
	struct ixgbe_softc *sc = arg;
	struct ix_rx_queue *que = &sc->rx_queues[qsidx];
	struct rx_ring     *rxr = &que->rxr;

	IXGBE_WRITE_REG(&sc->hw, rxr->tail, pidx);
} /* ixgbe_isc_rxd_flush */

/************************************************************************
 * ixgbe_isc_rxd_available
 ************************************************************************/
static int
ixgbe_isc_rxd_available(void *arg, uint16_t qsidx, qidx_t pidx, qidx_t budget)
{
	struct ixgbe_softc      *sc = arg;
	struct ix_rx_queue      *que = &sc->rx_queues[qsidx];
	struct rx_ring          *rxr = &que->rxr;
	union ixgbe_adv_rx_desc *rxd;
	uint32_t                 staterr;
	int                      cnt, i, nrxd;

	nrxd = sc->shared->isc_nrxd[0];
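	/*
	 * Count complete packets, not descriptors: walk descriptors while
	 * the DD bit is set and credit one packet at each EOP descriptor,
	 * stopping once the budget is met.
	 */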
	for (cnt = 0, i = pidx; cnt < nrxd && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if (++i == nrxd)
			i = 0;
		if (staterr & IXGBE_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
} /* ixgbe_isc_rxd_available */

/************************************************************************
 * ixgbe_isc_rxd_pkt_get
 *
 *   Pass a received packet, which has been DMA'd into host memory,
 *   up to the upper layer, initializing the ri structure with its
 *   fragments and metadata.
 *
 *   Returns 0 upon success, errno on failure.
 ************************************************************************/
static int
ixgbe_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct ixgbe_softc      *sc = arg;
	if_softc_ctx_t           scctx = sc->shared;
	struct ix_rx_queue      *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring          *rxr = &que->rxr;
	union ixgbe_adv_rx_desc *rxd;

	uint16_t                 pkt_info, len, cidx, i;
	uint32_t                 ptype;
	uint32_t                 staterr = 0;
	bool                     eop;

	i = 0;
	cidx = ri->iri_cidx;
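	/*
	 * Gather the fragments of one packet: each descriptor up to and
	 * including the one with EOP set describes one receive buffer.
	 */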
	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le16toh(rxd->wb.lower.lo_dword.hs_rss.pkt_info);

		/* A completed descriptor must have its DD bit set */
		MPASS((staterr & IXGBE_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ptype = le32toh(rxd->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;

		ri->iri_len += len;
		rxr->bytes += len;

		rxd->wb.upper.status_error = 0;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
			if (sc->feat_en & IXGBE_FEATURE_VF)
				if_inc_counter(ri->iri_ifp,
				    IFCOUNTER_IERRORS, 1);

			rxr->rx_discarded++;
			return (EBADMSG);
		}
		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		if (++cidx == sc->shared->isc_nrxd[0])
			cidx = 0;
		i++;
		/* even a 16K packet shouldn't consume more than 8 clusters */
		MPASS(i < 9);
	} while (!eop);

	rxr->rx_packets++;
	rxr->packets++;
	rxr->rx_bytes += ri->iri_len;

	if ((scctx->isc_capenable & IFCAP_RXCSUM) != 0)
		ixgbe_rx_checksum(staterr, ri, ptype);

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = ixgbe_determine_rsstype(pkt_info);
	if ((sc->feat_en & IXGBE_FEATURE_RSS) == 0) {
		if (ri->iri_rsstype == M_HASHTYPE_OPAQUE)
			ri->iri_rsstype = M_HASHTYPE_NONE;
		else
			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
	}
	if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP)) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;
	return (0);
} /* ixgbe_isc_rxd_pkt_get */

/************************************************************************
 * ixgbe_rx_checksum
 *
 *   Verify that the hardware indicated that the checksum is valid.
 *   Inform the stack about the status of the checksum so that it
 *   doesn't spend time verifying it.
 ************************************************************************/
static void
ixgbe_rx_checksum(uint32_t staterr, if_rxd_info_t ri, uint32_t ptype)
{
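	/*
	 * The status bits sit in the low word of staterr and the error
	 * bits in the top byte; peel each off for the checks below.
	 */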
	uint16_t status = (uint16_t)staterr;
	uint8_t errors = (uint8_t)(staterr >> 24);

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (IXGBE_RXD_ERR_IPE | IXGBE_RXD_ERR_TCPE)))
		return;

	/* IP Checksum Good */
	if (status & IXGBE_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4 checksum */
	if (__predict_true(status & IXGBE_RXD_STAT_L4CS)) {
		/* SCTP header present. */
		if (__predict_false((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
		    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)) {
			ri->iri_csum_flags |= CSUM_SCTP_VALID;
		} else {
			ri->iri_csum_flags |= CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR;
			ri->iri_csum_data = htons(0xffff);
		}
	}
} /* ixgbe_rx_checksum */

/************************************************************************
 * ixgbe_determine_rsstype
 *
 *   Parse the packet type to determine the appropriate hash
 ************************************************************************/
static int
ixgbe_determine_rsstype(uint16_t pkt_info)
{
	switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case IXGBE_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case IXGBE_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
		return M_HASHTYPE_RSS_UDP_IPV4;
	case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
		return M_HASHTYPE_RSS_UDP_IPV6;
	case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
		return M_HASHTYPE_RSS_UDP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
} /* ixgbe_determine_rsstype */