mlx5_en_rx.c revision 291184
/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c 291184 2015-11-23 09:32:32Z hselasky $
 */

#include "en.h"
#include <machine/in_cksum.h>

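/*
 * Attach a fresh receive buffer to one RQ entry: allocate a cluster
 * mbuf of the configured WQE size, shift the payload so the IP header
 * ends up aligned, DMA-map the buffer as a single segment and store
 * its bus address in the WQE for the hardware to fill.
 */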
static inline int
mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq,
    struct mlx5e_rx_wqe *wqe, u16 ix)
{
	bus_dma_segment_t segs[1];
	struct mbuf *mb;
	int nsegs;
	int err;

	if (rq->mbuf[ix].mbuf != NULL)
		return (0);

	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);
	if (unlikely(!mb))
		return (-ENOMEM);

	/* set initial mbuf length */
	mb->m_pkthdr.len = mb->m_len = rq->wqe_sz;

	/* get IP header aligned */
	m_adj(mb, MLX5E_NET_IP_ALIGN);

	err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map,
	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
	if (err != 0)
		goto err_free_mbuf;
	if (unlikely(nsegs != 1)) {
		bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map);
		err = -ENOMEM;
		goto err_free_mbuf;
	}
	wqe->data.addr = cpu_to_be64(segs[0].ds_addr);

	rq->mbuf[ix].mbuf = mb;
	rq->mbuf[ix].data = mb->m_data;

	bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map,
	    BUS_DMASYNC_PREREAD);
	return (0);

err_free_mbuf:
	m_freem(mb);
	return (err);
}

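/*
 * Refill the receive work queue: allocate and post new receive WQEs
 * until the queue is full or an mbuf allocation fails, then update the
 * doorbell record so the hardware sees the newly posted entries.
 */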
static void
mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
	if (unlikely(rq->enabled == 0))
		return;

	while (!mlx5_wq_ll_is_full(&rq->wq)) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head);

		if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head)))
			break;

		mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index));
	}

	/* ensure wqes are visible to device before updating doorbell record */
	wmb();

	mlx5_wq_ll_update_db_record(&rq->wq);
}

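/*
 * When the hardware has merged several TCP segments into one LRO
 * aggregate, the headers of the first segment no longer describe the
 * whole packet. Patch the TCP flags, ACK number, window, timestamps
 * and the IP length/TTL (plus the IPv4 header checksum) using the
 * values reported in the CQE.
 */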
static void
mlx5e_lro_update_hdr(struct mbuf *mb, struct mlx5_cqe64 *cqe)
{
	/* TODO: consider vlans, ip options, ... */
	struct ether_header *eh;
	uint16_t eh_type;
	struct ip6_hdr *ip6 = NULL;
	struct ip *ip4 = NULL;
	struct tcphdr *th;
	uint32_t *ts_ptr;

	eh = mtod(mb, struct ether_header *);
	eh_type = ntohs(eh->ether_type);

	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
	int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
	    (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));

	/* TODO: consider vlan */
	u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN;

	switch (eh_type) {
	case ETHERTYPE_IP:
		ip4 = (struct ip *)(eh + 1);
		th = (struct tcphdr *)(ip4 + 1);
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(eh + 1);
		th = (struct tcphdr *)(ip6 + 1);
		break;
	default:
		return;
	}

	ts_ptr = (uint32_t *)(th + 1);

	if (get_cqe_lro_tcppsh(cqe))
		th->th_flags |= TH_PUSH;

	if (tcp_ack) {
		th->th_flags |= TH_ACK;
		th->th_ack = cqe->lro_ack_seq_num;
		th->th_win = cqe->lro_tcp_win;

		/*
		 * FreeBSD only handles a 32-bit aligned timestamp option
		 * placed right after the TCP header:
		 * +--------+--------+--------+--------+
		 * |   NOP  |  NOP   |  TSopt |   10   |
		 * +--------+--------+--------+--------+
		 * |          TSval   timestamp        |
		 * +--------+--------+--------+--------+
		 * |          TSecr   timestamp        |
		 * +--------+--------+--------+--------+
		 */
		if (get_cqe_lro_timestamp_valid(cqe) &&
		    __predict_true(*ts_ptr == ntohl(TCPOPT_NOP << 24 |
		    TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 |
		    TCPOLEN_TIMESTAMP))) {
			/*
			 * cqe->timestamp is 64 bits wide:
			 * [0-31] - timestamp.
			 * [32-63] - timestamp echo reply.
			 */
			ts_ptr[1] = *(uint32_t *)&cqe->timestamp;
			ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1);
		}
	}
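	/*
	 * Rewrite the IP header to describe the aggregated packet:
	 * minimum TTL/hop limit seen, new total length and, for IPv4,
	 * a recomputed header checksum.
	 */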
	if (ip4) {
		ip4->ip_ttl = cqe->lro_min_ttl;
		ip4->ip_len = cpu_to_be16(tot_len);
		ip4->ip_sum = 0;
		ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2);
	} else {
		ip6->ip6_hlim = cqe->lro_min_ttl;
		ip6->ip6_plen = cpu_to_be16(tot_len -
		    sizeof(struct ip6_hdr));
	}
	/* TODO: handle tcp checksum */
}

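/*
 * Convert a receive completion into mbuf metadata: fix up LRO
 * aggregates, set the packet length, RSS flowid, receive interface,
 * hardware checksum status and VLAN tag.
 */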
static inline void
mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe,
    struct mlx5e_rq *rq, struct mbuf *mb,
    u32 cqe_bcnt)
{
	struct ifnet *ifp = rq->ifp;
	int lro_num_seg;	/* HW LRO session aggregated packets counter */

	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
	if (lro_num_seg > 1) {
		mlx5e_lro_update_hdr(mb, cqe);
		rq->stats.lro_packets++;
		rq->stats.lro_bytes += cqe_bcnt;
	}

	mb->m_pkthdr.len = mb->m_len = cqe_bcnt;
	/* check if a Toeplitz hash was computed */
	if (cqe->rss_hash_type != 0)
		mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result);
	else
		mb->m_pkthdr.flowid = rq->ix;

	M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
	mb->m_pkthdr.rcvif = ifp;

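	/*
	 * When the hardware validated the L2, L3 and L4 checksums, mark
	 * the mbuf so the stack can skip software verification.
	 */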
	if (likely(ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
	    ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) ==
	    (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) {
		mb->m_pkthdr.csum_flags =
		    CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		mb->m_pkthdr.csum_data = htons(0xffff);
	} else {
		rq->stats.csum_none++;
	}

	if (cqe_has_vlan(cqe)) {
		mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info);
		mb->m_flags |= M_VLANTAG;
	}
}

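/*
 * Process up to "budget" receive completions. For each CQE the
 * corresponding receive buffer is synced and either copied into a
 * small mbuf (short frames) or detached from the RQ, the mbuf header
 * is built and the packet is handed to LRO or directly to the network
 * stack. The consumed WQE is then returned to the free list. Returns
 * the number of completions processed.
 */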
static int
mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
{
#ifndef HAVE_TURBO_LRO
	struct lro_entry *queued;
#endif
	int i;

	for (i = 0; i < budget; i++) {
		struct mlx5e_rx_wqe *wqe;
		struct mlx5_cqe64 *cqe;
		struct mbuf *mb;
		__be16 wqe_counter_be;
		u16 wqe_counter;
		u32 byte_cnt;

		cqe = mlx5e_get_cqe(&rq->cq);
		if (!cqe)
			break;

		wqe_counter_be = cqe->wqe_counter;
		wqe_counter = be16_to_cpu(wqe_counter_be);
		wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
		byte_cnt = be32_to_cpu(cqe->byte_cnt);

		bus_dmamap_sync(rq->dma_tag,
		    rq->mbuf[wqe_counter].dma_map,
		    BUS_DMASYNC_POSTREAD);

		if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
			rq->stats.wqe_err++;
			goto wq_ll_pop;
		}

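		/*
		 * Frames that fit in a packet header mbuf are copied so
		 * the receive buffer can be reused; larger frames take
		 * the receive mbuf itself, which is unmapped and
		 * detached from the ring.
		 */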
		if (MHLEN >= byte_cnt &&
		    (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) {
			bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t),
			    byte_cnt);
		} else {
			mb = rq->mbuf[wqe_counter].mbuf;
			rq->mbuf[wqe_counter].mbuf = NULL;	/* safety clear */

			bus_dmamap_unload(rq->dma_tag,
			    rq->mbuf[wqe_counter].dma_map);
		}

		mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt);
		rq->stats.packets++;
#ifdef HAVE_TURBO_LRO
		if (mb->m_pkthdr.csum_flags == 0 ||
		    (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
		    rq->lro.mbuf == NULL) {
			/* normal input */
			rq->ifp->if_input(rq->ifp, mb);
		} else {
			tcp_tlro_rx(&rq->lro, mb);
		}
#else
		if (mb->m_pkthdr.csum_flags == 0 ||
		    (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
		    rq->lro.lro_cnt == 0 ||
		    tcp_lro_rx(&rq->lro, mb, 0) != 0) {
			rq->ifp->if_input(rq->ifp, mb);
		}
#endif
wq_ll_pop:
		mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
		    &wqe->next.next_wqe_index);
	}

	mlx5_cqwq_update_db_record(&rq->cq.wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();
#ifndef HAVE_TURBO_LRO
	while ((queued = SLIST_FIRST(&rq->lro.lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&rq->lro.lro_active, next);
		tcp_lro_flush(&rq->lro, queued);
	}
#endif
	return (i);
}

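/*
 * Receive completion event handler. Alternates between polling batches
 * of completions and reposting receive WQEs, so the RQ is not drained
 * under heavy traffic, then re-arms the CQ for the next event.
 */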
void
mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq)
{
	struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq);
	int i = 0;

#ifdef HAVE_PER_CQ_EVENT_PACKET
	struct mbuf *mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);

	if (mb != NULL) {
		/* this code is used for debugging purposes only */
		mb->m_pkthdr.len = mb->m_len = 15;
		memset(mb->m_data, 255, 14);
		mb->m_data[14] = rq->ix;
		mb->m_pkthdr.rcvif = rq->ifp;
		rq->ifp->if_input(rq->ifp, mb);
	}
#endif

	mtx_lock(&rq->mtx);

	/*
	 * Polling the entire CQ without posting new WQEs results in
	 * lack of receive WQEs during heavy traffic scenarios.
	 */
	while (1) {
		if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) !=
		    MLX5E_RX_BUDGET_MAX)
			break;
		i += MLX5E_RX_BUDGET_MAX;
		if (i >= MLX5E_BUDGET_MAX)
			break;
		mlx5e_post_rx_wqes(rq);
	}
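	/* post any remaining receive WQEs and re-arm the CQ */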
	mlx5e_post_rx_wqes(rq);
	mlx5e_cq_arm(&rq->cq);
#ifdef HAVE_TURBO_LRO
	tcp_tlro_flush(&rq->lro, 1);
#endif
	mtx_unlock(&rq->mtx);
}