/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/iw_cxgbe/cq.c 330897 2018-03-14 03:19:51Z eadler $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>

#include "iw_cxgbe.h"
#include "user.h"

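/*
 * Tear down a CQ in the hardware: post a FW_RI_RES_WR RESET work request on
 * the management queue, wait for the firmware reply, then free the software
 * queue, the queue memory, and the CQID.
 */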
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx)
{
	struct adapter *sc = rdev->adap;
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	struct c4iw_wr_wait wr_wait;
	struct wrqe *wr;

	wr_len = sizeof *res_wr + sizeof *res;
	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
	if (wr == NULL)
		return (0);
	res_wr = wrtod(wr);
	memset(res_wr, 0, wr_len);
	res_wr->op_nres = cpu_to_be32(
			V_FW_WR_OP(FW_RI_RES_WR) |
			V_FW_RI_RES_WR_NRES(1) |
			F_FW_WR_COMPL);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (unsigned long) &wr_wait;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(&wr_wait);

	t4_wrq_tx(sc, wr);

	c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);

	kfree(cq->sw_queue);
	contigfree(cq->queue, cq->memsize, M_DEVBUF);
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return 0;
}

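/*
 * Allocate and program a CQ: reserve a CQID, allocate the software queue
 * (kernel CQs only) and the DMA queue memory, then post a FW_RI_RES_WR WRITE
 * work request to have the firmware set up the ingress queue.
 */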
static int
create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
    struct c4iw_dev_ucontext *uctx)
{
	struct adapter *sc = rdev->adap;
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	struct c4iw_wr_wait wr_wait;
	int ret;
	struct wrqe *wr;

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}

	cq->queue = contigmalloc(cq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul,
	    PAGE_SIZE, 0);
	if (cq->queue)
		cq->dma_addr = vtophys(cq->queue);
	else {
		ret = -ENOMEM;
		goto err3;
	}

	pci_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
	if (wr == NULL)
		return (0);
	res_wr = wrtod(wr);

	memset(res_wr, 0, wr_len);
	res_wr->op_nres = cpu_to_be32(
			V_FW_WR_OP(FW_RI_RES_WR) |
			V_FW_RI_RES_WR_NRES(1) |
			F_FW_WR_COMPL);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (unsigned long) &wr_wait;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	/*
	 * FIXME: Always use first queue id for IQANDSTINDEX.  Linux does
	 * the same.
	 */
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			V_FW_RI_RES_WR_IQANUS(0) |
			V_FW_RI_RES_WR_IQANUD(1) |
			F_FW_RI_RES_WR_IQANDST |
			V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			F_FW_RI_RES_WR_IQDROPRSS |
			V_FW_RI_RES_WR_IQPCIECH(2) |
			V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
			F_FW_RI_RES_WR_IQO |
			V_FW_RI_RES_WR_IQESIZE(1));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(&wr_wait);

	t4_wrq_tx(sc, wr);

	CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->gts = (void *)((unsigned long)rman_get_virtual(sc->regs_res) +
	    sc->sge_gts_reg);
	cq->rdev = rdev;

	if (user) {
		cq->ugts = (u64)((char*)rman_get_virtual(sc->udbs_res) +
		    (cq->cqid << rdev->cqshift));
		cq->ugts &= PAGE_MASK;
		CTR5(KTR_IW_CXGBE,
		    "%s: UGTS %p cqid %x cqshift %d page_mask %x", __func__,
		    cq->ugts, cq->cqid, rdev->cqshift, PAGE_MASK);
	}
	return 0;
err4:
	contigfree(cq->queue, cq->memsize, M_DEVBUF);
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}

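/*
 * Insert a software-generated flush CQE (T4_ERR_SWFLUSH) for one RQ entry
 * into the software CQ.
 */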
static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_cqe cqe;

	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
	    cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
				 V_CQE_OPCODE(FW_RI_SEND) |
				 V_CQE_TYPE(0) |
				 V_CQE_SWCQE(1) |
				 V_CQE_QPID(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

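/*
 * Flush the RQ: generate a flush CQE for each in-use RQ entry, skipping
 * 'count' entries that already have completions, and return the number of
 * entries flushed.
 */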
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	BUG_ON(in_use < 0);
	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
	    __func__, wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq);
		flushed++;
	}
	return flushed;
}

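/*
 * Insert a software-generated flush CQE for one SQ WR into the software CQ.
 */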
static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
	    cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
				 V_CQE_OPCODE(swcqe->opcode) |
				 V_CQE_TYPE(1) |
				 V_CQE_SWCQE(1) |
				 V_CQE_QPID(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

static void advance_oldest_read(struct t4_wq *wq);

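/*
 * Flush the SQ: insert flush CQEs for all WRs pending between flush_cidx and
 * pidx, advance oldest_read past any flushed read request, and return the
 * number of WRs flushed.
 */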
int c4iw_flush_sq(struct c4iw_qp *qhp)
{
	int flushed = 0;
	struct t4_wq *wq = &qhp->wq;
	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
	struct t4_cq *cq = &chp->cq;
	int idx;
	struct t4_swsqe *swsqe;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	idx = wq->sq.flush_cidx;
	BUG_ON(idx >= wq->sq.size);
	while (idx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[idx];
		BUG_ON(swsqe->flushed);
		swsqe->flushed = 1;
		insert_sq_cqe(wq, cq, swsqe);
		if (wq->sq.oldest_read == swsqe) {
			BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
			advance_oldest_read(wq);
		}
		flushed++;
		if (++idx == wq->sq.size)
			idx = 0;
	}
	wq->sq.flush_cidx += flushed;
	if (wq->sq.flush_cidx >= wq->sq.size)
		wq->sq.flush_cidx -= wq->sq.size;
	return flushed;
}

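/*
 * Walk the software SQ from flush_cidx and move completions for signaled,
 * completed WRs into the software CQ, skipping unsignaled WRs.  Stop at the
 * first signaled WR that has not completed yet.
 */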
static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	int cidx;

	if (wq->sq.flush_cidx == -1)
		wq->sq.flush_cidx = wq->sq.cidx;
	cidx = wq->sq.flush_cidx;
	BUG_ON(cidx > wq->sq.size);

	while (cidx != wq->sq.pidx) {
		swsqe = &wq->sq.sw_sq[cidx];
		if (!swsqe->signaled) {
			if (++cidx == wq->sq.size)
				cidx = 0;
		} else if (swsqe->complete) {

			BUG_ON(swsqe->flushed);

			/*
			 * Insert this completed cqe into the swcq.
			 */
			CTR3(KTR_IW_CXGBE,
				"%s moving cqe into swcq sq idx %u cq idx %u\n",
				__func__, cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->flushed = 1;
			if (++cidx == wq->sq.size)
				cidx = 0;
			wq->sq.flush_cidx = cidx;
		} else
			break;
	}
}

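/*
 * Build a SQ-type read-request CQE in local memory from the oldest
 * outstanding read WR and the hardware read-response CQE.
 */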
static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
				struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
				 V_CQE_SWCQE(SW_CQE(hw_cqe)) |
				 V_CQE_OPCODE(FW_RI_READ_REQ) |
				 V_CQE_TYPE(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

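/*
 * Advance wq->sq.oldest_read to the next outstanding read request in the
 * software SQ, or set it to NULL if there is none.
 */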
static void advance_oldest_read(struct t4_wq *wq)
{

	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and completions that complete
 * prior unsignalled WRs.
 */
void c4iw_flush_hw_cq(struct c4iw_cq *chp)
{
	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
	struct c4iw_qp *qhp;
	struct t4_swsqe *swsqe;
	int ret;

	CTR3(KTR_IW_CXGBE, "%s c4iw_cq %p cqid 0x%x", __func__, chp,
	    chp->cq.cqid);
	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);

	/*
	 * This logic is similar to poll_cq(), but not quite the same
	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
	 * also do any translation magic that poll_cq() normally does.
	 */
	while (!ret) {
		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));

		/*
		 * drop CQEs with no associated QP
		 */
		if (qhp == NULL)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
			goto next_cqe;

		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {

			/*
			 * If we have reached here because of an async
			 * event or other error, and have an egress error,
			 * then drop the CQE.
			 */
			if (CQE_TYPE(hw_cqe) == 1) {
				goto next_cqe;
			}

			/*
			 * drop peer2peer RTR reads.
			 */
			if (CQE_WRID_STAG(hw_cqe) == 1)
				goto next_cqe;

			/*
			 * Eat completions for unsignaled read WRs.
			 */
			if (!qhp->wq.sq.oldest_read->signaled) {
				advance_oldest_read(&qhp->wq);
				goto next_cqe;
			}

			/*
			 * Don't write to the HWCQ, create a new read req CQE
			 * in local memory and move it into the swcq.
			 */
			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
			hw_cqe = &read_cqe;
			advance_oldest_read(&qhp->wq);
		}

		/*
		 * If it's a SQ completion, then do the magic to move all the
		 * unsignaled and now in-order completions into the swcq.
		 */
		if (SQ_TYPE(hw_cqe)) {
			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
			swsqe->cqe = *hw_cqe;
			swsqe->complete = 1;
			flush_completed_wrs(&qhp->wq, &chp->cq);
		} else {
			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
			*swcqe = *hw_cqe;
			swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
			t4_swcq_produce(&chp->cq);
		}
next_cqe:
		t4_hwcq_consume(&chp->cq);
		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
	}
}

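/*
 * Return 1 if this CQE completes a WR posted on the given WQ, 0 otherwise.
 */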
static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}

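/*
 * Count the RQ completions for this WQ that are already sitting in the
 * software CQ.
 */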
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *    0		    CQE returned ok.
 *    -EAGAIN       CQE skipped, try again.
 *    -EOVERFLOW    CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	CTR6(KTR_IW_CXGBE,
	    "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
	    CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
	    CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
	CTR5(KTR_IW_CXGBE,
	    "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
	    __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
	    CQE_WRID_LOW(hw_cqe));

	/*
	 * skip cqe's not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip hw cqe's if the wq is flushed.
	 */
	if (wq->flushed && !SW_CQE(hw_cqe)) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * skip TERMINATE cqes...
	 */
	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Special cqe for drain WR completions...
	 */
	if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
		*cqe = *hw_cqe;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/*
		 * If we have reached here because of an async
		 * event or other error, and have an egress error,
		 * then drop the CQE.
		 */
		if (CQE_TYPE(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup.  So ignore the completion.
		 */
		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Eat completions for unsignaled read WRs.
		 */
		if (!wq->sq.oldest_read->signaled) {
			advance_oldest_read(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
		t4_set_wq_in_error(wq);
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */
		BUG_ON(t4_rq_empty(wq));
		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
			t4_set_wq_in_error(wq);
			hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
			goto proc_cqe;
		}
		goto proc_cqe;
	}

	/*
	 * If we get here it's a send completion.
	 *
	 * Handle out of order completion. These get stuffed
	 * in the SW SQ. Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		CTR2(KTR_IW_CXGBE,
		    "%s out of order completion going in sw_sq at idx %u",
		    __func__, CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}

proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		int idx = CQE_WRID_SQ_IDX(hw_cqe);
		BUG_ON(idx >= wq->sq.size);

		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled WRs, then the
		 * delta will be 0. Handle wrapping also!
		 */
		if (idx < wq->sq.cidx)
			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
		else
			wq->sq.in_use -= idx - wq->sq.cidx;
		BUG_ON(wq->sq.in_use <= 0 || wq->sq.in_use >= wq->sq.size);

		wq->sq.cidx = (uint16_t)idx;
		CTR2(KTR_IW_CXGBE, "%s completing sq idx %u\n",
				__func__, wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		t4_sq_consume(wq);
	} else {
		CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
		     __func__, wq->rq.cidx);
		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
		BUG_ON(t4_rq_empty(wq));
		t4_rq_consume(wq);
		goto skip_cqe;
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
		     __func__, cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
		     __func__, cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0			cqe returned
 *	-ENODATA		EMPTY;
 *	-EAGAIN			caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe cqe = {0, 0}, *rd_cqe;
	struct t4_wq *wq;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (!qhp)
		wq = NULL;
	else {
		spin_lock(&qhp->lock);
		wq = &(qhp->wq);
	}
	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = &qhp->ibqp;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
	    __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
	    CQE_STATUS(&cqe));
	CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
	    __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
	    (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;
		wc->opcode = IB_WC_RECV;
		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;
		case FW_RI_BIND_MW:
			wc->opcode = IB_WC_BIND_MW;
			break;

		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_FAST_REG_MR;
			break;
		case C4IW_DRAIN_OPCODE:
			wc->opcode = IB_WC_SEND;
			break;
		default:
			printf("Unexpected opcode %d "
			       "in the CQE received for QPID = 0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}

	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {

		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	if (wq)
		spin_unlock(&qhp->lock);
	return ret;
}

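/*
 * Poll up to num_entries completions from the CQ into the wc array and
 * return the number of completions reaped, or a negative errno on error.
 */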
int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}

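/*
 * Destroy a CQ: remove it from the CQID table, wait for the reference count
 * to drop to zero, tear down the hardware queue, and free the c4iw_cq.
 */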
int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
	chp = to_c4iw_cq(ib_cq);

	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
				  : NULL;
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
	kfree(chp);
	return 0;
}

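/*
 * Create a CQ: size the hardware queue from the requested number of entries,
 * allocate and program it via create_cq(), and, for user CQs, return the
 * queue and GTS doorbell mmap keys through ib_udata.
 */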
struct ib_cq *
c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
    struct ib_ucontext *ib_context, struct ib_udata *udata)
{
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp;
	struct c4iw_create_cq_resp uresp;
	struct c4iw_ucontext *ucontext = NULL;
	int ret;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;
	int entries = attr->cqe;

	CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);

	rhp = to_c4iw_dev(ibdev);

	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
	if (!chp)
		return ERR_PTR(-ENOMEM);

	if (ib_context)
		ucontext = to_c4iw_ucontext(ib_context);

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
	entries++;

	/*
	 * entries must be multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = entries * 2;

	/*
	 * Make HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * sizeof *chp->cq.queue;

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (ucontext) {
		memsize = roundup(memsize, PAGE_SIZE);
		hwentries = memsize / sizeof *chp->cq.queue;
		while (hwentries > T4_MAX_IQ_SIZE) {
			memsize -= PAGE_SIZE;
			hwentries = memsize / sizeof *chp->cq.queue;
		}
	}
	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;

	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
	if (ret)
		goto err1;

	chp->rhp = rhp;
	chp->cq.size--;				/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
	if (ret)
		goto err2;

	if (ucontext) {
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm)
			goto err3;
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2)
			goto err4;

		memset(&uresp, 0, sizeof(uresp));
		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
					sizeof(uresp) - sizeof(uresp.reserved));
		if (ret)
			goto err5;

		mm->key = uresp.key;
		mm->addr = vtophys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.ugts;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	CTR6(KTR_IW_CXGBE,
	    "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
	    __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
	    (unsigned long long) chp->cq.dma_addr);
	return &chp->ibcq;
err5:
	kfree(mm2);
err4:
	kfree(mm);
err3:
	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err2:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
err1:
	kfree(chp);
	return ERR_PTR(ret);
}

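/* CQ resize is not supported. */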
int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return -ENOSYS;
}

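/*
 * Arm the CQ for the next completion event, solicited-only if requested.
 */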
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	ret = t4_arm_cq(&chp->cq,
			(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	spin_unlock_irqrestore(&chp->lock, flag);
	if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
		ret = 0;
	return ret;
}
#endif