1/*-
2 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c 323223 2017-09-06 15:33:23Z hselasky $
26 */
27
28#include <linux/module.h>
29#include <rdma/ib_cache.h>
30#include <rdma/ib_umem.h>
31#include "mlx5_ib.h"
32#include "user.h"
33#include <dev/mlx5/mlx5_core/transobj.h>
34#include <sys/priv.h>
35
36#define	IPV6_DEFAULT_HOPLIMIT 64
37
38
39static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
40			       const struct ib_qp_attr *attr, int attr_mask,
41			       enum ib_qp_state cur_state, enum ib_qp_state new_state);
42
43/* not supported currently */
44static int workqueue_signature;
45
46enum {
47	MLX5_IB_ACK_REQ_FREQ	= 8,
48};
49
50enum {
51	MLX5_IB_DEFAULT_SCHED_QUEUE	= 0x83,
52	MLX5_IB_DEFAULT_QP0_SCHED_QUEUE	= 0x3f,
53	MLX5_IB_LINK_TYPE_IB		= 0,
54	MLX5_IB_LINK_TYPE_ETH		= 1
55};
56
57enum {
58	MLX5_IB_SQ_STRIDE	= 6,
59	MLX5_IB_CACHE_LINE_SIZE	= 64,
60};
61
62enum {
63	MLX5_RQ_NUM_STATE	= MLX5_RQC_STATE_ERR + 1,
64	MLX5_SQ_NUM_STATE	= MLX5_SQC_STATE_ERR + 1,
65	MLX5_QP_STATE		= MLX5_QP_NUM_STATE + 1,
66	MLX5_QP_STATE_BAD	= MLX5_QP_STATE + 1,
67};
68
69static const u32 mlx5_ib_opcode[] = {
70	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
71	[IB_WR_SEND_WITH_IMM]			= MLX5_OPCODE_SEND_IMM,
72	[IB_WR_RDMA_WRITE]			= MLX5_OPCODE_RDMA_WRITE,
73	[IB_WR_RDMA_WRITE_WITH_IMM]		= MLX5_OPCODE_RDMA_WRITE_IMM,
74	[IB_WR_RDMA_READ]			= MLX5_OPCODE_RDMA_READ,
75	[IB_WR_ATOMIC_CMP_AND_SWP]		= MLX5_OPCODE_ATOMIC_CS,
76	[IB_WR_ATOMIC_FETCH_AND_ADD]		= MLX5_OPCODE_ATOMIC_FA,
77	[IB_WR_SEND_WITH_INV]			= MLX5_OPCODE_SEND_INVAL,
78	[IB_WR_LOCAL_INV]			= MLX5_OPCODE_UMR,
79	[IB_WR_FAST_REG_MR]			= MLX5_OPCODE_UMR,
80	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= MLX5_OPCODE_ATOMIC_MASKED_CS,
81	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= MLX5_OPCODE_ATOMIC_MASKED_FA,
82};
83
84struct umr_wr {
85	u64				virt_addr;
86	struct ib_pd		       *pd;
87	unsigned int			page_shift;
88	unsigned int			npages;
89	u32				length;
90	int				access_flags;
91	u32				mkey;
92};
93
94static int is_qp0(enum ib_qp_type qp_type)
95{
96	return qp_type == IB_QPT_SMI;
97}
98
99static int is_qp1(enum ib_qp_type qp_type)
100{
101	return qp_type == IB_QPT_GSI;
102}
103
104static int is_sqp(enum ib_qp_type qp_type)
105{
106	return is_qp0(qp_type) || is_qp1(qp_type);
107}
108
109static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
110{
111	return mlx5_buf_offset(&qp->buf, offset);
112}
113
114static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
115{
116	return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
117}
118
119void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
120{
121	return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
122}
123
124
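/*
 * Query the QP context from firmware and return the index of the send
 * WQE the hardware is currently processing, derived from the
 * hw_sq_wqe_counter field.  Returns a negative errno on failure.
 */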
125static int
126query_wqe_idx(struct mlx5_ib_qp *qp)
127{
128	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
129	struct mlx5_query_qp_mbox_out *outb;
130	struct mlx5_qp_context *context;
131	int ret;
132
133	outb = kzalloc(sizeof(*outb), GFP_KERNEL);
134	if (!outb)
135		return -ENOMEM;
136
137	context = &outb->ctx;
138
139	mutex_lock(&qp->mutex);
140	ret = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
141	if (ret)
142		goto out_free;
143
144	ret = be16_to_cpu(context->hw_sq_wqe_counter) & (qp->sq.wqe_cnt - 1);
145
146out_free:
147	mutex_unlock(&qp->mutex);
148	kfree(outb);
149
150	return ret;
151}
152
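/*
 * Handle signature pipelining on a drained send queue: look up the WQE
 * the hardware stopped at and, if it was signature-pipelined, cancel it
 * by clearing its opcode and marking it SIGNATURE_CANCELED so the QP
 * can be moved back to RTS.
 */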
153static int mlx5_handle_sig_pipelining(struct mlx5_ib_qp *qp)
154{
155	int wqe_idx;
156
157	wqe_idx = query_wqe_idx(qp);
158	if (wqe_idx < 0) {
159		printf("mlx5_ib: ERR: Failed to query QP 0x%x wqe index\n", qp->mqp.qpn);
160		return wqe_idx;
161	}
162
163	if (qp->sq.swr_ctx[wqe_idx].sig_piped) {
164		struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
165		struct mlx5_wqe_ctrl_seg *cwqe;
166
167		cwqe = mlx5_get_send_wqe(qp, wqe_idx);
168		cwqe->opmod_idx_opcode = cpu_to_be32(be32_to_cpu(cwqe->opmod_idx_opcode) & 0xffffff00);
169		qp->sq.swr_ctx[wqe_idx].w_list.opcode |= MLX5_OPCODE_SIGNATURE_CANCELED;
170		mlx5_ib_dbg(dev, "Cancel QP 0x%x wqe_index 0x%x\n",
171			    qp->mqp.qpn, wqe_idx);
172	}
173
174	return 0;
175}
176
177static void mlx5_ib_sqd_work(struct work_struct *work)
178{
179	struct mlx5_ib_sqd *sqd;
180	struct mlx5_ib_qp *qp;
181	struct ib_qp_attr qp_attr;
182
183	sqd = container_of(work, struct mlx5_ib_sqd, work);
184	qp = sqd->qp;
185
186	if (mlx5_handle_sig_pipelining(qp))
187		goto out;
188
189	mutex_lock(&qp->mutex);
190	if (__mlx5_ib_modify_qp(&qp->ibqp, &qp_attr, 0, IB_QPS_SQD, IB_QPS_RTS))
191		printf("mlx5_ib: ERR: Failed to resume QP 0x%x\n", qp->mqp.qpn);
192	mutex_unlock(&qp->mutex);
193out:
194	kfree(sqd);
195}
196
197static void mlx5_ib_sigerr_sqd_event(struct mlx5_ib_qp *qp)
198{
199	struct mlx5_ib_sqd *sqd;
200
201	sqd = kzalloc(sizeof(*sqd), GFP_ATOMIC);
202	if (!sqd)
203		return;
204
205	sqd->qp = qp;
206	INIT_WORK(&sqd->work, mlx5_ib_sqd_work);
207	queue_work(mlx5_ib_wq, &sqd->work);
208}
209
210static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
211{
212	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
213	struct ib_event event;
214
215	if (type == MLX5_EVENT_TYPE_SQ_DRAINED &&
216	    to_mibqp(qp)->state != IB_QPS_SQD) {
217		mlx5_ib_sigerr_sqd_event(to_mibqp(qp));
218		return;
219	}
220
221	if (type == MLX5_EVENT_TYPE_PATH_MIG)
222		to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
223
224	if (ibqp->event_handler) {
225		event.device     = ibqp->device;
226		event.element.qp = ibqp;
227		switch (type) {
228		case MLX5_EVENT_TYPE_PATH_MIG:
229			event.event = IB_EVENT_PATH_MIG;
230			break;
231		case MLX5_EVENT_TYPE_COMM_EST:
232			event.event = IB_EVENT_COMM_EST;
233			break;
234		case MLX5_EVENT_TYPE_SQ_DRAINED:
235			event.event = IB_EVENT_SQ_DRAINED;
236			break;
237		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
238			event.event = IB_EVENT_QP_LAST_WQE_REACHED;
239			break;
240		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
241			event.event = IB_EVENT_QP_FATAL;
242			break;
243		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
244			event.event = IB_EVENT_PATH_MIG_ERR;
245			break;
246		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
247			event.event = IB_EVENT_QP_REQ_ERR;
248			break;
249		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
250			event.event = IB_EVENT_QP_ACCESS_ERR;
251			break;
252		default:
253			printf("mlx5_ib: WARN: Unexpected event type %d on QP %06x\n", type, qp->qpn);
254			return;
255		}
256
257		ibqp->event_handler(&event, ibqp->qp_context);
258	}
259}
260
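/*
 * Compute the receive queue geometry (wqe_cnt, wqe_shift, max_gs),
 * either from the user-space create command or from the requested
 * capabilities, and validate it against the device limits.
 */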
261static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
262		       int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
263{
264	int wqe_size;
265	int wq_size;
266
267	/* Sanity check RQ size before proceeding */
268	if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)))
269		return -EINVAL;
270
271	if (!has_rq) {
272		qp->rq.max_gs = 0;
273		qp->rq.wqe_cnt = 0;
274		qp->rq.wqe_shift = 0;
275		cap->max_recv_wr = 0;
276		cap->max_recv_sge = 0;
277	} else {
278		if (ucmd) {
279			qp->rq.wqe_cnt = ucmd->rq_wqe_count;
280			qp->rq.wqe_shift = ucmd->rq_wqe_shift;
281			qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
282			qp->rq.max_post = qp->rq.wqe_cnt;
283		} else {
284			wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
285			wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
286			wqe_size = roundup_pow_of_two(wqe_size);
287			wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
288			wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
289			qp->rq.wqe_cnt = wq_size / wqe_size;
290			if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) {
291				mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
292					    wqe_size,
293					    MLX5_CAP_GEN(dev->mdev,
294							 max_wqe_sz_rq));
295				return -EINVAL;
296			}
297			qp->rq.wqe_shift = ilog2(wqe_size);
298			qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
299			qp->rq.max_post = qp->rq.wqe_cnt;
300		}
301	}
302
303	return 0;
304}
305
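/*
 * Per-WQE overhead in bytes of all segments other than the data
 * segments for the given QP type.  The XRC initiator case falls
 * through on purpose to add the RC overhead on top of the XRC segment.
 */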
306static int sq_overhead(enum ib_qp_type qp_type)
307{
308	int size = 0;
309
310	switch (qp_type) {
311	case IB_QPT_XRC_INI:
312		size += sizeof(struct mlx5_wqe_xrc_seg);
313		/* fall through */
314	case IB_QPT_RC:
315		size += sizeof(struct mlx5_wqe_ctrl_seg) +
316			sizeof(struct mlx5_wqe_atomic_seg) +
317			sizeof(struct mlx5_wqe_raddr_seg) +
318			sizeof(struct mlx5_wqe_umr_ctrl_seg) +
319			sizeof(struct mlx5_mkey_seg);
320		break;
321
322	case IB_QPT_XRC_TGT:
323		return 0;
324
325	case IB_QPT_UC:
326		size += sizeof(struct mlx5_wqe_ctrl_seg) +
327			sizeof(struct mlx5_wqe_raddr_seg) +
328			sizeof(struct mlx5_wqe_umr_ctrl_seg) +
329			sizeof(struct mlx5_mkey_seg);
330		break;
331
332	case IB_QPT_UD:
333	case IB_QPT_SMI:
334	case IB_QPT_GSI:
335		size += sizeof(struct mlx5_wqe_ctrl_seg) +
336			sizeof(struct mlx5_wqe_datagram_seg);
337		break;
338
339	default:
340		return -EINVAL;
341	}
342
343	return size;
344}
345
346static int calc_send_wqe(struct ib_qp_init_attr *attr)
347{
348	int inl_size = 0;
349	int size;
350
351	size = sq_overhead(attr->qp_type);
352	if (size < 0)
353		return size;
354
355	if (attr->cap.max_inline_data) {
356		inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
357			attr->cap.max_inline_data;
358	}
359
360	size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
361	return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
362}
363
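/*
 * Maximum number of scatter/gather entries that fit in a send WQE of
 * the given size once the per-QP-type segment overhead is subtracted.
 */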
364static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
365{
366	int max_sge;
367
368	if (attr->qp_type == IB_QPT_RC)
369		max_sge = (min_t(int, wqe_size, 512) -
370			   sizeof(struct mlx5_wqe_ctrl_seg) -
371			   sizeof(struct mlx5_wqe_raddr_seg)) /
372			sizeof(struct mlx5_wqe_data_seg);
373	else if (attr->qp_type == IB_QPT_XRC_INI)
374		max_sge = (min_t(int, wqe_size, 512) -
375			   sizeof(struct mlx5_wqe_ctrl_seg) -
376			   sizeof(struct mlx5_wqe_xrc_seg) -
377			   sizeof(struct mlx5_wqe_raddr_seg)) /
378			sizeof(struct mlx5_wqe_data_seg);
379	else
380		max_sge = (wqe_size - sq_overhead(attr->qp_type)) /
381			sizeof(struct mlx5_wqe_data_seg);
382
383	return min_t(int, max_sge, (wqe_size - sq_overhead(attr->qp_type)) /
384		     sizeof(struct mlx5_wqe_data_seg));
385}
386
387static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
388			struct mlx5_ib_qp *qp)
389{
390	int wqe_size;
391	int wq_size;
392
393	if (!attr->cap.max_send_wr)
394		return 0;
395
396	wqe_size = calc_send_wqe(attr);
397	mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
398	if (wqe_size < 0)
399		return wqe_size;
400
401	if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
402		mlx5_ib_warn(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
403			     wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
404		return -EINVAL;
405	}
406
407	qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
408		sizeof(struct mlx5_wqe_inline_seg);
409	attr->cap.max_inline_data = qp->max_inline_data;
410
411	wq_size = roundup_pow_of_two(attr->cap.max_send_wr * (u64)wqe_size);
412	qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
413	if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
414		mlx5_ib_warn(dev, "wqe count(%d) exceeds limits(%d)\n",
415			     qp->sq.wqe_cnt,
416			     1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
417		return -ENOMEM;
418	}
419	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
420	qp->sq.max_gs = get_send_sge(attr, wqe_size);
421	if (qp->sq.max_gs < attr->cap.max_send_sge) {
422		mlx5_ib_warn(dev, "max sge(%d) exceeds limits(%d)\n",
423			     qp->sq.max_gs, attr->cap.max_send_sge);
424		return -ENOMEM;
425	}
426
427	attr->cap.max_send_sge = qp->sq.max_gs;
428	qp->sq.max_post = wq_size / wqe_size;
429	attr->cap.max_send_wr = qp->sq.max_post;
430
431	return wq_size;
432}
433
434static int set_user_buf_size(struct mlx5_ib_dev *dev,
435			    struct mlx5_ib_qp *qp,
436			    struct mlx5_ib_create_qp *ucmd,
437			    struct ib_qp_init_attr *attr)
438{
439	int desc_sz = 1 << qp->sq.wqe_shift;
440
441	if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
442		mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
443			     desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
444		return -EINVAL;
445	}
446
447	if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
448		mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
449			     ucmd->sq_wqe_count);
450		return -EINVAL;
451	}
452
453	qp->sq.wqe_cnt = ucmd->sq_wqe_count;
454
455	if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
456		mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
457			     qp->sq.wqe_cnt,
458			     1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
459		return -EINVAL;
460	}
461
462
463	if (attr->qp_type == IB_QPT_RAW_PACKET) {
464		qp->buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
465		qp->sq_buf_size = qp->sq.wqe_cnt << 6;
466	} else {
467		qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
468			(qp->sq.wqe_cnt << 6);
469		qp->sq_buf_size = 0;
470	}
471
472	return 0;
473}
474
475static int qp_has_rq(struct ib_qp_init_attr *attr)
476{
477	if (attr->qp_type == IB_QPT_XRC_INI ||
478	    attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
479	    !attr->cap.max_recv_wr)
480		return 0;
481
482	return 1;
483}
484
485static int first_med_uuar(void)
486{
487	return 1;
488}
489
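/*
 * Advance to the next general purpose UUAR index, skipping indices 2
 * and 3 of every group of four, which are not handed out as medium or
 * high class UUARs (index 2 serves the fast-path latency class, see
 * alloc_uuar()).
 */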
490static int next_uuar(int n)
491{
492	n++;
493
494	while (((n % 4) & 2))
495		n++;
496
497	return n;
498}
499
500static int num_med_uuar(struct mlx5_uuar_info *uuari)
501{
502	int n;
503
504	n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
505		uuari->num_low_latency_uuars - 1;
506
507	return n >= 0 ? n : 0;
508}
509
510static int max_uuari(struct mlx5_uuar_info *uuari)
511{
512	return uuari->num_uars * 4;
513}
514
515static int first_hi_uuar(struct mlx5_uuar_info *uuari)
516{
517	int med;
518	int i;
519	int t;
520
521	med = num_med_uuar(uuari);
522	for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
523		t++;
524		if (t == med)
525			return next_uuar(i);
526	}
527
528	return 0;
529}
530
531static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
532{
533	int i;
534
535	for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
536		if (!test_bit(i, uuari->bitmap)) {
537			set_bit(i, uuari->bitmap);
538			uuari->count[i]++;
539			return i;
540		}
541	}
542
543	return -ENOMEM;
544}
545
546static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
547{
548	int minidx = first_med_uuar();
549	int i;
550
551	for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
552		if (uuari->count[i] < uuari->count[minidx])
553			minidx = i;
554	}
555
556	uuari->count[minidx]++;
557
558	return minidx;
559}
560
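/*
 * Pick a UUAR for the requested latency class: index 0 is the shared
 * doorbell-only UUAR, index 2 serves the fast-path class, medium class
 * UUARs are shared by lowest use count and high class UUARs are taken
 * from the bitmap (the latter two only when uuari->ver is at least 2).
 */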
561static int alloc_uuar(struct mlx5_uuar_info *uuari,
562		      enum mlx5_ib_latency_class lat)
563{
564	int uuarn = -EINVAL;
565
566	mutex_lock(&uuari->lock);
567	switch (lat) {
568	case MLX5_IB_LATENCY_CLASS_LOW:
569		uuarn = 0;
570		uuari->count[uuarn]++;
571		break;
572
573	case MLX5_IB_LATENCY_CLASS_MEDIUM:
574		if (uuari->ver < 2)
575			uuarn = -ENOMEM;
576		else
577			uuarn = alloc_med_class_uuar(uuari);
578		break;
579
580	case MLX5_IB_LATENCY_CLASS_HIGH:
581		if (uuari->ver < 2)
582			uuarn = -ENOMEM;
583		else
584			uuarn = alloc_high_class_uuar(uuari);
585		break;
586
587	case MLX5_IB_LATENCY_CLASS_FAST_PATH:
588		uuarn = 2;
589		break;
590	}
591	mutex_unlock(&uuari->lock);
592
593	return uuarn;
594}
595
596static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
597{
598	clear_bit(uuarn, uuari->bitmap);
599	--uuari->count[uuarn];
600}
601
602static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
603{
604	clear_bit(uuarn, uuari->bitmap);
605	--uuari->count[uuarn];
606}
607
608static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
609{
610	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
611	int high_uuar = nuuars - uuari->num_low_latency_uuars;
612
613	mutex_lock(&uuari->lock);
614	if (uuarn == 0) {
615		--uuari->count[uuarn];
616		goto out;
617	}
618
619	if (uuarn < high_uuar) {
620		free_med_class_uuar(uuari, uuarn);
621		goto out;
622	}
623
624	free_high_class_uuar(uuari, uuarn);
625
626out:
627	mutex_unlock(&uuari->lock);
628}
629
630static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
631{
632	switch (state) {
633	case IB_QPS_RESET:	return MLX5_QP_STATE_RST;
634	case IB_QPS_INIT:	return MLX5_QP_STATE_INIT;
635	case IB_QPS_RTR:	return MLX5_QP_STATE_RTR;
636	case IB_QPS_RTS:	return MLX5_QP_STATE_RTS;
637	case IB_QPS_SQD:	return MLX5_QP_STATE_SQD;
638	case IB_QPS_SQE:	return MLX5_QP_STATE_SQER;
639	case IB_QPS_ERR:	return MLX5_QP_STATE_ERR;
640	default:		return -1;
641	}
642}
643
644static int to_mlx5_st(enum ib_qp_type type)
645{
646	switch (type) {
647	case IB_QPT_RC:			return MLX5_QP_ST_RC;
648	case IB_QPT_UC:			return MLX5_QP_ST_UC;
649	case IB_QPT_UD:			return MLX5_QP_ST_UD;
650	case IB_QPT_XRC_INI:
651	case IB_QPT_XRC_TGT:		return MLX5_QP_ST_XRC;
652	case IB_QPT_SMI:		return MLX5_QP_ST_QP0;
653	case IB_QPT_GSI:		return MLX5_QP_ST_QP1;
654	case IB_QPT_RAW_IPV6:		return MLX5_QP_ST_RAW_IPV6;
655	case IB_QPT_RAW_PACKET:
656	case IB_QPT_RAW_ETHERTYPE:	return MLX5_QP_ST_RAW_ETHERTYPE;
657	case IB_QPT_MAX:
658	default:		return -EINVAL;
659	}
660}
661
662static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
663			     struct mlx5_ib_cq *recv_cq);
664static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
665			       struct mlx5_ib_cq *recv_cq);
666
667static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
668{
669	return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
670}
671
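/*
 * Create a user-space QP: select a UUAR (or use a dynamically allocated
 * WC UAR supplied by user space), pin the user's work queue buffer with
 * ib_umem_get(), fill the create-QP mailbox with its page list, map the
 * user doorbell record and report the chosen UUAR back through udata.
 */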
672static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
673			  struct mlx5_ib_qp *qp, struct ib_udata *udata,
674			  struct ib_qp_init_attr *attr,
675			  struct mlx5_create_qp_mbox_in **in,
676			  int *inlen,
677			  struct mlx5_exp_ib_create_qp *ucmd)
678{
679	struct mlx5_exp_ib_create_qp_resp resp;
680	struct mlx5_ib_ucontext *context;
681	int page_shift = 0;
682	int uar_index;
683	int npages;
684	u32 offset = 0;
685	int uuarn;
686	int ncont = 0;
687	int err;
688
689	context = to_mucontext(pd->uobject->context);
690	memset(&resp, 0, sizeof(resp));
691	resp.size_of_prefix = offsetof(struct mlx5_exp_ib_create_qp_resp, prefix_reserved);
692	/*
693	 * TBD: should come from the verbs when we have the API
694	 */
695	if (ucmd->exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_WC_UAR_IDX) {
696		if (ucmd->exp.wc_uar_index == MLX5_EXP_CREATE_QP_DB_ONLY_UUAR) {
697			/* Assign LATENCY_CLASS_LOW (DB only UUAR) to this QP */
698			uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
699			if (uuarn < 0) {
700				mlx5_ib_warn(dev, "DB only uuar allocation failed\n");
701				return uuarn;
702			}
703			uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
704		} else if (ucmd->exp.wc_uar_index >= MLX5_IB_MAX_CTX_DYNAMIC_UARS ||
705			   context->dynamic_wc_uar_index[ucmd->exp.wc_uar_index] ==
706			   MLX5_IB_INVALID_UAR_INDEX) {
707			mlx5_ib_warn(dev, "dynamic uuar allocation failed\n");
708			return -EINVAL;
709		} else {
710			uar_index = context->dynamic_wc_uar_index[ucmd->exp.wc_uar_index];
711			uuarn = MLX5_EXP_INVALID_UUAR;
712		}
713	} else {
714		uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
715		if (uuarn < 0) {
716			mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
717			mlx5_ib_dbg(dev, "reverting to medium latency\n");
718			uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
719			if (uuarn < 0) {
720				mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
721				mlx5_ib_dbg(dev, "reverting to high latency\n");
722				uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
723				if (uuarn < 0) {
724					mlx5_ib_warn(dev, "uuar allocation failed\n");
725					return uuarn;
726				}
727			}
728		}
729		uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
730	}
731	mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
732
733	qp->rq.offset = 0;
734	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
735	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
736
737	err = set_user_buf_size(dev, qp, (struct mlx5_ib_create_qp *)ucmd, attr);
738	if (err)
739		goto err_uuar;
740
741	if (ucmd->buf_addr && qp->buf_size) {
742		qp->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
743				       qp->buf_size, 0, 0);
744		if (IS_ERR(qp->umem)) {
745			mlx5_ib_warn(dev, "umem_get failed\n");
746			err = PTR_ERR(qp->umem);
747			goto err_uuar;
748		}
749	} else {
750		qp->umem = NULL;
751	}
752
753	if (qp->umem) {
754		mlx5_ib_cont_pages(qp->umem, ucmd->buf_addr, &npages, &page_shift,
755				   &ncont, NULL);
756		err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &offset);
757		if (err) {
758			mlx5_ib_warn(dev, "bad offset\n");
759			goto err_umem;
760		}
761		mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
762			    (unsigned long long)ucmd->buf_addr, qp->buf_size,
763			    npages, page_shift, ncont, offset);
764	}
765
766	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
767	*in = mlx5_vzalloc(*inlen);
768	if (!*in) {
769		err = -ENOMEM;
770		goto err_umem;
771	}
772	if (qp->umem)
773		mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
774	(*in)->ctx.log_pg_sz_remote_qpn =
775		cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
776	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
777
778	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
779	resp.uuar_index = uuarn;
780	qp->uuarn = uuarn;
781
782	err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
783	if (err) {
784		mlx5_ib_warn(dev, "map failed\n");
785		goto err_free;
786	}
787
788	err = ib_copy_to_udata(udata, &resp, sizeof(struct mlx5_ib_create_qp_resp));
789	if (err) {
790		mlx5_ib_err(dev, "copy failed\n");
791		goto err_unmap;
792	}
793	qp->create_type = MLX5_QP_USER;
794
795	return 0;
796
797err_unmap:
798	mlx5_ib_db_unmap_user(context, &qp->db);
799
800err_free:
801	kvfree(*in);
802
803err_umem:
804	if (qp->umem)
805		ib_umem_release(qp->umem);
806
807err_uuar:
808	free_uuar(&context->uuari, uuarn);
809	return err;
810}
811
812static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
813{
814	struct mlx5_ib_ucontext *context;
815
816	context = to_mucontext(pd->uobject->context);
817	mlx5_ib_db_unmap_user(context, &qp->db);
818	if (qp->umem)
819		ib_umem_release(qp->umem);
820	if (qp->sq_umem)
821		ib_umem_release(qp->sq_umem);
822	/*
823	 * Free only the UUARs handled by the kernel.
824	 * UUARs of UARs allocated dynamically are handled by user.
825	 */
826	if (qp->uuarn != MLX5_EXP_INVALID_UUAR)
827		free_uuar(&context->uuari, qp->uuarn);
828}
829
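/*
 * Create a kernel QP: grab a low latency UUAR/blue flame register, size
 * the send queue, allocate the work queue buffer and doorbell record,
 * fill the create-QP mailbox and allocate the software per-WQE context
 * arrays.
 */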
830static int create_kernel_qp(struct mlx5_ib_dev *dev,
831			    struct ib_qp_init_attr *init_attr,
832			    struct mlx5_ib_qp *qp,
833			    struct mlx5_create_qp_mbox_in **in, int *inlen)
834{
835	enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
836	struct mlx5_uuar_info *uuari;
837	int uar_index;
838	int uuarn;
839	int err;
840
841	uuari = &dev->mdev->priv.uuari;
842	if (init_attr->create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
843		return -EINVAL;
844
845	uuarn = alloc_uuar(uuari, lc);
846	if (uuarn < 0) {
847		mlx5_ib_warn(dev, "\n");
848		return -ENOMEM;
849	}
850
851	qp->bf = &uuari->bfs[uuarn];
852	uar_index = qp->bf->uar->index;
853
854	err = calc_sq_size(dev, init_attr, qp);
855	if (err < 0) {
856		mlx5_ib_warn(dev, "err %d\n", err);
857		goto err_uuar;
858	}
859
860	qp->rq.offset = 0;
861	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
862	qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
863
864	err = mlx5_buf_alloc(dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
865	if (err) {
866		mlx5_ib_warn(dev, "err %d\n", err);
867		goto err_uuar;
868	}
869
870	qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
871	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
872	*in = mlx5_vzalloc(*inlen);
873	if (!*in) {
874		err = -ENOMEM;
875		goto err_buf;
876	}
877	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
878	(*in)->ctx.log_pg_sz_remote_qpn =
879		cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
880	/* Set "fast registration enabled" for all kernel QPs */
881	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
882	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
883
884	mlx5_fill_page_array(&qp->buf, (*in)->pas);
885
886	err = mlx5_db_alloc(dev->mdev, &qp->db);
887	if (err) {
888		mlx5_ib_warn(dev, "err %d\n", err);
889		goto err_free;
890	}
891
892	qp->sq.swr_ctx = kcalloc(qp->sq.wqe_cnt, sizeof(*qp->sq.swr_ctx),
893				 GFP_KERNEL);
894	qp->rq.rwr_ctx = kcalloc(qp->rq.wqe_cnt, sizeof(*qp->rq.rwr_ctx),
895				 GFP_KERNEL);
896	if (!qp->sq.swr_ctx || !qp->rq.rwr_ctx) {
897		err = -ENOMEM;
898		goto err_wrid;
899	}
900	qp->create_type = MLX5_QP_KERNEL;
901
902	return 0;
903
904err_wrid:
905	mlx5_db_free(dev->mdev, &qp->db);
906	kfree(qp->sq.swr_ctx);
907	kfree(qp->rq.rwr_ctx);
908
909err_free:
910	kvfree(*in);
911
912err_buf:
913	mlx5_buf_free(dev->mdev, &qp->buf);
914
915err_uuar:
916	free_uuar(&dev->mdev->priv.uuari, uuarn);
917	return err;
918}
919
920static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
921{
922	mlx5_db_free(dev->mdev, &qp->db);
923	kfree(qp->sq.swr_ctx);
924	kfree(qp->rq.rwr_ctx);
925	mlx5_buf_free(dev->mdev, &qp->buf);
926	free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
927}
928
929static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
930{
931	enum ib_qp_type qt = attr->qp_type;
932
933	if (attr->srq || (qt == IB_QPT_XRC_TGT) || (qt == IB_QPT_XRC_INI))
934		return cpu_to_be32(MLX5_SRQ_RQ);
935	else if (!qp->has_rq)
936		return cpu_to_be32(MLX5_ZERO_LEN_RQ);
937	else
938		return cpu_to_be32(MLX5_NON_ZERO_RQ);
939}
940
941static int is_connected(enum ib_qp_type qp_type)
942{
943	if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
944		return 1;
945
946	return 0;
947}
948
949static void get_cqs(enum ib_qp_type qp_type,
950		    struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
951		    struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
952{
953	switch (qp_type) {
954	case IB_QPT_XRC_TGT:
955		*send_cq = NULL;
956		*recv_cq = NULL;
957		break;
958	case IB_QPT_XRC_INI:
959		*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
960		*recv_cq = NULL;
961		break;
962
963	case IB_QPT_SMI:
964	case IB_QPT_GSI:
965	case IB_QPT_RC:
966	case IB_QPT_UC:
967	case IB_QPT_UD:
968	case IB_QPT_RAW_IPV6:
969	case IB_QPT_RAW_ETHERTYPE:
970	case IB_QPT_RAW_PACKET:
971		*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
972		*recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
973		break;
974
975	case IB_QPT_MAX:
976	default:
977		*send_cq = NULL;
978		*recv_cq = NULL;
979		break;
980	}
981}
982
983enum {
984	MLX5_QP_END_PAD_MODE_ALIGN	= MLX5_WQ_END_PAD_MODE_ALIGN,
985	MLX5_QP_END_PAD_MODE_NONE	= MLX5_WQ_END_PAD_MODE_NONE,
986};
987
988static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
989			    struct ib_qp_init_attr *init_attr,
990			    struct ib_udata *udata, struct mlx5_ib_qp *qp)
991{
992	struct mlx5_ib_resources *devr = &dev->devr;
993	struct mlx5_core_dev *mdev = dev->mdev;
994	struct mlx5_create_qp_mbox_in *in = NULL;
995	struct mlx5_exp_ib_create_qp ucmd;
996	struct mlx5_ib_create_qp *pucmd = NULL;
997	struct mlx5_ib_cq *send_cq;
998	struct mlx5_ib_cq *recv_cq;
999	unsigned long flags;
1000	int inlen = sizeof(*in);
1001	size_t ucmd_size;
1002	int err;
1003	int st;
1004	u32 uidx;
1005	void *qpc;
1006
1007	mutex_init(&qp->mutex);
1008	spin_lock_init(&qp->sq.lock);
1009	spin_lock_init(&qp->rq.lock);
1010
1011	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1012		if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
1013			mlx5_ib_warn(dev, "block multicast loopback isn't supported\n");
1014			return -EINVAL;
1015		} else {
1016			qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
1017		}
1018	}
1019
1020	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1021		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1022
1023	if (pd && pd->uobject) {
1024		memset(&ucmd, 0, sizeof(ucmd));
1025		ucmd_size = sizeof(struct mlx5_ib_create_qp);
1026		if (ucmd_size > offsetof(struct mlx5_exp_ib_create_qp, size_of_prefix)) {
1027			mlx5_ib_warn(dev, "mlx5_ib_create_qp is too big to fit as prefix of mlx5_exp_ib_create_qp\n");
1028			return -EINVAL;
1029		}
1030		err = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, ucmd_size));
1031		if (err) {
1032			mlx5_ib_err(dev, "copy failed\n");
1033			return err;
1034		}
1035		pucmd = (struct mlx5_ib_create_qp *)&ucmd;
1036		if (ucmd.exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_UIDX)
1037			uidx = ucmd.exp.uidx;
1038		else
1039			uidx = 0xffffff;
1040
1041		qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
1042	} else {
1043		qp->wq_sig = !!workqueue_signature;
1044		uidx = 0xffffff;
1045	}
1046
1047	qp->has_rq = qp_has_rq(init_attr);
1048	err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
1049			  qp, (pd && pd->uobject) ? pucmd : NULL);
1050	if (err) {
1051		mlx5_ib_warn(dev, "err %d\n", err);
1052		return err;
1053	}
1054
1055	if (pd) {
1056		if (pd->uobject) {
1057			__u32 max_wqes =
1058				1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
1059			mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
1060			if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
1061			    ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
1062				mlx5_ib_warn(dev, "invalid rq params\n");
1063				return -EINVAL;
1064			}
1065			if (ucmd.sq_wqe_count > max_wqes) {
1066				mlx5_ib_warn(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
1067					     ucmd.sq_wqe_count, max_wqes);
1068				return -EINVAL;
1069			}
1070			err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
1071					     &inlen, &ucmd);
1072			if (err)
1073				mlx5_ib_warn(dev, "err %d\n", err);
1074		} else {
1075			if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1076				mlx5_ib_warn(dev, "Raw Eth QP is disabled for Kernel consumers\n");
1077				return -EINVAL;
1078			}
1079			err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
1080			if (err)
1081				mlx5_ib_warn(dev, "err %d\n", err);
1082			else
1083				qp->pa_lkey = to_mpd(pd)->pa_lkey;
1084		}
1085
1086		if (err)
1087			return err;
1088	} else {
1089		in = mlx5_vzalloc(sizeof(*in));
1090		if (!in)
1091			return -ENOMEM;
1092
1093		qp->create_type = MLX5_QP_EMPTY;
1094	}
1095
1096	if (is_sqp(init_attr->qp_type))
1097		qp->port = init_attr->port_num;
1098
1099	st = to_mlx5_st(init_attr->qp_type);
1100	if (st < 0) {
1101		mlx5_ib_warn(dev, "invalid service type\n");
1102		err = st;
1103		goto err_create;
1104	}
1105	in->ctx.flags |= cpu_to_be32(st << 16 | MLX5_QP_PM_MIGRATED << 11);
1106
1107	in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
1108
1109	if (qp->wq_sig)
1110		in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
1111
1112	if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
1113		in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
1114
1115	if (qp->flags & MLX5_IB_QP_CAP_RX_END_PADDING)
1116		in->ctx.flags |= cpu_to_be32(MLX5_QP_END_PAD_MODE_ALIGN << 2);
1117	else
1118		in->ctx.flags |= cpu_to_be32(MLX5_QP_END_PAD_MODE_NONE << 2);
1119
1120	if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
1121		int rcqe_sz;
1122		int scqe_sz;
1123
1124		rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
1125		scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
1126
1127		if (rcqe_sz == 128) {
1128			in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
1129		} else {
1130			in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
1131		}
1132
1133		if (init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
1134			in->ctx.cs_req = 0;
1135		} else {
1136			if (scqe_sz == 128)
1137				in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
1138			else
1139				in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
1140		}
1141	}
1142
1143	if (qp->rq.wqe_cnt) {
1144		in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
1145		in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
1146	}
1147
1148	in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
1149
1150	if (qp->sq.wqe_cnt)
1151		in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
1152	else
1153		in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
1154
1155	/* Set default resources */
1156	switch (init_attr->qp_type) {
1157	case IB_QPT_XRC_TGT:
1158		in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
1159		in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
1160		in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
1161		in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
1162		break;
1163	case IB_QPT_XRC_INI:
1164		in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
1165		in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
1166		in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
1167		break;
1168	default:
1169		if (init_attr->srq) {
1170			in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
1171			in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
1172		} else {
1173			in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
1174			in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s1)->msrq.srqn);
1175		}
1176	}
1177
1178	if (init_attr->send_cq)
1179		in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
1180
1181	if (init_attr->recv_cq)
1182		in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
1183
1184	in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
1185
1186	if (MLX5_CAP_GEN(mdev, cqe_version)) {
1187		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
1188		/* 0xffffff means we ask to work with cqe version 0 */
1189		MLX5_SET(qpc, qpc, user_index, uidx);
1190	}
1191
1192	if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
1193		if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) {
1194			mlx5_ib_warn(dev, "Raw Ethernet QP is allowed only for Ethernet link layer\n");
1195			err = -ENOSYS;
			goto err_create;
1196		}
1197		if (ucmd.exp.comp_mask & MLX5_EXP_CREATE_QP_MASK_SQ_BUFF_ADD) {
1198			qp->sq_buf_addr = ucmd.exp.sq_buf_addr;
1199		} else {
1200			mlx5_ib_warn(dev, "Raw Ethernet QP needs SQ buff address\n");
1201			err = -EINVAL;
			goto err_create;
1202		}
1203		err = -EOPNOTSUPP;
1204	} else {
1205		err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
1206		qp->mqp.event = mlx5_ib_qp_event;
1207	}
1208
1209	if (err) {
1210		mlx5_ib_warn(dev, "create qp failed\n");
1211		goto err_create;
1212	}
1213
1214	kvfree(in);
1215	/* Hardware wants QPN written in big-endian order (after
1216	 * shifting) for send doorbell.  Precompute this value to save
1217	 * a little bit when posting sends.
1218	 */
1219	qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
1220
1221	get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
1222		&send_cq, &recv_cq);
1223	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1224	mlx5_ib_lock_cqs(send_cq, recv_cq);
1225	/* Maintain device to QPs access, needed for further handling via reset
1226	 * flow
1227	 */
1228	list_add_tail(&qp->qps_list, &dev->qp_list);
1229	/* Maintain CQ to QPs access, needed for further handling via reset flow
1230	 */
1231	if (send_cq)
1232		list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
1233	if (recv_cq)
1234		list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
1235	mlx5_ib_unlock_cqs(send_cq, recv_cq);
1236	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1237
1238	return 0;
1239
1240err_create:
1241	if (qp->create_type == MLX5_QP_USER)
1242		destroy_qp_user(pd, qp);
1243	else if (qp->create_type == MLX5_QP_KERNEL)
1244		destroy_qp_kernel(dev, qp);
1245
1246	kvfree(in);
1247	return err;
1248}
1249
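/*
 * Lock the send and receive CQs in a fixed order (lower CQN first) to
 * avoid ABBA deadlocks.  The __acquire()/__release() annotations keep
 * sparse's lock balance checking consistent when one CQ is missing or
 * both completion queues are the same.
 */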
1250static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
1251	__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1252{
1253	if (send_cq) {
1254		if (recv_cq) {
1255			if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
1256				spin_lock(&send_cq->lock);
1257				spin_lock_nested(&recv_cq->lock,
1258						 SINGLE_DEPTH_NESTING);
1259			} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1260				spin_lock(&send_cq->lock);
1261				__acquire(&recv_cq->lock);
1262			} else {
1263				spin_lock(&recv_cq->lock);
1264				spin_lock_nested(&send_cq->lock,
1265						 SINGLE_DEPTH_NESTING);
1266			}
1267		} else {
1268			spin_lock(&send_cq->lock);
1269			__acquire(&recv_cq->lock);
1270		}
1271	} else if (recv_cq) {
1272		spin_lock(&recv_cq->lock);
1273		__acquire(&send_cq->lock);
1274	} else {
1275		__acquire(&send_cq->lock);
1276		__acquire(&recv_cq->lock);
1277	}
1278}
1279
1280static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
1281	__releases(&send_cq->lock) __releases(&recv_cq->lock)
1282{
1283	if (send_cq) {
1284		if (recv_cq) {
1285			if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
1286				spin_unlock(&recv_cq->lock);
1287				spin_unlock(&send_cq->lock);
1288			} else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
1289				__release(&recv_cq->lock);
1290				spin_unlock(&send_cq->lock);
1291			} else {
1292				spin_unlock(&send_cq->lock);
1293				spin_unlock(&recv_cq->lock);
1294			}
1295		} else {
1296			__release(&recv_cq->lock);
1297			spin_unlock(&send_cq->lock);
1298		}
1299	} else if (recv_cq) {
1300		__release(&send_cq->lock);
1301		spin_unlock(&recv_cq->lock);
1302	} else {
1303		__release(&recv_cq->lock);
1304		__release(&send_cq->lock);
1305	}
1306}
1307
1308static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
1309{
1310	return to_mpd(qp->ibqp.pd);
1311}
1312
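/*
 * Tear down a QP: move it to RESET if needed, unlink it from the device
 * and CQ lists used by the reset flow, clean out its completions for
 * kernel QPs, destroy the firmware QP object and free the user or
 * kernel resources backing it.
 */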
1313static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1314{
1315	struct mlx5_ib_cq *send_cq, *recv_cq;
1316	struct mlx5_modify_qp_mbox_in *in;
1317	unsigned long flags;
1318	int err;
1319
1320	in = kzalloc(sizeof(*in), GFP_KERNEL);
1321	if (!in)
1322		return;
1323
1324	if (qp->state != IB_QPS_RESET) {
1325		if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
1326			if (mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, in, 0,
1327						&qp->mqp))
1328				mlx5_ib_warn(dev, "modify QP %06x to RESET failed\n",
1329					     qp->mqp.qpn);
1330		}
1331	}
1332
1333	get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
1334		&send_cq, &recv_cq);
1335
1336	spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1337	mlx5_ib_lock_cqs(send_cq, recv_cq);
1338	/* del from lists under both locks above to protect reset flow paths */
1339	list_del(&qp->qps_list);
1340	if (send_cq)
1341		list_del(&qp->cq_send_list);
1342
1343	if (recv_cq)
1344		list_del(&qp->cq_recv_list);
1345
1346	if (qp->create_type == MLX5_QP_KERNEL) {
1347		__mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1348				   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1349		if (send_cq != recv_cq)
1350			__mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1351	}
1352	mlx5_ib_unlock_cqs(send_cq, recv_cq);
1353	spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1354
1355	/* Raw packet QPs have no mlx5 core QP object to destroy */
1356	if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
1357		err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
1358		if (err)
1359			mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
1360				     qp->mqp.qpn);
1361	}
1362
1363	kfree(in);
1364
1365	if (qp->create_type == MLX5_QP_KERNEL)
1366		destroy_qp_kernel(dev, qp);
1367	else if (qp->create_type == MLX5_QP_USER)
1368		destroy_qp_user(&get_pd(qp)->ibpd, qp);
1369}
1370
1371static const char *ib_qp_type_str(enum ib_qp_type type)
1372{
1373	switch (type) {
1374	case IB_QPT_SMI:
1375		return "IB_QPT_SMI";
1376	case IB_QPT_GSI:
1377		return "IB_QPT_GSI";
1378	case IB_QPT_RC:
1379		return "IB_QPT_RC";
1380	case IB_QPT_UC:
1381		return "IB_QPT_UC";
1382	case IB_QPT_UD:
1383		return "IB_QPT_UD";
1384	case IB_QPT_RAW_IPV6:
1385		return "IB_QPT_RAW_IPV6";
1386	case IB_QPT_RAW_ETHERTYPE:
1387		return "IB_QPT_RAW_ETHERTYPE";
1388	case IB_QPT_XRC_INI:
1389		return "IB_QPT_XRC_INI";
1390	case IB_QPT_XRC_TGT:
1391		return "IB_QPT_XRC_TGT";
1392	case IB_QPT_RAW_PACKET:
1393		return "IB_QPT_RAW_PACKET";
1394	case IB_QPT_MAX:
1395	default:
1396		return "Invalid QP type";
1397	}
1398}
1399
1400struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1401				struct ib_qp_init_attr *init_attr,
1402				struct ib_udata *udata)
1403{
1404	struct mlx5_ib_dev *dev;
1405	struct mlx5_ib_qp *qp;
1406	u16 xrcdn = 0;
1407	int err;
1408	u32 rcqn;
1409	u32 scqn;
1410
1411	init_attr->qpg_type = IB_QPG_NONE;
1412
1413	if (pd) {
1414		dev = to_mdev(pd->device);
1415	} else {
1416		/* being cautious here */
1417		if (init_attr->qp_type != IB_QPT_XRC_TGT) {
1418			printf("mlx5_ib: WARN: %s: no PD for transport %s\n", __func__, ib_qp_type_str(init_attr->qp_type));
1419			return ERR_PTR(-EINVAL);
1420		}
1421		dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
1422	}
1423
1424	switch (init_attr->qp_type) {
1425	case IB_QPT_XRC_TGT:
1426	case IB_QPT_XRC_INI:
1427		if (!MLX5_CAP_GEN(dev->mdev, xrc)) {
1428			mlx5_ib_warn(dev, "XRC not supported\n");
1429			return ERR_PTR(-ENOSYS);
1430		}
1431		init_attr->recv_cq = NULL;
1432		if (init_attr->qp_type == IB_QPT_XRC_TGT) {
1433			xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1434			init_attr->send_cq = NULL;
1435		}
1436
1437		/* fall through */
1438	case IB_QPT_RC:
1439	case IB_QPT_UC:
1440	case IB_QPT_UD:
1441	case IB_QPT_SMI:
1442	case IB_QPT_GSI:
1443	case IB_QPT_RAW_ETHERTYPE:
1444	case IB_QPT_RAW_PACKET:
1445		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1446		if (!qp)
1447			return ERR_PTR(-ENOMEM);
1448
1449		err = create_qp_common(dev, pd, init_attr, udata, qp);
1450		if (err) {
1451			mlx5_ib_warn(dev, "create_qp_common failed\n");
1452			kfree(qp);
1453			return ERR_PTR(err);
1454		}
1455
1456		if (is_qp0(init_attr->qp_type))
1457			qp->ibqp.qp_num = 0;
1458		else if (is_qp1(init_attr->qp_type))
1459			qp->ibqp.qp_num = 1;
1460		else
1461			qp->ibqp.qp_num = qp->mqp.qpn;
1462
1463		rcqn = init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1;
1464		scqn = init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1;
1465		mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
1466			    qp->ibqp.qp_num, qp->mqp.qpn, rcqn, scqn);
1467
1468		qp->xrcdn = xrcdn;
1469
1470		break;
1471
1472	case IB_QPT_RAW_IPV6:
1473	case IB_QPT_MAX:
1474	default:
1475		mlx5_ib_warn(dev, "unsupported qp type %d\n",
1476			     init_attr->qp_type);
1477		/* Don't support raw QPs */
1478		return ERR_PTR(-EINVAL);
1479	}
1480
1481	return &qp->ibqp;
1482}
1483
1484int mlx5_ib_destroy_qp(struct ib_qp *qp)
1485{
1486	struct mlx5_ib_dev *dev = to_mdev(qp->device);
1487	struct mlx5_ib_qp *mqp = to_mqp(qp);
1488
1489	destroy_qp_common(dev, mqp);
1490
1491	kfree(mqp);
1492
1493	return 0;
1494}
1495
1496static u32 atomic_mode_qp(struct mlx5_ib_dev *dev)
1497{
1498	unsigned long mask;
1499	unsigned long tmp;
1500
1501	mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp) &
1502		MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
1503
1504	tmp = find_last_bit(&mask, BITS_PER_LONG);
1505	if (tmp < 2 || tmp >= BITS_PER_LONG)
1506		return MLX5_ATOMIC_MODE_NONE;
1507
1508	if (tmp == 2)
1509		return MLX5_ATOMIC_MODE_CX;
1510
1511	return tmp << MLX5_ATOMIC_MODE_OFF;
1512}
1513
1514static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
1515				   int attr_mask)
1516{
1517	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
1518	u32 hw_access_flags = 0;
1519	u8 dest_rd_atomic;
1520	u32 access_flags;
1521
1522	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1523		dest_rd_atomic = attr->max_dest_rd_atomic;
1524	else
1525		dest_rd_atomic = qp->resp_depth;
1526
1527	if (attr_mask & IB_QP_ACCESS_FLAGS)
1528		access_flags = attr->qp_access_flags;
1529	else
1530		access_flags = qp->atomic_rd_en;
1531
1532	if (!dest_rd_atomic)
1533		access_flags &= IB_ACCESS_REMOTE_WRITE;
1534
1535	if (access_flags & IB_ACCESS_REMOTE_READ)
1536		hw_access_flags |= MLX5_QP_BIT_RRE;
1537	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1538		hw_access_flags |= (MLX5_QP_BIT_RAE |
1539				    atomic_mode_qp(dev));
1540	if (access_flags & IB_ACCESS_REMOTE_WRITE)
1541		hw_access_flags |= MLX5_QP_BIT_RWE;
1542
1543	return cpu_to_be32(hw_access_flags);
1544}
1545
1546enum {
1547	MLX5_PATH_FLAG_FL	= 1 << 0,
1548	MLX5_PATH_FLAG_FREE_AR	= 1 << 1,
1549	MLX5_PATH_FLAG_COUNTER	= 1 << 2,
1550};
1551
1552static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
1553{
1554	if (rate == IB_RATE_PORT_CURRENT) {
1555		return 0;
1556	} else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
1557		return -EINVAL;
1558	} else {
1559		while (rate != IB_RATE_2_5_GBPS &&
1560		       !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
1561			 MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
1562			--rate;
1563	}
1564
1565	return rate + MLX5_STAT_RATE_OFFSET;
1566}
1567
1568static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
1569			 struct mlx5_qp_path *path, u8 port, int attr_mask,
1570			 u32 path_flags, const struct ib_qp_attr *attr,
1571			 int alt)
1572{
1573	enum rdma_link_layer ll = dev->ib_dev.get_link_layer(&dev->ib_dev,
1574							     port);
1575	int err;
1576	int gid_type;
1577
1578	if ((ll == IB_LINK_LAYER_ETHERNET) || (ah->ah_flags & IB_AH_GRH)) {
1579		int len = dev->ib_dev.gid_tbl_len[port - 1];
1580		if (ah->grh.sgid_index >= len) {
1581			printf("mlx5_ib: ERR: sgid_index (%u) too large. max is %d\n", ah->grh.sgid_index, len - 1);
1582			return -EINVAL;
1583		}
1584	}
1585
1586	if (ll == IB_LINK_LAYER_ETHERNET) {
1587		if (!(ah->ah_flags & IB_AH_GRH))
1588			return -EINVAL;
1589
1590		err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
1591					     &gid_type);
1592		if (err)
1593			return err;
1594		err = mlx5_ib_resolve_grh(ah, path->rmac, NULL);
1595		if (err)
1596			return err;
1597		path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
1598							  ah->grh.sgid_index,
1599							  0);
1600		path->dci_cfi_prio_sl = (ah->sl & 0xf) << 4;
1601	} else {
1602		path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
1603		path->grh_mlid	= ah->src_path_bits & 0x7f;
1604		path->rlid	= cpu_to_be16(ah->dlid);
1605		if (ah->ah_flags & IB_AH_GRH)
1606			path->grh_mlid	|= 1 << 7;
1607		if (attr_mask & IB_QP_PKEY_INDEX)
1608			path->pkey_index = cpu_to_be16(alt ?
1609						       attr->alt_pkey_index :
1610						       attr->pkey_index);
1611
1612		path->dci_cfi_prio_sl = ah->sl & 0xf;
1613	}
1614
1615	path->fl_free_ar |= (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
1616
1617	if (ah->ah_flags & IB_AH_GRH) {
1618		path->mgid_index = ah->grh.sgid_index;
1619		path->hop_limit  = ah->grh.hop_limit;
1620		path->tclass_flowlabel =
1621			cpu_to_be32((ah->grh.traffic_class << 20) |
1622				    (ah->grh.flow_label));
1623		memcpy(path->rgid, ah->grh.dgid.raw, 16);
1624	}
1625
1626	err = ib_rate_to_mlx5(dev, ah->static_rate);
1627	if (err < 0)
1628		return err;
1629	path->static_rate = err;
1630	path->port = port;
1631
1632	if (attr_mask & IB_QP_TIMEOUT)
1633		path->ackto_lt = alt ? attr->alt_timeout << 3 : attr->timeout << 3;
1634
1635	return 0;
1636}
1637
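/*
 * Optional parameters that are valid for each (current state, next
 * state, transport service) combination of a modify-QP transition.
 */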
1638static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
1639	[MLX5_QP_STATE_INIT] = {
1640		[MLX5_QP_STATE_INIT] = {
1641			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE		|
1642					  MLX5_QP_OPTPAR_RAE		|
1643					  MLX5_QP_OPTPAR_RWE		|
1644					  MLX5_QP_OPTPAR_PKEY_INDEX	|
1645					  MLX5_QP_OPTPAR_PRI_PORT,
1646			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE		|
1647					  MLX5_QP_OPTPAR_PKEY_INDEX	|
1648					  MLX5_QP_OPTPAR_PRI_PORT,
1649			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX	|
1650					  MLX5_QP_OPTPAR_Q_KEY		|
1651					  MLX5_QP_OPTPAR_PRI_PORT,
1652			[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_PRI_PORT	|
1653					  MLX5_QP_OPTPAR_DC_KEY		|
1654					  MLX5_QP_OPTPAR_PKEY_INDEX	|
1655					  MLX5_QP_OPTPAR_RAE,
1656		},
1657		[MLX5_QP_STATE_RTR] = {
1658			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
1659					  MLX5_QP_OPTPAR_RRE            |
1660					  MLX5_QP_OPTPAR_RAE            |
1661					  MLX5_QP_OPTPAR_RWE            |
1662					  MLX5_QP_OPTPAR_PKEY_INDEX,
1663			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
1664					  MLX5_QP_OPTPAR_RWE            |
1665					  MLX5_QP_OPTPAR_PKEY_INDEX,
1666			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
1667					  MLX5_QP_OPTPAR_Q_KEY,
1668			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX	|
1669					   MLX5_QP_OPTPAR_Q_KEY,
1670			[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1671					  MLX5_QP_OPTPAR_RRE            |
1672					  MLX5_QP_OPTPAR_RAE            |
1673					  MLX5_QP_OPTPAR_RWE            |
1674					  MLX5_QP_OPTPAR_PKEY_INDEX,
1675			[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_PKEY_INDEX	|
1676					  MLX5_QP_OPTPAR_RAE		|
1677					  MLX5_QP_OPTPAR_DC_KEY,
1678		},
1679	},
1680	[MLX5_QP_STATE_RTR] = {
1681		[MLX5_QP_STATE_RTS] = {
1682			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
1683					  MLX5_QP_OPTPAR_RRE		|
1684					  MLX5_QP_OPTPAR_RAE		|
1685					  MLX5_QP_OPTPAR_RWE		|
1686					  MLX5_QP_OPTPAR_PM_STATE	|
1687					  MLX5_QP_OPTPAR_RNR_TIMEOUT,
1688			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH	|
1689					  MLX5_QP_OPTPAR_RWE		|
1690					  MLX5_QP_OPTPAR_PM_STATE,
1691			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1692			[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_DC_KEY		|
1693					  MLX5_QP_OPTPAR_PM_STATE	|
1694					  MLX5_QP_OPTPAR_RAE,
1695		},
1696	},
1697	[MLX5_QP_STATE_RTS] = {
1698		[MLX5_QP_STATE_RTS] = {
1699			[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE		|
1700					  MLX5_QP_OPTPAR_RAE		|
1701					  MLX5_QP_OPTPAR_RWE		|
1702					  MLX5_QP_OPTPAR_RNR_TIMEOUT	|
1703					  MLX5_QP_OPTPAR_PM_STATE	|
1704					  MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1705			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE		|
1706					  MLX5_QP_OPTPAR_PM_STATE	|
1707					  MLX5_QP_OPTPAR_ALT_ADDR_PATH,
1708			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY		|
1709					  MLX5_QP_OPTPAR_SRQN		|
1710					  MLX5_QP_OPTPAR_CQN_RCV,
1711			[MLX5_QP_ST_DCI] = MLX5_QP_OPTPAR_DC_KEY		|
1712					  MLX5_QP_OPTPAR_PM_STATE	|
1713					  MLX5_QP_OPTPAR_RAE,
1714		},
1715	},
1716	[MLX5_QP_STATE_SQER] = {
1717		[MLX5_QP_STATE_RTS] = {
1718			[MLX5_QP_ST_UD]	 = MLX5_QP_OPTPAR_Q_KEY,
1719			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
1720			[MLX5_QP_ST_UC]	 = MLX5_QP_OPTPAR_RWE,
1721			[MLX5_QP_ST_RC]	 = MLX5_QP_OPTPAR_RNR_TIMEOUT	|
1722					   MLX5_QP_OPTPAR_RWE		|
1723					   MLX5_QP_OPTPAR_RAE		|
1724					   MLX5_QP_OPTPAR_RRE,
1725			[MLX5_QP_ST_DCI]  = MLX5_QP_OPTPAR_DC_KEY	|
1726					   MLX5_QP_OPTPAR_RAE,
1727
1728		},
1729	},
1730	[MLX5_QP_STATE_SQD] = {
1731		[MLX5_QP_STATE_RTS] = {
1732			[MLX5_QP_ST_UD]	 = MLX5_QP_OPTPAR_Q_KEY,
1733			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
1734			[MLX5_QP_ST_UC]	 = MLX5_QP_OPTPAR_RWE,
1735			[MLX5_QP_ST_RC]	 = MLX5_QP_OPTPAR_RNR_TIMEOUT	|
1736					   MLX5_QP_OPTPAR_RWE		|
1737					   MLX5_QP_OPTPAR_RAE		|
1738					   MLX5_QP_OPTPAR_RRE,
1739		},
1740	},
1741};
1742
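/*
 * Translate a single IB_QP_* attribute mask bit into the matching
 * MLX5_QP_OPTPAR_* bits; attributes that need no optional parameter
 * translate to zero.
 */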
1743static int ib_nr_to_mlx5_nr(int ib_mask)
1744{
1745	switch (ib_mask) {
1746	case IB_QP_STATE:
1747		return 0;
1748	case IB_QP_CUR_STATE:
1749		return 0;
1750	case IB_QP_EN_SQD_ASYNC_NOTIFY:
1751		return 0;
1752	case IB_QP_ACCESS_FLAGS:
1753		return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
1754			MLX5_QP_OPTPAR_RAE;
1755	case IB_QP_PKEY_INDEX:
1756		return MLX5_QP_OPTPAR_PKEY_INDEX;
1757	case IB_QP_PORT:
1758		return MLX5_QP_OPTPAR_PRI_PORT;
1759	case IB_QP_QKEY:
1760		return MLX5_QP_OPTPAR_Q_KEY;
1761	case IB_QP_AV:
1762		return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
1763			MLX5_QP_OPTPAR_PRI_PORT;
1764	case IB_QP_PATH_MTU:
1765		return 0;
1766	case IB_QP_TIMEOUT:
1767		return MLX5_QP_OPTPAR_ACK_TIMEOUT;
1768	case IB_QP_RETRY_CNT:
1769		return MLX5_QP_OPTPAR_RETRY_COUNT;
1770	case IB_QP_RNR_RETRY:
1771		return MLX5_QP_OPTPAR_RNR_RETRY;
1772	case IB_QP_RQ_PSN:
1773		return 0;
1774	case IB_QP_MAX_QP_RD_ATOMIC:
1775		return MLX5_QP_OPTPAR_SRA_MAX;
1776	case IB_QP_ALT_PATH:
1777		return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
1778	case IB_QP_MIN_RNR_TIMER:
1779		return MLX5_QP_OPTPAR_RNR_TIMEOUT;
1780	case IB_QP_SQ_PSN:
1781		return 0;
1782	case IB_QP_MAX_DEST_RD_ATOMIC:
1783		return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
1784			MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
1785	case IB_QP_PATH_MIG_STATE:
1786		return MLX5_QP_OPTPAR_PM_STATE;
1787	case IB_QP_CAP:
1788		return 0;
1789	case IB_QP_DEST_QPN:
1790		return 0;
1791	}
1792	return 0;
1793}
1794
1795static int ib_mask_to_mlx5_opt(int ib_mask)
1796{
1797	int result = 0;
1798	int i;
1799
1800	for (i = 0; i < 8 * sizeof(int); i++) {
1801		if ((1 << i) & ib_mask)
1802			result |= ib_nr_to_mlx5_nr(1 << i);
1803	}
1804
1805	return result;
1806}
1807
1808static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1809			       const struct ib_qp_attr *attr, int attr_mask,
1810			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
1811{
1812	static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
1813		[MLX5_QP_STATE_RST] = {
1814			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
1815			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
1816			[MLX5_QP_STATE_INIT]	= MLX5_CMD_OP_RST2INIT_QP,
1817		},
1818		[MLX5_QP_STATE_INIT]  = {
1819			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
1820			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
1821			[MLX5_QP_STATE_INIT]	= MLX5_CMD_OP_INIT2INIT_QP,
1822			[MLX5_QP_STATE_RTR]	= MLX5_CMD_OP_INIT2RTR_QP,
1823		},
1824		[MLX5_QP_STATE_RTR]   = {
1825			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
1826			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
1827			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_RTR2RTS_QP,
1828		},
1829		[MLX5_QP_STATE_RTS]   = {
1830			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
1831			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
1832			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_RTS2RTS_QP,
1833		},
1834		[MLX5_QP_STATE_SQD] = {
1835			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
1836			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
1837			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_SQD_RTS_QP,
1838		},
1839		[MLX5_QP_STATE_SQER] = {
1840			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
1841			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
1842			[MLX5_QP_STATE_RTS]	= MLX5_CMD_OP_SQERR2RTS_QP,
1843		},
1844		[MLX5_QP_STATE_ERR] = {
1845			[MLX5_QP_STATE_RST]	= MLX5_CMD_OP_2RST_QP,
1846			[MLX5_QP_STATE_ERR]	= MLX5_CMD_OP_2ERR_QP,
1847		}
1848	};
1849
1850	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1851	struct mlx5_ib_qp *qp = to_mqp(ibqp);
1852	struct mlx5_ib_cq *send_cq, *recv_cq;
1853	struct mlx5_qp_context *context;
1854	struct mlx5_modify_qp_mbox_in *in;
1855	struct mlx5_ib_pd *pd;
1856	enum mlx5_qp_state mlx5_cur, mlx5_new;
1857	enum mlx5_qp_optpar optpar;
1858	int sqd_event;
1859	int mlx5_st;
1860	int err;
1861	u16 op;
1862
1863	in = kzalloc(sizeof(*in), GFP_KERNEL);
1864	if (!in)
1865		return -ENOMEM;
1866
1867	context = &in->ctx;
1868	err = to_mlx5_st(ibqp->qp_type);
1869	if (err < 0)
1870		goto out;
1871
1872	context->flags = cpu_to_be32(err << 16);
1873
1874	if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
1875		context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1876	} else {
1877		switch (attr->path_mig_state) {
1878		case IB_MIG_MIGRATED:
1879			context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1880			break;
1881		case IB_MIG_REARM:
1882			context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
1883			break;
1884		case IB_MIG_ARMED:
1885			context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
1886			break;
1887		}
1888	}
1889
1890	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
1891		context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
1892	} else if (ibqp->qp_type == IB_QPT_UD) {
1893		context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1894	} else if (attr_mask & IB_QP_PATH_MTU) {
1895		if (attr->path_mtu < IB_MTU_256 ||
1896		    attr->path_mtu > IB_MTU_4096) {
1897			mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
1898			err = -EINVAL;
1899			goto out;
1900		}
1901		context->mtu_msgmax = (attr->path_mtu << 5) |
1902				      (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg);
1903	}
1904
1905	if (attr_mask & IB_QP_DEST_QPN)
1906		context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
1907
1908	if (attr_mask & IB_QP_PKEY_INDEX)
1909		context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
1910
1911	/* todo implement counter_index functionality */
1912
1913	if (is_sqp(ibqp->qp_type))
1914		context->pri_path.port = qp->port;
1915
1916	if (attr_mask & IB_QP_PORT)
1917		context->pri_path.port = attr->port_num;
1918
1919	if (attr_mask & IB_QP_AV) {
1920		err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
1921				    attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
1922				    attr_mask, 0, attr, 0);
1923		if (err)
1924			goto out;
1925	}
1926
1927	if (attr_mask & IB_QP_TIMEOUT)
1928		context->pri_path.ackto_lt |= attr->timeout << 3;
1929
1930	if (attr_mask & IB_QP_ALT_PATH) {
1931		err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
1932				    attr->alt_port_num,
1933				    attr_mask  | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
1934				    0, attr, 1);
1935		if (err)
1936			goto out;
1937	}
1938
1939	pd = get_pd(qp);
1940	get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
1941		&send_cq, &recv_cq);
1942
1943	context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
1944	context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
1945	context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
1946	context->params1  = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
1947
1948	if (attr_mask & IB_QP_RNR_RETRY)
1949		context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1950
1951	if (attr_mask & IB_QP_RETRY_CNT)
1952		context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1953
1954	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1955		if (attr->max_rd_atomic)
1956			context->params1 |=
1957				cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1958	}
1959
1960	if (attr_mask & IB_QP_SQ_PSN)
1961		context->next_send_psn = cpu_to_be32(attr->sq_psn & 0xffffff);
1962
1963	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1964		if (attr->max_dest_rd_atomic)
1965			context->params2 |=
1966				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1967	}
1968
1969	if ((attr_mask & IB_QP_ACCESS_FLAGS) &&
1970	    (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
1971	    !dev->enable_atomic_resp) {
1972		mlx5_ib_warn(dev, "atomic responder is not supported\n");
1973		err = -EINVAL;
1974		goto out;
1975	}
1976
1977	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
1978		context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
1979
1980	if (attr_mask & IB_QP_MIN_RNR_TIMER)
1981		context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1982
1983	if (attr_mask & IB_QP_RQ_PSN)
1984		context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn & 0xffffff);
1985
1986	if (attr_mask & IB_QP_QKEY)
1987		context->qkey = cpu_to_be32(attr->qkey);
1988
1989	if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1990		context->db_rec_addr = cpu_to_be64(qp->db.dma);
1991
1992	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
1993	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1994		sqd_event = 1;
1995	else
1996		sqd_event = 0;
1997
1998	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1999		context->sq_crq_size |= cpu_to_be16(1 << 4);
2000
2001	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
2002		u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
2003			       qp->port) - 1;
2004		struct mlx5_ib_port *mibport = &dev->port[port_num];
2005
2006		context->qp_counter_set_usr_page |=
2007			cpu_to_be32(mibport->q_cnt_id << 24);
2008	}
2009
2010	mlx5_cur = to_mlx5_state(cur_state);
2011	mlx5_new = to_mlx5_state(new_state);
2012	mlx5_st = to_mlx5_st(ibqp->qp_type);
	if (mlx5_st < 0) {
		err = -EINVAL;
		goto out;
	}
2015
	if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
	    !optab[mlx5_cur][mlx5_new]) {
		/* No valid firmware command for this transition; free 'in'. */
		err = -EINVAL;
		goto out;
	}
2019
2020	op = optab[mlx5_cur][mlx5_new];
2021	optpar = ib_mask_to_mlx5_opt(attr_mask);
2022	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
2023	in->optparam = cpu_to_be32(optpar);
2024
2025	if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
2026		err = -EOPNOTSUPP;
2027	else
2028		err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
2029				  &qp->mqp);
2030	if (err)
2031		goto out;
2032
2033	qp->state = new_state;
2034
2035	if (attr_mask & IB_QP_ACCESS_FLAGS)
2036		qp->atomic_rd_en = attr->qp_access_flags;
2037	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
2038		qp->resp_depth = attr->max_dest_rd_atomic;
2039	if (attr_mask & IB_QP_PORT)
2040		qp->port = attr->port_num;
2041	if (attr_mask & IB_QP_ALT_PATH)
2042		qp->alt_port = attr->alt_port_num;
2043
2044	/*
2045	 * If we moved a kernel QP to RESET, clean up all old CQ
2046	 * entries and reinitialize the QP.
2047	 */
2048	if (new_state == IB_QPS_RESET && !ibqp->uobject) {
2049		mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
2050				 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
2051		if (send_cq != recv_cq)
2052			mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
2053
2054		qp->rq.head = 0;
2055		qp->rq.tail = 0;
2056		qp->sq.head = 0;
2057		qp->sq.tail = 0;
2058		qp->sq.cur_post = 0;
2059		qp->sq.last_poll = 0;
2060		if (qp->db.db) {
2061			qp->db.db[MLX5_RCV_DBR] = 0;
2062			qp->db.db[MLX5_SND_DBR] = 0;
2063		}
2064	}
2065
2066out:
2067	kfree(in);
2068	return err;
2069}
2070
2071static int ignored_ts_check(enum ib_qp_type qp_type)
2072{
2073	return 0;
2074}
2075
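/*
 * Verbs entry point for QP state changes.  Under qp->mutex it validates the
 * requested transition with ib_modify_qp_is_ok(), bounds-checks the port,
 * P_Key index and rd_atomic values against device capabilities, treats
 * RESET -> RESET as a no-op and then hands off to __mlx5_ib_modify_qp().
 *
 * Minimal usage sketch for a kernel consumer (not taken from this file;
 * error handling omitted and the attribute values are only illustrative):
 *
 *	struct ib_qp_attr attr = {
 *		.qp_state	 = IB_QPS_INIT,
 *		.pkey_index	 = 0,
 *		.port_num	 = 1,
 *		.qp_access_flags = IB_ACCESS_REMOTE_WRITE,
 *	};
 *
 *	err = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
 *			   IB_QP_PORT | IB_QP_ACCESS_FLAGS);
 */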
2076int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2077		      int attr_mask, struct ib_udata *udata)
2078{
2079	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2080	struct mlx5_ib_qp *qp = to_mqp(ibqp);
2081	enum ib_qp_state cur_state, new_state;
2082	int err = -EINVAL;
2083	int port;
2084
2085	mutex_lock(&qp->mutex);
2086
2087	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
2088	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
2089
2090	if (!ignored_ts_check(ibqp->qp_type) &&
2091	    !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
2092		goto out;
2093
2094	if ((attr_mask & IB_QP_PORT) &&
2095	    (attr->port_num == 0 ||
2096	     attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)))
2097		goto out;
2098
2099	if (attr_mask & IB_QP_PKEY_INDEX) {
2100		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2101		if (attr->pkey_index >=
2102		    dev->mdev->port_caps[port - 1].pkey_table_len)
2103			goto out;
2104	}
2105
2106	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
2107	    attr->max_rd_atomic >
2108	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp)))
2109		goto out;
2110
2111	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
2112	    attr->max_dest_rd_atomic >
2113	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp)))
2114		goto out;
2115
2116	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
2117		err = 0;
2118		goto out;
2119	}
2120
2121	err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
2122
2123out:
2124	mutex_unlock(&qp->mutex);
2125	return err;
2126}
2127
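/*
 * Return nonzero if posting nreq more work requests would overflow the work
 * queue.  The head/tail distance is re-read under the completion queue lock
 * so that entries freed by a concurrent poller are taken into account before
 * overflow is reported.
 */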
2128static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
2129{
2130	struct mlx5_ib_cq *cq;
2131	unsigned cur;
2132
2133	cur = wq->head - wq->tail;
2134	if (likely(cur + nreq < wq->max_post))
2135		return 0;
2136
2137	cq = to_mcq(ib_cq);
2138	spin_lock(&cq->lock);
2139	cur = wq->head - wq->tail;
2140	spin_unlock(&cq->lock);
2141
2142	return cur + nreq >= wq->max_post;
2143}
2144
2145static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
2146					  u64 remote_addr, u32 rkey)
2147{
2148	rseg->raddr    = cpu_to_be64(remote_addr);
2149	rseg->rkey     = cpu_to_be32(rkey);
2150	rseg->reserved = 0;
2151}
2152
2153static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
2154			     struct ib_send_wr *wr)
2155{
2156	memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
2157	dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
2158	dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2159}
2160
2161static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
2162{
2163	dseg->byte_count = cpu_to_be32(sg->length);
2164	dseg->lkey       = cpu_to_be32(sg->lkey);
2165	dseg->addr       = cpu_to_be64(sg->addr);
2166}
2167
2168static __be16 get_klm_octo(int npages)
2169{
2170	return cpu_to_be16(ALIGN(npages, 8) / 2);
2171}
2172
2173static __be64 frwr_mkey_mask(void)
2174{
2175	u64 result;
2176
2177	result = MLX5_MKEY_MASK_LEN		|
2178		MLX5_MKEY_MASK_PAGE_SIZE	|
2179		MLX5_MKEY_MASK_START_ADDR	|
2180		MLX5_MKEY_MASK_EN_RINVAL	|
2181		MLX5_MKEY_MASK_KEY		|
2182		MLX5_MKEY_MASK_LR		|
2183		MLX5_MKEY_MASK_LW		|
2184		MLX5_MKEY_MASK_RR		|
2185		MLX5_MKEY_MASK_RW		|
2186		MLX5_MKEY_MASK_A		|
2187		MLX5_MKEY_MASK_SMALL_FENCE	|
2188		MLX5_MKEY_MASK_FREE;
2189
2190	return cpu_to_be64(result);
2191}
2192
2193static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
2194				 struct ib_send_wr *wr, int li)
2195{
2196	memset(umr, 0, sizeof(*umr));
2197
2198	if (li) {
2199		umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
2200		umr->flags = 1 << 7;
2201		return;
2202	}
2203
2204	umr->flags = (1 << 5); /* fail if not free */
2205	umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
2206	umr->mkey_mask = frwr_mkey_mask();
2207}
2208
2209static u8 get_umr_flags(int acc)
2210{
2211	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
2212	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
2213	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
2214	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
2215		MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
2216}
2217
2218static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
2219			     int li, int *writ)
2220{
2221	memset(seg, 0, sizeof(*seg));
2222	if (li) {
2223		seg->status = MLX5_MKEY_STATUS_FREE;
2224		return;
2225	}
2226
2227	seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
2228		     MLX5_ACCESS_MODE_MTT;
2229	*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
2230	seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
2231	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
2232	seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
2233	seg->len = cpu_to_be64(wr->wr.fast_reg.length);
2234	seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
2235	seg->log2_page_size = wr->wr.fast_reg.page_shift;
2236}
2237
2238static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
2239			   struct ib_send_wr *wr,
2240			   struct mlx5_core_dev *mdev,
2241			   struct mlx5_ib_pd *pd,
2242			   int writ)
2243{
2244	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
2245	u64 *page_list = wr->wr.fast_reg.page_list->page_list;
2246	u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
2247	int i;
2248
2249	for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
2250		mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
2251	dseg->addr = cpu_to_be64(mfrpl->map);
2252	dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
2253	dseg->lkey = cpu_to_be32(pd->pa_lkey);
2254}
2255
2256static __be32 send_ieth(struct ib_send_wr *wr)
2257{
2258	switch (wr->opcode) {
2259	case IB_WR_SEND_WITH_IMM:
2260	case IB_WR_RDMA_WRITE_WITH_IMM:
2261		return wr->ex.imm_data;
2262
2263	case IB_WR_SEND_WITH_INV:
2264		return cpu_to_be32(wr->ex.invalidate_rkey);
2265
2266	default:
2267		return 0;
2268	}
2269}
2270
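/*
 * Simple XOR based signature used when workqueue signatures are enabled:
 * calc_sig() XORs all bytes of the WQE and returns the complement, and
 * calc_wq_sig() derives the number of bytes to cover from the size field
 * stored in the WQE itself.
 */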
2271static u8 calc_sig(void *wqe, int size)
2272{
2273	u8 *p = wqe;
2274	u8 res = 0;
2275	int i;
2276
2277	for (i = 0; i < size; i++)
2278		res ^= p[i];
2279
2280	return ~res;
2281}
2282
2283static u8 calc_wq_sig(void *wqe)
2284{
2285	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
2286}
2287
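/*
 * Copy the scatter/gather payload inline into the send WQE, right after an
 * mlx5_wqe_inline_seg header.  The copy wraps back to the start of the SQ
 * buffer when it reaches qend, fails with -ENOMEM if the total length
 * exceeds max_inline_data, and returns the consumed WQE size in units of
 * 16 bytes through *sz.
 */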
2288static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
2289			    void *wqe, int *sz)
2290{
2291	struct mlx5_wqe_inline_seg *seg;
2292	void *qend = qp->sq.qend;
2293	void *addr;
2294	int inl = 0;
2295	int copy;
2296	int len;
2297	int i;
2298
2299	seg = wqe;
2300	wqe += sizeof(*seg);
2301	for (i = 0; i < wr->num_sge; i++) {
2302		addr = (void *)(uintptr_t)(wr->sg_list[i].addr);
2303		len  = wr->sg_list[i].length;
2304		inl += len;
2305
2306		if (unlikely(inl > qp->max_inline_data))
2307			return -ENOMEM;
2308
2309		if (unlikely(wqe + len > qend)) {
2310			copy = (int)(qend - wqe);
2311			memcpy(wqe, addr, copy);
2312			addr += copy;
2313			len -= copy;
2314			wqe = mlx5_get_send_wqe(qp, 0);
2315		}
2316		memcpy(wqe, addr, len);
2317		wqe += len;
2318	}
2319
2320	seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
2321
2322	*sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
2323
2324	return 0;
2325}
2326
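/*
 * Build the WQE segments for a fast-register (IB_WR_FAST_REG_MR) or local
 * invalidate (IB_WR_LOCAL_INV) work request: a UMR control segment followed
 * by an mkey context segment, each wrapping at the end of the SQ buffer if
 * needed.  For fast registration a data segment pointing at the DMA-mapped
 * page list is appended as well.  Inline sends are rejected for these
 * opcodes.
 */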
2327static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
2328			  struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
2329{
2330	int writ = 0;
2331	int li;
2332
2333	li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
2334	if (unlikely(wr->send_flags & IB_SEND_INLINE))
2335		return -EINVAL;
2336
2337	set_frwr_umr_segment(*seg, wr, li);
2338	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2339	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2340	if (unlikely((*seg == qp->sq.qend)))
2341		*seg = mlx5_get_send_wqe(qp, 0);
2342	set_mkey_segment(*seg, wr, li, &writ);
2343	*seg += sizeof(struct mlx5_mkey_seg);
2344	*size += sizeof(struct mlx5_mkey_seg) / 16;
2345	if (unlikely((*seg == qp->sq.qend)))
2346		*seg = mlx5_get_send_wqe(qp, 0);
2347	if (!li) {
2348		if (unlikely(wr->wr.fast_reg.page_list_len >
2349			     wr->wr.fast_reg.page_list->max_page_list_len))
2350			return	-ENOMEM;
2351
2352		set_frwr_pages(*seg, wr, mdev, pd, writ);
2353		*seg += sizeof(struct mlx5_wqe_data_seg);
2354		*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
2355	}
2356	return 0;
2357}
2358
2359static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
2360{
2361	__be32 *p = NULL;
2362	int tidx = idx;
2363	int i, j;
2364
2365	pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
2366	for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
2367		if ((i & 0xf) == 0) {
2368			void *buf = mlx5_get_send_wqe(qp, tidx);
2369			tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
2370			p = buf;
2371			j = 0;
2372		}
2373		pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
2374			 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
2375			 be32_to_cpu(p[j + 3]));
2376	}
2377}
2378
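/*
 * Copy a WQE into the BlueFlame buffer 64 bytes at a time, wrapping the
 * source pointer when it hits the end of the send queue buffer so that a
 * WQE which straddles the end of the ring is written out contiguously.
 */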
2379static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
2380			 unsigned bytecnt, struct mlx5_ib_qp *qp)
2381{
2382	while (bytecnt > 0) {
2383		__iowrite64_copy(dst++, src++, 8);
2384		__iowrite64_copy(dst++, src++, 8);
2385		__iowrite64_copy(dst++, src++, 8);
2386		__iowrite64_copy(dst++, src++, 8);
2387		__iowrite64_copy(dst++, src++, 8);
2388		__iowrite64_copy(dst++, src++, 8);
2389		__iowrite64_copy(dst++, src++, 8);
2390		__iowrite64_copy(dst++, src++, 8);
2391		bytecnt -= 64;
2392		if (unlikely(src == qp->sq.qend))
2393			src = mlx5_get_send_wqe(qp, 0);
2394	}
2395}
2396
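/*
 * Choose the fence mode bits for the control segment: strong ordering for a
 * fenced local invalidate, "small and fence" when a fence is already pending
 * and the new request also asks for one, otherwise propagate the cached
 * fence state (or none).
 */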
2397static u8 get_fence(u8 fence, struct ib_send_wr *wr)
2398{
2399	if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
2400		     wr->send_flags & IB_SEND_FENCE))
2401		return MLX5_FENCE_MODE_STRONG_ORDERING;
2402
2403	if (unlikely(fence)) {
2404		if (wr->send_flags & IB_SEND_FENCE)
2405			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
2406		else
2407			return fence;
2408
2409	} else {
2410		return 0;
2411	}
2412}
2413
2414static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
2415		     struct mlx5_wqe_ctrl_seg **ctrl,
2416		     struct ib_send_wr *wr, unsigned *idx,
2417		     int *size, int nreq)
2418{
2419	int err = 0;
2420
2421	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
2422		mlx5_ib_warn(to_mdev(qp->ibqp.device), "work queue overflow\n");
2423		err = -ENOMEM;
2424		return err;
2425	}
2426
2427	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
2428	*seg = mlx5_get_send_wqe(qp, *idx);
2429	*ctrl = *seg;
2430	*(u32 *)(*seg + 8) = 0;
2431	(*ctrl)->imm = send_ieth(wr);
2432	(*ctrl)->fm_ce_se = qp->sq_signal_bits |
2433		(wr->send_flags & IB_SEND_SIGNALED ?
2434		 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
2435		(wr->send_flags & IB_SEND_SOLICITED ?
2436		 MLX5_WQE_CTRL_SOLICITED : 0);
2437
2438	*seg += sizeof(**ctrl);
2439	*size = sizeof(**ctrl) / 16;
2440
2441	return err;
2442}
2443
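/*
 * Complete the control segment (opcode and WQE index, DS count and QPN,
 * fence/completion bits and an optional signature), record the wr_id and
 * bookkeeping in the software send context, and advance cur_post by the
 * number of basic blocks this WQE occupies.
 */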
2444static void finish_wqe(struct mlx5_ib_qp *qp,
2445		       struct mlx5_wqe_ctrl_seg *ctrl,
2446		       u8 size, unsigned idx,
2447		       struct ib_send_wr *wr,
2448		       int nreq, u8 fence, u8 next_fence,
2449		       u32 mlx5_opcode)
2450{
2451	u8 opmod = 0;
2452
2453	ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
2454					     mlx5_opcode | ((u32)opmod << 24));
2455	ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
2456	ctrl->fm_ce_se |= fence;
2457	qp->fm_cache = next_fence;
2458	if (unlikely(qp->wq_sig))
2459		ctrl->signature = calc_wq_sig(ctrl);
2460
2461	qp->sq.swr_ctx[idx].wrid = wr->wr_id;
2462	qp->sq.swr_ctx[idx].w_list.opcode = mlx5_opcode;
2463	qp->sq.swr_ctx[idx].wqe_head = qp->sq.head + nreq;
2464	qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
2465	qp->sq.swr_ctx[idx].w_list.next = qp->sq.cur_post;
2466	qp->sq.swr_ctx[idx].sig_piped = 0;
2467}
2468
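/*
 * Post a chain of send work requests.  For every WR the routine checks the
 * opcode and SGE count, reserves a slot with begin_wqe(), adds the transport
 * specific segments (XRC SRQ number, RDMA remote address, UMR segments for
 * fast-register/invalidate, or the UD address vector), then either copies
 * the payload inline or emits data pointer segments, and finally seals the
 * WQE with finish_wqe().  After the loop the SQ doorbell record is updated
 * and the WQE is written to the UAR (or BlueFlame buffer) with the required
 * memory barriers.
 *
 * Minimal consumer sketch (not taken from this file; dma_addr, len, lkey,
 * raddr and rkey stand for values obtained elsewhere, error handling
 * omitted):
 *
 *	struct ib_sge sge = { .addr = dma_addr, .length = len, .lkey = lkey };
 *	struct ib_send_wr wr = {
 *		.wr_id      = 1,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IB_WR_RDMA_WRITE,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *	struct ib_send_wr *bad_wr;
 *
 *	wr.wr.rdma.remote_addr = raddr;
 *	wr.wr.rdma.rkey = rkey;
 *	err = ib_post_send(qp, &wr, &bad_wr);
 */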
2469int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2470		      struct ib_send_wr **bad_wr)
2471{
2472	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
2473	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2474	struct mlx5_core_dev *mdev = dev->mdev;
2475	struct mlx5_ib_qp *qp = to_mqp(ibqp);
2476	struct mlx5_wqe_data_seg *dpseg;
2477	struct mlx5_wqe_xrc_seg *xrc;
2478	struct mlx5_bf *bf = qp->bf;
2479	int uninitialized_var(size);
2480	void *qend = qp->sq.qend;
2481	unsigned long flags;
2482	unsigned idx;
2483	int err = 0;
2484	int inl = 0;
2485	int num_sge;
2486	void *seg;
2487	int nreq;
2488	int i;
2489	u8 next_fence = 0;
2490	u8 fence;
2491
2493	spin_lock_irqsave(&qp->sq.lock, flags);
2494
2495	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
2496		err = -EIO;
2497		*bad_wr = wr;
2498		nreq = 0;
2499		goto out;
2500	}
2501
2502	for (nreq = 0; wr; nreq++, wr = wr->next) {
2503		if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
2504			mlx5_ib_warn(dev, "Invalid opcode 0x%x\n", wr->opcode);
2505			err = -EINVAL;
2506			*bad_wr = wr;
2507			goto out;
2508		}
2509
2510		fence = qp->fm_cache;
2511		num_sge = wr->num_sge;
2512		if (unlikely(num_sge > qp->sq.max_gs)) {
2513			mlx5_ib_warn(dev, "Max gs exceeded %d (max = %d)\n", wr->num_sge, qp->sq.max_gs);
2514			err = -ENOMEM;
2515			*bad_wr = wr;
2516			goto out;
2517		}
2518
2519		err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
2520		if (err) {
2521			mlx5_ib_warn(dev, "Failed to prepare WQE\n");
2522			err = -ENOMEM;
2523			*bad_wr = wr;
2524			goto out;
2525		}
2526
2527		switch (ibqp->qp_type) {
2528		case IB_QPT_XRC_INI:
2529			xrc = seg;
2530			xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
2531			seg += sizeof(*xrc);
2532			size += sizeof(*xrc) / 16;
2533			/* fall through */
2534		case IB_QPT_RC:
2535			switch (wr->opcode) {
2536			case IB_WR_RDMA_READ:
2537			case IB_WR_RDMA_WRITE:
2538			case IB_WR_RDMA_WRITE_WITH_IMM:
2539				set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2540					      wr->wr.rdma.rkey);
2541				seg += sizeof(struct mlx5_wqe_raddr_seg);
2542				size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2543				break;
2544
2545			case IB_WR_ATOMIC_CMP_AND_SWP:
2546			case IB_WR_ATOMIC_FETCH_AND_ADD:
2547			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2548				mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
2549				err = -ENOSYS;
2550				*bad_wr = wr;
2551				goto out;
2552
2553			case IB_WR_LOCAL_INV:
2554				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2555				qp->sq.swr_ctx[idx].wr_data = IB_WR_LOCAL_INV;
2556				ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
2557				err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2558				if (err) {
2559					mlx5_ib_warn(dev, "Failed to prepare LOCAL_INV WQE\n");
2560					*bad_wr = wr;
2561					goto out;
2562				}
2563				num_sge = 0;
2564				break;
2565
2566			case IB_WR_FAST_REG_MR:
2567				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2568				qp->sq.swr_ctx[idx].wr_data = IB_WR_FAST_REG_MR;
2569				ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2570				err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2571				if (err) {
2572					mlx5_ib_warn(dev, "Failed to prepare FAST_REG_MR WQE\n");
2573					*bad_wr = wr;
2574					goto out;
2575				}
2576				num_sge = 0;
2577				break;
2578
2579			default:
2580				break;
2581			}
2582			break;
2583
2584		case IB_QPT_UC:
2585			switch (wr->opcode) {
2586			case IB_WR_RDMA_WRITE:
2587			case IB_WR_RDMA_WRITE_WITH_IMM:
2588				set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2589					      wr->wr.rdma.rkey);
2590				seg  += sizeof(struct mlx5_wqe_raddr_seg);
2591				size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2592				break;
2593
2594			default:
2595				break;
2596			}
2597			break;
2598
2599		case IB_QPT_SMI:
2600			if (!mlx5_core_is_pf(mdev)) {
2601				err = -EINVAL;
2602				mlx5_ib_warn(dev, "Only physical function is allowed to send SMP MADs\n");
2603				*bad_wr = wr;
2604				goto out;
			}
			/* fall through */
		case IB_QPT_GSI:
2607		case IB_QPT_UD:
2608			set_datagram_seg(seg, wr);
2609			seg += sizeof(struct mlx5_wqe_datagram_seg);
2610			size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
2611			if (unlikely((seg == qend)))
2612				seg = mlx5_get_send_wqe(qp, 0);
2613			break;
2614		default:
2615			break;
2616		}
2617
2618		if (wr->send_flags & IB_SEND_INLINE && num_sge) {
2619			int uninitialized_var(sz);
2620
2621			err = set_data_inl_seg(qp, wr, seg, &sz);
2622			if (unlikely(err)) {
2623				mlx5_ib_warn(dev, "Failed to prepare inline data segment\n");
2624				*bad_wr = wr;
2625				goto out;
2626			}
2627			inl = 1;
2628			size += sz;
2629		} else {
2630			dpseg = seg;
2631			for (i = 0; i < num_sge; i++) {
2632				if (unlikely(dpseg == qend)) {
2633					seg = mlx5_get_send_wqe(qp, 0);
2634					dpseg = seg;
2635				}
2636				if (likely(wr->sg_list[i].length)) {
2637					set_data_ptr_seg(dpseg, wr->sg_list + i);
2638					size += sizeof(struct mlx5_wqe_data_seg) / 16;
2639					dpseg++;
2640				}
2641			}
2642		}
2643
2644		finish_wqe(qp, ctrl, size, idx, wr, nreq,
2645			   get_fence(fence, wr), next_fence,
2646			   mlx5_ib_opcode[wr->opcode]);
2647		if (0)
2648			dump_wqe(qp, idx, size);
2649	}
2650
2651out:
2652	if (likely(nreq)) {
2653		qp->sq.head += nreq;
2654
2655		/* Make sure that descriptors are written before
2656		 * updating doorbell record and ringing the doorbell
2657		 */
2658		wmb();
2659
2660		qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
2661
		/* Make sure doorbell record is visible to the HCA before
		 * we ring the doorbell.
		 */
2664		wmb();
2665
2666		if (bf->need_lock)
2667			spin_lock(&bf->lock);
2668		else
2669			__acquire(&bf->lock);
2670
2671		/* TBD enable WC */
2672		if (BF_ENABLE && nreq == 1 && bf->uuarn && inl && size > 1 &&
2673		    size <= bf->buf_size / 16) {
2674			mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
2675			/* wc_wmb(); */
2676		} else {
2677			mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
2678				     MLX5_GET_DOORBELL_LOCK(&bf->lock32));
2679			/* Make sure doorbells don't leak out of SQ spinlock
2680			 * and reach the HCA out of order.
2681			 */
2682			mmiowb();
2683		}
2684		bf->offset ^= bf->buf_size;
2685		if (bf->need_lock)
2686			spin_unlock(&bf->lock);
2687		else
2688			__release(&bf->lock);
2689	}
2690
2691	spin_unlock_irqrestore(&qp->sq.lock, flags);
2692
2693	return err;
2694}
2695
2696static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
2697{
2698	sig->signature = calc_sig(sig, size);
2699}
2700
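/*
 * Post a chain of receive work requests.  Each WR is written at the current
 * RQ ring index as a list of data pointer segments (the first slot is
 * reserved for the signature segment when wq_sig is enabled); a shorter SG
 * list is terminated with an entry carrying MLX5_INVALID_LKEY.  The wr_id is
 * saved for completion processing and the RQ doorbell record is updated once
 * after the loop, behind a write barrier.
 */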
2701int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2702		      struct ib_recv_wr **bad_wr)
2703{
2704	struct mlx5_ib_qp *qp = to_mqp(ibqp);
2705	struct mlx5_wqe_data_seg *scat;
2706	struct mlx5_rwqe_sig *sig;
2707	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2708	struct mlx5_core_dev *mdev = dev->mdev;
2709	unsigned long flags;
2710	int err = 0;
2711	int nreq;
2712	int ind;
2713	int i;
2714
2715	spin_lock_irqsave(&qp->rq.lock, flags);
2716
2717	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
2718		err = -EIO;
2719		*bad_wr = wr;
2720		nreq = 0;
2721		goto out;
2722	}
2723
2724	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2725
2726	for (nreq = 0; wr; nreq++, wr = wr->next) {
2727		if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2728			err = -ENOMEM;
2729			*bad_wr = wr;
2730			goto out;
2731		}
2732
2733		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2734			err = -EINVAL;
2735			*bad_wr = wr;
2736			goto out;
2737		}
2738
2739		scat = get_recv_wqe(qp, ind);
2740		if (qp->wq_sig)
2741			scat++;
2742
2743		for (i = 0; i < wr->num_sge; i++)
2744			set_data_ptr_seg(scat + i, wr->sg_list + i);
2745
2746		if (i < qp->rq.max_gs) {
2747			scat[i].byte_count = 0;
2748			scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
2749			scat[i].addr       = 0;
2750		}
2751
2752		if (qp->wq_sig) {
2753			sig = (struct mlx5_rwqe_sig *)scat;
2754			set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
2755		}
2756
2757		qp->rq.rwr_ctx[ind].wrid = wr->wr_id;
2758
2759		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2760	}
2761
2762out:
2763	if (likely(nreq)) {
2764		qp->rq.head += nreq;
2765
2766		/* Make sure that descriptors are written before
2767		 * doorbell record.
2768		 */
2769		wmb();
2770
2771		*qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
2772	}
2773
2774	spin_unlock_irqrestore(&qp->rq.lock, flags);
2775
2776	return err;
2777}
2778
2779static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
2780{
2781	switch (mlx5_state) {
2782	case MLX5_QP_STATE_RST:      return IB_QPS_RESET;
2783	case MLX5_QP_STATE_INIT:     return IB_QPS_INIT;
2784	case MLX5_QP_STATE_RTR:      return IB_QPS_RTR;
2785	case MLX5_QP_STATE_RTS:      return IB_QPS_RTS;
2786	case MLX5_QP_STATE_SQ_DRAINING:
2787	case MLX5_QP_STATE_SQD:      return IB_QPS_SQD;
2788	case MLX5_QP_STATE_SQER:     return IB_QPS_SQE;
2789	case MLX5_QP_STATE_ERR:      return IB_QPS_ERR;
2790	default:		     return -1;
2791	}
2792}
2793
2794static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
2795{
2796	switch (mlx5_mig_state) {
2797	case MLX5_QP_PM_ARMED:		return IB_MIG_ARMED;
2798	case MLX5_QP_PM_REARM:		return IB_MIG_REARM;
2799	case MLX5_QP_PM_MIGRATED:	return IB_MIG_MIGRATED;
2800	default: return -1;
2801	}
2802}
2803
2804static int to_ib_qp_access_flags(int mlx5_flags)
2805{
2806	int ib_flags = 0;
2807
2808	if (mlx5_flags & MLX5_QP_BIT_RRE)
2809		ib_flags |= IB_ACCESS_REMOTE_READ;
2810	if (mlx5_flags & MLX5_QP_BIT_RWE)
2811		ib_flags |= IB_ACCESS_REMOTE_WRITE;
2812	if (mlx5_flags & MLX5_QP_BIT_RAE)
2813		ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
2814
2815	return ib_flags;
2816}
2817
2818static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
2819				struct mlx5_qp_path *path)
2820{
2821	struct mlx5_core_dev *dev = ibdev->mdev;
2822
2823	memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
2824	ib_ah_attr->port_num	  = path->port;
2825
2826	if (ib_ah_attr->port_num == 0 ||
2827	    ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
2828		return;
2829
2830	ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
2831
2832	ib_ah_attr->dlid	  = be16_to_cpu(path->rlid);
2833	ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
2834	ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
2835	ib_ah_attr->ah_flags      = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
2836	if (ib_ah_attr->ah_flags) {
2837		ib_ah_attr->grh.sgid_index = path->mgid_index;
2838		ib_ah_attr->grh.hop_limit  = path->hop_limit;
2839		ib_ah_attr->grh.traffic_class =
2840			(be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
2841		ib_ah_attr->grh.flow_label =
2842			be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
2843		memcpy(ib_ah_attr->grh.dgid.raw,
2844		       path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
2845	}
2846}
2847
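/*
 * Query the firmware QP context through mlx5_core_qp_query() and translate
 * it back into ib_qp_attr fields (state, MTU, PSNs, access flags, primary
 * and alternate path, timeouts and retry counts); ib_qp_init_attr is filled
 * from the software QP state.  RAW_PACKET QPs are not supported here.
 *
 * Typical use, as a sketch:
 *
 *	struct ib_qp_attr attr;
 *	struct ib_qp_init_attr init_attr;
 *
 *	err = ib_query_qp(qp, &attr, IB_QP_STATE, &init_attr);
 */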
2848int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
2849		     struct ib_qp_init_attr *qp_init_attr)
2850{
2851	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2852	struct mlx5_ib_qp *qp = to_mqp(ibqp);
2853	struct mlx5_query_qp_mbox_out *outb;
2854	struct mlx5_qp_context *context;
2855	int mlx5_state;
2856	int err = 0;
2857
2858	mutex_lock(&qp->mutex);
2859	if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
2860		err = -EOPNOTSUPP;
2861		goto out;
2862	} else {
2863		outb = kzalloc(sizeof(*outb), GFP_KERNEL);
2864		if (!outb) {
2865			err = -ENOMEM;
2866			goto out;
2867		}
2868
2869		context = &outb->ctx;
2870		err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb,
2871					 sizeof(*outb));
2872		if (err) {
2873			kfree(outb);
2874			goto out;
2875		}
2876
2877		mlx5_state = be32_to_cpu(context->flags) >> 28;
2878
2879		qp->state		     = to_ib_qp_state(mlx5_state);
2880		qp_attr->path_mtu	     = context->mtu_msgmax >> 5;
2881		qp_attr->path_mig_state	     =
2882			to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
2883		qp_attr->qkey		     = be32_to_cpu(context->qkey);
2884		qp_attr->rq_psn		     = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
2885		qp_attr->sq_psn		     = be32_to_cpu(context->next_send_psn) & 0xffffff;
2886		qp_attr->dest_qp_num	     = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
2887		qp_attr->qp_access_flags     =
2888			to_ib_qp_access_flags(be32_to_cpu(context->params2));
2889
2890		if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
2891			to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
2892			to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
			qp_attr->alt_pkey_index = be16_to_cpu(context->alt_path.pkey_index);
2894			qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
2895		}
2896
2897		qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
2898		qp_attr->port_num = context->pri_path.port;
2899
2900		/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
2901		qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
2902
2903		qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
2904
2905		qp_attr->max_dest_rd_atomic =
2906			1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
2907		qp_attr->min_rnr_timer	    =
2908			(be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
2909		qp_attr->timeout	    = context->pri_path.ackto_lt >> 3;
2910		qp_attr->retry_cnt	    = (be32_to_cpu(context->params1) >> 16) & 0x7;
2911		qp_attr->rnr_retry	    = (be32_to_cpu(context->params1) >> 13) & 0x7;
2912		qp_attr->alt_timeout	    = context->alt_path.ackto_lt >> 3;
2913
2915		kfree(outb);
2916	}
2917
2918	qp_attr->qp_state	     = qp->state;
2919	qp_attr->cur_qp_state	     = qp_attr->qp_state;
2920	qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
2921	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
2922
2923	if (!ibqp->uobject) {
2924		qp_attr->cap.max_send_wr  = qp->sq.max_post;
2925		qp_attr->cap.max_send_sge = qp->sq.max_gs;
2926		qp_init_attr->qp_context = ibqp->qp_context;
2927	} else {
2928		qp_attr->cap.max_send_wr  = 0;
2929		qp_attr->cap.max_send_sge = 0;
2930	}
2931
2932	qp_init_attr->qp_type = ibqp->qp_type;
2933	qp_init_attr->recv_cq = ibqp->recv_cq;
2934	qp_init_attr->send_cq = ibqp->send_cq;
2935	qp_init_attr->srq = ibqp->srq;
2936	qp_attr->cap.max_inline_data = qp->max_inline_data;
2937
2938	qp_init_attr->cap	     = qp_attr->cap;
2939
2940	qp_init_attr->create_flags = 0;
2941	if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
2942		qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
2943
2944	qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
2945		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2946
2947out:
2948	mutex_unlock(&qp->mutex);
2949	return err;
2950}
2951
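/*
 * Allocate an XRC domain through the firmware, provided the device reports
 * XRC support; mlx5_ib_dealloc_xrcd() below releases the xrcdn again.
 */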
2952struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
2953					  struct ib_ucontext *context,
2954					  struct ib_udata *udata)
2955{
2956	struct mlx5_ib_dev *dev = to_mdev(ibdev);
2957	struct mlx5_ib_xrcd *xrcd;
2958	int err;
2959
2960	if (!MLX5_CAP_GEN(dev->mdev, xrc))
2961		return ERR_PTR(-ENOSYS);
2962
2963	xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
2964	if (!xrcd)
2965		return ERR_PTR(-ENOMEM);
2966
2967	err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
2968	if (err) {
2969		kfree(xrcd);
2970		return ERR_PTR(-ENOMEM);
2971	}
2972
2973	return &xrcd->ibxrcd;
2974}
2975
2976int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
2977{
2978	struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
2979	u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
2980	int err;
2981
2982	err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
2983	if (err) {
2984		mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
2985		return err;
2986	}
2987
2988	kfree(xrcd);
2989
2990	return 0;
2991}
2992