// SPDX-License-Identifier: GPL-2.0

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

/* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */

#include <linux/vmalloc.h>
#include <net/addrconf.h>
#include <rdma/erdma-abi.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

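/*
 * Fill the MTT portion of a CREATE_QP command for one queue buffer: when the
 * number of MTT entries exceeds ERDMA_MAX_INLINE_MTT_ENTRIES, pass the DMA
 * address of the MTT buffer (one indirection level); otherwise inline the
 * entries directly into the command (zero levels).
 */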
static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
				      u64 *addr0, u64 *addr1)
{
	struct erdma_mtt *mtt = mem->mtt;

	if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
		*addr0 = mtt->buf_dma;
		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
				   ERDMA_MR_MTT_1LEVEL);
	} else {
		*addr0 = mtt->buf[0];
		memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
				   ERDMA_MR_MTT_0LEVEL);
	}
}

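/*
 * Build and post the CREATE_QP command: queue depths, PD and CQ bindings,
 * the SQ/RQ buffer addresses (or MTTs) and the doorbell record addresses.
 * Kernel QPs use contiguous DMA buffers; user QPs describe their buffers via
 * MTTs and may use the extended doorbell offsets of the context. On success
 * the firmware-assigned cookie is saved in qp->attrs.cookie.
 */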
static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
{
	struct erdma_dev *dev = to_edev(qp->ibqp.device);
	struct erdma_pd *pd = to_epd(qp->ibqp.pd);
	struct erdma_cmdq_create_qp_req req;
	struct erdma_uqp *user_qp;
	u64 resp0, resp1;
	int err;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_CREATE_QP);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
			      ilog2(qp->attrs.sq_size)) |
		   FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
			      ilog2(qp->attrs.rq_size)) |
		   FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);

	if (rdma_is_kernel_res(&qp->ibqp.res)) {
		u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;

		req.sq_cqn_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
				   pgsz_range) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
		req.rq_cqn_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
				   pgsz_range) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);

		req.sq_mtt_cfg =
			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
				   ERDMA_MR_MTT_0LEVEL);
		req.rq_mtt_cfg = req.sq_mtt_cfg;

		req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
		req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
		req.sq_dbrec_dma = qp->kern_qp.sq_dbrec_dma;
		req.rq_dbrec_dma = qp->kern_qp.rq_dbrec_dma;
	} else {
		user_qp = &qp->user_qp;
		req.sq_cqn_mtt_cfg = FIELD_PREP(
			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
			ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
		req.sq_cqn_mtt_cfg |=
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);

		req.rq_cqn_mtt_cfg = FIELD_PREP(
			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
			ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
		req.rq_cqn_mtt_cfg |=
			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);

		req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
					     user_qp->sq_mem.mtt_nents);

		req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
					     user_qp->rq_mem.mtt_nents);

		assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
					  &req.sq_buf_addr, req.sq_mtt_entry);
		assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
					  &req.rq_buf_addr, req.rq_mtt_entry);

		req.sq_dbrec_dma = user_qp->sq_dbrec_dma;
		req.rq_dbrec_dma = user_qp->rq_dbrec_dma;

		if (uctx->ext_db.enable) {
			req.sq_cqn_mtt_cfg |=
				FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1);
			req.db_cfg =
				FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK,
					   uctx->ext_db.sdb_off) |
				FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK,
					   uctx->ext_db.rdb_off);
		}
	}

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
				  &resp1);
	if (!err)
		qp->attrs.cookie =
			FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);

	return err;
}

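/*
 * Post a REG_MR command. Small page counts are inlined into the command;
 * larger regions reference either a contiguous one-level MTT or the top of
 * a multi-level (scattered) MTT. DMA MRs carry no page information.
 */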
static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
{
	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
	u32 mtt_level = ERDMA_MR_MTT_0LEVEL;
	struct erdma_cmdq_reg_mr_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);

	if (mr->type == ERDMA_MR_TYPE_FRMR ||
	    mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
		if (mr->mem.mtt->continuous) {
			req.phy_addr[0] = mr->mem.mtt->buf_dma;
			mtt_level = ERDMA_MR_MTT_1LEVEL;
		} else {
			req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist);
			mtt_level = mr->mem.mtt->level;
		}
	} else if (mr->type != ERDMA_MR_TYPE_DMA) {
		memcpy(req.phy_addr, mr->mem.mtt->buf,
		       MTT_SIZE(mr->mem.page_cnt));
	}

	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
			      ilog2(mr->mem.page_size)) |
		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);

	if (mr->type == ERDMA_MR_TYPE_DMA)
		goto post_cmd;

	if (mr->type == ERDMA_MR_TYPE_NORMAL) {
		req.start_va = mr->mem.va;
		req.size = mr->mem.len;
	}

	if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) {
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1);
		req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK,
				       PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT);
		req.size_h = upper_32_bits(mr->mem.len);
		req.mtt_cnt_h = mr->mem.page_cnt >> 20;
	}

post_cmd:
	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
{
	struct erdma_dev *dev = to_edev(cq->ibcq.device);
	struct erdma_cmdq_create_cq_req req;
	struct erdma_mem *mem;
	u32 page_size;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_CREATE_CQ);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
		   FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);

	if (rdma_is_kernel_res(&cq->ibcq.res)) {
		page_size = SZ_32M;
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
				       ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
		req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);

		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
				       ERDMA_MR_MTT_0LEVEL);

		req.first_page_offset = 0;
		req.cq_dbrec_dma = cq->kern_cq.dbrec_dma;
	} else {
		mem = &cq->user_cq.qbuf_mem;
		req.cfg0 |=
			FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
				   ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
		if (mem->mtt_nents == 1) {
			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
			req.cfg1 |=
				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
					   ERDMA_MR_MTT_0LEVEL);
		} else {
			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
			req.cfg1 |=
				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
					   ERDMA_MR_MTT_1LEVEL);
		}
		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
				       mem->mtt_nents);

		req.first_page_offset = mem->page_offset;
		req.cq_dbrec_dma = cq->user_cq.dbrec_dma;

		if (uctx->ext_db.enable) {
			req.cfg1 |= FIELD_PREP(
				ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
			req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
					      uctx->ext_db.cdb_off);
		}
	}

	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

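/*
 * Resource index allocators: one bitmap per resource type, scanned
 * round-robin starting at next_alloc_idx, wrapping once before giving up
 * with -ENOSPC.
 */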
static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
{
	int idx;
	unsigned long flags;

	spin_lock_irqsave(&res_cb->lock, flags);
	idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
				 res_cb->next_alloc_idx);
	if (idx == res_cb->max_cap) {
		idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
		if (idx == res_cb->max_cap) {
			res_cb->next_alloc_idx = 1;
			spin_unlock_irqrestore(&res_cb->lock, flags);
			return -ENOSPC;
		}
	}

	set_bit(idx, res_cb->bitmap);
	res_cb->next_alloc_idx = idx + 1;
	spin_unlock_irqrestore(&res_cb->lock, flags);

	return idx;
}

static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
{
	unsigned long flags;
	u32 used;

	spin_lock_irqsave(&res_cb->lock, flags);
	used = __test_and_clear_bit(idx, res_cb->bitmap);
	spin_unlock_irqrestore(&res_cb->lock, flags);
	WARN_ON(!used);
}

static struct rdma_user_mmap_entry *
erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
			     u32 size, u8 mmap_flag, u64 *mmap_offset)
{
	struct erdma_user_mmap_entry *entry =
		kzalloc(sizeof(*entry), GFP_KERNEL);
	int ret;

	if (!entry)
		return NULL;

	entry->address = (u64)address;
	entry->mmap_flag = mmap_flag;

	size = PAGE_ALIGN(size);

	ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
					  size);
	if (ret) {
		kfree(entry);
		return NULL;
	}

	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);

	return &entry->rdma_entry;
}

int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
		       struct ib_udata *unused)
{
	struct erdma_dev *dev = to_edev(ibdev);

	memset(attr, 0, sizeof(*attr));

	attr->max_mr_size = dev->attrs.max_mr_size;
	attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
	attr->vendor_part_id = dev->pdev->device;
	attr->hw_ver = dev->pdev->revision;
	attr->max_qp = dev->attrs.max_qp - 1;
	attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
	attr->max_qp_rd_atom = dev->attrs.max_ord;
	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
	attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
	ibdev->local_dma_lkey = dev->attrs.local_dma_key;
	attr->max_send_sge = dev->attrs.max_send_sge;
	attr->max_recv_sge = dev->attrs.max_recv_sge;
	attr->max_sge_rd = dev->attrs.max_sge_rd;
	attr->max_cq = dev->attrs.max_cq - 1;
	attr->max_cqe = dev->attrs.max_cqe;
	attr->max_mr = dev->attrs.max_mr;
	attr->max_pd = dev->attrs.max_pd;
	attr->max_mw = dev->attrs.max_mw;
	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;

	if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
		attr->atomic_cap = IB_ATOMIC_GLOB;

	attr->fw_ver = dev->attrs.fw_version;

	if (dev->netdev)
		addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
				    dev->netdev->dev_addr);

	return 0;
}

int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
		    union ib_gid *gid)
{
	struct erdma_dev *dev = to_edev(ibdev);

	memset(gid, 0, sizeof(*gid));
	ether_addr_copy(gid->raw, dev->attrs.peer_addr);

	return 0;
}

int erdma_query_port(struct ib_device *ibdev, u32 port,
		     struct ib_port_attr *attr)
{
	struct erdma_dev *dev = to_edev(ibdev);
	struct net_device *ndev = dev->netdev;

	memset(attr, 0, sizeof(*attr));

	attr->gid_tbl_len = 1;
	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
	attr->max_msg_sz = -1;

	if (!ndev)
		goto out;

	ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
	attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
	attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
	if (netif_running(ndev) && netif_carrier_ok(ndev))
		dev->state = IB_PORT_ACTIVE;
	else
		dev->state = IB_PORT_DOWN;
	attr->state = dev->state;

out:
	if (dev->state == IB_PORT_ACTIVE)
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	else
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

	return 0;
}

int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
			     struct ib_port_immutable *port_immutable)
{
	port_immutable->gid_tbl_len = 1;
	port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;

	return 0;
}

int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct erdma_pd *pd = to_epd(ibpd);
	struct erdma_dev *dev = to_edev(ibpd->device);
	int pdn;

	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
	if (pdn < 0)
		return pdn;

	pd->pdn = pdn;

	return 0;
}

int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct erdma_pd *pd = to_epd(ibpd);
	struct erdma_dev *dev = to_edev(ibpd->device);

	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);

	return 0;
}

static void erdma_flush_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct erdma_qp *qp =
		container_of(dwork, struct erdma_qp, reflush_dwork);
	struct erdma_cmdq_reflush_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_REFLUSH);
	req.qpn = QP_ID(qp);
	req.sq_pi = qp->kern_qp.sq_pi;
	req.rq_pi = qp->kern_qp.rq_pi;
	erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
}

static int erdma_qp_validate_cap(struct erdma_dev *dev,
				 struct ib_qp_init_attr *attrs)
{
	if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
	    (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
	    (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
	    (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
	    (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
	    !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
		return -EINVAL;
	}

	return 0;
}

static int erdma_qp_validate_attr(struct erdma_dev *dev,
				  struct ib_qp_init_attr *attrs)
{
	if (attrs->qp_type != IB_QPT_RC)
		return -EOPNOTSUPP;

	if (attrs->srq)
		return -EOPNOTSUPP;

	if (!attrs->send_cq || !attrs->recv_cq)
		return -EOPNOTSUPP;

	return 0;
}

static void free_kernel_qp(struct erdma_qp *qp)
{
	struct erdma_dev *dev = qp->dev;

	vfree(qp->kern_qp.swr_tbl);
	vfree(qp->kern_qp.rwr_tbl);

	if (qp->kern_qp.sq_buf)
		dma_free_coherent(&dev->pdev->dev,
				  qp->attrs.sq_size << SQEBB_SHIFT,
				  qp->kern_qp.sq_buf,
				  qp->kern_qp.sq_buf_dma_addr);

	if (qp->kern_qp.sq_dbrec)
		dma_pool_free(dev->db_pool, qp->kern_qp.sq_dbrec,
			      qp->kern_qp.sq_dbrec_dma);

	if (qp->kern_qp.rq_buf)
		dma_free_coherent(&dev->pdev->dev,
				  qp->attrs.rq_size << RQE_SHIFT,
				  qp->kern_qp.rq_buf,
				  qp->kern_qp.rq_buf_dma_addr);

	if (qp->kern_qp.rq_dbrec)
		dma_pool_free(dev->db_pool, qp->kern_qp.rq_dbrec,
			      qp->kern_qp.rq_dbrec_dma);
}

static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
			  struct ib_qp_init_attr *attrs)
{
	struct erdma_kqp *kqp = &qp->kern_qp;
	int size;

	if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
		kqp->sig_all = 1;

	kqp->sq_pi = 0;
	kqp->sq_ci = 0;
	kqp->rq_pi = 0;
	kqp->rq_ci = 0;
	kqp->hw_sq_db =
		dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
	kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;

	kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64));
	kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64));
	if (!kqp->swr_tbl || !kqp->rwr_tbl)
		goto err_out;

	size = qp->attrs.sq_size << SQEBB_SHIFT;
	kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
					 &kqp->sq_buf_dma_addr, GFP_KERNEL);
	if (!kqp->sq_buf)
		goto err_out;

	kqp->sq_dbrec =
		dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->sq_dbrec_dma);
	if (!kqp->sq_dbrec)
		goto err_out;

	size = qp->attrs.rq_size << RQE_SHIFT;
	kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
					 &kqp->rq_buf_dma_addr, GFP_KERNEL);
	if (!kqp->rq_buf)
		goto err_out;

	kqp->rq_dbrec =
		dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->rq_dbrec_dma);
	if (!kqp->rq_dbrec)
		goto err_out;

	return 0;

err_out:
	free_kernel_qp(qp);
	return -ENOMEM;
}

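/*
 * Write the DMA address of every page_size block of the pinned umem into the
 * lowest level of the MTT hierarchy.
 */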
static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
{
	struct erdma_mtt *mtt = mem->mtt;
	struct ib_block_iter biter;
	u32 idx = 0;

	while (mtt->low_level)
		mtt = mtt->low_level;

	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
		mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
}

static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
					       size_t size)
{
	struct erdma_mtt *mtt;

	mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
	if (!mtt)
		return ERR_PTR(-ENOMEM);

	mtt->size = size;
	mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
	if (!mtt->buf)
		goto err_free_mtt;

	mtt->continuous = true;
	mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
				      DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
		goto err_free_mtt_buf;

	return mtt;

err_free_mtt_buf:
	kfree(mtt->buf);

err_free_mtt:
	kfree(mtt);

	return ERR_PTR(-ENOMEM);
}

static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
				     struct erdma_mtt *mtt)
{
	dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE);
	vfree(mtt->sglist);
}

static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
				      struct erdma_mtt *mtt)
{
	erdma_destroy_mtt_buf_sg(dev, mtt);
	vfree(mtt->buf);
	kfree(mtt);
}

static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
				  struct erdma_mtt *low_mtt)
{
	struct scatterlist *sg;
	u32 idx = 0, i;

	for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i)
		mtt->buf[idx++] = sg_dma_address(sg);
}

static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt)
{
	struct scatterlist *sglist;
	void *buf = mtt->buf;
	u32 npages, i, nsg;
	struct page *pg;

	/* Fail if buf is not page aligned. */
	if ((uintptr_t)buf & ~PAGE_MASK)
		return -EINVAL;

	npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE);
	sglist = vzalloc(npages * sizeof(*sglist));
	if (!sglist)
		return -ENOMEM;

	sg_init_table(sglist, npages);
	for (i = 0; i < npages; i++) {
		pg = vmalloc_to_page(buf);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE;
	}

	nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE);
	if (!nsg)
		goto err;

	mtt->sglist = sglist;
	mtt->nsg = nsg;

	return 0;
err:
	vfree(sglist);

	return -ENOMEM;
}

static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
						  size_t size)
{
	struct erdma_mtt *mtt;
	int ret = -ENOMEM;

	mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
	if (!mtt)
		return ERR_PTR(-ENOMEM);

	mtt->size = ALIGN(size, PAGE_SIZE);
	mtt->buf = vzalloc(mtt->size);
	mtt->continuous = false;
	if (!mtt->buf)
		goto err_free_mtt;

	ret = erdma_create_mtt_buf_sg(dev, mtt);
	if (ret)
		goto err_free_mtt_buf;

	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n",
		  mtt->size, mtt->nsg);

	return mtt;

err_free_mtt_buf:
	vfree(mtt->buf);

err_free_mtt:
	kfree(mtt);

	return ERR_PTR(ret);
}

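/*
 * Build an MTT holding 'size' bytes of entries. A contiguous MTT is a single
 * kzalloc'ed buffer mapped with dma_map_single(); otherwise a scattered MTT
 * is built and extra levels are stacked on top until the topmost level fits
 * in a single scatterlist entry (at most three levels).
 */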
static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
					  bool force_continuous)
{
	struct erdma_mtt *mtt, *tmp_mtt;
	int ret, level = 0;

	ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
		  force_continuous);

	if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA))
		force_continuous = true;

	if (force_continuous)
		return erdma_create_cont_mtt(dev, size);

	mtt = erdma_create_scatter_mtt(dev, size);
	if (IS_ERR(mtt))
		return mtt;
	level = 1;

	/* Collapse the MTT until the top level fits in a single entry. */
	while (mtt->nsg != 1 && level <= 3) {
		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg));
		if (IS_ERR(tmp_mtt)) {
			ret = PTR_ERR(tmp_mtt);
			goto err_free_mtt;
		}
		erdma_init_middle_mtt(tmp_mtt, mtt);
		tmp_mtt->low_level = mtt;
		mtt = tmp_mtt;
		level++;
	}

	if (level > 3) {
		ret = -ENOMEM;
		goto err_free_mtt;
	}

	mtt->level = level;
	ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
		  mtt->level, mtt->sglist[0].dma_address);

	return mtt;
err_free_mtt:
	while (mtt) {
		tmp_mtt = mtt->low_level;
		erdma_destroy_scatter_mtt(dev, mtt);
		mtt = tmp_mtt;
	}

	return ERR_PTR(ret);
}

static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
{
	struct erdma_mtt *tmp_mtt;

	if (mtt->continuous) {
		dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
				 DMA_TO_DEVICE);
		kfree(mtt->buf);
		kfree(mtt);
	} else {
		while (mtt) {
			tmp_mtt = mtt->low_level;
			erdma_destroy_scatter_mtt(dev, mtt);
			mtt = tmp_mtt;
		}
	}
}

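/*
 * Pin a user buffer, select the best supported page size, and build the MTT
 * describing it. On failure the umem is released and mem->umem cleared.
 */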
static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
			   u64 start, u64 len, int access, u64 virt,
			   unsigned long req_page_size, bool force_continuous)
{
	int ret = 0;

	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
	if (IS_ERR(mem->umem)) {
		ret = PTR_ERR(mem->umem);
		mem->umem = NULL;
		return ret;
	}

	mem->va = virt;
	mem->len = len;
	mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
	mem->page_offset = start & (mem->page_size - 1);
	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
	mem->page_cnt = mem->mtt_nents;
	mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt),
				    force_continuous);
	if (IS_ERR(mem->mtt)) {
		ret = PTR_ERR(mem->mtt);
		goto error_ret;
	}

	erdma_fill_bottom_mtt(dev, mem);

	return 0;

error_ret:
	if (mem->umem) {
		ib_umem_release(mem->umem);
		mem->umem = NULL;
	}

	return ret;
}

static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
{
	if (mem->mtt)
		erdma_destroy_mtt(dev, mem->mtt);

	if (mem->umem) {
		ib_umem_release(mem->umem);
		mem->umem = NULL;
	}
}

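/*
 * Doorbell records live in user memory. Each pinned page is shared by all
 * doorbell records of the context via dbrecords_page_list and refcounted, so
 * repeated mappings of the same page reuse the existing umem.
 */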
static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
				    u64 dbrecords_va,
				    struct erdma_user_dbrecords_page **dbr_page,
				    dma_addr_t *dma_addr)
{
	struct erdma_user_dbrecords_page *page = NULL;
	int rv = 0;

	mutex_lock(&ctx->dbrecords_page_mutex);

	list_for_each_entry(page, &ctx->dbrecords_page_list, list)
		if (page->va == (dbrecords_va & PAGE_MASK))
			goto found;

	page = kmalloc(sizeof(*page), GFP_KERNEL);
	if (!page) {
		rv = -ENOMEM;
		goto out;
	}

	page->va = (dbrecords_va & PAGE_MASK);
	page->refcnt = 0;

	page->umem = ib_umem_get(ctx->ibucontext.device,
				 dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
	if (IS_ERR(page->umem)) {
		rv = PTR_ERR(page->umem);
		kfree(page);
		goto out;
	}

	list_add(&page->list, &ctx->dbrecords_page_list);

found:
	*dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
		    (dbrecords_va & ~PAGE_MASK);
	*dbr_page = page;
	page->refcnt++;

out:
	mutex_unlock(&ctx->dbrecords_page_mutex);
	return rv;
}

static void
erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
			   struct erdma_user_dbrecords_page **dbr_page)
{
	if (!ctx || !(*dbr_page))
		return;

	mutex_lock(&ctx->dbrecords_page_mutex);
	if (--(*dbr_page)->refcnt == 0) {
		list_del(&(*dbr_page)->list);
		ib_umem_release((*dbr_page)->umem);
		kfree(*dbr_page);
	}

	*dbr_page = NULL;
	mutex_unlock(&ctx->dbrecords_page_mutex);
}

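/*
 * A user QP supplies one buffer containing the SQ followed by the RQ; the RQ
 * starts at the hardware-page-aligned offset after the SQ. Both halves get
 * their own MTT, and the SQ/RQ doorbell records are mapped from dbrec_va.
 */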
static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
			u64 va, u32 len, u64 dbrec_va)
{
	dma_addr_t dbrec_dma;
	u32 rq_offset;
	int ret;

	if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
		   qp->attrs.rq_size * RQE_SIZE))
		return -EINVAL;

	ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va,
			      qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
			      (SZ_1M - SZ_4K), true);
	if (ret)
		return ret;

	rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
	qp->user_qp.rq_offset = rq_offset;

	ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset,
			      qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
			      (SZ_1M - SZ_4K), true);
	if (ret)
		goto put_sq_mtt;

	ret = erdma_map_user_dbrecords(uctx, dbrec_va,
				       &qp->user_qp.user_dbr_page,
				       &dbrec_dma);
	if (ret)
		goto put_rq_mtt;

	qp->user_qp.sq_dbrec_dma = dbrec_dma;
	qp->user_qp.rq_dbrec_dma = dbrec_dma + ERDMA_DB_SIZE;

	return 0;

put_rq_mtt:
	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);

put_sq_mtt:
	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);

	return ret;
}

static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
{
	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
	erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
}

int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
		    struct ib_udata *udata)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	struct erdma_dev *dev = to_edev(ibqp->device);
	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	struct erdma_ureq_create_qp ureq;
	struct erdma_uresp_create_qp uresp;
	int ret;

	ret = erdma_qp_validate_cap(dev, attrs);
	if (ret)
		goto err_out;

	ret = erdma_qp_validate_attr(dev, attrs);
	if (ret)
		goto err_out;

	qp->scq = to_ecq(attrs->send_cq);
	qp->rcq = to_ecq(attrs->recv_cq);
	qp->dev = dev;
	qp->attrs.cc = dev->attrs.cc;

	init_rwsem(&qp->state_lock);
	kref_init(&qp->ref);
	init_completion(&qp->safe_free);

	ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
			      XA_LIMIT(1, dev->attrs.max_qp - 1),
			      &dev->next_alloc_qpn, GFP_KERNEL);
	if (ret < 0) {
		ret = -ENOMEM;
		goto err_out;
	}

	qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
					       ERDMA_MAX_WQEBB_PER_SQE);
	qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);

	if (uctx) {
		ret = ib_copy_from_udata(&ureq, udata,
					 min(sizeof(ureq), udata->inlen));
		if (ret)
			goto err_out_xa;

		ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
				   ureq.db_record_va);
		if (ret)
			goto err_out_xa;

		memset(&uresp, 0, sizeof(uresp));

		uresp.num_sqe = qp->attrs.sq_size;
		uresp.num_rqe = qp->attrs.rq_size;
		uresp.qp_id = QP_ID(qp);
		uresp.rq_offset = qp->user_qp.rq_offset;

		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (ret)
			goto err_out_cmd;
	} else {
		init_kernel_qp(dev, qp, attrs);
	}

	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
	qp->attrs.state = ERDMA_QP_STATE_IDLE;
	INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);

	ret = create_qp_cmd(uctx, qp);
	if (ret)
		goto err_out_cmd;

	spin_lock_init(&qp->lock);

	return 0;

err_out_cmd:
	if (uctx)
		free_user_qp(qp, uctx);
	else
		free_kernel_qp(qp);
err_out_xa:
	xa_erase(&dev->qp_xa, QP_ID(qp));
err_out:
	return ret;
}

static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
{
	int stag_idx;

	stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
	if (stag_idx < 0)
		return stag_idx;

	/* For now, we always let key field be zero. */
	*stag = (stag_idx << 8);

	return 0;
}

struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
{
	struct erdma_dev *dev = to_edev(ibpd->device);
	struct erdma_mr *mr;
	u32 stag;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto out_free;

	mr->type = ERDMA_MR_TYPE_DMA;

	mr->ibmr.lkey = stag;
	mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
	ret = regmr_cmd(dev, mr);
	if (ret)
		goto out_remove_stag;

	return &mr->ibmr;

out_remove_stag:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct erdma_mr *mr;
	struct erdma_dev *dev = to_edev(ibpd->device);
	int ret;
	u32 stag;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EOPNOTSUPP);

	if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto out_free;

	mr->type = ERDMA_MR_TYPE_FRMR;

	mr->ibmr.lkey = stag;
	mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	/* update it in FRMR. */
	mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
		     ERDMA_MR_ACC_RW;

	mr->mem.page_size = PAGE_SIZE; /* update it later. */
	mr->mem.page_cnt = max_num_sg;
	mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
	if (IS_ERR(mr->mem.mtt)) {
		ret = PTR_ERR(mr->mem.mtt);
		goto out_remove_stag;
	}

	ret = regmr_cmd(dev, mr);
	if (ret)
		goto out_destroy_mtt;

	return &mr->ibmr;

out_destroy_mtt:
	erdma_destroy_mtt(dev, mr->mem.mtt);

out_remove_stag:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct erdma_mr *mr = to_emr(ibmr);

	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
		return -1;

	mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
	mr->mem.mtt_nents++;

	return 0;
}

int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		    unsigned int *sg_offset)
{
	struct erdma_mr *mr = to_emr(ibmr);
	int num;

	mr->mem.mtt_nents = 0;

	num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
			     erdma_set_page);

	return num;
}

struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
				u64 virt, int access, struct ib_udata *udata)
{
	struct erdma_mr *mr = NULL;
	struct erdma_dev *dev = to_edev(ibpd->device);
	u32 stag;
	int ret;

	if (!len || len > dev->attrs.max_mr_size)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
			      SZ_2G - SZ_4K, false);
	if (ret)
		goto err_out_free;

	ret = erdma_create_stag(dev, &stag);
	if (ret)
		goto err_out_put_mtt;

	mr->ibmr.lkey = mr->ibmr.rkey = stag;
	mr->ibmr.pd = ibpd;
	mr->mem.va = virt;
	mr->mem.len = len;
	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
	mr->valid = 1;
	mr->type = ERDMA_MR_TYPE_NORMAL;

	ret = regmr_cmd(dev, mr);
	if (ret)
		goto err_out_mr;

	return &mr->ibmr;

err_out_mr:
	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
		       mr->ibmr.lkey >> 8);

err_out_put_mtt:
	put_mtt_entries(dev, &mr->mem);

err_out_free:
	kfree(mr);

	return ERR_PTR(ret);
}

int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct erdma_mr *mr;
	struct erdma_dev *dev = to_edev(ibmr->device);
	struct erdma_cmdq_dereg_mr_req req;
	int ret;

	mr = to_emr(ibmr);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DEREG_MR);

	req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
		  FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (ret)
		return ret;

	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);

	put_mtt_entries(dev, &mr->mem);

	kfree(mr);
	return 0;
}

int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct erdma_cq *cq = to_ecq(ibcq);
	struct erdma_dev *dev = to_edev(ibcq->device);
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	int err;
	struct erdma_cmdq_destroy_cq_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DESTROY_CQ);
	req.cqn = cq->cqn;

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (err)
		return err;

	if (rdma_is_kernel_res(&cq->ibcq.res)) {
		dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
		dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
			      cq->kern_cq.dbrec_dma);
	} else {
		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
	}

	xa_erase(&dev->cq_xa, cq->cqn);

	return 0;
}

int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	struct erdma_dev *dev = to_edev(ibqp->device);
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);
	struct erdma_qp_attrs qp_attrs;
	int err;
	struct erdma_cmdq_destroy_qp_req req;

	down_write(&qp->state_lock);
	qp_attrs.state = ERDMA_QP_STATE_ERROR;
	erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
	up_write(&qp->state_lock);

	cancel_delayed_work_sync(&qp->reflush_dwork);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_DESTROY_QP);
	req.qpn = QP_ID(qp);

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (err)
		return err;

	erdma_qp_put(qp);
	wait_for_completion(&qp->safe_free);

	if (rdma_is_kernel_res(&qp->ibqp.res)) {
		free_kernel_qp(qp);
	} else {
		put_mtt_entries(dev, &qp->user_qp.sq_mem);
		put_mtt_entries(dev, &qp->user_qp.rq_mem);
		erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
	}

	if (qp->cep)
		erdma_cep_put(qp->cep);
	xa_erase(&dev->qp_xa, QP_ID(qp));

	return 0;
}

void erdma_qp_get_ref(struct ib_qp *ibqp)
{
	erdma_qp_get(to_eqp(ibqp));
}

void erdma_qp_put_ref(struct ib_qp *ibqp)
{
	erdma_qp_put(to_eqp(ibqp));
}

int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct erdma_user_mmap_entry *entry;
	pgprot_t prot;
	int err;

	rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
	if (!rdma_entry)
		return -EINVAL;

	entry = to_emmap(rdma_entry);

	switch (entry->mmap_flag) {
	case ERDMA_MMAP_IO_NC:
		/* map doorbell. */
		prot = pgprot_device(vma->vm_page_prot);
		break;
	default:
		err = -EINVAL;
		goto put_entry;
	}

	err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
				prot, rdma_entry);

put_entry:
	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);

	kfree(entry);
}

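/*
 * Set up per-context doorbell space. With extended doorbell support the
 * firmware hands out dedicated SQ/RQ/CQ doorbell offsets; otherwise the
 * shared BAR doorbell regions are used, which requires CAP_SYS_RAWIO.
 */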
static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
			      bool ext_db_en)
{
	struct erdma_cmdq_ext_db_req req = {};
	u64 val0, val1;
	int ret;

	/*
	 * CAP_SYS_RAWIO is required if the hardware does not support the
	 * extended doorbell mechanism.
	 */
	if (!ext_db_en && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	if (!ext_db_en) {
		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET;
		ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
		ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
		return 0;
	}

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_ALLOC_DB);

	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1);
	if (ret)
		return ret;

	ctx->ext_db.enable = true;
	ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
	ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
	ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);

	ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT);
	ctx->rdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT);
	ctx->cdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT);

	return 0;
}

static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
{
	struct erdma_cmdq_ext_db_req req = {};
	int ret;

	if (!ctx->ext_db.enable)
		return;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_FREE_DB);

	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);

	req.sdb_off = ctx->ext_db.sdb_off;
	req.rdb_off = ctx->ext_db.rdb_off;
	req.cdb_off = ctx->ext_db.cdb_off;

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (ret)
		ibdev_err_ratelimited(&dev->ibdev,
				      "free db resources failed %d", ret);
}

static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
{
	rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
	rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
	rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
}

int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
{
	struct erdma_ucontext *ctx = to_ectx(ibctx);
	struct erdma_dev *dev = to_edev(ibctx->device);
	int ret;
	struct erdma_uresp_alloc_ctx uresp = {};

	if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
		ret = -ENOMEM;
		goto err_out;
	}

	if (udata->outlen < sizeof(uresp)) {
		ret = -EINVAL;
		goto err_out;
	}

	INIT_LIST_HEAD(&ctx->dbrecords_page_list);
	mutex_init(&ctx->dbrecords_page_mutex);

	ret = alloc_db_resources(dev, ctx,
				 !!(dev->attrs.cap_flags &
				    ERDMA_DEV_CAP_FLAGS_EXTEND_DB));
	if (ret)
		goto err_out;

	ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
	if (!ctx->sq_db_mmap_entry) {
		ret = -ENOMEM;
		goto err_free_ext_db;
	}

	ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
	if (!ctx->rq_db_mmap_entry) {
		ret = -EINVAL;
		goto err_put_mmap_entries;
	}

	ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
		ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
	if (!ctx->cq_db_mmap_entry) {
		ret = -EINVAL;
		goto err_put_mmap_entries;
	}

	uresp.dev_id = dev->pdev->device;

	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (ret)
		goto err_put_mmap_entries;

	return 0;

err_put_mmap_entries:
	erdma_uctx_user_mmap_entries_remove(ctx);

err_free_ext_db:
	free_db_resources(dev, ctx);

err_out:
	atomic_dec(&dev->num_ctx);
	return ret;
}

void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct erdma_dev *dev = to_edev(ibctx->device);
	struct erdma_ucontext *ctx = to_ectx(ibctx);

	erdma_uctx_user_mmap_entries_remove(ctx);
	free_db_resources(dev, ctx);
	atomic_dec(&dev->num_ctx);
}

static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
	[IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
	[IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
	[IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
	[IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
	[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
	[IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
};

int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
		    struct ib_udata *udata)
{
	struct erdma_qp_attrs new_attrs;
	enum erdma_qp_attr_mask erdma_attr_mask = 0;
	struct erdma_qp *qp = to_eqp(ibqp);
	int ret = 0;

	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	memset(&new_attrs, 0, sizeof(new_attrs));

	if (attr_mask & IB_QP_STATE) {
		new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];

		erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
	}

	down_write(&qp->state_lock);

	ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);

	up_write(&qp->state_lock);

	return ret;
}

int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		   int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
	struct erdma_qp *qp;
	struct erdma_dev *dev;

	if (ibqp && qp_attr && qp_init_attr) {
		qp = to_eqp(ibqp);
		dev = to_edev(ibqp->device);
	} else {
		return -EINVAL;
	}

	qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
	qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;

	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
	qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
	qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;

	qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
	qp_attr->max_rd_atomic = qp->attrs.irq_size;
	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;

	qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ;

	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
			      struct erdma_ureq_create_cq *ureq)
{
	int ret;
	struct erdma_dev *dev = to_edev(cq->ibcq.device);

	ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va,
			      ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
			      true);
	if (ret)
		return ret;

	ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
				       &cq->user_cq.user_dbr_page,
				       &cq->user_cq.dbrec_dma);
	if (ret)
		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);

	return ret;
}

static int erdma_init_kernel_cq(struct erdma_cq *cq)
{
	struct erdma_dev *dev = to_edev(cq->ibcq.device);

	cq->kern_cq.qbuf =
		dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
				   &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
	if (!cq->kern_cq.qbuf)
		return -ENOMEM;

	cq->kern_cq.dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL,
					    &cq->kern_cq.dbrec_dma);
	if (!cq->kern_cq.dbrec)
		goto err_out;

	spin_lock_init(&cq->kern_cq.lock);
	/* use default cqdb addr */
	cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
			  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);

	return -ENOMEM;
}

int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		    struct ib_udata *udata)
{
	struct erdma_cq *cq = to_ecq(ibcq);
	struct erdma_dev *dev = to_edev(ibcq->device);
	unsigned int depth = attr->cqe;
	int ret;
	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
		udata, struct erdma_ucontext, ibucontext);

	if (depth > dev->attrs.max_cqe)
		return -EINVAL;

	depth = roundup_pow_of_two(depth);
	cq->ibcq.cqe = depth;
	cq->depth = depth;
	cq->assoc_eqn = attr->comp_vector + 1;

	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
			      XA_LIMIT(1, dev->attrs.max_cq - 1),
			      &dev->next_alloc_cqn, GFP_KERNEL);
	if (ret < 0)
		return ret;

	if (!rdma_is_kernel_res(&ibcq->res)) {
		struct erdma_ureq_create_cq ureq;
		struct erdma_uresp_create_cq uresp;

		ret = ib_copy_from_udata(&ureq, udata,
					 min(udata->inlen, sizeof(ureq)));
		if (ret)
			goto err_out_xa;

		ret = erdma_init_user_cq(ctx, cq, &ureq);
		if (ret)
			goto err_out_xa;

		uresp.cq_id = cq->cqn;
		uresp.num_cqe = depth;

		ret = ib_copy_to_udata(udata, &uresp,
				       min(sizeof(uresp), udata->outlen));
		if (ret)
			goto err_free_res;
	} else {
		ret = erdma_init_kernel_cq(cq);
		if (ret)
			goto err_out_xa;
	}

	ret = create_cq_cmd(ctx, cq);
	if (ret)
		goto err_free_res;

	return 0;

err_free_res:
	if (!rdma_is_kernel_res(&ibcq->res)) {
		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
	} else {
		dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
		dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
			      cq->kern_cq.dbrec_dma);
	}

err_out_xa:
	xa_erase(&dev->cq_xa, cq->cqn);

	return ret;
}

void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
{
	struct erdma_cmdq_config_mtu_req req;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_CONF_MTU);
	req.mtu = mtu;

	erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}

void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
{
	struct ib_event event;

	event.device = &dev->ibdev;
	event.element.port_num = 1;
	event.event = reason;

	ib_dispatch_event(&event);
}

enum counters {
	ERDMA_STATS_TX_REQS_CNT,
	ERDMA_STATS_TX_PACKETS_CNT,
	ERDMA_STATS_TX_BYTES_CNT,
	ERDMA_STATS_TX_DISABLE_DROP_CNT,
	ERDMA_STATS_TX_BPS_METER_DROP_CNT,
	ERDMA_STATS_TX_PPS_METER_DROP_CNT,

	ERDMA_STATS_RX_PACKETS_CNT,
	ERDMA_STATS_RX_BYTES_CNT,
	ERDMA_STATS_RX_DISABLE_DROP_CNT,
	ERDMA_STATS_RX_BPS_METER_DROP_CNT,
	ERDMA_STATS_RX_PPS_METER_DROP_CNT,

	ERDMA_STATS_MAX
};

static const struct rdma_stat_desc erdma_descs[] = {
	[ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt",
	[ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt",
	[ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt",
	[ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt",
	[ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt",
	[ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt",
	[ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt",
	[ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt",
	[ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt",
	[ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt",
	[ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt",
};

struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
						u32 port_num)
{
	return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int erdma_query_hw_stats(struct erdma_dev *dev,
				struct rdma_hw_stats *stats)
{
	struct erdma_cmdq_query_stats_resp *resp;
	struct erdma_cmdq_query_req req;
	dma_addr_t dma_addr;
	int err;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_GET_STATS);

	resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr);
	if (!resp)
		return -ENOMEM;

	req.target_addr = dma_addr;
	req.target_length = ERDMA_HW_RESP_SIZE;

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (err)
		goto out;

	if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) {
		err = -EINVAL;
		goto out;
	}

	memcpy(&stats->value[0], &resp->tx_req_cnt,
	       sizeof(u64) * stats->num_counters);

out:
	dma_pool_free(dev->resp_pool, resp, dma_addr);

	return err;
}

int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		       u32 port, int index)
{
	struct erdma_dev *dev = to_edev(ibdev);
	int ret;

	if (port == 0)
		return 0;

	ret = erdma_query_hw_stats(dev, stats);
	if (ret)
		return ret;

	return stats->num_counters;
}