/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c 337748 2018-08-14 11:52:05Z hselasky $
 */
27322810Shselasky
28322810Shselasky#include <linux/errno.h>
29322810Shselasky#include <linux/pci.h>
30322810Shselasky#include <linux/dma-mapping.h>
31322810Shselasky#include <linux/slab.h>
32322810Shselasky#include <linux/io-mapping.h>
33322810Shselasky#include <linux/sched.h>
34322810Shselasky#include <linux/netdevice.h>
35322810Shselasky#include <linux/etherdevice.h>
36323223Shselasky#include <net/ipv6.h>
37322810Shselasky#include <linux/list.h>
38322810Shselasky#include <dev/mlx5/driver.h>
39322810Shselasky#include <dev/mlx5/vport.h>
40322810Shselasky#include <asm/pgtable.h>
41322810Shselasky#include <linux/fs.h>
42322810Shselasky#undef inode
43322810Shselasky
44322810Shselasky#include <rdma/ib_user_verbs.h>
45322810Shselasky#include <rdma/ib_smi.h>
46322810Shselasky#include <rdma/ib_umem.h>
47322810Shselasky#include "user.h"
48322810Shselasky#include "mlx5_ib.h"
49322810Shselasky
50322810Shselasky#include <sys/unistd.h>
51322810Shselasky
52322810Shselasky#define DRIVER_NAME "mlx5_ib"
53337748Shselasky#define DRIVER_VERSION "3.2.1"
54337748Shselasky#define DRIVER_RELDATE	"August 2018"
55322810Shselasky
56323223Shselasky#undef MODULE_VERSION
57323223Shselasky#include <sys/module.h>
58323223Shselasky
/* Module identification and load-order dependencies on mlx5 core and ibcore. */
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);

/*
 * Read-only compatibility knob; the real profile selector now lives in
 * the mlx5_core module (see the parameter description below).
 */
static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");

enum {
	MLX5_STANDARD_ATOMIC_SIZE = 0x8,	/* 8-byte atomic operand size */
};

/* Driver-wide workqueue (consumers not visible in this file chunk). */
struct workqueue_struct *mlx5_ib_wq;

/* Banner string assembled from the DRIVER_* defines above. */
static char mlx5_version[] =
	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
79322810Shselasky
80322810Shselaskystatic void get_atomic_caps(struct mlx5_ib_dev *dev,
81322810Shselasky			    struct ib_device_attr *props)
82322810Shselasky{
83322810Shselasky	int tmp;
84322810Shselasky	u8 atomic_operations;
85322810Shselasky	u8 atomic_size_qp;
86322810Shselasky	u8 atomic_req_endianess;
87322810Shselasky
88322810Shselasky	atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
89322810Shselasky	atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
90322810Shselasky	atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
91322810Shselasky					       atomic_req_8B_endianess_mode) ||
92322810Shselasky			       !mlx5_host_is_le();
93322810Shselasky
94322810Shselasky	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
95322810Shselasky	if (((atomic_operations & tmp) == tmp)
96322810Shselasky	    && (atomic_size_qp & 8)) {
97322810Shselasky		if (atomic_req_endianess) {
98322810Shselasky			props->atomic_cap = IB_ATOMIC_HCA;
99322810Shselasky		} else {
100322810Shselasky			props->atomic_cap = IB_ATOMIC_NONE;
101322810Shselasky		}
102322810Shselasky	} else {
103322810Shselasky		props->atomic_cap = IB_ATOMIC_NONE;
104322810Shselasky	}
105322810Shselasky
106322810Shselasky	tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
107322810Shselasky	if (((atomic_operations & tmp) == tmp)
108322810Shselasky	    &&(atomic_size_qp & 8)) {
109322810Shselasky		if (atomic_req_endianess)
110322810Shselasky			props->masked_atomic_cap = IB_ATOMIC_HCA;
111322810Shselasky		else {
112322810Shselasky			props->masked_atomic_cap = IB_ATOMIC_NONE;
113322810Shselasky		}
114322810Shselasky	} else {
115322810Shselasky		props->masked_atomic_cap = IB_ATOMIC_NONE;
116322810Shselasky	}
117322810Shselasky}
118322810Shselasky
119322810Shselaskystatic enum rdma_link_layer
120322810Shselaskymlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
121322810Shselasky{
122322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(device);
123322810Shselasky
124322810Shselasky	switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
125322810Shselasky	case MLX5_CAP_PORT_TYPE_IB:
126322810Shselasky		return IB_LINK_LAYER_INFINIBAND;
127322810Shselasky	case MLX5_CAP_PORT_TYPE_ETH:
128322810Shselasky		return IB_LINK_LAYER_ETHERNET;
129322810Shselasky	default:
130322810Shselasky		return IB_LINK_LAYER_UNSPECIFIED;
131322810Shselasky	}
132322810Shselasky}
133322810Shselasky
134322810Shselaskystatic int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
135322810Shselasky{
136322810Shselasky	return !dev->mdev->issi;
137322810Shselasky}
138322810Shselasky
/*
 * How port/vport attributes are queried: MAD for the legacy firmware
 * interface (issi == 0), HCA vport commands for IB link layer, NIC vport
 * commands for Ethernet (RoCE) link layer.
 */
enum {
	MLX5_VPORT_ACCESS_METHOD_MAD,
	MLX5_VPORT_ACCESS_METHOD_HCA,
	MLX5_VPORT_ACCESS_METHOD_NIC,
};
144322810Shselasky
145322810Shselaskystatic int mlx5_get_vport_access_method(struct ib_device *ibdev)
146322810Shselasky{
147322810Shselasky	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
148322810Shselasky		return MLX5_VPORT_ACCESS_METHOD_MAD;
149322810Shselasky
150322810Shselasky	if (mlx5_ib_port_link_layer(ibdev, 1) ==
151322810Shselasky	    IB_LINK_LAYER_ETHERNET)
152322810Shselasky		return MLX5_VPORT_ACCESS_METHOD_NIC;
153322810Shselasky
154322810Shselasky	return MLX5_VPORT_ACCESS_METHOD_HCA;
155322810Shselasky}
156322810Shselasky
157322810Shselaskystatic int mlx5_query_system_image_guid(struct ib_device *ibdev,
158322810Shselasky					__be64 *sys_image_guid)
159322810Shselasky{
160322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
161322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
162322810Shselasky	u64 tmp;
163322810Shselasky	int err;
164322810Shselasky
165322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
166322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
167322810Shselasky		return mlx5_query_system_image_guid_mad_ifc(ibdev,
168322810Shselasky							    sys_image_guid);
169322810Shselasky
170322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
171322810Shselasky		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
172322810Shselasky		if (!err)
173322810Shselasky			*sys_image_guid = cpu_to_be64(tmp);
174322810Shselasky		return err;
175322810Shselasky
176322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
177322810Shselasky		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
178322810Shselasky		if (!err)
179322810Shselasky			*sys_image_guid = cpu_to_be64(tmp);
180322810Shselasky		return err;
181322810Shselasky
182322810Shselasky	default:
183322810Shselasky		return -EINVAL;
184322810Shselasky	}
185322810Shselasky}
186322810Shselasky
187322810Shselaskystatic int mlx5_query_max_pkeys(struct ib_device *ibdev,
188322810Shselasky				u16 *max_pkeys)
189322810Shselasky{
190322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
191322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
192322810Shselasky
193322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
194322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
195322810Shselasky		return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
196322810Shselasky
197322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
198322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
199322810Shselasky		*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
200322810Shselasky						pkey_table_size));
201322810Shselasky		return 0;
202322810Shselasky
203322810Shselasky	default:
204322810Shselasky		return -EINVAL;
205322810Shselasky	}
206322810Shselasky}
207322810Shselasky
208322810Shselaskystatic int mlx5_query_vendor_id(struct ib_device *ibdev,
209322810Shselasky				u32 *vendor_id)
210322810Shselasky{
211322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
212322810Shselasky
213322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
214322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
215322810Shselasky		return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
216322810Shselasky
217322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
218322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
219322810Shselasky		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
220322810Shselasky
221322810Shselasky	default:
222322810Shselasky		return -EINVAL;
223322810Shselasky	}
224322810Shselasky}
225322810Shselasky
226322810Shselaskystatic int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
227322810Shselasky				__be64 *node_guid)
228322810Shselasky{
229322810Shselasky	u64 tmp;
230322810Shselasky	int err;
231322810Shselasky
232322810Shselasky	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
233322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
234322810Shselasky		return mlx5_query_node_guid_mad_ifc(dev, node_guid);
235322810Shselasky
236322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
237322810Shselasky		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
238322810Shselasky		if (!err)
239322810Shselasky			*node_guid = cpu_to_be64(tmp);
240322810Shselasky		return err;
241322810Shselasky
242322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
243322810Shselasky		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
244322810Shselasky		if (!err)
245322810Shselasky			*node_guid = cpu_to_be64(tmp);
246322810Shselasky		return err;
247322810Shselasky
248322810Shselasky	default:
249322810Shselasky		return -EINVAL;
250322810Shselasky	}
251322810Shselasky}
252322810Shselasky
/* Payload layout of the NODE_DESC access register: a 64-byte description. */
struct mlx5_reg_node_desc {
	u8	desc[64];
};
256322810Shselasky
257322810Shselaskystatic int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
258322810Shselasky{
259322810Shselasky	struct mlx5_reg_node_desc in;
260322810Shselasky
261322810Shselasky	if (mlx5_use_mad_ifc(dev))
262322810Shselasky		return mlx5_query_node_desc_mad_ifc(dev, node_desc);
263322810Shselasky
264322810Shselasky	memset(&in, 0, sizeof(in));
265322810Shselasky
266322810Shselasky	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
267322810Shselasky				    sizeof(struct mlx5_reg_node_desc),
268322810Shselasky				    MLX5_REG_NODE_DESC, 0, 0);
269322810Shselasky}
270322810Shselasky
/*
 * Assemble the device-wide attributes (ib_device_attr) from vport queries
 * and firmware capability bits.  Returns 0 on success or a negative errno
 * propagated from the query helpers.
 */
static int mlx5_ib_query_device(struct ib_device *ibdev,
				struct ib_device_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int max_sq_desc;
	int max_rq_sg;
	int max_sq_sg;
	int err;


	memset(props, 0, sizeof(*props));

	err = mlx5_query_system_image_guid(ibdev,
					   &props->sys_image_guid);
	if (err)
		return err;

	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
	if (err)
		return err;

	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
	if (err)
		return err;

	/* Pack major/minor/sub firmware revision into one 64-bit value. */
	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
		((u64)fw_rev_min(dev->mdev) << 16) |
		fw_rev_sub(dev->mdev);
	/* Baseline capabilities, unconditionally advertised. */
	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
		IB_DEVICE_PORT_ACTIVE_EVENT		|
		IB_DEVICE_SYS_IMAGE_GUID		|
		IB_DEVICE_RC_RNR_NAK_GEN;

	/* Optional capabilities, gated on firmware capability bits. */
	if (MLX5_CAP_GEN(mdev, pkv))
		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, qkv))
		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, apm))
		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
	if (MLX5_CAP_GEN(mdev, xrc))
		props->device_cap_flags |= IB_DEVICE_XRC;
	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
	if (MLX5_CAP_GEN(mdev, block_lb_mc))
		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

	props->vendor_part_id	   = mdev->pdev->device;
	props->hw_ver		   = mdev->pdev->revision;

	/* Resource limits; most firmware caps are log2-encoded. */
	props->max_mr_size	   = ~0ull;
	props->page_size_cap	   = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
	props->max_qp		   = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
	props->max_qp_wr	   = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
	/* RQ scatter/gather limit comes straight from the max RQ WQE size. */
	max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
		     sizeof(struct mlx5_wqe_data_seg);
	/* SQ WQE size is capped at 512 bytes before deriving the SGE count. */
	max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
	max_sq_sg = (max_sq_desc -
		     sizeof(struct mlx5_wqe_ctrl_seg) -
		     sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
	/* Advertise the smaller of the RQ and SQ SGE limits. */
	props->max_sge = min(max_rq_sg, max_sq_sg);
	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
	props->max_mr		   = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
	props->max_pd		   = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
	props->max_qp_rd_atom	   = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
	props->max_srq		   = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
	props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
	props->max_srq_sge	   = max_rq_sg - 1;
	props->max_fast_reg_page_list_len = (unsigned int)-1;
	get_atomic_caps(dev, props);
	props->max_mcast_grp	   = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
					   props->max_mcast_grp;
	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
	props->max_ah		= INT_MAX;

	return 0;
}
354322810Shselasky
/* Bitmask of active link widths as reported by PTYS (ib_link_width_oper). */
enum mlx5_ib_width {
	MLX5_IB_WIDTH_1X	= 1 << 0,
	MLX5_IB_WIDTH_2X	= 1 << 1,
	MLX5_IB_WIDTH_4X	= 1 << 2,
	MLX5_IB_WIDTH_8X	= 1 << 3,
	MLX5_IB_WIDTH_12X	= 1 << 4
};
362322810Shselasky
363322810Shselaskystatic int translate_active_width(struct ib_device *ibdev, u8 active_width,
364322810Shselasky				  u8 *ib_width)
365322810Shselasky{
366322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
367322810Shselasky	int err = 0;
368322810Shselasky
369322810Shselasky	if (active_width & MLX5_IB_WIDTH_1X) {
370322810Shselasky		*ib_width = IB_WIDTH_1X;
371322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_2X) {
372322810Shselasky		mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
373322810Shselasky			     (int)active_width);
374322810Shselasky		err = -EINVAL;
375322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_4X) {
376322810Shselasky		*ib_width = IB_WIDTH_4X;
377322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_8X) {
378322810Shselasky		*ib_width = IB_WIDTH_8X;
379322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_12X) {
380322810Shselasky		*ib_width = IB_WIDTH_12X;
381322810Shselasky	} else {
382322810Shselasky		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
383322810Shselasky			    (int)active_width);
384322810Shselasky		err = -EINVAL;
385322810Shselasky	}
386322810Shselasky
387322810Shselasky	return err;
388322810Shselasky}
389322810Shselasky
/*
 * TODO: Move to IB core
 */
/* IB PortInfo VLCap encoding: index of the highest supported VL group. */
enum ib_max_vl_num {
	__IB_MAX_VL_0		= 1,
	__IB_MAX_VL_0_1		= 2,
	__IB_MAX_VL_0_3		= 3,
	__IB_MAX_VL_0_7		= 4,
	__IB_MAX_VL_0_14	= 5,
};

/* Hardware vl_hw_cap encoding from the PVLC register (VL0..VLn supported). */
enum mlx5_vl_hw_cap {
	MLX5_VL_HW_0	= 1,
	MLX5_VL_HW_0_1	= 2,
	MLX5_VL_HW_0_2	= 3,
	MLX5_VL_HW_0_3	= 4,
	MLX5_VL_HW_0_4	= 5,
	MLX5_VL_HW_0_5	= 6,
	MLX5_VL_HW_0_6	= 7,
	MLX5_VL_HW_0_7	= 8,
	MLX5_VL_HW_0_14	= 15
};
412322810Shselasky
413322810Shselaskystatic int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
414322810Shselasky				u8 *max_vl_num)
415322810Shselasky{
416322810Shselasky	switch (vl_hw_cap) {
417322810Shselasky	case MLX5_VL_HW_0:
418322810Shselasky		*max_vl_num = __IB_MAX_VL_0;
419322810Shselasky		break;
420322810Shselasky	case MLX5_VL_HW_0_1:
421322810Shselasky		*max_vl_num = __IB_MAX_VL_0_1;
422322810Shselasky		break;
423322810Shselasky	case MLX5_VL_HW_0_3:
424322810Shselasky		*max_vl_num = __IB_MAX_VL_0_3;
425322810Shselasky		break;
426322810Shselasky	case MLX5_VL_HW_0_7:
427322810Shselasky		*max_vl_num = __IB_MAX_VL_0_7;
428322810Shselasky		break;
429322810Shselasky	case MLX5_VL_HW_0_14:
430322810Shselasky		*max_vl_num = __IB_MAX_VL_0_14;
431322810Shselasky		break;
432322810Shselasky
433322810Shselasky	default:
434322810Shselasky		return -EINVAL;
435322810Shselasky	}
436322810Shselasky
437322810Shselasky	return 0;
438322810Shselasky}
439322810Shselasky
/*
 * Fill an ib_port_attr for an InfiniBand port by combining four firmware
 * sources: the HCA vport context (LIDs, state, counters), the PTYS
 * register (width/speed), the PMTU register (MTUs) and the PVLC register
 * (VL capability).  Returns 0 on success or a negative errno; all
 * temporary buffers are released through the single "out" label.
 */
static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u32 *rep;
	int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
	struct mlx5_ptys_reg *ptys;
	struct mlx5_pmtu_reg *pmtu;
	struct mlx5_pvlc_reg pvlc;
	void *ctx;
	int err;

	/* Heap-allocate the larger register/command buffers. */
	rep = mlx5_vzalloc(outlen);
	ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
	pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
	if (!rep || !ptys || !pmtu) {
		err = -ENOMEM;
		goto out;
	}

	memset(props, 0, sizeof(*props));

	/* what if I am pf with dual port */
	err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
	if (err)
		goto out;

	ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);

	/* Copy the vport-context fields into the port attributes. */
	props->lid		= MLX5_GET(hca_vport_context, ctx, lid);
	props->lmc		= MLX5_GET(hca_vport_context, ctx, lmc);
	props->sm_lid		= MLX5_GET(hca_vport_context, ctx, sm_lid);
	props->sm_sl		= MLX5_GET(hca_vport_context, ctx, sm_sl);
	props->state		= MLX5_GET(hca_vport_context, ctx, vport_state);
	props->phys_state	= MLX5_GET(hca_vport_context, ctx,
					port_physical_state);
	props->port_cap_flags	= MLX5_GET(hca_vport_context, ctx, cap_mask1);
	props->gid_tbl_len	= mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
	props->max_msg_sz	= 1 << MLX5_CAP_GEN(mdev, log_max_msg);
	props->pkey_tbl_len	= mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
	props->bad_pkey_cntr	= MLX5_GET(hca_vport_context, ctx,
					      pkey_violation_counter);
	props->qkey_viol_cntr	= MLX5_GET(hca_vport_context, ctx,
					      qkey_violation_counter);
	props->subnet_timeout	= MLX5_GET(hca_vport_context, ctx,
					      subnet_timeout);
	props->init_type_reply	= MLX5_GET(hca_vport_context, ctx,
					   init_type_reply);

	/* Active width and speed from the PTYS register (IB protocol). */
	ptys->proto_mask |= MLX5_PTYS_IB;
	ptys->local_port = port;
	err = mlx5_core_access_ptys(mdev, ptys, 0);
	if (err)
		goto out;

	err = translate_active_width(ibdev, ptys->ib_link_width_oper,
				     &props->active_width);
	if (err)
		goto out;

	props->active_speed	= (u8)ptys->ib_proto_oper;

	/* Maximum and operational MTU from the PMTU register. */
	pmtu->local_port = port;
	err = mlx5_core_access_pmtu(mdev, pmtu, 0);
	if (err)
		goto out;

	props->max_mtu		= pmtu->max_mtu;
	props->active_mtu	= pmtu->oper_mtu;

	/* VL capability from the PVLC register. */
	memset(&pvlc, 0, sizeof(pvlc));
	pvlc.local_port = port;
	err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
	if (err)
		goto out;

	err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
				   &props->max_vl_num);
out:
	kvfree(rep);
	kfree(ptys);
	kfree(pmtu);
	return err;
}
525322810Shselasky
526322810Shselaskyint mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
527322810Shselasky		       struct ib_port_attr *props)
528322810Shselasky{
529322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
530322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
531322810Shselasky		return mlx5_query_port_mad_ifc(ibdev, port, props);
532322810Shselasky
533322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
534322810Shselasky		return mlx5_query_port_ib(ibdev, port, props);
535322810Shselasky
536322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
537322810Shselasky		return mlx5_query_port_roce(ibdev, port, props);
538322810Shselasky
539322810Shselasky	default:
540322810Shselasky		return -EINVAL;
541322810Shselasky	}
542322810Shselasky}
543322810Shselasky
544325599Shselaskystatic void
545325599Shselaskymlx5_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
546322810Shselasky{
547322810Shselasky	if (dev->if_addrlen != ETH_ALEN)
548325599Shselasky		return;
549325599Shselasky
550322810Shselasky	memcpy(eui, IF_LLADDR(dev), 3);
551322810Shselasky	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
552322810Shselasky
553325599Shselasky	if (vlan_id < 0x1000) {
554325599Shselasky		eui[3] = vlan_id >> 8;
555325599Shselasky		eui[4] = vlan_id & 0xff;
556325599Shselasky	} else {
557325599Shselasky		eui[3] = 0xFF;
558325599Shselasky		eui[4] = 0xFE;
559325599Shselasky	}
560322810Shselasky	eui[0] ^= 2;
561322810Shselasky}
562322810Shselasky
/*
 * Build the default link-local GID for an interface: fe80::/64 prefix plus
 * a MAC-derived interface ID.  0xFFFF means "no VLAN" so the EUI-64 filler
 * bytes are used (see mlx5_addrconf_ifid_eui48).
 */
static void
mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	mlx5_addrconf_ifid_eui48(&gid->raw[8], 0xFFFF, dev);
}
569322810Shselasky
/*
 * Kernel-thread body that keeps the RoCE GID table of one port in sync
 * with the network stack.  Once a second it rebuilds the desired GID list
 * from the Ethernet parent interface and its VLAN children, pushes any
 * changes to firmware via modify_gid_roce(), and dispatches an
 * IB_EVENT_GID_CHANGE so ibcore refreshes its cache.  The loop exits when
 * port->port_gone is set; the thread then signals completion by setting
 * port_gone = 2 and exits.
 */
static void
mlx5_ib_roce_port_update(void *arg)
{
	struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
	struct mlx5_ib_dev *dev = port->dev;
	struct mlx5_core_dev *mdev = dev->mdev;
	struct net_device *xdev[MLX5_IB_GID_MAX];
	struct net_device *idev;
	struct net_device *ndev;
	union ib_gid gid_temp;

	while (port->port_gone == 0) {
		int update = 0;
		int gid_index = 0;
		int j;
		int error;

		/* Parent Ethernet interface; retry until it is registered. */
		ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
		if (ndev == NULL) {
			pause("W", hz);
			continue;
		}

		CURVNET_SET_QUIET(ndev->if_vnet);

		/* Slot 0 always holds the parent's default (no-VLAN) GID. */
		memset(&gid_temp, 0, sizeof(gid_temp));
		mlx5_make_default_gid(ndev, &gid_temp);
		if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
			port->gid_table[gid_index] = gid_temp;
			update = 1;
		}
		xdev[gid_index] = ndev;
		gid_index++;

		IFNET_RLOCK();
		/* Only walk the VLAN children if ndev is still in the list. */
		TAILQ_FOREACH(idev, &V_ifnet, if_link) {
			if (idev == ndev)
				break;
		}
		if (idev != NULL) {
		    TAILQ_FOREACH(idev, &V_ifnet, if_link) {
			u16 vid;

			/* Accept ndev itself and VLANs stacked on it. */
			if (idev != ndev) {
				if (idev->if_type != IFT_L2VLAN)
					continue;
				if (ndev != rdma_vlan_dev_real_dev(idev))
					continue;
			}

			/* setup valid MAC-based GID */
			memset(&gid_temp, 0, sizeof(gid_temp));
			gid_temp.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
			vid = rdma_vlan_dev_vlan_id(idev);
			mlx5_addrconf_ifid_eui48(&gid_temp.raw[8], vid, idev);

			/* check for existing entry */
			for (j = 0; j != gid_index; j++) {
				if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
					break;
			}

			/* check if new entry should be added */
			if (j == gid_index && gid_index < MLX5_IB_GID_MAX) {
				if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
					port->gid_table[gid_index] = gid_temp;
					update = 1;
				}
				xdev[gid_index] = idev;
				gid_index++;
			}
		    }
		}
		IFNET_RUNLOCK();
		CURVNET_RESTORE();

		if (update != 0 &&
		    mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
			struct ib_event event = {
			    .device = &dev->ib_dev,
			    .element.port_num = port->port_num + 1,
			    .event = IB_EVENT_GID_CHANGE,
			};

			/* add new entries, if any */
			for (j = 0; j != gid_index; j++) {
				error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
				    port->gid_table + j, xdev[j]);
				if (error != 0)
					printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
			}
			memset(&gid_temp, 0, sizeof(gid_temp));

			/* clear old entries, if any */
			for (; j != MLX5_IB_GID_MAX; j++) {
				if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
					continue;
				port->gid_table[j] = gid_temp;
				(void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
				    port->gid_table + j, ndev);
			}

			/* make sure ibcore gets updated */
			ib_dispatch_event(&event);
		}
		pause("W", hz);
	}
	/* Final GID-change notification before the thread terminates. */
	do {
		struct ib_event event = {
			.device = &dev->ib_dev,
			.element.port_num = port->port_num + 1,
			.event = IB_EVENT_GID_CHANGE,
		};
		/* make sure ibcore gets updated */
		ib_dispatch_event(&event);

		/* wait a bit */
		pause("W", hz);
	} while (0);
	port->port_gone = 2;	/* handshake: tells the stopper we are done */
	kthread_exit();
}
692322810Shselasky
693322810Shselaskystatic int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
694322810Shselasky			     union ib_gid *gid)
695322810Shselasky{
696322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
697322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
698322810Shselasky
699322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
700322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
701322810Shselasky		return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
702322810Shselasky
703322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
704322810Shselasky		return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
705322810Shselasky
706322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
707322810Shselasky		if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
708322810Shselasky		    index < 0 || index >= MLX5_IB_GID_MAX ||
709322810Shselasky		    dev->port[port - 1].port_gone != 0)
710322810Shselasky			memset(gid, 0, sizeof(*gid));
711322810Shselasky		else
712322810Shselasky			*gid = dev->port[port - 1].gid_table[index];
713322810Shselasky		return 0;
714322810Shselasky
715322810Shselasky	default:
716322810Shselasky		return -EINVAL;
717322810Shselasky	}
718322810Shselasky}
719322810Shselasky
720322810Shselaskystatic int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
721322810Shselasky			      u16 *pkey)
722322810Shselasky{
723322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
724322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
725322810Shselasky
726322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
727322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
728322810Shselasky		return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
729322810Shselasky
730322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
731322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
732322810Shselasky		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
733322810Shselasky						 pkey);
734322810Shselasky
735322810Shselasky	default:
736322810Shselasky		return -EINVAL;
737322810Shselasky	}
738322810Shselasky}
739322810Shselasky
740322810Shselaskystatic int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
741322810Shselasky				 struct ib_device_modify *props)
742322810Shselasky{
743322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
744322810Shselasky	struct mlx5_reg_node_desc in;
745322810Shselasky	struct mlx5_reg_node_desc out;
746322810Shselasky	int err;
747322810Shselasky
748322810Shselasky	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
749322810Shselasky		return -EOPNOTSUPP;
750322810Shselasky
751322810Shselasky	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
752322810Shselasky		return 0;
753322810Shselasky
754322810Shselasky	/*
755322810Shselasky	 * If possible, pass node desc to FW, so it can generate
756322810Shselasky	 * a 144 trap.  If cmd fails, just ignore.
757322810Shselasky	 */
758322810Shselasky	memcpy(&in, props->node_desc, 64);
759322810Shselasky	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
760322810Shselasky				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
761322810Shselasky	if (err)
762322810Shselasky		return err;
763322810Shselasky
764322810Shselasky	memcpy(ibdev->node_desc, props->node_desc, 64);
765322810Shselasky
766322810Shselasky	return err;
767322810Shselasky}
768322810Shselasky
769322810Shselaskystatic int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
770322810Shselasky			       struct ib_port_modify *props)
771322810Shselasky{
772322810Shselasky	u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
773322810Shselasky		     IB_LINK_LAYER_ETHERNET);
774322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
775322810Shselasky	struct ib_port_attr attr;
776322810Shselasky	u32 tmp;
777322810Shselasky	int err;
778322810Shselasky
779322810Shselasky	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
780322810Shselasky	 * of whether port link layer is ETH or IB. For ETH ports, qkey
781322810Shselasky	 * violations and port capabilities are not valid.
782322810Shselasky	 */
783322810Shselasky	if (is_eth)
784322810Shselasky		return 0;
785322810Shselasky
786322810Shselasky	mutex_lock(&dev->cap_mask_mutex);
787322810Shselasky
788322810Shselasky	err = mlx5_ib_query_port(ibdev, port, &attr);
789322810Shselasky	if (err)
790322810Shselasky		goto out;
791322810Shselasky
792322810Shselasky	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
793322810Shselasky		~props->clr_port_cap_mask;
794322810Shselasky
795322810Shselasky	err = mlx5_set_port_caps(dev->mdev, port, tmp);
796322810Shselasky
797322810Shselaskyout:
798322810Shselasky	mutex_unlock(&dev->cap_mask_mutex);
799322810Shselasky	return err;
800322810Shselasky}
801322810Shselasky
/* Capability flag bits reported to user space via the ucontext response. */
enum mlx5_cap_flags {
	MLX5_CAP_COMPACT_AV = 1 << 0,	/* device supports compact address vectors */
};
805322810Shselasky
806322810Shselaskystatic void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
807322810Shselasky{
808322810Shselasky	*flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
809322810Shselasky		  MLX5_CAP_COMPACT_AV : 0;
810322810Shselasky}
811322810Shselasky
812322810Shselaskystatic struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
813322810Shselasky						  struct ib_udata *udata)
814322810Shselasky{
815322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
816322810Shselasky	struct mlx5_ib_alloc_ucontext_req_v2 req;
817322810Shselasky	struct mlx5_ib_alloc_ucontext_resp resp;
818322810Shselasky	struct mlx5_ib_ucontext *context;
819322810Shselasky	struct mlx5_uuar_info *uuari;
820322810Shselasky	struct mlx5_uar *uars;
821322810Shselasky	int gross_uuars;
822322810Shselasky	int num_uars;
823322810Shselasky	int ver;
824322810Shselasky	int uuarn;
825322810Shselasky	int err;
826322810Shselasky	int i;
827322810Shselasky	size_t reqlen;
828322810Shselasky
829322810Shselasky	if (!dev->ib_active)
830322810Shselasky		return ERR_PTR(-EAGAIN);
831322810Shselasky
832322810Shselasky	memset(&req, 0, sizeof(req));
833322810Shselasky	memset(&resp, 0, sizeof(resp));
834322810Shselasky
835322810Shselasky	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
836322810Shselasky	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
837322810Shselasky		ver = 0;
838322810Shselasky	else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
839322810Shselasky		ver = 2;
840322810Shselasky	else {
841323223Shselasky		mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
842322810Shselasky		return ERR_PTR(-EINVAL);
843322810Shselasky	}
844322810Shselasky
845322810Shselasky	err = ib_copy_from_udata(&req, udata, reqlen);
846322810Shselasky	if (err) {
847322810Shselasky		mlx5_ib_err(dev, "copy failed\n");
848322810Shselasky		return ERR_PTR(err);
849322810Shselasky	}
850322810Shselasky
851322810Shselasky	if (req.reserved) {
852322810Shselasky		mlx5_ib_err(dev, "request corrupted\n");
853322810Shselasky		return ERR_PTR(-EINVAL);
854322810Shselasky	}
855322810Shselasky
856322810Shselasky	if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
857322810Shselasky		mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
858322810Shselasky		return ERR_PTR(-ENOMEM);
859322810Shselasky	}
860322810Shselasky
861322810Shselasky	req.total_num_uuars = ALIGN(req.total_num_uuars,
862322810Shselasky				    MLX5_NON_FP_BF_REGS_PER_PAGE);
863322810Shselasky	if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
864322810Shselasky		mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
865322810Shselasky			     req.total_num_uuars, req.total_num_uuars);
866322810Shselasky		return ERR_PTR(-EINVAL);
867322810Shselasky	}
868322810Shselasky
869322810Shselasky	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
870322810Shselasky	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
871322810Shselasky	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
872322810Shselasky	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
873322810Shselasky		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
874322810Shselasky	resp.cache_line_size = L1_CACHE_BYTES;
875322810Shselasky	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
876322810Shselasky	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
877322810Shselasky	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
878322810Shselasky	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
879322810Shselasky	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
880322810Shselasky	set_mlx5_flags(&resp.flags, dev->mdev);
881322810Shselasky
882322810Shselasky	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
883322810Shselasky		resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
884322810Shselasky
885322810Shselasky	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
886322810Shselasky		resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
887322810Shselasky
888322810Shselasky	context = kzalloc(sizeof(*context), GFP_KERNEL);
889322810Shselasky	if (!context)
890322810Shselasky		return ERR_PTR(-ENOMEM);
891322810Shselasky
892322810Shselasky	uuari = &context->uuari;
893322810Shselasky	mutex_init(&uuari->lock);
894322810Shselasky	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
895322810Shselasky	if (!uars) {
896322810Shselasky		err = -ENOMEM;
897322810Shselasky		goto out_ctx;
898322810Shselasky	}
899322810Shselasky
900322810Shselasky	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
901322810Shselasky				sizeof(*uuari->bitmap),
902322810Shselasky				GFP_KERNEL);
903322810Shselasky	if (!uuari->bitmap) {
904322810Shselasky		err = -ENOMEM;
905322810Shselasky		goto out_uar_ctx;
906322810Shselasky	}
907322810Shselasky	/*
908322810Shselasky	 * clear all fast path uuars
909322810Shselasky	 */
910322810Shselasky	for (i = 0; i < gross_uuars; i++) {
911322810Shselasky		uuarn = i & 3;
912322810Shselasky		if (uuarn == 2 || uuarn == 3)
913322810Shselasky			set_bit(i, uuari->bitmap);
914322810Shselasky	}
915322810Shselasky
916322810Shselasky	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
917322810Shselasky	if (!uuari->count) {
918322810Shselasky		err = -ENOMEM;
919322810Shselasky		goto out_bitmap;
920322810Shselasky	}
921322810Shselasky
922322810Shselasky	for (i = 0; i < num_uars; i++) {
923322810Shselasky		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
924322810Shselasky		if (err) {
925322810Shselasky			mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
926322810Shselasky			goto out_uars;
927322810Shselasky		}
928322810Shselasky	}
929322810Shselasky	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
930322810Shselasky		context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
931322810Shselasky
932322810Shselasky	INIT_LIST_HEAD(&context->db_page_list);
933322810Shselasky	mutex_init(&context->db_page_mutex);
934322810Shselasky
935322810Shselasky	resp.tot_uuars = req.total_num_uuars;
936322810Shselasky	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
937322810Shselasky	err = ib_copy_to_udata(udata, &resp,
938322810Shselasky			       min_t(size_t, udata->outlen, sizeof(resp)));
939322810Shselasky	if (err)
940322810Shselasky		goto out_uars;
941322810Shselasky
942322810Shselasky	uuari->ver = ver;
943322810Shselasky	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
944322810Shselasky	uuari->uars = uars;
945322810Shselasky	uuari->num_uars = num_uars;
946322810Shselasky
947322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
948322810Shselasky	    IB_LINK_LAYER_ETHERNET) {
949322810Shselasky		err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
950322810Shselasky		if (err)
951322810Shselasky			goto out_uars;
952322810Shselasky	}
953322810Shselasky
954322810Shselasky	return &context->ibucontext;
955322810Shselasky
956322810Shselaskyout_uars:
957322810Shselasky	for (i--; i >= 0; i--)
958322810Shselasky		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
959322810Shselasky	kfree(uuari->count);
960322810Shselasky
961322810Shselaskyout_bitmap:
962322810Shselasky	kfree(uuari->bitmap);
963322810Shselasky
964322810Shselaskyout_uar_ctx:
965322810Shselasky	kfree(uars);
966322810Shselasky
967322810Shselaskyout_ctx:
968322810Shselasky	kfree(context);
969322810Shselasky	return ERR_PTR(err);
970322810Shselasky}
971322810Shselasky
972322810Shselaskystatic int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
973322810Shselasky{
974322810Shselasky	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
975322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
976322810Shselasky	struct mlx5_uuar_info *uuari = &context->uuari;
977322810Shselasky	int i;
978322810Shselasky
979322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
980322810Shselasky	    IB_LINK_LAYER_ETHERNET)
981322810Shselasky		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
982322810Shselasky
983322810Shselasky	for (i = 0; i < uuari->num_uars; i++) {
984322810Shselasky		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
985322810Shselasky			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
986322810Shselasky	}
987322810Shselasky	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
988322810Shselasky		if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
989322810Shselasky			mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
990322810Shselasky	}
991322810Shselasky
992322810Shselasky	kfree(uuari->count);
993322810Shselasky	kfree(uuari->bitmap);
994322810Shselasky	kfree(uuari->uars);
995322810Shselasky	kfree(context);
996322810Shselasky
997322810Shselasky	return 0;
998322810Shselasky}
999322810Shselasky
1000322810Shselaskystatic phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
1001322810Shselasky{
1002322810Shselasky	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
1003322810Shselasky}
1004322810Shselasky
1005322810Shselaskystatic int get_command(unsigned long offset)
1006322810Shselasky{
1007322810Shselasky	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1008322810Shselasky}
1009322810Shselasky
1010322810Shselaskystatic int get_arg(unsigned long offset)
1011322810Shselasky{
1012322810Shselasky	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1013322810Shselasky}
1014322810Shselasky
/* For UAR mappings, the mmap argument is the UAR page index. */
static int get_index(unsigned long offset)
{
	return get_arg(offset);
}
1019322810Shselasky
1020322810Shselaskystatic int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
1021322810Shselasky		    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
1022322810Shselasky		    struct mlx5_ib_ucontext *context)
1023322810Shselasky{
1024322810Shselasky	unsigned long idx;
1025322810Shselasky	phys_addr_t pfn;
1026322810Shselasky
1027322810Shselasky	if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
1028322810Shselasky		mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
1029322810Shselasky			     (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
1030322810Shselasky		return -EINVAL;
1031322810Shselasky	}
1032322810Shselasky
1033322810Shselasky	idx = get_index(vma->vm_pgoff);
1034322810Shselasky	if (idx >= uuari->num_uars) {
1035322810Shselasky		mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
1036322810Shselasky			     idx, uuari->num_uars);
1037322810Shselasky		return -EINVAL;
1038322810Shselasky	}
1039322810Shselasky
1040322810Shselasky	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1041322810Shselasky	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1042322810Shselasky		    (unsigned long long)pfn);
1043322810Shselasky
1044322810Shselasky	vma->vm_page_prot = prot;
1045322810Shselasky	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1046322810Shselasky			       PAGE_SIZE, vma->vm_page_prot)) {
1047322810Shselasky		mlx5_ib_err(dev, "io remap failed\n");
1048322810Shselasky		return -EAGAIN;
1049322810Shselasky	}
1050322810Shselasky
1051322810Shselasky	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
1052322810Shselasky		    (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
1053322810Shselasky
1054322810Shselasky	return 0;
1055322810Shselasky}
1056322810Shselasky
1057322810Shselaskystatic int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1058322810Shselasky{
1059322810Shselasky	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1060322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1061322810Shselasky	struct mlx5_uuar_info *uuari = &context->uuari;
1062322810Shselasky	unsigned long command;
1063322810Shselasky
1064322810Shselasky	command = get_command(vma->vm_pgoff);
1065322810Shselasky	switch (command) {
1066322810Shselasky	case MLX5_IB_MMAP_REGULAR_PAGE:
1067322810Shselasky		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1068322810Shselasky				true,
1069322810Shselasky				uuari, dev, context);
1070322810Shselasky
1071322810Shselasky		break;
1072322810Shselasky
1073322810Shselasky	case MLX5_IB_MMAP_WC_PAGE:
1074322810Shselasky		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1075322810Shselasky				true, uuari, dev, context);
1076322810Shselasky		break;
1077322810Shselasky
1078322810Shselasky	case MLX5_IB_MMAP_NC_PAGE:
1079322810Shselasky		return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
1080322810Shselasky				false, uuari, dev, context);
1081322810Shselasky		break;
1082322810Shselasky
1083322810Shselasky	default:
1084322810Shselasky		return -EINVAL;
1085322810Shselasky	}
1086322810Shselasky
1087322810Shselasky	return 0;
1088322810Shselasky}
1089322810Shselasky
1090322810Shselaskystatic int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
1091322810Shselasky{
1092322810Shselasky	struct mlx5_create_mkey_mbox_in *in;
1093322810Shselasky	struct mlx5_mkey_seg *seg;
1094322810Shselasky	struct mlx5_core_mr mr;
1095322810Shselasky	int err;
1096322810Shselasky
1097322810Shselasky	in = kzalloc(sizeof(*in), GFP_KERNEL);
1098322810Shselasky	if (!in)
1099322810Shselasky		return -ENOMEM;
1100322810Shselasky
1101322810Shselasky	seg = &in->seg;
1102322810Shselasky	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
1103322810Shselasky	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
1104322810Shselasky	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1105322810Shselasky	seg->start_addr = 0;
1106322810Shselasky
1107322810Shselasky	err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
1108322810Shselasky				    NULL, NULL, NULL);
1109322810Shselasky	if (err) {
1110322810Shselasky		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
1111322810Shselasky		goto err_in;
1112322810Shselasky	}
1113322810Shselasky
1114322810Shselasky	kfree(in);
1115322810Shselasky	*key = mr.key;
1116322810Shselasky
1117322810Shselasky	return 0;
1118322810Shselasky
1119322810Shselaskyerr_in:
1120322810Shselasky	kfree(in);
1121322810Shselasky
1122322810Shselasky	return err;
1123322810Shselasky}
1124322810Shselasky
1125322810Shselaskystatic void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1126322810Shselasky{
1127322810Shselasky	struct mlx5_core_mr mr;
1128322810Shselasky	int err;
1129322810Shselasky
1130322810Shselasky	memset(&mr, 0, sizeof(mr));
1131322810Shselasky	mr.key = key;
1132322810Shselasky	err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1133322810Shselasky	if (err)
1134322810Shselasky		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
1135322810Shselasky}
1136322810Shselasky
1137322810Shselaskystatic struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1138322810Shselasky				      struct ib_ucontext *context,
1139322810Shselasky				      struct ib_udata *udata)
1140322810Shselasky{
1141322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1142322810Shselasky	struct mlx5_ib_alloc_pd_resp resp;
1143322810Shselasky	struct mlx5_ib_pd *pd;
1144322810Shselasky	int err;
1145322810Shselasky
1146322810Shselasky	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1147322810Shselasky	if (!pd)
1148322810Shselasky		return ERR_PTR(-ENOMEM);
1149322810Shselasky
1150322810Shselasky	err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1151322810Shselasky	if (err) {
1152322810Shselasky		mlx5_ib_warn(dev, "pd alloc failed\n");
1153322810Shselasky		kfree(pd);
1154322810Shselasky		return ERR_PTR(err);
1155322810Shselasky	}
1156322810Shselasky
1157322810Shselasky	if (context) {
1158322810Shselasky		resp.pdn = pd->pdn;
1159322810Shselasky		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1160322810Shselasky			mlx5_ib_err(dev, "copy failed\n");
1161322810Shselasky			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1162322810Shselasky			kfree(pd);
1163322810Shselasky			return ERR_PTR(-EFAULT);
1164322810Shselasky		}
1165322810Shselasky	} else {
1166322810Shselasky		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1167322810Shselasky		if (err) {
1168322810Shselasky			mlx5_ib_err(dev, "alloc mkey failed\n");
1169322810Shselasky			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1170322810Shselasky			kfree(pd);
1171322810Shselasky			return ERR_PTR(err);
1172322810Shselasky		}
1173322810Shselasky	}
1174322810Shselasky
1175322810Shselasky	return &pd->ibpd;
1176322810Shselasky}
1177322810Shselasky
1178322810Shselaskystatic int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1179322810Shselasky{
1180322810Shselasky	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1181322810Shselasky	struct mlx5_ib_pd *mpd = to_mpd(pd);
1182322810Shselasky
1183322810Shselasky	if (!pd->uobject)
1184322810Shselasky		free_pa_mkey(mdev, mpd->pa_lkey);
1185322810Shselasky
1186322810Shselasky	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1187322810Shselasky	kfree(mpd);
1188322810Shselasky
1189322810Shselasky	return 0;
1190322810Shselasky}
1191322810Shselasky
1192322810Shselaskystatic int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1193322810Shselasky{
1194322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1195322810Shselasky	int err;
1196322810Shselasky
1197322810Shselasky	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1198322810Shselasky		err = -EOPNOTSUPP;
1199322810Shselasky	else
1200322810Shselasky		err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1201322810Shselasky	if (err)
1202322810Shselasky		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1203322810Shselasky			     ibqp->qp_num, gid->raw);
1204322810Shselasky
1205322810Shselasky	return err;
1206322810Shselasky}
1207322810Shselasky
1208322810Shselaskystatic int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1209322810Shselasky{
1210322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1211322810Shselasky	int err;
1212322810Shselasky
1213322810Shselasky	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1214322810Shselasky		err = -EOPNOTSUPP;
1215322810Shselasky	else
1216322810Shselasky		err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1217322810Shselasky	if (err)
1218322810Shselasky		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1219322810Shselasky			     ibqp->qp_num, gid->raw);
1220322810Shselasky
1221322810Shselasky	return err;
1222322810Shselasky}
1223322810Shselasky
1224322810Shselaskystatic int init_node_data(struct mlx5_ib_dev *dev)
1225322810Shselasky{
1226322810Shselasky	int err;
1227322810Shselasky
1228322810Shselasky	err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1229322810Shselasky	if (err)
1230322810Shselasky		return err;
1231322810Shselasky
1232322810Shselasky	return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1233322810Shselasky}
1234322810Shselasky
1235322810Shselaskystatic ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1236322810Shselasky			     char *buf)
1237322810Shselasky{
1238322810Shselasky	struct mlx5_ib_dev *dev =
1239322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1240322810Shselasky
1241322810Shselasky	return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
1242322810Shselasky}
1243322810Shselasky
1244322810Shselaskystatic ssize_t show_reg_pages(struct device *device,
1245322810Shselasky			      struct device_attribute *attr, char *buf)
1246322810Shselasky{
1247322810Shselasky	struct mlx5_ib_dev *dev =
1248322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1249322810Shselasky
1250322810Shselasky	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1251322810Shselasky}
1252322810Shselasky
1253322810Shselaskystatic ssize_t show_hca(struct device *device, struct device_attribute *attr,
1254322810Shselasky			char *buf)
1255322810Shselasky{
1256322810Shselasky	struct mlx5_ib_dev *dev =
1257322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1258322810Shselasky	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1259322810Shselasky}
1260322810Shselasky
1261322810Shselaskystatic ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1262322810Shselasky			   char *buf)
1263322810Shselasky{
1264322810Shselasky	struct mlx5_ib_dev *dev =
1265322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1266322810Shselasky	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1267322810Shselasky		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1268322810Shselasky}
1269322810Shselasky
1270322810Shselaskystatic ssize_t show_rev(struct device *device, struct device_attribute *attr,
1271322810Shselasky			char *buf)
1272322810Shselasky{
1273322810Shselasky	struct mlx5_ib_dev *dev =
1274322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1275322810Shselasky	return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
1276322810Shselasky}
1277322810Shselasky
1278322810Shselaskystatic ssize_t show_board(struct device *device, struct device_attribute *attr,
1279322810Shselasky			  char *buf)
1280322810Shselasky{
1281322810Shselasky	struct mlx5_ib_dev *dev =
1282322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1283322810Shselasky	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1284322810Shselasky		       dev->mdev->board_id);
1285322810Shselasky}
1286322810Shselasky
/* Read-only sysfs attributes exported per mlx5 IB device. */
static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);

/* Table of the attributes above, registered during device setup. */
static struct device_attribute *mlx5_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_fw_ver,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_fw_pages,
	&dev_attr_reg_pages,
};
1302322810Shselasky
/*
 * Fatal-error (reset flow) helper.  Walks every QP on the device and,
 * for any QP with outstanding send or receive work, queues its CQs on
 * a local list; afterwards each collected CQ's completion handler is
 * invoked so consumers blocked on completions can drain and tear down.
 * The whole walk runs under reset_flow_resource_lock to synchronize
 * with QP create/destroy.
 */
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
	struct mlx5_ib_qp *mqp;
	struct mlx5_ib_cq *send_mcq, *recv_mcq;
	struct mlx5_core_cq *mcq;
	struct list_head cq_armed_list;
	unsigned long flags_qp;
	unsigned long flags_cq;
	unsigned long flags;

	mlx5_ib_warn(ibdev, " started\n");
	INIT_LIST_HEAD(&cq_armed_list);

	/* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
		/* tail != head means the SQ has posted but uncompleted work */
		if (mqp->sq.tail != mqp->sq.head) {
			send_mcq = to_mcq(mqp->ibqp.send_cq);
			spin_lock_irqsave(&send_mcq->lock, flags_cq);
			if (send_mcq->mcq.comp &&
			    mqp->ibqp.send_cq->comp_handler) {
				/* reset_notify_added prevents double-queuing a CQ */
				if (!send_mcq->mcq.reset_notify_added) {
					send_mcq->mcq.reset_notify_added = 1;
					list_add_tail(&send_mcq->mcq.reset_notify,
						      &cq_armed_list);
				}
			}
			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
		}
		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
		/* no handling is needed for SRQ */
		if (!mqp->ibqp.srq) {
			/* Same collection logic for the receive side. */
			if (mqp->rq.tail != mqp->rq.head) {
				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
				if (recv_mcq->mcq.comp &&
				    mqp->ibqp.recv_cq->comp_handler) {
					if (!recv_mcq->mcq.reset_notify_added) {
						recv_mcq->mcq.reset_notify_added = 1;
						list_add_tail(&recv_mcq->mcq.reset_notify,
							      &cq_armed_list);
					}
				}
				spin_unlock_irqrestore(&recv_mcq->lock,
						       flags_cq);
			}
		}
		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
	}
	/*At that point all inflight post send were put to be executed as of we
	 * lock/unlock above locks Now need to arm all involved CQs.
	 */
	list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
		mcq->comp(mcq);
	}
	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
	mlx5_ib_warn(ibdev, " ended\n");
	return;
}
1364322810Shselasky
1365322810Shselaskystatic void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1366322810Shselasky			  enum mlx5_dev_event event, unsigned long param)
1367322810Shselasky{
1368322810Shselasky	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1369322810Shselasky	struct ib_event ibev;
1370322810Shselasky
1371322810Shselasky	u8 port = 0;
1372322810Shselasky
1373322810Shselasky	switch (event) {
1374322810Shselasky	case MLX5_DEV_EVENT_SYS_ERROR:
1375322810Shselasky		ibdev->ib_active = false;
1376322810Shselasky		ibev.event = IB_EVENT_DEVICE_FATAL;
1377322810Shselasky		mlx5_ib_handle_internal_error(ibdev);
1378322810Shselasky		break;
1379322810Shselasky
1380322810Shselasky	case MLX5_DEV_EVENT_PORT_UP:
1381322810Shselasky		ibev.event = IB_EVENT_PORT_ACTIVE;
1382322810Shselasky		port = (u8)param;
1383322810Shselasky		break;
1384322810Shselasky
1385322810Shselasky	case MLX5_DEV_EVENT_PORT_DOWN:
1386322810Shselasky	case MLX5_DEV_EVENT_PORT_INITIALIZED:
1387322810Shselasky		ibev.event = IB_EVENT_PORT_ERR;
1388322810Shselasky		port = (u8)param;
1389322810Shselasky		break;
1390322810Shselasky
1391322810Shselasky	case MLX5_DEV_EVENT_LID_CHANGE:
1392322810Shselasky		ibev.event = IB_EVENT_LID_CHANGE;
1393322810Shselasky		port = (u8)param;
1394322810Shselasky		break;
1395322810Shselasky
1396322810Shselasky	case MLX5_DEV_EVENT_PKEY_CHANGE:
1397322810Shselasky		ibev.event = IB_EVENT_PKEY_CHANGE;
1398322810Shselasky		port = (u8)param;
1399322810Shselasky		break;
1400322810Shselasky
1401322810Shselasky	case MLX5_DEV_EVENT_GUID_CHANGE:
1402322810Shselasky		ibev.event = IB_EVENT_GID_CHANGE;
1403322810Shselasky		port = (u8)param;
1404322810Shselasky		break;
1405322810Shselasky
1406322810Shselasky	case MLX5_DEV_EVENT_CLIENT_REREG:
1407322810Shselasky		ibev.event = IB_EVENT_CLIENT_REREGISTER;
1408322810Shselasky		port = (u8)param;
1409322810Shselasky		break;
1410322810Shselasky
1411322810Shselasky	default:
1412322810Shselasky		break;
1413322810Shselasky	}
1414322810Shselasky
1415322810Shselasky	ibev.device	      = &ibdev->ib_dev;
1416322810Shselasky	ibev.element.port_num = port;
1417322810Shselasky
1418322810Shselasky	if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1419322810Shselasky	    (port < 1 || port > ibdev->num_ports)) {
1420322810Shselasky		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1421322810Shselasky		return;
1422322810Shselasky	}
1423322810Shselasky
1424322810Shselasky	if (ibdev->ib_active)
1425322810Shselasky		ib_dispatch_event(&ibev);
1426322810Shselasky}
1427322810Shselasky
1428322810Shselaskystatic void get_ext_port_caps(struct mlx5_ib_dev *dev)
1429322810Shselasky{
1430322810Shselasky	int port;
1431322810Shselasky
1432322810Shselasky	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1433322810Shselasky		mlx5_query_ext_port_caps(dev, port);
1434322810Shselasky}
1435322810Shselasky
/*
 * Record the device's atomic capability and enable the atomic
 * responder.  The capability check below is compiled out, so the
 * responder is enabled unconditionally regardless of the reported
 * atomic_cap value.
 */
static void config_atomic_responder(struct mlx5_ib_dev *dev,
				    struct ib_device_attr *props)
{
	enum ib_atomic_cap cap = props->atomic_cap;

#if 0
	if (cap == IB_ATOMIC_HCA ||
	    cap == IB_ATOMIC_GLOB)
#endif
		dev->enable_atomic_resp = 1;

	dev->atomic_cap = cap;
}
1449322810Shselasky
/* Supported buffer address alignments, in bytes (0 = no alignment). */
enum mlx5_addr_align {
	MLX5_ADDR_ALIGN_0	= 0,
	MLX5_ADDR_ALIGN_64	= 64,
	MLX5_ADDR_ALIGN_128	= 128,
};
1455322810Shselasky
1456322810Shselaskystatic int get_port_caps(struct mlx5_ib_dev *dev)
1457322810Shselasky{
1458322810Shselasky	struct ib_device_attr *dprops = NULL;
1459322810Shselasky	struct ib_port_attr *pprops = NULL;
1460322810Shselasky	int err = -ENOMEM;
1461322810Shselasky	int port;
1462322810Shselasky
1463322810Shselasky	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1464322810Shselasky	if (!pprops)
1465322810Shselasky		goto out;
1466322810Shselasky
1467322810Shselasky	dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1468322810Shselasky	if (!dprops)
1469322810Shselasky		goto out;
1470322810Shselasky
1471322810Shselasky	err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1472322810Shselasky	if (err) {
1473322810Shselasky		mlx5_ib_warn(dev, "query_device failed %d\n", err);
1474322810Shselasky		goto out;
1475322810Shselasky	}
1476322810Shselasky	config_atomic_responder(dev, dprops);
1477322810Shselasky
1478322810Shselasky	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1479322810Shselasky		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1480322810Shselasky		if (err) {
1481322810Shselasky			mlx5_ib_warn(dev, "query_port %d failed %d\n",
1482322810Shselasky				     port, err);
1483322810Shselasky			break;
1484322810Shselasky		}
1485322810Shselasky		dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1486322810Shselasky		dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1487322810Shselasky		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1488322810Shselasky			    dprops->max_pkeys, pprops->gid_tbl_len);
1489322810Shselasky	}
1490322810Shselasky
1491322810Shselaskyout:
1492322810Shselasky	kfree(pprops);
1493322810Shselasky	kfree(dprops);
1494322810Shselasky
1495322810Shselasky	return err;
1496322810Shselasky}
1497322810Shselasky
1498322810Shselaskystatic void destroy_umrc_res(struct mlx5_ib_dev *dev)
1499322810Shselasky{
1500322810Shselasky	int err;
1501322810Shselasky
1502322810Shselasky	err = mlx5_mr_cache_cleanup(dev);
1503322810Shselasky	if (err)
1504322810Shselasky		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1505322810Shselasky
1506322810Shselasky	ib_dereg_mr(dev->umrc.mr);
1507322810Shselasky	ib_dealloc_pd(dev->umrc.pd);
1508322810Shselasky}
1509322810Shselasky
/* Maximum number of outstanding UMR work requests. */
enum {
	MAX_UMR_WR = 128,
};
1513322810Shselasky
1514322810Shselaskystatic int create_umr_res(struct mlx5_ib_dev *dev)
1515322810Shselasky{
1516322810Shselasky	struct ib_pd *pd;
1517322810Shselasky	struct ib_mr *mr;
1518322810Shselasky	int ret;
1519322810Shselasky
1520322810Shselasky	pd = ib_alloc_pd(&dev->ib_dev);
1521322810Shselasky	if (IS_ERR(pd)) {
1522322810Shselasky		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1523322810Shselasky		ret = PTR_ERR(pd);
1524322810Shselasky		goto error_0;
1525322810Shselasky	}
1526322810Shselasky
1527322810Shselasky	mr = ib_get_dma_mr(pd,  IB_ACCESS_LOCAL_WRITE);
1528322810Shselasky	if (IS_ERR(mr)) {
1529322810Shselasky		mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1530322810Shselasky		ret = PTR_ERR(mr);
1531322810Shselasky		goto error_1;
1532322810Shselasky	}
1533322810Shselasky
1534322810Shselasky	dev->umrc.mr = mr;
1535322810Shselasky	dev->umrc.pd = pd;
1536322810Shselasky
1537322810Shselasky	ret = mlx5_mr_cache_init(dev);
1538322810Shselasky	if (ret) {
1539322810Shselasky		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1540322810Shselasky		goto error_4;
1541322810Shselasky	}
1542322810Shselasky
1543322810Shselasky	return 0;
1544322810Shselasky
1545322810Shselaskyerror_4:
1546322810Shselasky	ib_dereg_mr(mr);
1547322810Shselaskyerror_1:
1548322810Shselasky	ib_dealloc_pd(pd);
1549322810Shselaskyerror_0:
1550322810Shselasky	return ret;
1551322810Shselasky}
1552322810Shselasky
/*
 * Allocate the device-global verbs resources kept in dev->devr: a PD
 * (p0), a CQ (c0), two XRC domains (x0, x1) and two SRQs (an XRC SRQ
 * s0 and a basic SRQ s1).  These objects are created through the
 * driver's own verbs entry points with NULL ucontext/udata, so the
 * ib_core bookkeeping fields (device, uobject, usecnt, ...) that the
 * core would normally fill in are initialized by hand here.
 * Returns 0 on success; on failure everything already created is
 * released via the goto unwind chain and a negative errno is returned.
 */
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
	struct ib_srq_init_attr attr;
	struct mlx5_ib_dev *dev;
	int ret = 0;

	dev = container_of(devr, struct mlx5_ib_dev, devr);

	/* p0: protection domain used by both SRQs below. */
	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->p0)) {
		ret = PTR_ERR(devr->p0);
		goto error0;
	}
	devr->p0->device  = &dev->ib_dev;
	devr->p0->uobject = NULL;
	atomic_set(&devr->p0->usecnt, 0);

	/* c0: single-entry CQ attached to the XRC SRQ. */
	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
	if (IS_ERR(devr->c0)) {
		ret = PTR_ERR(devr->c0);
		goto error1;
	}
	devr->c0->device        = &dev->ib_dev;
	devr->c0->uobject       = NULL;
	devr->c0->comp_handler  = NULL;
	devr->c0->event_handler = NULL;
	devr->c0->cq_context    = NULL;
	atomic_set(&devr->c0->usecnt, 0);

	/* x0, x1: XRC domains; x0 backs the XRC SRQ s0. */
	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x0)) {
		ret = PTR_ERR(devr->x0);
		goto error2;
	}
	devr->x0->device = &dev->ib_dev;
	devr->x0->inode = NULL;
	atomic_set(&devr->x0->usecnt, 0);
	mutex_init(&devr->x0->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x1)) {
		ret = PTR_ERR(devr->x1);
		goto error3;
	}
	devr->x1->device = &dev->ib_dev;
	devr->x1->inode = NULL;
	atomic_set(&devr->x1->usecnt, 0);
	mutex_init(&devr->x1->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

	/* s0: minimal (1 WR / 1 SGE) XRC SRQ on x0/c0. */
	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_XRC;
	attr.ext.xrc.cq = devr->c0;
	attr.ext.xrc.xrcd = devr->x0;

	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s0)) {
		ret = PTR_ERR(devr->s0);
		goto error4;
	}
	devr->s0->device	= &dev->ib_dev;
	devr->s0->pd		= devr->p0;
	devr->s0->uobject       = NULL;
	devr->s0->event_handler = NULL;
	devr->s0->srq_context   = NULL;
	devr->s0->srq_type      = IB_SRQT_XRC;
	devr->s0->ext.xrc.xrcd  = devr->x0;
	devr->s0->ext.xrc.cq	= devr->c0;
	/* s0 holds references on its xrcd, cq and pd. */
	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s0->usecnt, 0);

	/* s1: minimal basic SRQ on the same PD. */
	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_BASIC;
	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s1)) {
		ret = PTR_ERR(devr->s1);
		goto error5;
	}
	devr->s1->device	= &dev->ib_dev;
	devr->s1->pd		= devr->p0;
	devr->s1->uobject       = NULL;
	devr->s1->event_handler = NULL;
	devr->s1->srq_context   = NULL;
	devr->s1->srq_type      = IB_SRQT_BASIC;
	devr->s1->ext.xrc.cq	= devr->c0;
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s1->usecnt, 0);

	return 0;

error5:
	mlx5_ib_destroy_srq(devr->s0);
error4:
	mlx5_ib_dealloc_xrcd(devr->x1);
error3:
	mlx5_ib_dealloc_xrcd(devr->x0);
error2:
	mlx5_ib_destroy_cq(devr->c0);
error1:
	mlx5_ib_dealloc_pd(devr->p0);
error0:
	return ret;
}
1663322810Shselasky
/*
 * Release the objects created by create_dev_resources(): the SRQs
 * first (they hold references on the XRCD/CQ/PD), then the XRC
 * domains, the CQ and finally the PD.
 */
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);
}
1673322810Shselasky
1674325611Shselaskystatic u32 get_core_cap_flags(struct ib_device *ibdev)
1675325611Shselasky{
1676325611Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1677325611Shselasky	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
1678325611Shselasky	u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
1679325611Shselasky	u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
1680325611Shselasky	u32 ret = 0;
1681325611Shselasky
1682325611Shselasky	if (ll == IB_LINK_LAYER_INFINIBAND)
1683325611Shselasky		return RDMA_CORE_PORT_IBA_IB;
1684325611Shselasky
1685325611Shselasky	ret = RDMA_CORE_PORT_RAW_PACKET;
1686325611Shselasky
1687325611Shselasky	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
1688325611Shselasky		return ret;
1689325611Shselasky
1690325611Shselasky	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
1691325611Shselasky		return ret;
1692325611Shselasky
1693325611Shselasky	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
1694325611Shselasky		ret |= RDMA_CORE_PORT_IBA_ROCE;
1695325611Shselasky
1696325611Shselasky	if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
1697325611Shselasky		ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
1698325611Shselasky
1699325611Shselasky	return ret;
1700325611Shselasky}
1701325611Shselasky
/*
 * ib_core get_port_immutable callback: fill in the per-port values
 * that never change over the lifetime of the device (pkey/gid table
 * sizes, core capability flags, MAD size).
 */
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
	int err;

	/*
	 * NOTE(review): core_cap_flags is assigned both here and again
	 * after ib_query_port() below.  Presumably the early assignment
	 * is so the query path can rely on the flags -- confirm against
	 * ib_query_port(); otherwise one of the two calls is redundant.
	 */
	immutable->core_cap_flags = get_core_cap_flags(ibdev);

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = get_core_cap_flags(ibdev);
	/* MAD size is advertised for IB ports and RoCE-capable HCAs. */
	if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
		immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}
1724325611Shselasky
1725322810Shselaskystatic void enable_dc_tracer(struct mlx5_ib_dev *dev)
1726322810Shselasky{
1727322810Shselasky	struct device *device = dev->ib_dev.dma_device;
1728322810Shselasky	struct mlx5_dc_tracer *dct = &dev->dctr;
1729322810Shselasky	int order;
1730322810Shselasky	void *tmp;
1731322810Shselasky	int size;
1732322810Shselasky	int err;
1733322810Shselasky
1734322810Shselasky	size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1735322810Shselasky	if (size <= PAGE_SIZE)
1736322810Shselasky		order = 0;
1737322810Shselasky	else
1738322810Shselasky		order = 1;
1739322810Shselasky
1740322810Shselasky	dct->pg = alloc_pages(GFP_KERNEL, order);
1741322810Shselasky	if (!dct->pg) {
1742322810Shselasky		mlx5_ib_err(dev, "failed to allocate %d pages\n", order);
1743322810Shselasky		return;
1744322810Shselasky	}
1745322810Shselasky
1746322810Shselasky	tmp = page_address(dct->pg);
1747322810Shselasky	memset(tmp, 0xff, size);
1748322810Shselasky
1749322810Shselasky	dct->size = size;
1750322810Shselasky	dct->order = order;
1751322810Shselasky	dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1752322810Shselasky	if (dma_mapping_error(device, dct->dma)) {
1753322810Shselasky		mlx5_ib_err(dev, "dma mapping error\n");
1754322810Shselasky		goto map_err;
1755322810Shselasky	}
1756322810Shselasky
1757322810Shselasky	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1758322810Shselasky	if (err) {
1759322810Shselasky		mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1760322810Shselasky		goto cmd_err;
1761322810Shselasky	}
1762322810Shselasky
1763322810Shselasky	return;
1764322810Shselasky
1765322810Shselaskycmd_err:
1766322810Shselasky	dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1767322810Shselaskymap_err:
1768322810Shselasky	__free_pages(dct->pg, dct->order);
1769322810Shselasky	dct->pg = NULL;
1770322810Shselasky}
1771322810Shselasky
1772322810Shselaskystatic void disable_dc_tracer(struct mlx5_ib_dev *dev)
1773322810Shselasky{
1774322810Shselasky	struct device *device = dev->ib_dev.dma_device;
1775322810Shselasky	struct mlx5_dc_tracer *dct = &dev->dctr;
1776322810Shselasky	int err;
1777322810Shselasky
1778322810Shselasky	if (!dct->pg)
1779322810Shselasky		return;
1780322810Shselasky
1781322810Shselasky	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1782322810Shselasky	if (err) {
1783322810Shselasky		mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1784322810Shselasky		return;
1785322810Shselasky	}
1786322810Shselasky
1787322810Shselasky	dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1788322810Shselasky	__free_pages(dct->pg, dct->order);
1789322810Shselasky	dct->pg = NULL;
1790322810Shselasky}
1791322810Shselasky
/* DC CNAK tracing parameters: buffer geometry and send attributes. */
enum {
	MLX5_DC_CNAK_SIZE		= 128,	/* bytes per CNAK buffer */
	MLX5_NUM_BUF_IN_PAGE		= PAGE_SIZE / MLX5_DC_CNAK_SIZE,
	MLX5_CNAK_TX_CQ_SIGNAL_FACTOR	= 128,
	MLX5_DC_CNAK_SL			= 0,	/* service level used for CNAKs */
	MLX5_DC_CNAK_VL			= 0,	/* virtual lane used for CNAKs */
};
1799322810Shselasky
1800322810Shselaskystatic int init_dc_improvements(struct mlx5_ib_dev *dev)
1801322810Shselasky{
1802322810Shselasky	if (!mlx5_core_is_pf(dev->mdev))
1803322810Shselasky		return 0;
1804322810Shselasky
1805322810Shselasky	if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1806322810Shselasky		return 0;
1807322810Shselasky
1808322810Shselasky	enable_dc_tracer(dev);
1809322810Shselasky
1810322810Shselasky	return 0;
1811322810Shselasky}
1812322810Shselasky
/* Undo init_dc_improvements(); safe when nothing was enabled. */
static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
{
	disable_dc_tracer(dev);
}
1818322810Shselasky
1819322810Shselaskystatic void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1820322810Shselasky{
1821322810Shselasky	mlx5_vport_dealloc_q_counter(dev->mdev,
1822322810Shselasky				     MLX5_INTERFACE_PROTOCOL_IB,
1823322810Shselasky				     dev->port[port_num].q_cnt_id);
1824322810Shselasky	dev->port[port_num].q_cnt_id = 0;
1825322810Shselasky}
1826322810Shselasky
1827322810Shselaskystatic void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1828322810Shselasky{
1829322810Shselasky	unsigned int i;
1830322810Shselasky
1831322810Shselasky	for (i = 0; i < dev->num_ports; i++)
1832322810Shselasky		mlx5_ib_dealloc_q_port_counter(dev, i);
1833322810Shselasky}
1834322810Shselasky
1835322810Shselaskystatic int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1836322810Shselasky{
1837322810Shselasky	int i;
1838322810Shselasky	int ret;
1839322810Shselasky
1840322810Shselasky	for (i = 0; i < dev->num_ports; i++) {
1841322810Shselasky		ret = mlx5_vport_alloc_q_counter(dev->mdev,
1842322810Shselasky						 MLX5_INTERFACE_PROTOCOL_IB,
1843322810Shselasky						 &dev->port[i].q_cnt_id);
1844322810Shselasky		if (ret) {
1845322810Shselasky			mlx5_ib_warn(dev,
1846322810Shselasky				     "couldn't allocate queue counter for port %d\n",
1847322810Shselasky				     i + 1);
1848322810Shselasky			goto dealloc_counters;
1849322810Shselasky		}
1850322810Shselasky	}
1851322810Shselasky
1852322810Shselasky	return 0;
1853322810Shselasky
1854322810Shselaskydealloc_counters:
1855322810Shselasky	while (--i >= 0)
1856322810Shselasky		mlx5_ib_dealloc_q_port_counter(dev, i);
1857322810Shselasky
1858322810Shselasky	return ret;
1859322810Shselasky}
1860322810Shselasky
/*
 * sysfs attribute for a single mlx5 port: the show/store callbacks
 * receive the owning mlx5_ib_port instead of a raw kobject.
 */
struct port_attribute {
	struct attribute attr;
	ssize_t (*show)(struct mlx5_ib_port *,
			struct port_attribute *, char *buf);
	ssize_t (*store)(struct mlx5_ib_port *,
			 struct port_attribute *,
			 const char *buf, size_t count);
};
1869322810Shselasky
/*
 * Port attribute backed by a queue counter field located at "offset"
 * bytes into the query_q_counter_out mailbox layout.
 */
struct port_counter_attribute {
	struct port_attribute	attr;
	size_t			offset;
};
1874322810Shselasky
1875322810Shselaskystatic ssize_t port_attr_show(struct kobject *kobj,
1876322810Shselasky			      struct attribute *attr, char *buf)
1877322810Shselasky{
1878322810Shselasky	struct port_attribute *port_attr =
1879322810Shselasky		container_of(attr, struct port_attribute, attr);
1880322810Shselasky	struct mlx5_ib_port_sysfs_group *p =
1881322810Shselasky		container_of(kobj, struct mlx5_ib_port_sysfs_group,
1882322810Shselasky			     kobj);
1883322810Shselasky	struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1884322810Shselasky						    group);
1885322810Shselasky
1886322810Shselasky	if (!port_attr->show)
1887322810Shselasky		return -EIO;
1888322810Shselasky
1889322810Shselasky	return port_attr->show(mibport, port_attr, buf);
1890322810Shselasky}
1891322810Shselasky
/*
 * sysfs "show" for a single queue counter: query the firmware counter
 * set bound to this port and print the 32-bit field at the attribute's
 * offset in the query output mailbox.
 */
static ssize_t show_port_counter(struct mlx5_ib_port *p,
				 struct port_attribute *port_attr,
				 char *buf)
{
	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
	struct port_counter_attribute *counter_attr =
		container_of(port_attr, struct port_counter_attribute, attr);
	void *out;
	int ret;

	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	ret = mlx5_vport_query_q_counter(p->dev->mdev,
					 p->q_cnt_id, 0,
					 out, outlen);
	if (ret)
		goto free;

	/* Mailbox fields are big-endian. */
	ret = sprintf(buf, "%d\n",
		      be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));

free:
	/*
	 * NOTE(review): "out" comes from mlx5_vzalloc() but is released
	 * with kfree(); verify mlx5_vzalloc() cannot return vmalloc'ed
	 * memory here, otherwise this should be kvfree().
	 */
	kfree(out);
	return ret;
}
1919322810Shselasky
/*
 * Define a read-only port counter attribute named _name whose value is
 * taken from the identically-named field of query_q_counter_out.
 */
#define PORT_COUNTER_ATTR(_name)					\
struct port_counter_attribute port_counter_attr_##_name = {		\
	.attr  = __ATTR(_name, S_IRUGO, show_port_counter, NULL),	\
	.offset = MLX5_BYTE_OFF(query_q_counter_out, _name)		\
}

static PORT_COUNTER_ATTR(rx_write_requests);
static PORT_COUNTER_ATTR(rx_read_requests);
static PORT_COUNTER_ATTR(rx_atomic_requests);
static PORT_COUNTER_ATTR(rx_dct_connect);
static PORT_COUNTER_ATTR(out_of_buffer);
static PORT_COUNTER_ATTR(out_of_sequence);
static PORT_COUNTER_ATTR(duplicate_request);
static PORT_COUNTER_ATTR(rnr_nak_retry_err);
static PORT_COUNTER_ATTR(packet_seq_err);
static PORT_COUNTER_ATTR(implied_nak_seq_err);
static PORT_COUNTER_ATTR(local_ack_timeout_err);

/* All counter attributes exported under the per-port "counters" group. */
static struct attribute *counter_attrs[] = {
	&port_counter_attr_rx_write_requests.attr.attr,
	&port_counter_attr_rx_read_requests.attr.attr,
	&port_counter_attr_rx_atomic_requests.attr.attr,
	&port_counter_attr_rx_dct_connect.attr.attr,
	&port_counter_attr_out_of_buffer.attr.attr,
	&port_counter_attr_out_of_sequence.attr.attr,
	&port_counter_attr_duplicate_request.attr.attr,
	&port_counter_attr_rnr_nak_retry_err.attr.attr,
	&port_counter_attr_packet_seq_err.attr.attr,
	&port_counter_attr_implied_nak_seq_err.attr.attr,
	&port_counter_attr_local_ack_timeout_err.attr.attr,
	NULL
};

static struct attribute_group port_counters_group = {
	.name  = "counters",
	.attrs  = counter_attrs
};
1957322810Shselasky
/* kobject glue for the per-port sysfs directories (read-only). */
static const struct sysfs_ops port_sysfs_ops = {
	.show = port_attr_show
};

static struct kobj_type port_type = {
	.sysfs_ops     = &port_sysfs_ops,
};
1965322810Shselasky
1966322810Shselaskystatic int add_port_attrs(struct mlx5_ib_dev *dev,
1967322810Shselasky			  struct kobject *parent,
1968322810Shselasky			  struct mlx5_ib_port_sysfs_group *port,
1969322810Shselasky			  u8 port_num)
1970322810Shselasky{
1971322810Shselasky	int ret;
1972322810Shselasky
1973322810Shselasky	ret = kobject_init_and_add(&port->kobj, &port_type,
1974322810Shselasky				   parent,
1975322810Shselasky				   "%d", port_num);
1976322810Shselasky	if (ret)
1977322810Shselasky		return ret;
1978322810Shselasky
1979322810Shselasky	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1980322810Shselasky	    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
1981322810Shselasky		ret = sysfs_create_group(&port->kobj, &port_counters_group);
1982322810Shselasky		if (ret)
1983322810Shselasky			goto put_kobj;
1984322810Shselasky	}
1985322810Shselasky
1986322810Shselasky	port->enabled = true;
1987322810Shselasky	return ret;
1988322810Shselasky
1989322810Shselaskyput_kobj:
1990322810Shselasky	kobject_put(&port->kobj);
1991322810Shselasky	return ret;
1992322810Shselasky}
1993322810Shselasky
/*
 * Tear down the per-port sysfs entries created by create_port_attrs()
 * for the first num_ports ports, then drop the "mlx5_ports" parent
 * kobject.  Ports whose setup never completed (enabled == false) are
 * skipped, so this is safe to call on a partially constructed set.
 */
static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
				unsigned int num_ports)
{
	unsigned int i;

	for (i = 0; i < num_ports; i++) {
		struct mlx5_ib_port_sysfs_group *port =
			&dev->port[i].group;

		if (!port->enabled)
			continue;

		/* Same capability test as in add_port_attrs(). */
		if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
		    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
			sysfs_remove_group(&port->kobj,
					   &port_counters_group);
		kobject_put(&port->kobj);
		port->enabled = false;
	}

	if (dev->ports_parent) {
		kobject_put(dev->ports_parent);
		dev->ports_parent = NULL;
	}
}
2019322810Shselasky
2020322810Shselaskystatic int create_port_attrs(struct mlx5_ib_dev *dev)
2021322810Shselasky{
2022322810Shselasky	int ret = 0;
2023322810Shselasky	unsigned int i = 0;
2024322810Shselasky	struct device *device = &dev->ib_dev.dev;
2025322810Shselasky
2026322810Shselasky	dev->ports_parent = kobject_create_and_add("mlx5_ports",
2027322810Shselasky						   &device->kobj);
2028322810Shselasky	if (!dev->ports_parent)
2029322810Shselasky		return -ENOMEM;
2030322810Shselasky
2031322810Shselasky	for (i = 0; i < dev->num_ports; i++) {
2032322810Shselasky		ret = add_port_attrs(dev,
2033322810Shselasky				     dev->ports_parent,
2034322810Shselasky				     &dev->port[i].group,
2035322810Shselasky				     i + 1);
2036322810Shselasky
2037322810Shselasky		if (ret)
2038322810Shselasky			goto _destroy_ports_attrs;
2039322810Shselasky	}
2040322810Shselasky
2041322810Shselasky	return 0;
2042322810Shselasky
2043322810Shselasky_destroy_ports_attrs:
2044322810Shselasky	destroy_ports_attrs(dev, i);
2045322810Shselasky	return ret;
2046322810Shselasky}
2047322810Shselasky
2048322810Shselaskystatic void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2049322810Shselasky{
2050322810Shselasky	struct mlx5_ib_dev *dev;
2051322810Shselasky	int err;
2052322810Shselasky	int i;
2053322810Shselasky
2054322810Shselasky	printk_once(KERN_INFO "%s", mlx5_version);
2055322810Shselasky
2056322810Shselasky	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2057322810Shselasky	if (!dev)
2058322810Shselasky		return NULL;
2059322810Shselasky
2060322810Shselasky	dev->mdev = mdev;
2061322810Shselasky
2062322810Shselasky	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2063322810Shselasky			     GFP_KERNEL);
2064322810Shselasky	if (!dev->port)
2065322810Shselasky		goto err_dealloc;
2066322810Shselasky
2067322810Shselasky	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2068322810Shselasky		dev->port[i].dev = dev;
2069322810Shselasky		dev->port[i].port_num = i;
2070322810Shselasky		dev->port[i].port_gone = 0;
2071322810Shselasky		memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2072322810Shselasky	}
2073322810Shselasky
2074322810Shselasky	err = get_port_caps(dev);
2075322810Shselasky	if (err)
2076322810Shselasky		goto err_free_port;
2077322810Shselasky
2078322810Shselasky	if (mlx5_use_mad_ifc(dev))
2079322810Shselasky		get_ext_port_caps(dev);
2080322810Shselasky
2081322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2082322810Shselasky	    IB_LINK_LAYER_ETHERNET) {
2083322810Shselasky		if (MLX5_CAP_GEN(mdev, roce)) {
2084322810Shselasky			err = mlx5_nic_vport_enable_roce(mdev);
2085322810Shselasky			if (err)
2086322810Shselasky				goto err_free_port;
2087322810Shselasky		} else {
2088322810Shselasky			goto err_free_port;
2089322810Shselasky		}
2090322810Shselasky	}
2091322810Shselasky
2092322810Shselasky	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
2093322810Shselasky
2094322810Shselasky	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2095322810Shselasky	dev->ib_dev.owner		= THIS_MODULE;
2096322810Shselasky	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
2097322810Shselasky	dev->ib_dev.local_dma_lkey	= mdev->special_contexts.resd_lkey;
2098322810Shselasky	dev->num_ports		= MLX5_CAP_GEN(mdev, num_ports);
2099322810Shselasky	dev->ib_dev.phys_port_cnt     = dev->num_ports;
2100322810Shselasky	dev->ib_dev.num_comp_vectors    =
2101322810Shselasky		dev->mdev->priv.eq_table.num_comp_vectors;
2102322810Shselasky	dev->ib_dev.dma_device	= &mdev->pdev->dev;
2103322810Shselasky
2104322810Shselasky	dev->ib_dev.uverbs_abi_ver	= MLX5_IB_UVERBS_ABI_VERSION;
2105322810Shselasky	dev->ib_dev.uverbs_cmd_mask	=
2106322810Shselasky		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
2107322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
2108322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
2109322810Shselasky		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
2110322810Shselasky		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
2111322810Shselasky		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
2112322810Shselasky		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
2113322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
2114322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
2115322810Shselasky		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
2116322810Shselasky		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
2117322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
2118322810Shselasky		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
2119322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
2120322810Shselasky		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
2121322810Shselasky		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
2122322810Shselasky		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
2123322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
2124322810Shselasky		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
2125322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
2126322810Shselasky		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
2127322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
2128322810Shselasky		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
2129322810Shselasky
2130322810Shselasky	dev->ib_dev.query_device	= mlx5_ib_query_device;
2131322810Shselasky	dev->ib_dev.query_port		= mlx5_ib_query_port;
2132322810Shselasky	dev->ib_dev.get_link_layer	= mlx5_ib_port_link_layer;
2133322810Shselasky	dev->ib_dev.query_gid		= mlx5_ib_query_gid;
2134322810Shselasky	dev->ib_dev.query_pkey		= mlx5_ib_query_pkey;
2135322810Shselasky	dev->ib_dev.modify_device	= mlx5_ib_modify_device;
2136322810Shselasky	dev->ib_dev.modify_port		= mlx5_ib_modify_port;
2137322810Shselasky	dev->ib_dev.alloc_ucontext	= mlx5_ib_alloc_ucontext;
2138322810Shselasky	dev->ib_dev.dealloc_ucontext	= mlx5_ib_dealloc_ucontext;
2139322810Shselasky	dev->ib_dev.mmap		= mlx5_ib_mmap;
2140322810Shselasky	dev->ib_dev.alloc_pd		= mlx5_ib_alloc_pd;
2141322810Shselasky	dev->ib_dev.dealloc_pd		= mlx5_ib_dealloc_pd;
2142322810Shselasky	dev->ib_dev.create_ah		= mlx5_ib_create_ah;
2143322810Shselasky	dev->ib_dev.query_ah		= mlx5_ib_query_ah;
2144322810Shselasky	dev->ib_dev.destroy_ah		= mlx5_ib_destroy_ah;
2145322810Shselasky	dev->ib_dev.create_srq		= mlx5_ib_create_srq;
2146322810Shselasky	dev->ib_dev.modify_srq		= mlx5_ib_modify_srq;
2147322810Shselasky	dev->ib_dev.query_srq		= mlx5_ib_query_srq;
2148322810Shselasky	dev->ib_dev.destroy_srq		= mlx5_ib_destroy_srq;
2149322810Shselasky	dev->ib_dev.post_srq_recv	= mlx5_ib_post_srq_recv;
2150322810Shselasky	dev->ib_dev.create_qp		= mlx5_ib_create_qp;
2151322810Shselasky	dev->ib_dev.modify_qp		= mlx5_ib_modify_qp;
2152322810Shselasky	dev->ib_dev.query_qp		= mlx5_ib_query_qp;
2153322810Shselasky	dev->ib_dev.destroy_qp		= mlx5_ib_destroy_qp;
2154322810Shselasky	dev->ib_dev.post_send		= mlx5_ib_post_send;
2155322810Shselasky	dev->ib_dev.post_recv		= mlx5_ib_post_recv;
2156322810Shselasky	dev->ib_dev.create_cq		= mlx5_ib_create_cq;
2157322810Shselasky	dev->ib_dev.modify_cq		= mlx5_ib_modify_cq;
2158322810Shselasky	dev->ib_dev.resize_cq		= mlx5_ib_resize_cq;
2159322810Shselasky	dev->ib_dev.destroy_cq		= mlx5_ib_destroy_cq;
2160322810Shselasky	dev->ib_dev.poll_cq		= mlx5_ib_poll_cq;
2161322810Shselasky	dev->ib_dev.req_notify_cq	= mlx5_ib_arm_cq;
2162322810Shselasky	dev->ib_dev.get_dma_mr		= mlx5_ib_get_dma_mr;
2163322810Shselasky	dev->ib_dev.reg_user_mr		= mlx5_ib_reg_user_mr;
2164322810Shselasky	dev->ib_dev.reg_phys_mr		= mlx5_ib_reg_phys_mr;
2165322810Shselasky	dev->ib_dev.dereg_mr		= mlx5_ib_dereg_mr;
2166322810Shselasky	dev->ib_dev.attach_mcast	= mlx5_ib_mcg_attach;
2167322810Shselasky	dev->ib_dev.detach_mcast	= mlx5_ib_mcg_detach;
2168322810Shselasky	dev->ib_dev.process_mad		= mlx5_ib_process_mad;
2169325611Shselasky	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
2170322810Shselasky	dev->ib_dev.alloc_fast_reg_mr	= mlx5_ib_alloc_fast_reg_mr;
2171322810Shselasky	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2172322810Shselasky	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
2173322810Shselasky
2174322810Shselasky	if (MLX5_CAP_GEN(mdev, xrc)) {
2175322810Shselasky		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2176322810Shselasky		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2177322810Shselasky		dev->ib_dev.uverbs_cmd_mask |=
2178322810Shselasky			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2179322810Shselasky			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2180322810Shselasky	}
2181322810Shselasky
2182322810Shselasky	err = init_node_data(dev);
2183322810Shselasky	if (err)
2184322810Shselasky		goto err_disable_roce;
2185322810Shselasky
2186322810Shselasky	mutex_init(&dev->cap_mask_mutex);
2187322810Shselasky	INIT_LIST_HEAD(&dev->qp_list);
2188322810Shselasky	spin_lock_init(&dev->reset_flow_resource_lock);
2189322810Shselasky
2190322810Shselasky	err = create_dev_resources(&dev->devr);
2191322810Shselasky	if (err)
2192322810Shselasky		goto err_disable_roce;
2193322810Shselasky
2194322810Shselasky
2195322810Shselasky	err = mlx5_ib_alloc_q_counters(dev);
2196322810Shselasky	if (err)
2197322810Shselasky		goto err_odp;
2198322810Shselasky
2199322810Shselasky	err = ib_register_device(&dev->ib_dev, NULL);
2200322810Shselasky	if (err)
2201322810Shselasky		goto err_q_cnt;
2202322810Shselasky
2203322810Shselasky	err = create_umr_res(dev);
2204322810Shselasky	if (err)
2205322810Shselasky		goto err_dev;
2206322810Shselasky
2207322810Shselasky	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2208322810Shselasky	    MLX5_CAP_PORT_TYPE_IB) {
2209322810Shselasky		if (init_dc_improvements(dev))
2210322810Shselasky			mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
2211322810Shselasky	}
2212322810Shselasky
2213322810Shselasky	err = create_port_attrs(dev);
2214322810Shselasky	if (err)
2215322810Shselasky		goto err_dc;
2216322810Shselasky
2217322810Shselasky	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2218322810Shselasky		err = device_create_file(&dev->ib_dev.dev,
2219322810Shselasky					 mlx5_class_attributes[i]);
2220322810Shselasky		if (err)
2221322810Shselasky			goto err_port_attrs;
2222322810Shselasky	}
2223322810Shselasky
2224322810Shselasky	if (1) {
2225322810Shselasky		struct thread *rl_thread = NULL;
2226322810Shselasky		struct proc *rl_proc = NULL;
2227322810Shselasky
2228322810Shselasky		for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2229322810Shselasky			(void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2230322810Shselasky			    RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2231322810Shselasky		}
2232322810Shselasky	}
2233322810Shselasky
2234322810Shselasky	dev->ib_active = true;
2235322810Shselasky
2236322810Shselasky	return dev;
2237322810Shselasky
2238322810Shselaskyerr_port_attrs:
2239322810Shselasky	destroy_ports_attrs(dev, dev->num_ports);
2240322810Shselasky
2241322810Shselaskyerr_dc:
2242322810Shselasky	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2243322810Shselasky	    MLX5_CAP_PORT_TYPE_IB)
2244322810Shselasky		cleanup_dc_improvements(dev);
2245322810Shselasky	destroy_umrc_res(dev);
2246322810Shselasky
2247322810Shselaskyerr_dev:
2248322810Shselasky	ib_unregister_device(&dev->ib_dev);
2249322810Shselasky
2250322810Shselaskyerr_q_cnt:
2251322810Shselasky	mlx5_ib_dealloc_q_counters(dev);
2252322810Shselasky
2253322810Shselaskyerr_odp:
2254322810Shselasky	destroy_dev_resources(&dev->devr);
2255322810Shselasky
2256322810Shselaskyerr_disable_roce:
2257322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2258322810Shselasky	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2259322810Shselasky		mlx5_nic_vport_disable_roce(mdev);
2260322810Shselaskyerr_free_port:
2261322810Shselasky	kfree(dev->port);
2262322810Shselasky
2263322810Shselaskyerr_dealloc:
2264322810Shselasky	ib_dealloc_device((struct ib_device *)dev);
2265322810Shselasky
2266322810Shselasky	return NULL;
2267322810Shselasky}
2268322810Shselasky
/*
 * Detach the IB driver instance from an mlx5 core device.
 * Tears down everything mlx5_ib_add() set up, in reverse order of
 * creation.  NOTE(review): the sequence below is order-sensitive; do
 * not reorder without checking the corresponding setup path.
 */
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	int i;

	/*
	 * Stop the per-port RoCE update kthreads first: setting
	 * port_gone = 1 asks a thread to exit; the thread acknowledges
	 * by setting port_gone = 2 (presumably just before terminating
	 * -- confirm against mlx5_ib_roce_port_update).  Poll with a
	 * 1-second sleep per iteration.
	 */
	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].port_gone = 1;
		while (dev->port[i].port_gone != 2)
			pause("W", hz);
	}

	/* Remove the sysfs/class attribute files created at add time. */
	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		device_remove_file(&dev->ib_dev.dev,
		    mlx5_class_attributes[i]);
	}

	destroy_ports_attrs(dev, dev->num_ports);
	/* DC improvements are only initialized on IB-type ports. */
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	mlx5_ib_dealloc_q_counters(dev);
	/* Unregister from the IB core before freeing driver resources. */
	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	destroy_dev_resources(&dev->devr);

	/* RoCE was only enabled on Ethernet ports with the roce cap set. */
	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

	kfree(dev->port);
	ib_dealloc_device(&dev->ib_dev);
}
2301322810Shselasky
2302322810Shselaskystatic struct mlx5_interface mlx5_ib_interface = {
2303322810Shselasky	.add            = mlx5_ib_add,
2304322810Shselasky	.remove         = mlx5_ib_remove,
2305322810Shselasky	.event          = mlx5_ib_event,
2306322810Shselasky	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
2307322810Shselasky};
2308322810Shselasky
2309322810Shselaskystatic int __init mlx5_ib_init(void)
2310322810Shselasky{
2311322810Shselasky	int err;
2312322810Shselasky
2313322810Shselasky	if (deprecated_prof_sel != 2)
2314322810Shselasky		printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2315322810Shselasky
2316322810Shselasky	err = mlx5_register_interface(&mlx5_ib_interface);
2317322810Shselasky	if (err)
2318322810Shselasky		goto clean_odp;
2319322810Shselasky
2320322810Shselasky	mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
2321322810Shselasky	if (!mlx5_ib_wq) {
2322322810Shselasky		printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
2323322810Shselasky		goto err_unreg;
2324322810Shselasky	}
2325322810Shselasky
2326322810Shselasky	return err;
2327322810Shselasky
2328322810Shselaskyerr_unreg:
2329322810Shselasky	mlx5_unregister_interface(&mlx5_ib_interface);
2330322810Shselasky
2331322810Shselaskyclean_odp:
2332322810Shselasky	return err;
2333322810Shselasky}
2334322810Shselasky
/*
 * Module unload entry point.  Undoes mlx5_ib_init() in reverse order:
 * the workqueue (created last) is destroyed first so no deferred work
 * can run once the interface is unregistered.
 */
static void __exit mlx5_ib_cleanup(void)
{
	destroy_workqueue(mlx5_ib_wq);
	mlx5_unregister_interface(&mlx5_ib_interface);
}
2340322810Shselasky
/*
 * NOTE(review): SI_ORDER_THIRD presumably ensures mlx5_core (which this
 * driver registers with) has initialized earlier in the boot/load
 * sequence -- confirm against mlx5_core's module ordering.
 */
module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);
2343