1/*
2 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <config.h>
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <unistd.h>
38#include <errno.h>
39#include <sys/mman.h>
40#include <pthread.h>
41#include <string.h>
42
43#include "mlx4.h"
44#include "mlx4-abi.h"
45
/* Fallback for build environments whose headers do not provide the ID. */
#ifndef PCI_VENDOR_ID_MELLANOX
#define PCI_VENDOR_ID_MELLANOX			0x15b3
#endif

/*
 * Build one hca_table[] entry: "v" is the suffix of a PCI_VENDOR_ID_*
 * constant, "d" is the PCI device ID.
 */
#define HCA(v, d) \
	{ .vendor = PCI_VENDOR_ID_##v,			\
	  .device = (d) }

/*
 * PCI vendor/device ID pairs this driver claims.  The table is only ever
 * searched, never modified, so it is const.
 */
static const struct {
	unsigned		vendor;
	unsigned		device;
} hca_table[] = {
	HCA(MELLANOX, 0x6340),	/* MT25408 "Hermon" SDR */
	HCA(MELLANOX, 0x634a),	/* MT25408 "Hermon" DDR */
	HCA(MELLANOX, 0x6354),	/* MT25408 "Hermon" QDR */
	HCA(MELLANOX, 0x6732),	/* MT25408 "Hermon" DDR PCIe gen2 */
	HCA(MELLANOX, 0x673c),	/* MT25408 "Hermon" QDR PCIe gen2 */
	HCA(MELLANOX, 0x6368),	/* MT25408 "Hermon" EN 10GigE */
	HCA(MELLANOX, 0x6750),	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	HCA(MELLANOX, 0x6372),	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
	HCA(MELLANOX, 0x675a),	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	HCA(MELLANOX, 0x6764),	/* MT26468 ConnectX EN 10GigE PCIe gen2 */
	HCA(MELLANOX, 0x6746),	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	HCA(MELLANOX, 0x676e),	/* MT26478 ConnectX2 40GigE PCIe gen2 */
	HCA(MELLANOX, 0x1002),	/* MT25400 Family [ConnectX-2 Virtual Function] */
	HCA(MELLANOX, 0x1003),	/* MT27500 Family [ConnectX-3] */
	HCA(MELLANOX, 0x1004),	/* MT27500 Family [ConnectX-3 Virtual Function] */
	HCA(MELLANOX, 0x1005),	/* MT27510 Family */
	HCA(MELLANOX, 0x1006),	/* MT27511 Family */
	HCA(MELLANOX, 0x1007),	/* MT27520 Family */
	HCA(MELLANOX, 0x1008),	/* MT27521 Family */
	HCA(MELLANOX, 0x1009),	/* MT27530 Family */
	HCA(MELLANOX, 0x100a),	/* MT27531 Family */
	HCA(MELLANOX, 0x100b),	/* MT27540 Family */
	HCA(MELLANOX, 0x100c),	/* MT27541 Family */
	HCA(MELLANOX, 0x100d),	/* MT27550 Family */
	HCA(MELLANOX, 0x100e),	/* MT27551 Family */
	HCA(MELLANOX, 0x100f),	/* MT27560 Family */
	HCA(MELLANOX, 0x1010),	/* MT27561 Family */
};
86
87static struct ibv_context_ops mlx4_ctx_ops = {
88	.query_device  = mlx4_query_device,
89	.query_port    = mlx4_query_port,
90	.alloc_pd      = mlx4_alloc_pd,
91	.dealloc_pd    = mlx4_free_pd,
92	.reg_mr	       = mlx4_reg_mr,
93	.rereg_mr      = mlx4_rereg_mr,
94	.dereg_mr      = mlx4_dereg_mr,
95	.alloc_mw      = mlx4_alloc_mw,
96	.dealloc_mw    = mlx4_dealloc_mw,
97	.bind_mw       = mlx4_bind_mw,
98	.create_cq     = mlx4_create_cq,
99	.poll_cq       = mlx4_poll_cq,
100	.req_notify_cq = mlx4_arm_cq,
101	.cq_event      = mlx4_cq_event,
102	.resize_cq     = mlx4_resize_cq,
103	.destroy_cq    = mlx4_destroy_cq,
104	.create_srq    = mlx4_create_srq,
105	.modify_srq    = mlx4_modify_srq,
106	.query_srq     = mlx4_query_srq,
107	.destroy_srq   = mlx4_destroy_srq,
108	.post_srq_recv = mlx4_post_srq_recv,
109	.create_qp     = mlx4_create_qp,
110	.query_qp      = mlx4_query_qp,
111	.modify_qp     = mlx4_modify_qp,
112	.destroy_qp    = mlx4_destroy_qp,
113	.post_send     = mlx4_post_send,
114	.post_recv     = mlx4_post_recv,
115	.create_ah     = mlx4_create_ah,
116	.destroy_ah    = mlx4_destroy_ah,
117	.attach_mcast  = ibv_cmd_attach_mcast,
118	.detach_mcast  = ibv_cmd_detach_mcast
119};
120
121static int mlx4_map_internal_clock(struct mlx4_device *mdev,
122				   struct ibv_context *ibv_ctx)
123{
124	struct mlx4_context *context = to_mctx(ibv_ctx);
125	void *hca_clock_page;
126
127	hca_clock_page = mmap(NULL, mdev->page_size,
128			      PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd,
129			      mdev->page_size * 3);
130
131	if (hca_clock_page == MAP_FAILED) {
132		fprintf(stderr, PFX
133			"Warning: Timestamp available,\n"
134			"but failed to mmap() hca core clock page.\n");
135		return -1;
136	}
137
138	context->hca_core_clock = hca_clock_page +
139		(context->core_clock.offset & (mdev->page_size - 1));
140	return 0;
141}
142
143static int mlx4_init_context(struct verbs_device *v_device,
144				struct ibv_context *ibv_ctx, int cmd_fd)
145{
146	struct mlx4_context	       *context;
147	struct ibv_get_context		cmd;
148	struct mlx4_alloc_ucontext_resp resp;
149	int				i;
150	int				ret;
151	struct mlx4_alloc_ucontext_resp_v3 resp_v3;
152	__u16				bf_reg_size;
153	struct mlx4_device              *dev = to_mdev(&v_device->device);
154	struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
155	struct ibv_device_attr_ex	dev_attrs;
156
157	/* memory footprint of mlx4_context and verbs_context share
158	* struct ibv_context.
159	*/
160	context = to_mctx(ibv_ctx);
161	ibv_ctx->cmd_fd = cmd_fd;
162
163	if (dev->abi_version <= MLX4_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
164		if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
165					&resp_v3.ibv_resp, sizeof resp_v3))
166			return errno;
167
168		context->num_qps  = resp_v3.qp_tab_size;
169		bf_reg_size	  = resp_v3.bf_reg_size;
170		context->cqe_size = sizeof (struct mlx4_cqe);
171	} else  {
172		if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
173					&resp.ibv_resp, sizeof resp))
174			return errno;
175
176		context->num_qps  = resp.qp_tab_size;
177		bf_reg_size	  = resp.bf_reg_size;
178		if (resp.dev_caps & MLX4_USER_DEV_CAP_64B_CQE)
179			context->cqe_size = resp.cqe_size;
180		else
181			context->cqe_size = sizeof (struct mlx4_cqe);
182	}
183
184	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
185	context->qp_table_mask	= (1 << context->qp_table_shift) - 1;
186	for (i = 0; i < MLX4_PORTS_NUM; ++i)
187		context->port_query_cache[i].valid = 0;
188
189	ret = pthread_mutex_init(&context->qp_table_mutex, NULL);
190	if (ret)
191		return ret;
192	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
193		context->qp_table[i].refcnt = 0;
194
195	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
196		context->db_list[i] = NULL;
197
198	ret = mlx4_init_xsrq_table(&context->xsrq_table, context->num_qps);
199	if (ret)
200		goto err;
201
202	ret = pthread_mutex_init(&context->db_list_mutex, NULL);
203	if (ret)
204		goto err_xsrq;
205
206	context->uar = mmap(NULL, dev->page_size, PROT_WRITE,
207			    MAP_SHARED, cmd_fd, 0);
208	if (context->uar == MAP_FAILED)
209		return errno;
210
211	if (bf_reg_size) {
212		context->bf_page = mmap(NULL, dev->page_size,
213					PROT_WRITE, MAP_SHARED, cmd_fd,
214					dev->page_size);
215		if (context->bf_page == MAP_FAILED) {
216			fprintf(stderr, PFX "Warning: BlueFlame available, "
217				"but failed to mmap() BlueFlame page.\n");
218				context->bf_page     = NULL;
219				context->bf_buf_size = 0;
220		} else {
221			context->bf_buf_size = bf_reg_size / 2;
222			context->bf_offset   = 0;
223			ret = pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE);
224			if (ret)
225				goto err_db_list;
226		}
227	} else {
228		context->bf_page     = NULL;
229		context->bf_buf_size = 0;
230	}
231
232	ret = pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
233	if (ret)
234		goto err_bf_lock;
235	ibv_ctx->ops = mlx4_ctx_ops;
236
237	context->hca_core_clock = NULL;
238	memset(&dev_attrs, 0, sizeof(dev_attrs));
239	if (!mlx4_query_device_ex(ibv_ctx, NULL, &dev_attrs,
240				  sizeof(struct ibv_device_attr_ex))) {
241		context->max_qp_wr = dev_attrs.orig_attr.max_qp_wr;
242		context->max_sge = dev_attrs.orig_attr.max_sge;
243		if (context->core_clock.offset_valid)
244			mlx4_map_internal_clock(dev, ibv_ctx);
245	}
246
247	verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
248					VERBS_CONTEXT_QP;
249	verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd);
250	verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd);
251	verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex);
252	verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num);
253	verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex);
254	verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp);
255	verbs_set_ctx_op(verbs_ctx, ibv_create_flow, ibv_cmd_create_flow);
256	verbs_set_ctx_op(verbs_ctx, ibv_destroy_flow, ibv_cmd_destroy_flow);
257	verbs_set_ctx_op(verbs_ctx, create_cq_ex, mlx4_create_cq_ex);
258	verbs_set_ctx_op(verbs_ctx, query_device_ex, mlx4_query_device_ex);
259	verbs_set_ctx_op(verbs_ctx, query_rt_values, mlx4_query_rt_values);
260
261	return 0;
262
263err_bf_lock:
264	if (context->bf_buf_size)
265		pthread_spin_destroy(&context->bf_lock);
266err_db_list:
267	pthread_mutex_destroy(&context->db_list_mutex);
268err_xsrq:
269	mlx4_cleanup_xsrq_table(&context->xsrq_table);
270err:
271	pthread_mutex_destroy(&context->qp_table_mutex);
272
273	return ret;
274}
275
276static void mlx4_uninit_context(struct verbs_device *v_device,
277					struct ibv_context *ibv_ctx)
278{
279	struct mlx4_context *context = to_mctx(ibv_ctx);
280
281	pthread_mutex_destroy(&context->qp_table_mutex);
282	mlx4_cleanup_xsrq_table(&context->xsrq_table);
283	pthread_mutex_destroy(&context->db_list_mutex);
284	pthread_spin_destroy(&context->bf_lock);
285	pthread_spin_destroy(&context->uar_lock);
286
287	munmap(context->uar, to_mdev(&v_device->device)->page_size);
288	if (context->bf_page)
289		munmap(context->bf_page, to_mdev(&v_device->device)->page_size);
290	if (context->hca_core_clock)
291		munmap(context->hca_core_clock - context->core_clock.offset,
292		       to_mdev(&v_device->device)->page_size);
293}
294
295static struct verbs_device_ops mlx4_dev_ops = {
296	.init_context = mlx4_init_context,
297	.uninit_context = mlx4_uninit_context,
298};
299
300static struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path, int abi_version)
301{
302	char			value[8];
303	struct mlx4_device    *dev;
304	unsigned		vendor, device;
305	int			i;
306
307	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
308				value, sizeof value) < 0)
309		return NULL;
310	vendor = strtol(value, NULL, 16);
311
312	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
313				value, sizeof value) < 0)
314		return NULL;
315	device = strtol(value, NULL, 16);
316
317	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
318		if (vendor == hca_table[i].vendor &&
319		    device == hca_table[i].device)
320			goto found;
321
322	return NULL;
323
324found:
325	if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION ||
326	    abi_version > MLX4_UVERBS_MAX_ABI_VERSION) {
327		fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported "
328			"(min supported %d, max supported %d)\n",
329			abi_version, uverbs_sys_path,
330			MLX4_UVERBS_MIN_ABI_VERSION,
331			MLX4_UVERBS_MAX_ABI_VERSION);
332		return NULL;
333	}
334
335	dev = calloc(1, sizeof *dev);
336	if (!dev) {
337		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
338			uverbs_sys_path);
339		return NULL;
340	}
341
342	dev->page_size   = sysconf(_SC_PAGESIZE);
343	dev->abi_version = abi_version;
344
345	dev->verbs_dev.ops = &mlx4_dev_ops;
346	dev->verbs_dev.sz = sizeof(*dev);
347	dev->verbs_dev.size_of_context =
348		sizeof(struct mlx4_context) - sizeof(struct ibv_context);
349
350	return &dev->verbs_dev;
351}
352
353static __attribute__((constructor)) void mlx4_register_driver(void)
354{
355	verbs_register_driver("mlx4", mlx4_driver_init);
356}
357