mlx4.c revision 277160
1/*
2 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#if HAVE_CONFIG_H
34#  include <config.h>
35#endif /* HAVE_CONFIG_H */
36
37#include <stdio.h>
38#include <stdlib.h>
39#include <unistd.h>
40#include <errno.h>
41#include <sys/mman.h>
42#include <pthread.h>
43#include <string.h>
44
45#ifndef HAVE_IBV_REGISTER_DRIVER
46#include <sysfs/libsysfs.h>
47#endif
48
49#include "mlx4.h"
50#include "mlx4-abi.h"
51
#ifndef PCI_VENDOR_ID_MELLANOX
#define PCI_VENDOR_ID_MELLANOX			0x15b3
#endif

/*
 * Build one hca_table[] entry.  'v' is the vendor suffix pasted onto
 * PCI_VENDOR_ID_, 'd' is the PCI device ID.
 */
#define HCA(v, d) \
	{ .vendor = PCI_VENDOR_ID_##v,			\
	  .device = (d) }

/*
 * PCI vendor/device ID pairs claimed by this driver.
 *
 * The table is read-only and only referenced from this file, so it is
 * declared static const: that keeps the generic name "hca_table" out of
 * the library's exported symbols and lets the data live in read-only
 * storage.
 */
static const struct {
	unsigned		vendor;
	unsigned		device;
} hca_table[] = {
	HCA(MELLANOX, 0x6340),	/* MT25408 "Hermon" SDR */
	HCA(MELLANOX, 0x634a),	/* MT25408 "Hermon" DDR */
	HCA(MELLANOX, 0x6354),	/* MT25408 "Hermon" QDR */
	HCA(MELLANOX, 0x6732),	/* MT25408 "Hermon" DDR PCIe gen2 */
	HCA(MELLANOX, 0x673c),	/* MT25408 "Hermon" QDR PCIe gen2 */
	HCA(MELLANOX, 0x6368),	/* MT25448 [ConnectX EN 10GigE, PCIe 2.0 2.5GT/s] */
	HCA(MELLANOX, 0x6750),	/* MT26448 [ConnectX EN 10GigE, PCIe 2.0 5GT/s] */
	HCA(MELLANOX, 0x6372),	/* MT25408 [ConnectX EN 10GigE 10GBaseT, PCIe 2.0 2.5GT/s] */
	HCA(MELLANOX, 0x675a),	/* MT25408 [ConnectX EN 10GigE 10GBaseT, PCIe Gen2 5GT/s] */
	HCA(MELLANOX, 0x6764),	/* MT26468 [ConnectX EN 10GigE, PCIe 2.0 5GT/s] */
	HCA(MELLANOX, 0x6746),	/* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */
	HCA(MELLANOX, 0x676e),	/* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */
	HCA(MELLANOX, 0x6778),	/* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */
	HCA(MELLANOX, 0x1000),
	HCA(MELLANOX, 0x1001),
	HCA(MELLANOX, 0x1002),
	HCA(MELLANOX, 0x1003),
	HCA(MELLANOX, 0x1004),
	HCA(MELLANOX, 0x1005),
	HCA(MELLANOX, 0x1006),
	HCA(MELLANOX, 0x1007),
	HCA(MELLANOX, 0x1008),
	HCA(MELLANOX, 0x1009),
	HCA(MELLANOX, 0x100a),
	HCA(MELLANOX, 0x100b),
	HCA(MELLANOX, 0x100c),
	HCA(MELLANOX, 0x100d),
	HCA(MELLANOX, 0x100e),
	HCA(MELLANOX, 0x100f),
};
94
#ifdef HAVE_IBV_MORE_OPS
/*
 * Extended verbs table.  The XRC entry points are filled in only when
 * HAVE_IBV_XRC_OPS is defined; otherwise the initialiser list is empty.
 */
static struct ibv_more_ops mlx4_more_ops = {
#ifdef HAVE_IBV_XRC_OPS
	/* xrc domain */
	.open_xrc_domain   = mlx4_open_xrc_domain,
	.close_xrc_domain  = mlx4_close_xrc_domain,

	/* xrc srq */
	.create_xrc_srq    = mlx4_create_xrc_srq,

	/* xrc receive qp */
	.create_xrc_rcv_qp = mlx4_create_xrc_rcv_qp,
	.query_xrc_rcv_qp  = mlx4_query_xrc_rcv_qp,
	.modify_xrc_rcv_qp = mlx4_modify_xrc_rcv_qp,
	.reg_xrc_rcv_qp    = mlx4_reg_xrc_rcv_qp,
	.unreg_xrc_rcv_qp  = mlx4_unreg_xrc_rcv_qp,
#endif
};
#endif
109
110static struct ibv_context_ops mlx4_ctx_ops = {
111	.query_device  = mlx4_query_device,
112	.query_port    = mlx4_query_port,
113	.alloc_pd      = mlx4_alloc_pd,
114	.dealloc_pd    = mlx4_free_pd,
115	.reg_mr	       = mlx4_reg_mr,
116	.dereg_mr      = mlx4_dereg_mr,
117	.create_cq     = mlx4_create_cq,
118	.poll_cq       = mlx4_poll_cq,
119	.req_notify_cq = mlx4_arm_cq,
120	.cq_event      = mlx4_cq_event,
121	.resize_cq     = mlx4_resize_cq,
122	.destroy_cq    = mlx4_destroy_cq,
123	.create_srq    = mlx4_create_srq,
124	.modify_srq    = mlx4_modify_srq,
125	.query_srq     = mlx4_query_srq,
126	.destroy_srq   = mlx4_destroy_srq,
127	.post_srq_recv = mlx4_post_srq_recv,
128	.create_qp     = mlx4_create_qp,
129	.query_qp      = mlx4_query_qp,
130	.modify_qp     = mlx4_modify_qp,
131	.destroy_qp    = mlx4_destroy_qp,
132	.post_send     = mlx4_post_send,
133	.post_recv     = mlx4_post_recv,
134	.create_ah     = mlx4_create_ah,
135	.destroy_ah    = mlx4_destroy_ah,
136	.attach_mcast  = ibv_cmd_attach_mcast,
137	.detach_mcast  = ibv_cmd_detach_mcast
138};
139
140static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd)
141{
142	struct mlx4_context	       *context;
143	struct ibv_get_context		cmd;
144	struct mlx4_alloc_ucontext_resp resp;
145	struct mlx4_alloc_ucontext_resp_v3 resp_v3;
146	int				i;
147	struct ibv_device_attr		dev_attrs;
148	unsigned int			bf_reg_size;
149
150	context = calloc(1, sizeof *context);
151	if (!context)
152		return NULL;
153
154	context->ibv_ctx.cmd_fd = cmd_fd;
155
156	if (to_mdev(ibdev)->driver_abi_ver > 3) {
157		if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
158					&resp.ibv_resp, sizeof resp))
159			goto err_free;
160
161		context->num_qps	= resp.qp_tab_size;
162		context->num_xrc_srqs	= resp.qp_tab_size;
163		bf_reg_size		= resp.bf_reg_size;
164		context->cqe_size	= resp.cqe_size;
165	} else {
166		if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
167					&resp_v3.ibv_resp, sizeof resp_v3))
168			goto err_free;
169
170		context->num_qps	= resp_v3.qp_tab_size;
171		context->num_xrc_srqs	= resp_v3.qp_tab_size;
172		bf_reg_size		= resp_v3.bf_reg_size;
173		context->cqe_size	= 32;
174	}
175
176	context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS;
177	context->qp_table_mask	= (1 << context->qp_table_shift) - 1;
178
179	pthread_mutex_init(&context->qp_table_mutex, NULL);
180	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
181		context->qp_table[i].refcnt = 0;
182
183	context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1
184				       - MLX4_XRC_SRQ_TABLE_BITS;
185	context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1;
186
187	pthread_mutex_init(&context->xrc_srq_table_mutex, NULL);
188	for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
189		context->xrc_srq_table[i].refcnt = 0;
190
191	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
192		context->db_list[i] = NULL;
193
194	pthread_mutex_init(&context->db_list_mutex, NULL);
195
196	context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE,
197			    MAP_SHARED, cmd_fd, 0);
198	if (context->uar == MAP_FAILED)
199		goto err_free;
200
201	if (bf_reg_size) {
202		context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
203					PROT_WRITE, MAP_SHARED, cmd_fd,
204					to_mdev(ibdev)->page_size);
205		if (context->bf_page == MAP_FAILED) {
206			fprintf(stderr, PFX "Warning: BlueFlame available, "
207				"but failed to mmap() BlueFlame page.\n");
208				context->bf_page     = NULL;
209				context->bf_buf_size = 0;
210		} else {
211			context->bf_buf_size = bf_reg_size / 2;
212			context->bf_offset   = 0;
213			pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE);
214		}
215	} else {
216		context->bf_page     = NULL;
217		context->bf_buf_size = 0;
218	}
219
220	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
221
222	context->ibv_ctx.ops = mlx4_ctx_ops;
223#ifdef HAVE_IBV_XRC_OPS
224	context->ibv_ctx.more_ops = &mlx4_more_ops;
225#endif
226
227	if (mlx4_query_device(&context->ibv_ctx, &dev_attrs))
228		goto query_free;
229
230	context->max_qp_wr = dev_attrs.max_qp_wr;
231	context->max_sge = dev_attrs.max_sge;
232	context->max_cqe = dev_attrs.max_cqe;
233	if (!(dev_attrs.device_cap_flags & IBV_DEVICE_XRC)) {
234		fprintf(stderr, PFX "There is a mismatch between "
235		        "the kernel and the userspace libraries: "
236			"Kernel does not support XRC. Exiting.\n");
237		goto query_free;
238	}
239
240	return &context->ibv_ctx;
241
242query_free:
243	munmap(context->uar, to_mdev(ibdev)->page_size);
244	if (context->bf_page)
245		munmap(context->bf_page, to_mdev(ibdev)->page_size);
246
247err_free:
248	free(context);
249	return NULL;
250}
251
252static void mlx4_free_context(struct ibv_context *ibctx)
253{
254	struct mlx4_context *context = to_mctx(ibctx);
255
256	munmap(context->uar, to_mdev(ibctx->device)->page_size);
257	if (context->bf_page)
258		munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
259	free(context);
260}
261
262static struct ibv_device_ops mlx4_dev_ops = {
263	.alloc_context = mlx4_alloc_context,
264	.free_context  = mlx4_free_context
265};
266
267static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path,
268					    int abi_version)
269{
270	char			value[8];
271	struct mlx4_device    *dev;
272	unsigned		vendor, device;
273	int			i;
274
275	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
276				value, sizeof value) < 0)
277		return NULL;
278	sscanf(value, "%i", &vendor);
279
280	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
281				value, sizeof value) < 0)
282		return NULL;
283	sscanf(value, "%i", &device);
284
285	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
286		if (vendor == hca_table[i].vendor &&
287		    device == hca_table[i].device)
288			goto found;
289
290	return NULL;
291
292found:
293	if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION ||
294	    abi_version > MLX4_UVERBS_MAX_ABI_VERSION) {
295		fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported "
296			"(min supported %d, max supported %d)\n",
297			abi_version, uverbs_sys_path,
298			MLX4_UVERBS_MIN_ABI_VERSION,
299			MLX4_UVERBS_MAX_ABI_VERSION);
300		return NULL;
301	}
302
303	dev = malloc(sizeof *dev);
304	if (!dev) {
305		fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n",
306			uverbs_sys_path);
307		return NULL;
308	}
309
310	dev->ibv_dev.ops = mlx4_dev_ops;
311	dev->page_size   = sysconf(_SC_PAGESIZE);
312	dev->driver_abi_ver = abi_version;
313
314	return &dev->ibv_dev;
315}
316
317#ifdef HAVE_IBV_REGISTER_DRIVER
318static __attribute__((constructor)) void mlx4_register_driver(void)
319{
320	ibv_register_driver("mlx4", mlx4_driver_init);
321}
322#else
323/*
324 * Export the old libsysfs sysfs_class_device-based driver entry point
325 * if libibverbs does not export an ibv_register_driver() function.
326 */
327struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev)
328{
329	int abi_ver = 0;
330	char value[8];
331
332	if (ibv_read_sysfs_file(sysdev->path, "abi_version",
333				value, sizeof value) > 0)
334		abi_ver = strtol(value, NULL, 10);
335
336	return mlx4_driver_init(sysdev->path, abi_ver);
337}
338#endif /* HAVE_IBV_REGISTER_DRIVER */
339