/* mlx4.c revision 277160 */
1/* 2 * Copyright (c) 2007 Cisco, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 
31 */ 32 33#if HAVE_CONFIG_H 34# include <config.h> 35#endif /* HAVE_CONFIG_H */ 36 37#include <stdio.h> 38#include <stdlib.h> 39#include <unistd.h> 40#include <errno.h> 41#include <sys/mman.h> 42#include <pthread.h> 43#include <string.h> 44 45#ifndef HAVE_IBV_REGISTER_DRIVER 46#include <sysfs/libsysfs.h> 47#endif 48 49#include "mlx4.h" 50#include "mlx4-abi.h" 51 52#ifndef PCI_VENDOR_ID_MELLANOX 53#define PCI_VENDOR_ID_MELLANOX 0x15b3 54#endif 55 56#define HCA(v, d) \ 57 { .vendor = PCI_VENDOR_ID_##v, \ 58 .device = d } 59 60struct { 61 unsigned vendor; 62 unsigned device; 63} hca_table[] = { 64 HCA(MELLANOX, 0x6340), /* MT25408 "Hermon" SDR */ 65 HCA(MELLANOX, 0x634a), /* MT25408 "Hermon" DDR */ 66 HCA(MELLANOX, 0x6354), /* MT25408 "Hermon" QDR */ 67 HCA(MELLANOX, 0x6732), /* MT25408 "Hermon" DDR PCIe gen2 */ 68 HCA(MELLANOX, 0x673c), /* MT25408 "Hermon" QDR PCIe gen2 */ 69 HCA(MELLANOX, 0x6368), /* MT25448 [ConnectX EN 10GigE, PCIe 2.0 2.5GT/s] */ 70 HCA(MELLANOX, 0x6750), /* MT26448 [ConnectX EN 10GigE, PCIe 2.0 5GT/s] */ 71 HCA(MELLANOX, 0x6372), /* MT25408 [ConnectX EN 10GigE 10GBaseT, PCIe 2.0 2.5GT/s] */ 72 HCA(MELLANOX, 0x675a), /* MT25408 [ConnectX EN 10GigE 10GBaseT, PCIe Gen2 5GT/s] */ 73 HCA(MELLANOX, 0x6764), /* MT26468 [ConnectX EN 10GigE, PCIe 2.0 5GT/s] */ 74 HCA(MELLANOX, 0x6746), /* MT26438 ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE Virt+ */ 75 HCA(MELLANOX, 0x676e), /* MT26478 ConnectX EN 40GigE PCIe 2.0 5GT/s */ 76 HCA(MELLANOX, 0x6778), /* MT26488 ConnectX VPI PCIe 2.0 5GT/s - IB DDR / 10GigE Virt+ */ 77 HCA(MELLANOX, 0x1000), 78 HCA(MELLANOX, 0x1001), 79 HCA(MELLANOX, 0x1002), 80 HCA(MELLANOX, 0x1003), 81 HCA(MELLANOX, 0x1004), 82 HCA(MELLANOX, 0x1005), 83 HCA(MELLANOX, 0x1006), 84 HCA(MELLANOX, 0x1007), 85 HCA(MELLANOX, 0x1008), 86 HCA(MELLANOX, 0x1009), 87 HCA(MELLANOX, 0x100a), 88 HCA(MELLANOX, 0x100b), 89 HCA(MELLANOX, 0x100c), 90 HCA(MELLANOX, 0x100d), 91 HCA(MELLANOX, 0x100e), 92 HCA(MELLANOX, 0x100f), 93}; 94 95#ifdef 
HAVE_IBV_MORE_OPS 96static struct ibv_more_ops mlx4_more_ops = { 97#ifdef HAVE_IBV_XRC_OPS 98 .create_xrc_srq = mlx4_create_xrc_srq, 99 .open_xrc_domain = mlx4_open_xrc_domain, 100 .close_xrc_domain = mlx4_close_xrc_domain, 101 .create_xrc_rcv_qp = mlx4_create_xrc_rcv_qp, 102 .modify_xrc_rcv_qp = mlx4_modify_xrc_rcv_qp, 103 .query_xrc_rcv_qp = mlx4_query_xrc_rcv_qp, 104 .reg_xrc_rcv_qp = mlx4_reg_xrc_rcv_qp, 105 .unreg_xrc_rcv_qp = mlx4_unreg_xrc_rcv_qp, 106#endif 107}; 108#endif 109 110static struct ibv_context_ops mlx4_ctx_ops = { 111 .query_device = mlx4_query_device, 112 .query_port = mlx4_query_port, 113 .alloc_pd = mlx4_alloc_pd, 114 .dealloc_pd = mlx4_free_pd, 115 .reg_mr = mlx4_reg_mr, 116 .dereg_mr = mlx4_dereg_mr, 117 .create_cq = mlx4_create_cq, 118 .poll_cq = mlx4_poll_cq, 119 .req_notify_cq = mlx4_arm_cq, 120 .cq_event = mlx4_cq_event, 121 .resize_cq = mlx4_resize_cq, 122 .destroy_cq = mlx4_destroy_cq, 123 .create_srq = mlx4_create_srq, 124 .modify_srq = mlx4_modify_srq, 125 .query_srq = mlx4_query_srq, 126 .destroy_srq = mlx4_destroy_srq, 127 .post_srq_recv = mlx4_post_srq_recv, 128 .create_qp = mlx4_create_qp, 129 .query_qp = mlx4_query_qp, 130 .modify_qp = mlx4_modify_qp, 131 .destroy_qp = mlx4_destroy_qp, 132 .post_send = mlx4_post_send, 133 .post_recv = mlx4_post_recv, 134 .create_ah = mlx4_create_ah, 135 .destroy_ah = mlx4_destroy_ah, 136 .attach_mcast = ibv_cmd_attach_mcast, 137 .detach_mcast = ibv_cmd_detach_mcast 138}; 139 140static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_fd) 141{ 142 struct mlx4_context *context; 143 struct ibv_get_context cmd; 144 struct mlx4_alloc_ucontext_resp resp; 145 struct mlx4_alloc_ucontext_resp_v3 resp_v3; 146 int i; 147 struct ibv_device_attr dev_attrs; 148 unsigned int bf_reg_size; 149 150 context = calloc(1, sizeof *context); 151 if (!context) 152 return NULL; 153 154 context->ibv_ctx.cmd_fd = cmd_fd; 155 156 if (to_mdev(ibdev)->driver_abi_ver > 3) { 157 if 
(ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd, 158 &resp.ibv_resp, sizeof resp)) 159 goto err_free; 160 161 context->num_qps = resp.qp_tab_size; 162 context->num_xrc_srqs = resp.qp_tab_size; 163 bf_reg_size = resp.bf_reg_size; 164 context->cqe_size = resp.cqe_size; 165 } else { 166 if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd, 167 &resp_v3.ibv_resp, sizeof resp_v3)) 168 goto err_free; 169 170 context->num_qps = resp_v3.qp_tab_size; 171 context->num_xrc_srqs = resp_v3.qp_tab_size; 172 bf_reg_size = resp_v3.bf_reg_size; 173 context->cqe_size = 32; 174 } 175 176 context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS; 177 context->qp_table_mask = (1 << context->qp_table_shift) - 1; 178 179 pthread_mutex_init(&context->qp_table_mutex, NULL); 180 for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i) 181 context->qp_table[i].refcnt = 0; 182 183 context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1 184 - MLX4_XRC_SRQ_TABLE_BITS; 185 context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1; 186 187 pthread_mutex_init(&context->xrc_srq_table_mutex, NULL); 188 for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i) 189 context->xrc_srq_table[i].refcnt = 0; 190 191 for (i = 0; i < MLX4_NUM_DB_TYPE; ++i) 192 context->db_list[i] = NULL; 193 194 pthread_mutex_init(&context->db_list_mutex, NULL); 195 196 context->uar = mmap(NULL, to_mdev(ibdev)->page_size, PROT_WRITE, 197 MAP_SHARED, cmd_fd, 0); 198 if (context->uar == MAP_FAILED) 199 goto err_free; 200 201 if (bf_reg_size) { 202 context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size, 203 PROT_WRITE, MAP_SHARED, cmd_fd, 204 to_mdev(ibdev)->page_size); 205 if (context->bf_page == MAP_FAILED) { 206 fprintf(stderr, PFX "Warning: BlueFlame available, " 207 "but failed to mmap() BlueFlame page.\n"); 208 context->bf_page = NULL; 209 context->bf_buf_size = 0; 210 } else { 211 context->bf_buf_size = bf_reg_size / 2; 212 context->bf_offset = 0; 213 pthread_spin_init(&context->bf_lock, 
PTHREAD_PROCESS_PRIVATE); 214 } 215 } else { 216 context->bf_page = NULL; 217 context->bf_buf_size = 0; 218 } 219 220 pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); 221 222 context->ibv_ctx.ops = mlx4_ctx_ops; 223#ifdef HAVE_IBV_XRC_OPS 224 context->ibv_ctx.more_ops = &mlx4_more_ops; 225#endif 226 227 if (mlx4_query_device(&context->ibv_ctx, &dev_attrs)) 228 goto query_free; 229 230 context->max_qp_wr = dev_attrs.max_qp_wr; 231 context->max_sge = dev_attrs.max_sge; 232 context->max_cqe = dev_attrs.max_cqe; 233 if (!(dev_attrs.device_cap_flags & IBV_DEVICE_XRC)) { 234 fprintf(stderr, PFX "There is a mismatch between " 235 "the kernel and the userspace libraries: " 236 "Kernel does not support XRC. Exiting.\n"); 237 goto query_free; 238 } 239 240 return &context->ibv_ctx; 241 242query_free: 243 munmap(context->uar, to_mdev(ibdev)->page_size); 244 if (context->bf_page) 245 munmap(context->bf_page, to_mdev(ibdev)->page_size); 246 247err_free: 248 free(context); 249 return NULL; 250} 251 252static void mlx4_free_context(struct ibv_context *ibctx) 253{ 254 struct mlx4_context *context = to_mctx(ibctx); 255 256 munmap(context->uar, to_mdev(ibctx->device)->page_size); 257 if (context->bf_page) 258 munmap(context->bf_page, to_mdev(ibctx->device)->page_size); 259 free(context); 260} 261 262static struct ibv_device_ops mlx4_dev_ops = { 263 .alloc_context = mlx4_alloc_context, 264 .free_context = mlx4_free_context 265}; 266 267static struct ibv_device *mlx4_driver_init(const char *uverbs_sys_path, 268 int abi_version) 269{ 270 char value[8]; 271 struct mlx4_device *dev; 272 unsigned vendor, device; 273 int i; 274 275 if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", 276 value, sizeof value) < 0) 277 return NULL; 278 sscanf(value, "%i", &vendor); 279 280 if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", 281 value, sizeof value) < 0) 282 return NULL; 283 sscanf(value, "%i", &device); 284 285 for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; 
++i) 286 if (vendor == hca_table[i].vendor && 287 device == hca_table[i].device) 288 goto found; 289 290 return NULL; 291 292found: 293 if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION || 294 abi_version > MLX4_UVERBS_MAX_ABI_VERSION) { 295 fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported " 296 "(min supported %d, max supported %d)\n", 297 abi_version, uverbs_sys_path, 298 MLX4_UVERBS_MIN_ABI_VERSION, 299 MLX4_UVERBS_MAX_ABI_VERSION); 300 return NULL; 301 } 302 303 dev = malloc(sizeof *dev); 304 if (!dev) { 305 fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n", 306 uverbs_sys_path); 307 return NULL; 308 } 309 310 dev->ibv_dev.ops = mlx4_dev_ops; 311 dev->page_size = sysconf(_SC_PAGESIZE); 312 dev->driver_abi_ver = abi_version; 313 314 return &dev->ibv_dev; 315} 316 317#ifdef HAVE_IBV_REGISTER_DRIVER 318static __attribute__((constructor)) void mlx4_register_driver(void) 319{ 320 ibv_register_driver("mlx4", mlx4_driver_init); 321} 322#else 323/* 324 * Export the old libsysfs sysfs_class_device-based driver entry point 325 * if libibverbs does not export an ibv_register_driver() function. 326 */ 327struct ibv_device *openib_driver_init(struct sysfs_class_device *sysdev) 328{ 329 int abi_ver = 0; 330 char value[8]; 331 332 if (ibv_read_sysfs_file(sysdev->path, "abi_version", 333 value, sizeof value) > 0) 334 abi_ver = strtol(value, NULL, 10); 335 336 return mlx4_driver_init(sysdev->path, abi_ver); 337} 338#endif /* HAVE_IBV_REGISTER_DRIVER */ 339