/* dev.c revision 319255 */
1/* 2 * Copyright (c) 2006-2014 Chelsio, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32#if HAVE_CONFIG_H 33# include <config.h> 34#endif /* HAVE_CONFIG_H */ 35 36#include <stdio.h> 37#include <stdlib.h> 38#include <unistd.h> 39#include <errno.h> 40#include <sys/mman.h> 41#include <pthread.h> 42#include <string.h> 43#include <signal.h> 44 45#include "libcxgb4.h" 46#include "cxgb4-abi.h" 47 48#define PCI_VENDOR_ID_CHELSIO 0x1425 49 50/* 51 * Macros needed to support the PCI Device ID Table ... 
52 */ 53#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ 54 struct { \ 55 unsigned vendor; \ 56 unsigned device; \ 57 } hca_table[] = { 58 59#define CH_PCI_DEVICE_ID_FUNCTION \ 60 0x4 61 62#define CH_PCI_ID_TABLE_ENTRY(__DeviceID) \ 63 { \ 64 .vendor = PCI_VENDOR_ID_CHELSIO, \ 65 .device = (__DeviceID), \ 66 } 67 68#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \ 69 } 70 71#include "t4_chip_type.h" 72#include "t4_pci_id_tbl.h" 73 74unsigned long c4iw_page_size; 75unsigned long c4iw_page_shift; 76unsigned long c4iw_page_mask; 77int ma_wr; 78int t5_en_wc = 1; 79 80SLIST_HEAD(devices_struct, c4iw_dev) devices; 81 82static struct ibv_context_ops c4iw_ctx_ops = { 83 .query_device = c4iw_query_device, 84 .query_port = c4iw_query_port, 85 .alloc_pd = c4iw_alloc_pd, 86 .dealloc_pd = c4iw_free_pd, 87 .reg_mr = c4iw_reg_mr, 88 .dereg_mr = c4iw_dereg_mr, 89 .create_cq = c4iw_create_cq, 90 .resize_cq = c4iw_resize_cq, 91 .destroy_cq = c4iw_destroy_cq, 92 .create_srq = c4iw_create_srq, 93 .modify_srq = c4iw_modify_srq, 94 .destroy_srq = c4iw_destroy_srq, 95 .create_qp = c4iw_create_qp, 96 .modify_qp = c4iw_modify_qp, 97 .destroy_qp = c4iw_destroy_qp, 98 .query_qp = c4iw_query_qp, 99 .create_ah = c4iw_create_ah, 100 .destroy_ah = c4iw_destroy_ah, 101 .attach_mcast = c4iw_attach_mcast, 102 .detach_mcast = c4iw_detach_mcast, 103 .post_srq_recv = c4iw_post_srq_recv, 104 .req_notify_cq = c4iw_arm_cq, 105}; 106 107static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev, 108 int cmd_fd) 109{ 110 struct c4iw_context *context; 111 struct ibv_get_context cmd; 112 struct c4iw_alloc_ucontext_resp resp; 113 struct c4iw_dev *rhp = to_c4iw_dev(ibdev); 114 struct ibv_query_device qcmd; 115 uint64_t raw_fw_ver; 116 struct ibv_device_attr attr; 117 118 context = malloc(sizeof *context); 119 if (!context) 120 return NULL; 121 122 memset(context, 0, sizeof *context); 123 context->ibv_ctx.cmd_fd = cmd_fd; 124 125 resp.status_page_size = 0; 126 resp.reserved = 0; 127 if 
(ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd, 128 &resp.ibv_resp, sizeof resp)) 129 goto err_free; 130 131 if (resp.reserved) 132 PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n", 133 __FUNCTION__); 134 135 context->status_page_size = resp.status_page_size; 136 if (resp.status_page_size) { 137 context->status_page = mmap(NULL, resp.status_page_size, 138 PROT_READ, MAP_SHARED, cmd_fd, 139 resp.status_page_key); 140 if (context->status_page == MAP_FAILED) 141 goto err_free; 142 } 143 144 context->ibv_ctx.device = ibdev; 145 context->ibv_ctx.ops = c4iw_ctx_ops; 146 147 switch (rhp->chip_version) { 148 case CHELSIO_T6: 149 case CHELSIO_T5: 150 case CHELSIO_T4: 151 PDBG("%s T%d device\n", __FUNCTION__, rhp->chip_version); 152 context->ibv_ctx.ops.async_event = c4iw_async_event; 153 context->ibv_ctx.ops.post_send = c4iw_post_send; 154 context->ibv_ctx.ops.post_recv = c4iw_post_receive; 155 context->ibv_ctx.ops.poll_cq = c4iw_poll_cq; 156 context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq; 157 break; 158 default: 159 PDBG("%s unknown hca type %d\n", __FUNCTION__, 160 rhp->chip_version); 161 goto err_unmap; 162 break; 163 } 164 165 if (!rhp->mmid2ptr) { 166 int ret; 167 168 ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd, 169 sizeof qcmd); 170 if (ret) 171 goto err_unmap; 172 rhp->max_mr = attr.max_mr; 173 rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *)); 174 if (!rhp->mmid2ptr) { 175 goto err_unmap; 176 } 177 rhp->max_qp = T4_QID_BASE + attr.max_cq; 178 rhp->qpid2ptr = calloc(T4_QID_BASE + attr.max_cq, sizeof(void *)); 179 if (!rhp->qpid2ptr) { 180 goto err_unmap; 181 } 182 rhp->max_cq = T4_QID_BASE + attr.max_cq; 183 rhp->cqid2ptr = calloc(T4_QID_BASE + attr.max_cq, sizeof(void *)); 184 if (!rhp->cqid2ptr) 185 goto err_unmap; 186 } 187 188 return &context->ibv_ctx; 189 190err_unmap: 191 munmap(context->status_page, context->status_page_size); 192err_free: 193 if (rhp->cqid2ptr) 194 free(rhp->cqid2ptr); 195 if 
(rhp->qpid2ptr) 196 free(rhp->cqid2ptr); 197 if (rhp->mmid2ptr) 198 free(rhp->cqid2ptr); 199 free(context); 200 return NULL; 201} 202 203static void c4iw_free_context(struct ibv_context *ibctx) 204{ 205 struct c4iw_context *context = to_c4iw_context(ibctx); 206 207 if (context->status_page_size) 208 munmap(context->status_page, context->status_page_size); 209 free(context); 210} 211 212static struct ibv_device_ops c4iw_dev_ops = { 213 .alloc_context = c4iw_alloc_context, 214 .free_context = c4iw_free_context 215}; 216 217#ifdef STALL_DETECTION 218 219int stall_to; 220 221static void dump_cq(struct c4iw_cq *chp) 222{ 223 int i; 224 225 fprintf(stderr, 226 "CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d " 227 "cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp, 228 chp->cq.cqid, chp->cq.queue, chp->cq.cidx, 229 chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use, 230 chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64_to_cpu(chp->cq.bits_type_ts), 231 t4_cq_notempty(&chp->cq) || (chp->iq ? 
t4_iq_notempty(chp->iq) : 0)); 232 233 for (i=0; i < chp->cq.size; i++) { 234 u64 *p = (u64 *)(chp->cq.queue + i); 235 236 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64_to_cpu(p[0]), be64_to_cpu(p[1])); 237 if (i == chp->cq.cidx) 238 fprintf(stderr, " <-- cidx\n"); 239 else 240 fprintf(stderr, "\n"); 241 p+= 2; 242 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1])); 243 p+= 2; 244 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1])); 245 p+= 2; 246 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1])); 247 p+= 2; 248 } 249} 250 251static void dump_qp(struct c4iw_qp *qhp) 252{ 253 int i; 254 int j; 255 struct t4_swsqe *swsqe; 256 struct t4_swrqe *swrqe; 257 u16 cidx, pidx; 258 u64 *p; 259 260 fprintf(stderr, 261 "QP: %p id %u error %d flushed %d qid_mask 0x%x\n" 262 " SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n" 263 " RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n", 264 qhp, 265 qhp->wq.sq.qid, 266 qhp->wq.error, 267 qhp->wq.flushed, 268 qhp->wq.qid_mask, 269 qhp->wq.sq.qid, 270 qhp->wq.sq.queue, 271 qhp->wq.sq.sw_sq, 272 qhp->wq.sq.cidx, 273 qhp->wq.sq.pidx, 274 qhp->wq.sq.in_use, 275 qhp->wq.sq.wq_pidx, 276 qhp->wq.sq.size, 277 qhp->wq.sq.flags, 278 qhp->wq.sq.flush_cidx, 279 qhp->wq.rq.qid, 280 qhp->wq.rq.queue, 281 qhp->wq.rq.sw_rq, 282 qhp->wq.rq.cidx, 283 qhp->wq.rq.pidx, 284 qhp->wq.rq.in_use, 285 qhp->wq.rq.size); 286 cidx = qhp->wq.sq.cidx; 287 pidx = qhp->wq.sq.pidx; 288 if (cidx != pidx) 289 fprintf(stderr, "SQ: \n"); 290 while (cidx != pidx) { 291 swsqe = &qhp->wq.sq.sw_sq[cidx]; 292 fprintf(stderr, "%04u: wr_id %016" PRIx64 293 " sq_wptr %08x read_len %u opcode 0x%x " 294 "complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n", 295 cidx, 296 swsqe->wr_id, 297 swsqe->idx, 298 swsqe->read_len, 
299 swsqe->opcode, 300 swsqe->complete, 301 swsqe->signaled, 302 cpu_to_be64(swsqe->cqe.u.flits[0]), 303 cpu_to_be64(swsqe->cqe.u.flits[1]), 304 cpu_to_be64((u64)swsqe->cqe.reserved), 305 cpu_to_be64(swsqe->cqe.bits_type_ts)); 306 if (++cidx == qhp->wq.sq.size) 307 cidx = 0; 308 } 309 310 fprintf(stderr, "SQ WQ: \n"); 311 p = (u64 *)qhp->wq.sq.queue; 312 for (i=0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) { 313 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) { 314 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ", 315 i, ntohll(p[0]), ntohll(p[1])); 316 if (j == 0 && i == qhp->wq.sq.wq_pidx) 317 fprintf(stderr, " <-- pidx"); 318 fprintf(stderr, "\n"); 319 p += 2; 320 } 321 } 322 cidx = qhp->wq.rq.cidx; 323 pidx = qhp->wq.rq.pidx; 324 if (cidx != pidx) 325 fprintf(stderr, "RQ: \n"); 326 while (cidx != pidx) { 327 swrqe = &qhp->wq.rq.sw_rq[cidx]; 328 fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n", 329 cidx, 330 swrqe->wr_id ); 331 if (++cidx == qhp->wq.rq.size) 332 cidx = 0; 333 } 334 335 fprintf(stderr, "RQ WQ: \n"); 336 p = (u64 *)qhp->wq.rq.queue; 337 for (i=0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) { 338 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) { 339 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ", 340 i, ntohll(p[0]), ntohll(p[1])); 341 if (j == 0 && i == qhp->wq.rq.pidx) 342 fprintf(stderr, " <-- pidx"); 343 if (j == 0 && i == qhp->wq.rq.cidx) 344 fprintf(stderr, " <-- cidx"); 345 fprintf(stderr, "\n"); 346 p+=2; 347 } 348 } 349} 350 351void dump_state() 352{ 353 struct c4iw_dev *dev; 354 int i; 355 356 fprintf(stderr, "STALL DETECTED:\n"); 357 SLIST_FOREACH(dev, &devices, list) { 358 //pthread_spin_lock(&dev->lock); 359 fprintf(stderr, "Device %s\n", dev->ibv_dev.name); 360 for (i=0; i < dev->max_cq; i++) { 361 if (dev->cqid2ptr[i]) { 362 struct c4iw_cq *chp = dev->cqid2ptr[i]; 363 //pthread_spin_lock(&chp->lock); 364 dump_cq(chp); 365 //pthread_spin_unlock(&chp->lock); 366 } 367 } 368 for (i=0; i < dev->max_qp; i++) { 369 if (dev->qpid2ptr[i]) { 
370 struct c4iw_qp *qhp = dev->qpid2ptr[i]; 371 //pthread_spin_lock(&qhp->lock); 372 dump_qp(qhp); 373 //pthread_spin_unlock(&qhp->lock); 374 } 375 } 376 //pthread_spin_unlock(&dev->lock); 377 } 378 fprintf(stderr, "DUMP COMPLETE:\n"); 379 fflush(stderr); 380} 381#endif /* end of STALL_DETECTION */ 382 383/* 384 * c4iw_abi_version is used to store ABI for iw_cxgb4 so the user mode library 385 * can know if the driver supports the kernel mode db ringing. 386 */ 387int c4iw_abi_version = 1; 388 389static struct ibv_device *cxgb4_driver_init(const char *uverbs_sys_path, 390 int abi_version) 391{ 392 char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp; 393 char dev_str[IBV_SYSFS_PATH_MAX]; 394 struct c4iw_dev *dev; 395 unsigned vendor, device, fw_maj, fw_min; 396 int i; 397 char devnum; 398 char ib_param[16]; 399 400#ifndef __linux__ 401 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", 402 ibdev, sizeof ibdev) < 0) 403 return NULL; 404 405 if (ibdev[0] == 't' && ibdev[1] >= '4' && ibdev[1] <= '6' && 406 strstr(&ibdev[2], "nex") && (devnum = atoi(&ibdev[5])) >= 0) { 407 snprintf(dev_str, sizeof(dev_str), "/dev/t%cnex/%d", ibdev[1], 408 devnum); 409 } else 410 return NULL; 411 412 if (ibv_read_sysfs_file(dev_str, "\%pnpinfo", value, sizeof value) < 0) 413 return NULL; 414 else { 415 if (strstr(value,"vendor=")) { 416 strncpy(ib_param, strstr(value,"vendor=")+strlen("vendor="),6); 417 sscanf(ib_param,"%i",&vendor); 418 } 419 420 if (strstr(value,"device=")) { 421 strncpy(ib_param, strstr(value,"device=")+strlen("device="),6); 422 sscanf(ib_param,"%i",&device); 423 } 424 } 425#else 426 if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", 427 value, sizeof value) < 0) 428 return NULL; 429 sscanf(value, "%i", &vendor); 430 431 if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", 432 value, sizeof value) < 0) 433 return NULL; 434 sscanf(value, "%i", &device); 435#endif 436 437 for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) 438 if (vendor == 
hca_table[i].vendor && 439 device == hca_table[i].device) 440 goto found; 441 442 return NULL; 443 444found: 445 c4iw_abi_version = abi_version; 446 447 448#ifndef __linux__ 449 if (ibv_read_sysfs_file(dev_str, "firmware_version", 450 value, sizeof value) < 0) 451 return NULL; 452#else 453 /* 454 * Verify that the firmware major number matches. Major number 455 * mismatches are fatal. Minor number mismatches are tolerated. 456 */ 457 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", 458 ibdev, sizeof ibdev) < 0) 459 return NULL; 460 461 memset(devstr, 0, sizeof devstr); 462 snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s", 463 ibv_get_sysfs_path(), ibdev); 464 if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0) 465 return NULL; 466#endif 467 468 cp = strtok(value+1, "."); 469 sscanf(cp, "%i", &fw_maj); 470 cp = strtok(NULL, "."); 471 sscanf(cp, "%i", &fw_min); 472 473 if (fw_maj < FW_MAJ) { 474 fprintf(stderr, "libcxgb4: Fatal firmware version mismatch. " 475 "Firmware major number is %u and libcxgb4 needs %u.\n", 476 fw_maj, FW_MAJ); 477 fflush(stderr); 478 return NULL; 479 } 480 481 DBGLOG("libcxgb4"); 482 483 if (fw_min < FW_MIN) { 484 PDBG("libcxgb4: non-fatal firmware version mismatch. 
" 485 "Firmware minor number is %u and libcxgb4 needs %u.\n", 486 fw_maj, FW_MAJ); 487 fflush(stderr); 488 } 489 490 PDBG("%s found vendor %d device %d type %d\n", 491 __FUNCTION__, vendor, device, 492 CHELSIO_PCI_ID_CHIP_VERSION(hca_table[i].device)); 493 494 dev = calloc(1, sizeof *dev); 495 if (!dev) { 496 return NULL; 497 } 498 499 pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE); 500 dev->ibv_dev.ops = c4iw_dev_ops; 501 dev->chip_version = CHELSIO_PCI_ID_CHIP_VERSION(hca_table[i].device); 502 dev->abi_version = abi_version; 503 504 PDBG("%s device claimed\n", __FUNCTION__); 505 SLIST_INSERT_HEAD(&devices, dev, list); 506#ifdef STALL_DETECTION 507{ 508 char *c = getenv("CXGB4_STALL_TIMEOUT"); 509 if (c) { 510 stall_to = strtol(c, NULL, 0); 511 if (errno || stall_to < 0) 512 stall_to = 0; 513 } 514} 515#endif 516{ 517 char *c = getenv("CXGB4_MA_WR"); 518 if (c) { 519 ma_wr = strtol(c, NULL, 0); 520 if (ma_wr != 1) 521 ma_wr = 0; 522 } 523} 524{ 525 char *c = getenv("T5_ENABLE_WC"); 526 if (c) { 527 t5_en_wc = strtol(c, NULL, 0); 528 if (t5_en_wc != 1) 529 t5_en_wc = 0; 530 } 531} 532 533 return &dev->ibv_dev; 534} 535 536static __attribute__((constructor)) void cxgb4_register_driver(void) 537{ 538 c4iw_page_size = sysconf(_SC_PAGESIZE); 539 c4iw_page_shift = long_log2(c4iw_page_size); 540 c4iw_page_mask = ~(c4iw_page_size - 1); 541 ibv_register_driver("cxgb4", cxgb4_driver_init); 542} 543 544#ifdef STATS 545void __attribute__ ((destructor)) cs_fini(void); 546void __attribute__ ((destructor)) cs_fini(void) 547{ 548 syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu " 549 "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n", 550 c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read, 551 c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe, 552 c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq); 553} 554#endif 555