/* main.c revision 292107 */
1/* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/kmod.h> 37/* 38 * kmod.h must be included before module.h since it includes (indirectly) sys/module.h 39 * To use the FBSD macro sys/module.h should define MODULE_VERSION before linux/module does. 
40*/ 41#include <linux/module.h> 42#include <linux/errno.h> 43#include <linux/pci.h> 44#include <linux/dma-mapping.h> 45#include <linux/slab.h> 46#include <linux/io-mapping.h> 47#include <linux/delay.h> 48#include <linux/netdevice.h> 49#include <linux/string.h> 50#include <linux/fs.h> 51 52#include <linux/mlx4/device.h> 53#include <linux/mlx4/doorbell.h> 54 55#include "mlx4.h" 56#include "fw.h" 57#include "icm.h" 58#include "mlx4_stats.h" 59 60/* Mellanox ConnectX HCA low-level driver */ 61 62struct workqueue_struct *mlx4_wq; 63 64#ifdef CONFIG_MLX4_DEBUG 65 66int mlx4_debug_level = 0; 67module_param_named(debug_level, mlx4_debug_level, int, 0644); 68MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 69 70#endif /* CONFIG_MLX4_DEBUG */ 71 72#ifdef CONFIG_PCI_MSI 73 74static int msi_x = 1; 75module_param(msi_x, int, 0444); 76MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)"); 77 78#else /* CONFIG_PCI_MSI */ 79 80#define msi_x (0) 81 82#endif /* CONFIG_PCI_MSI */ 83 84static int enable_sys_tune = 0; 85module_param(enable_sys_tune, int, 0444); 86MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)"); 87 88int mlx4_blck_lb = 1; 89module_param_named(block_loopback, mlx4_blck_lb, int, 0644); 90MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " 91 "(default: 1)"); 92enum { 93 DEFAULT_DOMAIN = 0, 94 BDF_STR_SIZE = 8, /* bb:dd.f- */ 95 DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */ 96}; 97 98enum { 99 NUM_VFS, 100 PROBE_VF, 101 PORT_TYPE_ARRAY 102}; 103 104enum { 105 VALID_DATA, 106 INVALID_DATA, 107 INVALID_STR 108}; 109 110struct param_data { 111 int id; 112 struct mlx4_dbdf2val_lst dbdf2val; 113}; 114 115static struct param_data num_vfs = { 116 .id = NUM_VFS, 117 .dbdf2val = { 118 .name = "num_vfs param", 119 .num_vals = 1, 120 .def_val = {0}, 121 .range = {0, MLX4_MAX_NUM_VF} 122 } 123}; 124module_param_string(num_vfs, num_vfs.dbdf2val.str, 
125 sizeof(num_vfs.dbdf2val.str), 0444); 126MODULE_PARM_DESC(num_vfs, 127 "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n" 128 "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n" 129 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15)."); 130 131static struct param_data probe_vf = { 132 .id = PROBE_VF, 133 .dbdf2val = { 134 .name = "probe_vf param", 135 .num_vals = 1, 136 .def_val = {0}, 137 .range = {0, MLX4_MAX_NUM_VF} 138 } 139}; 140module_param_string(probe_vf, probe_vf.dbdf2val.str, 141 sizeof(probe_vf.dbdf2val.str), 0444); 142MODULE_PARM_DESC(probe_vf, 143 "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n" 144 "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n" 145 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13)."); 146 147int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 148 149module_param_named(log_num_mgm_entry_size, 150 mlx4_log_num_mgm_entry_size, int, 0444); 151MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" 152 " of qp per mcg, for example:" 153 " 10 gives 248.range: 7 <=" 154 " log_num_mgm_entry_size <= 12." 
155 " To activate device managed" 156 " flow steering when available, set to -1"); 157 158static int high_rate_steer; 159module_param(high_rate_steer, int, 0444); 160MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate" 161 " (default off)"); 162 163static int fast_drop; 164module_param_named(fast_drop, fast_drop, int, 0444); 165MODULE_PARM_DESC(fast_drop, 166 "Enable fast packet drop when no recieve WQEs are posted"); 167 168int mlx4_enable_64b_cqe_eqe = 1; 169module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); 170MODULE_PARM_DESC(enable_64b_cqe_eqe, 171 "Enable 64 byte CQEs/EQEs when the the FW supports this if non-zero (default: 1)"); 172 173#define HCA_GLOBAL_CAP_MASK 0 174 175#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE 176 177static char mlx4_version[] __devinitdata = 178 DRV_NAME ": Mellanox ConnectX VPI driver v" 179 DRV_VERSION " (" DRV_RELDATE ")\n"; 180 181static int log_num_mac = 7; 182module_param_named(log_num_mac, log_num_mac, int, 0444); 183MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); 184 185static int log_num_vlan; 186module_param_named(log_num_vlan, log_num_vlan, int, 0444); 187MODULE_PARM_DESC(log_num_vlan, 188 "(Obsolete) Log2 max number of VLANs per ETH port (0-7)"); 189/* Log2 max number of VLANs per ETH port (0-7) */ 190#define MLX4_LOG_NUM_VLANS 7 191 192int log_mtts_per_seg = ilog2(1); 193module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); 194MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " 195 "(0-7) (default: 0)"); 196 197static struct param_data port_type_array = { 198 .id = PORT_TYPE_ARRAY, 199 .dbdf2val = { 200 .name = "port_type_array param", 201 .num_vals = 2, 202 .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH}, 203 .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA} 204 } 205}; 206module_param_string(port_type_array, port_type_array.dbdf2val.str, 207 sizeof(port_type_array.dbdf2val.str), 0444); 
208MODULE_PARM_DESC(port_type_array, 209 "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n" 210 "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n" 211 "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n" 212 "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4')."); 213 214 215struct mlx4_port_config { 216 struct list_head list; 217 enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; 218 struct pci_dev *pdev; 219}; 220 221#define MLX4_LOG_NUM_MTT 20 222/* We limit to 30 as of a bit map issue which uses int and not uint. 223 see mlx4_buddy_init -> bitmap_zero which gets int. 224*/ 225#define MLX4_MAX_LOG_NUM_MTT 30 226static struct mlx4_profile mod_param_profile = { 227 .num_qp = 19, 228 .num_srq = 16, 229 .rdmarc_per_qp = 4, 230 .num_cq = 16, 231 .num_mcg = 13, 232 .num_mpt = 19, 233 .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ 234}; 235 236module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); 237MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); 238 239module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); 240MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " 241 "(default: 16)"); 242 243module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 244 0444); 245MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " 246 "(default: 4)"); 247 248module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); 249MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); 250 251module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); 252MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " 253 "(default: 13)"); 254 255module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); 
256MODULE_PARM_DESC(log_num_mpt, 257 "log maximum number of memory protection table entries per " 258 "HCA (default: 19)"); 259 260module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); 261MODULE_PARM_DESC(log_num_mtt, 262 "log maximum number of memory translation table segments per " 263 "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); 264 265enum { 266 MLX4_IF_STATE_BASIC, 267 MLX4_IF_STATE_EXTENDED 268}; 269 270static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) 271{ 272 return (domain << 20) | (bus << 12) | (dev << 4) | fn; 273} 274 275static inline void pr_bdf_err(const char *dbdf, const char *pname) 276{ 277 pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); 278} 279 280static inline void pr_val_err(const char *dbdf, const char *pname, 281 const char *val) 282{ 283 pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" 284 , val, dbdf, pname); 285} 286 287static inline void pr_out_of_range_bdf(const char *dbdf, int val, 288 struct mlx4_dbdf2val_lst *dbdf2val) 289{ 290 pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" 291 , val, dbdf, dbdf2val->name , dbdf2val->range.min, 292 dbdf2val->range.max); 293} 294 295static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) 296{ 297 pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" 298 , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); 299} 300 301static inline int is_in_range(int val, struct mlx4_range *r) 302{ 303 return (val >= r->min && val <= r->max); 304} 305 306static int update_defaults(struct param_data *pdata) 307{ 308 long int val[MLX4_MAX_BDF_VALS]; 309 int ret; 310 char *t, *p = pdata->dbdf2val.str; 311 char sval[32]; 312 int val_len; 313 314 if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) 315 return INVALID_STR; 316 317 switch (pdata->id) { 318 case PORT_TYPE_ARRAY: 319 t = strchr(p, ','); 320 if (!t || 
t == p || (t - p) > sizeof(sval)) 321 return INVALID_STR; 322 323 val_len = t - p; 324 strncpy(sval, p, val_len); 325 sval[val_len] = 0; 326 327 ret = kstrtol(sval, 0, &val[0]); 328 if (ret == -EINVAL) 329 return INVALID_STR; 330 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 331 pr_out_of_range(&pdata->dbdf2val); 332 return INVALID_DATA; 333 } 334 335 ret = kstrtol(t + 1, 0, &val[1]); 336 if (ret == -EINVAL) 337 return INVALID_STR; 338 if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { 339 pr_out_of_range(&pdata->dbdf2val); 340 return INVALID_DATA; 341 } 342 343 pdata->dbdf2val.tbl[0].val[0] = val[0]; 344 pdata->dbdf2val.tbl[0].val[1] = val[1]; 345 break; 346 347 case NUM_VFS: 348 case PROBE_VF: 349 ret = kstrtol(p, 0, &val[0]); 350 if (ret == -EINVAL) 351 return INVALID_STR; 352 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 353 pr_out_of_range(&pdata->dbdf2val); 354 return INVALID_DATA; 355 } 356 pdata->dbdf2val.tbl[0].val[0] = val[0]; 357 break; 358 } 359 pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; 360 361 return VALID_DATA; 362} 363 364int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) 365{ 366 int domain, bus, dev, fn; 367 u64 dbdf; 368 char *p, *t, *v; 369 char tmp[32]; 370 char sbdf[32]; 371 char sep = ','; 372 int j, k, str_size, i = 1; 373 int prfx_size; 374 375 p = dbdf2val_lst->str; 376 377 for (j = 0; j < dbdf2val_lst->num_vals; j++) 378 dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; 379 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 380 381 str_size = strlen(dbdf2val_lst->str); 382 383 if (str_size == 0) 384 return 0; 385 386 while (strlen(p)) { 387 prfx_size = BDF_STR_SIZE; 388 sbdf[prfx_size] = 0; 389 strncpy(sbdf, p, prfx_size); 390 domain = DEFAULT_DOMAIN; 391 if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { 392 prfx_size = DBDF_STR_SIZE; 393 sbdf[prfx_size] = 0; 394 strncpy(sbdf, p, prfx_size); 395 if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, 396 &dev, &fn) != 4) { 397 
pr_bdf_err(sbdf, dbdf2val_lst->name); 398 goto err; 399 } 400 sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, 401 fn); 402 } else { 403 sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); 404 } 405 406 if (strnicmp(sbdf, tmp, sizeof(tmp))) { 407 pr_bdf_err(sbdf, dbdf2val_lst->name); 408 goto err; 409 } 410 411 dbdf = dbdf_to_u64(domain, bus, dev, fn); 412 413 for (j = 1; j < i; j++) 414 if (dbdf2val_lst->tbl[j].dbdf == dbdf) { 415 pr_warn("mlx4_core: in '%s', %s appears multiple times\n" 416 , dbdf2val_lst->name, sbdf); 417 goto err; 418 } 419 420 if (i >= MLX4_DEVS_TBL_SIZE) { 421 pr_warn("mlx4_core: Too many devices in '%s'\n" 422 , dbdf2val_lst->name); 423 goto err; 424 } 425 426 p += prfx_size; 427 t = strchr(p, sep); 428 t = t ? t : p + strlen(p); 429 if (p >= t) { 430 pr_val_err(sbdf, dbdf2val_lst->name, ""); 431 goto err; 432 } 433 434 for (k = 0; k < dbdf2val_lst->num_vals; k++) { 435 char sval[32]; 436 long int val; 437 int ret, val_len; 438 char vsep = ';'; 439 440 v = (k == dbdf2val_lst->num_vals - 1) ? 
t : strchr(p, vsep); 441 if (!v || v > t || v == p || (v - p) > sizeof(sval)) { 442 pr_val_err(sbdf, dbdf2val_lst->name, p); 443 goto err; 444 } 445 val_len = v - p; 446 strncpy(sval, p, val_len); 447 sval[val_len] = 0; 448 449 ret = kstrtol(sval, 0, &val); 450 if (ret) { 451 if (strchr(p, vsep)) 452 pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" 453 , sbdf, dbdf2val_lst->name); 454 else 455 pr_val_err(sbdf, dbdf2val_lst->name, 456 sval); 457 goto err; 458 } 459 if (!is_in_range(val, &dbdf2val_lst->range)) { 460 pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); 461 goto err; 462 } 463 464 dbdf2val_lst->tbl[i].val[k] = val; 465 p = v; 466 if (p[0] == vsep) 467 p++; 468 } 469 470 dbdf2val_lst->tbl[i].dbdf = dbdf; 471 if (strlen(p)) { 472 if (p[0] != sep) { 473 pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" 474 , sep, p, dbdf2val_lst->name); 475 goto err; 476 } 477 p++; 478 } 479 i++; 480 if (i < MLX4_DEVS_TBL_SIZE) 481 dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; 482 } 483 484 return 0; 485 486err: 487 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 488 pr_warn("mlx4_core: The value of '%s' is incorrect. 
The value is discarded!\n" 489 , dbdf2val_lst->name); 490 491 return -EINVAL; 492} 493EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); 494 495int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, 496 int *val) 497{ 498 u64 dbdf; 499 int i = 1; 500 501 *val = tbl[0].val[idx]; 502 if (!pdev) 503 return -EINVAL; 504 505 dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), 506 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 507 508 while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { 509 if (tbl[i].dbdf == dbdf) { 510 *val = tbl[i].val[idx]; 511 return 0; 512 } 513 i++; 514 } 515 516 return 0; 517} 518EXPORT_SYMBOL(mlx4_get_val); 519 520static void process_mod_param_profile(struct mlx4_profile *profile) 521{ 522 vm_size_t hwphyssz; 523 hwphyssz = 0; 524 TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); 525 526 profile->num_qp = 1 << mod_param_profile.num_qp; 527 profile->num_srq = 1 << mod_param_profile.num_srq; 528 profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; 529 profile->num_cq = 1 << mod_param_profile.num_cq; 530 profile->num_mcg = 1 << mod_param_profile.num_mcg; 531 profile->num_mpt = 1 << mod_param_profile.num_mpt; 532 /* 533 * We want to scale the number of MTTs with the size of the 534 * system memory, since it makes sense to register a lot of 535 * memory on a system with a lot of memory. As a heuristic, 536 * make sure we have enough MTTs to register twice the system 537 * memory (with PAGE_SIZE entries). 538 * 539 * This number has to be a power of two and fit into 32 bits 540 * due to device limitations. We cap this at 2^30 as of bit map 541 * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) 542 * That limits us to 4TB of memory registration per HCA with 543 * 4KB pages, which is probably OK for the next few months. 
544 */ 545 if (mod_param_profile.num_mtt_segs) 546 profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; 547 else { 548 profile->num_mtt_segs = 549 roundup_pow_of_two(max_t(unsigned, 550 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), 551 min(1UL << 552 (MLX4_MAX_LOG_NUM_MTT - 553 log_mtts_per_seg), 554 (hwphyssz << 1) 555 >> log_mtts_per_seg))); 556 /* set the actual value, so it will be reflected to the user 557 using the sysfs */ 558 mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); 559 } 560} 561 562int mlx4_check_port_params(struct mlx4_dev *dev, 563 enum mlx4_port_type *port_type) 564{ 565 int i; 566 567 for (i = 0; i < dev->caps.num_ports - 1; i++) { 568 if (port_type[i] != port_type[i + 1]) { 569 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { 570 mlx4_err(dev, "Only same port types supported " 571 "on this HCA, aborting.\n"); 572 return -EINVAL; 573 } 574 } 575 } 576 577 for (i = 0; i < dev->caps.num_ports; i++) { 578 if (!(port_type[i] & dev->caps.supported_type[i+1])) { 579 mlx4_err(dev, "Requested port type for port %d is not " 580 "supported on this HCA\n", i + 1); 581 return -EINVAL; 582 } 583 } 584 return 0; 585} 586 587static void mlx4_set_port_mask(struct mlx4_dev *dev) 588{ 589 int i; 590 591 for (i = 1; i <= dev->caps.num_ports; ++i) 592 dev->caps.port_mask[i] = dev->caps.port_type[i]; 593} 594 595static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 596{ 597 int err; 598 int i; 599 600 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 601 if (err) { 602 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 603 return err; 604 } 605 606 if (dev_cap->min_page_sz > PAGE_SIZE) { 607 mlx4_err(dev, "HCA minimum page size of %d bigger than " 608 "kernel PAGE_SIZE of %d, aborting.\n", 609 dev_cap->min_page_sz, (int)PAGE_SIZE); 610 return -ENODEV; 611 } 612 if (dev_cap->num_ports > MLX4_MAX_PORTS) { 613 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 614 "aborting.\n", 615 dev_cap->num_ports, 
MLX4_MAX_PORTS); 616 return -ENODEV; 617 } 618 619 if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { 620 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " 621 "PCI resource 2 size of 0x%llx, aborting.\n", 622 dev_cap->uar_size, 623 (unsigned long long) pci_resource_len(dev->pdev, 2)); 624 return -ENODEV; 625 } 626 627 dev->caps.num_ports = dev_cap->num_ports; 628 dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; 629 for (i = 1; i <= dev->caps.num_ports; ++i) { 630 dev->caps.vl_cap[i] = dev_cap->max_vl[i]; 631 dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; 632 dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; 633 dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; 634 /* set gid and pkey table operating lengths by default 635 * to non-sriov values */ 636 dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; 637 dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; 638 dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; 639 dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; 640 dev->caps.def_mac[i] = dev_cap->def_mac[i]; 641 dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; 642 dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; 643 dev->caps.default_sense[i] = dev_cap->default_sense[i]; 644 dev->caps.trans_type[i] = dev_cap->trans_type[i]; 645 dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; 646 dev->caps.wavelength[i] = dev_cap->wavelength[i]; 647 dev->caps.trans_code[i] = dev_cap->trans_code[i]; 648 } 649 650 dev->caps.uar_page_size = PAGE_SIZE; 651 dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; 652 dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; 653 dev->caps.bf_reg_size = dev_cap->bf_reg_size; 654 dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; 655 dev->caps.max_sq_sg = dev_cap->max_sq_sg; 656 dev->caps.max_rq_sg = dev_cap->max_rq_sg; 657 dev->caps.max_wqes = dev_cap->max_qp_sz; 658 dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; 659 dev->caps.max_srq_wqes = 
dev_cap->max_srq_sz; 660 dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; 661 dev->caps.reserved_srqs = dev_cap->reserved_srqs; 662 dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; 663 dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; 664 /* 665 * Subtract 1 from the limit because we need to allocate a 666 * spare CQE to enable resizing the CQ 667 */ 668 dev->caps.max_cqes = dev_cap->max_cq_sz - 1; 669 dev->caps.reserved_cqs = dev_cap->reserved_cqs; 670 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 671 dev->caps.reserved_mtts = dev_cap->reserved_mtts; 672 dev->caps.reserved_mrws = dev_cap->reserved_mrws; 673 674 /* The first 128 UARs are used for EQ doorbells */ 675 dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); 676 dev->caps.reserved_pds = dev_cap->reserved_pds; 677 dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 678 dev_cap->reserved_xrcds : 0; 679 dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 680 dev_cap->max_xrcds : 0; 681 dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; 682 683 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 684 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 685 dev->caps.flags = dev_cap->flags; 686 dev->caps.flags2 = dev_cap->flags2; 687 dev->caps.bmme_flags = dev_cap->bmme_flags; 688 dev->caps.reserved_lkey = dev_cap->reserved_lkey; 689 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 690 dev->caps.cq_timestamp = dev_cap->timestamp_support; 691 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 692 dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; 693 694 /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ 695 if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) 696 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 697 /* Don't do sense port on multifunction devices (for now at least) */ 698 if (mlx4_is_mfunc(dev)) 699 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 700 701 dev->caps.log_num_macs = log_num_mac; 702 
dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; 703 704 dev->caps.fast_drop = fast_drop ? 705 !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 706 0; 707 708 for (i = 1; i <= dev->caps.num_ports; ++i) { 709 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; 710 if (dev->caps.supported_type[i]) { 711 /* if only ETH is supported - assign ETH */ 712 if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) 713 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; 714 /* if only IB is supported, assign IB */ 715 else if (dev->caps.supported_type[i] == 716 MLX4_PORT_TYPE_IB) 717 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; 718 else { 719 /* 720 * if IB and ETH are supported, we set the port 721 * type according to user selection of port type; 722 * if there is no user selection, take the FW hint 723 */ 724 int pta; 725 mlx4_get_val(port_type_array.dbdf2val.tbl, 726 pci_physfn(dev->pdev), i - 1, 727 &pta); 728 if (pta == MLX4_PORT_TYPE_NONE) { 729 dev->caps.port_type[i] = dev->caps.suggested_type[i] ? 730 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; 731 } else if (pta == MLX4_PORT_TYPE_NA) { 732 mlx4_err(dev, "Port %d is valid port. " 733 "It is not allowed to configure its type to N/A(%d)\n", 734 i, MLX4_PORT_TYPE_NA); 735 return -EINVAL; 736 } else { 737 dev->caps.port_type[i] = pta; 738 } 739 } 740 } 741 /* 742 * Link sensing is allowed on the port if 3 conditions are true: 743 * 1. Both protocols are supported on the port. 744 * 2. Different types are supported on the port 745 * 3. 
FW declared that it supports link sensing 746 */ 747 mlx4_priv(dev)->sense.sense_allowed[i] = 748 ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && 749 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 750 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); 751 752 /* Disablling auto sense for default Eth ports support */ 753 mlx4_priv(dev)->sense.sense_allowed[i] = 0; 754 755 /* 756 * If "default_sense" bit is set, we move the port to "AUTO" mode 757 * and perform sense_port FW command to try and set the correct 758 * port type from beginning 759 */ 760 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { 761 enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; 762 dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; 763 mlx4_SENSE_PORT(dev, i, &sensed_port); 764 if (sensed_port != MLX4_PORT_TYPE_NONE) 765 dev->caps.port_type[i] = sensed_port; 766 } else { 767 dev->caps.possible_type[i] = dev->caps.port_type[i]; 768 } 769 770 if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { 771 dev->caps.log_num_macs = dev_cap->log_max_macs[i]; 772 mlx4_warn(dev, "Requested number of MACs is too much " 773 "for port %d, reducing to %d.\n", 774 i, 1 << dev->caps.log_num_macs); 775 } 776 if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { 777 dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; 778 mlx4_warn(dev, "Requested number of VLANs is too much " 779 "for port %d, reducing to %d.\n", 780 i, 1 << dev->caps.log_num_vlans); 781 } 782 } 783 784 dev->caps.max_basic_counters = dev_cap->max_basic_counters; 785 dev->caps.max_extended_counters = dev_cap->max_extended_counters; 786 /* support extended counters if available */ 787 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) 788 dev->caps.max_counters = dev->caps.max_extended_counters; 789 else 790 dev->caps.max_counters = dev->caps.max_basic_counters; 791 792 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; 793 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = 
794 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = 795 (1 << dev->caps.log_num_macs) * 796 (1 << dev->caps.log_num_vlans) * 797 dev->caps.num_ports; 798 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; 799 800 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + 801 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + 802 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + 803 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 804 805 dev->caps.sync_qp = dev_cap->sync_qp; 806 if (dev->pdev->device == 0x1003) 807 dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; 808 809 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; 810 811 if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { 812 if (dev_cap->flags & 813 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { 814 mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); 815 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; 816 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; 817 } 818 } 819 820 if ((dev->caps.flags & 821 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && 822 mlx4_is_master(dev)) 823 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; 824 825 if (!mlx4_is_slave(dev)) { 826 for (i = 0; i < dev->caps.num_ports; ++i) 827 dev->caps.def_counter_index[i] = i << 1; 828 } 829 830 return 0; 831} 832/*The function checks if there are live vf, return the num of them*/ 833static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) 834{ 835 struct mlx4_priv *priv = mlx4_priv(dev); 836 struct mlx4_slave_state *s_state; 837 int i; 838 int ret = 0; 839 840 for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { 841 s_state = &priv->mfunc.master.slave_state[i]; 842 if (s_state->active && s_state->last_cmd != 843 MLX4_COMM_CMD_RESET) { 844 mlx4_warn(dev, "%s: slave: %d is still active\n", 845 __func__, i); 846 ret++; 847 } 848 } 849 return ret; 850} 851 852int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) 853{ 
854 u32 qk = MLX4_RESERVED_QKEY_BASE; 855 856 if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || 857 qpn < dev->phys_caps.base_proxy_sqpn) 858 return -EINVAL; 859 860 if (qpn >= dev->phys_caps.base_tunnel_sqpn) 861 /* tunnel qp */ 862 qk += qpn - dev->phys_caps.base_tunnel_sqpn; 863 else 864 qk += qpn - dev->phys_caps.base_proxy_sqpn; 865 *qkey = qk; 866 return 0; 867} 868EXPORT_SYMBOL(mlx4_get_parav_qkey); 869 870void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) 871{ 872 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 873 874 if (!mlx4_is_master(dev)) 875 return; 876 877 priv->virt2phys_pkey[slave][port - 1][i] = val; 878} 879EXPORT_SYMBOL(mlx4_sync_pkey_table); 880 881void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) 882{ 883 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 884 885 if (!mlx4_is_master(dev)) 886 return; 887 888 priv->slave_node_guids[slave] = guid; 889} 890EXPORT_SYMBOL(mlx4_put_slave_node_guid); 891 892__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) 893{ 894 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 895 896 if (!mlx4_is_master(dev)) 897 return 0; 898 899 return priv->slave_node_guids[slave]; 900} 901EXPORT_SYMBOL(mlx4_get_slave_node_guid); 902 903int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) 904{ 905 struct mlx4_priv *priv = mlx4_priv(dev); 906 struct mlx4_slave_state *s_slave; 907 908 if (!mlx4_is_master(dev)) 909 return 0; 910 911 s_slave = &priv->mfunc.master.slave_state[slave]; 912 return !!s_slave->active; 913} 914EXPORT_SYMBOL(mlx4_is_slave_active); 915 916static void slave_adjust_steering_mode(struct mlx4_dev *dev, 917 struct mlx4_dev_cap *dev_cap, 918 struct mlx4_init_hca_param *hca_param) 919{ 920 dev->caps.steering_mode = hca_param->steering_mode; 921 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) 922 dev->caps.num_qp_per_mgm = 
dev_cap->fs_max_num_qp_per_entry; 923 else 924 dev->caps.num_qp_per_mgm = 925 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); 926 927 mlx4_dbg(dev, "Steering mode is: %s\n", 928 mlx4_steering_mode_str(dev->caps.steering_mode)); 929} 930 931static int mlx4_slave_cap(struct mlx4_dev *dev) 932{ 933 int err; 934 u32 page_size; 935 struct mlx4_dev_cap dev_cap; 936 struct mlx4_func_cap func_cap; 937 struct mlx4_init_hca_param hca_param; 938 int i; 939 940 memset(&hca_param, 0, sizeof(hca_param)); 941 err = mlx4_QUERY_HCA(dev, &hca_param); 942 if (err) { 943 mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); 944 return err; 945 } 946 947 /*fail if the hca has an unknown capability */ 948 if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != 949 HCA_GLOBAL_CAP_MASK) { 950 mlx4_err(dev, "Unknown hca global capabilities\n"); 951 return -ENOSYS; 952 } 953 954 mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; 955 956 dev->caps.hca_core_clock = hca_param.hca_core_clock; 957 958 memset(&dev_cap, 0, sizeof(dev_cap)); 959 dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; 960 err = mlx4_dev_cap(dev, &dev_cap); 961 if (err) { 962 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 963 return err; 964 } 965 966 err = mlx4_QUERY_FW(dev); 967 if (err) 968 mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); 969 970 if (!hca_param.mw_enable) { 971 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; 972 dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; 973 } 974 975 page_size = ~dev->caps.page_size_cap + 1; 976 mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); 977 if (page_size > PAGE_SIZE) { 978 mlx4_err(dev, "HCA minimum page size of %d bigger than " 979 "kernel PAGE_SIZE of %d, aborting.\n", 980 page_size, (int)PAGE_SIZE); 981 return -ENODEV; 982 } 983 984 /* slave gets uar page size from QUERY_HCA fw command */ 985 dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); 986 987 /* TODO: relax this assumption */ 988 if 
(dev->caps.uar_page_size != PAGE_SIZE) { 989 mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", 990 dev->caps.uar_page_size, (int)PAGE_SIZE); 991 return -ENODEV; 992 } 993 994 memset(&func_cap, 0, sizeof(func_cap)); 995 err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); 996 if (err) { 997 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", 998 err); 999 return err; 1000 } 1001 1002 if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != 1003 PF_CONTEXT_BEHAVIOUR_MASK) { 1004 mlx4_err(dev, "Unknown pf context behaviour\n"); 1005 return -ENOSYS; 1006 } 1007 1008 dev->caps.num_ports = func_cap.num_ports; 1009 dev->quotas.qp = func_cap.qp_quota; 1010 dev->quotas.srq = func_cap.srq_quota; 1011 dev->quotas.cq = func_cap.cq_quota; 1012 dev->quotas.mpt = func_cap.mpt_quota; 1013 dev->quotas.mtt = func_cap.mtt_quota; 1014 dev->caps.num_qps = 1 << hca_param.log_num_qps; 1015 dev->caps.num_srqs = 1 << hca_param.log_num_srqs; 1016 dev->caps.num_cqs = 1 << hca_param.log_num_cqs; 1017 dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; 1018 dev->caps.num_eqs = func_cap.max_eq; 1019 dev->caps.reserved_eqs = func_cap.reserved_eq; 1020 dev->caps.num_pds = MLX4_NUM_PDS; 1021 dev->caps.num_mgms = 0; 1022 dev->caps.num_amgms = 0; 1023 1024 if (dev->caps.num_ports > MLX4_MAX_PORTS) { 1025 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 1026 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); 1027 return -ENODEV; 1028 } 1029 1030 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1031 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1032 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1033 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1034 1035 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || 1036 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { 1037 err = -ENOMEM; 1038 goto err_mem; 1039 } 1040 1041 for (i = 1; i <= 
dev->caps.num_ports; ++i) { 1042 err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); 1043 if (err) { 1044 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" 1045 " port %d, aborting (%d).\n", i, err); 1046 goto err_mem; 1047 } 1048 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; 1049 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; 1050 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; 1051 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; 1052 dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; 1053 1054 dev->caps.port_mask[i] = dev->caps.port_type[i]; 1055 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, 1056 &dev->caps.gid_table_len[i], 1057 &dev->caps.pkey_table_len[i]); 1058 if (err) 1059 goto err_mem; 1060 } 1061 1062 if (dev->caps.uar_page_size * (dev->caps.num_uars - 1063 dev->caps.reserved_uars) > 1064 pci_resource_len(dev->pdev, 2)) { 1065 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " 1066 "PCI resource 2 size of 0x%llx, aborting.\n", 1067 dev->caps.uar_page_size * dev->caps.num_uars, 1068 (unsigned long long) pci_resource_len(dev->pdev, 2)); 1069 err = -ENOMEM; 1070 goto err_mem; 1071 } 1072 1073 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { 1074 dev->caps.eqe_size = 64; 1075 dev->caps.eqe_factor = 1; 1076 } else { 1077 dev->caps.eqe_size = 32; 1078 dev->caps.eqe_factor = 0; 1079 } 1080 1081 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { 1082 dev->caps.cqe_size = 64; 1083 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; 1084 } else { 1085 dev->caps.cqe_size = 32; 1086 } 1087 1088 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1089 mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); 1090 1091 slave_adjust_steering_mode(dev, &dev_cap, &hca_param); 1092 1093 return 0; 1094 1095err_mem: 1096 kfree(dev->caps.qp0_tunnel); 1097 kfree(dev->caps.qp0_proxy); 1098 kfree(dev->caps.qp1_tunnel); 1099 kfree(dev->caps.qp1_proxy); 1100 dev->caps.qp0_tunnel = 
dev->caps.qp0_proxy = 1101 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; 1102 1103 return err; 1104} 1105 1106static void mlx4_request_modules(struct mlx4_dev *dev) 1107{ 1108 int port; 1109 int has_ib_port = false; 1110 int has_eth_port = false; 1111#define EN_DRV_NAME "mlx4_en" 1112#define IB_DRV_NAME "mlx4_ib" 1113 1114 for (port = 1; port <= dev->caps.num_ports; port++) { 1115 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) 1116 has_ib_port = true; 1117 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) 1118 has_eth_port = true; 1119 } 1120 1121 if (has_ib_port) 1122 request_module_nowait(IB_DRV_NAME); 1123 if (has_eth_port) 1124 request_module_nowait(EN_DRV_NAME); 1125} 1126 1127/* 1128 * Change the port configuration of the device. 1129 * Every user of this function must hold the port mutex. 1130 */ 1131int mlx4_change_port_types(struct mlx4_dev *dev, 1132 enum mlx4_port_type *port_types) 1133{ 1134 int err = 0; 1135 int change = 0; 1136 int port; 1137 1138 for (port = 0; port < dev->caps.num_ports; port++) { 1139 /* Change the port type only if the new type is different 1140 * from the current, and not set to Auto */ 1141 if (port_types[port] != dev->caps.port_type[port + 1]) 1142 change = 1; 1143 } 1144 if (change) { 1145 mlx4_unregister_device(dev); 1146 for (port = 1; port <= dev->caps.num_ports; port++) { 1147 mlx4_CLOSE_PORT(dev, port); 1148 dev->caps.port_type[port] = port_types[port - 1]; 1149 err = mlx4_SET_PORT(dev, port, -1); 1150 if (err) { 1151 mlx4_err(dev, "Failed to set port %d, " 1152 "aborting\n", port); 1153 goto out; 1154 } 1155 } 1156 mlx4_set_port_mask(dev); 1157 err = mlx4_register_device(dev); 1158 if (err) { 1159 mlx4_err(dev, "Failed to register device\n"); 1160 goto out; 1161 } 1162 mlx4_request_modules(dev); 1163 } 1164 1165out: 1166 return err; 1167} 1168 1169static ssize_t show_port_type(struct device *dev, 1170 struct device_attribute *attr, 1171 char *buf) 1172{ 1173 struct mlx4_port_info *info = 
container_of(attr, struct mlx4_port_info, 1174 port_attr); 1175 struct mlx4_dev *mdev = info->dev; 1176 char type[8]; 1177 1178 sprintf(type, "%s", 1179 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? 1180 "ib" : "eth"); 1181 if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) 1182 sprintf(buf, "auto (%s)\n", type); 1183 else 1184 sprintf(buf, "%s\n", type); 1185 1186 return strlen(buf); 1187} 1188 1189static ssize_t set_port_type(struct device *dev, 1190 struct device_attribute *attr, 1191 const char *buf, size_t count) 1192{ 1193 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1194 port_attr); 1195 struct mlx4_dev *mdev = info->dev; 1196 struct mlx4_priv *priv = mlx4_priv(mdev); 1197 enum mlx4_port_type types[MLX4_MAX_PORTS]; 1198 enum mlx4_port_type new_types[MLX4_MAX_PORTS]; 1199 int i; 1200 int err = 0; 1201 1202 if (!strcmp(buf, "ib\n")) 1203 info->tmp_type = MLX4_PORT_TYPE_IB; 1204 else if (!strcmp(buf, "eth\n")) 1205 info->tmp_type = MLX4_PORT_TYPE_ETH; 1206 else if (!strcmp(buf, "auto\n")) 1207 info->tmp_type = MLX4_PORT_TYPE_AUTO; 1208 else { 1209 mlx4_err(mdev, "%s is not supported port type\n", buf); 1210 return -EINVAL; 1211 } 1212 1213 if ((info->tmp_type & mdev->caps.supported_type[info->port]) != 1214 info->tmp_type) { 1215 mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", 1216 info->port); 1217 return -EINVAL; 1218 } 1219 1220 mlx4_stop_sense(mdev); 1221 mutex_lock(&priv->port_mutex); 1222 /* Possible type is always the one that was delivered */ 1223 mdev->caps.possible_type[info->port] = info->tmp_type; 1224 1225 for (i = 0; i < mdev->caps.num_ports; i++) { 1226 types[i] = priv->port[i+1].tmp_type ? 
priv->port[i+1].tmp_type : 1227 mdev->caps.possible_type[i+1]; 1228 if (types[i] == MLX4_PORT_TYPE_AUTO) 1229 types[i] = mdev->caps.port_type[i+1]; 1230 } 1231 1232 if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 1233 !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { 1234 for (i = 1; i <= mdev->caps.num_ports; i++) { 1235 if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { 1236 mdev->caps.possible_type[i] = mdev->caps.port_type[i]; 1237 err = -EINVAL; 1238 } 1239 } 1240 } 1241 if (err) { 1242 mlx4_err(mdev, "Auto sensing is not supported on this HCA. " 1243 "Set only 'eth' or 'ib' for both ports " 1244 "(should be the same)\n"); 1245 goto out; 1246 } 1247 1248 mlx4_do_sense_ports(mdev, new_types, types); 1249 1250 err = mlx4_check_port_params(mdev, new_types); 1251 if (err) 1252 goto out; 1253 1254 /* We are about to apply the changes after the configuration 1255 * was verified, no need to remember the temporary types 1256 * any more */ 1257 for (i = 0; i < mdev->caps.num_ports; i++) 1258 priv->port[i + 1].tmp_type = 0; 1259 1260 err = mlx4_change_port_types(mdev, new_types); 1261 1262out: 1263 mlx4_start_sense(mdev); 1264 mutex_unlock(&priv->port_mutex); 1265 return err ? 
err : count; 1266} 1267 1268enum ibta_mtu { 1269 IB_MTU_256 = 1, 1270 IB_MTU_512 = 2, 1271 IB_MTU_1024 = 3, 1272 IB_MTU_2048 = 4, 1273 IB_MTU_4096 = 5 1274}; 1275 1276static inline int int_to_ibta_mtu(int mtu) 1277{ 1278 switch (mtu) { 1279 case 256: return IB_MTU_256; 1280 case 512: return IB_MTU_512; 1281 case 1024: return IB_MTU_1024; 1282 case 2048: return IB_MTU_2048; 1283 case 4096: return IB_MTU_4096; 1284 default: return -1; 1285 } 1286} 1287 1288static inline int ibta_mtu_to_int(enum ibta_mtu mtu) 1289{ 1290 switch (mtu) { 1291 case IB_MTU_256: return 256; 1292 case IB_MTU_512: return 512; 1293 case IB_MTU_1024: return 1024; 1294 case IB_MTU_2048: return 2048; 1295 case IB_MTU_4096: return 4096; 1296 default: return -1; 1297 } 1298} 1299 1300static ssize_t 1301show_board(struct device *device, struct device_attribute *attr, 1302 char *buf) 1303{ 1304 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1305 board_attr); 1306 struct mlx4_dev *mdev = info->dev; 1307 1308 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 1309 mdev->board_id); 1310} 1311 1312static ssize_t 1313show_hca(struct device *device, struct device_attribute *attr, 1314 char *buf) 1315{ 1316 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1317 hca_attr); 1318 struct mlx4_dev *mdev = info->dev; 1319 1320 return sprintf(buf, "MT%d\n", mdev->pdev->device); 1321} 1322 1323static ssize_t 1324show_firmware_version(struct device *dev, 1325 struct device_attribute *attr, 1326 char *buf) 1327{ 1328 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1329 firmware_attr); 1330 struct mlx4_dev *mdev = info->dev; 1331 1332 return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32), 1333 (int)(mdev->caps.fw_ver >> 16) & 0xffff, 1334 (int)mdev->caps.fw_ver & 0xffff); 1335} 1336 1337static ssize_t show_port_ib_mtu(struct device *dev, 1338 struct device_attribute *attr, 1339 char *buf) 1340{ 1341 struct mlx4_port_info *info = 
container_of(attr, struct mlx4_port_info, 1342 port_mtu_attr); 1343 struct mlx4_dev *mdev = info->dev; 1344 1345 /* When port type is eth, port mtu value isn't used. */ 1346 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) 1347 return -EINVAL; 1348 1349 sprintf(buf, "%d\n", 1350 ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); 1351 return strlen(buf); 1352} 1353 1354static ssize_t set_port_ib_mtu(struct device *dev, 1355 struct device_attribute *attr, 1356 const char *buf, size_t count) 1357{ 1358 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1359 port_mtu_attr); 1360 struct mlx4_dev *mdev = info->dev; 1361 struct mlx4_priv *priv = mlx4_priv(mdev); 1362 int err, port, mtu, ibta_mtu = -1; 1363 1364 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { 1365 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 1366 return -EINVAL; 1367 } 1368 1369 mtu = (int) simple_strtol(buf, NULL, 0); 1370 ibta_mtu = int_to_ibta_mtu(mtu); 1371 1372 if (ibta_mtu < 0) { 1373 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); 1374 return -EINVAL; 1375 } 1376 1377 mdev->caps.port_ib_mtu[info->port] = ibta_mtu; 1378 1379 mlx4_stop_sense(mdev); 1380 mutex_lock(&priv->port_mutex); 1381 mlx4_unregister_device(mdev); 1382 for (port = 1; port <= mdev->caps.num_ports; port++) { 1383 mlx4_CLOSE_PORT(mdev, port); 1384 err = mlx4_SET_PORT(mdev, port, -1); 1385 if (err) { 1386 mlx4_err(mdev, "Failed to set port %d, " 1387 "aborting\n", port); 1388 goto err_set_port; 1389 } 1390 } 1391 err = mlx4_register_device(mdev); 1392err_set_port: 1393 mutex_unlock(&priv->port_mutex); 1394 mlx4_start_sense(mdev); 1395 return err ? 
err : count; 1396} 1397 1398static int mlx4_load_fw(struct mlx4_dev *dev) 1399{ 1400 struct mlx4_priv *priv = mlx4_priv(dev); 1401 int err, unmap_flag = 0; 1402 1403 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 1404 GFP_HIGHUSER | __GFP_NOWARN, 0); 1405 if (!priv->fw.fw_icm) { 1406 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); 1407 return -ENOMEM; 1408 } 1409 1410 err = mlx4_MAP_FA(dev, priv->fw.fw_icm); 1411 if (err) { 1412 mlx4_err(dev, "MAP_FA command failed, aborting.\n"); 1413 goto err_free; 1414 } 1415 1416 err = mlx4_RUN_FW(dev); 1417 if (err) { 1418 mlx4_err(dev, "RUN_FW command failed, aborting.\n"); 1419 goto err_unmap_fa; 1420 } 1421 1422 return 0; 1423 1424err_unmap_fa: 1425 unmap_flag = mlx4_UNMAP_FA(dev); 1426 if (unmap_flag) 1427 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1428 1429err_free: 1430 if (!unmap_flag) 1431 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1432 return err; 1433} 1434 1435static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, 1436 int cmpt_entry_sz) 1437{ 1438 struct mlx4_priv *priv = mlx4_priv(dev); 1439 int err; 1440 int num_eqs; 1441 1442 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, 1443 cmpt_base + 1444 ((u64) (MLX4_CMPT_TYPE_QP * 1445 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1446 cmpt_entry_sz, dev->caps.num_qps, 1447 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1448 0, 0); 1449 if (err) 1450 goto err; 1451 1452 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, 1453 cmpt_base + 1454 ((u64) (MLX4_CMPT_TYPE_SRQ * 1455 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1456 cmpt_entry_sz, dev->caps.num_srqs, 1457 dev->caps.reserved_srqs, 0, 0); 1458 if (err) 1459 goto err_qp; 1460 1461 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, 1462 cmpt_base + 1463 ((u64) (MLX4_CMPT_TYPE_CQ * 1464 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1465 cmpt_entry_sz, dev->caps.num_cqs, 1466 dev->caps.reserved_cqs, 0, 0); 1467 if (err) 1468 goto err_srq; 1469 1470 num_eqs = (mlx4_is_master(dev)) ? 
dev->phys_caps.num_phys_eqs : 1471 dev->caps.num_eqs; 1472 err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, 1473 cmpt_base + 1474 ((u64) (MLX4_CMPT_TYPE_EQ * 1475 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1476 cmpt_entry_sz, num_eqs, num_eqs, 0, 0); 1477 if (err) 1478 goto err_cq; 1479 1480 return 0; 1481 1482err_cq: 1483 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1484 1485err_srq: 1486 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1487 1488err_qp: 1489 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1490 1491err: 1492 return err; 1493} 1494 1495static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 1496 struct mlx4_init_hca_param *init_hca, u64 icm_size) 1497{ 1498 struct mlx4_priv *priv = mlx4_priv(dev); 1499 u64 aux_pages; 1500 int num_eqs; 1501 int err, unmap_flag = 0; 1502 1503 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); 1504 if (err) { 1505 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); 1506 return err; 1507 } 1508 1509 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", 1510 (unsigned long long) icm_size >> 10, 1511 (unsigned long long) aux_pages << 2); 1512 1513 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 1514 GFP_HIGHUSER | __GFP_NOWARN, 0); 1515 if (!priv->fw.aux_icm) { 1516 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); 1517 return -ENOMEM; 1518 } 1519 1520 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); 1521 if (err) { 1522 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); 1523 goto err_free_aux; 1524 } 1525 1526 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); 1527 if (err) { 1528 mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); 1529 goto err_unmap_aux; 1530 } 1531 1532 1533 num_eqs = (mlx4_is_master(dev)) ? 
dev->phys_caps.num_phys_eqs : 1534 dev->caps.num_eqs; 1535 err = mlx4_init_icm_table(dev, &priv->eq_table.table, 1536 init_hca->eqc_base, dev_cap->eqc_entry_sz, 1537 num_eqs, num_eqs, 0, 0); 1538 if (err) { 1539 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); 1540 goto err_unmap_cmpt; 1541 } 1542 1543 /* 1544 * Reserved MTT entries must be aligned up to a cacheline 1545 * boundary, since the FW will write to them, while the driver 1546 * writes to all other MTT entries. (The variable 1547 * dev->caps.mtt_entry_sz below is really the MTT segment 1548 * size, not the raw entry size) 1549 */ 1550 dev->caps.reserved_mtts = 1551 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, 1552 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; 1553 1554 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 1555 init_hca->mtt_base, 1556 dev->caps.mtt_entry_sz, 1557 dev->caps.num_mtts, 1558 dev->caps.reserved_mtts, 1, 0); 1559 if (err) { 1560 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); 1561 goto err_unmap_eq; 1562 } 1563 1564 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, 1565 init_hca->dmpt_base, 1566 dev_cap->dmpt_entry_sz, 1567 dev->caps.num_mpts, 1568 dev->caps.reserved_mrws, 1, 1); 1569 if (err) { 1570 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); 1571 goto err_unmap_mtt; 1572 } 1573 1574 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, 1575 init_hca->qpc_base, 1576 dev_cap->qpc_entry_sz, 1577 dev->caps.num_qps, 1578 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1579 0, 0); 1580 if (err) { 1581 mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); 1582 goto err_unmap_dmpt; 1583 } 1584 1585 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, 1586 init_hca->auxc_base, 1587 dev_cap->aux_entry_sz, 1588 dev->caps.num_qps, 1589 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1590 0, 0); 1591 if (err) { 1592 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); 1593 goto 
err_unmap_qp; 1594 } 1595 1596 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, 1597 init_hca->altc_base, 1598 dev_cap->altc_entry_sz, 1599 dev->caps.num_qps, 1600 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1601 0, 0); 1602 if (err) { 1603 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); 1604 goto err_unmap_auxc; 1605 } 1606 1607 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, 1608 init_hca->rdmarc_base, 1609 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 1610 dev->caps.num_qps, 1611 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1612 0, 0); 1613 if (err) { 1614 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 1615 goto err_unmap_altc; 1616 } 1617 1618 err = mlx4_init_icm_table(dev, &priv->cq_table.table, 1619 init_hca->cqc_base, 1620 dev_cap->cqc_entry_sz, 1621 dev->caps.num_cqs, 1622 dev->caps.reserved_cqs, 0, 0); 1623 if (err) { 1624 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); 1625 goto err_unmap_rdmarc; 1626 } 1627 1628 err = mlx4_init_icm_table(dev, &priv->srq_table.table, 1629 init_hca->srqc_base, 1630 dev_cap->srq_entry_sz, 1631 dev->caps.num_srqs, 1632 dev->caps.reserved_srqs, 0, 0); 1633 if (err) { 1634 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); 1635 goto err_unmap_cq; 1636 } 1637 1638 /* 1639 * For flow steering device managed mode it is required to use 1640 * mlx4_init_icm_table. For B0 steering mode it's not strictly 1641 * required, but for simplicity just map the whole multicast 1642 * group table now. The table isn't very big and it's a lot 1643 * easier than trying to track ref counts. 
1644 */ 1645 err = mlx4_init_icm_table(dev, &priv->mcg_table.table, 1646 init_hca->mc_base, 1647 mlx4_get_mgm_entry_size(dev), 1648 dev->caps.num_mgms + dev->caps.num_amgms, 1649 dev->caps.num_mgms + dev->caps.num_amgms, 1650 0, 0); 1651 if (err) { 1652 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); 1653 goto err_unmap_srq; 1654 } 1655 1656 return 0; 1657 1658err_unmap_srq: 1659 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1660 1661err_unmap_cq: 1662 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1663 1664err_unmap_rdmarc: 1665 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1666 1667err_unmap_altc: 1668 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1669 1670err_unmap_auxc: 1671 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1672 1673err_unmap_qp: 1674 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1675 1676err_unmap_dmpt: 1677 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1678 1679err_unmap_mtt: 1680 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1681 1682err_unmap_eq: 1683 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1684 1685err_unmap_cmpt: 1686 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1687 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1688 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1689 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1690 1691err_unmap_aux: 1692 unmap_flag = mlx4_UNMAP_ICM_AUX(dev); 1693 if (unmap_flag) 1694 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1695 1696err_free_aux: 1697 if (!unmap_flag) 1698 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1699 1700 return err; 1701} 1702 1703static void mlx4_free_icms(struct mlx4_dev *dev) 1704{ 1705 struct mlx4_priv *priv = mlx4_priv(dev); 1706 1707 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); 1708 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1709 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1710 mlx4_cleanup_icm_table(dev, 
&priv->qp_table.rdmarc_table); 1711 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1712 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1713 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1714 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1715 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1716 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1717 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1718 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1719 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1720 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1721 1722 if (!mlx4_UNMAP_ICM_AUX(dev)) 1723 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1724 else 1725 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1726} 1727 1728static void mlx4_slave_exit(struct mlx4_dev *dev) 1729{ 1730 struct mlx4_priv *priv = mlx4_priv(dev); 1731 1732 mutex_lock(&priv->cmd.slave_cmd_mutex); 1733 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) 1734 mlx4_warn(dev, "Failed to close slave function.\n"); 1735 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1736} 1737 1738static int map_bf_area(struct mlx4_dev *dev) 1739{ 1740 struct mlx4_priv *priv = mlx4_priv(dev); 1741 resource_size_t bf_start; 1742 resource_size_t bf_len; 1743 int err = 0; 1744 1745 if (!dev->caps.bf_reg_size) 1746 return -ENXIO; 1747 1748 bf_start = pci_resource_start(dev->pdev, 2) + 1749 (dev->caps.num_uars << PAGE_SHIFT); 1750 bf_len = pci_resource_len(dev->pdev, 2) - 1751 (dev->caps.num_uars << PAGE_SHIFT); 1752 priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); 1753 if (!priv->bf_mapping) 1754 err = -ENOMEM; 1755 1756 return err; 1757} 1758 1759static void unmap_bf_area(struct mlx4_dev *dev) 1760{ 1761 if (mlx4_priv(dev)->bf_mapping) 1762 io_mapping_free(mlx4_priv(dev)->bf_mapping); 1763} 1764 1765int mlx4_read_clock(struct mlx4_dev *dev) 1766{ 1767 u32 clockhi, clocklo, clockhi1; 1768 cycle_t cycles; 1769 int i; 1770 
struct mlx4_priv *priv = mlx4_priv(dev); 1771 1772 if (!priv->clock_mapping) 1773 return -ENOTSUPP; 1774 1775 for (i = 0; i < 10; i++) { 1776 clockhi = swab32(readl(priv->clock_mapping)); 1777 clocklo = swab32(readl(priv->clock_mapping + 4)); 1778 clockhi1 = swab32(readl(priv->clock_mapping)); 1779 if (clockhi == clockhi1) 1780 break; 1781 } 1782 1783 cycles = (u64) clockhi << 32 | (u64) clocklo; 1784 1785 return cycles; 1786} 1787EXPORT_SYMBOL_GPL(mlx4_read_clock); 1788 1789 1790static int map_internal_clock(struct mlx4_dev *dev) 1791{ 1792 struct mlx4_priv *priv = mlx4_priv(dev); 1793 1794 priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, 1795 priv->fw.clock_bar) + 1796 priv->fw.clock_offset, MLX4_CLOCK_SIZE); 1797 1798 if (!priv->clock_mapping) 1799 return -ENOMEM; 1800 1801 return 0; 1802} 1803 1804 1805int mlx4_get_internal_clock_params(struct mlx4_dev *dev, 1806 struct mlx4_clock_params *params) 1807{ 1808 struct mlx4_priv *priv = mlx4_priv(dev); 1809 1810 if (mlx4_is_slave(dev)) 1811 return -ENOTSUPP; 1812 if (!params) 1813 return -EINVAL; 1814 1815 params->bar = priv->fw.clock_bar; 1816 params->offset = priv->fw.clock_offset; 1817 params->size = MLX4_CLOCK_SIZE; 1818 1819 return 0; 1820} 1821EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); 1822 1823static void unmap_internal_clock(struct mlx4_dev *dev) 1824{ 1825 struct mlx4_priv *priv = mlx4_priv(dev); 1826 1827 if (priv->clock_mapping) 1828 iounmap(priv->clock_mapping); 1829} 1830 1831static void mlx4_close_hca(struct mlx4_dev *dev) 1832{ 1833 unmap_internal_clock(dev); 1834 unmap_bf_area(dev); 1835 if (mlx4_is_slave(dev)) { 1836 mlx4_slave_exit(dev); 1837 } else { 1838 mlx4_CLOSE_HCA(dev, 0); 1839 mlx4_free_icms(dev); 1840 1841 if (!mlx4_UNMAP_FA(dev)) 1842 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); 1843 else 1844 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1845 } 1846} 1847 1848static int mlx4_init_slave(struct mlx4_dev *dev) 1849{ 1850 struct mlx4_priv *priv = mlx4_priv(dev); 1851 
u64 dma = (u64) priv->mfunc.vhcr_dma; 1852 int num_of_reset_retries = NUM_OF_RESET_RETRIES; 1853 int ret_from_reset = 0; 1854 u32 slave_read; 1855 u32 cmd_channel_ver; 1856 1857 mutex_lock(&priv->cmd.slave_cmd_mutex); 1858 priv->cmd.max_cmds = 1; 1859 mlx4_warn(dev, "Sending reset\n"); 1860 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 1861 MLX4_COMM_TIME); 1862 /* if we are in the middle of flr the slave will try 1863 * NUM_OF_RESET_RETRIES times before leaving.*/ 1864 if (ret_from_reset) { 1865 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { 1866 msleep(SLEEP_TIME_IN_RESET); 1867 while (ret_from_reset && num_of_reset_retries) { 1868 mlx4_warn(dev, "slave is currently in the" 1869 "middle of FLR. retrying..." 1870 "(try num:%d)\n", 1871 (NUM_OF_RESET_RETRIES - 1872 num_of_reset_retries + 1)); 1873 ret_from_reset = 1874 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 1875 0, MLX4_COMM_TIME); 1876 num_of_reset_retries = num_of_reset_retries - 1; 1877 } 1878 } else 1879 goto err; 1880 } 1881 1882 /* check the driver version - the slave I/F revision 1883 * must match the master's */ 1884 slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); 1885 cmd_channel_ver = mlx4_comm_get_version(); 1886 1887 if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != 1888 MLX4_COMM_GET_IF_REV(slave_read)) { 1889 mlx4_err(dev, "slave driver version is not supported" 1890 " by the master\n"); 1891 goto err; 1892 } 1893 1894 mlx4_warn(dev, "Sending vhcr0\n"); 1895 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, 1896 MLX4_COMM_TIME)) 1897 goto err; 1898 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, 1899 MLX4_COMM_TIME)) 1900 goto err; 1901 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, 1902 MLX4_COMM_TIME)) 1903 goto err; 1904 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) 1905 goto err; 1906 1907 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1908 return 0; 1909 1910err: 1911 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); 1912 
mutex_unlock(&priv->cmd.slave_cmd_mutex); 1913 return -EIO; 1914} 1915 1916static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) 1917{ 1918 int i; 1919 1920 for (i = 1; i <= dev->caps.num_ports; i++) { 1921 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) 1922 dev->caps.gid_table_len[i] = 1923 mlx4_get_slave_num_gids(dev, 0); 1924 else 1925 dev->caps.gid_table_len[i] = 1; 1926 dev->caps.pkey_table_len[i] = 1927 dev->phys_caps.pkey_phys_table_len[i] - 1; 1928 } 1929} 1930 1931static int choose_log_fs_mgm_entry_size(int qp_per_entry) 1932{ 1933 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; 1934 1935 for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; 1936 i++) { 1937 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) 1938 break; 1939 } 1940 1941 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; 1942} 1943 1944static void choose_steering_mode(struct mlx4_dev *dev, 1945 struct mlx4_dev_cap *dev_cap) 1946{ 1947 int nvfs; 1948 1949 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); 1950 if (high_rate_steer && !mlx4_is_mfunc(dev)) { 1951 dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | 1952 MLX4_DEV_CAP_FLAG_VEP_UC_STEER); 1953 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; 1954 } 1955 1956 if (mlx4_log_num_mgm_entry_size == -1 && 1957 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && 1958 (!mlx4_is_mfunc(dev) || 1959 (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && 1960 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= 1961 MLX4_MIN_MGM_LOG_ENTRY_SIZE) { 1962 dev->oper_log_mgm_entry_size = 1963 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); 1964 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; 1965 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 1966 } else { 1967 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && 1968 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 1969 dev->caps.steering_mode = MLX4_STEERING_MODE_B0; 1970 else { 1971 dev->caps.steering_mode = 
MLX4_STEERING_MODE_A0;

		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
			mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
				  "set to use B0 steering. Falling back to A0 steering mode.\n");
		}
		/* Use the module parameter when set, otherwise the built-in
		 * default MGM entry size. */
		dev->oper_log_mgm_entry_size =
			mlx4_log_num_mgm_entry_size > 0 ?
			mlx4_log_num_mgm_entry_size :
			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
	}
	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
		 "log_num_mgm_entry_size = %d\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode),
		 dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
}

/*
 * Bring the HCA to an operational state.
 *
 * PF path: QUERY_FW -> load FW -> MOD_STAT_CFG -> QUERY_DEV_CAP ->
 * profile/ICM sizing -> INIT_HCA -> optional timestamp clock mapping.
 * VF path: init the slave channel and fetch caps from the master.
 * Ordering is mandated by the firmware command interface; error labels
 * unwind in strict reverse order of acquisition.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx4_init_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv	  *priv = mlx4_priv(dev);
	struct mlx4_dev_cap	   *dev_cap = NULL;
	struct mlx4_adapter	   adapter;
	struct mlx4_mod_stat_cfg   mlx4_cfg;
	struct mlx4_profile	   profile;
	struct mlx4_init_hca_param init_hca;
	u64 icm_size;
	int err;

	if (!mlx4_is_slave(dev)) {
		err = mlx4_QUERY_FW(dev);
		if (err) {
			/* -EACCES: another function owns the FW; not fatal. */
			if (err == -EACCES)
				mlx4_info(dev, "non-primary physical function, skipping.\n");
			else
				mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
			return err;
		}

		err = mlx4_load_fw(dev);
		if (err) {
			mlx4_err(dev, "Failed to start FW, aborting.\n");
			return err;
		}

		/* Force the FW ICM page size to 4K (log_pg_sz = 0). */
		mlx4_cfg.log_pg_sz_m = 1;
		mlx4_cfg.log_pg_sz = 0;
		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
		if (err)
			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

		dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
		if (!dev_cap) {
			mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
			err = -ENOMEM;
			goto err_stop_fw;
		}

		err = mlx4_dev_cap(dev, dev_cap);
		if (err) {
			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
			goto err_stop_fw;
		}

		choose_steering_mode(dev, dev_cap);

		if (mlx4_is_master(dev))
			mlx4_parav_master_pf_caps(dev);

		process_mod_param_profile(&profile);
		if (dev->caps.steering_mode ==
		    MLX4_STEERING_MODE_DEVICE_MANAGED)
			profile.num_mcg = MLX4_FS_NUM_MCG;

		/* mlx4_make_profile returns the required ICM size, or a
		 * negative errno encoded in a u64. */
		icm_size = mlx4_make_profile(dev, &profile, dev_cap,
					     &init_hca);
		if ((long long) icm_size < 0) {
			err = icm_size;
			goto err_stop_fw;
		}

		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
		/* uar_page_sz is encoded as log2(page size) - 12 */
		init_hca.uar_page_sz = PAGE_SHIFT - 12;

		err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
		if (err)
			goto err_stop_fw;

		init_hca.mw_enable = 1;

		err = mlx4_INIT_HCA(dev, &init_hca);
		if (err) {
			mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
			goto err_free_icm;
		}

		/*
		 * Read HCA frequency by QUERY_HCA command
		 */
		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
			memset(&init_hca, 0, sizeof(init_hca));
			err = mlx4_QUERY_HCA(dev, &init_hca);
			if (err) {
				mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
			} else {
				dev->caps.hca_core_clock =
					init_hca.hca_core_clock;
			}

			/* In case we got HCA frequency 0 - disable timestamping
			 * to avoid dividing by zero
			 */
			if (!dev->caps.hca_core_clock) {
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
				mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported.");
			} else if (map_internal_clock(dev)) {
				/* Map internal clock,
				 * in case of failure disable timestamping
				 */
				dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
				mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n");
			}
		}
	} else {
		err = mlx4_init_slave(dev);
		if (err) {
			mlx4_err(dev, "Failed to initialize slave\n");
			return err;
		}

		err = mlx4_slave_cap(dev);
		if (err) {
			mlx4_err(dev, "Failed to obtain slave caps\n");
			goto err_close;
		}
	}

	/* Blue flame is an optimization; failure to map it is non-fatal. */
	if (map_bf_area(dev))
		mlx4_dbg(dev, "Failed to map blue flame area\n");

	/* Only the master set the ports, all the rest got it from it.*/
	if (!mlx4_is_slave(dev))
		mlx4_set_port_mask(dev);

	err = mlx4_QUERY_ADAPTER(dev, &adapter);
	if (err) {
		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
		goto unmap_bf;
	}

	priv->eq_table.inta_pin = adapter.inta_pin;
	memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
	memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd));
	dev->vsd_vendor_id = adapter.vsd_vendor_id;

	if (!mlx4_is_slave(dev))
		kfree(dev_cap);

	return 0;

unmap_bf:
	if (!mlx4_is_slave(dev))
		unmap_internal_clock(dev);
	unmap_bf_area(dev);

	if (mlx4_is_slave(dev)) {
		/* proxy/tunnel QP arrays were allocated by mlx4_slave_cap() */
		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);
	}

err_close:
	if (mlx4_is_slave(dev))
		mlx4_slave_exit(dev);
	else
		mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
	if (!mlx4_is_slave(dev))
		mlx4_free_icms(dev);

err_stop_fw:
	if (!mlx4_is_slave(dev)) {
		if (!mlx4_UNMAP_FA(dev))
			mlx4_free_icm(dev, priv->fw.fw_icm, 0);
		else
			pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
		kfree(dev_cap);
	}
	return err;
}

/*
 * Build the per-port (and, on the master, per-VF) counter lists and the
 * allocation bitmap. Returns -ENOENT when the device has no counter
 * support at all (callers treat that as non-fatal).
 */
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int nent_pow2, port_indx, vf_index, num_counters;
	int res, index = 0;
	struct counter_index *new_counter_index;


	if
(!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	/* Switch the device to extended counters when every counter
	 * supports the extended format. */
	if (!mlx4_is_slave(dev) &&
	    dev->caps.max_counters == dev->caps.max_extended_counters) {
		res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
			       MLX4_CMD_SET_IF_STAT,
			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
		if (res) {
			mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res);
			return res;
		}
	}

	mutex_init(&priv->counters_table.mutex);

	if (mlx4_is_slave(dev)) {
		/* A slave only records the default counter handed to it by
		 * the master for each port (0xFF means "none"). */
		for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
			INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
			if (dev->caps.def_counter_index[port_indx] != 0xFF) {
				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
				if (!new_counter_index)
					return -ENOMEM;
				new_counter_index->index = dev->caps.def_counter_index[port_indx];
				list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]);
			}
		}
		mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n",
			 __func__, dev->caps.num_ports, dev->caps.num_ports);
		return 0;
	}

	nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);

	for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
		INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
		/* allocating 2 counters per port for PFs */
		/* For the PF, the ETH default counters are 0,2; */
		/* and the RoCE default counters are 1,3 */
		for (num_counters = 0; num_counters < 2; num_counters++, index++) {
			new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
			if (!new_counter_index)
				return -ENOMEM;
			new_counter_index->index = index;
			list_add_tail(&new_counter_index->list,
				      &priv->counters_table.global_port_list[port_indx]);
		}
	}

	if (mlx4_is_master(dev)) {
		/* Pre-assign one default counter per VF per port; once the
		 * pool (nent_pow2 - 2) is exhausted, hand out the sink
		 * counter instead. */
		for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) {
			for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
				INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]);
				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
				if (!new_counter_index)
					return -ENOMEM;
				if (index < nent_pow2 - 2) {
					new_counter_index->index = index;
					index++;
				} else {
					new_counter_index->index = MLX4_SINK_COUNTER_INDEX;
				}

				list_add_tail(&new_counter_index->list,
					      &priv->counters_table.vf_list[vf_index][port_indx]);
			}
		}

		/* Bitmap manages dynamic allocations above the reserved
		 * range [0, index). */
		res = mlx4_bitmap_init(&priv->counters_table.bitmap,
				       nent_pow2, nent_pow2 - 1,
				       index, 1);
		mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n",
			 __func__, index, dev->num_vfs);
	} else {
		res = mlx4_bitmap_init(&priv->counters_table.bitmap,
				       nent_pow2, nent_pow2 - 1,
				       index, 1);
		mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n",
			 __func__, index, dev->caps.num_ports);
	}

	return 0;

}

/*
 * Free every counter_index node on the per-port (and per-VF) lists and
 * tear down the allocation bitmap. VF counters are reset in hardware
 * before being released.
 */
static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i, j;
	struct counter_index *port, *tmp_port;
	struct counter_index *vf, *tmp_vf;

	mutex_lock(&priv->counters_table.mutex);

	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) {
		for (i = 0; i < dev->caps.num_ports; i++) {
			list_for_each_entry_safe(port, tmp_port,
						 &priv->counters_table.global_port_list[i],
						 list) {
				list_del(&port->list);
				kfree(port);
			}
		}
		if (!mlx4_is_slave(dev)) {
			for (i = 0; i < dev->num_vfs; i++) {
				for (j = 0; j < dev->caps.num_ports; j++) {
					list_for_each_entry_safe(vf, tmp_vf,
								 &priv->counters_table.vf_list[i][j],
								 list) {
						/* clear the counter statistic */
						if (__mlx4_clear_if_stat(dev, vf->index))
							mlx4_dbg(dev, "%s: reset counter %d failed\n",
								 __func__, vf->index);
list_del(&vf->list); 2290 kfree(vf); 2291 } 2292 } 2293 } 2294 mlx4_bitmap_cleanup(&priv->counters_table.bitmap); 2295 } 2296 } 2297 mutex_unlock(&priv->counters_table.mutex); 2298} 2299 2300int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave) 2301{ 2302 struct mlx4_priv *priv = mlx4_priv(dev); 2303 int i, first; 2304 struct counter_index *vf, *tmp_vf; 2305 2306 /* clean VF's counters for the next useg */ 2307 if (slave > 0 && slave <= dev->num_vfs) { 2308 mlx4_dbg(dev, "%s: free counters of slave(%d)\n" 2309 , __func__, slave); 2310 2311 mutex_lock(&priv->counters_table.mutex); 2312 for (i = 0; i < dev->caps.num_ports; i++) { 2313 first = 0; 2314 list_for_each_entry_safe(vf, tmp_vf, 2315 &priv->counters_table.vf_list[slave - 1][i], 2316 list) { 2317 /* clear the counter statistic */ 2318 if (__mlx4_clear_if_stat(dev, vf->index)) 2319 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2320 __func__, vf->index); 2321 if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) { 2322 mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n" 2323 , __func__, vf->index, slave, i + 1); 2324 mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR); 2325 list_del(&vf->list); 2326 kfree(vf); 2327 } else { 2328 mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n" 2329 , __func__, vf->index, slave, i + 1); 2330 } 2331 } 2332 } 2333 mutex_unlock(&priv->counters_table.mutex); 2334 } 2335 2336 return 0; 2337} 2338 2339int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx) 2340{ 2341 struct mlx4_priv *priv = mlx4_priv(dev); 2342 struct counter_index *new_counter_index; 2343 2344 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2345 return -ENOENT; 2346 2347 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2348 (port < 0) || (port > MLX4_MAX_PORTS)) { 2349 mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n", 2350 __func__, slave, port); 2351 return -EINVAL; 2352 } 2353 2354 /* handle old 
guest request does not support request by port index */ 2355 if (port == 0) { 2356 *idx = MLX4_SINK_COUNTER_INDEX; 2357 mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n" 2358 , __func__, *idx, slave, port); 2359 return 0; 2360 } 2361 2362 mutex_lock(&priv->counters_table.mutex); 2363 2364 *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap); 2365 /* if no resources return the default counter of the slave and port */ 2366 if (*idx == -1) { 2367 if (slave == 0) { /* its the ethernet counter ?????? */ 2368 new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2369 struct counter_index, 2370 list); 2371 } else { 2372 new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, 2373 struct counter_index, 2374 list); 2375 } 2376 2377 *idx = new_counter_index->index; 2378 mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n" 2379 , __func__, *idx, slave, port); 2380 goto out; 2381 } 2382 2383 if (slave == 0) { /* native or master */ 2384 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2385 if (!new_counter_index) 2386 goto no_mem; 2387 new_counter_index->index = *idx; 2388 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2389 } else { 2390 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2391 if (!new_counter_index) 2392 goto no_mem; 2393 new_counter_index->index = *idx; 2394 list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]); 2395 } 2396 2397 mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n" 2398 , __func__, *idx, slave, port); 2399out: 2400 mutex_unlock(&priv->counters_table.mutex); 2401 return 0; 2402 2403no_mem: 2404 mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR); 2405 mutex_unlock(&priv->counters_table.mutex); 2406 *idx = MLX4_SINK_COUNTER_INDEX; 2407 mlx4_dbg(dev, "%s: failed err (%d)\n" 2408 
, __func__, -ENOMEM); 2409 return -ENOMEM; 2410} 2411 2412int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx) 2413{ 2414 u64 out_param; 2415 int err; 2416 struct mlx4_priv *priv = mlx4_priv(dev); 2417 struct counter_index *new_counter_index, *c_index; 2418 2419 if (mlx4_is_mfunc(dev)) { 2420 err = mlx4_cmd_imm(dev, 0, &out_param, 2421 ((u32) port) << 8 | (u32) RES_COUNTER, 2422 RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, 2423 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 2424 if (!err) { 2425 *idx = get_param_l(&out_param); 2426 if (*idx == MLX4_SINK_COUNTER_INDEX) 2427 return -ENOSPC; 2428 2429 mutex_lock(&priv->counters_table.mutex); 2430 c_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2431 struct counter_index, 2432 list); 2433 mutex_unlock(&priv->counters_table.mutex); 2434 if (c_index->index == *idx) 2435 return -EEXIST; 2436 2437 if (mlx4_is_slave(dev)) { 2438 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2439 if (!new_counter_index) { 2440 mlx4_counter_free(dev, port, *idx); 2441 return -ENOMEM; 2442 } 2443 new_counter_index->index = *idx; 2444 mutex_lock(&priv->counters_table.mutex); 2445 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2446 mutex_unlock(&priv->counters_table.mutex); 2447 mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" 2448 , __func__, *idx, port); 2449 } 2450 } 2451 return err; 2452 } 2453 return __mlx4_counter_alloc(dev, 0, port, idx); 2454} 2455EXPORT_SYMBOL_GPL(mlx4_counter_alloc); 2456 2457void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx) 2458{ 2459 /* check if native or slave and deletes acordingly */ 2460 struct mlx4_priv *priv = mlx4_priv(dev); 2461 struct counter_index *pf, *tmp_pf; 2462 struct counter_index *vf, *tmp_vf; 2463 int first; 2464 2465 2466 if (idx == MLX4_SINK_COUNTER_INDEX) { 2467 mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" 2468 , __func__, idx, port); 2469 
return; 2470 } 2471 2472 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2473 (port < 0) || (port > MLX4_MAX_PORTS)) { 2474 mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" 2475 , __func__, slave, idx); 2476 return; 2477 } 2478 2479 mutex_lock(&priv->counters_table.mutex); 2480 if (slave == 0) { 2481 first = 0; 2482 list_for_each_entry_safe(pf, tmp_pf, 2483 &priv->counters_table.global_port_list[port - 1], 2484 list) { 2485 /* the first 2 counters are reserved */ 2486 if (pf->index == idx) { 2487 /* clear the counter statistic */ 2488 if (__mlx4_clear_if_stat(dev, pf->index)) 2489 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2490 __func__, pf->index); 2491 if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { 2492 list_del(&pf->list); 2493 kfree(pf); 2494 mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" 2495 , __func__, idx, slave, port); 2496 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2497 goto out; 2498 } else { 2499 mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n" 2500 , __func__, idx, slave, port); 2501 goto out; 2502 } 2503 } 2504 first++; 2505 } 2506 mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" 2507 , __func__, idx, slave, port); 2508 } else { 2509 first = 0; 2510 list_for_each_entry_safe(vf, tmp_vf, 2511 &priv->counters_table.vf_list[slave - 1][port - 1], 2512 list) { 2513 /* the first element is reserved */ 2514 if (vf->index == idx) { 2515 /* clear the counter statistic */ 2516 if (__mlx4_clear_if_stat(dev, vf->index)) 2517 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2518 __func__, vf->index); 2519 if (first) { 2520 list_del(&vf->list); 2521 kfree(vf); 2522 mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", 2523 __func__, idx, slave, port); 2524 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2525 goto out; 2526 } else { 2527 mlx4_dbg(dev, "%s: can't delete 
default slave (%d) counter index %d for port %d\n" 2528 , __func__, slave, idx, port); 2529 goto out; 2530 } 2531 } 2532 first++; 2533 } 2534 mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" 2535 , __func__, slave, idx, port); 2536 } 2537 2538out: 2539 mutex_unlock(&priv->counters_table.mutex); 2540} 2541 2542void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) 2543{ 2544 u64 in_param = 0; 2545 struct mlx4_priv *priv = mlx4_priv(dev); 2546 struct counter_index *counter, *tmp_counter; 2547 int first = 0; 2548 2549 if (mlx4_is_mfunc(dev)) { 2550 set_param_l(&in_param, idx); 2551 mlx4_cmd(dev, in_param, 2552 ((u32) port) << 8 | (u32) RES_COUNTER, 2553 RES_OP_RESERVE, 2554 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, 2555 MLX4_CMD_WRAPPED); 2556 2557 if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { 2558 mutex_lock(&priv->counters_table.mutex); 2559 list_for_each_entry_safe(counter, tmp_counter, 2560 &priv->counters_table.global_port_list[port - 1], 2561 list) { 2562 if (counter->index == idx && first++) { 2563 list_del(&counter->list); 2564 kfree(counter); 2565 mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" 2566 , __func__, idx, port); 2567 mutex_unlock(&priv->counters_table.mutex); 2568 return; 2569 } 2570 } 2571 mutex_unlock(&priv->counters_table.mutex); 2572 } 2573 2574 return; 2575 } 2576 __mlx4_counter_free(dev, 0, port, idx); 2577} 2578EXPORT_SYMBOL_GPL(mlx4_counter_free); 2579 2580int __mlx4_clear_if_stat(struct mlx4_dev *dev, 2581 u8 counter_index) 2582{ 2583 struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; 2584 int err = 0; 2585 u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31); 2586 2587 if (counter_index == MLX4_SINK_COUNTER_INDEX) 2588 return -EINVAL; 2589 2590 if (mlx4_is_slave(dev)) 2591 return 0; 2592 2593 if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); 2594 if (IS_ERR(if_stat_mailbox)) { 2595 err = PTR_ERR(if_stat_mailbox); 2596 return err; 2597 } 2598 2599 err = mlx4_cmd_box(dev, 0, 
			   if_stat_mailbox->dma, if_stat_in_mod, 0,
			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
			   MLX4_CMD_NATIVE);

	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
	return err;
}

/*
 * Return the default counter index for @slave on @port: the head of the
 * owner's per-port list, or the sink counter for IB ports (per-port
 * counters are only tracked for Ethernet here).
 */
u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct counter_index *new_counter_index;

	if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) {
		mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n",
			 __func__, MLX4_SINK_COUNTER_INDEX, slave, port);
		return (u8)MLX4_SINK_COUNTER_INDEX;
	}

	mutex_lock(&priv->counters_table.mutex);
	if (slave == 0) {
		new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
					       struct counter_index,
					       list);
	} else {
		new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
					       struct counter_index,
					       list);
	}
	mutex_unlock(&priv->counters_table.mutex);

	mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n",
		 __func__, new_counter_index->index, slave, port);


	return (u8)new_counter_index->index;
}

/*
 * Accumulate QUERY_IF_STAT results of every counter attached to @port
 * into @vport_stats. @reset non-zero also clears the hardware counters
 * (bit 31 of the input modifier). Only extended-format counters
 * (cnt_mode == 1) are summed. Returns 0 or the first command error.
 */
int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port,
				 struct mlx4_en_vport_stats *vport_stats,
				 int reset)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
	union mlx4_counter *counter;
	int err = 0;
	u32 if_stat_in_mod;
	struct counter_index *vport, *tmp_vport;

	if (!vport_stats)
		return -EINVAL;

	if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(if_stat_mailbox)) {
		err = PTR_ERR(if_stat_mailbox);
		return err;
	}

	mutex_lock(&priv->counters_table.mutex);
	list_for_each_entry_safe(vport, tmp_vport,
				 &priv->counters_table.global_port_list[port - 1],
				 list) {
		/* the sink counter carries no real statistics */
		if (vport->index == MLX4_SINK_COUNTER_INDEX)
			continue;

		memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter));
		if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31);
		err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma,
				   if_stat_in_mod, 0,
				   MLX4_CMD_QUERY_IF_STAT,
				   MLX4_CMD_TIME_CLASS_C,
				   MLX4_CMD_NATIVE);
		if (err) {
			mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n",
				 __func__, vport->index);
			goto if_stat_out;
		}
		counter = (union mlx4_counter *)if_stat_mailbox->buf;
		if ((counter->control.cnt_mode & 0xf) == 1) {
			vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames);
			vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames);
			vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames);
			vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames);
			vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames);
			vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames);
			vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets);
			vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets);
			vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets);
			vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets);
			vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets);
			vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets);
			vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames);
			vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames);
			vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames);
		}
	}

if_stat_out:
	mutex_unlock(&priv->counters_table.mutex);
	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);

	return err;
}
EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats);

/*
 * Initialize every software resource table (UAR, PD, XRCD, MR, MCG, EQ,
 * CQ, SRQ, QP, counters) and configure the ports. Each step's failure
 * unwinds all previous steps through the error-label ladder at the end.
 */
static int mlx4_setup_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int port;
	__be32 ib_port_default_caps;

	err = mlx4_init_uar_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "user access region table (err=%d), aborting.\n",
			 err);
		return err;
	}

	err = mlx4_uar_alloc(dev, &priv->driver_uar);
	if (err) {
		mlx4_err(dev, "Failed to allocate driver access region "
			 "(err=%d), aborting.\n", err);
		goto err_uar_table_free;
	}

	/* map the driver UAR page for posting doorbells from the kernel */
	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!priv->kar) {
		mlx4_err(dev, "Couldn't map kernel access region, "
			 "aborting.\n");
		err = -ENOMEM;
		goto err_uar_free;
	}

	err = mlx4_init_pd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "protection domain table (err=%d), aborting.\n", err);
		goto err_kar_unmap;
	}

	err = mlx4_init_xrcd_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "reliable connection domain table (err=%d), "
			 "aborting.\n", err);
		goto err_pd_table_free;
	}

	err = mlx4_init_mr_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "memory region table (err=%d), aborting.\n", err);
		goto err_xrcd_table_free;
	}

	/* the MCG table is owned by the master/native side only */
	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_mcg_table(dev);
		if (err) {
			mlx4_err(dev, "Failed to initialize "
				 "multicast group table (err=%d), aborting.\n",
				 err);
			goto err_mr_table_free;
		}
	}

	err = mlx4_init_eq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "event queue table (err=%d), aborting.\n", err);
		goto err_mcg_table_free;
	}

	err = mlx4_cmd_use_events(dev);
	if (err) {
		mlx4_err(dev, "Failed to switch to event-driven "
			 "firmware commands (err=%d), aborting.\n", err);
		goto err_eq_table_free;
	}

	/* NOP generates an interrupt on the async EQ: verifies IRQ routing */
	err = mlx4_NOP(dev);
	if (err) {
		if (dev->flags & MLX4_FLAG_MSI_X) {
			mlx4_warn(dev, "NOP command failed to generate MSI-X "
				  "interrupt IRQ %d).\n",
				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_warn(dev, "Trying again without MSI-X.\n");
		} else {
			mlx4_err(dev, "NOP command failed to generate interrupt "
				 "(IRQ %d), aborting.\n",
				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
		}

		goto err_cmd_poll;
	}

	mlx4_dbg(dev, "NOP command IRQ test passed\n");

	err = mlx4_init_cq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "completion queue table (err=%d), aborting.\n", err);
		goto err_cmd_poll;
	}

	err = mlx4_init_srq_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "shared receive queue table (err=%d), aborting.\n",
			 err);
		goto err_cq_table_free;
	}

	err = mlx4_init_qp_table(dev);
	if (err) {
		mlx4_err(dev, "Failed to initialize "
			 "queue pair table (err=%d), aborting.\n", err);
		goto err_srq_table_free;
	}

	/* -ENOENT just means the HCA has no counters; not fatal */
	err = mlx4_init_counters_table(dev);
	if (err && err != -ENOENT) {
		mlx4_err(dev, "Failed to initialize counters table (err=%d), "
			 "aborting.\n", err);
		goto err_qp_table_free;
	}

	if (!mlx4_is_slave(dev)) {
		for (port = 1; port <= dev->caps.num_ports; port++) {
			ib_port_default_caps = 0;
			err = mlx4_get_port_ib_caps(dev, port,
						    &ib_port_default_caps);
			if (err)
				mlx4_warn(dev, "failed to get port %d default "
					  "ib capabilities (%d). Continuing "
					  "with caps = 0\n", port, err);
			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;

			/* initialize per-slave default ib port capabilities */
			if (mlx4_is_master(dev)) {
				int i;
				for (i = 0; i < dev->num_slaves; i++) {
					if (i == mlx4_master_func_num(dev))
						continue;
					priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
						ib_port_default_caps;
				}
			}

			dev->caps.port_ib_mtu[port] = IB_MTU_4096;

			/* slaves get a pkey table length of -1 (use default) */
			err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
					    dev->caps.pkey_table_len[port] : -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d (err=%d), "
					 "aborting\n", port, err);
				goto err_counters_table_free;
			}
		}
	}

	return 0;

err_counters_table_free:
	mlx4_cleanup_counters_table(dev);

err_qp_table_free:
	mlx4_cleanup_qp_table(dev);

err_srq_table_free:
	mlx4_cleanup_srq_table(dev);

err_cq_table_free:
	mlx4_cleanup_cq_table(dev);

err_cmd_poll:
	mlx4_cmd_use_polling(dev);

err_eq_table_free:
	mlx4_cleanup_eq_table(dev);

err_mcg_table_free:
	if (!mlx4_is_slave(dev))
		mlx4_cleanup_mcg_table(dev);

err_mr_table_free:
	mlx4_cleanup_mr_table(dev);

err_xrcd_table_free:
	mlx4_cleanup_xrcd_table(dev);

err_pd_table_free:
	mlx4_cleanup_pd_table(dev);

err_kar_unmap:
	iounmap(priv->kar);

err_uar_free:
	mlx4_uar_free(dev, &priv->driver_uar);

err_uar_table_free:
	mlx4_cleanup_uar_table(dev);
	return err;
}

/*
 * Try to enable MSI-X with as many vectors as ports/CPUs justify,
 * shrinking the request when the PCI core offers fewer vectors. Falls
 * back to legacy INTx (no_msi) or, if no IRQ can be obtained at all,
 * leaves the device with zero completion vectors (no_irq).
 */
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct msix_entry *entries;
	int nreq = min_t(int, dev->caps.num_ports *
			 min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT)
				+ MSIX_LEGACY_SZ, MAX_MSIX);
	int err;
	int i;

	if (msi_x) {
		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
			     nreq);

		/* msi_x > 1 caps the vector count (non-SRIOV only) */
		if (msi_x > 1 && !mlx4_is_mfunc(dev))
			nreq = min_t(int, nreq, msi_x);

		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
		if (!entries)
			goto no_msi;

		for (i = 0; i < nreq; ++i)
			entries[i].entry = i;

	retry:
		/* pci_enable_msix: 0 on success, >0 = vectors available */
		err = pci_enable_msix(dev->pdev, entries, nreq);
		if (err) {
			/* Try again if at least 2 vectors are available */
			if (err > 1) {
				mlx4_info(dev, "Requested %d vectors, "
					  "but only %d MSI-X vectors available, "
					  "trying again\n", nreq, err);
				nreq = err;
				goto retry;
			}
			kfree(entries);
			/* if error, or can't alloc even 1 IRQ */
			if (err < 0) {
				mlx4_err(dev, "No IRQs left, device can't "
					 "be started.\n");
				goto no_irq;
			}
			goto no_msi;
		}

		if (nreq <
		    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
			/*Working in legacy mode , all EQ's shared*/
			dev->caps.comp_pool = 0;
			dev->caps.num_comp_vectors = nreq - 1;
		} else {
			dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ;
			dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
		}
		for (i = 0; i < nreq; ++i)
			priv->eq_table.eq[i].irq = entries[i].vector;

		dev->flags |= MLX4_FLAG_MSI_X;

		kfree(entries);
		return;
	}

no_msi:
	dev->caps.num_comp_vectors = 1;
	dev->caps.comp_pool = 0;

	/* both EQ slots share the single legacy IRQ line */
	for (i = 0; i < 2; ++i)
		priv->eq_table.eq[i].irq = dev->pdev->irq;
	return;
no_irq:
	dev->caps.num_comp_vectors = 0;
	dev->caps.comp_pool = 0;
	return;
}

/*
 * Publish device-level sysfs attributes (fw_ver, hca, board_id).
 * Failures are logged but otherwise ignored — the attributes are
 * informational only.
 */
static void
mlx4_init_hca_info(struct mlx4_dev *dev)
{
	struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info;

	info->dev = dev;

	info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO,
							show_firmware_version, NULL);
	if (device_create_file(&dev->pdev->dev, &info->firmware_attr))
		mlx4_err(dev, "Failed to add file firmware version");

	info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca,
							 NULL);
	if (device_create_file(&dev->pdev->dev, &info->hca_attr))
		mlx4_err(dev, "Failed to add file hca type");

	info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO,
							   show_board, NULL);
	if (device_create_file(&dev->pdev->dev, &info->board_attr))
		mlx4_err(dev, "Failed to add file board id type");
}

/*
 * Set up the per-port state: MAC/VLAN tables (non-slaves only) and the
 * sysfs attributes for port type and MTU. On multi-function devices the
 * attributes are read-only; otherwise they are writable via
 * set_port_type/set_port_ib_mtu. On sysfs failure info->port is set to
 * -1 so mlx4_cleanup_port_info() skips removal.
 */
static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
{
	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
	int err = 0;

	info->dev = dev;
	info->port = port;
	if (!mlx4_is_slave(dev)) {
		mlx4_init_mac_table(dev, &info->mac_table);
		mlx4_init_vlan_table(dev, &info->vlan_table);
		info->base_qpn = mlx4_get_base_qpn(dev, port);
	}

	sprintf(info->dev_name, "mlx4_port%d", port);
	info->port_attr.attr.name = info->dev_name;
	if (mlx4_is_mfunc(dev))
		info->port_attr.attr.mode = S_IRUGO;
	else {
		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
		info->port_attr.store = set_port_type;
	}
	info->port_attr.show = show_port_type;
	sysfs_attr_init(&info->port_attr.attr);

	err = device_create_file(&dev->pdev->dev, &info->port_attr);
	if (err) {
		mlx4_err(dev, "Failed to create file for port %d\n", port);
		info->port = -1;
	}

	sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
	info->port_mtu_attr.attr.name = info->dev_mtu_name;
	if (mlx4_is_mfunc(dev))
		info->port_mtu_attr.attr.mode = S_IRUGO;
	else {
		info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
		info->port_mtu_attr.store = set_port_ib_mtu;
	}
	info->port_mtu_attr.show = show_port_ib_mtu;
	sysfs_attr_init(&info->port_mtu_attr.attr);

	err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
	if (err) {
		mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
		device_remove_file(&info->dev->pdev->dev, &info->port_attr);
		info->port = -1;
	}

	return err;
}

/* Remove the device-level sysfs attributes created by mlx4_init_hca_info(). */
static void
mlx4_cleanup_hca_info(struct mlx4_hca_info *info)
{
	device_remove_file(&info->dev->pdev->dev, &info->firmware_attr);
	device_remove_file(&info->dev->pdev->dev, &info->board_attr);
	device_remove_file(&info->dev->pdev->dev, &info->hca_attr);
}

/* Remove the per-port sysfs attributes (skipped when init failed, port == -1). */
static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
{
	if (info->port < 0)
		return;

	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
	device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
}

/*
 * Allocate the per-port steering state and initialize its promiscuous-QP
 * and steering-entry lists. Returns 0 or -ENOMEM.
 */
static int mlx4_init_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int num_entries = dev->caps.num_ports;
	int i, j;

	priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
	if (!priv->steer)
		return -ENOMEM;

	for (i = 0; i < num_entries; i++)
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
		}
	return 0;
}

/*
 * Free all steering state: promiscuous QPs, steering entries and their
 * duplicate lists, then the steer array itself.
 */
static void mlx4_clear_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_steer_index *entry, *tmp_entry;
	struct mlx4_promisc_qp *pqp, *tmp_pqp;
	int num_entries = dev->caps.num_ports;
	int i, j;

	for (i = 0; i < num_entries; i++) {
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			list_for_each_entry_safe(pqp, tmp_pqp,
						 &priv->steer[i].promisc_qps[j],
						 list) {
				list_del(&pqp->list);
				kfree(pqp);
			}
			list_for_each_entry_safe(entry, tmp_entry,
						 &priv->steer[i].steer_entries[j],
						 list) {
				list_del(&entry->list);
				list_for_each_entry_safe(pqp, tmp_pqp,
							 &entry->duplicates,
							 list) {
					list_del(&pqp->list);
					kfree(pqp);
				}
				kfree(entry);
			}
		}
	}
	kfree(priv->steer);
}

/* Flatten PCI slot.function into a single index (8 functions per slot). */
static int extended_func_num(struct pci_dev *pdev)
{
	return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
}

#define MLX4_OWNER_BASE	0x8069c
#define MLX4_OWNER_SIZE	4

/*
 * Read the device-ownership word from BAR0. Returns 0 when this function
 * may claim the device, 1 when another PF already owns it, or a negative
 * errno on mapping/offline failure.
 */
static int mlx4_get_ownership(struct mlx4_dev *dev)
{
	void __iomem *owner;
	u32 ret;

	if (pci_channel_offline(dev->pdev))
		return -EIO;

	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
			MLX4_OWNER_SIZE);
	if (!owner) {
		mlx4_err(dev, "Failed to obtain ownership bit\n");
		return -ENOMEM;
	}

	ret = readl(owner);
	iounmap(owner);
	return (int) !!ret;
}

/*
 * Clear the ownership word so another function may claim the device.
 * The 1s sleep gives the firmware time to observe the release —
 * presumably a hardware requirement; confirm against the PRM.
 */
static void mlx4_free_ownership(struct mlx4_dev *dev)
{
	void __iomem *owner;

	if (pci_channel_offline(dev->pdev))
		return;

	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
			MLX4_OWNER_SIZE);
	if (!owner) {
		mlx4_err(dev, "Failed to obtain ownership bit\n");
		return;
	}
	writel(0, owner);
	msleep(1000);
	iounmap(owner);
}

static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
{
	struct mlx4_priv *priv;
	struct mlx4_dev *dev;
	int err;
	int port;
	int nvfs, prb_vf;

	pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(&pdev->dev, "Cannot enable PCI device, "
			"aborting.\n");
		return err;
	}

	/* per-device num_vfs/probe_vf values from the module parameters */
	mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs);
	mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf);
	if (nvfs > MLX4_MAX_NUM_VF) {
		dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n",
			nvfs, MLX4_MAX_NUM_VF);
		return -EINVAL;
	}

	if (nvfs < 0) {
		dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
		return
-EINVAL;
	}
	/*
	 * Check for BARs.  BAR 0 (DCS, the device command/control space) is
	 * required on physical functions; BAR 2 (UAR space) on everyone.
	 */
	if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		/*
		 * NOTE(review): pci_resource_flags() is wider than int on
		 * LP64 targets; "0x%x" may truncate -- verify the format
		 * specifier matches the flag type here.
		 */
		dev_err(&pdev->dev, "Missing DCS, aborting."
			"(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
			pci_dev_data, pci_resource_flags(pdev, 0));
		err = -ENODEV;
		goto err_disable_pdev;
	}
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	/* Prefer 64-bit DMA masks, falling back to 32-bit if unsupported. */
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
			 "consistent PCI DMA mask.\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
				"aborting.\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);

	priv = kzalloc(sizeof *priv, GFP_KERNEL);
	if (!priv) {
		dev_err(&pdev->dev, "Device struct alloc failed, "
			"aborting.\n");
		err = -ENOMEM;
		goto err_release_regions;
	}

	dev = &priv->dev;
	dev->pdev = pdev;
	INIT_LIST_HEAD(&priv->dev_list);
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);

	mutex_init(&priv->port_mutex);

	INIT_LIST_HEAD(&priv->pgdir_list);
	mutex_init(&priv->pgdir_mutex);

	INIT_LIST_HEAD(&priv->bf_list);
	mutex_init(&priv->bf_mutex);

	dev->rev_id = pdev->revision;
	dev->numa_node = dev_to_node(&pdev->dev);
	/* Detect if this device is a virtual function */
	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
		/* When acting as pf, we normally skip vfs unless explicitly
		 * requested to probe them. */
		if (nvfs && extended_func_num(pdev) > prb_vf) {
			mlx4_warn(dev, "Skipping virtual function:%d\n",
				  extended_func_num(pdev));
			err = -ENODEV;
			goto err_free_dev;
		}
		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
		dev->flags |= MLX4_FLAG_SLAVE;
	} else {
		/* We reset the device and enable SRIOV only for physical
		 * devices.  Try to claim ownership on the device;
		 * if already taken, skip -- do not allow multiple PFs */
		err = mlx4_get_ownership(dev);
		if (err) {
			if (err < 0)
				goto err_free_dev;
			else {
				mlx4_warn(dev, "Multiple PFs not yet supported."
					  " Skipping PF.\n");
				err = -EINVAL;
				goto err_free_dev;
			}
		}

		if (nvfs) {
			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs);
			/* SR-IOV enable failure is non-fatal: continue as
			 * a plain PF without VFs. */
			err = pci_enable_sriov(pdev, nvfs);
			if (err) {
				mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
					 err);
				err = 0;
			} else {
				mlx4_warn(dev, "Running in master mode\n");
				dev->flags |= MLX4_FLAG_SRIOV |
					      MLX4_FLAG_MASTER;
				dev->num_vfs = nvfs;
			}
		}

		atomic_set(&priv->opreq_count, 0);
		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);

		/*
		 * Now reset the HCA before we touch the PCI capabilities or
		 * attempt a firmware command, since a boot ROM may have left
		 * the HCA in an undefined state.
		 */
		err = mlx4_reset(dev);
		if (err) {
			mlx4_err(dev, "Failed to reset HCA, aborting.\n");
			goto err_sriov;
		}
	}

/* Re-entered below if mlx4_init_hca() reports this PF is not primary. */
slave_start:
	err = mlx4_cmd_init(dev);
	if (err) {
		mlx4_err(dev, "Failed to init command interface, aborting.\n");
		goto err_sriov;
	}

	/* In slave functions, the communication channel must be initialized
	 * before posting commands. Also, init num_slaves before calling
	 * mlx4_init_hca */
	if (mlx4_is_mfunc(dev)) {
		if (mlx4_is_master(dev))
			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
		else {
			dev->num_slaves = 0;
			err = mlx4_multi_func_init(dev);
			if (err) {
				mlx4_err(dev, "Failed to init slave mfunc"
					 " interface, aborting.\n");
				goto err_cmd;
			}
		}
	}

	err = mlx4_init_hca(dev);
	if (err) {
		if (err == -EACCES) {
			/* Not primary Physical function
			 * Running in slave mode */
			mlx4_cmd_cleanup(dev);
			dev->flags |= MLX4_FLAG_SLAVE;
			dev->flags &= ~MLX4_FLAG_MASTER;
			goto slave_start;
		} else
			goto err_mfunc;
	}

	/* In master functions, the communication channel must be initialized
	 * after obtaining its address from fw */
	if (mlx4_is_master(dev)) {
		err = mlx4_multi_func_init(dev);
		if (err) {
			mlx4_err(dev, "Failed to init master mfunc"
				 "interface, aborting.\n");
			goto err_close;
		}
	}

	err = mlx4_alloc_eq_table(dev);
	if (err)
		goto err_master_mfunc;

	priv->msix_ctl.pool_bm = 0;
	mutex_init(&priv->msix_ctl.pool_lock);

	mlx4_enable_msi_x(dev);

	/* no MSIX and no shared IRQ */
	if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) {
		err = -ENOSPC;
		goto err_free_eq;
	}

	if ((mlx4_is_mfunc(dev)) &&
	    !(dev->flags & MLX4_FLAG_MSI_X)) {
		err = -ENOSYS;
		mlx4_err(dev, "INTx is not supported in multi-function mode."
			 " aborting.\n");
		goto err_free_eq;
	}

	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_steering(dev);
		if (err)
			goto err_free_eq;
	}

	err = mlx4_setup_hca(dev);
	/* -EBUSY under MSI-X on a single-function device: retry once in
	 * plain interrupt mode with a single completion vector. */
	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
	    !mlx4_is_mfunc(dev)) {
		dev->flags &= ~MLX4_FLAG_MSI_X;
		dev->caps.num_comp_vectors = 1;
		dev->caps.comp_pool = 0;
		pci_disable_msix(pdev);
		err = mlx4_setup_hca(dev);
	}

	if (err)
		goto err_steer;

	mlx4_init_quotas(dev);
	mlx4_init_hca_info(dev);

	for (port = 1; port <= dev->caps.num_ports; port++) {
		err = mlx4_init_port_info(dev, port);
		if (err)
			goto err_port;
	}

	err = mlx4_register_device(dev);
	if (err)
		goto err_port;

	mlx4_request_modules(dev);

	mlx4_sense_init(dev);
	mlx4_start_sense(dev);

	priv->pci_dev_data = pci_dev_data;
	pci_set_drvdata(pdev, dev);

	return 0;

	/*
	 * Error unwind: each label below releases what was acquired after
	 * the preceding label's resource, in reverse order of setup.
	 *
	 * NOTE(review): mlx4_init_hca_info() (called above) has no matching
	 * mlx4_cleanup_hca_info() anywhere on this error path -- verify the
	 * sysfs attribute files are not leaked on probe failure.
	 */
err_port:
	for (--port; port >= 1; --port)
		mlx4_cleanup_port_info(&priv->port[port]);

	mlx4_cleanup_counters_table(dev);
	mlx4_cleanup_qp_table(dev);
	mlx4_cleanup_srq_table(dev);
	mlx4_cleanup_cq_table(dev);
	mlx4_cmd_use_polling(dev);
	mlx4_cleanup_eq_table(dev);
	mlx4_cleanup_mcg_table(dev);
	mlx4_cleanup_mr_table(dev);
	mlx4_cleanup_xrcd_table(dev);
	mlx4_cleanup_pd_table(dev);
	mlx4_cleanup_uar_table(dev);

err_steer:
	if (!mlx4_is_slave(dev))
		mlx4_clear_steering(dev);

err_free_eq:
	mlx4_free_eq_table(dev);

err_master_mfunc:
	if (mlx4_is_master(dev)) {
		mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
		mlx4_multi_func_cleanup(dev);
	}

	if (mlx4_is_slave(dev)) {
		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);
	}

err_close:
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

	mlx4_close_hca(dev);

err_mfunc:
	if (mlx4_is_slave(dev))
		mlx4_multi_func_cleanup(dev);

err_cmd:
	mlx4_cmd_cleanup(dev);

err_sriov:
	if (dev->flags & MLX4_FLAG_SRIOV)
		pci_disable_sriov(pdev);

	if (!mlx4_is_slave(dev))
		mlx4_free_ownership(dev);

err_free_dev:
	kfree(priv);

err_release_regions:
	pci_release_regions(pdev);

err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

/*
 * PCI probe entry point: record the version string on the FreeBSD device
 * (bsddev) and hand off to the shared init path with the id-table flags.
 */
static int __devinit mlx4_init_one(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	device_set_desc(pdev->dev.bsddev, mlx4_version);
	return __mlx4_init_one(pdev, id->driver_data);
}

/*
 * PCI remove entry point: full teardown in reverse order of
 * __mlx4_init_one()/mlx4_setup_hca().  Safe to call when probe never
 * completed (drvdata NULL) -- everything is guarded by "if (dev)".
 */
static void mlx4_remove_one(struct pci_dev *pdev)
{
	struct mlx4_dev *dev = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
	int p;

	if (dev) {
		/* in SRIOV it is not allowed to unload the pf's
		 * driver while there are alive vf's */
		if (mlx4_is_master(dev)) {
			if (mlx4_how_many_lives_vf(dev))
				mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n");
		}
		mlx4_stop_sense(dev);
		mlx4_unregister_device(dev);

		mlx4_cleanup_hca_info(&priv->hca_info);
		for (p = 1; p <= dev->caps.num_ports; p++) {
			mlx4_cleanup_port_info(&priv->port[p]);
			mlx4_CLOSE_PORT(dev, p);
		}

		/* free slave-owned resources first, structs later */
		if (mlx4_is_master(dev))
			mlx4_free_resource_tracker(dev,
						   RES_TR_FREE_SLAVES_ONLY);

		mlx4_cleanup_counters_table(dev);
		mlx4_cleanup_qp_table(dev);
		mlx4_cleanup_srq_table(dev);
		mlx4_cleanup_cq_table(dev);
		mlx4_cmd_use_polling(dev);
		mlx4_cleanup_eq_table(dev);
		mlx4_cleanup_mcg_table(dev);
		mlx4_cleanup_mr_table(dev);
		mlx4_cleanup_xrcd_table(dev);
		mlx4_cleanup_pd_table(dev);

		if (mlx4_is_master(dev))
mlx4_free_resource_tracker(dev,
						   RES_TR_FREE_STRUCTS_ONLY);

		iounmap(priv->kar);
		mlx4_uar_free(dev, &priv->driver_uar);
		mlx4_cleanup_uar_table(dev);
		if (!mlx4_is_slave(dev))
			mlx4_clear_steering(dev);
		mlx4_free_eq_table(dev);
		if (mlx4_is_master(dev))
			mlx4_multi_func_cleanup(dev);
		mlx4_close_hca(dev);
		if (mlx4_is_slave(dev))
			mlx4_multi_func_cleanup(dev);
		mlx4_cmd_cleanup(dev);

		if (dev->flags & MLX4_FLAG_MSI_X)
			pci_disable_msix(pdev);
		if (dev->flags & MLX4_FLAG_SRIOV) {
			mlx4_warn(dev, "Disabling SR-IOV\n");
			pci_disable_sriov(pdev);
		}

		if (!mlx4_is_slave(dev))
			mlx4_free_ownership(dev);

		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);

		kfree(priv);
		pci_release_regions(pdev);
		pci_disable_device(pdev);
		pci_set_drvdata(pdev, NULL);
	}
}

/*
 * Re-apply the port types recorded before a restart: restore the
 * possible_type array under the port mutex (with port sensing paused)
 * and push the saved current types back to the device.  Returns the
 * result of mlx4_change_port_types().
 */
static int restore_current_port_types(struct mlx4_dev *dev,
				      enum mlx4_port_type *types,
				      enum mlx4_port_type *poss_types)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err, i;

	mlx4_stop_sense(dev);
	mutex_lock(&priv->port_mutex);
	/* caps arrays are 1-based; the saved snapshots are 0-based */
	for (i = 0; i < dev->caps.num_ports; i++)
		dev->caps.possible_type[i + 1] = poss_types[i];
	err = mlx4_change_port_types(dev, types);
	mlx4_start_sense(dev);
	mutex_unlock(&priv->port_mutex);
	return err;
}

/*
 * Tear the device down and probe it again, preserving the pci_dev_data
 * flags and the configured port types across the restart.  Returns 0 on
 * success (port-type restore failure is only logged) or the re-init
 * error.
 */
int mlx4_restart_one(struct pci_dev *pdev)
{
	struct mlx4_dev *dev = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
	enum mlx4_port_type curr_type[MLX4_MAX_PORTS];
	enum mlx4_port_type poss_type[MLX4_MAX_PORTS];
	int pci_dev_data, err, i;

	/* snapshot state that mlx4_remove_one() will destroy */
	pci_dev_data = priv->pci_dev_data;
	for (i = 0; i < dev->caps.num_ports; i++) {
		curr_type[i] = dev->caps.port_type[i + 1];
		poss_type[i] = dev->caps.possible_type[i + 1];
	}

	mlx4_remove_one(pdev);
	err = __mlx4_init_one(pdev, pci_dev_data);
	if (err)
		return err;

	/* re-probe replaced the mlx4_dev; fetch the new one */
	dev = pci_get_drvdata(pdev);
	err = restore_current_port_types(dev, curr_type, poss_type);
	if (err)
		mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n",
			 err);
	return 0;
}

/*
 * PCI id table.  driver_data carries MLX4_PCI_DEV_* flags consumed by
 * __mlx4_init_one().
 */
static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
	/* MT25408 "Hermon" SDR */
	{ PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" DDR */
	{ PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" QDR */
	{ PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" DDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" QDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" EN 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
	{ PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	{ PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26478 ConnectX2 40GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25400 Family [ConnectX-2 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
	/* MT27500 Family [ConnectX-3] */
	{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
	/* MT27500 Family [ConnectX-3
Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
	{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

/*
 * PCI error recovery: on any channel failure simply remove the device.
 * Permanent failure disconnects; otherwise ask for a slot reset.
 */
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	mlx4_remove_one(pdev);

	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

/*
 * After a slot reset, re-probe from scratch.
 * NOTE(review): pci_dev_data is passed as 0 here (and in resume below),
 * so the MLX4_PCI_DEV_* flags from the id table are lost across
 * recovery -- confirm this is intended.
 */
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
	int ret = __mlx4_init_one(pdev, 0);

	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static const struct pci_error_handlers mlx4_err_handler = {
	.error_detected	= mlx4_pci_err_detected,
	.slot_reset	= mlx4_pci_slot_reset,
};

/* Suspend is implemented as a full device teardown... */
static int suspend(struct pci_dev *pdev, pm_message_t state)
{
	mlx4_remove_one(pdev);

	return 0;
}

/* ...and resume as a full re-probe (see the pci_dev_data note above
 * mlx4_pci_slot_reset). */
static int resume(struct pci_dev *pdev)
{
	return __mlx4_init_one(pdev, 0);
}

static struct pci_driver mlx4_driver = {
	.name		= DRV_NAME,
	.id_table	= mlx4_pci_table,
	.probe		= mlx4_init_one,
	.remove		= __devexit_p(mlx4_remove_one),
	.suspend	= suspend,
	.resume		= resume,
	.err_handler	= &mlx4_err_handler,
};

/*
 * Validate all module parameters at load time.  Returns 0 when every
 * parameter is in range, -1 otherwise (load is refused).  The dbdf2val
 * string parameters are parsed/defaulted via update_defaults() /
 * mlx4_fill_dbdf2val_tbl().
 */
static int __init mlx4_verify_params(void)
{
	int status;

	status = update_defaults(&port_type_array);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	status = update_defaults(&num_vfs);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	status = update_defaults(&probe_vf);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	if (msi_x < 0) {
		pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
		return -1;
	}

	if ((log_num_mac < 0) || (log_num_mac > 7)) {
		pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
		return -1;
	}

	/* obsolete parameters: warn but do not fail the load */
	if (log_num_vlan != 0)
		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
			   MLX4_LOG_NUM_VLANS);

	if (mlx4_set_4k_mtu != -1)
		pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");

	if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) {
		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
		return -1;
	}

	if (mlx4_log_num_mgm_entry_size != -1 &&
	    (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
	     mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
		pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
			   "in legal range (-1 or %d..%d)\n",
			   mlx4_log_num_mgm_entry_size,
			   MLX4_MIN_MGM_LOG_ENTRY_SIZE,
			   MLX4_MAX_MGM_LOG_ENTRY_SIZE);
		return -1;
	}

	/* profile values are log2 resource counts */
	if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
		pr_warning("mlx4_core: bad log_num_qp: %d\n",
			   mod_param_profile.num_qp);
		return -1;
	}

	if (mod_param_profile.num_srq < 10) {
		pr_warning("mlx4_core: too low log_num_srq: %d\n",
			   mod_param_profile.num_srq);
		return -1;
	}

	if (mod_param_profile.num_cq < 10) {
		pr_warning("mlx4_core: too low log_num_cq: %d\n",
			   mod_param_profile.num_cq);
		return -1;
	}

	if (mod_param_profile.num_mpt < 10) {
		pr_warning("mlx4_core: too low log_num_mpt: %d\n",
			   mod_param_profile.num_mpt);
		return -1;
	}

	/* 0 means "use the driver default", hence the extra guard */
	if (mod_param_profile.num_mtt_segs &&
	    mod_param_profile.num_mtt_segs < 15) {
		pr_warning("mlx4_core: too low log_num_mtt: %d\n",
			   mod_param_profile.num_mtt_segs);
		return -1;
	}

	if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) {
		pr_warning("mlx4_core: too high log_num_mtt: %d\n",
			   mod_param_profile.num_mtt_segs);
		return -1;
	}
	return 0;
}

/*
 * Module init: validate parameters, set up the catas-error poller and
 * the driver workqueue, optionally apply system tuning, then register
 * the PCI driver.  On registration failure everything is undone.
 */
static int __init mlx4_init(void)
{
	int ret;

	if (mlx4_verify_params())
		return -EINVAL;

	mlx4_catas_init();

	mlx4_wq = create_singlethread_workqueue("mlx4");
	if (!mlx4_wq)
		return -ENOMEM;

	if (enable_sys_tune)
		sys_tune_init();

	ret = pci_register_driver(&mlx4_driver);
	if (ret < 0)
		goto err;

	return 0;

err:
	if (enable_sys_tune)
		sys_tune_fini();

	destroy_workqueue(mlx4_wq);

	return ret;
}

/* Module exit: mirror of mlx4_init(). */
static void __exit mlx4_cleanup(void)
{
	if (enable_sys_tune)
		sys_tune_fini();

	pci_unregister_driver(&mlx4_driver);
	destroy_workqueue(mlx4_wq);
}

module_init_order(mlx4_init, SI_ORDER_MIDDLE);
module_exit(mlx4_cleanup);

/*
 * FreeBSD module glue: register an (empty) event handler so that other
 * kernel modules can declare a MODULE_DEPEND on "mlx4".  The real work
 * is done by the linuxkpi module_init/module_exit hooks above.
 */
#include <sys/module.h>
static int
mlx4_evhand(module_t mod, int event, void *arg)
{
	return (0);
}

static moduledata_t mlx4_mod = {
	.name = "mlx4",
	.evhand = mlx4_evhand,
};
MODULE_VERSION(mlx4, 1);
DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);