1/* 2 * Copyright (c) 2006 Intel Corporation. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33#include <linux/dma-mapping.h> 34#include <linux/err.h> 35#include <linux/interrupt.h> 36#include <linux/rbtree.h> 37#include <linux/mutex.h> 38#include <linux/spinlock.h> 39#include <linux/pci.h> 40#include <linux/miscdevice.h> 41#include <linux/random.h> 42 43#include <rdma/ib_cache.h> 44#include <rdma/ib_sa.h> 45#include "sa.h" 46 47MODULE_AUTHOR("Sean Hefty"); 48MODULE_DESCRIPTION("InfiniBand subnet administration caching"); 49MODULE_LICENSE("Dual BSD/GPL"); 50 51enum { 52 SA_DB_MAX_PATHS_PER_DEST = 0x7F, 53 SA_DB_MIN_RETRY_TIMER = 4000, /* 4 sec */ 54 SA_DB_MAX_RETRY_TIMER = 256000 /* 256 sec */ 55}; 56 57static int set_paths_per_dest(const char *val, struct kernel_param *kp); 58static unsigned long paths_per_dest = 0; 59module_param_call(paths_per_dest, set_paths_per_dest, param_get_ulong, 60 &paths_per_dest, 0644); 61MODULE_PARM_DESC(paths_per_dest, "Maximum number of paths to retrieve " 62 "to each destination (DGID). Set to 0 " 63 "to disable cache."); 64 65static int set_subscribe_inform_info(const char *val, struct kernel_param *kp); 66static char subscribe_inform_info = 1; 67module_param_call(subscribe_inform_info, set_subscribe_inform_info, 68 param_get_bool, &subscribe_inform_info, 0644); 69MODULE_PARM_DESC(subscribe_inform_info, 70 "Subscribe for SA InformInfo/Notice events."); 71 72static int do_refresh(const char *val, struct kernel_param *kp); 73module_param_call(refresh, do_refresh, NULL, NULL, 0200); 74 75static unsigned long retry_timer = SA_DB_MIN_RETRY_TIMER; 76 77enum sa_db_lookup_method { 78 SA_DB_LOOKUP_LEAST_USED, 79 SA_DB_LOOKUP_RANDOM 80}; 81 82static int set_lookup_method(const char *val, struct kernel_param *kp); 83static int get_lookup_method(char *buf, struct kernel_param *kp); 84static unsigned long lookup_method; 85module_param_call(lookup_method, set_lookup_method, get_lookup_method, 86 &lookup_method, 0644); 87MODULE_PARM_DESC(lookup_method, "Method used to return path records when " 88 "multiple paths exist to a given destination."); 89 90static void sa_db_add_dev(struct ib_device *device); 91static void sa_db_remove_dev(struct ib_device *device); 92 93static struct ib_client sa_db_client = { 94 .name = "local_sa", 95 .add = sa_db_add_dev, 96 .remove = sa_db_remove_dev 97}; 98 99static LIST_HEAD(dev_list); 100static DEFINE_MUTEX(lock); 101static rwlock_t rwlock; 102static struct workqueue_struct *sa_wq; 103static struct ib_sa_client sa_client; 104 105enum sa_db_state { 106 SA_DB_IDLE, 107 SA_DB_REFRESH, 108 SA_DB_DESTROY 109}; 110 111struct sa_db_port { 112 struct sa_db_device *dev; 113 struct ib_mad_agent *agent; 114 /* Limit number of outstanding MADs to SA to reduce SA flooding */ 115 struct ib_mad_send_buf *msg; 116 u16 sm_lid; 117 u8 sm_sl; 118 struct ib_inform_info *in_info; 119 struct ib_inform_info *out_info; 120 struct rb_root paths; 121 struct list_head update_list; 122 unsigned long update_id; 123 enum sa_db_state state; 124 struct work_struct work; 125 union ib_gid gid; 126 int port_num; 127}; 128 129struct sa_db_device { 130 struct list_head list; 131 struct ib_device *device; 132 struct ib_event_handler event_handler; 133 int start_port; 134 int port_count; 135 struct sa_db_port port[0]; 136}; 137 138struct ib_sa_iterator { 139 struct ib_sa_iterator *next; 140}; 141 142struct ib_sa_attr_iter { 143 struct ib_sa_iterator *iter; 144 unsigned long flags; 145}; 146 147struct ib_sa_attr_list { 148 struct ib_sa_iterator iter; 149 struct ib_sa_iterator *tail; 150 int update_id; 151 union ib_gid gid; 152 struct rb_node node; 153}; 154 155struct ib_path_rec_info { 156 struct ib_sa_iterator iter; /* keep first */ 157 struct ib_sa_path_rec rec; 158 unsigned long lookups; 159}; 160 161struct ib_sa_mad_iter { 162 struct ib_mad_recv_wc *recv_wc; 163 struct ib_mad_recv_buf *recv_buf; 164 int attr_size; 165 int attr_offset; 166 int data_offset; 167 int data_left; 168 void *attr; 169 u8 attr_data[0]; 170}; 171 172enum sa_update_type { 173 SA_UPDATE_FULL, 174 SA_UPDATE_ADD, 175 SA_UPDATE_REMOVE 176}; 177 178struct update_info { 179 struct list_head list; 180 union ib_gid gid; 181 enum sa_update_type type; 182}; 183 184struct sa_path_request { 185 struct work_struct work; 186 struct ib_sa_client *client; 187 void (*callback)(int, struct ib_sa_path_rec *, void *); 188 void *context; 189 struct ib_sa_path_rec path_rec; 190}; 191 192static void process_updates(struct sa_db_port *port); 193 194static void free_attr_list(struct ib_sa_attr_list *attr_list) 195{ 196 struct ib_sa_iterator *cur; 197 198 for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) { 199 attr_list->iter.next = cur->next; 200 kfree(cur); 201 } 202 attr_list->tail = &attr_list->iter; 203} 204 205static void remove_attr(struct rb_root *root, struct ib_sa_attr_list *attr_list) 206{ 207 rb_erase(&attr_list->node, root); 208 free_attr_list(attr_list); 209 kfree(attr_list); 210} 211 212static void remove_all_attrs(struct rb_root *root) 213{ 214 struct rb_node *node, *next_node; 215 struct ib_sa_attr_list *attr_list; 216 217 write_lock_irq(&rwlock); 218 for (node = rb_first(root); node; node = next_node) { 219 next_node = rb_next(node); 220 attr_list = rb_entry(node, struct ib_sa_attr_list, node); 221 remove_attr(root, attr_list); 222 } 223 write_unlock_irq(&rwlock); 224} 225 226static void remove_old_attrs(struct rb_root *root, unsigned long update_id) 227{ 228 struct rb_node *node, *next_node; 229 struct ib_sa_attr_list *attr_list; 230 231 write_lock_irq(&rwlock); 232 for (node = rb_first(root); node; node = next_node) { 233 next_node = rb_next(node); 234 attr_list = rb_entry(node, struct ib_sa_attr_list, node); 235 if (attr_list->update_id != update_id) 236 remove_attr(root, attr_list); 237 } 238 write_unlock_irq(&rwlock); 239} 240 241static struct ib_sa_attr_list *insert_attr_list(struct rb_root *root, 242 struct ib_sa_attr_list *attr_list) 243{ 244 struct rb_node **link = &root->rb_node; 245 struct rb_node *parent = NULL; 246 struct ib_sa_attr_list *cur_attr_list; 247 int cmp; 248 249 while (*link) { 250 parent = *link; 251 cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node); 252 cmp = memcmp(&cur_attr_list->gid, &attr_list->gid, 253 sizeof attr_list->gid); 254 if (cmp < 0) 255 link = &(*link)->rb_left; 256 else if (cmp > 0) 257 link = &(*link)->rb_right; 258 else 259 return cur_attr_list; 260 } 261 rb_link_node(&attr_list->node, parent, link); 262 rb_insert_color(&attr_list->node, root); 263 return NULL; 264} 265 266static struct ib_sa_attr_list *find_attr_list(struct rb_root *root, u8 *gid) 267{ 268 struct rb_node *node = root->rb_node; 269 struct ib_sa_attr_list *attr_list; 270 int cmp; 271 272 while (node) { 273 attr_list = rb_entry(node, struct ib_sa_attr_list, node); 274 cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid); 275 if (cmp < 0) 276 node = node->rb_left; 277 else if (cmp > 0) 278 node = node->rb_right; 279 else 280 return attr_list; 281 } 282 return NULL; 283} 284 285static int insert_attr(struct rb_root *root, unsigned long update_id, void *key, 286 struct ib_sa_iterator *iter) 287{ 288 struct ib_sa_attr_list *attr_list; 289 void *err; 290 291 write_lock_irq(&rwlock); 292 attr_list = find_attr_list(root, key); 293 if (!attr_list) { 294 write_unlock_irq(&rwlock); 295 attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL); 296 if (!attr_list) 297 return -ENOMEM; 298 299 attr_list->iter.next = NULL; 300 attr_list->tail = &attr_list->iter; 301 attr_list->update_id = update_id; 302 memcpy(attr_list->gid.raw, key, sizeof attr_list->gid); 303 304 write_lock_irq(&rwlock); 305 err = insert_attr_list(root, attr_list); 306 if (err) { 307 write_unlock_irq(&rwlock); 308 kfree(attr_list); 309 return PTR_ERR(err); 310 } 311 } else if (attr_list->update_id != update_id) { 312 free_attr_list(attr_list); 313 attr_list->update_id = update_id; 314 } 315 316 attr_list->tail->next = iter; 317 iter->next = NULL; 318 attr_list->tail = iter; 319 write_unlock_irq(&rwlock); 320 return 0; 321} 322 323static struct ib_sa_mad_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc) 324{ 325 struct ib_sa_mad_iter *iter; 326 struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad; 327 int attr_size, attr_offset; 328 329 attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8; 330 attr_size = 64; /* path record length */ 331 if (attr_offset < attr_size) 332 return ERR_PTR(-EINVAL); 333 334 iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL); 335 if (!iter) 336 return ERR_PTR(-ENOMEM); 337 338 iter->data_left = mad_recv_wc->mad_len - IB_MGMT_SA_HDR; 339 iter->recv_wc = mad_recv_wc; 340 iter->recv_buf = &mad_recv_wc->recv_buf; 341 iter->attr_offset = attr_offset; 342 iter->attr_size = attr_size; 343 return iter; 344} 345 346static void ib_sa_iter_free(struct ib_sa_mad_iter *iter) 347{ 348 kfree(iter); 349} 350 351static void *ib_sa_iter_next(struct ib_sa_mad_iter *iter) 352{ 353 struct ib_sa_mad *mad; 354 int left, offset = 0; 355 356 while (iter->data_left >= iter->attr_offset) { 357 while (iter->data_offset < IB_MGMT_SA_DATA) { 358 mad = (struct ib_sa_mad *) iter->recv_buf->mad; 359 360 left = IB_MGMT_SA_DATA - iter->data_offset; 361 if (left < iter->attr_size) { 362 /* copy first piece of the attribute */ 363 iter->attr = &iter->attr_data; 364 memcpy(iter->attr, 365 &mad->data[iter->data_offset], left); 366 offset = left; 367 break; 368 } else if (offset) { 369 /* copy the second piece of the attribute */ 370 memcpy(iter->attr + offset, &mad->data[0], 371 iter->attr_size - offset); 372 iter->data_offset = iter->attr_size - offset; 373 offset = 0; 374 } else { 375 iter->attr = &mad->data[iter->data_offset]; 376 iter->data_offset += iter->attr_size; 377 } 378 379 iter->data_left -= iter->attr_offset; 380 goto out; 381 } 382 iter->data_offset = 0; 383 iter->recv_buf = list_entry(iter->recv_buf->list.next, 384 struct ib_mad_recv_buf, list); 385 } 386 iter->attr = NULL; 387out: 388 return iter->attr; 389} 390 391/* 392 * Copy path records from a received response and insert them into our cache. 393 * A path record in the MADs are in network order, packed, and may 394 * span multiple MAD buffers, just to make our life hard. 395 */ 396static void update_path_db(struct sa_db_port *port, 397 struct ib_mad_recv_wc *mad_recv_wc, 398 enum sa_update_type type) 399{ 400 struct ib_sa_mad_iter *iter; 401 struct ib_path_rec_info *path_info; 402 void *attr; 403 int ret; 404 405 iter = ib_sa_iter_create(mad_recv_wc); 406 if (IS_ERR(iter)) 407 return; 408 409 port->update_id += (type == SA_UPDATE_FULL); 410 411 while ((attr = ib_sa_iter_next(iter)) && 412 (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) { 413 414 ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC); 415 416 ret = insert_attr(&port->paths, port->update_id, 417 path_info->rec.dgid.raw, &path_info->iter); 418 if (ret) { 419 kfree(path_info); 420 break; 421 } 422 } 423 ib_sa_iter_free(iter); 424 425 if (type == SA_UPDATE_FULL) 426 remove_old_attrs(&port->paths, port->update_id); 427} 428 429static struct ib_mad_send_buf *get_sa_msg(struct sa_db_port *port, 430 struct update_info *update) 431{ 432 struct ib_ah_attr ah_attr; 433 struct ib_mad_send_buf *msg; 434 435 msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR, 436 IB_MGMT_SA_DATA, GFP_KERNEL); 437 if (IS_ERR(msg)) 438 return NULL; 439 440 memset(&ah_attr, 0, sizeof ah_attr); 441 ah_attr.dlid = port->sm_lid; 442 ah_attr.sl = port->sm_sl; 443 ah_attr.port_num = port->port_num; 444 445 msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); 446 if (IS_ERR(msg->ah)) { 447 ib_free_send_mad(msg); 448 return NULL; 449 } 450 451 msg->timeout_ms = retry_timer; 452 msg->retries = 0; 453 msg->context[0] = port; 454 msg->context[1] = update; 455 return msg; 456} 457 458static __be64 form_tid(u32 hi_tid) 459{ 460 static atomic_t tid; 461 return cpu_to_be64((((u64) hi_tid) << 32) | 462 ((u32) atomic_inc_return(&tid))); 463} 464 465static void format_path_req(struct sa_db_port *port, 466 struct update_info *update, 467 struct ib_mad_send_buf *msg) 468{ 469 struct ib_sa_mad *mad = msg->mad; 470 struct ib_sa_path_rec path_rec; 471 472 mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; 473 mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; 474 mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; 475 mad->mad_hdr.method = IB_SA_METHOD_GET_TABLE; 476 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); 477 mad->mad_hdr.tid = form_tid(msg->mad_agent->hi_tid); 478 479 mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH; 480 481 path_rec.sgid = port->gid; 482 path_rec.numb_path = (u8) paths_per_dest; 483 484 if (update->type == SA_UPDATE_ADD) { 485 mad->sa_hdr.comp_mask |= IB_SA_PATH_REC_DGID; 486 memcpy(&path_rec.dgid, &update->gid, sizeof path_rec.dgid); 487 } 488 489 ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC); 490} 491 492static int send_query(struct sa_db_port *port, 493 struct update_info *update) 494{ 495 int ret; 496 497 port->msg = get_sa_msg(port, update); 498 if (!port->msg) 499 return -ENOMEM; 500 501 format_path_req(port, update, port->msg); 502 503 ret = ib_post_send_mad(port->msg, NULL); 504 if (ret) 505 goto err; 506 507 return 0; 508 509err: 510 ib_destroy_ah(port->msg->ah); 511 ib_free_send_mad(port->msg); 512 return ret; 513} 514 515static void add_update(struct sa_db_port *port, u8 *gid, 516 enum sa_update_type type) 517{ 518 struct update_info *update; 519 520 update = kmalloc(sizeof *update, GFP_KERNEL); 521 if (update) { 522 if (gid) 523 memcpy(&update->gid, gid, sizeof update->gid); 524 update->type = type; 525 list_add(&update->list, &port->update_list); 526 } 527 528 if (port->state == SA_DB_IDLE) { 529 port->state = SA_DB_REFRESH; 530 process_updates(port); 531 } 532} 533 534static void clean_update_list(struct sa_db_port *port) 535{ 536 struct update_info *update; 537 538 while (!list_empty(&port->update_list)) { 539 update = list_entry(port->update_list.next, 540 struct update_info, list); 541 list_del(&update->list); 542 kfree(update); 543 } 544} 545 546static int notice_handler(int status, struct ib_inform_info *info, 547 struct ib_sa_notice *notice) 548{ 549 struct sa_db_port *port = info->context; 550 struct ib_sa_notice_data_gid *gid_data; 551 struct ib_inform_info **pinfo; 552 enum sa_update_type type; 553 554 if (info->trap_number == IB_SA_SM_TRAP_GID_IN_SERVICE) { 555 pinfo = &port->in_info; 556 type = SA_UPDATE_ADD; 557 } else { 558 pinfo = &port->out_info; 559 type = SA_UPDATE_REMOVE; 560 } 561 562 mutex_lock(&lock); 563 if (port->state == SA_DB_DESTROY || !*pinfo) { 564 mutex_unlock(&lock); 565 return 0; 566 } 567 568 if (notice) { 569 gid_data = (struct ib_sa_notice_data_gid *) 570 ¬ice->data_details; 571 add_update(port, gid_data->gid, type); 572 mutex_unlock(&lock); 573 } else if (status == -ENETRESET) { 574 *pinfo = NULL; 575 mutex_unlock(&lock); 576 } else { 577 if (status) 578 *pinfo = ERR_PTR(-EINVAL); 579 port->state = SA_DB_IDLE; 580 clean_update_list(port); 581 mutex_unlock(&lock); 582 queue_work(sa_wq, &port->work); 583 } 584 585 return status; 586} 587 588static int reg_in_info(struct sa_db_port *port) 589{ 590 int ret = 0; 591 592 port->in_info = ib_sa_register_inform_info(&sa_client, 593 port->dev->device, 594 port->port_num, 595 IB_SA_SM_TRAP_GID_IN_SERVICE, 596 GFP_KERNEL, notice_handler, 597 port); 598 if (IS_ERR(port->in_info)) 599 ret = PTR_ERR(port->in_info); 600 601 return ret; 602} 603 604static int reg_out_info(struct sa_db_port *port) 605{ 606 int ret = 0; 607 608 port->out_info = ib_sa_register_inform_info(&sa_client, 609 port->dev->device, 610 port->port_num, 611 IB_SA_SM_TRAP_GID_OUT_OF_SERVICE, 612 GFP_KERNEL, notice_handler, 613 port); 614 if (IS_ERR(port->out_info)) 615 ret = PTR_ERR(port->out_info); 616 617 return ret; 618} 619 620static void unsubscribe_port(struct sa_db_port *port) 621{ 622 if (port->in_info && !IS_ERR(port->in_info)) 623 ib_sa_unregister_inform_info(port->in_info); 624 625 if (port->out_info && !IS_ERR(port->out_info)) 626 ib_sa_unregister_inform_info(port->out_info); 627 628 port->out_info = NULL; 629 port->in_info = NULL; 630 631} 632 633static void cleanup_port(struct sa_db_port *port) 634{ 635 unsubscribe_port(port); 636 637 clean_update_list(port); 638 remove_all_attrs(&port->paths); 639} 640 641static int update_port_info(struct sa_db_port *port) 642{ 643 struct ib_port_attr port_attr; 644 int ret; 645 646 ret = ib_query_port(port->dev->device, port->port_num, &port_attr); 647 if (ret) 648 return ret; 649 650 if (port_attr.state != IB_PORT_ACTIVE) 651 return -ENODATA; 652 653 port->sm_lid = port_attr.sm_lid; 654 port->sm_sl = port_attr.sm_sl; 655 return 0; 656} 657 658static void process_updates(struct sa_db_port *port) 659{ 660 struct update_info *update; 661 struct ib_sa_attr_list *attr_list; 662 int ret; 663 664 if (!paths_per_dest || update_port_info(port)) { 665 cleanup_port(port); 666 goto out; 667 } 668 669 /* Event registration is an optimization, so ignore failures. */ 670 if (subscribe_inform_info) { 671 if (!port->out_info) { 672 ret = reg_out_info(port); 673 if (!ret) 674 return; 675 } 676 677 if (!port->in_info) { 678 ret = reg_in_info(port); 679 if (!ret) 680 return; 681 } 682 } else 683 unsubscribe_port(port); 684 685 while (!list_empty(&port->update_list)) { 686 update = list_entry(port->update_list.next, 687 struct update_info, list); 688 689 if (update->type == SA_UPDATE_REMOVE) { 690 write_lock_irq(&rwlock); 691 attr_list = find_attr_list(&port->paths, 692 update->gid.raw); 693 if (attr_list) 694 remove_attr(&port->paths, attr_list); 695 write_unlock_irq(&rwlock); 696 } else { 697 ret = send_query(port, update); 698 if (!ret) 699 return; 700 701 } 702 list_del(&update->list); 703 kfree(update); 704 } 705out: 706 port->state = SA_DB_IDLE; 707} 708 709static void refresh_port_db(struct sa_db_port *port) 710{ 711 if (port->state == SA_DB_DESTROY) 712 return; 713 714 if (port->state == SA_DB_REFRESH) { 715 clean_update_list(port); 716 ib_cancel_mad(port->agent, port->msg); 717 } 718 719 add_update(port, NULL, SA_UPDATE_FULL); 720} 721 722static void refresh_dev_db(struct sa_db_device *dev) 723{ 724 int i; 725 726 for (i = 0; i < dev->port_count; i++) 727 refresh_port_db(&dev->port[i]); 728} 729 730static void refresh_db(void) 731{ 732 struct sa_db_device *dev; 733 734 list_for_each_entry(dev, &dev_list, list) 735 refresh_dev_db(dev); 736} 737 738static int do_refresh(const char *val, struct kernel_param *kp) 739{ 740 mutex_lock(&lock); 741 refresh_db(); 742 mutex_unlock(&lock); 743 return 0; 744} 745 746static int get_lookup_method(char *buf, struct kernel_param *kp) 747{ 748 return sprintf(buf, 749 "%c %d round robin\n" 750 "%c %d random", 751 (lookup_method == SA_DB_LOOKUP_LEAST_USED) ? '*' : ' ', 752 SA_DB_LOOKUP_LEAST_USED, 753 (lookup_method == SA_DB_LOOKUP_RANDOM) ? '*' : ' ', 754 SA_DB_LOOKUP_RANDOM); 755} 756 757static int set_lookup_method(const char *val, struct kernel_param *kp) 758{ 759 unsigned long method; 760 int ret = 0; 761 762 method = simple_strtoul(val, NULL, 0); 763 764 switch (method) { 765 case SA_DB_LOOKUP_LEAST_USED: 766 case SA_DB_LOOKUP_RANDOM: 767 lookup_method = method; 768 break; 769 default: 770 ret = -EINVAL; 771 break; 772 } 773 774 return ret; 775} 776 777static int set_paths_per_dest(const char *val, struct kernel_param *kp) 778{ 779 int ret; 780 781 mutex_lock(&lock); 782 ret = param_set_ulong(val, kp); 783 if (ret) 784 goto out; 785 786 if (paths_per_dest > SA_DB_MAX_PATHS_PER_DEST) 787 paths_per_dest = SA_DB_MAX_PATHS_PER_DEST; 788 refresh_db(); 789out: 790 mutex_unlock(&lock); 791 return ret; 792} 793 794static int set_subscribe_inform_info(const char *val, struct kernel_param *kp) 795{ 796 int ret; 797 798 ret = param_set_bool(val, kp); 799 if (ret) 800 return ret; 801 802 return do_refresh(val, kp); 803} 804 805static void port_work_handler(struct work_struct *work) 806{ 807 struct sa_db_port *port; 808 809 port = container_of(work, typeof(*port), work); 810 mutex_lock(&lock); 811 refresh_port_db(port); 812 mutex_unlock(&lock); 813} 814 815static void handle_event(struct ib_event_handler *event_handler, 816 struct ib_event *event) 817{ 818 struct sa_db_device *dev; 819 struct sa_db_port *port; 820 821 dev = container_of(event_handler, typeof(*dev), event_handler); 822 port = &dev->port[event->element.port_num - dev->start_port]; 823 824 switch (event->event) { 825 case IB_EVENT_PORT_ERR: 826 case IB_EVENT_LID_CHANGE: 827 case IB_EVENT_SM_CHANGE: 828 case IB_EVENT_CLIENT_REREGISTER: 829 case IB_EVENT_PKEY_CHANGE: 830 case IB_EVENT_PORT_ACTIVE: 831 queue_work(sa_wq, &port->work); 832 break; 833 default: 834 break; 835 } 836} 837 838static void ib_free_path_iter(struct ib_sa_attr_iter *iter) 839{ 840 read_unlock_irqrestore(&rwlock, iter->flags); 841} 842 843static int ib_create_path_iter(struct ib_device *device, u8 port_num, 844 union ib_gid *dgid, struct ib_sa_attr_iter *iter) 845{ 846 struct sa_db_device *dev; 847 struct sa_db_port *port; 848 struct ib_sa_attr_list *list; 849 850 dev = ib_get_client_data(device, &sa_db_client); 851 if (!dev) 852 return -ENODEV; 853 854 port = &dev->port[port_num - dev->start_port]; 855 856 read_lock_irqsave(&rwlock, iter->flags); 857 list = find_attr_list(&port->paths, dgid->raw); 858 if (!list) { 859 ib_free_path_iter(iter); 860 return -ENODATA; 861 } 862 863 iter->iter = &list->iter; 864 return 0; 865} 866 867static struct ib_sa_path_rec *ib_get_next_path(struct ib_sa_attr_iter *iter) 868{ 869 struct ib_path_rec_info *next_path; 870 871 iter->iter = iter->iter->next; 872 if (iter->iter) { 873 next_path = container_of(iter->iter, struct ib_path_rec_info, iter); 874 return &next_path->rec; 875 } else 876 return NULL; 877} 878 879static int cmp_rec(struct ib_sa_path_rec *src, 880 struct ib_sa_path_rec *dst, ib_sa_comp_mask comp_mask) 881{ 882 /* DGID check already done */ 883 if (comp_mask & IB_SA_PATH_REC_SGID && 884 memcmp(&src->sgid, &dst->sgid, sizeof src->sgid)) 885 return -EINVAL; 886 if (comp_mask & IB_SA_PATH_REC_DLID && src->dlid != dst->dlid) 887 return -EINVAL; 888 if (comp_mask & IB_SA_PATH_REC_SLID && src->slid != dst->slid) 889 return -EINVAL; 890 if (comp_mask & IB_SA_PATH_REC_RAW_TRAFFIC && 891 src->raw_traffic != dst->raw_traffic) 892 return -EINVAL; 893 894 if (comp_mask & IB_SA_PATH_REC_FLOW_LABEL && 895 src->flow_label != dst->flow_label) 896 return -EINVAL; 897 if (comp_mask & IB_SA_PATH_REC_HOP_LIMIT && 898 src->hop_limit != dst->hop_limit) 899 return -EINVAL; 900 if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS && 901 src->traffic_class != dst->traffic_class) 902 return -EINVAL; 903 if (comp_mask & IB_SA_PATH_REC_REVERSIBLE && 904 dst->reversible && !src->reversible) 905 return -EINVAL; 906 /* Numb path check already done */ 907 if (comp_mask & IB_SA_PATH_REC_PKEY && src->pkey != dst->pkey) 908 return -EINVAL; 909 910 if (comp_mask & IB_SA_PATH_REC_SL && src->sl != dst->sl) 911 return -EINVAL; 912 913 if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_MTU_SELECTOR, 914 IB_SA_PATH_REC_MTU, dst->mtu_selector, 915 src->mtu, dst->mtu)) 916 return -EINVAL; 917 if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_RATE_SELECTOR, 918 IB_SA_PATH_REC_RATE, dst->rate_selector, 919 src->rate, dst->rate)) 920 return -EINVAL; 921 if (ib_sa_check_selector(comp_mask, 922 IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR, 923 IB_SA_PATH_REC_PACKET_LIFE_TIME, 924 dst->packet_life_time_selector, 925 src->packet_life_time, dst->packet_life_time)) 926 return -EINVAL; 927 928 return 0; 929} 930 931static struct ib_sa_path_rec *get_random_path(struct ib_sa_attr_iter *iter, 932 struct ib_sa_path_rec *req_path, 933 ib_sa_comp_mask comp_mask) 934{ 935 struct ib_sa_path_rec *path, *rand_path = NULL; 936 int num, count = 0; 937 938 for (path = ib_get_next_path(iter); path; 939 path = ib_get_next_path(iter)) { 940 if (!cmp_rec(path, req_path, comp_mask)) { 941 get_random_bytes(&num, sizeof num); 942 if ((num % ++count) == 0) 943 rand_path = path; 944 } 945 } 946 947 return rand_path; 948} 949 950static struct ib_sa_path_rec *get_next_path(struct ib_sa_attr_iter *iter, 951 struct ib_sa_path_rec *req_path, 952 ib_sa_comp_mask comp_mask) 953{ 954 struct ib_path_rec_info *cur_path, *next_path = NULL; 955 struct ib_sa_path_rec *path; 956 unsigned long lookups = ~0; 957 958 for (path = ib_get_next_path(iter); path; 959 path = ib_get_next_path(iter)) { 960 if (!cmp_rec(path, req_path, comp_mask)) { 961 962 cur_path = container_of(iter->iter, struct ib_path_rec_info, 963 iter); 964 if (cur_path->lookups < lookups) { 965 lookups = cur_path->lookups; 966 next_path = cur_path; 967 } 968 } 969 } 970 971 if (next_path) { 972 next_path->lookups++; 973 return &next_path->rec; 974 } else 975 return NULL; 976} 977 978static void report_path(struct work_struct *work) 979{ 980 struct sa_path_request *req; 981 982 req = container_of(work, struct sa_path_request, work); 983 req->callback(0, &req->path_rec, req->context); 984 ib_sa_client_put(req->client); 985 kfree(req); 986} 987 988/** 989 * ib_sa_path_rec_get - Start a Path get query 990 * @client:SA client 991 * @device:device to send query on 992 * @port_num: port number to send query on 993 * @rec:Path Record to send in query 994 * @comp_mask:component mask to send in query 995 * @timeout_ms:time to wait for response 996 * @gfp_mask:GFP mask to use for internal allocations 997 * @callback:function called when query completes, times out or is 998 * canceled 999 * @context:opaque user context passed to callback 1000 * @sa_query:query context, used to cancel query 1001 * 1002 * Send a Path Record Get query to the SA to look up a path. The 1003 * callback function will be called when the query completes (or 1004 * fails); status is 0 for a successful response, -EINTR if the query 1005 * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error 1006 * occurred sending the query. The resp parameter of the callback is 1007 * only valid if status is 0. 1008 * 1009 * If the return value of ib_sa_path_rec_get() is negative, it is an 1010 * error code. Otherwise it is a query ID that can be used to cancel 1011 * the query. 1012 */ 1013int ib_sa_path_rec_get(struct ib_sa_client *client, 1014 struct ib_device *device, u8 port_num, 1015 struct ib_sa_path_rec *rec, 1016 ib_sa_comp_mask comp_mask, 1017 int timeout_ms, gfp_t gfp_mask, 1018 void (*callback)(int status, 1019 struct ib_sa_path_rec *resp, 1020 void *context), 1021 void *context, 1022 struct ib_sa_query **sa_query) 1023{ 1024 struct sa_path_request *req; 1025 struct ib_sa_attr_iter iter; 1026 struct ib_sa_path_rec *path_rec; 1027 int ret; 1028 1029 if (!paths_per_dest) 1030 goto query_sa; 1031 1032 if (!(comp_mask & IB_SA_PATH_REC_DGID) || 1033 !(comp_mask & IB_SA_PATH_REC_NUMB_PATH) || rec->numb_path != 1) 1034 goto query_sa; 1035 1036 req = kmalloc(sizeof *req, gfp_mask); 1037 if (!req) 1038 goto query_sa; 1039 1040 ret = ib_create_path_iter(device, port_num, &rec->dgid, &iter); 1041 if (ret) 1042 goto free_req; 1043 1044 if (lookup_method == SA_DB_LOOKUP_RANDOM) 1045 path_rec = get_random_path(&iter, rec, comp_mask); 1046 else 1047 path_rec = get_next_path(&iter, rec, comp_mask); 1048 1049 if (!path_rec) 1050 goto free_iter; 1051 1052 memcpy(&req->path_rec, path_rec, sizeof *path_rec); 1053 ib_free_path_iter(&iter); 1054 1055 INIT_WORK(&req->work, report_path); 1056 req->client = client; 1057 req->callback = callback; 1058 req->context = context; 1059 1060 ib_sa_client_get(client); 1061 queue_work(sa_wq, &req->work); 1062 *sa_query = ERR_PTR(-EEXIST); 1063 return 0; 1064 1065free_iter: 1066 ib_free_path_iter(&iter); 1067free_req: 1068 kfree(req); 1069query_sa: 1070 return ib_sa_path_rec_query(client, device, port_num, rec, comp_mask, 1071 timeout_ms, gfp_mask, callback, context, 1072 sa_query); 1073} 1074EXPORT_SYMBOL(ib_sa_path_rec_get); 1075 1076static void recv_handler(struct ib_mad_agent *mad_agent, 1077 struct ib_mad_recv_wc *mad_recv_wc) 1078{ 1079 struct sa_db_port *port; 1080 struct update_info *update; 1081 struct ib_mad_send_buf *msg; 1082 enum sa_update_type type; 1083 1084 msg = (struct ib_mad_send_buf *) (unsigned long) mad_recv_wc->wc->wr_id; 1085 port = msg->context[0]; 1086 update = msg->context[1]; 1087 1088 mutex_lock(&lock); 1089 if (port->state == SA_DB_DESTROY || 1090 update != list_entry(port->update_list.next, 1091 struct update_info, list)) { 1092 mutex_unlock(&lock); 1093 } else { 1094 type = update->type; 1095 mutex_unlock(&lock); 1096 update_path_db(mad_agent->context, mad_recv_wc, type); 1097 } 1098 1099 ib_free_recv_mad(mad_recv_wc); 1100} 1101 1102static void send_handler(struct ib_mad_agent *agent, 1103 struct ib_mad_send_wc *mad_send_wc) 1104{ 1105 struct ib_mad_send_buf *msg; 1106 struct sa_db_port *port; 1107 struct update_info *update; 1108 int ret; 1109 1110 msg = mad_send_wc->send_buf; 1111 port = msg->context[0]; 1112 update = msg->context[1]; 1113 1114 mutex_lock(&lock); 1115 if (port->state == SA_DB_DESTROY) 1116 goto unlock; 1117 1118 if (update == list_entry(port->update_list.next, 1119 struct update_info, list)) { 1120 1121 if (mad_send_wc->status == IB_WC_RESP_TIMEOUT_ERR && 1122 msg->timeout_ms < SA_DB_MAX_RETRY_TIMER) { 1123 1124 msg->timeout_ms <<= 1; 1125 ret = ib_post_send_mad(msg, NULL); 1126 if (!ret) { 1127 mutex_unlock(&lock); 1128 return; 1129 } 1130 } 1131 list_del(&update->list); 1132 kfree(update); 1133 } 1134 process_updates(port); 1135unlock: 1136 mutex_unlock(&lock); 1137 1138 ib_destroy_ah(msg->ah); 1139 ib_free_send_mad(msg); 1140} 1141 1142static int init_port(struct sa_db_device *dev, int port_num) 1143{ 1144 struct sa_db_port *port; 1145 int ret; 1146 1147 port = &dev->port[port_num - dev->start_port]; 1148 port->dev = dev; 1149 port->port_num = port_num; 1150 INIT_WORK(&port->work, port_work_handler); 1151 port->paths = RB_ROOT; 1152 INIT_LIST_HEAD(&port->update_list); 1153 1154 ret = ib_get_cached_gid(dev->device, port_num, 0, &port->gid); 1155 if (ret) 1156 return ret; 1157 1158 port->agent = ib_register_mad_agent(dev->device, port_num, IB_QPT_GSI, 1159 NULL, IB_MGMT_RMPP_VERSION, 1160 send_handler, recv_handler, port); 1161 if (IS_ERR(port->agent)) 1162 ret = PTR_ERR(port->agent); 1163 1164 return ret; 1165} 1166 1167static void destroy_port(struct sa_db_port *port) 1168{ 1169 mutex_lock(&lock); 1170 port->state = SA_DB_DESTROY; 1171 mutex_unlock(&lock); 1172 1173 ib_unregister_mad_agent(port->agent); 1174 cleanup_port(port); 1175 flush_workqueue(sa_wq); 1176} 1177 1178static void sa_db_add_dev(struct ib_device *device) 1179{ 1180 struct sa_db_device *dev; 1181 struct sa_db_port *port; 1182 int s, e, i, ret; 1183 1184 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) 1185 return; 1186 1187 if (device->node_type == RDMA_NODE_IB_SWITCH) { 1188 s = e = 0; 1189 } else { 1190 s = 1; 1191 e = device->phys_port_cnt; 1192 } 1193 1194 dev = kzalloc(sizeof *dev + (e - s + 1) * sizeof *port, GFP_KERNEL); 1195 if (!dev) 1196 return; 1197 1198 dev->start_port = s; 1199 dev->port_count = e - s + 1; 1200 dev->device = device; 1201 for (i = 0; i < dev->port_count; i++) { 1202 ret = init_port(dev, s + i); 1203 if (ret) 1204 goto err; 1205 } 1206 1207 ib_set_client_data(device, &sa_db_client, dev); 1208 1209 INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event); 1210 1211 mutex_lock(&lock); 1212 list_add_tail(&dev->list, &dev_list); 1213 refresh_dev_db(dev); 1214 mutex_unlock(&lock); 1215 1216 ib_register_event_handler(&dev->event_handler); 1217 return; 1218err: 1219 while (i--) 1220 destroy_port(&dev->port[i]); 1221 kfree(dev); 1222} 1223 1224static void sa_db_remove_dev(struct ib_device *device) 1225{ 1226 struct sa_db_device *dev; 1227 int i; 1228 1229 dev = ib_get_client_data(device, &sa_db_client); 1230 if (!dev) 1231 return; 1232 1233 ib_unregister_event_handler(&dev->event_handler); 1234 flush_workqueue(sa_wq); 1235 1236 for (i = 0; i < dev->port_count; i++) 1237 destroy_port(&dev->port[i]); 1238 1239 mutex_lock(&lock); 1240 list_del(&dev->list); 1241 mutex_unlock(&lock); 1242 1243 kfree(dev); 1244} 1245 1246int sa_db_init(void) 1247{ 1248 int ret; 1249 1250 rwlock_init(&rwlock); 1251 sa_wq = create_singlethread_workqueue("local_sa"); 1252 if (!sa_wq) 1253 return -ENOMEM; 1254 1255 ib_sa_register_client(&sa_client); 1256 ret = ib_register_client(&sa_db_client); 1257 if (ret) 1258 goto err; 1259 1260 return 0; 1261 1262err: 1263 ib_sa_unregister_client(&sa_client); 1264 destroy_workqueue(sa_wq); 1265 return ret; 1266} 1267 1268void sa_db_cleanup(void) 1269{ 1270 ib_unregister_client(&sa_db_client); 1271 ib_sa_unregister_client(&sa_client); 1272 destroy_workqueue(sa_wq); 1273} 1274