/*
 * Copyright (c) 2006 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>	/* MODULE_* macros, EXPORT_SYMBOL */
#include <linux/slab.h>		/* kmalloc/kzalloc/kfree */
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/random.h>

#include "sa.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
MODULE_LICENSE("Dual BSD/GPL");

static void inform_add_one(struct ib_device *device);
static void inform_remove_one(struct ib_device *device);

static struct ib_client inform_client = {
	.name   = "ib_notice",
	.add    = inform_add_one,
	.remove = inform_remove_one
};

static struct ib_sa_client sa_client;
static struct workqueue_struct *inform_wq;

struct inform_device;

struct inform_port {
	struct inform_device	*dev;
	spinlock_t		lock;
	struct rb_root		table;
	atomic_t		refcount;
	struct completion	comp;
	u8			port_num;
};

struct inform_device {
	struct ib_device	*device;
	struct ib_event_handler	event_handler;
	int			start_port;
	int			end_port;
	struct inform_port	port[0];
};

enum inform_state {
	INFORM_IDLE,
	INFORM_REGISTERING,
	INFORM_MEMBER,
	INFORM_BUSY,
	INFORM_ERROR
};

struct inform_member;

struct inform_group {
	u16			trap_number;
	struct rb_node		node;
	struct inform_port	*port;
	spinlock_t		lock;
	struct work_struct	work;
	struct list_head	pending_list;
	struct list_head	active_list;
	struct list_head	notice_list;
	struct inform_member	*last_join;
	int			members;
	enum inform_state	join_state;	/* State relative to SA */
	atomic_t		refcount;
	enum inform_state	state;
	struct ib_sa_query	*query;
	int			query_id;
};

struct inform_member {
	struct ib_inform_info	info;
	struct ib_sa_client	*client;
	struct inform_group	*group;
	struct list_head	list;
	enum inform_state	state;
	atomic_t		refcount;
	struct completion	comp;
};

struct inform_notice {
	struct list_head	list;
	struct ib_sa_notice	notice;
};

static void reg_handler(int status, struct ib_sa_inform *inform,
			void *context);
static void unreg_handler(int status, struct ib_sa_inform *inform,
			  void *context);
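/*
 * Each port keeps its subscription groups in a red-black tree keyed by
 * trap number.  port->lock protects the tree; each group's own lock
 * protects the group's member lists and state.
 */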
static struct inform_group *inform_find(struct inform_port *port,
					u16 trap_number)
{
	struct rb_node *node = port->table.rb_node;
	struct inform_group *group;

	while (node) {
		group = rb_entry(node, struct inform_group, node);
		if (trap_number < group->trap_number)
			node = node->rb_left;
		else if (trap_number > group->trap_number)
			node = node->rb_right;
		else
			return group;
	}
	return NULL;
}

static struct inform_group *inform_insert(struct inform_port *port,
					  struct inform_group *group)
{
	struct rb_node **link = &port->table.rb_node;
	struct rb_node *parent = NULL;
	struct inform_group *cur_group;

	while (*link) {
		parent = *link;
		cur_group = rb_entry(parent, struct inform_group, node);
		if (group->trap_number < cur_group->trap_number)
			link = &(*link)->rb_left;
		else if (group->trap_number > cur_group->trap_number)
			link = &(*link)->rb_right;
		else
			return cur_group;
	}
	rb_link_node(&group->node, parent, link);
	rb_insert_color(&group->node, &port->table);
	return NULL;
}

static void deref_port(struct inform_port *port)
{
	if (atomic_dec_and_test(&port->refcount))
		complete(&port->comp);
}

static void release_group(struct inform_group *group)
{
	struct inform_port *port = group->port;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (atomic_dec_and_test(&group->refcount)) {
		rb_erase(&group->node, &port->table);
		spin_unlock_irqrestore(&port->lock, flags);
		kfree(group);
		deref_port(port);
	} else
		spin_unlock_irqrestore(&port->lock, flags);
}

static void deref_member(struct inform_member *member)
{
	if (atomic_dec_and_test(&member->refcount))
		complete(&member->comp);
}

static void queue_reg(struct inform_member *member)
{
	struct inform_group *group = member->group;
	unsigned long flags;

	spin_lock_irqsave(&group->lock, flags);
	list_add(&member->list, &group->pending_list);
	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		atomic_inc(&group->refcount);
		queue_work(inform_wq, &group->work);
	}
	spin_unlock_irqrestore(&group->lock, flags);
}

static int send_reg(struct inform_group *group, struct inform_member *member)
{
	struct inform_port *port = group->port;
	struct ib_sa_inform inform;
	int ret;

	memset(&inform, 0, sizeof inform);
	inform.lid_range_begin = cpu_to_be16(0xFFFF);
	inform.is_generic = 1;
	inform.subscribe = 1;
	inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
	inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
	inform.trap.generic.resp_time = 19;
	inform.trap.generic.producer_type =
				cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);

	group->last_join = member;
	ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
				     port->port_num, &inform, 3000, GFP_KERNEL,
				     reg_handler, group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static int send_unreg(struct inform_group *group)
{
	struct inform_port *port = group->port;
	struct ib_sa_inform inform;
	int ret;

	memset(&inform, 0, sizeof inform);
	inform.lid_range_begin = cpu_to_be16(0xFFFF);
	inform.is_generic = 1;
	inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
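	/*
	 * inform.subscribe was left 0 by the memset above; a zero
	 * Subscribe field turns this InformInfo into an unsubscribe
	 * request for the matching registration.
	 */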
	inform.trap.generic.trap_num = cpu_to_be16(group->trap_number);
	inform.trap.generic.qpn = IB_QP1;
	inform.trap.generic.resp_time = 19;
	inform.trap.generic.producer_type =
				cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);

	ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
				     port->port_num, &inform, 3000, GFP_KERNEL,
				     unreg_handler, group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static void join_group(struct inform_group *group, struct inform_member *member)
{
	member->state = INFORM_MEMBER;
	group->members++;
	list_move(&member->list, &group->active_list);
}

static int fail_join(struct inform_group *group, struct inform_member *member,
		     int status)
{
	spin_lock_irq(&group->lock);
	list_del_init(&member->list);
	spin_unlock_irq(&group->lock);
	return member->info.callback(status, &member->info, NULL);
}

static void process_group_error(struct inform_group *group)
{
	struct inform_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	while (!list_empty(&group->active_list)) {
		member = list_entry(group->active_list.next,
				    struct inform_member, list);
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		group->members--;
		member->state = INFORM_ERROR;
		spin_unlock_irq(&group->lock);

		ret = member->info.callback(-ENETRESET, &member->info, NULL);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}

	group->join_state = INFORM_IDLE;
	group->state = INFORM_BUSY;
	spin_unlock_irq(&group->lock);
}

/*
 * Report a notice to all active subscribers.  We use a temporary list to
 * handle unsubscription requests while the notice is being reported, which
 * avoids holding the group lock while in the user's callback.
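 *
 * A reference is taken on each member before its callback runs, so the
 * member cannot be freed mid-callback; a nonzero callback return
 * unsubscribes that member.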
 */
static void process_notice(struct inform_group *group,
			   struct inform_notice *info_notice)
{
	struct inform_member *member;
	struct list_head list;
	int ret;

	INIT_LIST_HEAD(&list);

	spin_lock_irq(&group->lock);
	list_splice_init(&group->active_list, &list);
	while (!list_empty(&list)) {
		member = list_entry(list.next, struct inform_member, list);
		atomic_inc(&member->refcount);
		list_move(&member->list, &group->active_list);
		spin_unlock_irq(&group->lock);

		ret = member->info.callback(0, &member->info,
					    &info_notice->notice);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}
	spin_unlock_irq(&group->lock);
}

static void inform_work_handler(struct work_struct *work)
{
	struct inform_group *group;
	struct inform_member *member;
	struct ib_inform_info *info;
	struct inform_notice *info_notice;
	int status, ret;

	group = container_of(work, typeof(*group), work);
retest:
	spin_lock_irq(&group->lock);
	while (!list_empty(&group->pending_list) ||
	       !list_empty(&group->notice_list) ||
	       (group->state == INFORM_ERROR)) {

		if (group->state == INFORM_ERROR) {
			spin_unlock_irq(&group->lock);
			process_group_error(group);
			goto retest;
		}

		if (!list_empty(&group->notice_list)) {
			info_notice = list_entry(group->notice_list.next,
						 struct inform_notice, list);
			list_del(&info_notice->list);
			spin_unlock_irq(&group->lock);
			process_notice(group, info_notice);
			kfree(info_notice);
			goto retest;
		}

		member = list_entry(group->pending_list.next,
				    struct inform_member, list);
		info = &member->info;
		atomic_inc(&member->refcount);

		if (group->join_state == INFORM_MEMBER) {
			join_group(group, member);
			spin_unlock_irq(&group->lock);
			ret = info->callback(0, info, NULL);
		} else {
			spin_unlock_irq(&group->lock);
			status = send_reg(group, member);
			if (!status) {
				deref_member(member);
				return;
			}
			ret = fail_join(group, member, status);
		}

		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}

	if (!group->members && (group->join_state == INFORM_MEMBER)) {
		group->join_state = INFORM_IDLE;
		spin_unlock_irq(&group->lock);
		if (send_unreg(group))
			goto retest;
	} else {
		group->state = INFORM_IDLE;
		spin_unlock_irq(&group->lock);
		release_group(group);
	}
}

/*
 * Fail a join request if it is still active - at the head of the pending
 * queue.
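 *
 * The outstanding query is tracked in group->last_join; if the head of the
 * pending list no longer matches it, that member was already removed (e.g.
 * unregistered) and there is nothing to report.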
 */
static void process_join_error(struct inform_group *group, int status)
{
	struct inform_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	member = list_entry(group->pending_list.next,
			    struct inform_member, list);
	if (group->last_join == member) {
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		spin_unlock_irq(&group->lock);
		ret = member->info.callback(status, &member->info, NULL);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
	} else
		spin_unlock_irq(&group->lock);
}

static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
{
	struct inform_group *group = context;

	if (status)
		process_join_error(group, status);
	else
		group->join_state = INFORM_MEMBER;

	inform_work_handler(&group->work);
}

static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
{
	struct inform_group *group = context;

	inform_work_handler(&group->work);
}

int notice_dispatch(struct ib_device *device, u8 port_num,
		    struct ib_sa_notice *notice)
{
	struct inform_device *dev;
	struct inform_port *port;
	struct inform_group *group;
	struct inform_notice *info_notice;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return 0; /* No one to give notice to. */

	port = &dev->port[port_num - dev->start_port];
	spin_lock_irq(&port->lock);
	group = inform_find(port,
			    __be16_to_cpu(notice->trap.generic.trap_num));
	if (!group) {
		spin_unlock_irq(&port->lock);
		return 0;
	}

	atomic_inc(&group->refcount);
	spin_unlock_irq(&port->lock);

	info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL);
	if (!info_notice) {
		release_group(group);
		return -ENOMEM;
	}

	info_notice->notice = *notice;

	spin_lock_irq(&group->lock);
	list_add(&info_notice->list, &group->notice_list);
	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		spin_unlock_irq(&group->lock);
		inform_work_handler(&group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	return 0;
}

static struct inform_group *acquire_group(struct inform_port *port,
					  u16 trap_number, gfp_t gfp_mask)
{
	struct inform_group *group, *cur_group;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	group = inform_find(port, trap_number);
	if (group)
		goto found;
	spin_unlock_irqrestore(&port->lock, flags);

	group = kzalloc(sizeof *group, gfp_mask);
	if (!group)
		return NULL;

	group->port = port;
	group->trap_number = trap_number;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->active_list);
	INIT_LIST_HEAD(&group->notice_list);
	INIT_WORK(&group->work, inform_work_handler);
	spin_lock_init(&group->lock);

	spin_lock_irqsave(&port->lock, flags);
	cur_group = inform_insert(port, group);
	if (cur_group) {
		kfree(group);
		group = cur_group;
	} else
		atomic_inc(&port->refcount);
found:
	atomic_inc(&group->refcount);
	spin_unlock_irqrestore(&port->lock, flags);
	return group;
}

/*
 * We serialize all join requests to a single group to make our lives much
 * easier.
 * Otherwise, two users could try to join the same group simultaneously,
 * with different configurations, one could leave while the join is in
 * progress, etc., which makes locking around error recovery difficult.
 */
struct ib_inform_info *
ib_sa_register_inform_info(struct ib_sa_client *client,
			   struct ib_device *device, u8 port_num,
			   u16 trap_number, gfp_t gfp_mask,
			   int (*callback)(int status,
					   struct ib_inform_info *info,
					   struct ib_sa_notice *notice),
			   void *context)
{
	struct inform_device *dev;
	struct inform_member *member;
	struct ib_inform_info *info;
	int ret;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return ERR_PTR(-ENODEV);

	member = kzalloc(sizeof *member, gfp_mask);
	if (!member)
		return ERR_PTR(-ENOMEM);

	ib_sa_client_get(client);
	member->client = client;
	member->info.trap_number = trap_number;
	member->info.callback = callback;
	member->info.context = context;
	init_completion(&member->comp);
	atomic_set(&member->refcount, 1);
	member->state = INFORM_REGISTERING;

	member->group = acquire_group(&dev->port[port_num - dev->start_port],
				      trap_number, gfp_mask);
	if (!member->group) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * The user will get the info structure in their callback.  They
	 * could then free the info structure before we can return from
	 * this routine.  So we save the pointer to return before queuing
	 * any callback.
	 */
	info = &member->info;
	queue_reg(member);
	return info;

err:
	ib_sa_client_put(member->client);
	kfree(member);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_register_inform_info);

void ib_sa_unregister_inform_info(struct ib_inform_info *info)
{
	struct inform_member *member;
	struct inform_group *group;

	member = container_of(info, struct inform_member, info);
	group = member->group;

	spin_lock_irq(&group->lock);
	if (member->state == INFORM_MEMBER)
		group->members--;

	list_del_init(&member->list);

	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		spin_unlock_irq(&group->lock);
		/* Continue to hold reference on group until callback */
		queue_work(inform_wq, &group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	deref_member(member);
	wait_for_completion(&member->comp);
	ib_sa_client_put(member->client);
	kfree(member);
}
EXPORT_SYMBOL(ib_sa_unregister_inform_info);

static void inform_groups_lost(struct inform_port *port)
{
	struct inform_group *group;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	for (node = rb_first(&port->table); node; node = rb_next(node)) {
		group = rb_entry(node, struct inform_group, node);
		spin_lock(&group->lock);
		if (group->state == INFORM_IDLE) {
			atomic_inc(&group->refcount);
			queue_work(inform_wq, &group->work);
		}
		group->state = INFORM_ERROR;
		spin_unlock(&group->lock);
	}
	spin_unlock_irqrestore(&port->lock, flags);
}

static void inform_event_handler(struct ib_event_handler *handler,
				 struct ib_event *event)
{
	struct inform_device *dev;

	dev = container_of(handler, struct inform_device, event_handler);

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
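		/*
		 * Any of these events may invalidate the SA's record of
		 * our subscriptions; report the loss to every group on
		 * the affected port.
		 */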
		inform_groups_lost(&dev->port[event->element.port_num -
					      dev->start_port]);
		break;
	default:
		break;
	}
}

static void inform_add_one(struct ib_device *device)
{
	struct inform_device *dev;
	struct inform_port *port;
	int i;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
		      GFP_KERNEL);
	if (!dev)
		return;

	if (device->node_type == RDMA_NODE_IB_SWITCH)
		dev->start_port = dev->end_port = 0;
	else {
		dev->start_port = 1;
		dev->end_port = device->phys_port_cnt;
	}

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		port->dev = dev;
		port->port_num = dev->start_port + i;
		spin_lock_init(&port->lock);
		port->table = RB_ROOT;
		init_completion(&port->comp);
		atomic_set(&port->refcount, 1);
	}

	dev->device = device;
	ib_set_client_data(device, &inform_client, dev);

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device,
			      inform_event_handler);
	ib_register_event_handler(&dev->event_handler);
}

static void inform_remove_one(struct ib_device *device)
{
	struct inform_device *dev;
	struct inform_port *port;
	int i;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(inform_wq);

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		deref_port(port);
		wait_for_completion(&port->comp);
	}

	kfree(dev);
}

int notice_init(void)
{
	int ret;

	inform_wq = create_singlethread_workqueue("ib_inform");
	if (!inform_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);

	ret = ib_register_client(&inform_client);
	if (ret)
		goto err;
	return 0;

err:
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(inform_wq);
	return ret;
}

void notice_cleanup(void)
{
	ib_unregister_client(&inform_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(inform_wq);
}
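/*
 * Usage sketch for the exported interface.  The names my_notice_cb,
 * my_sa_client, my_context, and handle_notice below are hypothetical,
 * for illustration only.  A subscriber's callback is invoked with status
 * 0 and a NULL notice once registration completes, with status 0 and a
 * notice for each trap received, and with a nonzero status on failure or
 * reset (-ENETRESET); returning nonzero from the callback unsubscribes.
 *
 *	static int my_notice_cb(int status, struct ib_inform_info *info,
 *				struct ib_sa_notice *notice)
 *	{
 *		if (!status && notice)
 *			handle_notice(notice);
 *		return 0;
 *	}
 *
 *	info = ib_sa_register_inform_info(&my_sa_client, device, port_num,
 *					  trap_number, GFP_KERNEL,
 *					  my_notice_cb, my_context);
 *	if (IS_ERR(info))
 *		return PTR_ERR(info);
 *	...
 *	ib_sa_unregister_inform_info(info);
 */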