/*
 * Copyright (c) 2006 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/random.h>

#include <rdma/ib_cache.h>
#include "sa.h"

static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);

static struct ib_client mcast_client = {
	.name   = "ib_multicast",
	.add    = mcast_add_one,
	.remove = mcast_remove_one
};

static struct ib_sa_client sa_client;
static struct workqueue_struct *mcast_wq;
static union ib_gid mgid0;

struct mcast_device;

struct mcast_port {
	struct mcast_device *dev;
	spinlock_t lock;
	struct rb_root table;
	atomic_t refcount;
	struct completion comp;
	u8 port_num;
};

struct mcast_device {
	struct ib_device *device;
	struct ib_event_handler event_handler;
	int start_port;
	int end_port;
	struct mcast_port port[0];
};

enum mcast_state {
	MCAST_JOINING,
	MCAST_MEMBER,
	MCAST_ERROR,
};

enum mcast_group_state {
	MCAST_IDLE,
	MCAST_BUSY,
	MCAST_GROUP_ERROR,
	MCAST_PKEY_EVENT
};

enum {
	MCAST_INVALID_PKEY_INDEX = 0xFFFF
};

struct mcast_member;

struct mcast_group {
	struct ib_sa_mcmember_rec rec;
	struct rb_node node;
	struct mcast_port *port;
	spinlock_t lock;
	struct work_struct work;
	struct list_head pending_list;
	struct list_head active_list;
	struct mcast_member *last_join;
	int members[3];
	atomic_t refcount;
	enum mcast_group_state state;
	struct ib_sa_query *query;
	int query_id;
	u16 pkey_index;
	u8 leave_state;
	int retries;
};

struct mcast_member {
	struct ib_sa_multicast multicast;
	struct ib_sa_client *client;
	struct mcast_group *group;
	struct list_head list;
	enum mcast_state state;
	atomic_t refcount;
	struct completion comp;
};

static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
			 void *context);
static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
			  void *context);

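/*
 * Groups are kept in a per-port red-black tree keyed by MGID.  mcast_find()
 * and mcast_insert() expect the caller to hold port->lock, as done by
 * acquire_group() and ib_sa_get_mcmember_rec() below.
 */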
static struct mcast_group *mcast_find(struct mcast_port *port,
				      union ib_gid *mgid)
{
	struct rb_node *node = port->table.rb_node;
	struct mcast_group *group;
	int ret;

	while (node) {
		group = rb_entry(node, struct mcast_group, node);
		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
		if (!ret)
			return group;

		if (ret < 0)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct mcast_group *mcast_insert(struct mcast_port *port,
					struct mcast_group *group,
					int allow_duplicates)
{
	struct rb_node **link = &port->table.rb_node;
	struct rb_node *parent = NULL;
	struct mcast_group *cur_group;
	int ret;

	while (*link) {
		parent = *link;
		cur_group = rb_entry(parent, struct mcast_group, node);

		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
			     sizeof group->rec.mgid);
		if (ret < 0)
			link = &(*link)->rb_left;
		else if (ret > 0)
			link = &(*link)->rb_right;
		else if (allow_duplicates)
			link = &(*link)->rb_left;
		else
			return cur_group;
	}
	rb_link_node(&group->node, parent, link);
	rb_insert_color(&group->node, &port->table);
	return NULL;
}

static void deref_port(struct mcast_port *port)
{
	if (atomic_dec_and_test(&port->refcount))
		complete(&port->comp);
}

static void release_group(struct mcast_group *group)
{
	struct mcast_port *port = group->port;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (atomic_dec_and_test(&group->refcount)) {
		rb_erase(&group->node, &port->table);
		spin_unlock_irqrestore(&port->lock, flags);
		kfree(group);
		deref_port(port);
	} else
		spin_unlock_irqrestore(&port->lock, flags);
}

static void deref_member(struct mcast_member *member)
{
	if (atomic_dec_and_test(&member->refcount))
		complete(&member->comp);
}

static void queue_join(struct mcast_member *member)
{
	struct mcast_group *group = member->group;
	unsigned long flags;

	spin_lock_irqsave(&group->lock, flags);
	list_add_tail(&member->list, &group->pending_list);
	if (group->state == MCAST_IDLE) {
		group->state = MCAST_BUSY;
		atomic_inc(&group->refcount);
		queue_work(mcast_wq, &group->work);
	}
	spin_unlock_irqrestore(&group->lock, flags);
}

/*
 * A multicast group has three types of members: full member, non member, and
 * send only member.  We need to keep track of the number of members of each
 * type based on their join state.  Adjust the number of members that belong
 * to the specified join states.
 */
static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
	int i;

	for (i = 0; i < 3; i++, join_state >>= 1)
		if (join_state & 0x1)
			group->members[i] += inc;
}

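/*
 * Example: adjust_membership() maps join_state bit 0 (full member) to
 * members[0], bit 1 (non member) to members[1], and bit 2 (send only
 * member) to members[2].  A member joining with join_state 0x5 therefore
 * increments both members[0] and members[2].
 */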
/*
 * If a multicast group has zero members left for a particular join state, but
 * the group is still a member with the SA, we need to leave that join state.
 * Determine which join states we still belong to, but that do not have any
 * active members.
 */
static u8 get_leave_state(struct mcast_group *group)
{
	u8 leave_state = 0;
	int i;

	for (i = 0; i < 3; i++)
		if (!group->members[i])
			leave_state |= (0x1 << i);

	return leave_state & group->rec.join_state;
}

static int cmp_rec(struct ib_sa_mcmember_rec *src,
		   struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
{
	/* MGID must already match */

	if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
	    memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
		return -EINVAL;
	if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
				 IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
				 src->mtu, dst->mtu))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
	    src->traffic_class != dst->traffic_class)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
		return -EINVAL;
	if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
				 IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
				 src->rate, dst->rate))
		return -EINVAL;
	if (ib_sa_check_selector(comp_mask,
				 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
				 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
				 dst->packet_life_time_selector,
				 src->packet_life_time, dst->packet_life_time))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
	    src->flow_label != dst->flow_label)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
	    src->hop_limit != dst->hop_limit)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
		return -EINVAL;

	/* join_state checked separately, proxy_join ignored */

	return 0;
}

static int send_join(struct mcast_group *group, struct mcast_member *member)
{
	struct mcast_port *port = group->port;
	int ret;

	group->last_join = member;
	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
				       port->port_num, IB_MGMT_METHOD_SET,
				       &member->multicast.rec,
				       member->multicast.comp_mask,
				       3000, GFP_KERNEL, join_handler, group,
				       &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static int send_leave(struct mcast_group *group, u8 leave_state)
{
	struct mcast_port *port = group->port;
	struct ib_sa_mcmember_rec rec;
	int ret;

	rec = group->rec;
	rec.join_state = leave_state;
	group->leave_state = leave_state;

	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
				       port->port_num, IB_SA_METHOD_DELETE, &rec,
				       IB_SA_MCMEMBER_REC_MGID |
				       IB_SA_MCMEMBER_REC_PORT_GID |
				       IB_SA_MCMEMBER_REC_JOIN_STATE,
				       3000, GFP_KERNEL, leave_handler,
				       group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static void join_group(struct mcast_group *group, struct mcast_member *member,
		       u8 join_state)
{
	member->state = MCAST_MEMBER;
	adjust_membership(group, join_state, 1);
	group->rec.join_state |= join_state;
	member->multicast.rec = group->rec;
	member->multicast.rec.join_state = join_state;
	list_move(&member->list, &group->active_list);
}

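/*
 * Note on the callback convention used throughout this file: a non-zero
 * return from a member's callback asks the core to tear the membership
 * down, i.e. the work handler responds by calling ib_sa_free_multicast()
 * on the member's behalf.
 */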
static int fail_join(struct mcast_group *group, struct mcast_member *member,
		     int status)
{
	spin_lock_irq(&group->lock);
	list_del_init(&member->list);
	spin_unlock_irq(&group->lock);
	return member->multicast.callback(status, &member->multicast);
}

static void process_group_error(struct mcast_group *group)
{
	struct mcast_member *member;
	int ret = 0;
	u16 pkey_index;

	if (group->state == MCAST_PKEY_EVENT)
		ret = ib_find_pkey(group->port->dev->device,
				   group->port->port_num,
				   be16_to_cpu(group->rec.pkey), &pkey_index);

	spin_lock_irq(&group->lock);
	if (group->state == MCAST_PKEY_EVENT && !ret &&
	    group->pkey_index == pkey_index)
		goto out;

	while (!list_empty(&group->active_list)) {
		member = list_entry(group->active_list.next,
				    struct mcast_member, list);
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		adjust_membership(group, member->multicast.rec.join_state, -1);
		member->state = MCAST_ERROR;
		spin_unlock_irq(&group->lock);

		ret = member->multicast.callback(-ENETRESET,
						 &member->multicast);
		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
		spin_lock_irq(&group->lock);
	}

	group->rec.join_state = 0;
out:
	group->state = MCAST_BUSY;
	spin_unlock_irq(&group->lock);
}

static void mcast_work_handler(struct work_struct *work)
{
	struct mcast_group *group;
	struct mcast_member *member;
	struct ib_sa_multicast *multicast;
	int status, ret;
	u8 join_state;

	group = container_of(work, typeof(*group), work);
retest:
	spin_lock_irq(&group->lock);
	while (!list_empty(&group->pending_list) ||
	       (group->state != MCAST_BUSY)) {

		if (group->state != MCAST_BUSY) {
			spin_unlock_irq(&group->lock);
			process_group_error(group);
			goto retest;
		}

		member = list_entry(group->pending_list.next,
				    struct mcast_member, list);
		multicast = &member->multicast;
		join_state = multicast->rec.join_state;
		atomic_inc(&member->refcount);

		if (join_state == (group->rec.join_state & join_state)) {
			status = cmp_rec(&group->rec, &multicast->rec,
					 multicast->comp_mask);
			if (!status)
				join_group(group, member, join_state);
			else
				list_del_init(&member->list);
			spin_unlock_irq(&group->lock);
			ret = multicast->callback(status, multicast);
		} else {
			spin_unlock_irq(&group->lock);
			status = send_join(group, member);
			if (!status) {
				deref_member(member);
				return;
			}
			ret = fail_join(group, member, status);
		}

		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
		spin_lock_irq(&group->lock);
	}

	join_state = get_leave_state(group);
	if (join_state) {
		group->rec.join_state &= ~join_state;
		spin_unlock_irq(&group->lock);
		if (send_leave(group, join_state))
			goto retest;
	} else {
		group->state = MCAST_IDLE;
		spin_unlock_irq(&group->lock);
		release_group(group);
	}
}

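/*
 * Group state notes: queue_join() and ib_sa_free_multicast() move an
 * MCAST_IDLE group to MCAST_BUSY and queue work.  The port event handler
 * may instead set MCAST_GROUP_ERROR or MCAST_PKEY_EVENT; the work handler
 * spots any state other than MCAST_BUSY, runs process_group_error(), and
 * that returns the group to MCAST_BUSY so remaining requests can proceed.
 */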
/*
 * Fail a join request if it is still active - at the head of the pending queue.
 */
static void process_join_error(struct mcast_group *group, int status)
{
	struct mcast_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	member = list_entry(group->pending_list.next,
			    struct mcast_member, list);
	if (group->last_join == member) {
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		spin_unlock_irq(&group->lock);
		ret = member->multicast.callback(status, &member->multicast);
		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
	} else
		spin_unlock_irq(&group->lock);
}

static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
			 void *context)
{
	struct mcast_group *group = context;
	u16 pkey_index = MCAST_INVALID_PKEY_INDEX;

	if (status)
		process_join_error(group, status);
	else {
		ib_find_pkey(group->port->dev->device, group->port->port_num,
			     be16_to_cpu(rec->pkey), &pkey_index);

		spin_lock_irq(&group->port->lock);
		group->rec = *rec;
		if (group->state == MCAST_BUSY &&
		    group->pkey_index == MCAST_INVALID_PKEY_INDEX)
			group->pkey_index = pkey_index;
		if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
			rb_erase(&group->node, &group->port->table);
			mcast_insert(group->port, group, 1);
		}
		spin_unlock_irq(&group->port->lock);
	}
	mcast_work_handler(&group->work);
}

static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
			  void *context)
{
	struct mcast_group *group = context;

	if (status && (group->retries > 0) &&
	    !send_leave(group, group->leave_state))
		group->retries--;
	else
		mcast_work_handler(&group->work);
}

static struct mcast_group *acquire_group(struct mcast_port *port,
					 union ib_gid *mgid, gfp_t gfp_mask)
{
	struct mcast_group *group, *cur_group;
	unsigned long flags;
	int is_mgid0;

	is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
	if (!is_mgid0) {
		spin_lock_irqsave(&port->lock, flags);
		group = mcast_find(port, mgid);
		if (group)
			goto found;
		spin_unlock_irqrestore(&port->lock, flags);
	}

	group = kzalloc(sizeof *group, gfp_mask);
	if (!group)
		return NULL;

	group->retries = 3;
	group->port = port;
	group->rec.mgid = *mgid;
	group->pkey_index = MCAST_INVALID_PKEY_INDEX;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->active_list);
	INIT_WORK(&group->work, mcast_work_handler);
	spin_lock_init(&group->lock);

	spin_lock_irqsave(&port->lock, flags);
	cur_group = mcast_insert(port, group, is_mgid0);
	if (cur_group) {
		kfree(group);
		group = cur_group;
	} else
		atomic_inc(&port->refcount);
found:
	atomic_inc(&group->refcount);
	spin_unlock_irqrestore(&port->lock, flags);
	return group;
}

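/*
 * Reference counting, as implemented above: each group created by
 * acquire_group() takes a reference on its port (dropped via deref_port()
 * when release_group() frees the group), and every caller of
 * acquire_group() holds a group reference until it calls release_group().
 */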
/*
 * We serialize all join requests to a single group to make our lives much
 * easier.  Otherwise, two users could try to join the same group
 * simultaneously, with different configurations, one could leave while the
 * join is in progress, etc., which makes locking around error recovery
 * difficult.
 */
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
		     struct ib_device *device, u8 port_num,
		     struct ib_sa_mcmember_rec *rec,
		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
		     int (*callback)(int status,
				     struct ib_sa_multicast *multicast),
		     void *context)
{
	struct mcast_device *dev;
	struct mcast_member *member;
	struct ib_sa_multicast *multicast;
	int ret;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return ERR_PTR(-ENODEV);

	member = kmalloc(sizeof *member, gfp_mask);
	if (!member)
		return ERR_PTR(-ENOMEM);

	ib_sa_client_get(client);
	member->client = client;
	member->multicast.rec = *rec;
	member->multicast.comp_mask = comp_mask;
	member->multicast.callback = callback;
	member->multicast.context = context;
	init_completion(&member->comp);
	atomic_set(&member->refcount, 1);
	member->state = MCAST_JOINING;

	member->group = acquire_group(&dev->port[port_num - dev->start_port],
				      &rec->mgid, gfp_mask);
	if (!member->group) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * The user will get the multicast structure in their callback.  They
	 * could then free the multicast structure before we can return from
	 * this routine.  So we save the pointer to return before queuing
	 * any callback.
	 */
	multicast = &member->multicast;
	queue_join(member);
	return multicast;

err:
	ib_sa_client_put(client);
	kfree(member);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_join_multicast);

void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
{
	struct mcast_member *member;
	struct mcast_group *group;

	member = container_of(multicast, struct mcast_member, multicast);
	group = member->group;

	spin_lock_irq(&group->lock);
	if (member->state == MCAST_MEMBER)
		adjust_membership(group, multicast->rec.join_state, -1);

	list_del_init(&member->list);

	if (group->state == MCAST_IDLE) {
		group->state = MCAST_BUSY;
		spin_unlock_irq(&group->lock);
		/* Continue to hold reference on group until callback */
		queue_work(mcast_wq, &group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	deref_member(member);
	wait_for_completion(&member->comp);
	ib_sa_client_put(member->client);
	kfree(member);
}
EXPORT_SYMBOL(ib_sa_free_multicast);

int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	struct mcast_group *group;
	unsigned long flags;
	int ret = 0;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return -ENODEV;

	port = &dev->port[port_num - dev->start_port];
	spin_lock_irqsave(&port->lock, flags);
	group = mcast_find(port, mgid);
	if (group)
		*rec = group->rec;
	else
		ret = -EADDRNOTAVAIL;
	spin_unlock_irqrestore(&port->lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);

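/*
 * Minimal usage sketch for the exported join/free API (hypothetical
 * consumer code, not part of this file; names like my_join_callback and
 * my_sa_client are illustrative, and error handling is elided):
 *
 *	static int my_join_callback(int status, struct ib_sa_multicast *mc)
 *	{
 *		// A non-zero status reports a failed or broken join;
 *		// returning non-zero asks the core to free 'mc' for us.
 *		return status ? 1 : 0;
 *	}
 *
 *	mc = ib_sa_join_multicast(&my_sa_client, device, port_num, &rec,
 *				  comp_mask, GFP_KERNEL, my_join_callback,
 *				  my_context);
 *	...
 *	ib_sa_free_multicast(mc);	// when done, unless freed via callback
 */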
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
			     struct ib_sa_mcmember_rec *rec,
			     struct ib_ah_attr *ah_attr)
{
	int ret;
	u16 gid_index;
	u8 p;

	ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
	if (ret)
		return ret;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->dlid = be16_to_cpu(rec->mlid);
	ah_attr->sl = rec->sl;
	ah_attr->port_num = port_num;
	ah_attr->static_rate = rec->rate;

	ah_attr->ah_flags = IB_AH_GRH;
	ah_attr->grh.dgid = rec->mgid;

	ah_attr->grh.sgid_index = (u8) gid_index;
	ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
	ah_attr->grh.hop_limit = rec->hop_limit;
	ah_attr->grh.traffic_class = rec->traffic_class;

	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);

static void mcast_groups_event(struct mcast_port *port,
			       enum mcast_group_state state)
{
	struct mcast_group *group;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	for (node = rb_first(&port->table); node; node = rb_next(node)) {
		group = rb_entry(node, struct mcast_group, node);
		spin_lock(&group->lock);
		if (group->state == MCAST_IDLE) {
			atomic_inc(&group->refcount);
			queue_work(mcast_wq, &group->work);
		}
		if (group->state != MCAST_GROUP_ERROR)
			group->state = state;
		spin_unlock(&group->lock);
	}
	spin_unlock_irqrestore(&port->lock, flags);
}

static void mcast_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	struct mcast_device *dev;
	int index;

	dev = container_of(handler, struct mcast_device, event_handler);
	if (rdma_port_get_link_layer(dev->device, event->element.port_num) !=
	    IB_LINK_LAYER_INFINIBAND)
		return;

	index = event->element.port_num - dev->start_port;

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
		break;
	case IB_EVENT_PKEY_CHANGE:
		mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
		break;
	default:
		break;
	}
}

static void mcast_add_one(struct ib_device *device)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	int i;
	int count = 0;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
		      GFP_KERNEL);
	if (!dev)
		return;

	if (device->node_type == RDMA_NODE_IB_SWITCH)
		dev->start_port = dev->end_port = 0;
	else {
		dev->start_port = 1;
		dev->end_port = device->phys_port_cnt;
	}

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		if (rdma_port_get_link_layer(device, dev->start_port + i) !=
		    IB_LINK_LAYER_INFINIBAND)
			continue;
		port = &dev->port[i];
		port->dev = dev;
		port->port_num = dev->start_port + i;
		spin_lock_init(&port->lock);
		port->table = RB_ROOT;
		init_completion(&port->comp);
		atomic_set(&port->refcount, 1);
		++count;
	}

	if (!count) {
		kfree(dev);
		return;
	}

	dev->device = device;
	ib_set_client_data(device, &mcast_client, dev);

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
	ib_register_event_handler(&dev->event_handler);
}

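/*
 * Device removal: the initial port reference taken in mcast_add_one() is
 * dropped here, and wait_for_completion() blocks until every group on the
 * port has released its reference (see deref_port()/release_group()).
 */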
static void mcast_remove_one(struct ib_device *device)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	int i;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(mcast_wq);

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		if (rdma_port_get_link_layer(device, dev->start_port + i) ==
		    IB_LINK_LAYER_INFINIBAND) {
			port = &dev->port[i];
			deref_port(port);
			wait_for_completion(&port->comp);
		}
	}

	kfree(dev);
}

int mcast_init(void)
{
	int ret;

	mcast_wq = create_singlethread_workqueue("ib_mcast");
	if (!mcast_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);

	ret = ib_register_client(&mcast_client);
	if (ret)
		goto err;
	return 0;

err:
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(mcast_wq);
	return ret;
}

void mcast_cleanup(void)
{
	ib_unregister_client(&mcast_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(mcast_wq);
}