addr.c revision 324527
1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/mutex.h> 37#include <linux/inetdevice.h> 38#include <linux/slab.h> 39#include <linux/workqueue.h> 40#include <linux/module.h> 41#include <linux/notifier.h> 42#include <net/route.h> 43#include <net/netevent.h> 44#include <rdma/ib_addr.h> 45#include <netinet/if_ether.h> 46 47 48MODULE_AUTHOR("Sean Hefty"); 49MODULE_DESCRIPTION("IB Address Translation"); 50MODULE_LICENSE("Dual BSD/GPL"); 51 52struct addr_req { 53 struct list_head list; 54 struct sockaddr_storage src_addr; 55 struct sockaddr_storage dst_addr; 56 struct rdma_dev_addr *addr; 57 struct rdma_addr_client *client; 58 void *context; 59 void (*callback)(int status, struct sockaddr *src_addr, 60 struct rdma_dev_addr *addr, void *context); 61 unsigned long timeout; 62 int status; 63}; 64 65static void process_req(struct work_struct *work); 66 67static DEFINE_MUTEX(lock); 68static LIST_HEAD(req_list); 69static struct delayed_work work; 70static struct workqueue_struct *addr_wq; 71 72void rdma_addr_register_client(struct rdma_addr_client *client) 73{ 74 atomic_set(&client->refcount, 1); 75 init_completion(&client->comp); 76} 77EXPORT_SYMBOL(rdma_addr_register_client); 78 79static inline void put_client(struct rdma_addr_client *client) 80{ 81 if (atomic_dec_and_test(&client->refcount)) 82 complete(&client->comp); 83} 84 85void rdma_addr_unregister_client(struct rdma_addr_client *client) 86{ 87 put_client(client); 88 wait_for_completion(&client->comp); 89} 90EXPORT_SYMBOL(rdma_addr_unregister_client); 91 92#ifdef __linux__ 93int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 94 const unsigned char *dst_dev_addr) 95{ 96 dev_addr->dev_type = dev->type; 97 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 98 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 99 if (dst_dev_addr) 100 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 101 dev_addr->bound_dev_if = dev->ifindex; 102 return 0; 103} 104#else 105int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, 106 const unsigned char *dst_dev_addr) 107{ 108 if (dev->if_type == IFT_INFINIBAND) 109 dev_addr->dev_type = ARPHRD_INFINIBAND; 110 else if (dev->if_type == IFT_ETHER) 111 dev_addr->dev_type = ARPHRD_ETHER; 112 else 113 dev_addr->dev_type = 0; 114 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); 115 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), 116 dev->if_addrlen); 117 if (dst_dev_addr) 118 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); 119 dev_addr->bound_dev_if = dev->if_index; 120 return 0; 121} 122#endif 123EXPORT_SYMBOL(rdma_copy_addr); 124 125int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 126{ 127 struct net_device *dev; 128 int ret = -EADDRNOTAVAIL; 129 130 if (dev_addr->bound_dev_if) { 131 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 132 if (!dev) 133 return -ENODEV; 134 ret = rdma_copy_addr(dev_addr, dev, NULL); 135 dev_put(dev); 136 return ret; 137 } 138 139 switch (addr->sa_family) { 140#ifdef INET 141 case AF_INET: 142 dev = ip_dev_find(NULL, 143 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 144 145 if (!dev) 146 return ret; 147 148 ret = rdma_copy_addr(dev_addr, dev, NULL); 149 dev_put(dev); 150 break; 151#endif 152 153#if defined(INET6) 154 case AF_INET6: 155#ifdef __linux__ 156 read_lock(&dev_base_lock); 157 for_each_netdev(&init_net, dev) { 158 if (ipv6_chk_addr(&init_net, 159 &((struct sockaddr_in6 *) addr)->sin6_addr, 160 dev, 1)) { 161 ret = rdma_copy_addr(dev_addr, dev, NULL); 162 break; 163 } 164 } 165 read_unlock(&dev_base_lock); 166#else 167 { 168 struct sockaddr_in6 *sin6; 169 struct ifaddr *ifa; 170 in_port_t port; 171 172 sin6 = (struct sockaddr_in6 *)addr; 173 port = sin6->sin6_port; 174 sin6->sin6_port = 0; 175 CURVNET_SET_QUIET(&init_net); 176 ifa = ifa_ifwithaddr(addr); 177 CURVNET_RESTORE(); 178 sin6->sin6_port = port; 179 if (ifa == NULL) { 180 ret = -ENODEV; 181 break; 182 } 183 ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); 184 ifa_free(ifa); 185 break; 186 } 187#endif 188 break; 189#endif 190 } 191 return ret; 192} 193EXPORT_SYMBOL(rdma_translate_ip); 194 195static void set_timeout(unsigned long time) 196{ 197 int delay; /* under FreeBSD ticks are 32-bit */ 198 199 delay = time - jiffies; 200 if (delay <= 0) 201 delay = 1; 202 203 mod_delayed_work(addr_wq, &work, delay); 204} 205 206static void queue_req(struct addr_req *req) 207{ 208 struct addr_req *temp_req; 209 210 mutex_lock(&lock); 211 list_for_each_entry_reverse(temp_req, &req_list, list) { 212 if (time_after_eq(req->timeout, temp_req->timeout)) 213 break; 214 } 215 216 list_add(&req->list, &temp_req->list); 217 218 if (req_list.next == &req->list) 219 set_timeout(req->timeout); 220 mutex_unlock(&lock); 221} 222 223#ifdef __linux__ 224static int addr4_resolve(struct sockaddr_in *src_in, 225 struct sockaddr_in *dst_in, 226 struct rdma_dev_addr *addr) 227{ 228 __be32 src_ip = src_in->sin_addr.s_addr; 229 __be32 dst_ip = dst_in->sin_addr.s_addr; 230 struct flowi fl; 231 struct rtable *rt; 232 struct neighbour *neigh; 233 int ret; 234 235 memset(&fl, 0, sizeof fl); 236 fl.nl_u.ip4_u.daddr = dst_ip; 237 fl.nl_u.ip4_u.saddr = src_ip; 238 fl.oif = addr->bound_dev_if; 239 240 ret = ip_route_output_key(&init_net, &rt, &fl); 241 if (ret) 242 goto out; 243 244 src_in->sin_family = AF_INET; 245 src_in->sin_addr.s_addr = rt->rt_src; 246 247 if (rt->idev->dev->flags & IFF_LOOPBACK) { 248 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 249 if (!ret) 250 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 251 goto put; 252 } 253 254 /* If the device does ARP internally, return 'done' */ 255 if (rt->idev->dev->flags & IFF_NOARP) { 256 rdma_copy_addr(addr, rt->idev->dev, NULL); 257 goto put; 258 } 259 260 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 261 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 262 neigh_event_send(rt->u.dst.neighbour, NULL); 263 ret = -ENODATA; 264 if (neigh) 265 goto release; 266 goto put; 267 } 268 269 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 270release: 271 neigh_release(neigh); 272put: 273 ip_rt_put(rt); 274out: 275 return ret; 276} 277 278#if defined(INET6) 279static int addr6_resolve(struct sockaddr_in6 *src_in, 280 struct sockaddr_in6 *dst_in, 281 struct rdma_dev_addr *addr) 282{ 283 struct flowi fl; 284 struct neighbour *neigh; 285 struct dst_entry *dst; 286 int ret; 287 288 memset(&fl, 0, sizeof fl); 289 ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); 290 ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); 291 fl.oif = addr->bound_dev_if; 292 293 dst = ip6_route_output(&init_net, NULL, &fl); 294 if ((ret = dst->error)) 295 goto put; 296 297 if (ipv6_addr_any(&fl.fl6_src)) { 298 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, 299 &fl.fl6_dst, 0, &fl.fl6_src); 300 if (ret) 301 goto put; 302 303 src_in->sin6_family = AF_INET6; 304 ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); 305 } 306 307 if (dst->dev->flags & IFF_LOOPBACK) { 308 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 309 if (!ret) 310 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 311 goto put; 312 } 313 314 /* If the device does ARP internally, return 'done' */ 315 if (dst->dev->flags & IFF_NOARP) { 316 ret = rdma_copy_addr(addr, dst->dev, NULL); 317 goto put; 318 } 319 320 neigh = dst->neighbour; 321 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 322 neigh_event_send(dst->neighbour, NULL); 323 ret = -ENODATA; 324 goto put; 325 } 326 327 ret = rdma_copy_addr(addr, dst->dev, neigh->ha); 328put: 329 dst_release(dst); 330 return ret; 331} 332#else 333static int addr6_resolve(struct sockaddr_in6 *src_in, 334 struct sockaddr_in6 *dst_in, 335 struct rdma_dev_addr *addr) 336{ 337 return -EADDRNOTAVAIL; 338} 339#endif 340 341#else 342#include <netinet/if_ether.h> 343 344static int addr_resolve_sub(struct sockaddr *src_in, 345 struct sockaddr *dst_in, 346 struct rdma_dev_addr *addr) 347{ 348 struct sockaddr_in *sin; 349 struct sockaddr_in6 *sin6; 350 struct ifaddr *ifa; 351 struct ifnet *ifp; 352#if defined(INET) || defined(INET6) 353 struct llentry *lle; 354#endif 355 struct rtentry *rte; 356 in_port_t port; 357 u_char edst[MAX_ADDR_LEN]; 358 int multi; 359 int bcast; 360 int error = 0; 361 362 /* 363 * Determine whether the address is unicast, multicast, or broadcast 364 * and whether the source interface is valid. 365 */ 366 multi = 0; 367 bcast = 0; 368 sin = NULL; 369 sin6 = NULL; 370 ifp = NULL; 371 rte = NULL; 372 switch (dst_in->sa_family) { 373#ifdef INET 374 case AF_INET: 375 sin = (struct sockaddr_in *)dst_in; 376 if (sin->sin_addr.s_addr == INADDR_BROADCAST) 377 bcast = 1; 378 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 379 multi = 1; 380 sin = (struct sockaddr_in *)src_in; 381 if (sin->sin_addr.s_addr != INADDR_ANY) { 382 /* 383 * Address comparison fails if the port is set 384 * cache it here to be restored later. 385 */ 386 port = sin->sin_port; 387 sin->sin_port = 0; 388 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 389 } else 390 src_in = NULL; 391 break; 392#endif 393#ifdef INET6 394 case AF_INET6: 395 sin6 = (struct sockaddr_in6 *)dst_in; 396 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 397 multi = 1; 398 sin6 = (struct sockaddr_in6 *)src_in; 399 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 400 port = sin6->sin6_port; 401 sin6->sin6_port = 0; 402 } else 403 src_in = NULL; 404 break; 405#endif 406 default: 407 return -EINVAL; 408 } 409 /* 410 * If we have a source address to use look it up first and verify 411 * that it is a local interface. 412 */ 413 if (src_in) { 414 ifa = ifa_ifwithaddr(src_in); 415 if (sin) 416 sin->sin_port = port; 417 if (sin6) 418 sin6->sin6_port = port; 419 if (ifa == NULL) 420 return -ENETUNREACH; 421 ifp = ifa->ifa_ifp; 422 ifa_free(ifa); 423 if (bcast || multi) 424 goto mcast; 425 } 426 /* 427 * Make sure the route exists and has a valid link. 428 */ 429 rte = rtalloc1(dst_in, 1, 0); 430 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { 431 if (rte) 432 RTFREE_LOCKED(rte); 433 return -EHOSTUNREACH; 434 } 435 /* 436 * If it's not multicast or broadcast and the route doesn't match the 437 * requested interface return unreachable. Otherwise fetch the 438 * correct interface pointer and unlock the route. 439 */ 440 if (multi || bcast) { 441 if (ifp == NULL) 442 ifp = rte->rt_ifp; 443 RTFREE_LOCKED(rte); 444 } else if (ifp && ifp != rte->rt_ifp) { 445 RTFREE_LOCKED(rte); 446 return -ENETUNREACH; 447 } else { 448 if (ifp == NULL) 449 ifp = rte->rt_ifp; 450 RT_UNLOCK(rte); 451 } 452mcast: 453 if (bcast) 454 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); 455 if (multi) { 456 struct sockaddr *llsa; 457 458 error = ifp->if_resolvemulti(ifp, &llsa, dst_in); 459 if (error) 460 return -error; 461 error = rdma_copy_addr(addr, ifp, 462 LLADDR((struct sockaddr_dl *)llsa)); 463 free(llsa, M_IFMADDR); 464 return error; 465 } 466 /* 467 * Resolve the link local address. 468 */ 469 switch (dst_in->sa_family) { 470#ifdef INET 471 case AF_INET: 472 error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle); 473 break; 474#endif 475#ifdef INET6 476 case AF_INET6: 477 error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle); 478 break; 479#endif 480 default: 481 /* XXX: Shouldn't happen. */ 482 error = -EINVAL; 483 } 484 RTFREE(rte); 485 if (error == 0) 486 return rdma_copy_addr(addr, ifp, edst); 487 if (error == EWOULDBLOCK) 488 return -ENODATA; 489 return -error; 490} 491 492static int addr_resolve(struct sockaddr *src_in, 493 struct sockaddr *dst_in, 494 struct rdma_dev_addr *addr) 495{ 496 int error; 497 498 CURVNET_SET_QUIET(&init_net); 499 error = addr_resolve_sub(src_in, dst_in, addr); 500 CURVNET_RESTORE(); 501 502 return (error); 503} 504#endif 505 506static void process_req(struct work_struct *work) 507{ 508 struct addr_req *req, *temp_req; 509 struct sockaddr *src_in, *dst_in; 510 struct list_head done_list; 511 512 INIT_LIST_HEAD(&done_list); 513 514 mutex_lock(&lock); 515 list_for_each_entry_safe(req, temp_req, &req_list, list) { 516 if (req->status == -ENODATA) { 517 src_in = (struct sockaddr *) &req->src_addr; 518 dst_in = (struct sockaddr *) &req->dst_addr; 519 req->status = addr_resolve(src_in, dst_in, req->addr); 520 if (req->status && time_after_eq(jiffies, req->timeout)) 521 req->status = -ETIMEDOUT; 522 else if (req->status == -ENODATA) 523 continue; 524 } 525 list_move_tail(&req->list, &done_list); 526 } 527 528 if (!list_empty(&req_list)) { 529 req = list_entry(req_list.next, struct addr_req, list); 530 set_timeout(req->timeout); 531 } 532 mutex_unlock(&lock); 533 534 list_for_each_entry_safe(req, temp_req, &done_list, list) { 535 list_del(&req->list); 536 req->callback(req->status, (struct sockaddr *) &req->src_addr, 537 req->addr, req->context); 538 put_client(req->client); 539 kfree(req); 540 } 541} 542 543int rdma_resolve_ip(struct rdma_addr_client *client, 544 struct sockaddr *src_addr, struct sockaddr *dst_addr, 545 struct rdma_dev_addr *addr, int timeout_ms, 546 void (*callback)(int status, struct sockaddr *src_addr, 547 struct rdma_dev_addr *addr, void *context), 548 void *context) 549{ 550 struct sockaddr *src_in, *dst_in; 551 struct addr_req *req; 552 int ret = 0; 553 554 req = kzalloc(sizeof *req, GFP_KERNEL); 555 if (!req) 556 return -ENOMEM; 557 558 src_in = (struct sockaddr *) &req->src_addr; 559 dst_in = (struct sockaddr *) &req->dst_addr; 560 561 if (src_addr) { 562 if (src_addr->sa_family != dst_addr->sa_family) { 563 ret = -EINVAL; 564 goto err; 565 } 566 567 memcpy(src_in, src_addr, ip_addr_size(src_addr)); 568 } else { 569 src_in->sa_family = dst_addr->sa_family; 570 } 571 572 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); 573 req->addr = addr; 574 req->callback = callback; 575 req->context = context; 576 req->client = client; 577 atomic_inc(&client->refcount); 578 579 req->status = addr_resolve(src_in, dst_in, addr); 580 switch (req->status) { 581 case 0: 582 req->timeout = jiffies; 583 queue_req(req); 584 break; 585 case -ENODATA: 586 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 587 queue_req(req); 588 break; 589 default: 590 ret = req->status; 591 atomic_dec(&client->refcount); 592 goto err; 593 } 594 return ret; 595err: 596 kfree(req); 597 return ret; 598} 599EXPORT_SYMBOL(rdma_resolve_ip); 600 601void rdma_addr_cancel(struct rdma_dev_addr *addr) 602{ 603 struct addr_req *req, *temp_req; 604 605 mutex_lock(&lock); 606 list_for_each_entry_safe(req, temp_req, &req_list, list) { 607 if (req->addr == addr) { 608 req->status = -ECANCELED; 609 req->timeout = jiffies; 610 list_move(&req->list, &req_list); 611 set_timeout(req->timeout); 612 break; 613 } 614 } 615 mutex_unlock(&lock); 616} 617EXPORT_SYMBOL(rdma_addr_cancel); 618 619static int netevent_callback(struct notifier_block *self, unsigned long event, 620 void *ctx) 621{ 622 if (event == NETEVENT_NEIGH_UPDATE) { 623#ifdef __linux__ 624 struct neighbour *neigh = ctx; 625 626 if (neigh->nud_state & NUD_VALID) { 627 set_timeout(jiffies); 628 } 629#else 630 set_timeout(jiffies); 631#endif 632 } 633 return 0; 634} 635 636static struct notifier_block nb = { 637 .notifier_call = netevent_callback 638}; 639 640static int __init addr_init(void) 641{ 642 INIT_DELAYED_WORK(&work, process_req); 643 addr_wq = create_singlethread_workqueue("ib_addr"); 644 if (!addr_wq) 645 return -ENOMEM; 646 647 register_netevent_notifier(&nb); 648 return 0; 649} 650 651static void __exit addr_cleanup(void) 652{ 653 unregister_netevent_notifier(&nb); 654 destroy_workqueue(addr_wq); 655} 656 657module_init(addr_init); 658module_exit(addr_cleanup); 659