/* addr.c revision 271127 */
1/* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 
34 */ 35 36#include <linux/mutex.h> 37#include <linux/inetdevice.h> 38#include <linux/workqueue.h> 39#include <net/route.h> 40#include <net/netevent.h> 41#include <rdma/ib_addr.h> 42 43MODULE_AUTHOR("Sean Hefty"); 44MODULE_DESCRIPTION("IB Address Translation"); 45MODULE_LICENSE("Dual BSD/GPL"); 46 47struct addr_req { 48 struct list_head list; 49 struct sockaddr_storage src_addr; 50 struct sockaddr_storage dst_addr; 51 struct rdma_dev_addr *addr; 52 struct rdma_addr_client *client; 53 void *context; 54 void (*callback)(int status, struct sockaddr *src_addr, 55 struct rdma_dev_addr *addr, void *context); 56 unsigned long timeout; 57 int status; 58}; 59 60static void process_req(struct work_struct *work); 61 62static DEFINE_MUTEX(lock); 63static LIST_HEAD(req_list); 64static struct delayed_work work; 65static struct workqueue_struct *addr_wq; 66 67void rdma_addr_register_client(struct rdma_addr_client *client) 68{ 69 atomic_set(&client->refcount, 1); 70 init_completion(&client->comp); 71} 72EXPORT_SYMBOL(rdma_addr_register_client); 73 74static inline void put_client(struct rdma_addr_client *client) 75{ 76 if (atomic_dec_and_test(&client->refcount)) 77 complete(&client->comp); 78} 79 80void rdma_addr_unregister_client(struct rdma_addr_client *client) 81{ 82 put_client(client); 83 wait_for_completion(&client->comp); 84} 85EXPORT_SYMBOL(rdma_addr_unregister_client); 86 87#ifdef __linux__ 88int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 89 const unsigned char *dst_dev_addr) 90{ 91 dev_addr->dev_type = dev->type; 92 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 93 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 94 if (dst_dev_addr) 95 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 96 dev_addr->bound_dev_if = dev->ifindex; 97 return 0; 98} 99#else 100int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev, 101 const unsigned char *dst_dev_addr) 102{ 103 if (dev->if_type == IFT_INFINIBAND) 104 
dev_addr->dev_type = ARPHRD_INFINIBAND; 105 else if (dev->if_type == IFT_ETHER) 106 dev_addr->dev_type = ARPHRD_ETHER; 107 else 108 dev_addr->dev_type = 0; 109 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen); 110 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr), 111 dev->if_addrlen); 112 if (dst_dev_addr) 113 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen); 114 dev_addr->bound_dev_if = dev->if_index; 115 return 0; 116} 117#endif 118EXPORT_SYMBOL(rdma_copy_addr); 119 120int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 121{ 122 struct net_device *dev; 123 int ret = -EADDRNOTAVAIL; 124 125 if (dev_addr->bound_dev_if) { 126 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 127 if (!dev) 128 return -ENODEV; 129 ret = rdma_copy_addr(dev_addr, dev, NULL); 130 dev_put(dev); 131 return ret; 132 } 133 134 switch (addr->sa_family) { 135#ifdef INET 136 case AF_INET: 137 dev = ip_dev_find(NULL, 138 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 139 140 if (!dev) 141 return ret; 142 143 ret = rdma_copy_addr(dev_addr, dev, NULL); 144 dev_put(dev); 145 break; 146#endif 147 148#if defined(INET6) 149 case AF_INET6: 150#ifdef __linux__ 151 read_lock(&dev_base_lock); 152 for_each_netdev(&init_net, dev) { 153 if (ipv6_chk_addr(&init_net, 154 &((struct sockaddr_in6 *) addr)->sin6_addr, 155 dev, 1)) { 156 ret = rdma_copy_addr(dev_addr, dev, NULL); 157 break; 158 } 159 } 160 read_unlock(&dev_base_lock); 161#else 162 { 163 struct sockaddr_in6 *sin6; 164 struct ifaddr *ifa; 165 in_port_t port; 166 167 sin6 = (struct sockaddr_in6 *)addr; 168 port = sin6->sin6_port; 169 sin6->sin6_port = 0; 170 ifa = ifa_ifwithaddr(addr); 171 sin6->sin6_port = port; 172 if (ifa == NULL) { 173 ret = -ENODEV; 174 break; 175 } 176 ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL); 177 ifa_free(ifa); 178 break; 179 } 180#endif 181 break; 182#endif 183 } 184 return ret; 185} 186EXPORT_SYMBOL(rdma_translate_ip); 187 188static 
void set_timeout(unsigned long time) 189{ 190 unsigned long delay; 191 192 cancel_delayed_work(&work); 193 194 delay = time - jiffies; 195 if ((long)delay <= 0) 196 delay = 1; 197 198 queue_delayed_work(addr_wq, &work, delay); 199} 200 201static void queue_req(struct addr_req *req) 202{ 203 struct addr_req *temp_req; 204 205 mutex_lock(&lock); 206 list_for_each_entry_reverse(temp_req, &req_list, list) { 207 if (time_after_eq(req->timeout, temp_req->timeout)) 208 break; 209 } 210 211 list_add(&req->list, &temp_req->list); 212 213 if (req_list.next == &req->list) 214 set_timeout(req->timeout); 215 mutex_unlock(&lock); 216} 217 218#ifdef __linux__ 219static int addr4_resolve(struct sockaddr_in *src_in, 220 struct sockaddr_in *dst_in, 221 struct rdma_dev_addr *addr) 222{ 223 __be32 src_ip = src_in->sin_addr.s_addr; 224 __be32 dst_ip = dst_in->sin_addr.s_addr; 225 struct flowi fl; 226 struct rtable *rt; 227 struct neighbour *neigh; 228 int ret; 229 230 memset(&fl, 0, sizeof fl); 231 fl.nl_u.ip4_u.daddr = dst_ip; 232 fl.nl_u.ip4_u.saddr = src_ip; 233 fl.oif = addr->bound_dev_if; 234 235 ret = ip_route_output_key(&init_net, &rt, &fl); 236 if (ret) 237 goto out; 238 239 src_in->sin_family = AF_INET; 240 src_in->sin_addr.s_addr = rt->rt_src; 241 242 if (rt->idev->dev->flags & IFF_LOOPBACK) { 243 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 244 if (!ret) 245 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 246 goto put; 247 } 248 249 /* If the device does ARP internally, return 'done' */ 250 if (rt->idev->dev->flags & IFF_NOARP) { 251 rdma_copy_addr(addr, rt->idev->dev, NULL); 252 goto put; 253 } 254 255 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 256 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 257 neigh_event_send(rt->u.dst.neighbour, NULL); 258 ret = -ENODATA; 259 if (neigh) 260 goto release; 261 goto put; 262 } 263 264 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 265release: 266 neigh_release(neigh); 267put: 268 
ip_rt_put(rt); 269out: 270 return ret; 271} 272 273#if defined(INET6) 274static int addr6_resolve(struct sockaddr_in6 *src_in, 275 struct sockaddr_in6 *dst_in, 276 struct rdma_dev_addr *addr) 277{ 278 struct flowi fl; 279 struct neighbour *neigh; 280 struct dst_entry *dst; 281 int ret; 282 283 memset(&fl, 0, sizeof fl); 284 ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); 285 ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); 286 fl.oif = addr->bound_dev_if; 287 288 dst = ip6_route_output(&init_net, NULL, &fl); 289 if ((ret = dst->error)) 290 goto put; 291 292 if (ipv6_addr_any(&fl.fl6_src)) { 293 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, 294 &fl.fl6_dst, 0, &fl.fl6_src); 295 if (ret) 296 goto put; 297 298 src_in->sin6_family = AF_INET6; 299 ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); 300 } 301 302 if (dst->dev->flags & IFF_LOOPBACK) { 303 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); 304 if (!ret) 305 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); 306 goto put; 307 } 308 309 /* If the device does ARP internally, return 'done' */ 310 if (dst->dev->flags & IFF_NOARP) { 311 ret = rdma_copy_addr(addr, dst->dev, NULL); 312 goto put; 313 } 314 315 neigh = dst->neighbour; 316 if (!neigh || !(neigh->nud_state & NUD_VALID)) { 317 neigh_event_send(dst->neighbour, NULL); 318 ret = -ENODATA; 319 goto put; 320 } 321 322 ret = rdma_copy_addr(addr, dst->dev, neigh->ha); 323put: 324 dst_release(dst); 325 return ret; 326} 327#else 328static int addr6_resolve(struct sockaddr_in6 *src_in, 329 struct sockaddr_in6 *dst_in, 330 struct rdma_dev_addr *addr) 331{ 332 return -EADDRNOTAVAIL; 333} 334#endif 335 336#else 337#include <netinet/if_ether.h> 338 339static int addr_resolve(struct sockaddr *src_in, 340 struct sockaddr *dst_in, 341 struct rdma_dev_addr *addr) 342{ 343 struct sockaddr_in *sin; 344 struct sockaddr_in6 *sin6; 345 struct ifaddr *ifa; 346 struct ifnet *ifp; 347#if defined(INET) || defined(INET6) 348 struct llentry *lle; 
349#endif 350 struct rtentry *rte; 351 in_port_t port; 352 u_char edst[MAX_ADDR_LEN]; 353 int multi; 354 int bcast; 355 int error = 0; 356 357 /* 358 * Determine whether the address is unicast, multicast, or broadcast 359 * and whether the source interface is valid. 360 */ 361 multi = 0; 362 bcast = 0; 363 sin = NULL; 364 sin6 = NULL; 365 ifp = NULL; 366 rte = NULL; 367 switch (dst_in->sa_family) { 368#ifdef INET 369 case AF_INET: 370 sin = (struct sockaddr_in *)dst_in; 371 if (sin->sin_addr.s_addr == INADDR_BROADCAST) 372 bcast = 1; 373 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) 374 multi = 1; 375 sin = (struct sockaddr_in *)src_in; 376 if (sin->sin_addr.s_addr != INADDR_ANY) { 377 /* 378 * Address comparison fails if the port is set 379 * cache it here to be restored later. 380 */ 381 port = sin->sin_port; 382 sin->sin_port = 0; 383 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); 384 } else 385 src_in = NULL; 386 break; 387#endif 388#ifdef INET6 389 case AF_INET6: 390 sin6 = (struct sockaddr_in6 *)dst_in; 391 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 392 multi = 1; 393 sin6 = (struct sockaddr_in6 *)src_in; 394 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 395 port = sin6->sin6_port; 396 sin6->sin6_port = 0; 397 } else 398 src_in = NULL; 399 break; 400#endif 401 default: 402 return -EINVAL; 403 } 404 /* 405 * If we have a source address to use look it up first and verify 406 * that it is a local interface. 407 */ 408 if (src_in) { 409 ifa = ifa_ifwithaddr(src_in); 410 if (sin) 411 sin->sin_port = port; 412 if (sin6) 413 sin6->sin6_port = port; 414 if (ifa == NULL) 415 return -ENETUNREACH; 416 ifp = ifa->ifa_ifp; 417 ifa_free(ifa); 418 if (bcast || multi) 419 goto mcast; 420 } 421 /* 422 * Make sure the route exists and has a valid link. 
423 */ 424 rte = rtalloc1(dst_in, 1, 0); 425 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { 426 if (rte) 427 RTFREE_LOCKED(rte); 428 return -EHOSTUNREACH; 429 } 430 /* 431 * If it's not multicast or broadcast and the route doesn't match the 432 * requested interface return unreachable. Otherwise fetch the 433 * correct interface pointer and unlock the route. 434 */ 435 if (multi || bcast) { 436 if (ifp == NULL) 437 ifp = rte->rt_ifp; 438 RTFREE_LOCKED(rte); 439 } else if (ifp && ifp != rte->rt_ifp) { 440 RTFREE_LOCKED(rte); 441 return -ENETUNREACH; 442 } else { 443 if (ifp == NULL) 444 ifp = rte->rt_ifp; 445 RT_UNLOCK(rte); 446 } 447mcast: 448 if (bcast) 449 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr); 450 if (multi) { 451 struct sockaddr *llsa; 452 453 error = ifp->if_resolvemulti(ifp, &llsa, dst_in); 454 if (error) 455 return -error; 456 error = rdma_copy_addr(addr, ifp, 457 LLADDR((struct sockaddr_dl *)llsa)); 458 free(llsa, M_IFMADDR); 459 return error; 460 } 461 /* 462 * Resolve the link local address. 463 */ 464 switch (dst_in->sa_family) { 465#ifdef INET 466 case AF_INET: 467 error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle); 468 break; 469#endif 470#ifdef INET6 471 case AF_INET6: 472 error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle); 473 break; 474#endif 475 default: 476 /* XXX: Shouldn't happen. 
*/ 477 error = -EINVAL; 478 } 479 RTFREE(rte); 480 if (error == 0) 481 return rdma_copy_addr(addr, ifp, edst); 482 if (error == EWOULDBLOCK) 483 return -ENODATA; 484 return -error; 485} 486 487#endif 488 489static void process_req(struct work_struct *work) 490{ 491 struct addr_req *req, *temp_req; 492 struct sockaddr *src_in, *dst_in; 493 struct list_head done_list; 494 495 INIT_LIST_HEAD(&done_list); 496 497 mutex_lock(&lock); 498 list_for_each_entry_safe(req, temp_req, &req_list, list) { 499 if (req->status == -ENODATA) { 500 src_in = (struct sockaddr *) &req->src_addr; 501 dst_in = (struct sockaddr *) &req->dst_addr; 502 req->status = addr_resolve(src_in, dst_in, req->addr); 503 if (req->status && time_after_eq(jiffies, req->timeout)) 504 req->status = -ETIMEDOUT; 505 else if (req->status == -ENODATA) 506 continue; 507 } 508 list_move_tail(&req->list, &done_list); 509 } 510 511 if (!list_empty(&req_list)) { 512 req = list_entry(req_list.next, struct addr_req, list); 513 set_timeout(req->timeout); 514 } 515 mutex_unlock(&lock); 516 517 list_for_each_entry_safe(req, temp_req, &done_list, list) { 518 list_del(&req->list); 519 req->callback(req->status, (struct sockaddr *) &req->src_addr, 520 req->addr, req->context); 521 put_client(req->client); 522 kfree(req); 523 } 524} 525 526int rdma_resolve_ip(struct rdma_addr_client *client, 527 struct sockaddr *src_addr, struct sockaddr *dst_addr, 528 struct rdma_dev_addr *addr, int timeout_ms, 529 void (*callback)(int status, struct sockaddr *src_addr, 530 struct rdma_dev_addr *addr, void *context), 531 void *context) 532{ 533 struct sockaddr *src_in, *dst_in; 534 struct addr_req *req; 535 int ret = 0; 536 537 req = kzalloc(sizeof *req, GFP_KERNEL); 538 if (!req) 539 return -ENOMEM; 540 541 src_in = (struct sockaddr *) &req->src_addr; 542 dst_in = (struct sockaddr *) &req->dst_addr; 543 544 if (src_addr) { 545 if (src_addr->sa_family != dst_addr->sa_family) { 546 ret = -EINVAL; 547 goto err; 548 } 549 550 memcpy(src_in, 
src_addr, ip_addr_size(src_addr)); 551 } else { 552 src_in->sa_family = dst_addr->sa_family; 553 } 554 555 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); 556 req->addr = addr; 557 req->callback = callback; 558 req->context = context; 559 req->client = client; 560 atomic_inc(&client->refcount); 561 562 req->status = addr_resolve(src_in, dst_in, addr); 563 switch (req->status) { 564 case 0: 565 req->timeout = jiffies; 566 queue_req(req); 567 break; 568 case -ENODATA: 569 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 570 queue_req(req); 571 break; 572 default: 573 ret = req->status; 574 atomic_dec(&client->refcount); 575 goto err; 576 } 577 return ret; 578err: 579 kfree(req); 580 return ret; 581} 582EXPORT_SYMBOL(rdma_resolve_ip); 583 584void rdma_addr_cancel(struct rdma_dev_addr *addr) 585{ 586 struct addr_req *req, *temp_req; 587 588 mutex_lock(&lock); 589 list_for_each_entry_safe(req, temp_req, &req_list, list) { 590 if (req->addr == addr) { 591 req->status = -ECANCELED; 592 req->timeout = jiffies; 593 list_move(&req->list, &req_list); 594 set_timeout(req->timeout); 595 break; 596 } 597 } 598 mutex_unlock(&lock); 599} 600EXPORT_SYMBOL(rdma_addr_cancel); 601 602static int netevent_callback(struct notifier_block *self, unsigned long event, 603 void *ctx) 604{ 605 if (event == NETEVENT_NEIGH_UPDATE) { 606#ifdef __linux__ 607 struct neighbour *neigh = ctx; 608 609 if (neigh->nud_state & NUD_VALID) { 610 set_timeout(jiffies); 611 } 612#else 613 set_timeout(jiffies); 614#endif 615 } 616 return 0; 617} 618 619static struct notifier_block nb = { 620 .notifier_call = netevent_callback 621}; 622 623static int addr_init(void) 624{ 625 INIT_DELAYED_WORK(&work, process_req); 626 addr_wq = create_singlethread_workqueue("ib_addr"); 627 if (!addr_wq) 628 return -ENOMEM; 629 630 register_netevent_notifier(&nb); 631 return 0; 632} 633 634static void addr_cleanup(void) 635{ 636 unregister_netevent_notifier(&nb); 637 destroy_workqueue(addr_wq); 638} 639 
640module_init(addr_init); 641module_exit(addr_cleanup); 642