route.c revision 267193
1/*- 2 * Copyright (c) 1980, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 30 * $FreeBSD: stable/10/sys/net/route.c 267193 2014-06-06 21:45:14Z asomers $ 31 */ 32/************************************************************************ 33 * Note: In this file a 'fib' is a "forwarding information base" * 34 * Which is the new name for an in kernel routing (next hop) table. * 35 ***********************************************************************/ 36 37#include "opt_inet.h" 38#include "opt_inet6.h" 39#include "opt_route.h" 40#include "opt_sctp.h" 41#include "opt_mrouting.h" 42#include "opt_mpath.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/syslog.h> 47#include <sys/malloc.h> 48#include <sys/mbuf.h> 49#include <sys/socket.h> 50#include <sys/sysctl.h> 51#include <sys/syslog.h> 52#include <sys/sysproto.h> 53#include <sys/proc.h> 54#include <sys/domain.h> 55#include <sys/kernel.h> 56 57#include <net/if.h> 58#include <net/if_dl.h> 59#include <net/route.h> 60#include <net/vnet.h> 61#include <net/flowtable.h> 62 63#ifdef RADIX_MPATH 64#include <net/radix_mpath.h> 65#endif 66 67#include <netinet/in.h> 68#include <netinet/ip_mroute.h> 69 70#include <vm/uma.h> 71 72#define RT_MAXFIBS UINT16_MAX 73 74/* Kernel config default option. */ 75#ifdef ROUTETABLES 76#if ROUTETABLES <= 0 77#error "ROUTETABLES defined too low" 78#endif 79#if ROUTETABLES > RT_MAXFIBS 80#error "ROUTETABLES defined too big" 81#endif 82#define RT_NUMFIBS ROUTETABLES 83#endif /* ROUTETABLES */ 84/* Initialize to default if not otherwise set. */ 85#ifndef RT_NUMFIBS 86#define RT_NUMFIBS 1 87#endif 88 89#if defined(INET) || defined(INET6) 90#ifdef SCTP 91extern void sctp_addr_change(struct ifaddr *ifa, int cmd); 92#endif /* SCTP */ 93#endif 94 95 96/* This is read-only.. */ 97u_int rt_numfibs = RT_NUMFIBS; 98SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); 99/* and this can be set too big but will be fixed before it is used */ 100TUNABLE_INT("net.fibs", &rt_numfibs); 101 102/* 103 * By default add routes to all fibs for new interfaces. 104 * Once this is set to 0 then only allocate routes on interface 105 * changes for the FIB of the caller when adding a new set of addresses 106 * to an interface. XXX this is a shotgun aproach to a problem that needs 107 * a more fine grained solution.. that will come. 108 * XXX also has the problems getting the FIB from curthread which will not 109 * always work given the fib can be overridden and prefixes can be added 110 * from the network stack context. 111 */ 112u_int rt_add_addr_allfibs = 1; 113SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, 114 &rt_add_addr_allfibs, 0, ""); 115TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); 116 117VNET_DEFINE(struct rtstat, rtstat); 118#define V_rtstat VNET(rtstat) 119 120VNET_DEFINE(struct radix_node_head *, rt_tables); 121#define V_rt_tables VNET(rt_tables) 122 123VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ 124#define V_rttrash VNET(rttrash) 125 126 127/* compare two sockaddr structures */ 128#define sa_equal(a1, a2) (((a1)->sa_len == (a2)->sa_len) && \ 129 (bcmp((a1), (a2), (a1)->sa_len) == 0)) 130 131/* 132 * Convert a 'struct radix_node *' to a 'struct rtentry *'. 133 * The operation can be done safely (in this code) because a 134 * 'struct rtentry' starts with two 'struct radix_node''s, the first 135 * one representing leaf nodes in the routing tree, which is 136 * what the code in radix.c passes us as a 'struct radix_node'. 137 * 138 * But because there are a lot of assumptions in this conversion, 139 * do not cast explicitly, but always use the macro below. 140 */ 141#define RNTORT(p) ((struct rtentry *)(p)) 142 143static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ 144#define V_rtzone VNET(rtzone) 145 146/* 147 * handler for net.my_fibnum 148 */ 149static int 150sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 151{ 152 int fibnum; 153 int error; 154 155 fibnum = curthread->td_proc->p_fibnum; 156 error = sysctl_handle_int(oidp, &fibnum, 0, req); 157 return (error); 158} 159 160SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, 161 NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); 162 163static __inline struct radix_node_head ** 164rt_tables_get_rnh_ptr(int table, int fam) 165{ 166 struct radix_node_head **rnh; 167 168 KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", 169 __func__)); 170 KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", 171 __func__)); 172 173 /* rnh is [fib=0][af=0]. */ 174 rnh = (struct radix_node_head **)V_rt_tables; 175 /* Get the offset to the requested table and fam. */ 176 rnh += table * (AF_MAX+1) + fam; 177 178 return (rnh); 179} 180 181struct radix_node_head * 182rt_tables_get_rnh(int table, int fam) 183{ 184 185 return (*rt_tables_get_rnh_ptr(table, fam)); 186} 187 188/* 189 * route initialization must occur before ip6_init2(), which happenas at 190 * SI_ORDER_MIDDLE. 191 */ 192static void 193route_init(void) 194{ 195 struct domain *dom; 196 int max_keylen = 0; 197 198 /* whack the tunable ints into line. */ 199 if (rt_numfibs > RT_MAXFIBS) 200 rt_numfibs = RT_MAXFIBS; 201 if (rt_numfibs == 0) 202 rt_numfibs = 1; 203 204 for (dom = domains; dom; dom = dom->dom_next) 205 if (dom->dom_maxrtkey > max_keylen) 206 max_keylen = dom->dom_maxrtkey; 207 208 rn_init(max_keylen); /* init all zeroes, all ones, mask table */ 209} 210SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); 211 212static int 213rtentry_zinit(void *mem, int size, int how) 214{ 215 struct rtentry *rt = mem; 216 217 rt->rt_pksent = counter_u64_alloc(how); 218 if (rt->rt_pksent == NULL) 219 return (ENOMEM); 220 221 RT_LOCK_INIT(rt); 222 223 return (0); 224} 225 226static void 227rtentry_zfini(void *mem, int size) 228{ 229 struct rtentry *rt = mem; 230 231 RT_LOCK_DESTROY(rt); 232 counter_u64_free(rt->rt_pksent); 233} 234 235static int 236rtentry_ctor(void *mem, int size, void *arg, int how) 237{ 238 struct rtentry *rt = mem; 239 240 bzero(rt, offsetof(struct rtentry, rt_endzero)); 241 counter_u64_zero(rt->rt_pksent); 242 243 return (0); 244} 245 246static void 247rtentry_dtor(void *mem, int size, void *arg) 248{ 249 struct rtentry *rt = mem; 250 251 RT_UNLOCK_COND(rt); 252} 253 254static void 255vnet_route_init(const void *unused __unused) 256{ 257 struct domain *dom; 258 struct radix_node_head **rnh; 259 int table; 260 int fam; 261 262 V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * 263 sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); 264 265 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 266 rtentry_ctor, rtentry_dtor, 267 rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0); 268 for (dom = domains; dom; dom = dom->dom_next) { 269 if (dom->dom_rtattach == NULL) 270 continue; 271 272 for (table = 0; table < rt_numfibs; table++) { 273 fam = dom->dom_family; 274 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 275 break; 276 277 /* 278 * XXX MRT rtattach will be also called from 279 * vfs_export.c but the offset will be 0 (only for 280 * AF_INET and AF_INET6 which don't need it anyhow). 281 */ 282 rnh = rt_tables_get_rnh_ptr(table, fam); 283 if (rnh == NULL) 284 panic("%s: rnh NULL", __func__); 285 dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); 286 } 287 } 288} 289VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 290 vnet_route_init, 0); 291 292#ifdef VIMAGE 293static void 294vnet_route_uninit(const void *unused __unused) 295{ 296 int table; 297 int fam; 298 struct domain *dom; 299 struct radix_node_head **rnh; 300 301 for (dom = domains; dom; dom = dom->dom_next) { 302 if (dom->dom_rtdetach == NULL) 303 continue; 304 305 for (table = 0; table < rt_numfibs; table++) { 306 fam = dom->dom_family; 307 308 if (table != 0 && fam != AF_INET6 && fam != AF_INET) 309 break; 310 311 rnh = rt_tables_get_rnh_ptr(table, fam); 312 if (rnh == NULL) 313 panic("%s: rnh NULL", __func__); 314 dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); 315 } 316 } 317 318 free(V_rt_tables, M_RTABLE); 319 uma_zdestroy(V_rtzone); 320} 321VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, 322 vnet_route_uninit, 0); 323#endif 324 325#ifndef _SYS_SYSPROTO_H_ 326struct setfib_args { 327 int fibnum; 328}; 329#endif 330int 331sys_setfib(struct thread *td, struct setfib_args *uap) 332{ 333 if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 334 return EINVAL; 335 td->td_proc->p_fibnum = uap->fibnum; 336 return (0); 337} 338 339/* 340 * Packet routing routines. 341 */ 342void 343rtalloc(struct route *ro) 344{ 345 346 rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); 347} 348 349void 350rtalloc_fib(struct route *ro, u_int fibnum) 351{ 352 rtalloc_ign_fib(ro, 0UL, fibnum); 353} 354 355void 356rtalloc_ign(struct route *ro, u_long ignore) 357{ 358 struct rtentry *rt; 359 360 if ((rt = ro->ro_rt) != NULL) { 361 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 362 return; 363 RTFREE(rt); 364 ro->ro_rt = NULL; 365 } 366 ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); 367 if (ro->ro_rt) 368 RT_UNLOCK(ro->ro_rt); 369} 370 371void 372rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) 373{ 374 struct rtentry *rt; 375 376 if ((rt = ro->ro_rt) != NULL) { 377 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 378 return; 379 RTFREE(rt); 380 ro->ro_rt = NULL; 381 } 382 ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); 383 if (ro->ro_rt) 384 RT_UNLOCK(ro->ro_rt); 385} 386 387/* 388 * Look up the route that matches the address given 389 * Or, at least try.. Create a cloned route if needed. 390 * 391 * The returned route, if any, is locked. 392 */ 393struct rtentry * 394rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 395{ 396 397 return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); 398} 399 400struct rtentry * 401rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 402 u_int fibnum) 403{ 404 struct radix_node_head *rnh; 405 struct radix_node *rn; 406 struct rtentry *newrt; 407 struct rt_addrinfo info; 408 int err = 0, msgtype = RTM_MISS; 409 int needlock; 410 411 KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 412 switch (dst->sa_family) { 413 case AF_INET6: 414 case AF_INET: 415 /* We support multiple FIBs. */ 416 break; 417 default: 418 fibnum = RT_DEFAULT_FIB; 419 break; 420 } 421 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 422 newrt = NULL; 423 if (rnh == NULL) 424 goto miss; 425 426 /* 427 * Look up the address in the table for that Address Family 428 */ 429 needlock = !(ignflags & RTF_RNH_LOCKED); 430 if (needlock) 431 RADIX_NODE_HEAD_RLOCK(rnh); 432#ifdef INVARIANTS 433 else 434 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 435#endif 436 rn = rnh->rnh_matchaddr(dst, rnh); 437 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 438 newrt = RNTORT(rn); 439 RT_LOCK(newrt); 440 RT_ADDREF(newrt); 441 if (needlock) 442 RADIX_NODE_HEAD_RUNLOCK(rnh); 443 goto done; 444 445 } else if (needlock) 446 RADIX_NODE_HEAD_RUNLOCK(rnh); 447 448 /* 449 * Either we hit the root or couldn't find any match, 450 * Which basically means 451 * "caint get there frm here" 452 */ 453miss: 454 V_rtstat.rts_unreach++; 455 456 if (report) { 457 /* 458 * If required, report the failure to the supervising 459 * Authorities. 460 * For a delete, this is not an error. (report == 0) 461 */ 462 bzero(&info, sizeof(info)); 463 info.rti_info[RTAX_DST] = dst; 464 rt_missmsg_fib(msgtype, &info, 0, err, fibnum); 465 } 466done: 467 if (newrt) 468 RT_LOCK_ASSERT(newrt); 469 return (newrt); 470} 471 472/* 473 * Remove a reference count from an rtentry. 474 * If the count gets low enough, take it out of the routing table 475 */ 476void 477rtfree(struct rtentry *rt) 478{ 479 struct radix_node_head *rnh; 480 481 KASSERT(rt != NULL,("%s: NULL rt", __func__)); 482 rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 483 KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 484 485 RT_LOCK_ASSERT(rt); 486 487 /* 488 * The callers should use RTFREE_LOCKED() or RTFREE(), so 489 * we should come here exactly with the last reference. 490 */ 491 RT_REMREF(rt); 492 if (rt->rt_refcnt > 0) { 493 log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 494 goto done; 495 } 496 497 /* 498 * On last reference give the "close method" a chance 499 * to cleanup private state. This also permits (for 500 * IPv4 and IPv6) a chance to decide if the routing table 501 * entry should be purged immediately or at a later time. 502 * When an immediate purge is to happen the close routine 503 * typically calls rtexpunge which clears the RTF_UP flag 504 * on the entry so that the code below reclaims the storage. 505 */ 506 if (rt->rt_refcnt == 0 && rnh->rnh_close) 507 rnh->rnh_close((struct radix_node *)rt, rnh); 508 509 /* 510 * If we are no longer "up" (and ref == 0) 511 * then we can free the resources associated 512 * with the route. 513 */ 514 if ((rt->rt_flags & RTF_UP) == 0) { 515 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 516 panic("rtfree 2"); 517 /* 518 * the rtentry must have been removed from the routing table 519 * so it is represented in rttrash.. remove that now. 520 */ 521 V_rttrash--; 522#ifdef DIAGNOSTIC 523 if (rt->rt_refcnt < 0) { 524 printf("rtfree: %p not freed (neg refs)\n", rt); 525 goto done; 526 } 527#endif 528 /* 529 * release references on items we hold them on.. 530 * e.g other routes and ifaddrs. 531 */ 532 if (rt->rt_ifa) 533 ifa_free(rt->rt_ifa); 534 /* 535 * The key is separatly alloc'd so free it (see rt_setgate()). 536 * This also frees the gateway, as they are always malloc'd 537 * together. 538 */ 539 Free(rt_key(rt)); 540 541 /* 542 * and the rtentry itself of course 543 */ 544 uma_zfree(V_rtzone, rt); 545 return; 546 } 547done: 548 RT_UNLOCK(rt); 549} 550 551 552/* 553 * Force a routing table entry to the specified 554 * destination to go through the given gateway. 555 * Normally called as a result of a routing redirect 556 * message from the network layer. 557 */ 558void 559rtredirect(struct sockaddr *dst, 560 struct sockaddr *gateway, 561 struct sockaddr *netmask, 562 int flags, 563 struct sockaddr *src) 564{ 565 566 rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); 567} 568 569void 570rtredirect_fib(struct sockaddr *dst, 571 struct sockaddr *gateway, 572 struct sockaddr *netmask, 573 int flags, 574 struct sockaddr *src, 575 u_int fibnum) 576{ 577 struct rtentry *rt, *rt0 = NULL; 578 int error = 0; 579 short *stat = NULL; 580 struct rt_addrinfo info; 581 struct ifaddr *ifa; 582 struct radix_node_head *rnh; 583 584 ifa = NULL; 585 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 586 if (rnh == NULL) { 587 error = EAFNOSUPPORT; 588 goto out; 589 } 590 591 /* verify the gateway is directly reachable */ 592 if ((ifa = ifa_ifwithnet_fib(gateway, 0, fibnum)) == NULL) { 593 error = ENETUNREACH; 594 goto out; 595 } 596 rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ 597 /* 598 * If the redirect isn't from our current router for this dst, 599 * it's either old or wrong. If it redirects us to ourselves, 600 * we have a routing loop, perhaps as a result of an interface 601 * going down recently. 602 */ 603 if (!(flags & RTF_DONE) && rt && 604 (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) 605 error = EINVAL; 606 else if (ifa_ifwithaddr_check(gateway)) 607 error = EHOSTUNREACH; 608 if (error) 609 goto done; 610 /* 611 * Create a new entry if we just got back a wildcard entry 612 * or the lookup failed. This is necessary for hosts 613 * which use routing redirects generated by smart gateways 614 * to dynamically build the routing tables. 615 */ 616 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 617 goto create; 618 /* 619 * Don't listen to the redirect if it's 620 * for a route to an interface. 621 */ 622 if (rt->rt_flags & RTF_GATEWAY) { 623 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 624 /* 625 * Changing from route to net => route to host. 626 * Create new route, rather than smashing route to net. 627 */ 628 create: 629 rt0 = rt; 630 rt = NULL; 631 632 flags |= RTF_GATEWAY | RTF_DYNAMIC; 633 bzero((caddr_t)&info, sizeof(info)); 634 info.rti_info[RTAX_DST] = dst; 635 info.rti_info[RTAX_GATEWAY] = gateway; 636 info.rti_info[RTAX_NETMASK] = netmask; 637 info.rti_ifa = ifa; 638 info.rti_flags = flags; 639 if (rt0 != NULL) 640 RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ 641 error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 642 if (rt != NULL) { 643 RT_LOCK(rt); 644 if (rt0 != NULL) 645 EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); 646 flags = rt->rt_flags; 647 } 648 if (rt0 != NULL) 649 RTFREE(rt0); 650 651 stat = &V_rtstat.rts_dynamic; 652 } else { 653 struct rtentry *gwrt; 654 655 /* 656 * Smash the current notion of the gateway to 657 * this destination. Should check about netmask!!! 658 */ 659 rt->rt_flags |= RTF_MODIFIED; 660 flags |= RTF_MODIFIED; 661 stat = &V_rtstat.rts_newgateway; 662 /* 663 * add the key and gateway (in one malloc'd chunk). 664 */ 665 RT_UNLOCK(rt); 666 RADIX_NODE_HEAD_LOCK(rnh); 667 RT_LOCK(rt); 668 rt_setgate(rt, rt_key(rt), gateway); 669 gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); 670 RADIX_NODE_HEAD_UNLOCK(rnh); 671 EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); 672 RTFREE_LOCKED(gwrt); 673 } 674 } else 675 error = EHOSTUNREACH; 676done: 677 if (rt) 678 RTFREE_LOCKED(rt); 679out: 680 if (error) 681 V_rtstat.rts_badredirect++; 682 else if (stat != NULL) 683 (*stat)++; 684 bzero((caddr_t)&info, sizeof(info)); 685 info.rti_info[RTAX_DST] = dst; 686 info.rti_info[RTAX_GATEWAY] = gateway; 687 info.rti_info[RTAX_NETMASK] = netmask; 688 info.rti_info[RTAX_AUTHOR] = src; 689 rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); 690 if (ifa != NULL) 691 ifa_free(ifa); 692} 693 694int 695rtioctl(u_long req, caddr_t data) 696{ 697 698 return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); 699} 700 701/* 702 * Routing table ioctl interface. 703 */ 704int 705rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 706{ 707 708 /* 709 * If more ioctl commands are added here, make sure the proper 710 * super-user checks are being performed because it is possible for 711 * prison-root to make it this far if raw sockets have been enabled 712 * in jails. 713 */ 714#ifdef INET 715 /* Multicast goop, grrr... */ 716 return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 717#else /* INET */ 718 return ENXIO; 719#endif /* INET */ 720} 721 722/* 723 * For both ifa_ifwithroute() routines, 'ifa' is returned referenced. 724 */ 725struct ifaddr * 726ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) 727{ 728 729 return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); 730} 731 732struct ifaddr * 733ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, 734 u_int fibnum) 735{ 736 register struct ifaddr *ifa; 737 int not_found = 0; 738 739 if ((flags & RTF_GATEWAY) == 0) { 740 /* 741 * If we are adding a route to an interface, 742 * and the interface is a pt to pt link 743 * we should search for the destination 744 * as our clue to the interface. Otherwise 745 * we can use the local address. 746 */ 747 ifa = NULL; 748 if (flags & RTF_HOST) 749 ifa = ifa_ifwithdstaddr_fib(dst, fibnum); 750 if (ifa == NULL) 751 ifa = ifa_ifwithaddr(gateway); 752 } else { 753 /* 754 * If we are adding a route to a remote net 755 * or host, the gateway may still be on the 756 * other end of a pt to pt link. 757 */ 758 ifa = ifa_ifwithdstaddr_fib(gateway, fibnum); 759 } 760 if (ifa == NULL) 761 ifa = ifa_ifwithnet_fib(gateway, 0, fibnum); 762 if (ifa == NULL) { 763 struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); 764 if (rt == NULL) 765 return (NULL); 766 /* 767 * dismiss a gateway that is reachable only 768 * through the default router 769 */ 770 switch (gateway->sa_family) { 771 case AF_INET: 772 if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) 773 not_found = 1; 774 break; 775 case AF_INET6: 776 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) 777 not_found = 1; 778 break; 779 default: 780 break; 781 } 782 if (!not_found && rt->rt_ifa != NULL) { 783 ifa = rt->rt_ifa; 784 ifa_ref(ifa); 785 } 786 RT_REMREF(rt); 787 RT_UNLOCK(rt); 788 if (not_found || ifa == NULL) 789 return (NULL); 790 } 791 if (ifa->ifa_addr->sa_family != dst->sa_family) { 792 struct ifaddr *oifa = ifa; 793 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 794 if (ifa == NULL) 795 ifa = oifa; 796 else 797 ifa_free(oifa); 798 } 799 return (ifa); 800} 801 802/* 803 * Do appropriate manipulations of a routing tree given 804 * all the bits of info needed 805 */ 806int 807rtrequest(int req, 808 struct sockaddr *dst, 809 struct sockaddr *gateway, 810 struct sockaddr *netmask, 811 int flags, 812 struct rtentry **ret_nrt) 813{ 814 815 return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 816 RT_DEFAULT_FIB)); 817} 818 819int 820rtrequest_fib(int req, 821 struct sockaddr *dst, 822 struct sockaddr *gateway, 823 struct sockaddr *netmask, 824 int flags, 825 struct rtentry **ret_nrt, 826 u_int fibnum) 827{ 828 struct rt_addrinfo info; 829 830 if (dst->sa_len == 0) 831 return(EINVAL); 832 833 bzero((caddr_t)&info, sizeof(info)); 834 info.rti_flags = flags; 835 info.rti_info[RTAX_DST] = dst; 836 info.rti_info[RTAX_GATEWAY] = gateway; 837 info.rti_info[RTAX_NETMASK] = netmask; 838 return rtrequest1_fib(req, &info, ret_nrt, fibnum); 839} 840 841/* 842 * These (questionable) definitions of apparent local variables apply 843 * to the next two functions. XXXXXX!!! 844 */ 845#define dst info->rti_info[RTAX_DST] 846#define gateway info->rti_info[RTAX_GATEWAY] 847#define netmask info->rti_info[RTAX_NETMASK] 848#define ifaaddr info->rti_info[RTAX_IFA] 849#define ifpaddr info->rti_info[RTAX_IFP] 850#define flags info->rti_flags 851 852int 853rt_getifa(struct rt_addrinfo *info) 854{ 855 856 return (rt_getifa_fib(info, RT_DEFAULT_FIB)); 857} 858 859/* 860 * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, 861 * it will be referenced so the caller must free it. 862 */ 863int 864rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 865{ 866 struct ifaddr *ifa; 867 int error = 0; 868 869 /* 870 * ifp may be specified by sockaddr_dl 871 * when protocol address is ambiguous. 872 */ 873 if (info->rti_ifp == NULL && ifpaddr != NULL && 874 ifpaddr->sa_family == AF_LINK && 875 (ifa = ifa_ifwithnet_fib(ifpaddr, 0, fibnum)) != NULL) { 876 info->rti_ifp = ifa->ifa_ifp; 877 ifa_free(ifa); 878 } 879 if (info->rti_ifa == NULL && ifaaddr != NULL) 880 info->rti_ifa = ifa_ifwithaddr(ifaaddr); 881 if (info->rti_ifa == NULL) { 882 struct sockaddr *sa; 883 884 sa = ifaaddr != NULL ? ifaaddr : 885 (gateway != NULL ? gateway : dst); 886 if (sa != NULL && info->rti_ifp != NULL) 887 info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 888 else if (dst != NULL && gateway != NULL) 889 info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, 890 fibnum); 891 else if (sa != NULL) 892 info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, 893 fibnum); 894 } 895 if ((ifa = info->rti_ifa) != NULL) { 896 if (info->rti_ifp == NULL) 897 info->rti_ifp = ifa->ifa_ifp; 898 } else 899 error = ENETUNREACH; 900 return (error); 901} 902 903/* 904 * Expunges references to a route that's about to be reclaimed. 905 * The route must be locked. 906 */ 907int 908rtexpunge(struct rtentry *rt) 909{ 910#if !defined(RADIX_MPATH) 911 struct radix_node *rn; 912#else 913 struct rt_addrinfo info; 914 int fib; 915 struct rtentry *rt0; 916#endif 917 struct radix_node_head *rnh; 918 struct ifaddr *ifa; 919 int error = 0; 920 921 /* 922 * Find the correct routing tree to use for this Address Family 923 */ 924 rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 925 RT_LOCK_ASSERT(rt); 926 if (rnh == NULL) 927 return (EAFNOSUPPORT); 928 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 929 930#ifdef RADIX_MPATH 931 fib = rt->rt_fibnum; 932 bzero(&info, sizeof(info)); 933 info.rti_ifp = rt->rt_ifp; 934 info.rti_flags = RTF_RNH_LOCKED; 935 info.rti_info[RTAX_DST] = rt_key(rt); 936 info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr; 937 938 RT_UNLOCK(rt); 939 error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib); 940 941 if (error == 0 && rt0 != NULL) { 942 rt = rt0; 943 RT_LOCK(rt); 944 } else if (error != 0) { 945 RT_LOCK(rt); 946 return (error); 947 } 948#else 949 /* 950 * Remove the item from the tree; it should be there, 951 * but when callers invoke us blindly it may not (sigh). 952 */ 953 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh); 954 if (rn == NULL) { 955 error = ESRCH; 956 goto bad; 957 } 958 KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, 959 ("unexpected flags 0x%x", rn->rn_flags)); 960 KASSERT(rt == RNTORT(rn), 961 ("lookup mismatch, rt %p rn %p", rt, rn)); 962#endif /* RADIX_MPATH */ 963 964 rt->rt_flags &= ~RTF_UP; 965 966 /* 967 * Give the protocol a chance to keep things in sync. 968 */ 969 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { 970 struct rt_addrinfo info; 971 972 bzero((caddr_t)&info, sizeof(info)); 973 info.rti_flags = rt->rt_flags; 974 info.rti_info[RTAX_DST] = rt_key(rt); 975 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 976 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 977 ifa->ifa_rtrequest(RTM_DELETE, rt, &info); 978 } 979 980 /* 981 * one more rtentry floating around that is not 982 * linked to the routing table. 983 */ 984 V_rttrash++; 985#if !defined(RADIX_MPATH) 986bad: 987#endif 988 return (error); 989} 990 991#if 0 992int p_sockaddr(char *buf, int buflen, struct sockaddr *s); 993int rt_print(char *buf, int buflen, struct rtentry *rt); 994 995int 996p_sockaddr(char *buf, int buflen, struct sockaddr *s) 997{ 998 void *paddr = NULL; 999 1000 switch (s->sa_family) { 1001 case AF_INET: 1002 paddr = &((struct sockaddr_in *)s)->sin_addr; 1003 break; 1004 case AF_INET6: 1005 paddr = &((struct sockaddr_in6 *)s)->sin6_addr; 1006 break; 1007 } 1008 1009 if (paddr == NULL) 1010 return (0); 1011 1012 if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) 1013 return (0); 1014 1015 return (strlen(buf)); 1016} 1017 1018int 1019rt_print(char *buf, int buflen, struct rtentry *rt) 1020{ 1021 struct sockaddr *addr, *mask; 1022 int i = 0; 1023 1024 addr = rt_key(rt); 1025 mask = rt_mask(rt); 1026 1027 i = p_sockaddr(buf, buflen, addr); 1028 if (!(rt->rt_flags & RTF_HOST)) { 1029 buf[i++] = '/'; 1030 i += p_sockaddr(buf + i, buflen - i, mask); 1031 } 1032 1033 if (rt->rt_flags & RTF_GATEWAY) { 1034 buf[i++] = '>'; 1035 i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway); 1036 } 1037 1038 return (i); 1039} 1040#endif 1041 1042#ifdef RADIX_MPATH 1043static int 1044rn_mpath_update(int req, struct rt_addrinfo *info, 1045 struct radix_node_head *rnh, struct rtentry **ret_nrt) 1046{ 1047 /* 1048 * if we got multipath routes, we require users to specify 1049 * a matching RTAX_GATEWAY. 1050 */ 1051 struct rtentry *rt, *rto = NULL; 1052 register struct radix_node *rn; 1053 int error = 0; 1054 1055 rn = rnh->rnh_lookup(dst, netmask, rnh); 1056 if (rn == NULL) 1057 return (ESRCH); 1058 rto = rt = RNTORT(rn); 1059 1060 rt = rt_mpath_matchgate(rt, gateway); 1061 if (rt == NULL) 1062 return (ESRCH); 1063 /* 1064 * this is the first entry in the chain 1065 */ 1066 if (rto == rt) { 1067 rn = rn_mpath_next((struct radix_node *)rt); 1068 /* 1069 * there is another entry, now it's active 1070 */ 1071 if (rn) { 1072 rto = RNTORT(rn); 1073 RT_LOCK(rto); 1074 rto->rt_flags |= RTF_UP; 1075 RT_UNLOCK(rto); 1076 } else if (rt->rt_flags & RTF_GATEWAY) { 1077 /* 1078 * For gateway routes, we need to 1079 * make sure that we we are deleting 1080 * the correct gateway. 1081 * rt_mpath_matchgate() does not 1082 * check the case when there is only 1083 * one route in the chain. 1084 */ 1085 if (gateway && 1086 (rt->rt_gateway->sa_len != gateway->sa_len || 1087 memcmp(rt->rt_gateway, gateway, gateway->sa_len))) 1088 error = ESRCH; 1089 else { 1090 /* 1091 * remove from tree before returning it 1092 * to the caller 1093 */ 1094 rn = rnh->rnh_deladdr(dst, netmask, rnh); 1095 KASSERT(rt == RNTORT(rn), ("radix node disappeared")); 1096 goto gwdelete; 1097 } 1098 1099 } 1100 /* 1101 * use the normal delete code to remove 1102 * the first entry 1103 */ 1104 if (req != RTM_DELETE) 1105 goto nondelete; 1106 1107 error = ENOENT; 1108 goto done; 1109 } 1110 1111 /* 1112 * if the entry is 2nd and on up 1113 */ 1114 if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) 1115 panic ("rtrequest1: rt_mpath_deldup"); 1116gwdelete: 1117 RT_LOCK(rt); 1118 RT_ADDREF(rt); 1119 if (req == RTM_DELETE) { 1120 rt->rt_flags &= ~RTF_UP; 1121 /* 1122 * One more rtentry floating around that is not 1123 * linked to the routing table. rttrash will be decremented 1124 * when RTFREE(rt) is eventually called. 1125 */ 1126 V_rttrash++; 1127 } 1128 1129nondelete: 1130 if (req != RTM_DELETE) 1131 panic("unrecognized request %d", req); 1132 1133 1134 /* 1135 * If the caller wants it, then it can have it, 1136 * but it's up to it to free the rtentry as we won't be 1137 * doing it. 1138 */ 1139 if (ret_nrt) { 1140 *ret_nrt = rt; 1141 RT_UNLOCK(rt); 1142 } else 1143 RTFREE_LOCKED(rt); 1144done: 1145 return (error); 1146} 1147#endif 1148 1149int 1150rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, 1151 u_int fibnum) 1152{ 1153 int error = 0, needlock = 0; 1154 register struct rtentry *rt; 1155#ifdef FLOWTABLE 1156 register struct rtentry *rt0; 1157#endif 1158 register struct radix_node *rn; 1159 register struct radix_node_head *rnh; 1160 struct ifaddr *ifa; 1161 struct sockaddr *ndst; 1162 struct sockaddr_storage mdst; 1163#define senderr(x) { error = x ; goto bad; } 1164 1165 KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); 1166 switch (dst->sa_family) { 1167 case AF_INET6: 1168 case AF_INET: 1169 /* We support multiple FIBs. */ 1170 break; 1171 default: 1172 fibnum = RT_DEFAULT_FIB; 1173 break; 1174 } 1175 1176 /* 1177 * Find the correct routing tree to use for this Address Family 1178 */ 1179 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1180 if (rnh == NULL) 1181 return (EAFNOSUPPORT); 1182 needlock = ((flags & RTF_RNH_LOCKED) == 0); 1183 flags &= ~RTF_RNH_LOCKED; 1184 if (needlock) 1185 RADIX_NODE_HEAD_LOCK(rnh); 1186 else 1187 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1188 /* 1189 * If we are adding a host route then we don't want to put 1190 * a netmask in the tree, nor do we want to clone it. 1191 */ 1192 if (flags & RTF_HOST) 1193 netmask = NULL; 1194 1195 switch (req) { 1196 case RTM_DELETE: 1197 if (netmask) { 1198 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 1199 dst = (struct sockaddr *)&mdst; 1200 } 1201#ifdef RADIX_MPATH 1202 if (rn_mpath_capable(rnh)) { 1203 error = rn_mpath_update(req, info, rnh, ret_nrt); 1204 /* 1205 * "bad" holds true for the success case 1206 * as well 1207 */ 1208 if (error != ENOENT) 1209 goto bad; 1210 error = 0; 1211 } 1212#endif 1213 if ((flags & RTF_PINNED) == 0) { 1214 /* Check if target route can be deleted */ 1215 rt = (struct rtentry *)rnh->rnh_lookup(dst, 1216 netmask, rnh); 1217 if ((rt != NULL) && (rt->rt_flags & RTF_PINNED)) 1218 senderr(EADDRINUSE); 1219 } 1220 1221 /* 1222 * Remove the item from the tree and return it. 1223 * Complain if it is not there and do no more processing. 1224 */ 1225 rn = rnh->rnh_deladdr(dst, netmask, rnh); 1226 if (rn == NULL) 1227 senderr(ESRCH); 1228 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 1229 panic ("rtrequest delete"); 1230 rt = RNTORT(rn); 1231 RT_LOCK(rt); 1232 RT_ADDREF(rt); 1233 rt->rt_flags &= ~RTF_UP; 1234 1235 /* 1236 * give the protocol a chance to keep things in sync. 1237 */ 1238 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) 1239 ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1240 1241 /* 1242 * One more rtentry floating around that is not 1243 * linked to the routing table. rttrash will be decremented 1244 * when RTFREE(rt) is eventually called. 1245 */ 1246 V_rttrash++; 1247 1248 /* 1249 * If the caller wants it, then it can have it, 1250 * but it's up to it to free the rtentry as we won't be 1251 * doing it. 1252 */ 1253 if (ret_nrt) { 1254 *ret_nrt = rt; 1255 RT_UNLOCK(rt); 1256 } else 1257 RTFREE_LOCKED(rt); 1258 break; 1259 case RTM_RESOLVE: 1260 /* 1261 * resolve was only used for route cloning 1262 * here for compat 1263 */ 1264 break; 1265 case RTM_ADD: 1266 if ((flags & RTF_GATEWAY) && !gateway) 1267 senderr(EINVAL); 1268 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 1269 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 1270 senderr(EINVAL); 1271 1272 if (info->rti_ifa == NULL) { 1273 error = rt_getifa_fib(info, fibnum); 1274 if (error) 1275 senderr(error); 1276 } else 1277 ifa_ref(info->rti_ifa); 1278 ifa = info->rti_ifa; 1279 rt = uma_zalloc(V_rtzone, M_NOWAIT); 1280 if (rt == NULL) { 1281 ifa_free(ifa); 1282 senderr(ENOBUFS); 1283 } 1284 rt->rt_flags = RTF_UP | flags; 1285 rt->rt_fibnum = fibnum; 1286 /* 1287 * Add the gateway. Possibly re-malloc-ing the storage for it. 1288 */ 1289 RT_LOCK(rt); 1290 if ((error = rt_setgate(rt, dst, gateway)) != 0) { 1291 ifa_free(ifa); 1292 uma_zfree(V_rtzone, rt); 1293 senderr(error); 1294 } 1295 1296 /* 1297 * point to the (possibly newly malloc'd) dest address. 1298 */ 1299 ndst = (struct sockaddr *)rt_key(rt); 1300 1301 /* 1302 * make sure it contains the value we want (masked if needed). 1303 */ 1304 if (netmask) { 1305 rt_maskedcopy(dst, ndst, netmask); 1306 } else 1307 bcopy(dst, ndst, dst->sa_len); 1308 1309 /* 1310 * We use the ifa reference returned by rt_getifa_fib(). 1311 * This moved from below so that rnh->rnh_addaddr() can 1312 * examine the ifa and ifa->ifa_ifp if it so desires. 1313 */ 1314 rt->rt_ifa = ifa; 1315 rt->rt_ifp = ifa->ifa_ifp; 1316 rt->rt_weight = 1; 1317 1318#ifdef RADIX_MPATH 1319 /* do not permit exactly the same dst/mask/gw pair */ 1320 if (rn_mpath_capable(rnh) && 1321 rt_mpath_conflict(rnh, rt, netmask)) { 1322 ifa_free(rt->rt_ifa); 1323 Free(rt_key(rt)); 1324 uma_zfree(V_rtzone, rt); 1325 senderr(EEXIST); 1326 } 1327#endif 1328 1329#ifdef FLOWTABLE 1330 rt0 = NULL; 1331 /* "flow-table" only supports IPv6 and IPv4 at the moment. */ 1332 switch (dst->sa_family) { 1333#ifdef INET6 1334 case AF_INET6: 1335#endif 1336#ifdef INET 1337 case AF_INET: 1338#endif 1339#if defined(INET6) || defined(INET) 1340 rn = rnh->rnh_matchaddr(dst, rnh); 1341 if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 1342 struct sockaddr *mask; 1343 u_char *m, *n; 1344 int len; 1345 1346 /* 1347 * compare mask to see if the new route is 1348 * more specific than the existing one 1349 */ 1350 rt0 = RNTORT(rn); 1351 RT_LOCK(rt0); 1352 RT_ADDREF(rt0); 1353 RT_UNLOCK(rt0); 1354 /* 1355 * A host route is already present, so 1356 * leave the flow-table entries as is. 1357 */ 1358 if (rt0->rt_flags & RTF_HOST) { 1359 RTFREE(rt0); 1360 rt0 = NULL; 1361 } else if (!(flags & RTF_HOST) && netmask) { 1362 mask = rt_mask(rt0); 1363 len = mask->sa_len; 1364 m = (u_char *)mask; 1365 n = (u_char *)netmask; 1366 while (len-- > 0) { 1367 if (*n != *m) 1368 break; 1369 n++; 1370 m++; 1371 } 1372 if (len == 0 || (*n < *m)) { 1373 RTFREE(rt0); 1374 rt0 = NULL; 1375 } 1376 } 1377 } 1378#endif/* INET6 || INET */ 1379 } 1380#endif /* FLOWTABLE */ 1381 1382 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 1383 rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); 1384 /* 1385 * If it still failed to go into the tree, 1386 * then un-make it (this should be a function) 1387 */ 1388 if (rn == NULL) { 1389 ifa_free(rt->rt_ifa); 1390 Free(rt_key(rt)); 1391 uma_zfree(V_rtzone, rt); 1392#ifdef FLOWTABLE 1393 if (rt0 != NULL) 1394 RTFREE(rt0); 1395#endif 1396 senderr(EEXIST); 1397 } 1398#ifdef FLOWTABLE 1399 else if (rt0 != NULL) { 1400 flowtable_route_flush(dst->sa_family, rt0); 1401 RTFREE(rt0); 1402 } 1403#endif 1404 1405 /* 1406 * If this protocol has something to add to this then 1407 * allow it to do that as well. 1408 */ 1409 if (ifa->ifa_rtrequest) 1410 ifa->ifa_rtrequest(req, rt, info); 1411 1412 /* 1413 * actually return a resultant rtentry and 1414 * give the caller a single reference. 1415 */ 1416 if (ret_nrt) { 1417 *ret_nrt = rt; 1418 RT_ADDREF(rt); 1419 } 1420 RT_UNLOCK(rt); 1421 break; 1422 default: 1423 error = EOPNOTSUPP; 1424 } 1425bad: 1426 if (needlock) 1427 RADIX_NODE_HEAD_UNLOCK(rnh); 1428 return (error); 1429#undef senderr 1430} 1431 1432#undef dst 1433#undef gateway 1434#undef netmask 1435#undef ifaaddr 1436#undef ifpaddr 1437#undef flags 1438 1439int 1440rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1441{ 1442 /* XXX dst may be overwritten, can we move this to below */ 1443 int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 1444#ifdef INVARIANTS 1445 struct radix_node_head *rnh; 1446 1447 rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); 1448#endif 1449 1450 RT_LOCK_ASSERT(rt); 1451 RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1452 1453 /* 1454 * Prepare to store the gateway in rt->rt_gateway. 1455 * Both dst and gateway are stored one after the other in the same 1456 * malloc'd chunk. If we have room, we can reuse the old buffer, 1457 * rt_gateway already points to the right place. 1458 * Otherwise, malloc a new block and update the 'dst' address. 1459 */ 1460 if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { 1461 caddr_t new; 1462 1463 R_Malloc(new, caddr_t, dlen + glen); 1464 if (new == NULL) 1465 return ENOBUFS; 1466 /* 1467 * XXX note, we copy from *dst and not *rt_key(rt) because 1468 * rt_setgate() can be called to initialize a newly 1469 * allocated route entry, in which case rt_key(rt) == NULL 1470 * (and also rt->rt_gateway == NULL). 1471 * Free()/free() handle a NULL argument just fine. 1472 */ 1473 bcopy(dst, new, dlen); 1474 Free(rt_key(rt)); /* free old block, if any */ 1475 rt_key(rt) = (struct sockaddr *)new; 1476 rt->rt_gateway = (struct sockaddr *)(new + dlen); 1477 } 1478 1479 /* 1480 * Copy the new gateway value into the memory chunk. 1481 */ 1482 bcopy(gate, rt->rt_gateway, glen); 1483 1484 return (0); 1485} 1486 1487void 1488rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1489{ 1490 register u_char *cp1 = (u_char *)src; 1491 register u_char *cp2 = (u_char *)dst; 1492 register u_char *cp3 = (u_char *)netmask; 1493 u_char *cplim = cp2 + *cp3; 1494 u_char *cplim2 = cp2 + *cp1; 1495 1496 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1497 cp3 += 2; 1498 if (cplim > cplim2) 1499 cplim = cplim2; 1500 while (cp2 < cplim) 1501 *cp2++ = *cp1++ & *cp3++; 1502 if (cp2 < cplim2) 1503 bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1504} 1505 1506/* 1507 * Set up a routing table entry, normally 1508 * for an interface. 1509 */ 1510#define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 1511static inline int 1512rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1513{ 1514 struct sockaddr *dst; 1515 struct sockaddr *netmask; 1516 struct rtentry *rt = NULL; 1517 struct rt_addrinfo info; 1518 int error = 0; 1519 int startfib, endfib; 1520 char tempbuf[_SOCKADDR_TMPSIZE]; 1521 int didwork = 0; 1522 int a_failure = 0; 1523 static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; 1524 struct radix_node_head *rnh; 1525 1526 if (flags & RTF_HOST) { 1527 dst = ifa->ifa_dstaddr; 1528 netmask = NULL; 1529 } else { 1530 dst = ifa->ifa_addr; 1531 netmask = ifa->ifa_netmask; 1532 } 1533 if (dst->sa_len == 0) 1534 return(EINVAL); 1535 switch (dst->sa_family) { 1536 case AF_INET6: 1537 case AF_INET: 1538 /* We support multiple FIBs. */ 1539 break; 1540 default: 1541 fibnum = RT_DEFAULT_FIB; 1542 break; 1543 } 1544 if (fibnum == RT_ALL_FIBS) { 1545 if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { 1546 startfib = endfib = ifa->ifa_ifp->if_fib; 1547 } else { 1548 startfib = 0; 1549 endfib = rt_numfibs - 1; 1550 } 1551 } else { 1552 KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 1553 startfib = fibnum; 1554 endfib = fibnum; 1555 } 1556 1557 /* 1558 * If it's a delete, check that if it exists, 1559 * it's on the correct interface or we might scrub 1560 * a route to another ifa which would 1561 * be confusing at best and possibly worse. 1562 */ 1563 if (cmd == RTM_DELETE) { 1564 /* 1565 * It's a delete, so it should already exist.. 1566 * If it's a net, mask off the host bits 1567 * (Assuming we have a mask) 1568 * XXX this is kinda inet specific.. 1569 */ 1570 if (netmask != NULL) { 1571 rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 1572 dst = (struct sockaddr *)tempbuf; 1573 } 1574 } 1575 /* 1576 * Now go through all the requested tables (fibs) and do the 1577 * requested action. Realistically, this will either be fib 0 1578 * for protocols that don't do multiple tables or all the 1579 * tables for those that do. 1580 */ 1581 for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 1582 if (cmd == RTM_DELETE) { 1583 struct radix_node *rn; 1584 /* 1585 * Look up an rtentry that is in the routing tree and 1586 * contains the correct info. 1587 */ 1588 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1589 if (rnh == NULL) 1590 /* this table doesn't exist but others might */ 1591 continue; 1592 RADIX_NODE_HEAD_RLOCK(rnh); 1593 rn = rnh->rnh_lookup(dst, netmask, rnh); 1594#ifdef RADIX_MPATH 1595 if (rn_mpath_capable(rnh)) { 1596 1597 if (rn == NULL) 1598 error = ESRCH; 1599 else { 1600 rt = RNTORT(rn); 1601 /* 1602 * for interface route the 1603 * rt->rt_gateway is sockaddr_intf 1604 * for cloning ARP entries, so 1605 * rt_mpath_matchgate must use the 1606 * interface address 1607 */ 1608 rt = rt_mpath_matchgate(rt, 1609 ifa->ifa_addr); 1610 if (rt == NULL) 1611 error = ESRCH; 1612 } 1613 } 1614#endif 1615 error = (rn == NULL || 1616 (rn->rn_flags & RNF_ROOT) || 1617 RNTORT(rn)->rt_ifa != ifa); 1618 RADIX_NODE_HEAD_RUNLOCK(rnh); 1619 if (error) { 1620 /* this is only an error if bad on ALL tables */ 1621 continue; 1622 } 1623 } 1624 /* 1625 * Do the actual request 1626 */ 1627 bzero((caddr_t)&info, sizeof(info)); 1628 info.rti_ifa = ifa; 1629 info.rti_flags = flags | 1630 (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 1631 info.rti_info[RTAX_DST] = dst; 1632 /* 1633 * doing this for compatibility reasons 1634 */ 1635 if (cmd == RTM_ADD) 1636 info.rti_info[RTAX_GATEWAY] = 1637 (struct sockaddr *)&null_sdl; 1638 else 1639 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 1640 info.rti_info[RTAX_NETMASK] = netmask; 1641 error = rtrequest1_fib(cmd, &info, &rt, fibnum); 1642 1643 if ((error == EEXIST) && (cmd == RTM_ADD)) { 1644 /* 1645 * Interface route addition failed. 1646 * Atomically delete current prefix generating 1647 * RTM_DELETE message, and retry adding 1648 * interface prefix. 1649 */ 1650 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1651 RADIX_NODE_HEAD_LOCK(rnh); 1652 1653 /* Delete old prefix */ 1654 info.rti_ifa = NULL; 1655 info.rti_flags = RTF_RNH_LOCKED; 1656 1657 error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); 1658 if (error == 0) { 1659 info.rti_ifa = ifa; 1660 info.rti_flags = flags | RTF_RNH_LOCKED | 1661 (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 1662 error = rtrequest1_fib(cmd, &info, &rt, fibnum); 1663 } 1664 1665 RADIX_NODE_HEAD_UNLOCK(rnh); 1666 } 1667 1668 1669 if (error == 0 && rt != NULL) { 1670 /* 1671 * notify any listening routing agents of the change 1672 */ 1673 RT_LOCK(rt); 1674#ifdef RADIX_MPATH 1675 /* 1676 * in case address alias finds the first address 1677 * e.g. ifconfig bge0 192.0.2.246/24 1678 * e.g. ifconfig bge0 192.0.2.247/24 1679 * the address set in the route is 192.0.2.246 1680 * so we need to replace it with 192.0.2.247 1681 */ 1682 if (memcmp(rt->rt_ifa->ifa_addr, 1683 ifa->ifa_addr, ifa->ifa_addr->sa_len)) { 1684 ifa_free(rt->rt_ifa); 1685 ifa_ref(ifa); 1686 rt->rt_ifp = ifa->ifa_ifp; 1687 rt->rt_ifa = ifa; 1688 } 1689#endif 1690 /* 1691 * doing this for compatibility reasons 1692 */ 1693 if (cmd == RTM_ADD) { 1694 ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = 1695 rt->rt_ifp->if_type; 1696 ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = 1697 rt->rt_ifp->if_index; 1698 } 1699 RT_ADDREF(rt); 1700 RT_UNLOCK(rt); 1701 rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); 1702 RT_LOCK(rt); 1703 RT_REMREF(rt); 1704 if (cmd == RTM_DELETE) { 1705 /* 1706 * If we are deleting, and we found an entry, 1707 * then it's been removed from the tree.. 1708 * now throw it away. 1709 */ 1710 RTFREE_LOCKED(rt); 1711 } else { 1712 if (cmd == RTM_ADD) { 1713 /* 1714 * We just wanted to add it.. 1715 * we don't actually need a reference. 1716 */ 1717 RT_REMREF(rt); 1718 } 1719 RT_UNLOCK(rt); 1720 } 1721 didwork = 1; 1722 } 1723 if (error) 1724 a_failure = error; 1725 } 1726 if (cmd == RTM_DELETE) { 1727 if (didwork) { 1728 error = 0; 1729 } else { 1730 /* we only give an error if it wasn't in any table */ 1731 error = ((flags & RTF_HOST) ? 1732 EHOSTUNREACH : ENETUNREACH); 1733 } 1734 } else { 1735 if (a_failure) { 1736 /* return an error if any of them failed */ 1737 error = a_failure; 1738 } 1739 } 1740 return (error); 1741} 1742 1743/* 1744 * Set up a routing table entry, normally 1745 * for an interface. 1746 */ 1747int 1748rtinit(struct ifaddr *ifa, int cmd, int flags) 1749{ 1750 struct sockaddr *dst; 1751 int fib = RT_DEFAULT_FIB; 1752 1753 if (flags & RTF_HOST) { 1754 dst = ifa->ifa_dstaddr; 1755 } else { 1756 dst = ifa->ifa_addr; 1757 } 1758 1759 switch (dst->sa_family) { 1760 case AF_INET6: 1761 case AF_INET: 1762 /* We do support multiple FIBs. */ 1763 fib = RT_ALL_FIBS; 1764 break; 1765 } 1766 return (rtinit1(ifa, cmd, flags, fib)); 1767} 1768 1769/* 1770 * Announce interface address arrival/withdraw 1771 * Returns 0 on success. 1772 */ 1773int 1774rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) 1775{ 1776 1777 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 1778 ("unexpected cmd %d", cmd)); 1779 1780 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 1781 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 1782 1783 return (rtsock_addrmsg(cmd, ifa, fibnum)); 1784} 1785 1786/* 1787 * Announce route addition/removal. 1788 * Users of this function MUST validate input data BEFORE calling. 1789 * However we have to be able to handle invalid data: 1790 * if some userland app sends us "invalid" route message (invalid mask, 1791 * no dst, wrong address families, etc...) we need to pass it back 1792 * to app (and any other rtsock consumers) with rtm_errno field set to 1793 * non-zero value. 1794 * Returns 0 on success. 1795 */ 1796int 1797rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, 1798 int fibnum) 1799{ 1800 1801 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 1802 ("unexpected cmd %d", cmd)); 1803 1804 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 1805 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 1806 1807 KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__)); 1808 1809 return (rtsock_routemsg(cmd, ifp, error, rt, fibnum)); 1810} 1811 1812void 1813rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) 1814{ 1815 1816 rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS); 1817} 1818 1819/* 1820 * This is called to generate messages from the routing socket 1821 * indicating a network interface has had addresses associated with it. 1822 */ 1823void 1824rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, 1825 int fibnum) 1826{ 1827 1828 KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, 1829 ("unexpected cmd %u", cmd)); 1830 KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs), 1831 ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs)); 1832 1833#if defined(INET) || defined(INET6) 1834#ifdef SCTP 1835 /* 1836 * notify the SCTP stack 1837 * this will only get called when an address is added/deleted 1838 * XXX pass the ifaddr struct instead if ifa->ifa_addr... 1839 */ 1840 sctp_addr_change(ifa, cmd); 1841#endif /* SCTP */ 1842#endif 1843 if (cmd == RTM_ADD) { 1844 rt_addrmsg(cmd, ifa, fibnum); 1845 if (rt != NULL) 1846 rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); 1847 } else { 1848 if (rt != NULL) 1849 rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); 1850 rt_addrmsg(cmd, ifa, fibnum); 1851 } 1852} 1853 1854