1139826Simp/*- 262587Sitojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 362587Sitojun * All rights reserved. 462587Sitojun * 562587Sitojun * Redistribution and use in source and binary forms, with or without 662587Sitojun * modification, are permitted provided that the following conditions 762587Sitojun * are met: 862587Sitojun * 1. Redistributions of source code must retain the above copyright 962587Sitojun * notice, this list of conditions and the following disclaimer. 1062587Sitojun * 2. Redistributions in binary form must reproduce the above copyright 1162587Sitojun * notice, this list of conditions and the following disclaimer in the 1262587Sitojun * documentation and/or other materials provided with the distribution. 1362587Sitojun * 3. Neither the name of the project nor the names of its contributors 1462587Sitojun * may be used to endorse or promote products derived from this software 1562587Sitojun * without specific prior written permission. 1662587Sitojun * 1762587Sitojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 1862587Sitojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1962587Sitojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2062587Sitojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 2162587Sitojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2262587Sitojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2362587Sitojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2462587Sitojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2562587Sitojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2662587Sitojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2762587Sitojun * SUCH DAMAGE. 28174510Sobrien * 29174510Sobrien * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ 3062587Sitojun */ 3162587Sitojun 32139826Simp/*- 3362587Sitojun * Copyright (c) 1982, 1986, 1991, 1993 3462587Sitojun * The Regents of the University of California. All rights reserved. 3562587Sitojun * 3662587Sitojun * Redistribution and use in source and binary forms, with or without 3762587Sitojun * modification, are permitted provided that the following conditions 3862587Sitojun * are met: 3962587Sitojun * 1. Redistributions of source code must retain the above copyright 4062587Sitojun * notice, this list of conditions and the following disclaimer. 4162587Sitojun * 2. Redistributions in binary form must reproduce the above copyright 4262587Sitojun * notice, this list of conditions and the following disclaimer in the 4362587Sitojun * documentation and/or other materials provided with the distribution. 4462587Sitojun * 4. Neither the name of the University nor the names of its contributors 4562587Sitojun * may be used to endorse or promote products derived from this software 4662587Sitojun * without specific prior written permission. 4762587Sitojun * 4862587Sitojun * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 4962587Sitojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 5062587Sitojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 5162587Sitojun * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 5262587Sitojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 5362587Sitojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 5462587Sitojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 5562587Sitojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 5662587Sitojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 5762587Sitojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 5862587Sitojun * SUCH DAMAGE. 5962587Sitojun * 6062587Sitojun * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 6162587Sitojun */ 6262587Sitojun 63174510Sobrien#include <sys/cdefs.h> 64174510Sobrien__FBSDID("$FreeBSD$"); 65174510Sobrien 6662587Sitojun#include "opt_inet.h" 6762587Sitojun#include "opt_inet6.h" 68178167Sqingli#include "opt_mpath.h" 6962587Sitojun 7062587Sitojun#include <sys/param.h> 7162587Sitojun#include <sys/systm.h> 72168191Sjhb#include <sys/lock.h> 7378064Sume#include <sys/malloc.h> 7462587Sitojun#include <sys/mbuf.h> 75164033Srwatson#include <sys/priv.h> 7662587Sitojun#include <sys/protosw.h> 7762587Sitojun#include <sys/socket.h> 7862587Sitojun#include <sys/socketvar.h> 79121742Sume#include <sys/sockio.h> 80121742Sume#include <sys/sysctl.h> 8162587Sitojun#include <sys/errno.h> 8262587Sitojun#include <sys/time.h> 83185435Sbz#include <sys/jail.h> 84122058Sume#include <sys/kernel.h> 85149200Sume#include <sys/sx.h> 8662587Sitojun 8762587Sitojun#include <net/if.h> 88196864Sqingli#include <net/if_dl.h> 8962587Sitojun#include <net/route.h> 90186119Sqingli#include <net/if_llatbl.h> 91178167Sqingli#ifdef RADIX_MPATH 92178167Sqingli#include <net/radix_mpath.h> 93178167Sqingli#endif 9462587Sitojun 9562587Sitojun#include <netinet/in.h> 9662587Sitojun#include <netinet/in_var.h> 9762587Sitojun#include <netinet/in_systm.h> 9862587Sitojun#include <netinet/ip.h> 9962587Sitojun#include <netinet/in_pcb.h> 100184096Sbz#include <netinet/ip_var.h> 101184096Sbz#include <netinet/udp.h> 102184096Sbz#include <netinet/udp_var.h> 103185571Sbz 10462587Sitojun#include <netinet6/in6_var.h> 10562587Sitojun#include <netinet/ip6.h> 10662587Sitojun#include <netinet6/in6_pcb.h> 10762587Sitojun#include <netinet6/ip6_var.h> 108148385Sume#include <netinet6/scope6_var.h> 10962587Sitojun#include <netinet6/nd6.h> 11062587Sitojun 111121742Sumestatic struct mtx addrsel_lock; 112121742Sume#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 113121742Sume#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 114121742Sume#define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 115121742Sume#define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 116121742Sume 117149200Sumestatic struct sx addrsel_sxlock; 118149200Sume#define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock") 119149200Sume#define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock) 120149200Sume#define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock) 121149200Sume#define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock) 122149200Sume#define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) 123149200Sume 124121742Sume#define ADDR_LABEL_NOTAPP (-1) 125215701Sdimstatic VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); 126195727Srwatson#define V_defaultaddrpolicy VNET(defaultaddrpolicy) 127195699Srwatson 128207369SbzVNET_DEFINE(int, ip6_prefer_tempaddr) = 0; 129207369Sbz 130241916Sdelphijstatic int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, 131148385Sume struct ip6_moptions *, struct route_in6 *, struct ifnet **, 132241916Sdelphij struct rtentry **, int, u_int); 133241916Sdelphijstatic int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, 134231852Sbz struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, 135241916Sdelphij struct ifnet *, u_int); 136122077Sume 137175162Sobrienstatic struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); 138122077Sume 139175162Sobrienstatic void init_policy_queue(void); 140175162Sobrienstatic int add_addrsel_policyent(struct in6_addrpolicy *); 141175162Sobrienstatic int delete_addrsel_policyent(struct in6_addrpolicy *); 142241916Sdelphijstatic int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *), 143242938Sobrien void *); 144175162Sobrienstatic int dump_addrsel_policyent(struct in6_addrpolicy *, void *); 145175162Sobrienstatic struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); 146121742Sume 14762587Sitojun/* 14878064Sume * Return an IPv6 address, which is the most appropriate for a given 14962587Sitojun * destination and user specified options. 15078064Sume * If necessary, this function lookups the routing table and returns 15162587Sitojun * an entry to the caller for later use. 15262587Sitojun */ 153122077Sume#define REPLACE(r) do {\ 154249546Sae IP6STAT_INC(ip6s_sources_rule[(r)]); \ 155249528Sae rule = (r); \ 156175512Sbz /* { \ 157175512Sbz char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 158175512Sbz printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 159175512Sbz } */ \ 160122077Sume goto replace; \ 161122077Sume} while(0) 162122077Sume#define NEXT(r) do {\ 163175512Sbz /* { \ 164175512Sbz char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 165175512Sbz printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 166175512Sbz } */ \ 167171260Sdelphij goto next; /* XXX: we can't use 'continue' here */ \ 168122077Sume} while(0) 169122077Sume#define BREAK(r) do { \ 170249546Sae IP6STAT_INC(ip6s_sources_rule[(r)]); \ 171249528Sae rule = (r); \ 172171260Sdelphij goto out; /* XXX: we can't use 'break' here */ \ 173122077Sume} while(0) 174122077Sume 175194777Sbzint 176171259Sdelphijin6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 177180371Sbz struct inpcb *inp, struct route_in6 *ro, struct ucred *cred, 178194777Sbz struct ifnet **ifpp, struct in6_addr *srcp) 17962587Sitojun{ 180207276Sbz struct in6_addr dst, tmp; 181231852Sbz struct ifnet *ifp = NULL, *oifp = NULL; 182122077Sume struct in6_ifaddr *ia = NULL, *ia_best = NULL; 18362587Sitojun struct in6_pktinfo *pi = NULL; 184122077Sume int dst_scope = -1, best_scope = -1, best_matchlen = -1; 185122077Sume struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 186122077Sume u_int32_t odstzone; 187122077Sume int prefer_tempaddr; 188249528Sae int error, rule; 189180371Sbz struct ip6_moptions *mopts; 19062587Sitojun 191194777Sbz KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); 192194777Sbz 193148385Sume dst = dstsock->sin6_addr; /* make a copy for local operation */ 194231852Sbz if (ifpp) { 195231852Sbz /* 196231852Sbz * Save a possibly passed in ifp for in6_selectsrc. Only 197231852Sbz * neighbor discovery code should use this feature, where 198231852Sbz * we may know the interface but not the FIB number holding 199231852Sbz * the connected subnet in case someone deleted it from the 200231852Sbz * default FIB and we need to check the interface. 201231852Sbz */ 202231852Sbz if (*ifpp != NULL) 203231852Sbz oifp = *ifpp; 204148385Sume *ifpp = NULL; 205231852Sbz } 20662587Sitojun 207180386Sbz if (inp != NULL) { 208180386Sbz INP_LOCK_ASSERT(inp); 209180371Sbz mopts = inp->in6p_moptions; 210180386Sbz } else { 211180371Sbz mopts = NULL; 212180386Sbz } 213180371Sbz 21462587Sitojun /* 21562587Sitojun * If the source address is explicitly specified by the caller, 216122077Sume * check if the requested source address is indeed a unicast address 217122077Sume * assigned to the node, and can be used as the packet's source 218122077Sume * address. If everything is okay, use the address as source. 21962587Sitojun */ 22062587Sitojun if (opts && (pi = opts->ip6po_pktinfo) && 221122077Sume !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 222122077Sume struct sockaddr_in6 srcsock; 223122077Sume struct in6_ifaddr *ia6; 22462587Sitojun 225122077Sume /* get the outgoing interface */ 226231852Sbz if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, 227231852Sbz (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) 228231852Sbz != 0) 229194777Sbz return (error); 230125436Sume 231122077Sume /* 232122077Sume * determine the appropriate zone id of the source based on 233122077Sume * the zone of the destination and the outgoing interface. 234148385Sume * If the specified address is ambiguous wrt the scope zone, 235148385Sume * the interface must be specified; otherwise, ifa_ifwithaddr() 236148385Sume * will fail matching the address. 237122077Sume */ 238122077Sume bzero(&srcsock, sizeof(srcsock)); 239122077Sume srcsock.sin6_family = AF_INET6; 240122077Sume srcsock.sin6_len = sizeof(srcsock); 241122077Sume srcsock.sin6_addr = pi->ipi6_addr; 242122077Sume if (ifp) { 243194777Sbz error = in6_setscope(&srcsock.sin6_addr, ifp, NULL); 244194777Sbz if (error) 245194777Sbz return (error); 246122077Sume } 247194777Sbz if (cred != NULL && (error = prison_local_ip6(cred, 248188144Sjamie &srcsock.sin6_addr, (inp != NULL && 249188144Sjamie (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 250194777Sbz return (error); 251148385Sume 252297445Sae /* 253297445Sae * If IPV6_BINDANY socket option is set, we allow to specify 254297445Sae * non local addresses as source address in IPV6_PKTINFO 255297445Sae * ancillary data. 256297445Sae */ 257297445Sae if ((inp->inp_flags & INP_BINDANY) == 0) { 258297445Sae ia6 = (struct in6_ifaddr *)ifa_ifwithaddr( 259297445Sae (struct sockaddr *)&srcsock); 260297445Sae if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | 261297445Sae IN6_IFF_NOTREADY))) { 262297445Sae if (ia6 != NULL) 263297445Sae ifa_free(&ia6->ia_ifa); 264297445Sae return (EADDRNOTAVAIL); 265297445Sae } 266297445Sae bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp)); 267297445Sae ifa_free(&ia6->ia_ifa); 268297445Sae } else 269297445Sae bcopy(&srcsock.sin6_addr, srcp, sizeof(*srcp)); 270122077Sume pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 271148385Sume if (ifpp) 272148385Sume *ifpp = ifp; 273194777Sbz return (0); 274122077Sume } 275122077Sume 27662587Sitojun /* 277122077Sume * Otherwise, if the socket has already bound the source, just use it. 27862587Sitojun */ 279180371Sbz if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 280188144Sjamie if (cred != NULL && 281194777Sbz (error = prison_local_ip6(cred, &inp->in6p_laddr, 282188144Sjamie ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 283194777Sbz return (error); 284194777Sbz bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp)); 285194777Sbz return (0); 286180371Sbz } 28762587Sitojun 28862587Sitojun /* 289202468Sbz * Bypass source address selection and use the primary jail IP 290202468Sbz * if requested. 291202468Sbz */ 292202468Sbz if (cred != NULL && !prison_saddrsel_ip6(cred, srcp)) 293202468Sbz return (0); 294202468Sbz 295202468Sbz /* 296122077Sume * If the address is not specified, choose the best one based on 297122077Sume * the outgoing interface and the destination address. 29862587Sitojun */ 299122077Sume /* get the outgoing interface */ 300231852Sbz if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, oifp, 301231852Sbz (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) 302194777Sbz return (error); 30362587Sitojun 304122077Sume#ifdef DIAGNOSTIC 305122077Sume if (ifp == NULL) /* this should not happen */ 306122077Sume panic("in6_selectsrc: NULL ifp"); 307122077Sume#endif 308194777Sbz error = in6_setscope(&dst, ifp, &odstzone); 309194777Sbz if (error) 310194777Sbz return (error); 311148385Sume 312249528Sae rule = 0; 313194971Srwatson IN6_IFADDR_RLOCK(); 314194907Srwatson TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 315122077Sume int new_scope = -1, new_matchlen = -1; 316122077Sume struct in6_addrpolicy *new_policy = NULL; 317122077Sume u_int32_t srczone, osrczone, dstzone; 318148385Sume struct in6_addr src; 319122077Sume struct ifnet *ifp1 = ia->ia_ifp; 320122077Sume 321122077Sume /* 322122077Sume * We'll never take an address that breaks the scope zone 323122077Sume * of the destination. We also skip an address if its zone 324122077Sume * does not contain the outgoing interface. 325122077Sume * XXX: we should probably use sin6_scope_id here. 326122077Sume */ 327148385Sume if (in6_setscope(&dst, ifp1, &dstzone) || 328122077Sume odstzone != dstzone) { 329122077Sume continue; 33062587Sitojun } 331148385Sume src = ia->ia_addr.sin6_addr; 332148385Sume if (in6_setscope(&src, ifp, &osrczone) || 333148385Sume in6_setscope(&src, ifp1, &srczone) || 334122077Sume osrczone != srczone) { 335122077Sume continue; 336122077Sume } 33762587Sitojun 338122077Sume /* avoid unusable addresses */ 339122077Sume if ((ia->ia6_flags & 340122077Sume (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 341122077Sume continue; 34262587Sitojun } 343181803Sbz if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 344122077Sume continue; 345122077Sume 346207276Sbz /* If jailed only take addresses of the jail into account. */ 347185435Sbz if (cred != NULL && 348207276Sbz prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0) 349185435Sbz continue; 350185435Sbz 351122077Sume /* Rule 1: Prefer same address */ 352148385Sume if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 353122077Sume ia_best = ia; 354122077Sume BREAK(1); /* there should be no better candidate */ 355122077Sume } 356122077Sume 357122077Sume if (ia_best == NULL) 358122077Sume REPLACE(0); 359122077Sume 360122077Sume /* Rule 2: Prefer appropriate scope */ 361122077Sume if (dst_scope < 0) 362148385Sume dst_scope = in6_addrscope(&dst); 363122077Sume new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 364122077Sume if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 365122077Sume if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 366122077Sume REPLACE(2); 367122077Sume NEXT(2); 368122077Sume } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 369122077Sume if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 370122077Sume NEXT(2); 371122077Sume REPLACE(2); 372122077Sume } 373122077Sume 374122077Sume /* 375122077Sume * Rule 3: Avoid deprecated addresses. Note that the case of 376122077Sume * !ip6_use_deprecated is already rejected above. 377122077Sume */ 378122077Sume if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 379122077Sume NEXT(3); 380122077Sume if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 381122077Sume REPLACE(3); 382122077Sume 383122077Sume /* Rule 4: Prefer home addresses */ 384122077Sume /* 385122077Sume * XXX: This is a TODO. We should probably merge the MIP6 386122077Sume * case above. 387122077Sume */ 388122077Sume 389122077Sume /* Rule 5: Prefer outgoing interface */ 390245230Sume if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) { 391245230Sume if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 392245230Sume NEXT(5); 393245230Sume if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 394245230Sume REPLACE(5); 395245230Sume } 396122077Sume 397122077Sume /* 398122077Sume * Rule 6: Prefer matching label 399122077Sume * Note that best_policy should be non-NULL here. 400122077Sume */ 401122077Sume if (dst_policy == NULL) 402122077Sume dst_policy = lookup_addrsel_policy(dstsock); 403122077Sume if (dst_policy->label != ADDR_LABEL_NOTAPP) { 404122077Sume new_policy = lookup_addrsel_policy(&ia->ia_addr); 405122077Sume if (dst_policy->label == best_policy->label && 406122077Sume dst_policy->label != new_policy->label) 407122077Sume NEXT(6); 408122077Sume if (dst_policy->label != best_policy->label && 409122077Sume dst_policy->label == new_policy->label) 410122077Sume REPLACE(6); 411122077Sume } 412122077Sume 413122077Sume /* 414122077Sume * Rule 7: Prefer public addresses. 415122077Sume * We allow users to reverse the logic by configuring 416122077Sume * a sysctl variable, so that privacy conscious users can 417122077Sume * always prefer temporary addresses. 418122077Sume */ 419122077Sume if (opts == NULL || 420122077Sume opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 421181803Sbz prefer_tempaddr = V_ip6_prefer_tempaddr; 422122077Sume } else if (opts->ip6po_prefer_tempaddr == 423122077Sume IP6PO_TEMPADDR_NOTPREFER) { 424122077Sume prefer_tempaddr = 0; 425122077Sume } else 426122077Sume prefer_tempaddr = 1; 427122077Sume if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 428122077Sume (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 429122077Sume if (prefer_tempaddr) 430122077Sume REPLACE(7); 431122077Sume else 432122077Sume NEXT(7); 433122077Sume } 434122077Sume if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 435122077Sume !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 436122077Sume if (prefer_tempaddr) 437122077Sume NEXT(7); 438122077Sume else 439122077Sume REPLACE(7); 440122077Sume } 441122077Sume 442122077Sume /* 443122077Sume * Rule 8: prefer addresses on alive interfaces. 444122077Sume * This is a KAME specific rule. 445122077Sume */ 446122077Sume if ((ia_best->ia_ifp->if_flags & IFF_UP) && 447122077Sume !(ia->ia_ifp->if_flags & IFF_UP)) 448122077Sume NEXT(8); 449122077Sume if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 450122077Sume (ia->ia_ifp->if_flags & IFF_UP)) 451122077Sume REPLACE(8); 452122077Sume 453122077Sume /* 454269944Sae * Rule 9: prefer address with better virtual status. 455269944Sae */ 456269944Sae if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa)) 457269944Sae REPLACE(9); 458271288Sae if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa)) 459271288Sae NEXT(9); 460269944Sae 461269944Sae /* 462272790Sae * Rule 10: prefer address with `prefer_source' flag. 463272790Sae */ 464272790Sae if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 && 465272790Sae (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0) 466272790Sae REPLACE(10); 467272790Sae if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 && 468272790Sae (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0) 469272790Sae NEXT(10); 470272790Sae 471272790Sae /* 472122077Sume * Rule 14: Use longest matching prefix. 473122077Sume * Note: in the address selection draft, this rule is 474122077Sume * documented as "Rule 8". However, since it is also 475122077Sume * documented that this rule can be overridden, we assign 476122077Sume * a large number so that it is easy to assign smaller numbers 477122077Sume * to more preferred rules. 478122077Sume */ 479148385Sume new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 480122077Sume if (best_matchlen < new_matchlen) 481122077Sume REPLACE(14); 482122077Sume if (new_matchlen < best_matchlen) 483122077Sume NEXT(14); 484122077Sume 485122077Sume /* Rule 15 is reserved. */ 486122077Sume 487122077Sume /* 488122077Sume * Last resort: just keep the current candidate. 489122077Sume * Or, do we need more rules? 490122077Sume */ 491122077Sume continue; 492122077Sume 493122077Sume replace: 494122077Sume ia_best = ia; 495122077Sume best_scope = (new_scope >= 0 ? new_scope : 496122077Sume in6_addrscope(&ia_best->ia_addr.sin6_addr)); 497122077Sume best_policy = (new_policy ? new_policy : 498122077Sume lookup_addrsel_policy(&ia_best->ia_addr)); 499122077Sume best_matchlen = (new_matchlen >= 0 ? new_matchlen : 500122077Sume in6_matchlen(&ia_best->ia_addr.sin6_addr, 501148385Sume &dst)); 502122077Sume 503122077Sume next: 504122077Sume continue; 505122077Sume 506122077Sume out: 507122077Sume break; 50862587Sitojun } 50962587Sitojun 510194971Srwatson if ((ia = ia_best) == NULL) { 511194971Srwatson IN6_IFADDR_RUNLOCK(); 512249528Sae IP6STAT_INC(ip6s_sources_none); 513194777Sbz return (EADDRNOTAVAIL); 514194971Srwatson } 515122077Sume 516207276Sbz /* 517207276Sbz * At this point at least one of the addresses belonged to the jail 518207276Sbz * but it could still be, that we want to further restrict it, e.g. 519207276Sbz * theoratically IN6_IS_ADDR_LOOPBACK. 520207276Sbz * It must not be IN6_IS_ADDR_UNSPECIFIED anymore. 521207276Sbz * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should 522207276Sbz * let all others previously selected pass. 523207276Sbz * Use tmp to not change ::1 on lo0 to the primary jail address. 524207276Sbz */ 525207276Sbz tmp = ia->ia_addr.sin6_addr; 526207276Sbz if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && 527207276Sbz (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { 528207276Sbz IN6_IFADDR_RUNLOCK(); 529249528Sae IP6STAT_INC(ip6s_sources_none); 530207276Sbz return (EADDRNOTAVAIL); 531207276Sbz } 532207276Sbz 533148385Sume if (ifpp) 534148385Sume *ifpp = ifp; 535148385Sume 536207276Sbz bcopy(&tmp, srcp, sizeof(*srcp)); 537249546Sae if (ia->ia_ifp == ifp) 538249546Sae IP6STAT_INC(ip6s_sources_sameif[best_scope]); 539249546Sae else 540249546Sae IP6STAT_INC(ip6s_sources_otherif[best_scope]); 541249546Sae if (dst_scope == best_scope) 542249546Sae IP6STAT_INC(ip6s_sources_samescope[best_scope]); 543249546Sae else 544249546Sae IP6STAT_INC(ip6s_sources_otherscope[best_scope]); 545249546Sae if (IFA6_IS_DEPRECATED(ia)) 546249546Sae IP6STAT_INC(ip6s_sources_deprecated[best_scope]); 547194971Srwatson IN6_IFADDR_RUNLOCK(); 548194777Sbz return (0); 549122077Sume} 550122077Sume 551171259Sdelphij/* 552171259Sdelphij * clone - meaningful only for bsdi and freebsd 553171259Sdelphij */ 554122077Sumestatic int 555171259Sdelphijselectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 556171259Sdelphij struct ip6_moptions *mopts, struct route_in6 *ro, 557232127Sbz struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum) 558122077Sume{ 559122077Sume int error = 0; 560122077Sume struct ifnet *ifp = NULL; 561122077Sume struct rtentry *rt = NULL; 562122077Sume struct sockaddr_in6 *sin6_next; 563122077Sume struct in6_pktinfo *pi = NULL; 564122077Sume struct in6_addr *dst = &dstsock->sin6_addr; 565165118Sbz#if 0 566165118Sbz char ip6buf[INET6_ADDRSTRLEN]; 567122077Sume 568122077Sume if (dstsock->sin6_addr.s6_addr32[0] == 0 && 569122077Sume dstsock->sin6_addr.s6_addr32[1] == 0 && 570122077Sume !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 571122077Sume printf("in6_selectroute: strange destination %s\n", 572165118Sbz ip6_sprintf(ip6buf, &dstsock->sin6_addr)); 573122077Sume } else { 574122077Sume printf("in6_selectroute: destination = %s%%%d\n", 575165118Sbz ip6_sprintf(ip6buf, &dstsock->sin6_addr), 576122077Sume dstsock->sin6_scope_id); /* for debug */ 577122077Sume } 578122077Sume#endif 579122077Sume 580122077Sume /* If the caller specify the outgoing interface explicitly, use it. */ 581122077Sume if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 582122077Sume /* XXX boundary check is assumed to be already done. */ 583122077Sume ifp = ifnet_byindex(pi->ipi6_ifindex); 584122077Sume if (ifp != NULL && 585148385Sume (norouteok || retrt == NULL || 586148385Sume IN6_IS_ADDR_MULTICAST(dst))) { 587122077Sume /* 588148987Sume * we do not have to check or get the route for 589122077Sume * multicast. 590122077Sume */ 591122077Sume goto done; 592122077Sume } else 593122077Sume goto getroute; 594122077Sume } 595122077Sume 596122077Sume /* 597122077Sume * If the destination address is a multicast address and the outgoing 598122077Sume * interface for the address is specified by the caller, use it. 599122077Sume */ 600122077Sume if (IN6_IS_ADDR_MULTICAST(dst) && 601122077Sume mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 602122077Sume goto done; /* we do not need a route for multicast. */ 603122077Sume } 604122077Sume 605122077Sume getroute: 606122077Sume /* 607122077Sume * If the next hop address for the packet is specified by the caller, 608122077Sume * use it as the gateway. 609122077Sume */ 610122077Sume if (opts && opts->ip6po_nexthop) { 611122077Sume struct route_in6 *ron; 612186119Sqingli struct llentry *la; 613186119Sqingli 614122077Sume sin6_next = satosin6(opts->ip6po_nexthop); 615186119Sqingli 616122077Sume /* at this moment, we only support AF_INET6 next hops */ 617122077Sume if (sin6_next->sin6_family != AF_INET6) { 618122077Sume error = EAFNOSUPPORT; /* or should we proceed? */ 619122077Sume goto done; 620122077Sume } 621122077Sume 622122077Sume /* 623122077Sume * If the next hop is an IPv6 address, then the node identified 624122077Sume * by that address must be a neighbor of the sending host. 625122077Sume */ 626122077Sume ron = &opts->ip6po_nextroute; 627186119Sqingli /* 628186119Sqingli * XXX what do we do here? 629186119Sqingli * PLZ to be fixing 630186119Sqingli */ 631186119Sqingli 632186119Sqingli 633186119Sqingli if (ron->ro_rt == NULL) { 634231852Sbz in6_rtalloc(ron, fibnum); /* multi path case? */ 635186119Sqingli if (ron->ro_rt == NULL) { 636237459Sbz /* XXX-BZ WT.? */ 637186119Sqingli if (ron->ro_rt) { 638186119Sqingli RTFREE(ron->ro_rt); 639186119Sqingli ron->ro_rt = NULL; 640186119Sqingli } 641186119Sqingli error = EHOSTUNREACH; 642186119Sqingli goto done; 643186119Sqingli } 644186119Sqingli } 645186119Sqingli 646186119Sqingli rt = ron->ro_rt; 647186119Sqingli ifp = rt->rt_ifp; 648243148Sae IF_AFDATA_RLOCK(ifp); 649186119Sqingli la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr); 650243148Sae IF_AFDATA_RUNLOCK(ifp); 651186158Skmacy if (la != NULL) 652186119Sqingli LLE_RUNLOCK(la); 653186119Sqingli else { 654186119Sqingli error = EHOSTUNREACH; 655186119Sqingli goto done; 656186119Sqingli } 657186119Sqingli#if 0 658122077Sume if ((ron->ro_rt && 659122077Sume (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 660122077Sume (RTF_UP | RTF_LLINFO)) || 661151478Ssuz !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, 662151478Ssuz &sin6_next->sin6_addr)) { 663122077Sume if (ron->ro_rt) { 664122077Sume RTFREE(ron->ro_rt); 665122077Sume ron->ro_rt = NULL; 66662587Sitojun } 667122077Sume *satosin6(&ron->ro_dst) = *sin6_next; 668122077Sume } 669122077Sume if (ron->ro_rt == NULL) { 670232127Sbz in6_rtalloc(ron, fibnum); /* multi path case? */ 671122077Sume if (ron->ro_rt == NULL || 672122077Sume !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 673122077Sume if (ron->ro_rt) { 674122077Sume RTFREE(ron->ro_rt); 675122077Sume ron->ro_rt = NULL; 676122077Sume } 677122077Sume error = EHOSTUNREACH; 678122077Sume goto done; 67962587Sitojun } 68062587Sitojun } 681186119Sqingli#endif 682122077Sume 683122077Sume /* 684122077Sume * When cloning is required, try to allocate a route to the 685122077Sume * destination so that the caller can store path MTU 686122077Sume * information. 687122077Sume */ 688186119Sqingli goto done; 68962587Sitojun } 69062587Sitojun 69162587Sitojun /* 692122077Sume * Use a cached route if it exists and is valid, else try to allocate 693122077Sume * a new one. Note that we should check the address family of the 694122077Sume * cached destination, in case of sharing the cache with IPv4. 69562587Sitojun */ 69662587Sitojun if (ro) { 69762587Sitojun if (ro->ro_rt && 69889623Sume (!(ro->ro_rt->rt_flags & RTF_UP) || 699122077Sume ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 70089623Sume !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 701125436Sume dst))) { 70262587Sitojun RTFREE(ro->ro_rt); 703122077Sume ro->ro_rt = (struct rtentry *)NULL; 70462587Sitojun } 705122077Sume if (ro->ro_rt == (struct rtentry *)NULL) { 70678064Sume struct sockaddr_in6 *sa6; 70778064Sume 70862587Sitojun /* No route yet, so try to acquire one */ 70962587Sitojun bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 71078064Sume sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 711122077Sume *sa6 = *dstsock; 712122122Sume sa6->sin6_scope_id = 0; 713122922Sandre 714178167Sqingli#ifdef RADIX_MPATH 715231852Sbz rtalloc_mpath_fib((struct route *)ro, 716231852Sbz ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); 717186119Sqingli#else 718231852Sbz ro->ro_rt = in6_rtalloc1((struct sockaddr *) 719231852Sbz &ro->ro_dst, 0, 0UL, fibnum); 720121445Ssam if (ro->ro_rt) 721121445Ssam RT_UNLOCK(ro->ro_rt); 722186119Sqingli#endif 72362587Sitojun } 724186119Sqingli 72562587Sitojun /* 726122077Sume * do not care about the result if we have the nexthop 727122077Sume * explicitly specified. 72862587Sitojun */ 729122077Sume if (opts && opts->ip6po_nexthop) 730122077Sume goto done; 73162587Sitojun 73262587Sitojun if (ro->ro_rt) { 733122077Sume ifp = ro->ro_rt->rt_ifp; 734122077Sume 735122077Sume if (ifp == NULL) { /* can this really happen? */ 736122077Sume RTFREE(ro->ro_rt); 737122077Sume ro->ro_rt = NULL; 738122077Sume } 73962587Sitojun } 740122077Sume if (ro->ro_rt == NULL) 741122077Sume error = EHOSTUNREACH; 742122077Sume rt = ro->ro_rt; 743122077Sume 744122077Sume /* 745122077Sume * Check if the outgoing interface conflicts with 746122077Sume * the interface specified by ipi6_ifindex (if specified). 747122077Sume * Note that loopback interface is always okay. 748122077Sume * (this may happen when we are sending a packet to one of 749122077Sume * our own addresses.) 750122077Sume */ 751146228Sgnn if (ifp && opts && opts->ip6po_pktinfo && 752125436Sume opts->ip6po_pktinfo->ipi6_ifindex) { 753122077Sume if (!(ifp->if_flags & IFF_LOOPBACK) && 754122077Sume ifp->if_index != 755122077Sume opts->ip6po_pktinfo->ipi6_ifindex) { 756122077Sume error = EHOSTUNREACH; 757122077Sume goto done; 758122077Sume } 75962587Sitojun } 76062587Sitojun } 76162587Sitojun 762122077Sume done: 763122077Sume if (ifp == NULL && rt == NULL) { 764122077Sume /* 765122077Sume * This can happen if the caller did not pass a cached route 766122077Sume * nor any other hints. We treat this case an error. 767122077Sume */ 768122077Sume error = EHOSTUNREACH; 769122077Sume } 770122077Sume if (error == EHOSTUNREACH) 771249294Sae IP6STAT_INC(ip6s_noroute); 772122077Sume 773196864Sqingli if (retifp != NULL) { 774122077Sume *retifp = ifp; 775196864Sqingli 776196864Sqingli /* 777196864Sqingli * Adjust the "outgoing" interface. If we're going to loop 778196864Sqingli * the packet back to ourselves, the ifp would be the loopback 779196864Sqingli * interface. However, we'd rather know the interface associated 780196864Sqingli * to the destination address (which should probably be one of 781196864Sqingli * our own addresses.) 782196864Sqingli */ 783196864Sqingli if (rt) { 784196864Sqingli if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && 785196864Sqingli (rt->rt_gateway->sa_family == AF_LINK)) 786196864Sqingli *retifp = 787196864Sqingli ifnet_byindex(((struct sockaddr_dl *) 788196864Sqingli rt->rt_gateway)->sdl_index); 789196864Sqingli } 790196864Sqingli } 791196864Sqingli 792122077Sume if (retrt != NULL) 793122077Sume *retrt = rt; /* rt may be NULL */ 794122077Sume 795122077Sume return (error); 79662587Sitojun} 79762587Sitojun 798148385Sumestatic int 799171259Sdelphijin6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 800231852Sbz struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, 801232127Sbz struct ifnet *oifp, u_int fibnum) 802148385Sume{ 803148385Sume int error; 804148385Sume struct route_in6 sro; 805148385Sume struct rtentry *rt = NULL; 806148385Sume 807231852Sbz KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); 808231852Sbz 809148385Sume if (ro == NULL) { 810148385Sume bzero(&sro, sizeof(sro)); 811148385Sume ro = &sro; 812148385Sume } 813148385Sume 814148385Sume if ((error = selectroute(dstsock, opts, mopts, ro, retifp, 815231852Sbz &rt, 1, fibnum)) != 0) { 816158843Stanimura if (ro == &sro && rt && rt == sro.ro_rt) 817148385Sume RTFREE(rt); 818231852Sbz /* Help ND. See oifp comment in in6_selectsrc(). */ 819231852Sbz if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { 820231852Sbz *retifp = oifp; 821231852Sbz error = 0; 822231852Sbz } 823148385Sume return (error); 824148385Sume } 825148385Sume 826148385Sume /* 827148385Sume * do not use a rejected or black hole route. 828148385Sume * XXX: this check should be done in the L2 output routine. 829148385Sume * However, if we skipped this check here, we'd see the following 830148385Sume * scenario: 831148385Sume * - install a rejected route for a scoped address prefix 832148385Sume * (like fe80::/10) 833148385Sume * - send a packet to a destination that matches the scoped prefix, 834148385Sume * with ambiguity about the scope zone. 835148385Sume * - pick the outgoing interface from the route, and disambiguate the 836148385Sume * scope zone with the interface. 837148385Sume * - ip6_output() would try to get another route with the "new" 838148385Sume * destination, which may be valid. 839148385Sume * - we'd see no error on output. 840148385Sume * Although this may not be very harmful, it should still be confusing. 841148385Sume * We thus reject the case here. 842148385Sume */ 843148385Sume if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 844148385Sume int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 845148385Sume 846158843Stanimura if (ro == &sro && rt && rt == sro.ro_rt) 847148385Sume RTFREE(rt); 848148385Sume return (flags); 849148385Sume } 850148385Sume 851158843Stanimura if (ro == &sro && rt && rt == sro.ro_rt) 852148385Sume RTFREE(rt); 853148385Sume return (0); 854148385Sume} 855148385Sume 856171259Sdelphij/* 857231852Sbz * Public wrapper function to selectroute(). 858231852Sbz * 859231852Sbz * XXX-BZ in6_selectroute() should and will grow the FIB argument. The 860231852Sbz * in6_selectroute_fib() function is only there for backward compat on stable. 861171259Sdelphij */ 862148385Sumeint 863171259Sdelphijin6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 864171259Sdelphij struct ip6_moptions *mopts, struct route_in6 *ro, 865186119Sqingli struct ifnet **retifp, struct rtentry **retrt) 866148385Sume{ 867171259Sdelphij 868148385Sume return (selectroute(dstsock, opts, mopts, ro, retifp, 869231852Sbz retrt, 0, RT_DEFAULT_FIB)); 870148385Sume} 871148385Sume 872231852Sbz#ifndef BURN_BRIDGES 873231852Sbzint 874231852Sbzin6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 875231852Sbz struct ip6_moptions *mopts, struct route_in6 *ro, 876232127Sbz struct ifnet **retifp, struct rtentry **retrt, u_int fibnum) 877231852Sbz{ 878231852Sbz 879231852Sbz return (selectroute(dstsock, opts, mopts, ro, retifp, 880231852Sbz retrt, 0, fibnum)); 881231852Sbz} 882231852Sbz#endif 883231852Sbz 88462587Sitojun/* 88562587Sitojun * Default hop limit selection. The precedence is as follows: 88662587Sitojun * 1. Hoplimit value specified via ioctl. 88762587Sitojun * 2. (If the outgoing interface is detected) the current 88862587Sitojun * hop limit of the interface specified by router advertisement. 88962587Sitojun * 3. The system default hoplimit. 890122922Sandre */ 89162587Sitojunint 892186141Sbzin6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) 89362587Sitojun{ 894171259Sdelphij 89562587Sitojun if (in6p && in6p->in6p_hops >= 0) 896120856Sume return (in6p->in6p_hops); 89762587Sitojun else if (ifp) 898121161Sume return (ND_IFINFO(ifp)->chlim); 899122922Sandre else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 900122922Sandre struct route_in6 ro6; 901122922Sandre struct ifnet *lifp; 902122922Sandre 903122922Sandre bzero(&ro6, sizeof(ro6)); 904122922Sandre ro6.ro_dst.sin6_family = AF_INET6; 905122922Sandre ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 906122922Sandre ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 907232127Sbz in6_rtalloc(&ro6, in6p->inp_inc.inc_fibnum); 908122922Sandre if (ro6.ro_rt) { 909122922Sandre lifp = ro6.ro_rt->rt_ifp; 910122922Sandre RTFREE(ro6.ro_rt); 911122922Sandre if (lifp) 912122922Sandre return (ND_IFINFO(lifp)->chlim); 913235955Sbz } 914122922Sandre } 915181803Sbz return (V_ip6_defhlim); 91662587Sitojun} 91762587Sitojun 91862587Sitojun/* 91962587Sitojun * XXX: this is borrowed from in6_pcbbind(). If possible, we should 92062587Sitojun * share this function by all *bsd*... 92162587Sitojun */ 92262587Sitojunint 923171259Sdelphijin6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) 92462587Sitojun{ 92562587Sitojun struct socket *so = inp->inp_socket; 926219579Sbz u_int16_t lport = 0; 927222215Srwatson int error, lookupflags = 0; 928219579Sbz#ifdef INVARIANTS 92962587Sitojun struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 930219579Sbz#endif 93162587Sitojun 932178285Srwatson INP_WLOCK_ASSERT(inp); 933222488Srwatson INP_HASH_WLOCK_ASSERT(pcbinfo); 934158011Srwatson 935188144Sjamie error = prison_local_ip6(cred, laddr, 936188144Sjamie ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); 937188144Sjamie if (error) 938188144Sjamie return(error); 939185435Sbz 94062587Sitojun /* XXX: this is redundant when called from in6_pcbbind */ 94162587Sitojun if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 942222215Srwatson lookupflags = INPLOOKUP_WILDCARD; 94362587Sitojun 94462587Sitojun inp->inp_flags |= INP_ANONPORT; 94562587Sitojun 946222215Srwatson error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags); 947219579Sbz if (error != 0) 948219579Sbz return (error); 949184096Sbz 95062587Sitojun inp->inp_lport = lport; 95162587Sitojun if (in_pcbinshash(inp) != 0) { 95262587Sitojun inp->in6p_laddr = in6addr_any; 95362587Sitojun inp->inp_lport = 0; 95462587Sitojun return (EAGAIN); 95562587Sitojun } 95662587Sitojun 957120856Sume return (0); 95862587Sitojun} 95962587Sitojun 96078064Sumevoid 961171259Sdelphijaddrsel_policy_init(void) 962121742Sume{ 963121742Sume 964121742Sume init_policy_queue(); 965121742Sume 966121742Sume /* initialize the "last resort" policy */ 967181803Sbz bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy)); 968181803Sbz V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 969190787Szec 970190787Szec if (!IS_DEFAULT_VNET(curvnet)) 971190787Szec return; 972190787Szec 973190787Szec ADDRSEL_LOCK_INIT(); 974190787Szec ADDRSEL_SXLOCK_INIT(); 975121742Sume} 976121742Sume 977122077Sumestatic struct in6_addrpolicy * 978171259Sdelphijlookup_addrsel_policy(struct sockaddr_in6 *key) 979122077Sume{ 980122077Sume struct in6_addrpolicy *match = NULL; 981122077Sume 982122077Sume ADDRSEL_LOCK(); 983122077Sume match = match_addrsel_policy(key); 984122077Sume 985122077Sume if (match == NULL) 986181803Sbz match = &V_defaultaddrpolicy; 987122077Sume else 988122077Sume match->use++; 989122077Sume ADDRSEL_UNLOCK(); 990122077Sume 991122077Sume return (match); 992122077Sume} 993122077Sume 994121742Sume/* 995121742Sume * Subroutines to manage the address selection policy table via sysctl. 996121742Sume */ 997121742Sumestruct walkarg { 998121742Sume struct sysctl_req *w_req; 999121742Sume}; 1000121742Sume 1001121742Sumestatic int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 1002121742SumeSYSCTL_DECL(_net_inet6_ip6); 1003227309Sedstatic SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 1004121742Sume CTLFLAG_RD, in6_src_sysctl, ""); 1005121742Sume 1006121742Sumestatic int 1007121742Sumein6_src_sysctl(SYSCTL_HANDLER_ARGS) 1008121742Sume{ 1009121742Sume struct walkarg w; 1010121742Sume 1011121742Sume if (req->newptr) 1012121742Sume return EPERM; 1013121742Sume 1014121742Sume bzero(&w, sizeof(w)); 1015121742Sume w.w_req = req; 1016121742Sume 1017121742Sume return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 1018121742Sume} 1019121742Sume 1020121742Sumeint 1021171259Sdelphijin6_src_ioctl(u_long cmd, caddr_t data) 1022121742Sume{ 1023121742Sume int i; 1024121742Sume struct in6_addrpolicy ent0; 1025121742Sume 1026121742Sume if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1027121742Sume return (EOPNOTSUPP); /* check for safety */ 1028121742Sume 1029121742Sume ent0 = *(struct in6_addrpolicy *)data; 1030121742Sume 1031121742Sume if (ent0.label == ADDR_LABEL_NOTAPP) 1032121742Sume return (EINVAL); 1033121742Sume /* check if the prefix mask is consecutive. */ 1034121742Sume if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1035121742Sume return (EINVAL); 1036121742Sume /* clear trailing garbages (if any) of the prefix address. */ 1037121742Sume for (i = 0; i < 4; i++) { 1038121742Sume ent0.addr.sin6_addr.s6_addr32[i] &= 1039121742Sume ent0.addrmask.sin6_addr.s6_addr32[i]; 1040121742Sume } 1041121742Sume ent0.use = 0; 1042121742Sume 1043121742Sume switch (cmd) { 1044121742Sume case SIOCAADDRCTL_POLICY: 1045121742Sume return (add_addrsel_policyent(&ent0)); 1046121742Sume case SIOCDADDRCTL_POLICY: 1047121742Sume return (delete_addrsel_policyent(&ent0)); 1048121742Sume } 1049121742Sume 1050121742Sume return (0); /* XXX: compromise compilers */ 1051121742Sume} 1052121742Sume 1053121742Sume/* 1054121742Sume * The followings are implementation of the policy table using a 1055121742Sume * simple tail queue. 1056121742Sume * XXX such details should be hidden. 1057121742Sume * XXX implementation using binary tree should be more efficient. 1058121742Sume */ 1059121742Sumestruct addrsel_policyent { 1060121742Sume TAILQ_ENTRY(addrsel_policyent) ape_entry; 1061121742Sume struct in6_addrpolicy ape_policy; 1062121742Sume}; 1063121742Sume 1064121742SumeTAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1065121742Sume 1066215701Sdimstatic VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab); 1067195727Srwatson#define V_addrsel_policytab VNET(addrsel_policytab) 1068121742Sume 1069121742Sumestatic void 1070171259Sdelphijinit_policy_queue(void) 1071121742Sume{ 1072171259Sdelphij 1073181803Sbz TAILQ_INIT(&V_addrsel_policytab); 1074121742Sume} 1075121742Sume 1076121742Sumestatic int 1077171259Sdelphijadd_addrsel_policyent(struct in6_addrpolicy *newpolicy) 1078121742Sume{ 1079121742Sume struct addrsel_policyent *new, *pol; 1080121742Sume 1081184205Sdes new = malloc(sizeof(*new), M_IFADDR, 1082121750Sume M_WAITOK); 1083149200Sume ADDRSEL_XLOCK(); 1084121742Sume ADDRSEL_LOCK(); 1085121742Sume 1086121742Sume /* duplication check */ 1087181803Sbz TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1088151478Ssuz if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 1089151478Ssuz &pol->ape_policy.addr.sin6_addr) && 1090151478Ssuz IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 1091151478Ssuz &pol->ape_policy.addrmask.sin6_addr)) { 1092121750Sume ADDRSEL_UNLOCK(); 1093149200Sume ADDRSEL_XUNLOCK(); 1094184205Sdes free(new, M_IFADDR); 1095121742Sume return (EEXIST); /* or override it? */ 1096121742Sume } 1097121742Sume } 1098121742Sume 1099121742Sume bzero(new, sizeof(*new)); 1100121742Sume 1101121742Sume /* XXX: should validate entry */ 1102121742Sume new->ape_policy = *newpolicy; 1103121742Sume 1104181803Sbz TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry); 1105121742Sume ADDRSEL_UNLOCK(); 1106149200Sume ADDRSEL_XUNLOCK(); 1107121742Sume 1108121742Sume return (0); 1109121742Sume} 1110121742Sume 1111121742Sumestatic int 1112171259Sdelphijdelete_addrsel_policyent(struct in6_addrpolicy *key) 1113121742Sume{ 1114121742Sume struct addrsel_policyent *pol; 1115121742Sume 1116149200Sume ADDRSEL_XLOCK(); 1117121742Sume ADDRSEL_LOCK(); 1118121742Sume 1119121742Sume /* search for the entry in the table */ 1120181803Sbz TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1121151478Ssuz if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1122151478Ssuz &pol->ape_policy.addr.sin6_addr) && 1123151478Ssuz IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1124151478Ssuz &pol->ape_policy.addrmask.sin6_addr)) { 1125121742Sume break; 1126121742Sume } 1127121742Sume } 1128121750Sume if (pol == NULL) { 1129121750Sume ADDRSEL_UNLOCK(); 1130149200Sume ADDRSEL_XUNLOCK(); 1131121742Sume return (ESRCH); 1132121750Sume } 1133121742Sume 1134181803Sbz TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry); 1135121742Sume ADDRSEL_UNLOCK(); 1136149200Sume ADDRSEL_XUNLOCK(); 1137249398Sae free(pol, M_IFADDR); 1138121742Sume 1139121742Sume return (0); 1140121742Sume} 1141121742Sume 1142121742Sumestatic int 1143242938Sobrienwalk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w) 1144121742Sume{ 1145121742Sume struct addrsel_policyent *pol; 1146121742Sume int error = 0; 1147121742Sume 1148149200Sume ADDRSEL_SLOCK(); 1149181803Sbz TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1150149200Sume if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1151149200Sume ADDRSEL_SUNLOCK(); 1152121742Sume return (error); 1153149200Sume } 1154121742Sume } 1155149200Sume ADDRSEL_SUNLOCK(); 1156121742Sume return (error); 1157121742Sume} 1158121742Sume 1159121742Sumestatic int 1160171259Sdelphijdump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) 1161121742Sume{ 1162121742Sume int error = 0; 1163121742Sume struct walkarg *w = arg; 1164121742Sume 1165121742Sume error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1166121742Sume 1167121742Sume return (error); 1168121742Sume} 1169122077Sume 1170122077Sumestatic struct in6_addrpolicy * 1171171259Sdelphijmatch_addrsel_policy(struct sockaddr_in6 *key) 1172122077Sume{ 1173122077Sume struct addrsel_policyent *pent; 1174122077Sume struct in6_addrpolicy *bestpol = NULL, *pol; 1175122077Sume int matchlen, bestmatchlen = -1; 1176122077Sume u_char *mp, *ep, *k, *p, m; 1177122077Sume 1178181803Sbz TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) { 1179122077Sume matchlen = 0; 1180122077Sume 1181122077Sume pol = &pent->ape_policy; 1182122077Sume mp = (u_char *)&pol->addrmask.sin6_addr; 1183122077Sume ep = mp + 16; /* XXX: scope field? */ 1184122077Sume k = (u_char *)&key->sin6_addr; 1185122077Sume p = (u_char *)&pol->addr.sin6_addr; 1186122077Sume for (; mp < ep && *mp; mp++, k++, p++) { 1187122077Sume m = *mp; 1188122077Sume if ((*k & m) != *p) 1189122077Sume goto next; /* not match */ 1190122077Sume if (m == 0xff) /* short cut for a typical case */ 1191122077Sume matchlen += 8; 1192122077Sume else { 1193122077Sume while (m >= 0x80) { 1194122077Sume matchlen++; 1195122077Sume m <<= 1; 1196122077Sume } 1197122077Sume } 1198122077Sume } 1199122077Sume 1200122077Sume /* matched. check if this is better than the current best. */ 1201122077Sume if (bestpol == NULL || 1202122077Sume matchlen > bestmatchlen) { 1203122077Sume bestpol = pol; 1204122077Sume bestmatchlen = matchlen; 1205122077Sume } 1206122077Sume 1207122077Sume next: 1208122077Sume continue; 1209122077Sume } 1210122077Sume 1211122077Sume return (bestpol); 1212122077Sume} 1213