nd6_rtr.c revision 299014
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/netinet6/nd6_rtr.c 299014 2016-05-03 23:46:01Z markj $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/socket.h>
43#include <sys/sockio.h>
44#include <sys/time.h>
45#include <sys/kernel.h>
46#include <sys/lock.h>
47#include <sys/errno.h>
48#include <sys/rwlock.h>
49#include <sys/syslog.h>
50#include <sys/queue.h>
51
52#include <net/if.h>
53#include <net/if_types.h>
54#include <net/if_dl.h>
55#include <net/route.h>
56#include <net/radix.h>
57#include <net/vnet.h>
58
59#include <netinet/in.h>
60#include <net/if_llatbl.h>
61#include <netinet6/in6_var.h>
62#include <netinet6/in6_ifattach.h>
63#include <netinet/ip6.h>
64#include <netinet6/ip6_var.h>
65#include <netinet6/nd6.h>
66#include <netinet/icmp6.h>
67#include <netinet6/scope6_var.h>
68
/*
 * Forward declarations for helpers that are private to this file.
 */
static int rtpref(struct nd_defrouter *);
static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
    struct mbuf *, int);
static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
	struct nd_defrouter *);
static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
static void pfxrtr_del(struct nd_pfxrouter *);
static struct nd_pfxrouter *find_pfxlist_reachable_router
(struct nd_prefix *);
static void defrouter_delreq(struct nd_defrouter *);
static void nd6_rtmsg(int, struct rtentry *);

static int in6_init_prefix_ltimes(struct nd_prefix *);
static void in6_init_address_ltimes(struct nd_prefix *,
	struct in6_addrlifetime *);

static int nd6_prefix_onlink(struct nd_prefix *);
static int nd6_prefix_offlink(struct nd_prefix *);

static int rt6_deleteroute(struct radix_node *, void *);

/*
 * Only declared here (not defined in this file); nd6_ra_input() copies it
 * into the per-interface recalctm field when the base reachable time changes.
 */
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)

static VNET_DEFINE(struct ifnet *, nd6_defifp);
VNET_DEFINE(int, nd6_defifindex);
#define	V_nd6_defifp			VNET(nd6_defifp)

/* Initialized to 0 here. */
VNET_DEFINE(int, ip6_use_tempaddr) = 0;

VNET_DEFINE(int, ip6_desync_factor);
VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME;
VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;

VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;

/*
 * Router preference values returned by rtpref().  They are compared
 * numerically (see defrtrlist_update() and defrouter_select()), so a
 * larger value means a more preferred router.
 */
/* RTPREF_MEDIUM has to be 0! */
#define RTPREF_HIGH	1
#define RTPREF_MEDIUM	0
#define RTPREF_LOW	(-1)
#define RTPREF_RESERVED	(-2)
#define RTPREF_INVALID	(-3)	/* internal */
113
114/*
115 * Receive Router Solicitation Message - just for routers.
116 * Router solicitation/advertisement is mostly managed by userland program
117 * (rtadvd) so here we have no function like nd6_ra_output().
118 *
119 * Based on RFC 2461
120 */
121void
122nd6_rs_input(struct mbuf *m, int off, int icmp6len)
123{
124	struct ifnet *ifp = m->m_pkthdr.rcvif;
125	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
126	struct nd_router_solicit *nd_rs;
127	struct in6_addr saddr6 = ip6->ip6_src;
128	char *lladdr = NULL;
129	int lladdrlen = 0;
130	union nd_opts ndopts;
131	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
132
133	/*
134	 * Accept RS only when V_ip6_forwarding=1 and the interface has
135	 * no ND6_IFF_ACCEPT_RTADV.
136	 */
137	if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV)
138		goto freeit;
139
140	/* Sanity checks */
141	if (ip6->ip6_hlim != 255) {
142		nd6log((LOG_ERR,
143		    "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
144		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
145		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
146		goto bad;
147	}
148
149	/*
150	 * Don't update the neighbor cache, if src = ::.
151	 * This indicates that the src has no IP address assigned yet.
152	 */
153	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
154		goto freeit;
155
156#ifndef PULLDOWN_TEST
157	IP6_EXTHDR_CHECK(m, off, icmp6len,);
158	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
159#else
160	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
161	if (nd_rs == NULL) {
162		ICMP6STAT_INC(icp6s_tooshort);
163		return;
164	}
165#endif
166
167	icmp6len -= sizeof(*nd_rs);
168	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
169	if (nd6_options(&ndopts) < 0) {
170		nd6log((LOG_INFO,
171		    "nd6_rs_input: invalid ND option, ignored\n"));
172		/* nd6_options have incremented stats */
173		goto freeit;
174	}
175
176	if (ndopts.nd_opts_src_lladdr) {
177		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
178		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
179	}
180
181	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
182		nd6log((LOG_INFO,
183		    "nd6_rs_input: lladdrlen mismatch for %s "
184		    "(if %d, RS packet %d)\n",
185		    ip6_sprintf(ip6bufs, &saddr6),
186		    ifp->if_addrlen, lladdrlen - 2));
187		goto bad;
188	}
189
190	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
191
192 freeit:
193	m_freem(m);
194	return;
195
196 bad:
197	ICMP6STAT_INC(icp6s_badrs);
198	m_freem(m);
199}
200
201/*
202 * Receive Router Advertisement Message.
203 *
204 * Based on RFC 2461
205 * TODO: on-link bit on prefix information
206 * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
207 */
208void
209nd6_ra_input(struct mbuf *m, int off, int icmp6len)
210{
211	struct ifnet *ifp = m->m_pkthdr.rcvif;
212	struct nd_ifinfo *ndi = ND_IFINFO(ifp);
213	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
214	struct nd_router_advert *nd_ra;
215	struct in6_addr saddr6 = ip6->ip6_src;
216	int mcast = 0;
217	union nd_opts ndopts;
218	struct nd_defrouter *dr;
219	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
220
221	/*
222	 * We only accept RAs only when the per-interface flag
223	 * ND6_IFF_ACCEPT_RTADV is on the receiving interface.
224	 */
225	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
226		goto freeit;
227
228	if (ip6->ip6_hlim != 255) {
229		nd6log((LOG_ERR,
230		    "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
231		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
232		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
233		goto bad;
234	}
235
236	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
237		nd6log((LOG_ERR,
238		    "nd6_ra_input: src %s is not link-local\n",
239		    ip6_sprintf(ip6bufs, &saddr6)));
240		goto bad;
241	}
242
243#ifndef PULLDOWN_TEST
244	IP6_EXTHDR_CHECK(m, off, icmp6len,);
245	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
246#else
247	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
248	if (nd_ra == NULL) {
249		ICMP6STAT_INC(icp6s_tooshort);
250		return;
251	}
252#endif
253
254	icmp6len -= sizeof(*nd_ra);
255	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
256	if (nd6_options(&ndopts) < 0) {
257		nd6log((LOG_INFO,
258		    "nd6_ra_input: invalid ND option, ignored\n"));
259		/* nd6_options have incremented stats */
260		goto freeit;
261	}
262
263    {
264	struct nd_defrouter dr0;
265	u_int32_t advreachable = nd_ra->nd_ra_reachable;
266
267	/* remember if this is a multicasted advertisement */
268	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
269		mcast = 1;
270
271	bzero(&dr0, sizeof(dr0));
272	dr0.rtaddr = saddr6;
273	dr0.flags  = nd_ra->nd_ra_flags_reserved;
274	/*
275	 * Effectively-disable routes from RA messages when
276	 * ND6_IFF_NO_RADR enabled on the receiving interface or
277	 * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1).
278	 */
279	if (ndi->flags & ND6_IFF_NO_RADR)
280		dr0.rtlifetime = 0;
281	else if (V_ip6_forwarding && !V_ip6_rfc6204w3)
282		dr0.rtlifetime = 0;
283	else
284		dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
285	dr0.expire = time_uptime + dr0.rtlifetime;
286	dr0.ifp = ifp;
287	/* unspecified or not? (RFC 2461 6.3.4) */
288	if (advreachable) {
289		advreachable = ntohl(advreachable);
290		if (advreachable <= MAX_REACHABLE_TIME &&
291		    ndi->basereachable != advreachable) {
292			ndi->basereachable = advreachable;
293			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
294			ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
295		}
296	}
297	if (nd_ra->nd_ra_retransmit)
298		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
299	if (nd_ra->nd_ra_curhoplimit) {
300		if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
301			ndi->chlim = nd_ra->nd_ra_curhoplimit;
302		else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
303			log(LOG_ERR, "RA with a lower CurHopLimit sent from "
304			    "%s on %s (current = %d, received = %d). "
305			    "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
306			    if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
307		}
308	}
309	dr = defrtrlist_update(&dr0);
310    }
311
312	/*
313	 * prefix
314	 */
315	if (ndopts.nd_opts_pi) {
316		struct nd_opt_hdr *pt;
317		struct nd_opt_prefix_info *pi = NULL;
318		struct nd_prefixctl pr;
319
320		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
321		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
322		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
323						(pt->nd_opt_len << 3))) {
324			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
325				continue;
326			pi = (struct nd_opt_prefix_info *)pt;
327
328			if (pi->nd_opt_pi_len != 4) {
329				nd6log((LOG_INFO,
330				    "nd6_ra_input: invalid option "
331				    "len %d for prefix information option, "
332				    "ignored\n", pi->nd_opt_pi_len));
333				continue;
334			}
335
336			if (128 < pi->nd_opt_pi_prefix_len) {
337				nd6log((LOG_INFO,
338				    "nd6_ra_input: invalid prefix "
339				    "len %d for prefix information option, "
340				    "ignored\n", pi->nd_opt_pi_prefix_len));
341				continue;
342			}
343
344			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
345			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
346				nd6log((LOG_INFO,
347				    "nd6_ra_input: invalid prefix "
348				    "%s, ignored\n",
349				    ip6_sprintf(ip6bufs,
350					&pi->nd_opt_pi_prefix)));
351				continue;
352			}
353
354			bzero(&pr, sizeof(pr));
355			pr.ndpr_prefix.sin6_family = AF_INET6;
356			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
357			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
358			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
359
360			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
361			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
362			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
363			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
364			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
365			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
366			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
367			(void)prelist_update(&pr, dr, m, mcast);
368		}
369	}
370
371	/*
372	 * MTU
373	 */
374	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
375		u_long mtu;
376		u_long maxmtu;
377
378		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
379
380		/* lower bound */
381		if (mtu < IPV6_MMTU) {
382			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
383			    "mtu=%lu sent from %s, ignoring\n",
384			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
385			goto skip;
386		}
387
388		/* upper bound */
389		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
390		    ? ndi->maxmtu : ifp->if_mtu;
391		if (mtu <= maxmtu) {
392			int change = (ndi->linkmtu != mtu);
393
394			ndi->linkmtu = mtu;
395			if (change) /* in6_maxmtu may change */
396				in6_setmaxmtu();
397		} else {
398			nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
399			    "mtu=%lu sent from %s; "
400			    "exceeds maxmtu %lu, ignoring\n",
401			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
402		}
403	}
404
405 skip:
406
407	/*
408	 * Source link layer address
409	 */
410    {
411	char *lladdr = NULL;
412	int lladdrlen = 0;
413
414	if (ndopts.nd_opts_src_lladdr) {
415		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
416		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
417	}
418
419	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
420		nd6log((LOG_INFO,
421		    "nd6_ra_input: lladdrlen mismatch for %s "
422		    "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
423		    ifp->if_addrlen, lladdrlen - 2));
424		goto bad;
425	}
426
427	nd6_cache_lladdr(ifp, &saddr6, lladdr,
428	    lladdrlen, ND_ROUTER_ADVERT, 0);
429
430	/*
431	 * Installing a link-layer address might change the state of the
432	 * router's neighbor cache, which might also affect our on-link
433	 * detection of adveritsed prefixes.
434	 */
435	pfxlist_onlink_check();
436    }
437
438 freeit:
439	m_freem(m);
440	return;
441
442 bad:
443	ICMP6STAT_INC(icp6s_badra);
444	m_freem(m);
445}
446
/*
 * default router list processing subroutines
 */
450
451/* tell the change to user processes watching the routing socket. */
452static void
453nd6_rtmsg(int cmd, struct rtentry *rt)
454{
455	struct rt_addrinfo info;
456	struct ifnet *ifp;
457	struct ifaddr *ifa;
458
459	bzero((caddr_t)&info, sizeof(info));
460	info.rti_info[RTAX_DST] = rt_key(rt);
461	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
462	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
463	ifp = rt->rt_ifp;
464	if (ifp != NULL) {
465		IF_ADDR_RLOCK(ifp);
466		ifa = TAILQ_FIRST(&ifp->if_addrhead);
467		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
468		ifa_ref(ifa);
469		IF_ADDR_RUNLOCK(ifp);
470		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
471	} else
472		ifa = NULL;
473
474	rt_missmsg_fib(cmd, &info, rt->rt_flags, 0, rt->rt_fibnum);
475	if (ifa != NULL)
476		ifa_free(ifa);
477}
478
479static void
480defrouter_addreq(struct nd_defrouter *new)
481{
482	struct sockaddr_in6 def, mask, gate;
483	struct rtentry *newrt = NULL;
484	int error;
485
486	bzero(&def, sizeof(def));
487	bzero(&mask, sizeof(mask));
488	bzero(&gate, sizeof(gate));
489
490	def.sin6_len = mask.sin6_len = gate.sin6_len =
491	    sizeof(struct sockaddr_in6);
492	def.sin6_family = gate.sin6_family = AF_INET6;
493	gate.sin6_addr = new->rtaddr;
494
495	error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
496	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
497	    RTF_GATEWAY, &newrt, RT_DEFAULT_FIB);
498	if (newrt) {
499		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
500		RTFREE(newrt);
501	}
502	if (error == 0)
503		new->installed = 1;
504}
505
506struct nd_defrouter *
507defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
508{
509	struct nd_defrouter *dr;
510
511	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
512		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
513			return (dr);
514	}
515
516	return (NULL);		/* search failed */
517}
518
519/*
520 * Remove the default route for a given router.
521 * This is just a subroutine function for defrouter_select(), and should
522 * not be called from anywhere else.
523 */
524static void
525defrouter_delreq(struct nd_defrouter *dr)
526{
527	struct sockaddr_in6 def, mask, gate;
528	struct rtentry *oldrt = NULL;
529
530	bzero(&def, sizeof(def));
531	bzero(&mask, sizeof(mask));
532	bzero(&gate, sizeof(gate));
533
534	def.sin6_len = mask.sin6_len = gate.sin6_len =
535	    sizeof(struct sockaddr_in6);
536	def.sin6_family = gate.sin6_family = AF_INET6;
537	gate.sin6_addr = dr->rtaddr;
538
539	in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def,
540	    (struct sockaddr *)&gate,
541	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, RT_DEFAULT_FIB);
542	if (oldrt) {
543		nd6_rtmsg(RTM_DELETE, oldrt);
544		RTFREE(oldrt);
545	}
546
547	dr->installed = 0;
548}
549
550/*
551 * remove all default routes from default router list
552 */
553void
554defrouter_reset(void)
555{
556	struct nd_defrouter *dr;
557
558	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
559		defrouter_delreq(dr);
560
561	/*
562	 * XXX should we also nuke any default routers in the kernel, by
563	 * going through them by rtalloc1()?
564	 */
565}
566
567void
568defrtrlist_del(struct nd_defrouter *dr)
569{
570	struct nd_defrouter *deldr = NULL;
571	struct nd_prefix *pr;
572
573	/*
574	 * Flush all the routing table entries that use the router
575	 * as a next hop.
576	 */
577	if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
578		rt6_flush(&dr->rtaddr, dr->ifp);
579
580	if (dr->installed) {
581		deldr = dr;
582		defrouter_delreq(dr);
583	}
584	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
585
586	/*
587	 * Also delete all the pointers to the router in each prefix lists.
588	 */
589	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
590		struct nd_pfxrouter *pfxrtr;
591		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
592			pfxrtr_del(pfxrtr);
593	}
594	pfxlist_onlink_check();
595
596	/*
597	 * If the router is the primary one, choose a new one.
598	 * Note that defrouter_select() will remove the current gateway
599	 * from the routing table.
600	 */
601	if (deldr)
602		defrouter_select();
603
604	free(dr, M_IP6NDP);
605}
606
607/*
608 * Default Router Selection according to Section 6.3.6 of RFC 2461 and
609 * draft-ietf-ipngwg-router-selection:
610 * 1) Routers that are reachable or probably reachable should be preferred.
611 *    If we have more than one (probably) reachable router, prefer ones
612 *    with the highest router preference.
613 * 2) When no routers on the list are known to be reachable or
614 *    probably reachable, routers SHOULD be selected in a round-robin
615 *    fashion, regardless of router preference values.
616 * 3) If the Default Router List is empty, assume that all
617 *    destinations are on-link.
618 *
619 * We assume nd_defrouter is sorted by router preference value.
620 * Since the code below covers both with and without router preference cases,
621 * we do not need to classify the cases by ifdef.
622 *
623 * At this moment, we do not try to install more than one default router,
624 * even when the multipath routing is available, because we're not sure about
625 * the benefits for stub hosts comparing to the risk of making the code
626 * complicated and the possibility of introducing bugs.
627 */
628void
629defrouter_select(void)
630{
631	struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
632	struct llentry *ln = NULL;
633
634	/*
635	 * Let's handle easy case (3) first:
636	 * If default router list is empty, there's nothing to be done.
637	 */
638	if (TAILQ_EMPTY(&V_nd_defrouter))
639		return;
640
641	/*
642	 * Search for a (probably) reachable router from the list.
643	 * We just pick up the first reachable one (if any), assuming that
644	 * the ordering rule of the list described in defrtrlist_update().
645	 */
646	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
647		IF_AFDATA_RLOCK(dr->ifp);
648		if (selected_dr == NULL &&
649		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
650		    ND6_IS_LLINFO_PROBREACH(ln)) {
651			selected_dr = dr;
652		}
653		IF_AFDATA_RUNLOCK(dr->ifp);
654		if (ln != NULL) {
655			LLE_RUNLOCK(ln);
656			ln = NULL;
657		}
658
659		if (dr->installed && installed_dr == NULL)
660			installed_dr = dr;
661		else if (dr->installed && installed_dr) {
662			/* this should not happen.  warn for diagnosis. */
663			log(LOG_ERR, "defrouter_select: more than one router"
664			    " is installed\n");
665		}
666	}
667	/*
668	 * If none of the default routers was found to be reachable,
669	 * round-robin the list regardless of preference.
670	 * Otherwise, if we have an installed router, check if the selected
671	 * (reachable) router should really be preferred to the installed one.
672	 * We only prefer the new router when the old one is not reachable
673	 * or when the new one has a really higher preference value.
674	 */
675	if (selected_dr == NULL) {
676		if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
677			selected_dr = TAILQ_FIRST(&V_nd_defrouter);
678		else
679			selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
680	} else if (installed_dr) {
681		IF_AFDATA_RLOCK(installed_dr->ifp);
682		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
683		    ND6_IS_LLINFO_PROBREACH(ln) &&
684		    rtpref(selected_dr) <= rtpref(installed_dr)) {
685			selected_dr = installed_dr;
686		}
687		IF_AFDATA_RUNLOCK(installed_dr->ifp);
688		if (ln != NULL)
689			LLE_RUNLOCK(ln);
690	}
691
692	/*
693	 * If the selected router is different than the installed one,
694	 * remove the installed router and install the selected one.
695	 * Note that the selected router is never NULL here.
696	 */
697	if (installed_dr != selected_dr) {
698		if (installed_dr)
699			defrouter_delreq(installed_dr);
700		defrouter_addreq(selected_dr);
701	}
702}
703
704/*
705 * for default router selection
706 * regards router-preference field as a 2-bit signed integer
707 */
708static int
709rtpref(struct nd_defrouter *dr)
710{
711	switch (dr->flags & ND_RA_FLAG_RTPREF_MASK) {
712	case ND_RA_FLAG_RTPREF_HIGH:
713		return (RTPREF_HIGH);
714	case ND_RA_FLAG_RTPREF_MEDIUM:
715	case ND_RA_FLAG_RTPREF_RSV:
716		return (RTPREF_MEDIUM);
717	case ND_RA_FLAG_RTPREF_LOW:
718		return (RTPREF_LOW);
719	default:
720		/*
721		 * This case should never happen.  If it did, it would mean a
722		 * serious bug of kernel internal.  We thus always bark here.
723		 * Or, can we even panic?
724		 */
725		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
726		return (RTPREF_INVALID);
727	}
728	/* NOTREACHED */
729}
730
731static struct nd_defrouter *
732defrtrlist_update(struct nd_defrouter *new)
733{
734	struct nd_defrouter *dr, *n;
735	int oldpref;
736
737	if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
738		/* entry exists */
739		if (new->rtlifetime == 0) {
740			defrtrlist_del(dr);
741			return (NULL);
742		}
743
744		oldpref = rtpref(dr);
745
746		/* override */
747		dr->flags = new->flags; /* xxx flag check */
748		dr->rtlifetime = new->rtlifetime;
749		dr->expire = new->expire;
750
751		/*
752		 * If the preference does not change, there's no need
753		 * to sort the entries. Also make sure the selected
754		 * router is still installed in the kernel.
755		 */
756		if (dr->installed && rtpref(new) == oldpref)
757			return (dr);
758
759		/*
760		 * The preferred router may have changed, so relocate this
761		 * router.
762		 */
763		TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
764		n = dr;
765		goto insert;
766	}
767
768	/* entry does not exist */
769	if (new->rtlifetime == 0)
770		return (NULL);
771
772	n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
773	if (n == NULL)
774		return (NULL);
775	memcpy(n, new, sizeof(*n));
776
777insert:
778	/*
779	 * Insert the new router in the Default Router List;
780	 * The Default Router List should be in the descending order
781	 * of router-preferece.  Routers with the same preference are
782	 * sorted in the arriving time order.
783	 */
784
785	/* insert at the end of the group */
786	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
787		if (rtpref(n) > rtpref(dr))
788			break;
789	}
790	if (dr)
791		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
792	else
793		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
794
795	defrouter_select();
796
797	return (n);
798}
799
800static struct nd_pfxrouter *
801pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
802{
803	struct nd_pfxrouter *search;
804
805	LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
806		if (search->router == dr)
807			break;
808	}
809
810	return (search);
811}
812
813static void
814pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
815{
816	struct nd_pfxrouter *new;
817
818	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
819	if (new == NULL)
820		return;
821	new->router = dr;
822
823	LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
824
825	pfxlist_onlink_check();
826}
827
828static void
829pfxrtr_del(struct nd_pfxrouter *pfr)
830{
831	LIST_REMOVE(pfr, pfr_entry);
832	free(pfr, M_IP6NDP);
833}
834
835struct nd_prefix *
836nd6_prefix_lookup(struct nd_prefixctl *key)
837{
838	struct nd_prefix *search;
839
840	LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
841		if (key->ndpr_ifp == search->ndpr_ifp &&
842		    key->ndpr_plen == search->ndpr_plen &&
843		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
844		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
845			break;
846		}
847	}
848
849	return (search);
850}
851
852int
853nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
854    struct nd_prefix **newp)
855{
856	struct nd_prefix *new = NULL;
857	int error = 0;
858	int i;
859	char ip6buf[INET6_ADDRSTRLEN];
860
861	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
862	if (new == NULL)
863		return (ENOMEM);
864	new->ndpr_ifp = pr->ndpr_ifp;
865	new->ndpr_prefix = pr->ndpr_prefix;
866	new->ndpr_plen = pr->ndpr_plen;
867	new->ndpr_vltime = pr->ndpr_vltime;
868	new->ndpr_pltime = pr->ndpr_pltime;
869	new->ndpr_flags = pr->ndpr_flags;
870	if ((error = in6_init_prefix_ltimes(new)) != 0) {
871		free(new, M_IP6NDP);
872		return(error);
873	}
874	new->ndpr_lastupdate = time_uptime;
875	if (newp != NULL)
876		*newp = new;
877
878	/* initialization */
879	LIST_INIT(&new->ndpr_advrtrs);
880	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
881	/* make prefix in the canonical form */
882	for (i = 0; i < 4; i++)
883		new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
884		    new->ndpr_mask.s6_addr32[i];
885
886	/* link ndpr_entry to nd_prefix list */
887	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
888
889	/* ND_OPT_PI_FLAG_ONLINK processing */
890	if (new->ndpr_raf_onlink) {
891		int e;
892
893		if ((e = nd6_prefix_onlink(new)) != 0) {
894			nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
895			    "the prefix %s/%d on-link on %s (errno=%d)\n",
896			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
897			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
898			/* proceed anyway. XXX: is it correct? */
899		}
900	}
901
902	if (dr)
903		pfxrtr_add(new, dr);
904
905	return 0;
906}
907
908void
909prelist_remove(struct nd_prefix *pr)
910{
911	struct nd_pfxrouter *pfr, *next;
912	int e;
913	char ip6buf[INET6_ADDRSTRLEN];
914
915	/* make sure to invalidate the prefix until it is really freed. */
916	pr->ndpr_vltime = 0;
917	pr->ndpr_pltime = 0;
918
919	/*
920	 * Though these flags are now meaningless, we'd rather keep the value
921	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
922	 * when executing "ndp -p".
923	 */
924
925	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
926	    (e = nd6_prefix_offlink(pr)) != 0) {
927		nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
928		    "on %s, errno=%d\n",
929		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
930		    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
931		/* what should we do? */
932	}
933
934	if (pr->ndpr_refcnt > 0)
935		return;		/* notice here? */
936
937	/* unlink ndpr_entry from nd_prefix list */
938	LIST_REMOVE(pr, ndpr_entry);
939
940	/* free list of routers that adversed the prefix */
941	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) {
942		free(pfr, M_IP6NDP);
943	}
944	free(pr, M_IP6NDP);
945
946	pfxlist_onlink_check();
947}
948
949/*
950 * dr - may be NULL
951 */
952
953static int
954prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
955    struct mbuf *m, int mcast)
956{
957	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
958	struct ifaddr *ifa;
959	struct ifnet *ifp = new->ndpr_ifp;
960	struct nd_prefix *pr;
961	int error = 0;
962	int newprefix = 0;
963	int auth;
964	struct in6_addrlifetime lt6_tmp;
965	char ip6buf[INET6_ADDRSTRLEN];
966
967	auth = 0;
968	if (m) {
969		/*
970		 * Authenticity for NA consists authentication for
971		 * both IP header and IP datagrams, doesn't it ?
972		 */
973#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
974		auth = ((m->m_flags & M_AUTHIPHDR) &&
975		    (m->m_flags & M_AUTHIPDGM));
976#endif
977	}
978
979	if ((pr = nd6_prefix_lookup(new)) != NULL) {
980		/*
981		 * nd6_prefix_lookup() ensures that pr and new have the same
982		 * prefix on a same interface.
983		 */
984
985		/*
986		 * Update prefix information.  Note that the on-link (L) bit
987		 * and the autonomous (A) bit should NOT be changed from 1
988		 * to 0.
989		 */
990		if (new->ndpr_raf_onlink == 1)
991			pr->ndpr_raf_onlink = 1;
992		if (new->ndpr_raf_auto == 1)
993			pr->ndpr_raf_auto = 1;
994		if (new->ndpr_raf_onlink) {
995			pr->ndpr_vltime = new->ndpr_vltime;
996			pr->ndpr_pltime = new->ndpr_pltime;
997			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
998			pr->ndpr_lastupdate = time_uptime;
999		}
1000
1001		if (new->ndpr_raf_onlink &&
1002		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1003			int e;
1004
1005			if ((e = nd6_prefix_onlink(pr)) != 0) {
1006				nd6log((LOG_ERR,
1007				    "prelist_update: failed to make "
1008				    "the prefix %s/%d on-link on %s "
1009				    "(errno=%d)\n",
1010				    ip6_sprintf(ip6buf,
1011					    &pr->ndpr_prefix.sin6_addr),
1012				    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
1013				/* proceed anyway. XXX: is it correct? */
1014			}
1015		}
1016
1017		if (dr && pfxrtr_lookup(pr, dr) == NULL)
1018			pfxrtr_add(pr, dr);
1019	} else {
1020		struct nd_prefix *newpr = NULL;
1021
1022		newprefix = 1;
1023
1024		if (new->ndpr_vltime == 0)
1025			goto end;
1026		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
1027			goto end;
1028
1029		error = nd6_prelist_add(new, dr, &newpr);
1030		if (error != 0 || newpr == NULL) {
1031			nd6log((LOG_NOTICE, "prelist_update: "
1032			    "nd6_prelist_add failed for %s/%d on %s "
1033			    "errno=%d, returnpr=%p\n",
1034			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
1035			    new->ndpr_plen, if_name(new->ndpr_ifp),
1036			    error, newpr));
1037			goto end; /* we should just give up in this case. */
1038		}
1039
1040		/*
1041		 * XXX: from the ND point of view, we can ignore a prefix
1042		 * with the on-link bit being zero.  However, we need a
1043		 * prefix structure for references from autoconfigured
1044		 * addresses.  Thus, we explicitly make sure that the prefix
1045		 * itself expires now.
1046		 */
1047		if (newpr->ndpr_raf_onlink == 0) {
1048			newpr->ndpr_vltime = 0;
1049			newpr->ndpr_pltime = 0;
1050			in6_init_prefix_ltimes(newpr);
1051		}
1052
1053		pr = newpr;
1054	}
1055
1056	/*
1057	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
1058	 * Note that pr must be non NULL at this point.
1059	 */
1060
1061	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
1062	if (!new->ndpr_raf_auto)
1063		goto end;
1064
1065	/*
1066	 * 5.5.3 (b). the link-local prefix should have been ignored in
1067	 * nd6_ra_input.
1068	 */
1069
1070	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
1071	if (new->ndpr_pltime > new->ndpr_vltime) {
1072		error = EINVAL;	/* XXX: won't be used */
1073		goto end;
1074	}
1075
1076	/*
1077	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
1078	 * an address configured by stateless autoconfiguration already in the
1079	 * list of addresses associated with the interface, and the Valid
1080	 * Lifetime is not 0, form an address.  We first check if we have
1081	 * a matching prefix.
1082	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
1083	 * consider autoconfigured addresses while RFC2462 simply said
1084	 * "address".
1085	 */
1086	IF_ADDR_RLOCK(ifp);
1087	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1088		struct in6_ifaddr *ifa6;
1089		u_int32_t remaininglifetime;
1090
1091		if (ifa->ifa_addr->sa_family != AF_INET6)
1092			continue;
1093
1094		ifa6 = (struct in6_ifaddr *)ifa;
1095
1096		/*
1097		 * We only consider autoconfigured addresses as per rfc2462bis.
1098		 */
1099		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
1100			continue;
1101
1102		/*
1103		 * Spec is not clear here, but I believe we should concentrate
1104		 * on unicast (i.e. not anycast) addresses.
1105		 * XXX: other ia6_flags? detached or duplicated?
1106		 */
1107		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
1108			continue;
1109
1110		/*
1111		 * Ignore the address if it is not associated with a prefix
1112		 * or is associated with a prefix that is different from this
1113		 * one.  (pr is never NULL here)
1114		 */
1115		if (ifa6->ia6_ndpr != pr)
1116			continue;
1117
1118		if (ia6_match == NULL) /* remember the first one */
1119			ia6_match = ifa6;
1120
1121		/*
1122		 * An already autoconfigured address matched.  Now that we
1123		 * are sure there is at least one matched address, we can
1124		 * proceed to 5.5.3. (e): update the lifetimes according to the
1125		 * "two hours" rule and the privacy extension.
1126		 * We apply some clarifications in rfc2462bis:
1127		 * - use remaininglifetime instead of storedlifetime as a
1128		 *   variable name
1129		 * - remove the dead code in the "two-hour" rule
1130		 */
1131#define TWOHOUR		(120*60)
1132		lt6_tmp = ifa6->ia6_lifetime;
1133
1134		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
1135			remaininglifetime = ND6_INFINITE_LIFETIME;
1136		else if (time_uptime - ifa6->ia6_updatetime >
1137			 lt6_tmp.ia6t_vltime) {
1138			/*
1139			 * The case of "invalid" address.  We should usually
1140			 * not see this case.
1141			 */
1142			remaininglifetime = 0;
1143		} else
1144			remaininglifetime = lt6_tmp.ia6t_vltime -
1145			    (time_uptime - ifa6->ia6_updatetime);
1146
1147		/* when not updating, keep the current stored lifetime. */
1148		lt6_tmp.ia6t_vltime = remaininglifetime;
1149
1150		if (TWOHOUR < new->ndpr_vltime ||
1151		    remaininglifetime < new->ndpr_vltime) {
1152			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1153		} else if (remaininglifetime <= TWOHOUR) {
1154			if (auth) {
1155				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1156			}
1157		} else {
1158			/*
1159			 * new->ndpr_vltime <= TWOHOUR &&
1160			 * TWOHOUR < remaininglifetime
1161			 */
1162			lt6_tmp.ia6t_vltime = TWOHOUR;
1163		}
1164
1165		/* The 2 hour rule is not imposed for preferred lifetime. */
1166		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
1167
1168		in6_init_address_ltimes(pr, &lt6_tmp);
1169
1170		/*
1171		 * We need to treat lifetimes for temporary addresses
1172		 * differently, according to
1173		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
1174		 * we only update the lifetimes when they are in the maximum
1175		 * intervals.
1176		 */
1177		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1178			u_int32_t maxvltime, maxpltime;
1179
1180			if (V_ip6_temp_valid_lifetime >
1181			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
1182			    V_ip6_desync_factor)) {
1183				maxvltime = V_ip6_temp_valid_lifetime -
1184				    (time_uptime - ifa6->ia6_createtime) -
1185				    V_ip6_desync_factor;
1186			} else
1187				maxvltime = 0;
1188			if (V_ip6_temp_preferred_lifetime >
1189			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
1190			    V_ip6_desync_factor)) {
1191				maxpltime = V_ip6_temp_preferred_lifetime -
1192				    (time_uptime - ifa6->ia6_createtime) -
1193				    V_ip6_desync_factor;
1194			} else
1195				maxpltime = 0;
1196
1197			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
1198			    lt6_tmp.ia6t_vltime > maxvltime) {
1199				lt6_tmp.ia6t_vltime = maxvltime;
1200			}
1201			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
1202			    lt6_tmp.ia6t_pltime > maxpltime) {
1203				lt6_tmp.ia6t_pltime = maxpltime;
1204			}
1205		}
1206		ifa6->ia6_lifetime = lt6_tmp;
1207		ifa6->ia6_updatetime = time_uptime;
1208	}
1209	IF_ADDR_RUNLOCK(ifp);
1210	if (ia6_match == NULL && new->ndpr_vltime) {
1211		int ifidlen;
1212
1213		/*
1214		 * 5.5.3 (d) (continued)
1215		 * No address matched and the valid lifetime is non-zero.
1216		 * Create a new address.
1217		 */
1218
1219		/*
1220		 * Prefix Length check:
1221		 * If the sum of the prefix length and interface identifier
1222		 * length does not equal 128 bits, the Prefix Information
1223		 * option MUST be ignored.  The length of the interface
1224		 * identifier is defined in a separate link-type specific
1225		 * document.
1226		 */
1227		ifidlen = in6_if2idlen(ifp);
1228		if (ifidlen < 0) {
1229			/* this should not happen, so we always log it. */
1230			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
1231			    if_name(ifp));
1232			goto end;
1233		}
1234		if (ifidlen + pr->ndpr_plen != 128) {
1235			nd6log((LOG_INFO,
1236			    "prelist_update: invalid prefixlen "
1237			    "%d for %s, ignored\n",
1238			    pr->ndpr_plen, if_name(ifp)));
1239			goto end;
1240		}
1241
1242		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
1243			/*
1244			 * note that we should use pr (not new) for reference.
1245			 */
1246			pr->ndpr_refcnt++;
1247			ia6->ia6_ndpr = pr;
1248
1249			/*
1250			 * RFC 3041 3.3 (2).
1251			 * When a new public address is created as described
1252			 * in RFC2462, also create a new temporary address.
1253			 *
1254			 * RFC 3041 3.5.
1255			 * When an interface connects to a new link, a new
1256			 * randomized interface identifier should be generated
1257			 * immediately together with a new set of temporary
1258			 * addresses.  Thus, we specifiy 1 as the 2nd arg of
1259			 * in6_tmpifadd().
1260			 */
1261			if (V_ip6_use_tempaddr) {
1262				int e;
1263				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
1264					nd6log((LOG_NOTICE, "prelist_update: "
1265					    "failed to create a temporary "
1266					    "address, errno=%d\n",
1267					    e));
1268				}
1269			}
1270			ifa_free(&ia6->ia_ifa);
1271
1272			/*
1273			 * A newly added address might affect the status
1274			 * of other addresses, so we check and update it.
1275			 * XXX: what if address duplication happens?
1276			 */
1277			pfxlist_onlink_check();
1278		} else {
1279			/* just set an error. do not bark here. */
1280			error = EADDRNOTAVAIL; /* XXX: might be unused. */
1281		}
1282	}
1283
1284 end:
1285	return error;
1286}
1287
1288/*
1289 * A supplement function used in the on-link detection below;
1290 * detect if a given prefix has a (probably) reachable advertising router.
1291 * XXX: lengthy function name...
1292 */
1293static struct nd_pfxrouter *
1294find_pfxlist_reachable_router(struct nd_prefix *pr)
1295{
1296	struct nd_pfxrouter *pfxrtr;
1297	struct llentry *ln;
1298	int canreach;
1299
1300	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
1301		IF_AFDATA_RLOCK(pfxrtr->router->ifp);
1302		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
1303		IF_AFDATA_RUNLOCK(pfxrtr->router->ifp);
1304		if (ln == NULL)
1305			continue;
1306		canreach = ND6_IS_LLINFO_PROBREACH(ln);
1307		LLE_RUNLOCK(ln);
1308		if (canreach)
1309			break;
1310	}
1311	return (pfxrtr);
1312}
1313
1314/*
1315 * Check if each prefix in the prefix list has at least one available router
1316 * that advertised the prefix (a router is "available" if its neighbor cache
1317 * entry is reachable or probably reachable).
1318 * If the check fails, the prefix may be off-link, because, for example,
1319 * we have moved from the network but the lifetime of the prefix has not
1320 * expired yet.  So we should not use the prefix if there is another prefix
1321 * that has an available router.
1322 * But, if there is no prefix that has an available router, we still regards
1323 * all the prefixes as on-link.  This is because we can't tell if all the
1324 * routers are simply dead or if we really moved from the network and there
1325 * is no router around us.
1326 */
1327void
1328pfxlist_onlink_check()
1329{
1330	struct nd_prefix *pr;
1331	struct in6_ifaddr *ifa;
1332	struct nd_defrouter *dr;
1333	struct nd_pfxrouter *pfxrtr = NULL;
1334
1335	/*
1336	 * Check if there is a prefix that has a reachable advertising
1337	 * router.
1338	 */
1339	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1340		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
1341			break;
1342	}
1343
1344	/*
1345	 * If we have no such prefix, check whether we still have a router
1346	 * that does not advertise any prefixes.
1347	 */
1348	if (pr == NULL) {
1349		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
1350			struct nd_prefix *pr0;
1351
1352			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
1353				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
1354					break;
1355			}
1356			if (pfxrtr != NULL)
1357				break;
1358		}
1359	}
1360	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
1361		/*
1362		 * There is at least one prefix that has a reachable router,
1363		 * or at least a router which probably does not advertise
1364		 * any prefixes.  The latter would be the case when we move
1365		 * to a new link where we have a router that does not provide
1366		 * prefixes and we configure an address by hand.
1367		 * Detach prefixes which have no reachable advertising
1368		 * router, and attach other prefixes.
1369		 */
1370		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1371			/* XXX: a link-local prefix should never be detached */
1372			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1373				continue;
1374
1375			/*
1376			 * we aren't interested in prefixes without the L bit
1377			 * set.
1378			 */
1379			if (pr->ndpr_raf_onlink == 0)
1380				continue;
1381
1382			if (pr->ndpr_raf_auto == 0)
1383				continue;
1384
1385			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1386			    find_pfxlist_reachable_router(pr) == NULL)
1387				pr->ndpr_stateflags |= NDPRF_DETACHED;
1388			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1389			    find_pfxlist_reachable_router(pr) != 0)
1390				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1391		}
1392	} else {
1393		/* there is no prefix that has a reachable router */
1394		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1395			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1396				continue;
1397
1398			if (pr->ndpr_raf_onlink == 0)
1399				continue;
1400
1401			if (pr->ndpr_raf_auto == 0)
1402				continue;
1403
1404			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
1405				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1406		}
1407	}
1408
1409	/*
1410	 * Remove each interface route associated with a (just) detached
1411	 * prefix, and reinstall the interface route for a (just) attached
1412	 * prefix.  Note that all attempt of reinstallation does not
1413	 * necessarily success, when a same prefix is shared among multiple
1414	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
1415	 * so we don't have to care about them.
1416	 */
1417	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1418		int e;
1419		char ip6buf[INET6_ADDRSTRLEN];
1420
1421		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1422			continue;
1423
1424		if (pr->ndpr_raf_onlink == 0)
1425			continue;
1426
1427		if (pr->ndpr_raf_auto == 0)
1428			continue;
1429
1430		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1431		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1432			if ((e = nd6_prefix_offlink(pr)) != 0) {
1433				nd6log((LOG_ERR,
1434				    "pfxlist_onlink_check: failed to "
1435				    "make %s/%d offlink, errno=%d\n",
1436				    ip6_sprintf(ip6buf,
1437					    &pr->ndpr_prefix.sin6_addr),
1438					    pr->ndpr_plen, e));
1439			}
1440		}
1441		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1442		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
1443		    pr->ndpr_raf_onlink) {
1444			if ((e = nd6_prefix_onlink(pr)) != 0) {
1445				nd6log((LOG_ERR,
1446				    "pfxlist_onlink_check: failed to "
1447				    "make %s/%d onlink, errno=%d\n",
1448				    ip6_sprintf(ip6buf,
1449					    &pr->ndpr_prefix.sin6_addr),
1450					    pr->ndpr_plen, e));
1451			}
1452		}
1453	}
1454
1455	/*
1456	 * Changes on the prefix status might affect address status as well.
1457	 * Make sure that all addresses derived from an attached prefix are
1458	 * attached, and that all addresses derived from a detached prefix are
1459	 * detached.  Note, however, that a manually configured address should
1460	 * always be attached.
1461	 * The precise detection logic is same as the one for prefixes.
1462	 *
1463	 * XXXRW: in6_ifaddrhead locking.
1464	 */
1465	TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1466		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
1467			continue;
1468
1469		if (ifa->ia6_ndpr == NULL) {
1470			/*
1471			 * This can happen when we first configure the address
1472			 * (i.e. the address exists, but the prefix does not).
1473			 * XXX: complicated relationships...
1474			 */
1475			continue;
1476		}
1477
1478		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
1479			break;
1480	}
1481	if (ifa) {
1482		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1483			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1484				continue;
1485
1486			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
1487				continue;
1488
1489			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
1490				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1491					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1492					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1493					nd6_dad_start((struct ifaddr *)ifa, 0);
1494				}
1495			} else {
1496				ifa->ia6_flags |= IN6_IFF_DETACHED;
1497			}
1498		}
1499	}
1500	else {
1501		TAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1502			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1503				continue;
1504
1505			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1506				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1507				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1508				/* Do we need a delay in this case? */
1509				nd6_dad_start((struct ifaddr *)ifa, 0);
1510			}
1511		}
1512	}
1513}
1514
1515static int
1516nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
1517{
1518	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1519	struct radix_node_head *rnh;
1520	struct rtentry *rt;
1521	struct sockaddr_in6 mask6;
1522	u_long rtflags;
1523	int error, a_failure, fibnum;
1524
1525	/*
1526	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
1527	 * ifa->ifa_rtrequest = nd6_rtrequest;
1528	 */
1529	bzero(&mask6, sizeof(mask6));
1530	mask6.sin6_len = sizeof(mask6);
1531	mask6.sin6_addr = pr->ndpr_mask;
1532	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
1533
1534	a_failure = 0;
1535	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1536
1537		rt = NULL;
1538		error = in6_rtrequest(RTM_ADD,
1539		    (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr,
1540		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
1541		if (error == 0) {
1542			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
1543			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
1544			    error, pr, ifa));
1545
1546			rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
1547			/* XXX what if rhn == NULL? */
1548			RADIX_NODE_HEAD_LOCK(rnh);
1549			RT_LOCK(rt);
1550			if (rt_setgate(rt, rt_key(rt),
1551			    (struct sockaddr *)&null_sdl) == 0) {
1552				struct sockaddr_dl *dl;
1553
1554				dl = (struct sockaddr_dl *)rt->rt_gateway;
1555				dl->sdl_type = rt->rt_ifp->if_type;
1556				dl->sdl_index = rt->rt_ifp->if_index;
1557			}
1558			RADIX_NODE_HEAD_UNLOCK(rnh);
1559			nd6_rtmsg(RTM_ADD, rt);
1560			RT_UNLOCK(rt);
1561			pr->ndpr_stateflags |= NDPRF_ONLINK;
1562		} else {
1563			char ip6buf[INET6_ADDRSTRLEN];
1564			char ip6bufg[INET6_ADDRSTRLEN];
1565			char ip6bufm[INET6_ADDRSTRLEN];
1566			struct sockaddr_in6 *sin6;
1567
1568			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
1569			nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
1570			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
1571			    "flags=%lx errno = %d\n",
1572			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1573			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
1574			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
1575			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
1576			    rtflags, error));
1577
1578			/* Save last error to return, see rtinit(). */
1579			a_failure = error;
1580		}
1581
1582		if (rt != NULL) {
1583			RT_LOCK(rt);
1584			RT_REMREF(rt);
1585			RT_UNLOCK(rt);
1586		}
1587	}
1588
1589	/* Return the last error we got. */
1590	return (a_failure);
1591}
1592
1593static int
1594nd6_prefix_onlink(struct nd_prefix *pr)
1595{
1596	struct ifaddr *ifa;
1597	struct ifnet *ifp = pr->ndpr_ifp;
1598	struct nd_prefix *opr;
1599	int error = 0;
1600	char ip6buf[INET6_ADDRSTRLEN];
1601
1602	/* sanity check */
1603	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1604		nd6log((LOG_ERR,
1605		    "nd6_prefix_onlink: %s/%d is already on-link\n",
1606		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1607		    pr->ndpr_plen));
1608		return (EEXIST);
1609	}
1610
1611	/*
1612	 * Add the interface route associated with the prefix.  Before
1613	 * installing the route, check if there's the same prefix on another
1614	 * interface, and the prefix has already installed the interface route.
1615	 * Although such a configuration is expected to be rare, we explicitly
1616	 * allow it.
1617	 */
1618	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
1619		if (opr == pr)
1620			continue;
1621
1622		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
1623			continue;
1624
1625		if (opr->ndpr_plen == pr->ndpr_plen &&
1626		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
1627		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
1628			return (0);
1629	}
1630
1631	/*
1632	 * We prefer link-local addresses as the associated interface address.
1633	 */
1634	/* search for a link-local addr */
1635	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
1636	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
1637	if (ifa == NULL) {
1638		/* XXX: freebsd does not have ifa_ifwithaf */
1639		IF_ADDR_RLOCK(ifp);
1640		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1641			if (ifa->ifa_addr->sa_family == AF_INET6)
1642				break;
1643		}
1644		if (ifa != NULL)
1645			ifa_ref(ifa);
1646		IF_ADDR_RUNLOCK(ifp);
1647		/* should we care about ia6_flags? */
1648	}
1649	if (ifa == NULL) {
1650		/*
1651		 * This can still happen, when, for example, we receive an RA
1652		 * containing a prefix with the L bit set and the A bit clear,
1653		 * after removing all IPv6 addresses on the receiving
1654		 * interface.  This should, of course, be rare though.
1655		 */
1656		nd6log((LOG_NOTICE,
1657		    "nd6_prefix_onlink: failed to find any ifaddr"
1658		    " to add route for a prefix(%s/%d) on %s\n",
1659		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1660		    pr->ndpr_plen, if_name(ifp)));
1661		return (0);
1662	}
1663
1664	error = nd6_prefix_onlink_rtrequest(pr, ifa);
1665
1666	if (ifa != NULL)
1667		ifa_free(ifa);
1668
1669	return (error);
1670}
1671
1672static int
1673nd6_prefix_offlink(struct nd_prefix *pr)
1674{
1675	int error = 0;
1676	struct ifnet *ifp = pr->ndpr_ifp;
1677	struct nd_prefix *opr;
1678	struct sockaddr_in6 sa6, mask6;
1679	struct rtentry *rt;
1680	char ip6buf[INET6_ADDRSTRLEN];
1681	int fibnum, a_failure;
1682
1683	/* sanity check */
1684	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1685		nd6log((LOG_ERR,
1686		    "nd6_prefix_offlink: %s/%d is already off-link\n",
1687		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1688		    pr->ndpr_plen));
1689		return (EEXIST);
1690	}
1691
1692	bzero(&sa6, sizeof(sa6));
1693	sa6.sin6_family = AF_INET6;
1694	sa6.sin6_len = sizeof(sa6);
1695	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
1696	    sizeof(struct in6_addr));
1697	bzero(&mask6, sizeof(mask6));
1698	mask6.sin6_family = AF_INET6;
1699	mask6.sin6_len = sizeof(sa6);
1700	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
1701
1702	a_failure = 0;
1703	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
1704		rt = NULL;
1705		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
1706		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
1707		if (error == 0) {
1708			/* report the route deletion to the routing socket. */
1709			if (rt != NULL)
1710				nd6_rtmsg(RTM_DELETE, rt);
1711		} else {
1712			/* Save last error to return, see rtinit(). */
1713			a_failure = error;
1714		}
1715		if (rt != NULL) {
1716			RTFREE(rt);
1717		}
1718	}
1719	error = a_failure;
1720	a_failure = 1;
1721	if (error == 0) {
1722		pr->ndpr_stateflags &= ~NDPRF_ONLINK;
1723
1724		/*
1725		 * There might be the same prefix on another interface,
1726		 * the prefix which could not be on-link just because we have
1727		 * the interface route (see comments in nd6_prefix_onlink).
1728		 * If there's one, try to make the prefix on-link on the
1729		 * interface.
1730		 */
1731		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
1732			if (opr == pr)
1733				continue;
1734
1735			if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
1736				continue;
1737
1738			/*
1739			 * KAME specific: detached prefixes should not be
1740			 * on-link.
1741			 */
1742			if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
1743				continue;
1744
1745			if (opr->ndpr_plen == pr->ndpr_plen &&
1746			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
1747			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
1748				int e;
1749
1750				if ((e = nd6_prefix_onlink(opr)) != 0) {
1751					nd6log((LOG_ERR,
1752					    "nd6_prefix_offlink: failed to "
1753					    "recover a prefix %s/%d from %s "
1754					    "to %s (errno = %d)\n",
1755					    ip6_sprintf(ip6buf,
1756						&opr->ndpr_prefix.sin6_addr),
1757					    opr->ndpr_plen, if_name(ifp),
1758					    if_name(opr->ndpr_ifp), e));
1759				} else
1760					a_failure = 0;
1761			}
1762		}
1763	} else {
1764		/* XXX: can we still set the NDPRF_ONLINK flag? */
1765		nd6log((LOG_ERR,
1766		    "nd6_prefix_offlink: failed to delete route: "
1767		    "%s/%d on %s (errno = %d)\n",
1768		    ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
1769		    if_name(ifp), error));
1770	}
1771
1772	if (a_failure)
1773		lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
1774		    (struct sockaddr *)&mask6, LLE_STATIC);
1775
1776	return (error);
1777}
1778
1779static struct in6_ifaddr *
1780in6_ifadd(struct nd_prefixctl *pr, int mcast)
1781{
1782	struct ifnet *ifp = pr->ndpr_ifp;
1783	struct ifaddr *ifa;
1784	struct in6_aliasreq ifra;
1785	struct in6_ifaddr *ia, *ib;
1786	int error, plen0;
1787	struct in6_addr mask;
1788	int prefixlen = pr->ndpr_plen;
1789	int updateflags;
1790	char ip6buf[INET6_ADDRSTRLEN];
1791
1792	in6_prefixlen2mask(&mask, prefixlen);
1793
1794	/*
1795	 * find a link-local address (will be interface ID).
1796	 * Is it really mandatory? Theoretically, a global or a site-local
1797	 * address can be configured without a link-local address, if we
1798	 * have a unique interface identifier...
1799	 *
1800	 * it is not mandatory to have a link-local address, we can generate
1801	 * interface identifier on the fly.  we do this because:
1802	 * (1) it should be the easiest way to find interface identifier.
1803	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
1804	 * for multiple addresses on a single interface, and possible shortcut
1805	 * of DAD.  we omitted DAD for this reason in the past.
1806	 * (3) a user can prevent autoconfiguration of global address
1807	 * by removing link-local address by hand (this is partly because we
1808	 * don't have other way to control the use of IPv6 on an interface.
1809	 * this has been our design choice - cf. NRL's "ifconfig auto").
1810	 * (4) it is easier to manage when an interface has addresses
1811	 * with the same interface identifier, than to have multiple addresses
1812	 * with different interface identifiers.
1813	 */
1814	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
1815	if (ifa)
1816		ib = (struct in6_ifaddr *)ifa;
1817	else
1818		return NULL;
1819
1820	/* prefixlen + ifidlen must be equal to 128 */
1821	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
1822	if (prefixlen != plen0) {
1823		ifa_free(ifa);
1824		nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
1825		    "(prefix=%d ifid=%d)\n",
1826		    if_name(ifp), prefixlen, 128 - plen0));
1827		return NULL;
1828	}
1829
1830	/* make ifaddr */
1831
1832	bzero(&ifra, sizeof(ifra));
1833	/*
1834	 * in6_update_ifa() does not use ifra_name, but we accurately set it
1835	 * for safety.
1836	 */
1837	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
1838	ifra.ifra_addr.sin6_family = AF_INET6;
1839	ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
1840	/* prefix */
1841	ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
1842	ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1843	ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1844	ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1845	ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1846
1847	/* interface ID */
1848	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
1849	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
1850	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
1851	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
1852	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
1853	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
1854	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
1855	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
1856	ifa_free(ifa);
1857
1858	/* new prefix mask. */
1859	ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
1860	ifra.ifra_prefixmask.sin6_family = AF_INET6;
1861	bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
1862	    sizeof(ifra.ifra_prefixmask.sin6_addr));
1863
1864	/* lifetimes. */
1865	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
1866	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
1867
1868	/* XXX: scope zone ID? */
1869
1870	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
1871
1872	/*
1873	 * Make sure that we do not have this address already.  This should
1874	 * usually not happen, but we can still see this case, e.g., if we
1875	 * have manually configured the exact address to be configured.
1876	 */
1877	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
1878	    &ifra.ifra_addr.sin6_addr);
1879	if (ifa != NULL) {
1880		ifa_free(ifa);
1881		/* this should be rare enough to make an explicit log */
1882		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
1883		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
1884		return (NULL);
1885	}
1886
1887	/*
1888	 * Allocate ifaddr structure, link into chain, etc.
1889	 * If we are going to create a new address upon receiving a multicasted
1890	 * RA, we need to impose a random delay before starting DAD.
1891	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
1892	 */
1893	updateflags = 0;
1894	if (mcast)
1895		updateflags |= IN6_IFAUPDATE_DADDELAY;
1896	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
1897		nd6log((LOG_ERR,
1898		    "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
1899		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
1900		    if_name(ifp), error));
1901		return (NULL);	/* ifaddr must not have been allocated. */
1902	}
1903
1904	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
1905	/*
1906	 * XXXRW: Assumption of non-NULLness here might not be true with
1907	 * fine-grained locking -- should we validate it?  Or just return
1908	 * earlier ifa rather than looking it up again?
1909	 */
1910	return (ia);		/* this is always non-NULL  and referenced. */
1911}
1912
1913/*
1914 * ia0 - corresponding public address
1915 */
1916int
1917in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
1918{
1919	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
1920	struct in6_ifaddr *newia, *ia;
1921	struct in6_aliasreq ifra;
1922	int i, error;
1923	int trylimit = 3;	/* XXX: adhoc value */
1924	int updateflags;
1925	u_int32_t randid[2];
1926	time_t vltime0, pltime0;
1927
1928	bzero(&ifra, sizeof(ifra));
1929	strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
1930	ifra.ifra_addr = ia0->ia_addr;
1931	/* copy prefix mask */
1932	ifra.ifra_prefixmask = ia0->ia_prefixmask;
1933	/* clear the old IFID */
1934	for (i = 0; i < 4; i++) {
1935		ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
1936		    ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
1937	}
1938
1939  again:
1940	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
1941	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
1942		nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
1943		    "random IFID\n"));
1944		return (EINVAL);
1945	}
1946	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
1947	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
1948	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
1949	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
1950
1951	/*
1952	 * in6_get_tmpifid() quite likely provided a unique interface ID.
1953	 * However, we may still have a chance to see collision, because
1954	 * there may be a time lag between generation of the ID and generation
1955	 * of the address.  So, we'll do one more sanity check.
1956	 */
1957	IN6_IFADDR_RLOCK();
1958	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
1959		if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
1960		    &ifra.ifra_addr.sin6_addr)) {
1961			if (trylimit-- == 0) {
1962				IN6_IFADDR_RUNLOCK();
1963				/*
1964				 * Give up.  Something strange should have
1965				 * happened.
1966				 */
1967				nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
1968				    "find a unique random IFID\n"));
1969				return (EEXIST);
1970			}
1971			IN6_IFADDR_RUNLOCK();
1972			forcegen = 1;
1973			goto again;
1974		}
1975	}
1976	IN6_IFADDR_RUNLOCK();
1977
1978	/*
1979	 * The Valid Lifetime is the lower of the Valid Lifetime of the
1980         * public address or TEMP_VALID_LIFETIME.
1981	 * The Preferred Lifetime is the lower of the Preferred Lifetime
1982         * of the public address or TEMP_PREFERRED_LIFETIME -
1983         * DESYNC_FACTOR.
1984	 */
1985	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
1986		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
1987		    (ia0->ia6_lifetime.ia6t_vltime -
1988		    (time_uptime - ia0->ia6_updatetime));
1989		if (vltime0 > V_ip6_temp_valid_lifetime)
1990			vltime0 = V_ip6_temp_valid_lifetime;
1991	} else
1992		vltime0 = V_ip6_temp_valid_lifetime;
1993	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
1994		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
1995		    (ia0->ia6_lifetime.ia6t_pltime -
1996		    (time_uptime - ia0->ia6_updatetime));
1997		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
1998			pltime0 = V_ip6_temp_preferred_lifetime -
1999			    V_ip6_desync_factor;
2000		}
2001	} else
2002		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
2003	ifra.ifra_lifetime.ia6t_vltime = vltime0;
2004	ifra.ifra_lifetime.ia6t_pltime = pltime0;
2005
2006	/*
2007	 * A temporary address is created only if this calculated Preferred
2008	 * Lifetime is greater than REGEN_ADVANCE time units.
2009	 */
2010	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
2011		return (0);
2012
2013	/* XXX: scope zone ID? */
2014
2015	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
2016
2017	/* allocate ifaddr structure, link into chain, etc. */
2018	updateflags = 0;
2019	if (delay)
2020		updateflags |= IN6_IFAUPDATE_DADDELAY;
2021	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
2022		return (error);
2023
2024	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
2025	if (newia == NULL) {	/* XXX: can it happen? */
2026		nd6log((LOG_ERR,
2027		    "in6_tmpifadd: ifa update succeeded, but we got "
2028		    "no ifaddr\n"));
2029		return (EINVAL); /* XXX */
2030	}
2031	newia->ia6_ndpr = ia0->ia6_ndpr;
2032	newia->ia6_ndpr->ndpr_refcnt++;
2033	ifa_free(&newia->ia_ifa);
2034
2035	/*
2036	 * A newly added address might affect the status of other addresses.
2037	 * XXX: when the temporary address is generated with a new public
2038	 * address, the onlink check is redundant.  However, it would be safe
2039	 * to do the check explicitly everywhere a new address is generated,
2040	 * and, in fact, we surely need the check when we create a new
2041	 * temporary address due to deprecation of an old temporary address.
2042	 */
2043	pfxlist_onlink_check();
2044
2045	return (0);
2046}
2047
2048static int
2049in6_init_prefix_ltimes(struct nd_prefix *ndpr)
2050{
2051	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
2052		ndpr->ndpr_preferred = 0;
2053	else
2054		ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
2055	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2056		ndpr->ndpr_expire = 0;
2057	else
2058		ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
2059
2060	return 0;
2061}
2062
2063static void
2064in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
2065{
2066	/* init ia6t_expire */
2067	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
2068		lt6->ia6t_expire = 0;
2069	else {
2070		lt6->ia6t_expire = time_uptime;
2071		lt6->ia6t_expire += lt6->ia6t_vltime;
2072	}
2073
2074	/* init ia6t_preferred */
2075	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
2076		lt6->ia6t_preferred = 0;
2077	else {
2078		lt6->ia6t_preferred = time_uptime;
2079		lt6->ia6t_preferred += lt6->ia6t_pltime;
2080	}
2081}
2082
2083/*
2084 * Delete all the routing table entries that use the specified gateway.
2085 * XXX: this function causes search through all entries of routing table, so
2086 * it shouldn't be called when acting as a router.
2087 */
2088void
2089rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
2090{
2091	struct radix_node_head *rnh;
2092	u_int fibnum;
2093
2094	/* We'll care only link-local addresses */
2095	if (!IN6_IS_ADDR_LINKLOCAL(gateway))
2096		return;
2097
2098	/* XXX Do we really need to walk any but the default FIB? */
2099	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
2100		rnh = rt_tables_get_rnh(fibnum, AF_INET6);
2101		if (rnh == NULL)
2102			continue;
2103
2104		RADIX_NODE_HEAD_LOCK(rnh);
2105		rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
2106		RADIX_NODE_HEAD_UNLOCK(rnh);
2107	}
2108}
2109
2110static int
2111rt6_deleteroute(struct radix_node *rn, void *arg)
2112{
2113#define SIN6(s)	((struct sockaddr_in6 *)s)
2114	struct rtentry *rt = (struct rtentry *)rn;
2115	struct in6_addr *gate = (struct in6_addr *)arg;
2116
2117	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
2118		return (0);
2119
2120	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
2121		return (0);
2122	}
2123
2124	/*
2125	 * Do not delete a static route.
2126	 * XXX: this seems to be a bit ad-hoc. Should we consider the
2127	 * 'cloned' bit instead?
2128	 */
2129	if ((rt->rt_flags & RTF_STATIC) != 0)
2130		return (0);
2131
2132	/*
2133	 * We delete only host route. This means, in particular, we don't
2134	 * delete default route.
2135	 */
2136	if ((rt->rt_flags & RTF_HOST) == 0)
2137		return (0);
2138
2139	return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
2140	    rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum));
2141#undef SIN6
2142}
2143
2144int
2145nd6_setdefaultiface(int ifindex)
2146{
2147	int error = 0;
2148
2149	if (ifindex < 0 || V_if_index < ifindex)
2150		return (EINVAL);
2151	if (ifindex != 0 && !ifnet_byindex(ifindex))
2152		return (EINVAL);
2153
2154	if (V_nd6_defifindex != ifindex) {
2155		V_nd6_defifindex = ifindex;
2156		if (V_nd6_defifindex > 0)
2157			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
2158		else
2159			V_nd6_defifp = NULL;
2160
2161		/*
2162		 * Our current implementation assumes one-to-one maping between
2163		 * interfaces and links, so it would be natural to use the
2164		 * default interface as the default link.
2165		 */
2166		scope6_setdefault(V_nd6_defifp);
2167	}
2168
2169	return (error);
2170}
2171