nd6.c revision 317067
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/netinet6/nd6.c 317067 2017-04-17 20:13:20Z asomers $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/callout.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/mutex.h>
45#include <sys/socket.h>
46#include <sys/sockio.h>
47#include <sys/time.h>
48#include <sys/kernel.h>
49#include <sys/protosw.h>
50#include <sys/errno.h>
51#include <sys/syslog.h>
52#include <sys/rwlock.h>
53#include <sys/queue.h>
54#include <sys/sdt.h>
55#include <sys/sysctl.h>
56
57#include <net/if.h>
58#include <net/if_var.h>
59#include <net/if_arc.h>
60#include <net/if_dl.h>
61#include <net/if_types.h>
62#include <net/iso88025.h>
63#include <net/fddi.h>
64#include <net/route.h>
65#include <net/vnet.h>
66
67#include <netinet/in.h>
68#include <netinet/in_kdtrace.h>
69#include <net/if_llatbl.h>
70#include <netinet/if_ether.h>
71#include <netinet6/in6_var.h>
72#include <netinet/ip6.h>
73#include <netinet6/ip6_var.h>
74#include <netinet6/scope6_var.h>
75#include <netinet6/nd6.h>
76#include <netinet6/in6_ifattach.h>
77#include <netinet/icmp6.h>
78#include <netinet6/send.h>
79
80#include <sys/limits.h>
81
82#include <security/mac/mac_framework.h>
83
84#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
85#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
86
87#define SIN6(s) ((const struct sockaddr_in6 *)(s))
88
89MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
90
91/* timer values */
92VNET_DEFINE(int, nd6_prune)	= 1;	/* walk list every 1 seconds */
93VNET_DEFINE(int, nd6_delay)	= 5;	/* delay first probe time 5 second */
94VNET_DEFINE(int, nd6_umaxtries)	= 3;	/* maximum unicast query */
95VNET_DEFINE(int, nd6_mmaxtries)	= 3;	/* maximum multicast query */
96VNET_DEFINE(int, nd6_useloopback) = 1;	/* use loopback interface for
97					 * local traffic */
98VNET_DEFINE(int, nd6_gctimer)	= (60 * 60 * 24); /* 1 day: garbage
99					 * collection timer */
100
101/* preventing too many loops in ND option parsing */
102static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
103
104VNET_DEFINE(int, nd6_maxnudhint) = 0;	/* max # of subsequent upper
105					 * layer hints */
106static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
107					 * ND entries */
108#define	V_nd6_maxndopt			VNET(nd6_maxndopt)
109#define	V_nd6_maxqueuelen		VNET(nd6_maxqueuelen)
110
111#ifdef ND6_DEBUG
112VNET_DEFINE(int, nd6_debug) = 1;
113#else
114VNET_DEFINE(int, nd6_debug) = 0;
115#endif
116
117static eventhandler_tag lle_event_eh, iflladdr_event_eh;
118
119VNET_DEFINE(struct nd_drhead, nd_defrouter);
120VNET_DEFINE(struct nd_prhead, nd_prefix);
121VNET_DEFINE(struct rwlock, nd6_lock);
122VNET_DEFINE(uint64_t, nd6_list_genid);
123VNET_DEFINE(struct mtx, nd6_onlink_mtx);
124
125VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
126#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
127
128int	(*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
129
130static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *,
131	struct ifnet *);
132static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
133static void nd6_slowtimo(void *);
134static int regen_tmpaddr(struct in6_ifaddr *);
135static void nd6_free(struct llentry **, int);
136static void nd6_free_redirect(const struct llentry *);
137static void nd6_llinfo_timer(void *);
138static void nd6_llinfo_settimer_locked(struct llentry *, long);
139static void clear_llinfo_pqueue(struct llentry *);
140static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
141static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
142    const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **);
143static int nd6_need_cache(struct ifnet *);
144
145
146static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
147#define	V_nd6_slowtimo_ch		VNET(nd6_slowtimo_ch)
148
149VNET_DEFINE(struct callout, nd6_timer_ch);
150#define	V_nd6_timer_ch			VNET(nd6_timer_ch)
151
152static void
153nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
154{
155	struct rt_addrinfo rtinfo;
156	struct sockaddr_in6 dst;
157	struct sockaddr_dl gw;
158	struct ifnet *ifp;
159	int type;
160	int fibnum;
161
162	LLE_WLOCK_ASSERT(lle);
163
164	if (lltable_get_af(lle->lle_tbl) != AF_INET6)
165		return;
166
167	switch (evt) {
168	case LLENTRY_RESOLVED:
169		type = RTM_ADD;
170		KASSERT(lle->la_flags & LLE_VALID,
171		    ("%s: %p resolved but not valid?", __func__, lle));
172		break;
173	case LLENTRY_EXPIRED:
174		type = RTM_DELETE;
175		break;
176	default:
177		return;
178	}
179
180	ifp = lltable_get_ifp(lle->lle_tbl);
181
182	bzero(&dst, sizeof(dst));
183	bzero(&gw, sizeof(gw));
184	bzero(&rtinfo, sizeof(rtinfo));
185	lltable_fill_sa_entry(lle, (struct sockaddr *)&dst);
186	dst.sin6_scope_id = in6_getscopezone(ifp,
187	    in6_addrscope(&dst.sin6_addr));
188	gw.sdl_len = sizeof(struct sockaddr_dl);
189	gw.sdl_family = AF_LINK;
190	gw.sdl_alen = ifp->if_addrlen;
191	gw.sdl_index = ifp->if_index;
192	gw.sdl_type = ifp->if_type;
193	if (evt == LLENTRY_RESOLVED)
194		bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
195	rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
196	rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
197	rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
198	fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : ifp->if_fib;
199	rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | (
200	    type == RTM_ADD ? RTF_UP: 0), 0, fibnum);
201}
202
203/*
204 * A handler for interface link layer address change event.
205 */
206static void
207nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
208{
209
210	lltable_update_ifaddr(LLTABLE6(ifp));
211}
212
213void
214nd6_init(void)
215{
216
217	mtx_init(&V_nd6_onlink_mtx, "nd6 onlink", NULL, MTX_DEF);
218	rw_init(&V_nd6_lock, "nd6 list");
219
220	LIST_INIT(&V_nd_prefix);
221	TAILQ_INIT(&V_nd_defrouter);
222
223	/* Start timers. */
224	callout_init(&V_nd6_slowtimo_ch, 0);
225	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
226	    nd6_slowtimo, curvnet);
227
228	callout_init(&V_nd6_timer_ch, 0);
229	callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
230
231	nd6_dad_init();
232	if (IS_DEFAULT_VNET(curvnet)) {
233		lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
234		    NULL, EVENTHANDLER_PRI_ANY);
235		iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
236		    nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
237	}
238}
239
#ifdef VIMAGE
/*
 * Tear down what nd6_init() set up for the current vnet: drain both
 * periodic callouts, deregister the event handlers (default vnet only) and
 * destroy the ND6 locks.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * proper prototype, consistent with nd6_init(void).
 */
void
nd6_destroy(void)
{

	/* Wait for any running timer handler before the locks go away. */
	callout_drain(&V_nd6_slowtimo_ch);
	callout_drain(&V_nd6_timer_ch);
	if (IS_DEFAULT_VNET(curvnet)) {
		EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
	}
	rw_destroy(&V_nd6_lock);
	mtx_destroy(&V_nd6_onlink_mtx);
}
#endif
255
256struct nd_ifinfo *
257nd6_ifattach(struct ifnet *ifp)
258{
259	struct nd_ifinfo *nd;
260
261	nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
262	nd->initialized = 1;
263
264	nd->chlim = IPV6_DEFHLIM;
265	nd->basereachable = REACHABLE_TIME;
266	nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
267	nd->retrans = RETRANS_TIMER;
268
269	nd->flags = ND6_IFF_PERFORMNUD;
270
271	/* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL.
272	 * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by
273	 * default regardless of the V_ip6_auto_linklocal configuration to
274	 * give a reasonable default behavior.
275	 */
276	if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) ||
277	    (ifp->if_flags & IFF_LOOPBACK))
278		nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
279	/*
280	 * A loopback interface does not need to accept RTADV.
281	 * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by
282	 * default regardless of the V_ip6_accept_rtadv configuration to
283	 * prevent the interface from accepting RA messages arrived
284	 * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV.
285	 */
286	if (V_ip6_accept_rtadv &&
287	    !(ifp->if_flags & IFF_LOOPBACK) &&
288	    (ifp->if_type != IFT_BRIDGE))
289			nd->flags |= ND6_IFF_ACCEPT_RTADV;
290	if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
291		nd->flags |= ND6_IFF_NO_RADR;
292
293	/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
294	nd6_setmtu0(ifp, nd);
295
296	return nd;
297}
298
299void
300nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd)
301{
302	struct ifaddr *ifa, *next;
303
304	IF_ADDR_RLOCK(ifp);
305	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
306		if (ifa->ifa_addr->sa_family != AF_INET6)
307			continue;
308
309		/* stop DAD processing */
310		nd6_dad_stop(ifa);
311	}
312	IF_ADDR_RUNLOCK(ifp);
313
314	free(nd, M_IP6NDP);
315}
316
317/*
318 * Reset ND level link MTU. This function is called when the physical MTU
319 * changes, which means we might have to adjust the ND level MTU.
320 */
321void
322nd6_setmtu(struct ifnet *ifp)
323{
324	if (ifp->if_afdata[AF_INET6] == NULL)
325		return;
326
327	nd6_setmtu0(ifp, ND_IFINFO(ifp));
328}
329
330/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
331void
332nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
333{
334	u_int32_t omaxmtu;
335
336	omaxmtu = ndi->maxmtu;
337
338	switch (ifp->if_type) {
339	case IFT_ARCNET:
340		ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
341		break;
342	case IFT_FDDI:
343		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
344		break;
345	case IFT_ISO88025:
346		 ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
347		 break;
348	default:
349		ndi->maxmtu = ifp->if_mtu;
350		break;
351	}
352
353	/*
354	 * Decreasing the interface MTU under IPV6 minimum MTU may cause
355	 * undesirable situation.  We thus notify the operator of the change
356	 * explicitly.  The check for omaxmtu is necessary to restrict the
357	 * log to the case of changing the MTU, not initializing it.
358	 */
359	if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
360		log(LOG_NOTICE, "nd6_setmtu0: "
361		    "new link MTU on %s (%lu) is too small for IPv6\n",
362		    if_name(ifp), (unsigned long)ndi->maxmtu);
363	}
364
365	if (ndi->maxmtu > V_in6_maxmtu)
366		in6_setmaxmtu(); /* check all interfaces just in case */
367
368}
369
370void
371nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
372{
373
374	bzero(ndopts, sizeof(*ndopts));
375	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
376	ndopts->nd_opts_last
377		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
378
379	if (icmp6len == 0) {
380		ndopts->nd_opts_done = 1;
381		ndopts->nd_opts_search = NULL;
382	}
383}
384
385/*
386 * Take one ND option.
387 */
388struct nd_opt_hdr *
389nd6_option(union nd_opts *ndopts)
390{
391	struct nd_opt_hdr *nd_opt;
392	int olen;
393
394	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
395	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
396	    __func__));
397	if (ndopts->nd_opts_search == NULL)
398		return NULL;
399	if (ndopts->nd_opts_done)
400		return NULL;
401
402	nd_opt = ndopts->nd_opts_search;
403
404	/* make sure nd_opt_len is inside the buffer */
405	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
406		bzero(ndopts, sizeof(*ndopts));
407		return NULL;
408	}
409
410	olen = nd_opt->nd_opt_len << 3;
411	if (olen == 0) {
412		/*
413		 * Message validation requires that all included
414		 * options have a length that is greater than zero.
415		 */
416		bzero(ndopts, sizeof(*ndopts));
417		return NULL;
418	}
419
420	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
421	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
422		/* option overruns the end of buffer, invalid */
423		bzero(ndopts, sizeof(*ndopts));
424		return NULL;
425	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
426		/* reached the end of options chain */
427		ndopts->nd_opts_done = 1;
428		ndopts->nd_opts_search = NULL;
429	}
430	return nd_opt;
431}
432
433/*
434 * Parse multiple ND options.
435 * This function is much easier to use, for ND routines that do not need
436 * multiple options of the same type.
437 */
438int
439nd6_options(union nd_opts *ndopts)
440{
441	struct nd_opt_hdr *nd_opt;
442	int i = 0;
443
444	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
445	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
446	    __func__));
447	if (ndopts->nd_opts_search == NULL)
448		return 0;
449
450	while (1) {
451		nd_opt = nd6_option(ndopts);
452		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
453			/*
454			 * Message validation requires that all included
455			 * options have a length that is greater than zero.
456			 */
457			ICMP6STAT_INC(icp6s_nd_badopt);
458			bzero(ndopts, sizeof(*ndopts));
459			return -1;
460		}
461
462		if (nd_opt == NULL)
463			goto skip1;
464
465		switch (nd_opt->nd_opt_type) {
466		case ND_OPT_SOURCE_LINKADDR:
467		case ND_OPT_TARGET_LINKADDR:
468		case ND_OPT_MTU:
469		case ND_OPT_REDIRECTED_HEADER:
470		case ND_OPT_NONCE:
471			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
472				nd6log((LOG_INFO,
473				    "duplicated ND6 option found (type=%d)\n",
474				    nd_opt->nd_opt_type));
475				/* XXX bark? */
476			} else {
477				ndopts->nd_opt_array[nd_opt->nd_opt_type]
478					= nd_opt;
479			}
480			break;
481		case ND_OPT_PREFIX_INFORMATION:
482			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
483				ndopts->nd_opt_array[nd_opt->nd_opt_type]
484					= nd_opt;
485			}
486			ndopts->nd_opts_pi_end =
487				(struct nd_opt_prefix_info *)nd_opt;
488			break;
489		/* What about ND_OPT_ROUTE_INFO? RFC 4191 */
490		case ND_OPT_RDNSS:	/* RFC 6106 */
491		case ND_OPT_DNSSL:	/* RFC 6106 */
492			/*
493			 * Silently ignore options we know and do not care about
494			 * in the kernel.
495			 */
496			break;
497		default:
498			/*
499			 * Unknown options must be silently ignored,
500			 * to accommodate future extension to the protocol.
501			 */
502			nd6log((LOG_DEBUG,
503			    "nd6_options: unsupported option %d - "
504			    "option ignored\n", nd_opt->nd_opt_type));
505		}
506
507skip1:
508		i++;
509		if (i > V_nd6_maxndopt) {
510			ICMP6STAT_INC(icp6s_nd_toomanyopt);
511			nd6log((LOG_INFO, "too many loop in nd opt\n"));
512			break;
513		}
514
515		if (ndopts->nd_opts_done)
516			break;
517	}
518
519	return 0;
520}
521
522/*
523 * ND6 timer routine to handle ND6 entries
524 */
525static void
526nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
527{
528	int canceled;
529
530	LLE_WLOCK_ASSERT(ln);
531
532	if (tick < 0) {
533		ln->la_expire = 0;
534		ln->ln_ntick = 0;
535		canceled = callout_stop(&ln->lle_timer);
536	} else {
537		ln->la_expire = time_uptime + tick / hz;
538		LLE_ADDREF(ln);
539		if (tick > INT_MAX) {
540			ln->ln_ntick = tick - INT_MAX;
541			canceled = callout_reset(&ln->lle_timer, INT_MAX,
542			    nd6_llinfo_timer, ln);
543		} else {
544			ln->ln_ntick = 0;
545			canceled = callout_reset(&ln->lle_timer, tick,
546			    nd6_llinfo_timer, ln);
547		}
548	}
549	if (canceled > 0)
550		LLE_REMREF(ln);
551}
552
553/*
554 * Gets source address of the first packet in hold queue
555 * and stores it in @src.
556 * Returns pointer to @src (if hold queue is not empty) or NULL.
557 *
558 * Set noinline to be dtrace-friendly
559 */
560static __noinline struct in6_addr *
561nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src)
562{
563	struct ip6_hdr hdr;
564	struct mbuf *m;
565
566	if (ln->la_hold == NULL)
567		return (NULL);
568
569	/*
570	 * assume every packet in la_hold has the same IP header
571	 */
572	m = ln->la_hold;
573	if (sizeof(hdr) > m->m_len)
574		return (NULL);
575
576	m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr);
577	*src = hdr.ip6_src;
578
579	return (src);
580}
581
582/*
583 * Checks if we need to switch from STALE state.
584 *
585 * RFC 4861 requires switching from STALE to DELAY state
586 * on first packet matching entry, waiting V_nd6_delay and
587 * transition to PROBE state (if upper layer confirmation was
588 * not received).
589 *
590 * This code performs a bit differently:
591 * On packet hit we don't change state (but desired state
592 * can be guessed by control plane). However, after V_nd6_delay
593 * seconds code will transition to PROBE state (so DELAY state
594 * is kinda skipped in most situations).
595 *
596 * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
597 * we perform the following upon entering STALE state:
598 *
599 * 1) Arm timer to run each V_nd6_delay seconds to make sure that
600 * if packet was transmitted at the start of given interval, we
601 * would be able to switch to PROBE state in V_nd6_delay seconds
602 * as user expects.
603 *
604 * 2) Reschedule timer until original V_nd6_gctimer expires keeping
605 * lle in STALE state (remaining timer value stored in lle_remtime).
606 *
607 * 3) Reschedule timer if packet was transmitted less that V_nd6_delay
608 * seconds ago.
609 *
610 * Returns non-zero value if the entry is still STALE (storing
611 * the next timer interval in @pdelay).
612 *
613 * Returns zero value if original timer expired or we need to switch to
614 * PROBE (store that in @do_switch variable).
615 */
616static int
617nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
618{
619	int nd_delay, nd_gctimer, r_skip_req;
620	time_t lle_hittime;
621	long delay;
622
623	*do_switch = 0;
624	nd_gctimer = V_nd6_gctimer;
625	nd_delay = V_nd6_delay;
626
627	LLE_REQ_LOCK(lle);
628	r_skip_req = lle->r_skip_req;
629	lle_hittime = lle->lle_hittime;
630	LLE_REQ_UNLOCK(lle);
631
632	if (r_skip_req > 0) {
633
634		/*
635		 * Nonzero r_skip_req value was set upon entering
636		 * STALE state. Since value was not changed, no
637		 * packets were passed using this lle. Ask for
638		 * timer reschedule and keep STALE state.
639		 */
640		delay = (long)(MIN(nd_gctimer, nd_delay));
641		delay *= hz;
642		if (lle->lle_remtime > delay)
643			lle->lle_remtime -= delay;
644		else {
645			delay = lle->lle_remtime;
646			lle->lle_remtime = 0;
647		}
648
649		if (delay == 0) {
650
651			/*
652			 * The original ng6_gctime timeout ended,
653			 * no more rescheduling.
654			 */
655			return (0);
656		}
657
658		*pdelay = delay;
659		return (1);
660	}
661
662	/*
663	 * Packet received. Verify timestamp
664	 */
665	delay = (long)(time_uptime - lle_hittime);
666	if (delay < nd_delay) {
667
668		/*
669		 * V_nd6_delay still not passed since the first
670		 * hit in STALE state.
671		 * Reshedule timer and return.
672		 */
673		*pdelay = (long)(nd_delay - delay) * hz;
674		return (1);
675	}
676
677	/* Request switching to probe */
678	*do_switch = 1;
679	return (0);
680}
681
682
683/*
684 * Switch @lle state to new state optionally arming timers.
685 *
686 * Set noinline to be dtrace-friendly
687 */
688__noinline void
689nd6_llinfo_setstate(struct llentry *lle, int newstate)
690{
691	struct ifnet *ifp;
692	int nd_gctimer, nd_delay;
693	long delay, remtime;
694
695	delay = 0;
696	remtime = 0;
697
698	switch (newstate) {
699	case ND6_LLINFO_INCOMPLETE:
700		ifp = lle->lle_tbl->llt_ifp;
701		delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
702		break;
703	case ND6_LLINFO_REACHABLE:
704		if (!ND6_LLINFO_PERMANENT(lle)) {
705			ifp = lle->lle_tbl->llt_ifp;
706			delay = (long)ND_IFINFO(ifp)->reachable * hz;
707		}
708		break;
709	case ND6_LLINFO_STALE:
710
711		/*
712		 * Notify fast path that we want to know if any packet
713		 * is transmitted by setting r_skip_req.
714		 */
715		LLE_REQ_LOCK(lle);
716		lle->r_skip_req = 1;
717		LLE_REQ_UNLOCK(lle);
718		nd_delay = V_nd6_delay;
719		nd_gctimer = V_nd6_gctimer;
720
721		delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
722		remtime = (long)nd_gctimer * hz - delay;
723		break;
724	case ND6_LLINFO_DELAY:
725		lle->la_asked = 0;
726		delay = (long)V_nd6_delay * hz;
727		break;
728	}
729
730	if (delay > 0)
731		nd6_llinfo_settimer_locked(lle, delay);
732
733	lle->lle_remtime = remtime;
734	lle->ln_state = newstate;
735}
736
737/*
738 * Timer-dependent part of nd state machine.
739 *
740 * Set noinline to be dtrace-friendly
741 */
742static __noinline void
743nd6_llinfo_timer(void *arg)
744{
745	struct llentry *ln;
746	struct in6_addr *dst, *pdst, *psrc, src;
747	struct ifnet *ifp;
748	struct nd_ifinfo *ndi;
749	int do_switch, send_ns;
750	long delay;
751
752	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
753	ln = (struct llentry *)arg;
754	ifp = lltable_get_ifp(ln->lle_tbl);
755	CURVNET_SET(ifp->if_vnet);
756
757	ND6_RLOCK();
758	LLE_WLOCK(ln);
759	if (callout_pending(&ln->lle_timer)) {
760		/*
761		 * Here we are a bit odd here in the treatment of
762		 * active/pending. If the pending bit is set, it got
763		 * rescheduled before I ran. The active
764		 * bit we ignore, since if it was stopped
765		 * in ll_tablefree() and was currently running
766		 * it would have return 0 so the code would
767		 * not have deleted it since the callout could
768		 * not be stopped so we want to go through
769		 * with the delete here now. If the callout
770		 * was restarted, the pending bit will be back on and
771		 * we just want to bail since the callout_reset would
772		 * return 1 and our reference would have been removed
773		 * by nd6_llinfo_settimer_locked above since canceled
774		 * would have been 1.
775		 */
776		LLE_WUNLOCK(ln);
777		ND6_RUNLOCK();
778		CURVNET_RESTORE();
779		return;
780	}
781	ndi = ND_IFINFO(ifp);
782	send_ns = 0;
783	dst = &ln->r_l3addr.addr6;
784	pdst = dst;
785
786	if (ln->ln_ntick > 0) {
787		if (ln->ln_ntick > INT_MAX) {
788			ln->ln_ntick -= INT_MAX;
789			nd6_llinfo_settimer_locked(ln, INT_MAX);
790		} else {
791			ln->ln_ntick = 0;
792			nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
793		}
794		goto done;
795	}
796
797	if (ln->la_flags & LLE_STATIC) {
798		goto done;
799	}
800
801	if (ln->la_flags & LLE_DELETED) {
802		nd6_free(&ln, 0);
803		goto done;
804	}
805
806	switch (ln->ln_state) {
807	case ND6_LLINFO_INCOMPLETE:
808		if (ln->la_asked < V_nd6_mmaxtries) {
809			ln->la_asked++;
810			send_ns = 1;
811			/* Send NS to multicast address */
812			pdst = NULL;
813		} else {
814			struct mbuf *m = ln->la_hold;
815			if (m) {
816				struct mbuf *m0;
817
818				/*
819				 * assuming every packet in la_hold has the
820				 * same IP header.  Send error after unlock.
821				 */
822				m0 = m->m_nextpkt;
823				m->m_nextpkt = NULL;
824				ln->la_hold = m0;
825				clear_llinfo_pqueue(ln);
826			}
827			nd6_free(&ln, 0);
828			if (m != NULL)
829				icmp6_error2(m, ICMP6_DST_UNREACH,
830				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
831		}
832		break;
833	case ND6_LLINFO_REACHABLE:
834		if (!ND6_LLINFO_PERMANENT(ln))
835			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
836		break;
837
838	case ND6_LLINFO_STALE:
839		if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
840
841			/*
842			 * No packet has used this entry and GC timeout
843			 * has not been passed. Reshedule timer and
844			 * return.
845			 */
846			nd6_llinfo_settimer_locked(ln, delay);
847			break;
848		}
849
850		if (do_switch == 0) {
851
852			/*
853			 * GC timer has ended and entry hasn't been used.
854			 * Run Garbage collector (RFC 4861, 5.3)
855			 */
856			if (!ND6_LLINFO_PERMANENT(ln))
857				nd6_free(&ln, 1);
858			break;
859		}
860
861		/* Entry has been used AND delay timer has ended. */
862
863		/* FALLTHROUGH */
864
865	case ND6_LLINFO_DELAY:
866		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
867			/* We need NUD */
868			ln->la_asked = 1;
869			nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE);
870			send_ns = 1;
871		} else
872			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */
873		break;
874	case ND6_LLINFO_PROBE:
875		if (ln->la_asked < V_nd6_umaxtries) {
876			ln->la_asked++;
877			send_ns = 1;
878		} else {
879			nd6_free(&ln, 0);
880		}
881		break;
882	default:
883		panic("%s: paths in a dark night can be confusing: %d",
884		    __func__, ln->ln_state);
885	}
886done:
887	if (ln != NULL)
888		ND6_RUNLOCK();
889	if (send_ns != 0) {
890		nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
891		psrc = nd6_llinfo_get_holdsrc(ln, &src);
892		LLE_FREE_LOCKED(ln);
893		ln = NULL;
894		nd6_ns_output(ifp, psrc, pdst, dst, NULL);
895	}
896
897	if (ln != NULL)
898		LLE_FREE_LOCKED(ln);
899	CURVNET_RESTORE();
900}
901
902
903/*
904 * ND6 timer routine to expire default route list and prefix list
905 */
906void
907nd6_timer(void *arg)
908{
909	CURVNET_SET((struct vnet *) arg);
910	struct nd_drhead drq;
911	struct nd_prhead prl;
912	struct nd_defrouter *dr, *ndr;
913	struct nd_prefix *pr, *npr;
914	struct in6_ifaddr *ia6, *nia6;
915	uint64_t genid;
916
917	TAILQ_INIT(&drq);
918	LIST_INIT(&prl);
919
920	ND6_WLOCK();
921	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
922		if (dr->expire && dr->expire < time_uptime)
923			defrouter_unlink(dr, &drq);
924	ND6_WUNLOCK();
925
926	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
927		TAILQ_REMOVE(&drq, dr, dr_entry);
928		defrouter_del(dr);
929	}
930
931	/*
932	 * expire interface addresses.
933	 * in the past the loop was inside prefix expiry processing.
934	 * However, from a stricter speci-confrmance standpoint, we should
935	 * rather separate address lifetimes and prefix lifetimes.
936	 *
937	 * XXXRW: in6_ifaddrhead locking.
938	 */
939  addrloop:
940	TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
941		/* check address lifetime */
942		if (IFA6_IS_INVALID(ia6)) {
943			int regen = 0;
944
945			/*
946			 * If the expiring address is temporary, try
947			 * regenerating a new one.  This would be useful when
948			 * we suspended a laptop PC, then turned it on after a
949			 * period that could invalidate all temporary
950			 * addresses.  Although we may have to restart the
951			 * loop (see below), it must be after purging the
952			 * address.  Otherwise, we'd see an infinite loop of
953			 * regeneration.
954			 */
955			if (V_ip6_use_tempaddr &&
956			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
957				if (regen_tmpaddr(ia6) == 0)
958					regen = 1;
959			}
960
961			in6_purgeaddr(&ia6->ia_ifa);
962
963			if (regen)
964				goto addrloop; /* XXX: see below */
965		} else if (IFA6_IS_DEPRECATED(ia6)) {
966			int oldflags = ia6->ia6_flags;
967
968			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
969
970			/*
971			 * If a temporary address has just become deprecated,
972			 * regenerate a new one if possible.
973			 */
974			if (V_ip6_use_tempaddr &&
975			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
976			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
977
978				if (regen_tmpaddr(ia6) == 0) {
979					/*
980					 * A new temporary address is
981					 * generated.
982					 * XXX: this means the address chain
983					 * has changed while we are still in
984					 * the loop.  Although the change
985					 * would not cause disaster (because
986					 * it's not a deletion, but an
987					 * addition,) we'd rather restart the
988					 * loop just for safety.  Or does this
989					 * significantly reduce performance??
990					 */
991					goto addrloop;
992				}
993			}
994		} else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
995			/*
996			 * Schedule DAD for a tentative address.  This happens
997			 * if the interface was down or not running
998			 * when the address was configured.
999			 */
1000			int delay;
1001
1002			delay = arc4random() %
1003			    (MAX_RTR_SOLICITATION_DELAY * hz);
1004			nd6_dad_start((struct ifaddr *)ia6, delay);
1005		} else {
1006			/*
1007			 * Check status of the interface.  If it is down,
1008			 * mark the address as tentative for future DAD.
1009			 */
1010			if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
1011			    (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
1012				== 0 ||
1013			    (ND_IFINFO(ia6->ia_ifp)->flags &
1014				ND6_IFF_IFDISABLED) != 0) {
1015				ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
1016				ia6->ia6_flags |= IN6_IFF_TENTATIVE;
1017			}
1018			/*
1019			 * A new RA might have made a deprecated address
1020			 * preferred.
1021			 */
1022			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
1023		}
1024	}
1025
1026	ND6_WLOCK();
1027restart:
1028	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
1029		/*
1030		 * Expire prefixes. Since the pltime is only used for
1031		 * autoconfigured addresses, pltime processing for prefixes is
1032		 * not necessary.
1033		 *
1034		 * Only unlink after all derived addresses have expired. This
1035		 * may not occur until two hours after the prefix has expired
1036		 * per RFC 4862. If the prefix expires before its derived
1037		 * addresses, mark it off-link. This will be done automatically
1038		 * after unlinking if no address references remain.
1039		 */
1040		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME ||
1041		    time_uptime - pr->ndpr_lastupdate <= pr->ndpr_vltime)
1042			continue;
1043
1044		if (pr->ndpr_addrcnt == 0) {
1045			nd6_prefix_unlink(pr, &prl);
1046			continue;
1047		}
1048		if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1049			genid = V_nd6_list_genid;
1050			nd6_prefix_ref(pr);
1051			ND6_WUNLOCK();
1052			ND6_ONLINK_LOCK();
1053			(void)nd6_prefix_offlink(pr);
1054			ND6_ONLINK_UNLOCK();
1055			ND6_WLOCK();
1056			nd6_prefix_rele(pr);
1057			if (genid != V_nd6_list_genid)
1058				goto restart;
1059		}
1060	}
1061	ND6_WUNLOCK();
1062
1063	while ((pr = LIST_FIRST(&prl)) != NULL) {
1064		LIST_REMOVE(pr, ndpr_entry);
1065		nd6_prefix_del(pr);
1066	}
1067
1068	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
1069	    nd6_timer, curvnet);
1070
1071	CURVNET_RESTORE();
1072}
1073
1074/*
1075 * ia6 - deprecated/invalidated temporary address
1076 */
1077static int
1078regen_tmpaddr(struct in6_ifaddr *ia6)
1079{
1080	struct ifaddr *ifa;
1081	struct ifnet *ifp;
1082	struct in6_ifaddr *public_ifa6 = NULL;
1083
1084	ifp = ia6->ia_ifa.ifa_ifp;
1085	IF_ADDR_RLOCK(ifp);
1086	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1087		struct in6_ifaddr *it6;
1088
1089		if (ifa->ifa_addr->sa_family != AF_INET6)
1090			continue;
1091
1092		it6 = (struct in6_ifaddr *)ifa;
1093
1094		/* ignore no autoconf addresses. */
1095		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1096			continue;
1097
1098		/* ignore autoconf addresses with different prefixes. */
1099		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
1100			continue;
1101
1102		/*
1103		 * Now we are looking at an autoconf address with the same
1104		 * prefix as ours.  If the address is temporary and is still
1105		 * preferred, do not create another one.  It would be rare, but
1106		 * could happen, for example, when we resume a laptop PC after
1107		 * a long period.
1108		 */
1109		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1110		    !IFA6_IS_DEPRECATED(it6)) {
1111			public_ifa6 = NULL;
1112			break;
1113		}
1114
1115		/*
1116		 * This is a public autoconf address that has the same prefix
1117		 * as ours.  If it is preferred, keep it.  We can't break the
1118		 * loop here, because there may be a still-preferred temporary
1119		 * address with the prefix.
1120		 */
1121		if (!IFA6_IS_DEPRECATED(it6))
1122			public_ifa6 = it6;
1123	}
1124	if (public_ifa6 != NULL)
1125		ifa_ref(&public_ifa6->ia_ifa);
1126	IF_ADDR_RUNLOCK(ifp);
1127
1128	if (public_ifa6 != NULL) {
1129		int e;
1130
1131		if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
1132			ifa_free(&public_ifa6->ia_ifa);
1133			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
1134			    " tmp addr,errno=%d\n", e);
1135			return (-1);
1136		}
1137		ifa_free(&public_ifa6->ia_ifa);
1138		return (0);
1139	}
1140
1141	return (-1);
1142}
1143
1144/*
1145 * Remove prefix and default router list entries corresponding to ifp. Neighbor
1146 * cache entries are freed in in6_domifdetach().
1147 */
1148void
1149nd6_purge(struct ifnet *ifp)
1150{
1151	struct nd_drhead drq;
1152	struct nd_prhead prl;
1153	struct nd_defrouter *dr, *ndr;
1154	struct nd_prefix *pr, *npr;
1155
1156	TAILQ_INIT(&drq);
1157	LIST_INIT(&prl);
1158
1159	/*
1160	 * Nuke default router list entries toward ifp.
1161	 * We defer removal of default router list entries that is installed
1162	 * in the routing table, in order to keep additional side effects as
1163	 * small as possible.
1164	 */
1165	ND6_WLOCK();
1166	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
1167		if (dr->installed)
1168			continue;
1169		if (dr->ifp == ifp)
1170			defrouter_unlink(dr, &drq);
1171	}
1172	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
1173		if (!dr->installed)
1174			continue;
1175		if (dr->ifp == ifp)
1176			defrouter_unlink(dr, &drq);
1177	}
1178
1179	/*
1180	 * Remove prefixes on ifp. We should have already removed addresses on
1181	 * this interface, so no addresses should be referencing these prefixes.
1182	 */
1183	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
1184		if (pr->ndpr_ifp == ifp)
1185			nd6_prefix_unlink(pr, &prl);
1186	}
1187	ND6_WUNLOCK();
1188
1189	/* Delete the unlinked router and prefix objects. */
1190	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
1191		TAILQ_REMOVE(&drq, dr, dr_entry);
1192		defrouter_del(dr);
1193	}
1194	while ((pr = LIST_FIRST(&prl)) != NULL) {
1195		LIST_REMOVE(pr, ndpr_entry);
1196		nd6_prefix_del(pr);
1197	}
1198
1199	/* cancel default outgoing interface setting */
1200	if (V_nd6_defifindex == ifp->if_index)
1201		nd6_setdefaultiface(0);
1202
1203	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
1204		/* Refresh default router list. */
1205		defrouter_select_fib(ifp->if_fib);
1206	}
1207}
1208
1209/*
1210 * the caller acquires and releases the lock on the lltbls
1211 * Returns the llentry locked
1212 */
1213struct llentry *
1214nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
1215{
1216	struct sockaddr_in6 sin6;
1217	struct llentry *ln;
1218
1219	bzero(&sin6, sizeof(sin6));
1220	sin6.sin6_len = sizeof(struct sockaddr_in6);
1221	sin6.sin6_family = AF_INET6;
1222	sin6.sin6_addr = *addr6;
1223
1224	IF_AFDATA_LOCK_ASSERT(ifp);
1225
1226	ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);
1227
1228	return (ln);
1229}
1230
1231struct llentry *
1232nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
1233{
1234	struct sockaddr_in6 sin6;
1235	struct llentry *ln;
1236
1237	bzero(&sin6, sizeof(sin6));
1238	sin6.sin6_len = sizeof(struct sockaddr_in6);
1239	sin6.sin6_family = AF_INET6;
1240	sin6.sin6_addr = *addr6;
1241
1242	ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6);
1243	if (ln != NULL)
1244		ln->ln_state = ND6_LLINFO_NOSTATE;
1245
1246	return (ln);
1247}
1248
1249/*
1250 * Test whether a given IPv6 address is a neighbor or not, ignoring
1251 * the actual neighbor cache.  The neighbor cache is ignored in order
1252 * to not reenter the routing code from within itself.
1253 */
1254static int
1255nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
1256{
1257	struct nd_prefix *pr;
1258	struct ifaddr *ifa;
1259	struct rt_addrinfo info;
1260	struct sockaddr_in6 rt_key;
1261	const struct sockaddr *dst6;
1262	uint64_t genid;
1263	int error, fibnum;
1264
1265	/*
1266	 * A link-local address is always a neighbor.
1267	 * XXX: a link does not necessarily specify a single interface.
1268	 */
1269	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
1270		struct sockaddr_in6 sin6_copy;
1271		u_int32_t zone;
1272
1273		/*
1274		 * We need sin6_copy since sa6_recoverscope() may modify the
1275		 * content (XXX).
1276		 */
1277		sin6_copy = *addr;
1278		if (sa6_recoverscope(&sin6_copy))
1279			return (0); /* XXX: should be impossible */
1280		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
1281			return (0);
1282		if (sin6_copy.sin6_scope_id == zone)
1283			return (1);
1284		else
1285			return (0);
1286	}
1287
1288	bzero(&rt_key, sizeof(rt_key));
1289	bzero(&info, sizeof(info));
1290	info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
1291
1292	/*
1293	 * If the address matches one of our addresses,
1294	 * it should be a neighbor.
1295	 * If the address matches one of our on-link prefixes, it should be a
1296	 * neighbor.
1297	 */
1298	ND6_RLOCK();
1299restart:
1300	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1301		if (pr->ndpr_ifp != ifp)
1302			continue;
1303
1304		if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1305			dst6 = (const struct sockaddr *)&pr->ndpr_prefix;
1306
1307			/*
1308			 * We only need to check all FIBs if add_addr_allfibs
1309			 * is unset. If set, checking any FIB will suffice.
1310			 */
1311			fibnum = V_rt_add_addr_allfibs ? rt_numfibs - 1 : 0;
1312			for (; fibnum < rt_numfibs; fibnum++) {
1313				genid = V_nd6_list_genid;
1314				ND6_RUNLOCK();
1315
1316				/*
1317				 * Restore length field before
1318				 * retrying lookup
1319				 */
1320				rt_key.sin6_len = sizeof(rt_key);
1321				error = rib_lookup_info(fibnum, dst6, 0, 0,
1322						        &info);
1323
1324				ND6_RLOCK();
1325				if (genid != V_nd6_list_genid)
1326					goto restart;
1327				if (error == 0)
1328					break;
1329			}
1330			if (error != 0)
1331				continue;
1332
1333			/*
1334			 * This is the case where multiple interfaces
1335			 * have the same prefix, but only one is installed
1336			 * into the routing table and that prefix entry
1337			 * is not the one being examined here. In the case
1338			 * where RADIX_MPATH is enabled, multiple route
1339			 * entries (of the same rt_key value) will be
1340			 * installed because the interface addresses all
1341			 * differ.
1342			 */
1343			if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1344			    &rt_key.sin6_addr))
1345				continue;
1346		}
1347
1348		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1349		    &addr->sin6_addr, &pr->ndpr_mask)) {
1350			ND6_RUNLOCK();
1351			return (1);
1352		}
1353	}
1354	ND6_RUNLOCK();
1355
1356	/*
1357	 * If the address is assigned on the node of the other side of
1358	 * a p2p interface, the address should be a neighbor.
1359	 */
1360	if (ifp->if_flags & IFF_POINTOPOINT) {
1361		IF_ADDR_RLOCK(ifp);
1362		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1363			if (ifa->ifa_addr->sa_family != addr->sin6_family)
1364				continue;
1365			if (ifa->ifa_dstaddr != NULL &&
1366			    sa_equal(addr, ifa->ifa_dstaddr)) {
1367				IF_ADDR_RUNLOCK(ifp);
1368				return 1;
1369			}
1370		}
1371		IF_ADDR_RUNLOCK(ifp);
1372	}
1373
1374	/*
1375	 * If the default router list is empty, all addresses are regarded
1376	 * as on-link, and thus, as a neighbor.
1377	 */
1378	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV &&
1379	    TAILQ_EMPTY(&V_nd_defrouter) &&
1380	    V_nd6_defifindex == ifp->if_index) {
1381		return (1);
1382	}
1383
1384	return (0);
1385}
1386
1387
1388/*
1389 * Detect if a given IPv6 address identifies a neighbor on a given link.
1390 * XXX: should take care of the destination of a p2p link?
1391 */
1392int
1393nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
1394{
1395	struct llentry *lle;
1396	int rc = 0;
1397
1398	IF_AFDATA_UNLOCK_ASSERT(ifp);
1399	if (nd6_is_new_addr_neighbor(addr, ifp))
1400		return (1);
1401
1402	/*
1403	 * Even if the address matches none of our addresses, it might be
1404	 * in the neighbor cache.
1405	 */
1406	IF_AFDATA_RLOCK(ifp);
1407	if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
1408		LLE_RUNLOCK(lle);
1409		rc = 1;
1410	}
1411	IF_AFDATA_RUNLOCK(ifp);
1412	return (rc);
1413}
1414
1415/*
1416 * Free an nd6 llinfo entry.
1417 * Since the function would cause significant changes in the kernel, DO NOT
1418 * make it global, unless you have a strong reason for the change, and are sure
1419 * that the change is safe.
1420 *
1421 * Set noinline to be dtrace-friendly
1422 */
1423static __noinline void
1424nd6_free(struct llentry **lnp, int gc)
1425{
1426	struct ifnet *ifp;
1427	struct llentry *ln;
1428	struct nd_defrouter *dr;
1429
1430	ln = *lnp;
1431	*lnp = NULL;
1432
1433	LLE_WLOCK_ASSERT(ln);
1434	ND6_RLOCK_ASSERT();
1435
1436	ifp = lltable_get_ifp(ln->lle_tbl);
1437	if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
1438		dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp);
1439	else
1440		dr = NULL;
1441	ND6_RUNLOCK();
1442
1443	if ((ln->la_flags & LLE_DELETED) == 0)
1444		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
1445
1446	/*
1447	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
1448	 * even though it is not harmful, it was not really necessary.
1449	 */
1450
1451	/* cancel timer */
1452	nd6_llinfo_settimer_locked(ln, -1);
1453
1454	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
1455		if (dr != NULL && dr->expire &&
1456		    ln->ln_state == ND6_LLINFO_STALE && gc) {
1457			/*
1458			 * If the reason for the deletion is just garbage
1459			 * collection, and the neighbor is an active default
1460			 * router, do not delete it.  Instead, reset the GC
1461			 * timer using the router's lifetime.
1462			 * Simply deleting the entry would affect default
1463			 * router selection, which is not necessarily a good
1464			 * thing, especially when we're using router preference
1465			 * values.
1466			 * XXX: the check for ln_state would be redundant,
1467			 *      but we intentionally keep it just in case.
1468			 */
1469			if (dr->expire > time_uptime)
1470				nd6_llinfo_settimer_locked(ln,
1471				    (dr->expire - time_uptime) * hz);
1472			else
1473				nd6_llinfo_settimer_locked(ln,
1474				    (long)V_nd6_gctimer * hz);
1475
1476			LLE_REMREF(ln);
1477			LLE_WUNLOCK(ln);
1478			defrouter_rele(dr);
1479			return;
1480		}
1481
1482		if (dr) {
1483			/*
1484			 * Unreachablity of a router might affect the default
1485			 * router selection and on-link detection of advertised
1486			 * prefixes.
1487			 */
1488
1489			/*
1490			 * Temporarily fake the state to choose a new default
1491			 * router and to perform on-link determination of
1492			 * prefixes correctly.
1493			 * Below the state will be set correctly,
1494			 * or the entry itself will be deleted.
1495			 */
1496			ln->ln_state = ND6_LLINFO_INCOMPLETE;
1497		}
1498
1499		if (ln->ln_router || dr) {
1500
1501			/*
1502			 * We need to unlock to avoid a LOR with rt6_flush() with the
1503			 * rnh and for the calls to pfxlist_onlink_check() and
1504			 * defrouter_select_fib() in the block further down for calls
1505			 * into nd6_lookup().  We still hold a ref.
1506			 */
1507			LLE_WUNLOCK(ln);
1508
1509			/*
1510			 * rt6_flush must be called whether or not the neighbor
1511			 * is in the Default Router List.
1512			 * See a corresponding comment in nd6_na_input().
1513			 */
1514			rt6_flush(&ln->r_l3addr.addr6, ifp);
1515		}
1516
1517		if (dr) {
1518			/*
1519			 * Since defrouter_select_fib() does not affect the
1520			 * on-link determination and MIP6 needs the check
1521			 * before the default router selection, we perform
1522			 * the check now.
1523			 */
1524			pfxlist_onlink_check();
1525
1526			/*
1527			 * Refresh default router list.
1528			 */
1529			defrouter_select_fib(dr->ifp->if_fib);
1530		}
1531
1532		/*
1533		 * If this entry was added by an on-link redirect, remove the
1534		 * corresponding host route.
1535		 */
1536		if (ln->la_flags & LLE_REDIRECT)
1537			nd6_free_redirect(ln);
1538
1539		if (ln->ln_router || dr)
1540			LLE_WLOCK(ln);
1541	}
1542
1543	/*
1544	 * Save to unlock. We still hold an extra reference and will not
1545	 * free(9) in llentry_free() if someone else holds one as well.
1546	 */
1547	LLE_WUNLOCK(ln);
1548	IF_AFDATA_LOCK(ifp);
1549	LLE_WLOCK(ln);
1550	/* Guard against race with other llentry_free(). */
1551	if (ln->la_flags & LLE_LINKED) {
1552		/* Remove callout reference */
1553		LLE_REMREF(ln);
1554		lltable_unlink_entry(ln->lle_tbl, ln);
1555	}
1556	IF_AFDATA_UNLOCK(ifp);
1557
1558	llentry_free(ln);
1559	if (dr != NULL)
1560		defrouter_rele(dr);
1561}
1562
1563static int
1564nd6_isdynrte(const struct rtentry *rt, void *xap)
1565{
1566
1567	if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC))
1568		return (1);
1569
1570	return (0);
1571}
1572/*
1573 * Remove the rtentry for the given llentry,
1574 * both of which were installed by a redirect.
1575 */
1576static void
1577nd6_free_redirect(const struct llentry *ln)
1578{
1579	int fibnum;
1580	struct sockaddr_in6 sin6;
1581	struct rt_addrinfo info;
1582
1583	lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6);
1584	memset(&info, 0, sizeof(info));
1585	info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6;
1586	info.rti_filter = nd6_isdynrte;
1587
1588	for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
1589		rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
1590}
1591
1592/*
1593 * Rejuvenate this function for routing operations related
1594 * processing.
1595 */
1596void
1597nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
1598{
1599	struct sockaddr_in6 *gateway;
1600	struct nd_defrouter *dr;
1601	struct ifnet *ifp;
1602
1603	gateway = (struct sockaddr_in6 *)rt->rt_gateway;
1604	ifp = rt->rt_ifp;
1605
1606	switch (req) {
1607	case RTM_ADD:
1608		break;
1609
1610	case RTM_DELETE:
1611		if (!ifp)
1612			return;
1613		/*
1614		 * Only indirect routes are interesting.
1615		 */
1616		if ((rt->rt_flags & RTF_GATEWAY) == 0)
1617			return;
1618		/*
1619		 * check for default route
1620		 */
1621		if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
1622		    &SIN6(rt_key(rt))->sin6_addr)) {
1623			dr = defrouter_lookup(&gateway->sin6_addr, ifp);
1624			if (dr != NULL) {
1625				dr->installed = 0;
1626				defrouter_rele(dr);
1627			}
1628		}
1629		break;
1630	}
1631}
1632
1633
1634int
1635nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
1636{
1637	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1638	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
1639	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
1640	int error = 0;
1641
1642	if (ifp->if_afdata[AF_INET6] == NULL)
1643		return (EPFNOSUPPORT);
1644	switch (cmd) {
1645	case OSIOCGIFINFO_IN6:
1646#define ND	ndi->ndi
1647		/* XXX: old ndp(8) assumes a positive value for linkmtu. */
1648		bzero(&ND, sizeof(ND));
1649		ND.linkmtu = IN6_LINKMTU(ifp);
1650		ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
1651		ND.basereachable = ND_IFINFO(ifp)->basereachable;
1652		ND.reachable = ND_IFINFO(ifp)->reachable;
1653		ND.retrans = ND_IFINFO(ifp)->retrans;
1654		ND.flags = ND_IFINFO(ifp)->flags;
1655		ND.recalctm = ND_IFINFO(ifp)->recalctm;
1656		ND.chlim = ND_IFINFO(ifp)->chlim;
1657		break;
1658	case SIOCGIFINFO_IN6:
1659		ND = *ND_IFINFO(ifp);
1660		break;
1661	case SIOCSIFINFO_IN6:
1662		/*
1663		 * used to change host variables from userland.
1664		 * intended for a use on router to reflect RA configurations.
1665		 */
1666		/* 0 means 'unspecified' */
1667		if (ND.linkmtu != 0) {
1668			if (ND.linkmtu < IPV6_MMTU ||
1669			    ND.linkmtu > IN6_LINKMTU(ifp)) {
1670				error = EINVAL;
1671				break;
1672			}
1673			ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
1674		}
1675
1676		if (ND.basereachable != 0) {
1677			int obasereachable = ND_IFINFO(ifp)->basereachable;
1678
1679			ND_IFINFO(ifp)->basereachable = ND.basereachable;
1680			if (ND.basereachable != obasereachable)
1681				ND_IFINFO(ifp)->reachable =
1682				    ND_COMPUTE_RTIME(ND.basereachable);
1683		}
1684		if (ND.retrans != 0)
1685			ND_IFINFO(ifp)->retrans = ND.retrans;
1686		if (ND.chlim != 0)
1687			ND_IFINFO(ifp)->chlim = ND.chlim;
1688		/* FALLTHROUGH */
1689	case SIOCSIFINFO_FLAGS:
1690	{
1691		struct ifaddr *ifa;
1692		struct in6_ifaddr *ia;
1693
1694		if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
1695		    !(ND.flags & ND6_IFF_IFDISABLED)) {
1696			/* ifdisabled 1->0 transision */
1697
1698			/*
1699			 * If the interface is marked as ND6_IFF_IFDISABLED and
1700			 * has an link-local address with IN6_IFF_DUPLICATED,
1701			 * do not clear ND6_IFF_IFDISABLED.
1702			 * See RFC 4862, Section 5.4.5.
1703			 */
1704			IF_ADDR_RLOCK(ifp);
1705			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1706				if (ifa->ifa_addr->sa_family != AF_INET6)
1707					continue;
1708				ia = (struct in6_ifaddr *)ifa;
1709				if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
1710				    IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
1711					break;
1712			}
1713			IF_ADDR_RUNLOCK(ifp);
1714
1715			if (ifa != NULL) {
1716				/* LLA is duplicated. */
1717				ND.flags |= ND6_IFF_IFDISABLED;
1718				log(LOG_ERR, "Cannot enable an interface"
1719				    " with a link-local address marked"
1720				    " duplicate.\n");
1721			} else {
1722				ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED;
1723				if (ifp->if_flags & IFF_UP)
1724					in6_if_up(ifp);
1725			}
1726		} else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
1727			    (ND.flags & ND6_IFF_IFDISABLED)) {
1728			/* ifdisabled 0->1 transision */
1729			/* Mark all IPv6 address as tentative. */
1730
1731			ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
1732			if (V_ip6_dad_count > 0 &&
1733			    (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
1734				IF_ADDR_RLOCK(ifp);
1735				TAILQ_FOREACH(ifa, &ifp->if_addrhead,
1736				    ifa_link) {
1737					if (ifa->ifa_addr->sa_family !=
1738					    AF_INET6)
1739						continue;
1740					ia = (struct in6_ifaddr *)ifa;
1741					ia->ia6_flags |= IN6_IFF_TENTATIVE;
1742				}
1743				IF_ADDR_RUNLOCK(ifp);
1744			}
1745		}
1746
1747		if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
1748			if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) {
1749				/* auto_linklocal 0->1 transision */
1750
1751				/* If no link-local address on ifp, configure */
1752				ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL;
1753				in6_ifattach(ifp, NULL);
1754			} else if (!(ND.flags & ND6_IFF_IFDISABLED) &&
1755			    ifp->if_flags & IFF_UP) {
1756				/*
1757				 * When the IF already has
1758				 * ND6_IFF_AUTO_LINKLOCAL, no link-local
1759				 * address is assigned, and IFF_UP, try to
1760				 * assign one.
1761				 */
1762				IF_ADDR_RLOCK(ifp);
1763				TAILQ_FOREACH(ifa, &ifp->if_addrhead,
1764				    ifa_link) {
1765					if (ifa->ifa_addr->sa_family !=
1766					    AF_INET6)
1767						continue;
1768					ia = (struct in6_ifaddr *)ifa;
1769					if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
1770						break;
1771				}
1772				IF_ADDR_RUNLOCK(ifp);
1773				if (ifa != NULL)
1774					/* No LLA is configured. */
1775					in6_ifattach(ifp, NULL);
1776			}
1777		}
1778	}
1779		ND_IFINFO(ifp)->flags = ND.flags;
1780		break;
1781#undef ND
1782	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
1783		/* sync kernel routing table with the default router list */
1784		defrouter_reset();
1785		defrouter_select();
1786		break;
1787	case SIOCSPFXFLUSH_IN6:
1788	{
1789		/* flush all the prefix advertised by routers */
1790		struct in6_ifaddr *ia, *ia_next;
1791		struct nd_prefix *pr, *next;
1792		struct nd_prhead prl;
1793
1794		LIST_INIT(&prl);
1795
1796		ND6_WLOCK();
1797		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
1798			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
1799				continue; /* XXX */
1800			nd6_prefix_unlink(pr, &prl);
1801		}
1802		ND6_WUNLOCK();
1803
1804		while ((pr = LIST_FIRST(&prl)) != NULL) {
1805			LIST_REMOVE(pr, ndpr_entry);
1806			/* XXXRW: in6_ifaddrhead locking. */
1807			TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
1808			    ia_next) {
1809				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1810					continue;
1811
1812				if (ia->ia6_ndpr == pr)
1813					in6_purgeaddr(&ia->ia_ifa);
1814			}
1815			nd6_prefix_del(pr);
1816		}
1817		break;
1818	}
1819	case SIOCSRTRFLUSH_IN6:
1820	{
1821		/* flush all the default routers */
1822		struct nd_drhead drq;
1823		struct nd_defrouter *dr;
1824
1825		TAILQ_INIT(&drq);
1826
1827		defrouter_reset();
1828
1829		ND6_WLOCK();
1830		while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL)
1831			defrouter_unlink(dr, &drq);
1832		ND6_WUNLOCK();
1833		while ((dr = TAILQ_FIRST(&drq)) != NULL) {
1834			TAILQ_REMOVE(&drq, dr, dr_entry);
1835			defrouter_del(dr);
1836		}
1837
1838		defrouter_select();
1839		break;
1840	}
1841	case SIOCGNBRINFO_IN6:
1842	{
1843		struct llentry *ln;
1844		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
1845
1846		if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
1847			return (error);
1848
1849		IF_AFDATA_RLOCK(ifp);
1850		ln = nd6_lookup(&nb_addr, 0, ifp);
1851		IF_AFDATA_RUNLOCK(ifp);
1852
1853		if (ln == NULL) {
1854			error = EINVAL;
1855			break;
1856		}
1857		nbi->state = ln->ln_state;
1858		nbi->asked = ln->la_asked;
1859		nbi->isrouter = ln->ln_router;
1860		if (ln->la_expire == 0)
1861			nbi->expire = 0;
1862		else
1863			nbi->expire = ln->la_expire + ln->lle_remtime / hz +
1864			    (time_second - time_uptime);
1865		LLE_RUNLOCK(ln);
1866		break;
1867	}
1868	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1869		ndif->ifindex = V_nd6_defifindex;
1870		break;
1871	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
1872		return (nd6_setdefaultiface(ndif->ifindex));
1873	}
1874	return (error);
1875}
1876
/*
 * Calculates new isRouter value based on provided parameters and
 * returns it.
 *
 * type      - ICMP6 type of the received message (only the low 8 bits are
 *	       examined)
 * code      - ICMP6 code; only relevant for redirects
 * is_new    - non-zero if the neighbor cache entry was just created
 * old_addr  - non-zero if the entry already had a link-layer address
 * new_addr  - non-zero if the message carried a link-layer address
 * ln_router - the current isRouter value of the entry
 *
 * Result per message type (c = cleared, s = set, - = left unchanged):
 *
 *	is_new old_addr new_addr	NS  RS  RA  redirect	redirect
 *							(to host) (to router)
 *	  0	 n	  n	(1)	-   c   -	-	   s
 *	  0	 y	  n	(2)	-   c   s	-	   s
 *	  0	 n	  y	(3)	-   c   s	-	   s
 *	  0	 y	  y	(4,5)	-   c   s	-	   s
 *	  1	 --	  n	(6)	c   c   -	c	   s
 *	  1	 --	  y	(7)	c   c   s	c	   s
 *
 * Any other message type leaves the value unchanged.
 *
 * RA, case (1): the entry has no link-layer address in the neighbor cache,
 * which is similar to (6).  This case is rare, but IsRouter must not be set
 * for it.
 */
static int
nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr,
    int ln_router)
{
	const int icmp6_type = type & 0xff;

	/* RS: the is_router flag must always be cleared. */
	if (icmp6_type == ND_ROUTER_SOLICIT)
		return (0);

	/* NS: a new entry must have the is_router flag cleared. (6-7) */
	if (icmp6_type == ND_NEIGHBOR_SOLICIT)
		return (is_new ? 0 : ln_router);

	/*
	 * If the icmp is a redirect to a better router, always set the
	 * is_router flag.  Otherwise, if the entry is newly created,
	 * clear the flag.  [RFC 2461, sec 8.3]
	 */
	if (icmp6_type == ND_REDIRECT) {
		if (code == ND_REDIRECT_ROUTER)
			return (1);
		return (is_new ? 0 : ln_router);	/* (6-7) */
	}

	/*
	 * RA: mark an entry with lladdr as a router.  An existing entry
	 * qualifies with either its old or the new address (2-5), a new
	 * entry only with the address carried by the message (7).
	 */
	if (icmp6_type == ND_ROUTER_ADVERT) {
		int has_lladdr;

		if (is_new)
			has_lladdr = (new_addr != 0);
		else
			has_lladdr = (old_addr != 0 || new_addr != 0);
		return (has_lladdr ? 1 : ln_router);
	}

	return (ln_router);
}
1955
1956/*
1957 * Create neighbor cache entry and cache link-layer address,
1958 * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
1959 *
1960 * type - ICMP6 type
1961 * code - type dependent information
1962 *
1963 */
1964void
1965nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
1966    int lladdrlen, int type, int code)
1967{
1968	struct llentry *ln = NULL, *ln_tmp;
1969	int is_newentry;
1970	int do_update;
1971	int olladdr;
1972	int llchange;
1973	int flags;
1974	uint16_t router = 0;
1975	struct sockaddr_in6 sin6;
1976	struct mbuf *chain = NULL;
1977	u_char linkhdr[LLE_MAX_LINKHDR];
1978	size_t linkhdrsize;
1979	int lladdr_off;
1980
1981	IF_AFDATA_UNLOCK_ASSERT(ifp);
1982
1983	KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__));
1984	KASSERT(from != NULL, ("%s: from == NULL", __func__));
1985
1986	/* nothing must be updated for unspecified address */
1987	if (IN6_IS_ADDR_UNSPECIFIED(from))
1988		return;
1989
1990	/*
1991	 * Validation about ifp->if_addrlen and lladdrlen must be done in
1992	 * the caller.
1993	 *
1994	 * XXX If the link does not have link-layer adderss, what should
1995	 * we do? (ifp->if_addrlen == 0)
1996	 * Spec says nothing in sections for RA, RS and NA.  There's small
1997	 * description on it in NS section (RFC 2461 7.2.3).
1998	 */
1999	flags = lladdr ? LLE_EXCLUSIVE : 0;
2000	IF_AFDATA_RLOCK(ifp);
2001	ln = nd6_lookup(from, flags, ifp);
2002	IF_AFDATA_RUNLOCK(ifp);
2003	is_newentry = 0;
2004	if (ln == NULL) {
2005		flags |= LLE_EXCLUSIVE;
2006		ln = nd6_alloc(from, 0, ifp);
2007		if (ln == NULL)
2008			return;
2009
2010		/*
2011		 * Since we already know all the data for the new entry,
2012		 * fill it before insertion.
2013		 */
2014		if (lladdr != NULL) {
2015			linkhdrsize = sizeof(linkhdr);
2016			if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
2017			    linkhdr, &linkhdrsize, &lladdr_off) != 0)
2018				return;
2019			lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
2020			    lladdr_off);
2021		}
2022
2023		IF_AFDATA_WLOCK(ifp);
2024		LLE_WLOCK(ln);
2025		/* Prefer any existing lle over newly-created one */
2026		ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp);
2027		if (ln_tmp == NULL)
2028			lltable_link_entry(LLTABLE6(ifp), ln);
2029		IF_AFDATA_WUNLOCK(ifp);
2030		if (ln_tmp == NULL) {
2031			/* No existing lle, mark as new entry (6,7) */
2032			is_newentry = 1;
2033			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
2034			if (lladdr != NULL)	/* (7) */
2035				EVENTHANDLER_INVOKE(lle_event, ln,
2036				    LLENTRY_RESOLVED);
2037		} else {
2038			lltable_free_entry(LLTABLE6(ifp), ln);
2039			ln = ln_tmp;
2040			ln_tmp = NULL;
2041		}
2042	}
2043	/* do nothing if static ndp is set */
2044	if ((ln->la_flags & LLE_STATIC)) {
2045		if (flags & LLE_EXCLUSIVE)
2046			LLE_WUNLOCK(ln);
2047		else
2048			LLE_RUNLOCK(ln);
2049		return;
2050	}
2051
2052	olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
2053	if (olladdr && lladdr) {
2054		llchange = bcmp(lladdr, ln->ll_addr,
2055		    ifp->if_addrlen);
2056	} else if (!olladdr && lladdr)
2057		llchange = 1;
2058	else
2059		llchange = 0;
2060
2061	/*
2062	 * newentry olladdr  lladdr  llchange	(*=record)
2063	 *	0	n	n	--	(1)
2064	 *	0	y	n	--	(2)
2065	 *	0	n	y	y	(3) * STALE
2066	 *	0	y	y	n	(4) *
2067	 *	0	y	y	y	(5) * STALE
2068	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
2069	 *	1	--	y	--	(7) * STALE
2070	 */
2071
2072	do_update = 0;
2073	if (is_newentry == 0 && llchange != 0) {
2074		do_update = 1;	/* (3,5) */
2075
2076		/*
2077		 * Record source link-layer address
2078		 * XXX is it dependent to ifp->if_type?
2079		 */
2080		linkhdrsize = sizeof(linkhdr);
2081		if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
2082		    linkhdr, &linkhdrsize, &lladdr_off) != 0)
2083			return;
2084
2085		if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
2086		    lladdr_off) == 0) {
2087			/* Entry was deleted */
2088			return;
2089		}
2090
2091		nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
2092
2093		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
2094
2095		if (ln->la_hold != NULL)
2096			nd6_grab_holdchain(ln, &chain, &sin6);
2097	}
2098
2099	/* Calculates new router status */
2100	router = nd6_is_router(type, code, is_newentry, olladdr,
2101	    lladdr != NULL ? 1 : 0, ln->ln_router);
2102
2103	ln->ln_router = router;
2104	/* Mark non-router redirects with special flag */
2105	if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER)
2106		ln->la_flags |= LLE_REDIRECT;
2107
2108	if (flags & LLE_EXCLUSIVE)
2109		LLE_WUNLOCK(ln);
2110	else
2111		LLE_RUNLOCK(ln);
2112
2113	if (chain != NULL)
2114		nd6_flush_holdchain(ifp, ifp, chain, &sin6);
2115
2116	/*
2117	 * When the link-layer address of a router changes, select the
2118	 * best router again.  In particular, when the neighbor entry is newly
2119	 * created, it might affect the selection policy.
2120	 * Question: can we restrict the first condition to the "is_newentry"
2121	 * case?
2122	 * XXX: when we hear an RA from a new router with the link-layer
2123	 * address option, defrouter_select_fib() is called twice, since
2124	 * defrtrlist_update called the function as well.  However, I believe
2125	 * we can compromise the overhead, since it only happens the first
2126	 * time.
2127	 * XXX: although defrouter_select_fib() should not have a bad effect
2128	 * for those are not autoconfigured hosts, we explicitly avoid such
2129	 * cases for safety.
2130	 */
2131	if ((do_update || is_newentry) && router &&
2132	    ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
2133		/*
2134		 * guaranteed recursion
2135		 */
2136		defrouter_select_fib(ifp->if_fib);
2137	}
2138}
2139
2140static void
2141nd6_slowtimo(void *arg)
2142{
2143	CURVNET_SET((struct vnet *) arg);
2144	struct nd_ifinfo *nd6if;
2145	struct ifnet *ifp;
2146
2147	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
2148	    nd6_slowtimo, curvnet);
2149	IFNET_RLOCK_NOSLEEP();
2150	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2151		if (ifp->if_afdata[AF_INET6] == NULL)
2152			continue;
2153		nd6if = ND_IFINFO(ifp);
2154		if (nd6if->basereachable && /* already initialized */
2155		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
2156			/*
2157			 * Since reachable time rarely changes by router
2158			 * advertisements, we SHOULD insure that a new random
2159			 * value gets recomputed at least once every few hours.
2160			 * (RFC 2461, 6.3.4)
2161			 */
2162			nd6if->recalctm = V_nd6_recalc_reachtm_interval;
2163			nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
2164		}
2165	}
2166	IFNET_RUNLOCK_NOSLEEP();
2167	CURVNET_RESTORE();
2168}
2169
2170void
2171nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
2172    struct sockaddr_in6 *sin6)
2173{
2174
2175	LLE_WLOCK_ASSERT(ln);
2176
2177	*chain = ln->la_hold;
2178	ln->la_hold = NULL;
2179	lltable_fill_sa_entry(ln, (struct sockaddr *)sin6);
2180
2181	if (ln->ln_state == ND6_LLINFO_STALE) {
2182
2183		/*
2184		 * The first time we send a packet to a
2185		 * neighbor whose entry is STALE, we have
2186		 * to change the state to DELAY and a sets
2187		 * a timer to expire in DELAY_FIRST_PROBE_TIME
2188		 * seconds to ensure do neighbor unreachability
2189		 * detection on expiration.
2190		 * (RFC 2461 7.3.3)
2191		 */
2192		nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY);
2193	}
2194}
2195
2196int
2197nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
2198    struct sockaddr_in6 *dst, struct route *ro)
2199{
2200	int error;
2201	int ip6len;
2202	struct ip6_hdr *ip6;
2203	struct m_tag *mtag;
2204
2205#ifdef MAC
2206	mac_netinet6_nd6_send(ifp, m);
2207#endif
2208
2209	/*
2210	 * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
2211	 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
2212	 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
2213	 * to be diverted to user space.  When re-injected into the kernel,
2214	 * send_output() will directly dispatch them to the outgoing interface.
2215	 */
2216	if (send_sendso_input_hook != NULL) {
2217		mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
2218		if (mtag != NULL) {
2219			ip6 = mtod(m, struct ip6_hdr *);
2220			ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
2221			/* Use the SEND socket */
2222			error = send_sendso_input_hook(m, ifp, SND_OUT,
2223			    ip6len);
2224			/* -1 == no app on SEND socket */
2225			if (error == 0 || error != -1)
2226			    return (error);
2227		}
2228	}
2229
2230	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
2231	IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
2232	    mtod(m, struct ip6_hdr *));
2233
2234	if ((ifp->if_flags & IFF_LOOPBACK) == 0)
2235		origifp = ifp;
2236
2237	error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro);
2238	return (error);
2239}
2240
2241/*
2242 * Lookup link headerfor @sa_dst address. Stores found
2243 * data in @desten buffer. Copy of lle ln_flags can be also
2244 * saved in @pflags if @pflags is non-NULL.
2245 *
2246 * If destination LLE does not exists or lle state modification
2247 * is required, call "slow" version.
2248 *
2249 * Return values:
2250 * - 0 on success (address copied to buffer).
2251 * - EWOULDBLOCK (no local error, but address is still unresolved)
2252 * - other errors (alloc failure, etc)
2253 */
2254int
2255nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
2256    const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags,
2257    struct llentry **plle)
2258{
2259	struct llentry *ln = NULL;
2260	const struct sockaddr_in6 *dst6;
2261
2262	if (pflags != NULL)
2263		*pflags = 0;
2264
2265	dst6 = (const struct sockaddr_in6 *)sa_dst;
2266
2267	/* discard the packet if IPv6 operation is disabled on the interface */
2268	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
2269		m_freem(m);
2270		return (ENETDOWN); /* better error? */
2271	}
2272
2273	if (m != NULL && m->m_flags & M_MCAST) {
2274		switch (ifp->if_type) {
2275		case IFT_ETHER:
2276		case IFT_FDDI:
2277		case IFT_L2VLAN:
2278		case IFT_IEEE80211:
2279		case IFT_BRIDGE:
2280		case IFT_ISO88025:
2281			ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr,
2282						 desten);
2283			return (0);
2284		default:
2285			m_freem(m);
2286			return (EAFNOSUPPORT);
2287		}
2288	}
2289
2290	IF_AFDATA_RLOCK(ifp);
2291	ln = nd6_lookup(&dst6->sin6_addr, plle ? LLE_EXCLUSIVE : LLE_UNLOCKED,
2292	    ifp);
2293	if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
2294		/* Entry found, let's copy lle info */
2295		bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
2296		if (pflags != NULL)
2297			*pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
2298		/* Check if we have feedback request from nd6 timer */
2299		if (ln->r_skip_req != 0) {
2300			LLE_REQ_LOCK(ln);
2301			ln->r_skip_req = 0; /* Notify that entry was used */
2302			ln->lle_hittime = time_uptime;
2303			LLE_REQ_UNLOCK(ln);
2304		}
2305		if (plle) {
2306			LLE_ADDREF(ln);
2307			*plle = ln;
2308			LLE_WUNLOCK(ln);
2309		}
2310		IF_AFDATA_RUNLOCK(ifp);
2311		return (0);
2312	} else if (plle && ln)
2313		LLE_WUNLOCK(ln);
2314	IF_AFDATA_RUNLOCK(ifp);
2315
2316	return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle));
2317}
2318
2319
2320/*
2321 * Do L2 address resolution for @sa_dst address. Stores found
2322 * address in @desten buffer. Copy of lle ln_flags can be also
2323 * saved in @pflags if @pflags is non-NULL.
2324 *
2325 * Heavy version.
2326 * Function assume that destination LLE does not exist,
2327 * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired.
2328 *
2329 * Set noinline to be dtrace-friendly
2330 */
2331static __noinline int
2332nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
2333    const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags,
2334    struct llentry **plle)
2335{
2336	struct llentry *lle = NULL, *lle_tmp;
2337	struct in6_addr *psrc, src;
2338	int send_ns, ll_len;
2339	char *lladdr;
2340
2341	/*
2342	 * Address resolution or Neighbor Unreachability Detection
2343	 * for the next hop.
2344	 * At this point, the destination of the packet must be a unicast
2345	 * or an anycast address(i.e. not a multicast).
2346	 */
2347	if (lle == NULL) {
2348		IF_AFDATA_RLOCK(ifp);
2349		lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
2350		IF_AFDATA_RUNLOCK(ifp);
2351		if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
2352			/*
2353			 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
2354			 * the condition below is not very efficient.  But we believe
2355			 * it is tolerable, because this should be a rare case.
2356			 */
2357			lle = nd6_alloc(&dst->sin6_addr, 0, ifp);
2358			if (lle == NULL) {
2359				char ip6buf[INET6_ADDRSTRLEN];
2360				log(LOG_DEBUG,
2361				    "nd6_output: can't allocate llinfo for %s "
2362				    "(ln=%p)\n",
2363				    ip6_sprintf(ip6buf, &dst->sin6_addr), lle);
2364				m_freem(m);
2365				return (ENOBUFS);
2366			}
2367
2368			IF_AFDATA_WLOCK(ifp);
2369			LLE_WLOCK(lle);
2370			/* Prefer any existing entry over newly-created one */
2371			lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
2372			if (lle_tmp == NULL)
2373				lltable_link_entry(LLTABLE6(ifp), lle);
2374			IF_AFDATA_WUNLOCK(ifp);
2375			if (lle_tmp != NULL) {
2376				lltable_free_entry(LLTABLE6(ifp), lle);
2377				lle = lle_tmp;
2378				lle_tmp = NULL;
2379			}
2380		}
2381	}
2382	if (lle == NULL) {
2383		if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
2384			m_freem(m);
2385			return (ENOBUFS);
2386		}
2387
2388		if (m != NULL)
2389			m_freem(m);
2390		return (ENOBUFS);
2391	}
2392
2393	LLE_WLOCK_ASSERT(lle);
2394
2395	/*
2396	 * The first time we send a packet to a neighbor whose entry is
2397	 * STALE, we have to change the state to DELAY and a sets a timer to
2398	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
2399	 * neighbor unreachability detection on expiration.
2400	 * (RFC 2461 7.3.3)
2401	 */
2402	if (lle->ln_state == ND6_LLINFO_STALE)
2403		nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY);
2404
2405	/*
2406	 * If the neighbor cache entry has a state other than INCOMPLETE
2407	 * (i.e. its link-layer address is already resolved), just
2408	 * send the packet.
2409	 */
2410	if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
2411		if (flags & LLE_ADDRONLY) {
2412			lladdr = lle->ll_addr;
2413			ll_len = ifp->if_addrlen;
2414		} else {
2415			lladdr = lle->r_linkdata;
2416			ll_len = lle->r_hdrlen;
2417		}
2418		bcopy(lladdr, desten, ll_len);
2419		if (pflags != NULL)
2420			*pflags = lle->la_flags;
2421		if (plle) {
2422			LLE_ADDREF(lle);
2423			*plle = lle;
2424		}
2425		LLE_WUNLOCK(lle);
2426		return (0);
2427	}
2428
2429	/*
2430	 * There is a neighbor cache entry, but no ethernet address
2431	 * response yet.  Append this latest packet to the end of the
2432	 * packet queue in the mbuf.  When it exceeds nd6_maxqueuelen,
2433	 * the oldest packet in the queue will be removed.
2434	 */
2435
2436	if (lle->la_hold != NULL) {
2437		struct mbuf *m_hold;
2438		int i;
2439
2440		i = 0;
2441		for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){
2442			i++;
2443			if (m_hold->m_nextpkt == NULL) {
2444				m_hold->m_nextpkt = m;
2445				break;
2446			}
2447		}
2448		while (i >= V_nd6_maxqueuelen) {
2449			m_hold = lle->la_hold;
2450			lle->la_hold = lle->la_hold->m_nextpkt;
2451			m_freem(m_hold);
2452			i--;
2453		}
2454	} else {
2455		lle->la_hold = m;
2456	}
2457
2458	/*
2459	 * If there has been no NS for the neighbor after entering the
2460	 * INCOMPLETE state, send the first solicitation.
2461	 * Note that for newly-created lle la_asked will be 0,
2462	 * so we will transition from ND6_LLINFO_NOSTATE to
2463	 * ND6_LLINFO_INCOMPLETE state here.
2464	 */
2465	psrc = NULL;
2466	send_ns = 0;
2467	if (lle->la_asked == 0) {
2468		lle->la_asked++;
2469		send_ns = 1;
2470		psrc = nd6_llinfo_get_holdsrc(lle, &src);
2471
2472		nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE);
2473	}
2474	LLE_WUNLOCK(lle);
2475	if (send_ns != 0)
2476		nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL);
2477
2478	return (EWOULDBLOCK);
2479}
2480
2481/*
2482 * Do L2 address resolution for @sa_dst address. Stores found
2483 * address in @desten buffer. Copy of lle ln_flags can be also
2484 * saved in @pflags if @pflags is non-NULL.
2485 *
2486 * Return values:
2487 * - 0 on success (address copied to buffer).
2488 * - EWOULDBLOCK (no local error, but address is still unresolved)
2489 * - other errors (alloc failure, etc)
2490 */
2491int
2492nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
2493    char *desten, uint32_t *pflags)
2494{
2495	int error;
2496
2497	flags |= LLE_ADDRONLY;
2498	error = nd6_resolve_slow(ifp, flags, NULL,
2499	    (const struct sockaddr_in6 *)dst, desten, pflags, NULL);
2500	return (error);
2501}
2502
2503int
2504nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
2505    struct sockaddr_in6 *dst)
2506{
2507	struct mbuf *m, *m_head;
2508	struct ifnet *outifp;
2509	int error = 0;
2510
2511	m_head = chain;
2512	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
2513		outifp = origifp;
2514	else
2515		outifp = ifp;
2516
2517	while (m_head) {
2518		m = m_head;
2519		m_head = m_head->m_nextpkt;
2520		error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
2521	}
2522
2523	/*
2524	 * XXX
2525	 * note that intermediate errors are blindly ignored
2526	 */
2527	return (error);
2528}
2529
2530static int
2531nd6_need_cache(struct ifnet *ifp)
2532{
2533	/*
2534	 * XXX: we currently do not make neighbor cache on any interface
2535	 * other than ARCnet, Ethernet, FDDI and GIF.
2536	 *
2537	 * RFC2893 says:
2538	 * - unidirectional tunnels needs no ND
2539	 */
2540	switch (ifp->if_type) {
2541	case IFT_ARCNET:
2542	case IFT_ETHER:
2543	case IFT_FDDI:
2544	case IFT_IEEE1394:
2545	case IFT_L2VLAN:
2546	case IFT_IEEE80211:
2547	case IFT_INFINIBAND:
2548	case IFT_BRIDGE:
2549	case IFT_PROPVIRTUAL:
2550		return (1);
2551	default:
2552		return (0);
2553	}
2554}
2555
2556/*
2557 * Add pernament ND6 link-layer record for given
2558 * interface address.
2559 *
2560 * Very similar to IPv4 arp_ifinit(), but:
2561 * 1) IPv6 DAD is performed in different place
2562 * 2) It is called by IPv6 protocol stack in contrast to
2563 * arp_ifinit() which is typically called in SIOCSIFADDR
2564 * driver ioctl handler.
2565 *
2566 */
2567int
2568nd6_add_ifa_lle(struct in6_ifaddr *ia)
2569{
2570	struct ifnet *ifp;
2571	struct llentry *ln, *ln_tmp;
2572	struct sockaddr *dst;
2573
2574	ifp = ia->ia_ifa.ifa_ifp;
2575	if (nd6_need_cache(ifp) == 0)
2576		return (0);
2577
2578	ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
2579	dst = (struct sockaddr *)&ia->ia_addr;
2580	ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst);
2581	if (ln == NULL)
2582		return (ENOBUFS);
2583
2584	IF_AFDATA_WLOCK(ifp);
2585	LLE_WLOCK(ln);
2586	/* Unlink any entry if exists */
2587	ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst);
2588	if (ln_tmp != NULL)
2589		lltable_unlink_entry(LLTABLE6(ifp), ln_tmp);
2590	lltable_link_entry(LLTABLE6(ifp), ln);
2591	IF_AFDATA_WUNLOCK(ifp);
2592
2593	if (ln_tmp != NULL)
2594		EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED);
2595	EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
2596
2597	LLE_WUNLOCK(ln);
2598	if (ln_tmp != NULL)
2599		llentry_free(ln_tmp);
2600
2601	return (0);
2602}
2603
2604/*
2605 * Removes either all lle entries for given @ia, or lle
2606 * corresponding to @ia address.
2607 */
2608void
2609nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all)
2610{
2611	struct sockaddr_in6 mask, addr;
2612	struct sockaddr *saddr, *smask;
2613	struct ifnet *ifp;
2614
2615	ifp = ia->ia_ifa.ifa_ifp;
2616	memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
2617	memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
2618	saddr = (struct sockaddr *)&addr;
2619	smask = (struct sockaddr *)&mask;
2620
2621	if (all != 0)
2622		lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC);
2623	else
2624		lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr);
2625}
2626
2627static void
2628clear_llinfo_pqueue(struct llentry *ln)
2629{
2630	struct mbuf *m_hold, *m_hold_next;
2631
2632	for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) {
2633		m_hold_next = m_hold->m_nextpkt;
2634		m_freem(m_hold);
2635	}
2636
2637	ln->la_hold = NULL;
2638}
2639
2640static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
2641static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
2642
2643SYSCTL_DECL(_net_inet6_icmp6);
2644SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
2645	CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
2646	NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter",
2647	"NDP default router list");
2648SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
2649	CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
2650	NULL, 0, nd6_sysctl_prlist, "S,in6_prefix",
2651	"NDP prefix list");
2652SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
2653	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
2654SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
2655	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
2656
2657static int
2658nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
2659{
2660	struct in6_defrouter d;
2661	struct nd_defrouter *dr;
2662	int error;
2663
2664	if (req->newptr != NULL)
2665		return (EPERM);
2666
2667	error = sysctl_wire_old_buffer(req, 0);
2668	if (error != 0)
2669		return (error);
2670
2671	bzero(&d, sizeof(d));
2672	d.rtaddr.sin6_family = AF_INET6;
2673	d.rtaddr.sin6_len = sizeof(d.rtaddr);
2674
2675	ND6_RLOCK();
2676	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
2677		d.rtaddr.sin6_addr = dr->rtaddr;
2678		error = sa6_recoverscope(&d.rtaddr);
2679		if (error != 0)
2680			break;
2681		d.flags = dr->raflags;
2682		d.rtlifetime = dr->rtlifetime;
2683		d.expire = dr->expire + (time_second - time_uptime);
2684		d.if_index = dr->ifp->if_index;
2685		error = SYSCTL_OUT(req, &d, sizeof(d));
2686		if (error != 0)
2687			break;
2688	}
2689	ND6_RUNLOCK();
2690	return (error);
2691}
2692
2693static int
2694nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
2695{
2696	struct in6_prefix p;
2697	struct sockaddr_in6 s6;
2698	struct nd_prefix *pr;
2699	struct nd_pfxrouter *pfr;
2700	time_t maxexpire;
2701	int error;
2702	char ip6buf[INET6_ADDRSTRLEN];
2703
2704	if (req->newptr)
2705		return (EPERM);
2706
2707	error = sysctl_wire_old_buffer(req, 0);
2708	if (error != 0)
2709		return (error);
2710
2711	bzero(&p, sizeof(p));
2712	p.origin = PR_ORIG_RA;
2713	bzero(&s6, sizeof(s6));
2714	s6.sin6_family = AF_INET6;
2715	s6.sin6_len = sizeof(s6);
2716
2717	ND6_RLOCK();
2718	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
2719		p.prefix = pr->ndpr_prefix;
2720		if (sa6_recoverscope(&p.prefix)) {
2721			log(LOG_ERR, "scope error in prefix list (%s)\n",
2722			    ip6_sprintf(ip6buf, &p.prefix.sin6_addr));
2723			/* XXX: press on... */
2724		}
2725		p.raflags = pr->ndpr_raf;
2726		p.prefixlen = pr->ndpr_plen;
2727		p.vltime = pr->ndpr_vltime;
2728		p.pltime = pr->ndpr_pltime;
2729		p.if_index = pr->ndpr_ifp->if_index;
2730		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
2731			p.expire = 0;
2732		else {
2733			/* XXX: we assume time_t is signed. */
2734			maxexpire = (-1) &
2735			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
2736			if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
2737				p.expire = pr->ndpr_lastupdate +
2738				    pr->ndpr_vltime +
2739				    (time_second - time_uptime);
2740			else
2741				p.expire = maxexpire;
2742		}
2743		p.refcnt = pr->ndpr_addrcnt;
2744		p.flags = pr->ndpr_stateflags;
2745		p.advrtrs = 0;
2746		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
2747			p.advrtrs++;
2748		error = SYSCTL_OUT(req, &p, sizeof(p));
2749		if (error != 0)
2750			break;
2751		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
2752			s6.sin6_addr = pfr->router->rtaddr;
2753			if (sa6_recoverscope(&s6))
2754				log(LOG_ERR,
2755				    "scope error in prefix list (%s)\n",
2756				    ip6_sprintf(ip6buf, &pfr->router->rtaddr));
2757			error = SYSCTL_OUT(req, &s6, sizeof(s6));
2758			if (error != 0)
2759				goto out;
2760		}
2761	}
2762out:
2763	ND6_RUNLOCK();
2764	return (error);
2765}
2766