in.c revision 228571
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (C) 2001 WIDE Project.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 *    may be used to endorse or promote products derived from this software
16 *    without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 *	@(#)in.c	8.4 (Berkeley) 1/9/95
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/netinet/in.c 228571 2011-12-16 12:16:56Z glebius $");
35
36#include "opt_mpath.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/sockio.h>
41#include <sys/malloc.h>
42#include <sys/priv.h>
43#include <sys/socket.h>
44#include <sys/jail.h>
45#include <sys/kernel.h>
46#include <sys/proc.h>
47#include <sys/sysctl.h>
48#include <sys/syslog.h>
49
50#include <net/if.h>
51#include <net/if_var.h>
52#include <net/if_arp.h>
53#include <net/if_dl.h>
54#include <net/if_llatbl.h>
55#include <net/if_types.h>
56#include <net/route.h>
57#include <net/vnet.h>
58
59#include <netinet/if_ether.h>
60#include <netinet/in.h>
61#include <netinet/in_var.h>
62#include <netinet/in_pcb.h>
63#include <netinet/ip_var.h>
64#include <netinet/ip_carp.h>
65#include <netinet/igmp_var.h>
66#include <netinet/udp.h>
67#include <netinet/udp_var.h>
68
/*
 * Helpers private to this file.  in_mask2len() and in_len2mask() convert
 * between a netmask and a prefix length for in_lifaddr_ioctl().
 */
static int in_mask2len(struct in_addr *);
static void in_len2mask(struct in_addr *, int);
static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
	struct ifnet *, struct thread *);

static void	in_socktrim(struct sockaddr_in *);
static int	in_ifinit(struct ifnet *, struct in_ifaddr *,
		    struct sockaddr_in *, int, int, int);
static void	in_purgemaddrs(struct ifnet *);

/*
 * net.inet.ip.no_same_prefix: when non-zero, in_addprefix() returns EEXIST
 * instead of sharing a prefix route that another address already installed.
 */
static VNET_DEFINE(int, nosameprefix);
#define	V_nosameprefix			VNET(nosameprefix)
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
	&VNET_NAME(nosameprefix), 0,
	"Refuse to create same prefixes on different interfaces");

/* Defined elsewhere; only declared here. */
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define	V_ripcbinfo			VNET(ripcbinfo)

VNET_DECLARE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
#define	V_arpstat		VNET(arpstat)
90
91/*
92 * Return 1 if an internet address is for a ``local'' host
93 * (one to which we have a connection).
94 */
95int
96in_localaddr(struct in_addr in)
97{
98	register u_long i = ntohl(in.s_addr);
99	register struct in_ifaddr *ia;
100
101	IN_IFADDR_RLOCK();
102	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
103		if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
104			IN_IFADDR_RUNLOCK();
105			return (1);
106		}
107	}
108	IN_IFADDR_RUNLOCK();
109	return (0);
110}
111
112/*
113 * Return 1 if an internet address is for the local host and configured
114 * on one of its interfaces.
115 */
116int
117in_localip(struct in_addr in)
118{
119	struct in_ifaddr *ia;
120
121	IN_IFADDR_RLOCK();
122	LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
123		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
124			IN_IFADDR_RUNLOCK();
125			return (1);
126		}
127	}
128	IN_IFADDR_RUNLOCK();
129	return (0);
130}
131
/*
 * Decide whether datagrams addressed to `in' may be forwarded.
 *
 * Returns 0 for experimental, multicast and link-local destinations, and
 * for class A addresses whose network part is 0 or the loopback network.
 * Returns 1 for everything else.
 */
int
in_canforward(struct in_addr in)
{
	u_long haddr = ntohl(in.s_addr);
	u_long classa_net;

	if (IN_EXPERIMENTAL(haddr))
		return (0);
	if (IN_MULTICAST(haddr))
		return (0);
	if (IN_LINKLOCAL(haddr))
		return (0);

	/* Only class A space contains the remaining reserved networks. */
	if (!IN_CLASSA(haddr))
		return (1);

	classa_net = haddr & IN_CLASSA_NET;
	if (classa_net == 0)
		return (0);
	if (classa_net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
		return (0);
	return (1);
}
152
153/*
154 * Trim a mask in a sockaddr
155 */
156static void
157in_socktrim(struct sockaddr_in *ap)
158{
159    register char *cplim = (char *) &ap->sin_addr;
160    register char *cp = (char *) (&ap->sin_addr + 1);
161
162    ap->sin_len = 0;
163    while (--cp >= cplim)
164	if (*cp) {
165	    (ap)->sin_len = cp - (char *) (ap) + 1;
166	    break;
167	}
168}
169
/*
 * Convert a netmask into a prefix length.
 *
 * Counts the leading one bits of the mask, reading its bytes in memory
 * order (network byte order for an in_addr).  Counting stops at the first
 * zero bit, so any one bits after it are ignored.
 *
 * mask		netmask to measure.
 * Returns the number of leading one bits, 0 through 32.
 */
static int
in_mask2len(struct in_addr *mask)
{
	const uint8_t *octets = (const uint8_t *)mask;
	const int noctets = (int)sizeof(*mask);
	int full, extra;

	/* Whole bytes that are entirely ones. */
	for (full = 0; full < noctets; full++) {
		if (octets[full] != 0xff)
			break;
	}

	/* Leading one bits of the first byte that is not all ones, if any. */
	extra = 0;
	if (full < noctets) {
		while (extra < 8 && (octets[full] & (0x80 >> extra)) != 0)
			extra++;
	}
	return (full * 8 + extra);
}
191
/*
 * Build a netmask from a prefix length.
 *
 * The first `len' bits of *mask, in memory (network) byte order, are set to
 * one and the rest to zero.
 *
 * mask		destination netmask; fully overwritten.
 * len		prefix length in bits.  Values outside 0..32 are clamped to
 *		that range, so a bad length can neither write past the
 *		address nor produce a negative shift count.
 */
static void
in_len2mask(struct in_addr *mask, int len)
{
	const int maxbits = (int)sizeof(*mask) * 8;
	uint8_t *octets = (uint8_t *)mask;
	int i;

	if (len < 0)
		len = 0;
	else if (len > maxbits)
		len = maxbits;

	bzero(mask, sizeof(*mask));
	for (i = 0; i < len / 8; i++)
		octets[i] = 0xff;
	/* Partial byte: keep only the top (len % 8) bits. */
	if (len % 8)
		octets[i] = (0xff00 >> (len % 8)) & 0xff;
}
205
/*
 * Generic internet control operations (ioctl's).
 *
 * SIOC[GS]IFADDR, SIOC[GS]IFBRDADDR, SIOC[GS]IFDSTADDR, SIOC[GS]IFNETMASK,
 * SIOCAIFADDR and SIOCDIFADDR are implemented here.  SIOC[AGD]LIFADDR are
 * handed to in_lifaddr_ioctl(), and any other command is forwarded to
 * ifp->if_ioctl().
 *
 * so	socket the request arrived on; only passed on to in_lifaddr_ioctl().
 * cmd	ioctl command.
 * data	request buffer, read as a struct ifreq or a struct in_aliasreq
 *	depending on cmd.
 * ifp	target interface; NULL if not an interface-specific ioctl.
 * td	calling thread; when non-NULL it is used for the privilege and
 *	jail checks.
 *
 * Returns 0 on success or an errno value.
 */
/* ARGSUSED */
int
in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
    struct thread *td)
{
	register struct ifreq *ifr = (struct ifreq *)data;
	register struct in_ifaddr *ia, *iap;
	register struct ifaddr *ifa;
	struct in_addr allhosts_addr;
	struct in_addr dst;
	struct in_ifinfo *ii;
	struct in_aliasreq *ifra = (struct in_aliasreq *)data;
	struct sockaddr_in oldaddr;
	int error, hostIsNew, iaIsNew, maskIsNew;
	int iaIsFirst;

	ia = NULL;
	iaIsFirst = 0;
	iaIsNew = 0;
	allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);

	/*
	 * Filter out ioctls we implement directly; forward the rest on to
	 * in_lifaddr_ioctl() and ifp->if_ioctl().
	 */
	switch (cmd) {
	case SIOCGIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCGIFDSTADDR:
	case SIOCGIFNETMASK:
	case SIOCDIFADDR:
		break;
	case SIOCAIFADDR:
		/*
		 * ifra_addr must be present and be of INET family.
		 * ifra_broadaddr and ifra_mask are optional.
		 */
		if (ifra->ifra_addr.sin_len != sizeof(struct sockaddr_in) ||
		    ifra->ifra_addr.sin_family != AF_INET)
			return (EINVAL);
		if (ifra->ifra_broadaddr.sin_len != 0 &&
		    (ifra->ifra_broadaddr.sin_len !=
		    sizeof(struct sockaddr_in) ||
		    ifra->ifra_broadaddr.sin_family != AF_INET))
			return (EINVAL);
#if 0
		/*
		 * ifconfig(8) historically doesn't set af_family for mask
		 * for unknown reason.
		 */
		if (ifra->ifra_mask.sin_len != 0 &&
		    (ifra->ifra_mask.sin_len != sizeof(struct sockaddr_in) ||
		    ifra->ifra_mask.sin_family != AF_INET))
			return (EINVAL);
#endif
		break;
	case SIOCSIFADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFNETMASK:
		/* The supplied sockaddr must be a full-size AF_INET one. */
		if (ifr->ifr_addr.sa_family != AF_INET ||
		    ifr->ifr_addr.sa_len != sizeof(struct sockaddr_in))
			return (EINVAL);
		break;

	case SIOCALIFADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_ADDIFADDR);
			if (error)
				return (error);
		}
		if (ifp == NULL)
			return (EINVAL);
		return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCDLIFADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_DELIFADDR);
			if (error)
				return (error);
		}
		if (ifp == NULL)
			return (EINVAL);
		return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	case SIOCGLIFADDR:
		if (ifp == NULL)
			return (EINVAL);
		return in_lifaddr_ioctl(so, cmd, data, ifp, td);

	default:
		/* Not an address ioctl handled here: let the driver decide. */
		if (ifp == NULL || ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		return ((*ifp->if_ioctl)(ifp, cmd, data));
	}

	/* Everything below operates on a specific interface. */
	if (ifp == NULL)
		return (EADDRNOTAVAIL);

	/*
	 * Security checks before we get involved in any work.
	 */
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCSIFADDR:
	case SIOCSIFBRDADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_ADDIFADDR);
			if (error)
				return (error);
		}
		break;

	case SIOCDIFADDR:
		if (td != NULL) {
			error = priv_check(td, PRIV_NET_DELIFADDR);
			if (error)
				return (error);
		}
		break;
	}

	/*
	 * Find address for this interface, if it exists.
	 *
	 * If an alias address was specified, find that one instead of the
	 * first one on the interface, if possible.
	 *
	 * First pass: exact match on (ifp, address) through the address hash.
	 * A match is only accepted when td is NULL or prison_check_ip4()
	 * returns 0 for it.
	 */
	dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
	IN_IFADDR_RLOCK();
	LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
		if (iap->ia_ifp == ifp &&
		    iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
			if (td == NULL || prison_check_ip4(td->td_ucred,
			    &dst) == 0)
				ia = iap;
			break;
		}
	}
	/* Hold a reference so ia stays valid once the lock is dropped. */
	if (ia != NULL)
		ifa_ref(&ia->ia_ifa);
	IN_IFADDR_RUNLOCK();
	if (ia == NULL) {
		/*
		 * Second pass: fall back to the first AF_INET address on the
		 * interface that passes the prison_check_ip4() test.
		 */
		IF_ADDR_LOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			iap = ifatoia(ifa);
			if (iap->ia_addr.sin_family == AF_INET) {
				if (td != NULL &&
				    prison_check_ip4(td->td_ucred,
				    &iap->ia_addr.sin_addr) != 0)
					continue;
				ia = iap;
				break;
			}
		}
		if (ia != NULL)
			ifa_ref(&ia->ia_ifa);
		IF_ADDR_UNLOCK(ifp);
	}
	/* Neither pass found anything: a new address will be the first one. */
	if (ia == NULL)
		iaIsFirst = 1;

	/*
	 * Per-command preparation: settle on the in_ifaddr to operate on,
	 * allocating and linking a new one where the command may create it.
	 */
	error = 0;
	switch (cmd) {
	case SIOCAIFADDR:
	case SIOCDIFADDR:
		if (ifra->ifra_addr.sin_family == AF_INET) {
			struct in_ifaddr *oia;

			/*
			 * Starting from the entry found above, walk the global
			 * address list for the exact (ifp, address) pair.  If
			 * the walk ends on a different entry (or on none), the
			 * reference moves from the old entry to the new one.
			 */
			IN_IFADDR_RLOCK();
			for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
				if (ia->ia_ifp == ifp  &&
				    ia->ia_addr.sin_addr.s_addr ==
				    ifra->ifra_addr.sin_addr.s_addr)
					break;
			}
			if (ia != NULL && ia != oia)
				ifa_ref(&ia->ia_ifa);
			if (oia != NULL && ia != oia)
				ifa_free(&oia->ia_ifa);
			IN_IFADDR_RUNLOCK();
			/* Adding to a point-to-point link needs a peer address. */
			if ((ifp->if_flags & IFF_POINTOPOINT)
			    && (cmd == SIOCAIFADDR)
			    && (ifra->ifra_dstaddr.sin_addr.s_addr
				== INADDR_ANY)) {
				error = EDESTADDRREQ;
				goto out;
			}
		}
		if (cmd == SIOCDIFADDR && ia == NULL) {
			error = EADDRNOTAVAIL;
			goto out;
		}
		/* FALLTHROUGH */
	case SIOCSIFADDR:
	case SIOCSIFNETMASK:
	case SIOCSIFDSTADDR:
		if (ia == NULL) {
			/* No existing entry: create a zeroed one and link it. */
			ia = (struct in_ifaddr *)
				malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
				    M_ZERO);
			if (ia == NULL) {
				error = ENOBUFS;
				goto out;
			}

			/* Point the generic ifaddr at the embedded sockaddrs. */
			ifa = &ia->ia_ifa;
			ifa_init(ifa);
			ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
			ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
			ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;

			ia->ia_sockmask.sin_len = 8;
			ia->ia_sockmask.sin_family = AF_INET;
			if (ifp->if_flags & IFF_BROADCAST) {
				ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
				ia->ia_broadaddr.sin_family = AF_INET;
			}
			ia->ia_ifp = ifp;

			/* One reference for each list the entry is put on. */
			ifa_ref(ifa);			/* if_addrhead */
			IF_ADDR_LOCK(ifp);
			TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
			IF_ADDR_UNLOCK(ifp);
			ifa_ref(ifa);			/* in_ifaddrhead */
			IN_IFADDR_WLOCK();
			TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
			IN_IFADDR_WUNLOCK();
			iaIsNew = 1;
		}
		break;

	case SIOCSIFBRDADDR:
	case SIOCGIFADDR:
	case SIOCGIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCGIFBRDADDR:
		/* These commands need an existing address to work on. */
		if (ia == NULL) {
			error = EADDRNOTAVAIL;
			goto out;
		}
		break;
	}

	/*
	 * Most paths in this switch return directly or via out.  Only paths
	 * that remove the address break in order to hit common removal code.
	 */
	switch (cmd) {
	case SIOCGIFADDR:
		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
		goto out;

	case SIOCGIFBRDADDR:
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EINVAL;
			goto out;
		}
		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
		goto out;

	case SIOCGIFDSTADDR:
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
			error = EINVAL;
			goto out;
		}
		*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
		goto out;

	case SIOCGIFNETMASK:
		*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
		goto out;

	case SIOCSIFDSTADDR:
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
			error = EINVAL;
			goto out;
		}
		/* Install the new peer address; roll back if the driver objects. */
		oldaddr = ia->ia_dstaddr;
		ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
		if (ifp->if_ioctl != NULL) {
			error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
			    (caddr_t)ia);
			if (error) {
				ia->ia_dstaddr = oldaddr;
				goto out;
			}
		}
		if (ia->ia_flags & IFA_ROUTE) {
			/*
			 * Delete the host route with ifa_dstaddr temporarily
			 * pointing at the old peer, then add one for the new.
			 */
			ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
			rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
			ia->ia_ifa.ifa_dstaddr =
					(struct sockaddr *)&ia->ia_dstaddr;
			rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
		}
		goto out;

	case SIOCSIFBRDADDR:
		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
			error = EINVAL;
			goto out;
		}
		ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
		goto out;

	case SIOCSIFADDR:
		error = in_ifinit(ifp, ia,
		    (struct sockaddr_in *) &ifr->ifr_addr, 1, 0, 0);
		/* A freshly created entry that failed to initialize is removed. */
		if (error != 0 && iaIsNew)
			break;
		if (error == 0) {
			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
			/* First address on a multicast interface: join all-hosts. */
			if (iaIsFirst &&
			    (ifp->if_flags & IFF_MULTICAST) != 0) {
				error = in_joingroup(ifp, &allhosts_addr,
				    NULL, &ii->ii_allhosts);
			}
			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
		}
		/*
		 * NOTE(review): error is reset here, so a failing in_ifinit()
		 * on an existing entry and a failing in_joingroup() are both
		 * reported as success - confirm this is intended.
		 */
		error = 0;
		goto out;

	case SIOCSIFNETMASK:
		ia->ia_sockmask.sin_addr = ((struct sockaddr_in *)
		    &ifr->ifr_addr)->sin_addr;
		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
		goto out;

	case SIOCAIFADDR:
		maskIsNew = 0;
		hostIsNew = 1;
		error = 0;
		/* Same address as the entry already has: host part is unchanged. */
		if (ifra->ifra_addr.sin_addr.s_addr ==
			    ia->ia_addr.sin_addr.s_addr)
			hostIsNew = 0;
		if (ifra->ifra_mask.sin_len) {
			/*
			 * QL: XXX
			 * Need to scrub the prefix here in case
			 * the issued command is SIOCAIFADDR with
			 * the same address, but with a different
			 * prefix length. And if the prefix length
			 * is the same as before, then the call is
			 * un-necessarily executed here.
			 */
			in_ifscrub(ifp, ia, LLE_STATIC);
			ia->ia_sockmask = ifra->ifra_mask;
			ia->ia_sockmask.sin_family = AF_INET;
			ia->ia_subnetmask =
			     ntohl(ia->ia_sockmask.sin_addr.s_addr);
			maskIsNew = 1;
		}
		if ((ifp->if_flags & IFF_POINTOPOINT) &&
		    (ifra->ifra_dstaddr.sin_family == AF_INET)) {
			in_ifscrub(ifp, ia, LLE_STATIC);
			ia->ia_dstaddr = ifra->ifra_dstaddr;
			maskIsNew  = 1; /* We lie; but the effect's the same */
		}
		/* Only re-initialize when something actually changed. */
		if (hostIsNew || maskIsNew)
			error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0,
			    maskIsNew, ifra->ifra_vhid);
		/* A freshly created entry that failed to initialize is removed. */
		if (error != 0 && iaIsNew)
			break;

		if ((ifp->if_flags & IFF_BROADCAST) &&
		    ifra->ifra_broadaddr.sin_len)
			ia->ia_broadaddr = ifra->ifra_broadaddr;
		if (error == 0) {
			ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
			if (iaIsFirst &&
			    (ifp->if_flags & IFF_MULTICAST) != 0) {
				error = in_joingroup(ifp, &allhosts_addr,
				    NULL, &ii->ii_allhosts);
			}
			EVENTHANDLER_INVOKE(ifaddr_event, ifp);
		}
		goto out;

	case SIOCDIFADDR:
		/*
		 * in_ifscrub kills the interface route.
		 */
		in_ifscrub(ifp, ia, LLE_STATIC);

		/*
		 * in_ifadown gets rid of all the rest of
		 * the routes.  This is not quite the right
		 * thing to do, but at least if we are running
		 * a routing process they will come back.
		 */
		in_ifadown(&ia->ia_ifa, 1);
		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
		error = 0;
		break;

	default:
		panic("in_control: unsupported ioctl");
	}

	/*
	 * Common removal code: reached by SIOCDIFADDR and by SIOCSIFADDR /
	 * SIOCAIFADDR when initializing a newly created entry failed.
	 */
	if (ia->ia_ifa.ifa_carp)
		(*carp_detach_p)(&ia->ia_ifa);

	IF_ADDR_LOCK(ifp);
	/* Re-check that ia is still part of the list. */
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa == &ia->ia_ifa)
			break;
	}
	if (ifa == NULL) {
		/*
		 * If we lost the race with another thread, there is no need to
		 * try it again for the next loop as there is no other exit
		 * path between here and out.
		 */
		IF_ADDR_UNLOCK(ifp);
		error = EADDRNOTAVAIL;
		goto out;
	}
	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
	IF_ADDR_UNLOCK(ifp);
	ifa_free(&ia->ia_ifa);				/* if_addrhead */

	/* Unlink from the global address list and the address hash. */
	IN_IFADDR_WLOCK();
	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);

	LIST_REMOVE(ia, ia_hash);
	IN_IFADDR_WUNLOCK();
	/*
	 * If this is the last IPv4 address configured on this
	 * interface, leave the all-hosts group.
	 * No state-change report need be transmitted.
	 */
	IFP_TO_IA(ifp, iap);
	if (iap == NULL) {
		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
		IN_MULTI_LOCK();
		if (ii->ii_allhosts) {
			(void)in_leavegroup_locked(ii->ii_allhosts, NULL);
			ii->ii_allhosts = NULL;
		}
		IN_MULTI_UNLOCK();
	} else
		/* presumably IFP_TO_IA() returned a referenced entry - confirm */
		ifa_free(&iap->ia_ifa);

	ifa_free(&ia->ia_ifa);				/* in_ifaddrhead */
out:
	/*
	 * Drop the reference taken by the lookup above.  For a newly
	 * allocated entry this is presumably the reference held since
	 * ifa_init() - confirm.
	 */
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	return (error);
}
663
664/*
665 * SIOC[GAD]LIFADDR.
666 *	SIOCGLIFADDR: get first address. (?!?)
667 *	SIOCGLIFADDR with IFLR_PREFIX:
668 *		get first address that matches the specified prefix.
669 *	SIOCALIFADDR: add the specified address.
670 *	SIOCALIFADDR with IFLR_PREFIX:
671 *		EINVAL since we can't deduce hostid part of the address.
672 *	SIOCDLIFADDR: delete the specified address.
673 *	SIOCDLIFADDR with IFLR_PREFIX:
674 *		delete the first address that matches the specified prefix.
675 * return values:
676 *	EINVAL on invalid parameters
677 *	EADDRNOTAVAIL on prefix match failed/specified address not found
678 *	other values may be returned from in_ioctl()
679 */
680static int
681in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
682    struct ifnet *ifp, struct thread *td)
683{
684	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
685	struct ifaddr *ifa;
686
687	/* sanity checks */
688	if (data == NULL || ifp == NULL) {
689		panic("invalid argument to in_lifaddr_ioctl");
690		/*NOTRECHED*/
691	}
692
693	switch (cmd) {
694	case SIOCGLIFADDR:
695		/* address must be specified on GET with IFLR_PREFIX */
696		if ((iflr->flags & IFLR_PREFIX) == 0)
697			break;
698		/*FALLTHROUGH*/
699	case SIOCALIFADDR:
700	case SIOCDLIFADDR:
701		/* address must be specified on ADD and DELETE */
702		if (iflr->addr.ss_family != AF_INET)
703			return (EINVAL);
704		if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
705			return (EINVAL);
706		/* XXX need improvement */
707		if (iflr->dstaddr.ss_family
708		 && iflr->dstaddr.ss_family != AF_INET)
709			return (EINVAL);
710		if (iflr->dstaddr.ss_family
711		 && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
712			return (EINVAL);
713		break;
714	default: /*shouldn't happen*/
715		return (EOPNOTSUPP);
716	}
717	if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
718		return (EINVAL);
719
720	switch (cmd) {
721	case SIOCALIFADDR:
722	    {
723		struct in_aliasreq ifra;
724
725		if (iflr->flags & IFLR_PREFIX)
726			return (EINVAL);
727
728		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
729		bzero(&ifra, sizeof(ifra));
730		bcopy(iflr->iflr_name, ifra.ifra_name,
731			sizeof(ifra.ifra_name));
732
733		bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);
734
735		if (iflr->dstaddr.ss_family) {	/*XXX*/
736			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
737				iflr->dstaddr.ss_len);
738		}
739
740		ifra.ifra_mask.sin_family = AF_INET;
741		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
742		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
743
744		return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
745	    }
746	case SIOCGLIFADDR:
747	case SIOCDLIFADDR:
748	    {
749		struct in_ifaddr *ia;
750		struct in_addr mask, candidate, match;
751		struct sockaddr_in *sin;
752
753		bzero(&mask, sizeof(mask));
754		bzero(&match, sizeof(match));
755		if (iflr->flags & IFLR_PREFIX) {
756			/* lookup a prefix rather than address. */
757			in_len2mask(&mask, iflr->prefixlen);
758
759			sin = (struct sockaddr_in *)&iflr->addr;
760			match.s_addr = sin->sin_addr.s_addr;
761			match.s_addr &= mask.s_addr;
762
763			/* if you set extra bits, that's wrong */
764			if (match.s_addr != sin->sin_addr.s_addr)
765				return (EINVAL);
766
767		} else {
768			/* on getting an address, take the 1st match */
769			/* on deleting an address, do exact match */
770			if (cmd != SIOCGLIFADDR) {
771				in_len2mask(&mask, 32);
772				sin = (struct sockaddr_in *)&iflr->addr;
773				match.s_addr = sin->sin_addr.s_addr;
774			}
775		}
776
777		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)	{
778			if (ifa->ifa_addr->sa_family != AF_INET6)
779				continue;
780			if (match.s_addr == 0)
781				break;
782			candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
783			candidate.s_addr &= mask.s_addr;
784			if (candidate.s_addr == match.s_addr)
785				break;
786		}
787		if (ifa == NULL)
788			return (EADDRNOTAVAIL);
789		ia = (struct in_ifaddr *)ifa;
790
791		if (cmd == SIOCGLIFADDR) {
792			/* fill in the if_laddrreq structure */
793			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);
794
795			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
796				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
797					ia->ia_dstaddr.sin_len);
798			} else
799				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));
800
801			iflr->prefixlen =
802				in_mask2len(&ia->ia_sockmask.sin_addr);
803
804			iflr->flags = 0;	/*XXX*/
805
806			return (0);
807		} else {
808			struct in_aliasreq ifra;
809
810			/* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
811			bzero(&ifra, sizeof(ifra));
812			bcopy(iflr->iflr_name, ifra.ifra_name,
813				sizeof(ifra.ifra_name));
814
815			bcopy(&ia->ia_addr, &ifra.ifra_addr,
816				ia->ia_addr.sin_len);
817			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
818				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
819					ia->ia_dstaddr.sin_len);
820			}
821			bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr,
822				ia->ia_sockmask.sin_len);
823
824			return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
825			    ifp, td));
826		}
827	    }
828	}
829
830	return (EOPNOTSUPP);	/*just for safety*/
831}
832
833/*
834 * Delete any existing route for an interface.
835 */
836void
837in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
838{
839
840	in_scrubprefix(ia, flags);
841}
842
843/*
844 * Initialize an interface's internet address
845 * and routing table entry.
846 */
847static int
848in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
849    int scrub, int masksupplied, int vhid)
850{
851	register u_long i = ntohl(sin->sin_addr.s_addr);
852	int flags = RTF_UP, error = 0;
853
854	if (scrub)
855		in_scrubprefix(ia, LLE_STATIC);
856
857	IN_IFADDR_WLOCK();
858	if (ia->ia_addr.sin_family == AF_INET)
859		LIST_REMOVE(ia, ia_hash);
860	ia->ia_addr = *sin;
861	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
862	    ia, ia_hash);
863	IN_IFADDR_WUNLOCK();
864
865	if (vhid > 0) {
866		if (carp_attach_p != NULL)
867			error = (*carp_attach_p)(&ia->ia_ifa, vhid);
868		else
869			error = EPROTONOSUPPORT;
870	}
871	if (error)
872		return (error);
873
874	/*
875	 * Give the interface a chance to initialize
876	 * if this is its first address,
877	 * and to validate the address if necessary.
878	 */
879	if (ifp->if_ioctl != NULL &&
880	    (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia)) != 0)
881			/* LIST_REMOVE(ia, ia_hash) is done in in_control */
882			return (error);
883
884	/*
885	 * Be compatible with network classes, if netmask isn't supplied,
886	 * guess it based on classes.
887	 */
888	if (!masksupplied) {
889		if (IN_CLASSA(i))
890			ia->ia_subnetmask = IN_CLASSA_NET;
891		else if (IN_CLASSB(i))
892			ia->ia_subnetmask = IN_CLASSB_NET;
893		else
894			ia->ia_subnetmask = IN_CLASSC_NET;
895		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
896	}
897	ia->ia_subnet = i & ia->ia_subnetmask;
898	in_socktrim(&ia->ia_sockmask);
899	/*
900	 * Add route for the network.
901	 */
902	ia->ia_ifa.ifa_metric = ifp->if_metric;
903	if (ifp->if_flags & IFF_BROADCAST) {
904		if (ia->ia_subnetmask == IN_RFC3021_MASK)
905			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
906		else
907			ia->ia_broadaddr.sin_addr.s_addr =
908			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
909	} else if (ifp->if_flags & IFF_LOOPBACK) {
910		ia->ia_dstaddr = ia->ia_addr;
911		flags |= RTF_HOST;
912	} else if (ifp->if_flags & IFF_POINTOPOINT) {
913		if (ia->ia_dstaddr.sin_family != AF_INET)
914			return (0);
915		flags |= RTF_HOST;
916	}
917	if (!vhid && (error = in_addprefix(ia, flags)) != 0)
918		return (error);
919
920	if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
921		return (0);
922
923	if (ifp->if_flags & IFF_POINTOPOINT &&
924	    ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
925			return (0);
926
927	/*
928	 * add a loopback route to self
929	 */
930	if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
931		struct route ia_ro;
932
933		bzero(&ia_ro, sizeof(ia_ro));
934		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
935		rtalloc_ign_fib(&ia_ro, 0, 0);
936		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
937		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
938			RT_LOCK(ia_ro.ro_rt);
939			RT_ADDREF(ia_ro.ro_rt);
940			RTFREE_LOCKED(ia_ro.ro_rt);
941		} else
942			error = ifa_add_loopback_route((struct ifaddr *)ia,
943				       (struct sockaddr *)&ia->ia_addr);
944		if (error == 0)
945			ia->ia_flags |= IFA_RTSELF;
946		if (ia_ro.ro_rt != NULL)
947			RTFREE(ia_ro.ro_rt);
948	}
949
950	return (error);
951}
952
/*
 * Route flags implied by the interface an address is configured on:
 * RTF_HOST for loopback and point-to-point interfaces, 0 otherwise.
 * x is a struct in_ifaddr pointer.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
	    ? RTF_HOST : 0)
956
957/*
958 * Generate a routing message when inserting or deleting
959 * an interface address alias.
960 */
961static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
962    struct in_ifaddr *target)
963{
964	struct route pfx_ro;
965	struct sockaddr_in *pfx_addr;
966	struct rtentry msg_rt;
967
968	/* QL: XXX
969	 * This is a bit questionable because there is no
970	 * additional route entry added/deleted for an address
971	 * alias. Therefore this route report is inaccurate.
972	 */
973	bzero(&pfx_ro, sizeof(pfx_ro));
974	pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst);
975	pfx_addr->sin_len = sizeof(*pfx_addr);
976	pfx_addr->sin_family = AF_INET;
977	pfx_addr->sin_addr = *prefix;
978	rtalloc_ign_fib(&pfx_ro, 0, 0);
979	if (pfx_ro.ro_rt != NULL) {
980		msg_rt = *pfx_ro.ro_rt;
981
982		/* QL: XXX
983		 * Point the gateway to the new interface
984		 * address as if a new prefix route entry has
985		 * been added through the new address alias.
986		 * All other parts of the rtentry is accurate,
987		 * e.g., rt_key, rt_mask, rt_ifp etc.
988		 */
989		msg_rt.rt_gateway =
990			(struct sockaddr *)&target->ia_addr;
991		rt_newaddrmsg(cmd,
992			      (struct ifaddr *)target,
993			      0, &msg_rt);
994		RTFREE(pfx_ro.ro_rt);
995	}
996	return;
997}
998
999/*
1000 * Check if we have a route for the given prefix already or add one accordingly.
1001 */
1002int
1003in_addprefix(struct in_ifaddr *target, int flags)
1004{
1005	struct in_ifaddr *ia;
1006	struct in_addr prefix, mask, p, m;
1007	int error;
1008
1009	if ((flags & RTF_HOST) != 0) {
1010		prefix = target->ia_dstaddr.sin_addr;
1011		mask.s_addr = 0;
1012	} else {
1013		prefix = target->ia_addr.sin_addr;
1014		mask = target->ia_sockmask.sin_addr;
1015		prefix.s_addr &= mask.s_addr;
1016	}
1017
1018	IN_IFADDR_RLOCK();
1019	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1020		if (rtinitflags(ia)) {
1021			p = ia->ia_dstaddr.sin_addr;
1022
1023			if (prefix.s_addr != p.s_addr)
1024				continue;
1025		} else {
1026			p = ia->ia_addr.sin_addr;
1027			m = ia->ia_sockmask.sin_addr;
1028			p.s_addr &= m.s_addr;
1029
1030			if (prefix.s_addr != p.s_addr ||
1031			    mask.s_addr != m.s_addr)
1032				continue;
1033		}
1034
1035		/*
1036		 * If we got a matching prefix route inserted by other
1037		 * interface address, we are done here.
1038		 */
1039		if (ia->ia_flags & IFA_ROUTE) {
1040#ifdef RADIX_MPATH
1041			if (ia->ia_addr.sin_addr.s_addr ==
1042			    target->ia_addr.sin_addr.s_addr) {
1043				IN_IFADDR_RUNLOCK();
1044				return (EEXIST);
1045			} else
1046				break;
1047#endif
1048			if (V_nosameprefix) {
1049				IN_IFADDR_RUNLOCK();
1050				return (EEXIST);
1051			} else {
1052				in_addralias_rtmsg(RTM_ADD, &prefix, target);
1053				IN_IFADDR_RUNLOCK();
1054				return (0);
1055			}
1056		}
1057	}
1058	IN_IFADDR_RUNLOCK();
1059
1060	/*
1061	 * No-one seem to have this prefix route, so we try to insert it.
1062	 */
1063	error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
1064	if (!error)
1065		target->ia_flags |= IFA_ROUTE;
1066	return (error);
1067}
1068
1069/*
1070 * If there is no other address in the system that can serve a route to the
1071 * same prefix, remove the route.  Hand over the route to the new address
1072 * otherwise.
1073 */
1074int
1075in_scrubprefix(struct in_ifaddr *target, u_int flags)
1076{
1077	struct in_ifaddr *ia;
1078	struct in_addr prefix, mask, p, m;
1079	int error = 0;
1080	struct sockaddr_in prefix0, mask0;
1081
1082	/*
1083	 * Remove the loopback route to the interface address.
1084	 * The "useloopback" setting is not consulted because if the
1085	 * user configures an interface address, turns off this
1086	 * setting, and then tries to delete that interface address,
1087	 * checking the current setting of "useloopback" would leave
1088	 * that interface address loopback route untouched, which
1089	 * would be wrong. Therefore the interface address loopback route
1090	 * deletion is unconditional.
1091	 */
1092	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
1093	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
1094	    (target->ia_flags & IFA_RTSELF)) {
1095		struct route ia_ro;
1096		int freeit = 0;
1097
1098		bzero(&ia_ro, sizeof(ia_ro));
1099		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
1100		rtalloc_ign_fib(&ia_ro, 0, 0);
1101		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
1102		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
1103			RT_LOCK(ia_ro.ro_rt);
1104			if (ia_ro.ro_rt->rt_refcnt <= 1)
1105				freeit = 1;
1106			else if (flags & LLE_STATIC) {
1107				RT_REMREF(ia_ro.ro_rt);
1108				target->ia_flags &= ~IFA_RTSELF;
1109			}
1110			RTFREE_LOCKED(ia_ro.ro_rt);
1111		}
1112		if (freeit && (flags & LLE_STATIC)) {
1113			error = ifa_del_loopback_route((struct ifaddr *)target,
1114				       (struct sockaddr *)&target->ia_addr);
1115			if (error == 0)
1116				target->ia_flags &= ~IFA_RTSELF;
1117		}
1118		if ((flags & LLE_STATIC) &&
1119			!(target->ia_ifp->if_flags & IFF_NOARP))
1120			/* remove arp cache */
1121			arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
1122	}
1123
1124	if (rtinitflags(target)) {
1125		prefix = target->ia_dstaddr.sin_addr;
1126		mask.s_addr = 0;
1127	} else {
1128		prefix = target->ia_addr.sin_addr;
1129		mask = target->ia_sockmask.sin_addr;
1130		prefix.s_addr &= mask.s_addr;
1131	}
1132
1133	if ((target->ia_flags & IFA_ROUTE) == 0) {
1134		in_addralias_rtmsg(RTM_DELETE, &prefix, target);
1135		return (0);
1136	}
1137
1138	IN_IFADDR_RLOCK();
1139	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1140		if (rtinitflags(ia)) {
1141			p = ia->ia_dstaddr.sin_addr;
1142
1143			if (prefix.s_addr != p.s_addr)
1144				continue;
1145		} else {
1146			p = ia->ia_addr.sin_addr;
1147			m = ia->ia_sockmask.sin_addr;
1148			p.s_addr &= m.s_addr;
1149
1150			if (prefix.s_addr != p.s_addr ||
1151			    mask.s_addr != m.s_addr)
1152				continue;
1153		}
1154
1155		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1156			continue;
1157
1158		/*
1159		 * If we got a matching prefix address, move IFA_ROUTE and
1160		 * the route itself to it.  Make sure that routing daemons
1161		 * get a heads-up.
1162		 */
1163		if ((ia->ia_flags & IFA_ROUTE) == 0) {
1164			ifa_ref(&ia->ia_ifa);
1165			IN_IFADDR_RUNLOCK();
1166			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
1167			    rtinitflags(target));
1168			if (error == 0)
1169				target->ia_flags &= ~IFA_ROUTE;
1170			else
1171				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
1172					error);
1173			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
1174			    rtinitflags(ia) | RTF_UP);
1175			if (error == 0)
1176				ia->ia_flags |= IFA_ROUTE;
1177			else
1178				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
1179					error);
1180			ifa_free(&ia->ia_ifa);
1181			return (error);
1182		}
1183	}
1184	IN_IFADDR_RUNLOCK();
1185
1186	/*
1187	 * remove all L2 entries on the given prefix
1188	 */
1189	bzero(&prefix0, sizeof(prefix0));
1190	prefix0.sin_len = sizeof(prefix0);
1191	prefix0.sin_family = AF_INET;
1192	prefix0.sin_addr.s_addr = target->ia_subnet;
1193	bzero(&mask0, sizeof(mask0));
1194	mask0.sin_len = sizeof(mask0);
1195	mask0.sin_family = AF_INET;
1196	mask0.sin_addr.s_addr = target->ia_subnetmask;
1197	lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
1198			    (struct sockaddr *)&mask0, flags);
1199
1200	/*
1201	 * As no-one seem to have this prefix, we can remove the route.
1202	 */
1203	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
1204	if (error == 0)
1205		target->ia_flags &= ~IFA_ROUTE;
1206	else
1207		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
1208	return (error);
1209}
1210
1211#undef rtinitflags
1212
1213/*
1214 * Return 1 if the address might be a local broadcast address.
1215 */
1216int
1217in_broadcast(struct in_addr in, struct ifnet *ifp)
1218{
1219	register struct ifaddr *ifa;
1220	u_long t;
1221
1222	if (in.s_addr == INADDR_BROADCAST ||
1223	    in.s_addr == INADDR_ANY)
1224		return (1);
1225	if ((ifp->if_flags & IFF_BROADCAST) == 0)
1226		return (0);
1227	t = ntohl(in.s_addr);
1228	/*
1229	 * Look through the list of addresses for a match
1230	 * with a broadcast address.
1231	 */
1232#define ia ((struct in_ifaddr *)ifa)
1233	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1234		if (ifa->ifa_addr->sa_family == AF_INET &&
1235		    (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
1236		     /*
1237		      * Check for old-style (host 0) broadcast, but
1238		      * taking into account that RFC 3021 obsoletes it.
1239		      */
1240		     (ia->ia_subnetmask != IN_RFC3021_MASK &&
1241		     t == ia->ia_subnet)) &&
1242		     /*
1243		      * Check for an all one subnetmask. These
1244		      * only exist when an interface gets a secondary
1245		      * address.
1246		      */
1247		     ia->ia_subnetmask != (u_long)0xffffffff)
1248			    return (1);
1249	return (0);
1250#undef ia
1251}
1252
1253/*
1254 * On interface removal, clean up IPv4 data structures hung off of the ifnet.
1255 */
1256void
1257in_ifdetach(struct ifnet *ifp)
1258{
1259
1260	in_pcbpurgeif0(&V_ripcbinfo, ifp);
1261	in_pcbpurgeif0(&V_udbinfo, ifp);
1262	in_purgemaddrs(ifp);
1263}
1264
1265/*
1266 * Delete all IPv4 multicast address records, and associated link-layer
1267 * multicast address records, associated with ifp.
1268 * XXX It looks like domifdetach runs AFTER the link layer cleanup.
1269 * XXX This should not race with ifma_protospec being set during
1270 * a new allocation, if it does, we have bigger problems.
1271 */
1272static void
1273in_purgemaddrs(struct ifnet *ifp)
1274{
1275	LIST_HEAD(,in_multi) purgeinms;
1276	struct in_multi		*inm, *tinm;
1277	struct ifmultiaddr	*ifma;
1278
1279	LIST_INIT(&purgeinms);
1280	IN_MULTI_LOCK();
1281
1282	/*
1283	 * Extract list of in_multi associated with the detaching ifp
1284	 * which the PF_INET layer is about to release.
1285	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
1286	 * by code further down.
1287	 */
1288	IF_ADDR_LOCK(ifp);
1289	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1290		if (ifma->ifma_addr->sa_family != AF_INET ||
1291		    ifma->ifma_protospec == NULL)
1292			continue;
1293#if 0
1294		KASSERT(ifma->ifma_protospec != NULL,
1295		    ("%s: ifma_protospec is NULL", __func__));
1296#endif
1297		inm = (struct in_multi *)ifma->ifma_protospec;
1298		LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
1299	}
1300	IF_ADDR_UNLOCK(ifp);
1301
1302	LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
1303		LIST_REMOVE(inm, inm_link);
1304		inm_release_locked(inm);
1305	}
1306	igmp_ifdetach(ifp);
1307
1308	IN_MULTI_UNLOCK();
1309}
1310
1311struct in_llentry {
1312	struct llentry		base;
1313	struct sockaddr_in	l3_addr4;
1314};
1315
1316static struct llentry *
1317in_lltable_new(const struct sockaddr *l3addr, u_int flags)
1318{
1319	struct in_llentry *lle;
1320
1321	lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
1322	if (lle == NULL)		/* NB: caller generates msg */
1323		return NULL;
1324
1325	callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
1326	/*
1327	 * For IPv4 this will trigger "arpresolve" to generate
1328	 * an ARP request.
1329	 */
1330	lle->base.la_expire = time_uptime; /* mark expired */
1331	lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
1332	lle->base.lle_refcnt = 1;
1333	LLE_LOCK_INIT(&lle->base);
1334	return &lle->base;
1335}
1336
1337/*
1338 * Deletes an address from the address table.
1339 * This function is called by the timer functions
1340 * such as arptimer() and nd6_llinfo_timer(), and
1341 * the caller does the locking.
1342 */
1343static void
1344in_lltable_free(struct lltable *llt, struct llentry *lle)
1345{
1346	LLE_WUNLOCK(lle);
1347	LLE_LOCK_DESTROY(lle);
1348	free(lle, M_LLTABLE);
1349}
1350
1351
1352#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)	(			\
1353	    (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
1354
1355static void
1356in_lltable_prefix_free(struct lltable *llt,
1357		       const struct sockaddr *prefix,
1358		       const struct sockaddr *mask,
1359		       u_int flags)
1360{
1361	const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
1362	const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
1363	struct llentry *lle, *next;
1364	register int i;
1365	size_t pkts_dropped;
1366
1367	for (i=0; i < LLTBL_HASHTBL_SIZE; i++) {
1368		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
1369
1370		        /*
1371			 * (flags & LLE_STATIC) means deleting all entries
1372			 * including static ARP entries
1373			 */
1374			if (IN_ARE_MASKED_ADDR_EQUAL((struct sockaddr_in *)L3_ADDR(lle),
1375						     pfx, msk) &&
1376			    ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) {
1377				int canceled;
1378
1379				canceled = callout_drain(&lle->la_timer);
1380				LLE_WLOCK(lle);
1381				if (canceled)
1382					LLE_REMREF(lle);
1383				pkts_dropped = llentry_free(lle);
1384				ARPSTAT_ADD(dropped, pkts_dropped);
1385			}
1386		}
1387	}
1388}
1389
1390
1391static int
1392in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
1393{
1394	struct rtentry *rt;
1395
1396	KASSERT(l3addr->sa_family == AF_INET,
1397	    ("sin_family %d", l3addr->sa_family));
1398
1399	/* XXX rtalloc1 should take a const param */
1400	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
1401
1402	if (rt == NULL)
1403		return (EINVAL);
1404
1405	/*
1406	 * If the gateway for an existing host route matches the target L3
1407	 * address, which is a special route inserted by some implementation
1408	 * such as MANET, and the interface is of the correct type, then
1409	 * allow for ARP to proceed.
1410	 */
1411	if (rt->rt_flags & RTF_GATEWAY) {
1412		if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
1413			rt->rt_ifp->if_type != IFT_ETHER ||
1414			  (rt->rt_ifp->if_flags &
1415			   (IFF_NOARP | IFF_STATICARP)) != 0 ||
1416			  memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
1417				 sizeof(in_addr_t)) != 0) {
1418			RTFREE_LOCKED(rt);
1419			return (EINVAL);
1420		}
1421	}
1422
1423	/*
1424	 * Make sure that at least the destination address is covered
1425	 * by the route. This is for handling the case where 2 or more
1426	 * interfaces have the same prefix. An incoming packet arrives
1427	 * on one interface and the corresponding outgoing packet leaves
1428	 * another interface.
1429	 */
1430	if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
1431		const char *sa, *mask, *addr, *lim;
1432		int len;
1433
1434		mask = (const char *)rt_mask(rt);
1435		/*
1436		 * Just being extra cautious to avoid some custom
1437		 * code getting into trouble.
1438		 */
1439		if (mask == NULL) {
1440			RTFREE_LOCKED(rt);
1441			return (EINVAL);
1442		}
1443
1444		sa = (const char *)rt_key(rt);
1445		addr = (const char *)l3addr;
1446		len = ((const struct sockaddr_in *)l3addr)->sin_len;
1447		lim = addr + len;
1448
1449		for ( ; addr < lim; sa++, mask++, addr++) {
1450			if ((*sa ^ *addr) & *mask) {
1451#ifdef DIAGNOSTIC
1452				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
1453				    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
1454#endif
1455				RTFREE_LOCKED(rt);
1456				return (EINVAL);
1457			}
1458		}
1459	}
1460
1461	RTFREE_LOCKED(rt);
1462	return (0);
1463}
1464
1465/*
1466 * Return NULL if not found or marked for deletion.
1467 * If found return lle read locked.
1468 */
1469static struct llentry *
1470in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
1471{
1472	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
1473	struct ifnet *ifp = llt->llt_ifp;
1474	struct llentry *lle;
1475	struct llentries *lleh;
1476	u_int hashkey;
1477
1478	IF_AFDATA_LOCK_ASSERT(ifp);
1479	KASSERT(l3addr->sa_family == AF_INET,
1480	    ("sin_family %d", l3addr->sa_family));
1481
1482	hashkey = sin->sin_addr.s_addr;
1483	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
1484	LIST_FOREACH(lle, lleh, lle_next) {
1485		struct sockaddr_in *sa2 = (struct sockaddr_in *)L3_ADDR(lle);
1486		if (lle->la_flags & LLE_DELETED)
1487			continue;
1488		if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
1489			break;
1490	}
1491	if (lle == NULL) {
1492#ifdef DIAGNOSTIC
1493		if (flags & LLE_DELETE)
1494			log(LOG_INFO, "interface address is missing from cache = %p  in delete\n", lle);
1495#endif
1496		if (!(flags & LLE_CREATE))
1497			return (NULL);
1498		/*
1499		 * A route that covers the given address must have
1500		 * been installed 1st because we are doing a resolution,
1501		 * verify this.
1502		 */
1503		if (!(flags & LLE_IFADDR) &&
1504		    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
1505			goto done;
1506
1507		lle = in_lltable_new(l3addr, flags);
1508		if (lle == NULL) {
1509			log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
1510			goto done;
1511		}
1512		lle->la_flags = flags & ~LLE_CREATE;
1513		if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
1514			bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
1515			lle->la_flags |= (LLE_VALID | LLE_STATIC);
1516		}
1517
1518		lle->lle_tbl  = llt;
1519		lle->lle_head = lleh;
1520		LIST_INSERT_HEAD(lleh, lle, lle_next);
1521	} else if (flags & LLE_DELETE) {
1522		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
1523			LLE_WLOCK(lle);
1524			lle->la_flags = LLE_DELETED;
1525			EVENTHANDLER_INVOKE(arp_update_event, lle);
1526			LLE_WUNLOCK(lle);
1527#ifdef DIAGNOSTIC
1528			log(LOG_INFO, "ifaddr cache = %p  is deleted\n", lle);
1529#endif
1530		}
1531		lle = (void *)-1;
1532
1533	}
1534	if (LLE_IS_VALID(lle)) {
1535		if (flags & LLE_EXCLUSIVE)
1536			LLE_WLOCK(lle);
1537		else
1538			LLE_RLOCK(lle);
1539	}
1540done:
1541	return (lle);
1542}
1543
1544static int
1545in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
1546{
1547#define	SIN(lle)	((struct sockaddr_in *) L3_ADDR(lle))
1548	struct ifnet *ifp = llt->llt_ifp;
1549	struct llentry *lle;
1550	/* XXX stack use */
1551	struct {
1552		struct rt_msghdr	rtm;
1553		struct sockaddr_inarp	sin;
1554		struct sockaddr_dl	sdl;
1555	} arpc;
1556	int error, i;
1557
1558	LLTABLE_LOCK_ASSERT();
1559
1560	error = 0;
1561	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
1562		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
1563			struct sockaddr_dl *sdl;
1564
1565			/* skip deleted entries */
1566			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
1567				continue;
1568			/* Skip if jailed and not a valid IP of the prison. */
1569			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
1570				continue;
1571			/*
1572			 * produce a msg made of:
1573			 *  struct rt_msghdr;
1574			 *  struct sockaddr_inarp; (IPv4)
1575			 *  struct sockaddr_dl;
1576			 */
1577			bzero(&arpc, sizeof(arpc));
1578			arpc.rtm.rtm_msglen = sizeof(arpc);
1579			arpc.rtm.rtm_version = RTM_VERSION;
1580			arpc.rtm.rtm_type = RTM_GET;
1581			arpc.rtm.rtm_flags = RTF_UP;
1582			arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
1583			arpc.sin.sin_family = AF_INET;
1584			arpc.sin.sin_len = sizeof(arpc.sin);
1585			arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
1586
1587			/* publish */
1588			if (lle->la_flags & LLE_PUB) {
1589				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
1590				/* proxy only */
1591				if (lle->la_flags & LLE_PROXY)
1592					arpc.sin.sin_other = SIN_PROXY;
1593			}
1594
1595			sdl = &arpc.sdl;
1596			sdl->sdl_family = AF_LINK;
1597			sdl->sdl_len = sizeof(*sdl);
1598			sdl->sdl_index = ifp->if_index;
1599			sdl->sdl_type = ifp->if_type;
1600			if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
1601				sdl->sdl_alen = ifp->if_addrlen;
1602				bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
1603			} else {
1604				sdl->sdl_alen = 0;
1605				bzero(LLADDR(sdl), ifp->if_addrlen);
1606			}
1607
1608			arpc.rtm.rtm_rmx.rmx_expire =
1609			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
1610			arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
1611			if (lle->la_flags & LLE_STATIC)
1612				arpc.rtm.rtm_flags |= RTF_STATIC;
1613			arpc.rtm.rtm_index = ifp->if_index;
1614			error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
1615			if (error)
1616				break;
1617		}
1618	}
1619	return error;
1620#undef SIN
1621}
1622
1623void *
1624in_domifattach(struct ifnet *ifp)
1625{
1626	struct in_ifinfo *ii;
1627	struct lltable *llt;
1628
1629	ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO);
1630
1631	llt = lltable_init(ifp, AF_INET);
1632	if (llt != NULL) {
1633		llt->llt_free = in_lltable_free;
1634		llt->llt_prefix_free = in_lltable_prefix_free;
1635		llt->llt_lookup = in_lltable_lookup;
1636		llt->llt_dump = in_lltable_dump;
1637	}
1638	ii->ii_llt = llt;
1639
1640	ii->ii_igmp = igmp_domifattach(ifp);
1641
1642	return ii;
1643}
1644
1645void
1646in_domifdetach(struct ifnet *ifp, void *aux)
1647{
1648	struct in_ifinfo *ii = (struct in_ifinfo *)aux;
1649
1650	igmp_domifdetach(ifp);
1651	lltable_free(ii->ii_llt);
1652	free(ii, M_IFADDR);
1653}
1654