rtsock.c revision 263478
1/*-
2 * Copyright (c) 1988, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
30 * $FreeBSD: stable/10/sys/net/rtsock.c 263478 2014-03-21 15:15:30Z glebius $
31 */
32#include "opt_compat.h"
33#include "opt_sctp.h"
34#include "opt_mpath.h"
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/jail.h>
40#include <sys/kernel.h>
41#include <sys/domain.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/priv.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/rwlock.h>
49#include <sys/signalvar.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/sysctl.h>
53#include <sys/systm.h>
54
55#include <net/if.h>
56#include <net/if_dl.h>
57#include <net/if_llatbl.h>
58#include <net/if_types.h>
59#include <net/netisr.h>
60#include <net/raw_cb.h>
61#include <net/route.h>
62#include <net/vnet.h>
63
64#include <netinet/in.h>
65#include <netinet/if_ether.h>
66#include <netinet/ip_carp.h>
67#ifdef INET6
68#include <netinet6/ip6_var.h>
69#include <netinet6/scope6_var.h>
70#endif
71
72#if defined(INET) || defined(INET6)
73#ifdef SCTP
74extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
75#endif /* SCTP */
76#endif
77
78#ifdef COMPAT_FREEBSD32
79#include <sys/mount.h>
80#include <compat/freebsd32/freebsd32.h>
81
/*
 * Interface data record built only from fixed-width integer fields and a
 * struct timeval32; compiled only under COMPAT_FREEBSD32.
 * NOTE(review): presumably the layout of struct if_data as seen by 32-bit
 * processes -- confirm field order against net/if.h before changing it.
 */
82struct if_data32 {
83	uint8_t	ifi_type;
84	uint8_t	ifi_physical;
85	uint8_t	ifi_addrlen;
86	uint8_t	ifi_hdrlen;
87	uint8_t	ifi_link_state;
88	uint8_t	ifi_vhid;
89	uint8_t	ifi_baudrate_pf;
90	uint8_t	ifi_datalen;
91	uint32_t ifi_mtu;
92	uint32_t ifi_metric;
93	uint32_t ifi_baudrate;
94	uint32_t ifi_ipackets;
95	uint32_t ifi_ierrors;
96	uint32_t ifi_opackets;
97	uint32_t ifi_oerrors;
98	uint32_t ifi_collisions;
99	uint32_t ifi_ibytes;
100	uint32_t ifi_obytes;
101	uint32_t ifi_imcasts;
102	uint32_t ifi_omcasts;
103	uint32_t ifi_iqdrops;
104	uint32_t ifi_noproto;
105	uint32_t ifi_hwassist;
106	int32_t	ifi_epoch;
107	struct	timeval32 ifi_lastchange;
108};
109
/*
 * Fixed-width interface message header carrying a struct if_data32 payload.
 * NOTE(review): presumably the 32-bit compat form of struct if_msghdr --
 * confirm against net/if.h.
 */
110struct if_msghdr32 {
111	uint16_t ifm_msglen;
112	uint8_t	ifm_version;
113	uint8_t	ifm_type;
114	int32_t	ifm_addrs;
115	int32_t	ifm_flags;
116	uint16_t ifm_index;
117	struct	if_data32 ifm_data;
118};
119
/*
 * Extended variant of if_msghdr32: in addition to the common header fields
 * it records its own length (ifm_len) and the offset of the data portion
 * (ifm_data_off).
 * NOTE(review): presumably the 32-bit compat form of struct if_msghdrl --
 * confirm against net/if.h.
 */
120struct if_msghdrl32 {
121	uint16_t ifm_msglen;
122	uint8_t	ifm_version;
123	uint8_t	ifm_type;
124	int32_t	ifm_addrs;
125	int32_t	ifm_flags;
126	uint16_t ifm_index;
127	uint16_t _ifm_spare1;
128	uint16_t ifm_len;
129	uint16_t ifm_data_off;
130	struct	if_data32 ifm_data;
131};
132
/*
 * Extended interface-address message header with fixed-width fields.
 * rt_msg2() sizes RTM_NEWADDR/RTM_DELADDR replies with this structure when
 * the sysctl request carries SCTL_MASK32 and the walk op is NET_RT_IFLISTL.
 */
133struct ifa_msghdrl32 {
134	uint16_t ifam_msglen;
135	uint8_t	ifam_version;
136	uint8_t	ifam_type;
137	int32_t	ifam_addrs;
138	int32_t	ifam_flags;
139	uint16_t ifam_index;
140	uint16_t _ifam_spare1;
141	uint16_t ifam_len;
142	uint16_t ifam_data_off;
143	int32_t	ifam_metric;
144	struct	if_data32 ifam_data;
145};
146#endif /* COMPAT_FREEBSD32 */
147
/* Malloc type "routetbl" used for routing-table allocations. */
148MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
149
150/*
 * NB: these are not modified.
 * route_src is the source address rts_input() passes to raw_input_ext();
 * sa_zero is the placeholder rt_xaddrs() substitutes for a zero-length
 * sockaddr.
 */
151static struct	sockaddr route_src = { 2, PF_ROUTE, };
152static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
153
154/* These are external hooks for CARP. */
155int	(*carp_get_vhid_p)(struct ifaddr *);
156
157/*
158 * Used by rtsock/raw_input callback code to decide whether to filter the update
159 * notification to a socket bound to a particular FIB.
 * RTS_FILTER_FIB is an mbuf flag: route_output() sets it on replies and
 * raw_input_rts_cb() tests it.
160 */
161#define	RTS_FILTER_FIB	M_PROTO8
162#define	RTS_ALLFIBS	-1
163
/*
 * Counts of attached routing sockets, broken down by the protocol given at
 * attach time.  Updated by rts_attach()/rts_detach() under RTSOCK_LOCK().
 */
164static struct {
165	int	ip_count;	/* attached w/ AF_INET */
166	int	ip6_count;	/* attached w/ AF_INET6 */
167	int	ipx_count;	/* attached w/ AF_IPX */
168	int	any_count;	/* total attached */
169} route_cb;
170
/* Mutex guarding updates to route_cb, plus convenience wrappers. */
171struct mtx rtsock_mtx;
172MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
173
174#define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
175#define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
176#define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
177
/* Parent sysctl node net.route. */
178static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
179
/*
 * State passed to rt_msg2() and the sysctl walkers.
 * NOTE(review): w_tmem/w_tmemsize look like a scratch buffer and its size;
 * their use is outside the visible part of the file -- confirm.
 */
180struct walkarg {
181	int	w_tmemsize;
182	int	w_op, w_arg;	/* w_op is compared against NET_RT_* ops */
183	caddr_t	w_tmem;
184	struct sysctl_req *w_req;	/* originating sysctl request */
185};
186
/* Forward declarations for the file-local routines. */
187static void	rts_input(struct mbuf *m);
188static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
189static int	rt_msg2(int type, struct rt_addrinfo *rtinfo,
190			caddr_t cp, struct walkarg *w);
191static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
192			struct rt_addrinfo *rtinfo);
193static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
194static int	sysctl_iflist(int af, struct walkarg *w);
195static int	sysctl_ifmalist(int af, struct walkarg *w);
196static int	route_output(struct mbuf *m, struct socket *so);
197static void	rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt);
198static void	rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out);
199static void	rt_dispatch(struct mbuf *, sa_family_t);
200
/*
 * netisr handler description for routing-socket messages: rts_input() is
 * the handler for NETISR_ROUTE.  Registered by rts_init(); its queue limit
 * is exposed through sysctl_route_netisr_maxqlen().
 */
201static struct netisr_handler rtsock_nh = {
202	.nh_name = "rtsock",
203	.nh_handler = rts_input,
204	.nh_proto = NETISR_ROUTE,
205	.nh_policy = NETISR_POLICY_SOURCE,
206};
207
208static int
209sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
210{
211	int error, qlimit;
212
213	netisr_getqlimit(&rtsock_nh, &qlimit);
214	error = sysctl_handle_int(oidp, &qlimit, 0, req);
215        if (error || !req->newptr)
216                return (error);
217	if (qlimit < 1)
218		return (EINVAL);
219	return (netisr_setqlimit(&rtsock_nh, qlimit));
220}
221SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW,
222    0, 0, sysctl_route_netisr_maxqlen, "I",
223    "maximum routing socket dispatch queue length");
224
225static void
226rts_init(void)
227{
228	int tmp;
229
230	if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
231		rtsock_nh.nh_qlimit = tmp;
232	netisr_register(&rtsock_nh);
233}
234SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
235
236static int
237raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src,
238    struct rawcb *rp)
239{
240	int fibnum;
241
242	KASSERT(m != NULL, ("%s: m is NULL", __func__));
243	KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
244	KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
245
246	/* No filtering requested. */
247	if ((m->m_flags & RTS_FILTER_FIB) == 0)
248		return (0);
249
250	/* Check if it is a rts and the fib matches the one of the socket. */
251	fibnum = M_GETFIB(m);
252	if (proto->sp_family != PF_ROUTE ||
253	    rp->rcb_socket == NULL ||
254	    rp->rcb_socket->so_fibnum == fibnum)
255		return (0);
256
257	/* Filtering requested and no match, the socket shall be skipped. */
258	return (1);
259}
260
261static void
262rts_input(struct mbuf *m)
263{
264	struct sockproto route_proto;
265	unsigned short *family;
266	struct m_tag *tag;
267
268	route_proto.sp_family = PF_ROUTE;
269	tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
270	if (tag != NULL) {
271		family = (unsigned short *)(tag + 1);
272		route_proto.sp_protocol = *family;
273		m_tag_delete(m, tag);
274	} else
275		route_proto.sp_protocol = 0;
276
277	raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb);
278}
279
280/*
281 * It really doesn't make any sense at all for this code to share much
282 * with raw_usrreq.c, since its functionality is so restricted.  XXX
283 */
284static void
285rts_abort(struct socket *so)
286{
287
288	raw_usrreqs.pru_abort(so);
289}
290
291static void
292rts_close(struct socket *so)
293{
294
295	raw_usrreqs.pru_close(so);
296}
297
298/* pru_accept is EOPNOTSUPP */
299
300static int
301rts_attach(struct socket *so, int proto, struct thread *td)
302{
303	struct rawcb *rp;
304	int error;
305
306	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
307
308	/* XXX */
309	rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
310	if (rp == NULL)
311		return ENOBUFS;
312
313	so->so_pcb = (caddr_t)rp;
314	so->so_fibnum = td->td_proc->p_fibnum;
315	error = raw_attach(so, proto);
316	rp = sotorawcb(so);
317	if (error) {
318		so->so_pcb = NULL;
319		free(rp, M_PCB);
320		return error;
321	}
322	RTSOCK_LOCK();
323	switch(rp->rcb_proto.sp_protocol) {
324	case AF_INET:
325		route_cb.ip_count++;
326		break;
327	case AF_INET6:
328		route_cb.ip6_count++;
329		break;
330	case AF_IPX:
331		route_cb.ipx_count++;
332		break;
333	}
334	route_cb.any_count++;
335	RTSOCK_UNLOCK();
336	soisconnected(so);
337	so->so_options |= SO_USELOOPBACK;
338	return 0;
339}
340
341static int
342rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
343{
344
345	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
346}
347
348static int
349rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
350{
351
352	return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
353}
354
355/* pru_connect2 is EOPNOTSUPP */
356/* pru_control is EOPNOTSUPP */
357
358static void
359rts_detach(struct socket *so)
360{
361	struct rawcb *rp = sotorawcb(so);
362
363	KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
364
365	RTSOCK_LOCK();
366	switch(rp->rcb_proto.sp_protocol) {
367	case AF_INET:
368		route_cb.ip_count--;
369		break;
370	case AF_INET6:
371		route_cb.ip6_count--;
372		break;
373	case AF_IPX:
374		route_cb.ipx_count--;
375		break;
376	}
377	route_cb.any_count--;
378	RTSOCK_UNLOCK();
379	raw_usrreqs.pru_detach(so);
380}
381
382static int
383rts_disconnect(struct socket *so)
384{
385
386	return (raw_usrreqs.pru_disconnect(so));
387}
388
389/* pru_listen is EOPNOTSUPP */
390
391static int
392rts_peeraddr(struct socket *so, struct sockaddr **nam)
393{
394
395	return (raw_usrreqs.pru_peeraddr(so, nam));
396}
397
398/* pru_rcvd is EOPNOTSUPP */
399/* pru_rcvoob is EOPNOTSUPP */
400
401static int
402rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
403	 struct mbuf *control, struct thread *td)
404{
405
406	return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
407}
408
409/* pru_sense is null */
410
411static int
412rts_shutdown(struct socket *so)
413{
414
415	return (raw_usrreqs.pru_shutdown(so));
416}
417
418static int
419rts_sockaddr(struct socket *so, struct sockaddr **nam)
420{
421
422	return (raw_usrreqs.pru_sockaddr(so, nam));
423}
424
425static struct pr_usrreqs route_usrreqs = {
426	.pru_abort =		rts_abort,
427	.pru_attach =		rts_attach,
428	.pru_bind =		rts_bind,
429	.pru_connect =		rts_connect,
430	.pru_detach =		rts_detach,
431	.pru_disconnect =	rts_disconnect,
432	.pru_peeraddr =		rts_peeraddr,
433	.pru_send =		rts_send,
434	.pru_shutdown =		rts_shutdown,
435	.pru_sockaddr =		rts_sockaddr,
436	.pru_close =		rts_close,
437};
438
/* Guard so the union is defined only once if another header also provides it. */
439#ifndef _SOCKADDR_UNION_DEFINED
440#define	_SOCKADDR_UNION_DEFINED
441/*
442 * The union of all possible address formats we handle.
 * rtm_get_jailed() fills the sin or sin6 member with a jail-visible
 * interface address.
443 */
444union sockaddr_union {
445	struct sockaddr		sa;
446	struct sockaddr_in	sin;
447	struct sockaddr_in6	sin6;
448};
449#endif /* _SOCKADDR_UNION_DEFINED */
450
451static int
452rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
453    struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
454{
455
456	/* First, see if the returned address is part of the jail. */
457	if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
458		info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
459		return (0);
460	}
461
462	switch (info->rti_info[RTAX_DST]->sa_family) {
463#ifdef INET
464	case AF_INET:
465	{
466		struct in_addr ia;
467		struct ifaddr *ifa;
468		int found;
469
470		found = 0;
471		/*
472		 * Try to find an address on the given outgoing interface
473		 * that belongs to the jail.
474		 */
475		IF_ADDR_RLOCK(ifp);
476		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
477			struct sockaddr *sa;
478			sa = ifa->ifa_addr;
479			if (sa->sa_family != AF_INET)
480				continue;
481			ia = ((struct sockaddr_in *)sa)->sin_addr;
482			if (prison_check_ip4(cred, &ia) == 0) {
483				found = 1;
484				break;
485			}
486		}
487		IF_ADDR_RUNLOCK(ifp);
488		if (!found) {
489			/*
490			 * As a last resort return the 'default' jail address.
491			 */
492			ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
493			    sin_addr;
494			if (prison_get_ip4(cred, &ia) != 0)
495				return (ESRCH);
496		}
497		bzero(&saun->sin, sizeof(struct sockaddr_in));
498		saun->sin.sin_len = sizeof(struct sockaddr_in);
499		saun->sin.sin_family = AF_INET;
500		saun->sin.sin_addr.s_addr = ia.s_addr;
501		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
502		break;
503	}
504#endif
505#ifdef INET6
506	case AF_INET6:
507	{
508		struct in6_addr ia6;
509		struct ifaddr *ifa;
510		int found;
511
512		found = 0;
513		/*
514		 * Try to find an address on the given outgoing interface
515		 * that belongs to the jail.
516		 */
517		IF_ADDR_RLOCK(ifp);
518		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
519			struct sockaddr *sa;
520			sa = ifa->ifa_addr;
521			if (sa->sa_family != AF_INET6)
522				continue;
523			bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
524			    &ia6, sizeof(struct in6_addr));
525			if (prison_check_ip6(cred, &ia6) == 0) {
526				found = 1;
527				break;
528			}
529		}
530		IF_ADDR_RUNLOCK(ifp);
531		if (!found) {
532			/*
533			 * As a last resort return the 'default' jail address.
534			 */
535			ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
536			    sin6_addr;
537			if (prison_get_ip6(cred, &ia6) != 0)
538				return (ESRCH);
539		}
540		bzero(&saun->sin6, sizeof(struct sockaddr_in6));
541		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
542		saun->sin6.sin6_family = AF_INET6;
543		bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
544		if (sa6_recoverscope(&saun->sin6) != 0)
545			return (ESRCH);
546		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
547		break;
548	}
549#endif
550	default:
551		return (ESRCH);
552	}
553	return (0);
554}
555
556/*ARGSUSED*/
/*
 * Process one routing message written to routing socket so.
 *
 * The message in mbuf chain m is checked for length and version, copied
 * into a contiguous rt_msghdr buffer, and its trailing sockaddrs are parsed
 * by rt_xaddrs().  Every message type other than RTM_GET requires
 * PRIV_NET_ROUTE.  The request is then dispatched on rtm_type: RTM_ADD,
 * RTM_DELETE, RTM_GET, RTM_CHANGE and RTM_LOCK are handled; anything else
 * fails with EOPNOTSUPP.
 *
 * All paths end at the "flush" label, where rtm_errno or RTF_DONE is
 * recorded in the message, the message is copied back into m and m is
 * handed to rt_dispatch().  If the socket has SO_USELOOPBACK cleared and
 * no other routing socket is attached, m is freed instead.
 *
 * Returns 0 on success or an errno value.
 */
557static int
558route_output(struct mbuf *m, struct socket *so)
559{
560#define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
561	struct rt_msghdr *rtm = NULL;
562	struct rtentry *rt = NULL;
563	struct radix_node_head *rnh;
564	struct rt_addrinfo info;
565#ifdef INET6
566	struct sockaddr_storage ss;
567	struct sockaddr_in6 *sin6;
568	int i, rti_need_deembed = 0;
569#endif
570	int len, error = 0;
571	struct ifnet *ifp = NULL;
572	union sockaddr_union saun;
573	sa_family_t saf = AF_UNSPEC;
574
	/* Record the error and jump to the common exit path. */
575#define senderr(e) { error = e; goto flush;}
576	if (m == NULL || ((m->m_len < sizeof(long)) &&
577		       (m = m_pullup(m, sizeof(long))) == NULL))
578		return (ENOBUFS);
579	if ((m->m_flags & M_PKTHDR) == 0)
580		panic("route_output");
581	len = m->m_pkthdr.len;
	/* The packet length must match the length claimed in the header. */
582	if (len < sizeof(*rtm) ||
583	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
584		info.rti_info[RTAX_DST] = NULL;
585		senderr(EINVAL);
586	}
	/* Work on a contiguous private copy of the message. */
587	R_Malloc(rtm, struct rt_msghdr *, len);
588	if (rtm == NULL) {
589		info.rti_info[RTAX_DST] = NULL;
590		senderr(ENOBUFS);
591	}
592	m_copydata(m, 0, len, (caddr_t)rtm);
593	if (rtm->rtm_version != RTM_VERSION) {
594		info.rti_info[RTAX_DST] = NULL;
595		senderr(EPROTONOSUPPORT);
596	}
597	rtm->rtm_pid = curproc->p_pid;
598	bzero(&info, sizeof(info));
599	info.rti_addrs = rtm->rtm_addrs;
600	/*
601	 * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
602	 * link-local address because rtrequest requires addresses with
603	 * embedded scope id.
604	 */
605	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
606		info.rti_info[RTAX_DST] = NULL;
607		senderr(EINVAL);
608	}
609	info.rti_flags = rtm->rtm_flags;
	/* A destination is mandatory; address families must be in range. */
610	if (info.rti_info[RTAX_DST] == NULL ||
611	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
612	    (info.rti_info[RTAX_GATEWAY] != NULL &&
613	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
614		senderr(EINVAL);
615	saf = info.rti_info[RTAX_DST]->sa_family;
616	/*
617	 * Verify that the caller has the appropriate privilege; RTM_GET
618	 * is the only operation the non-superuser is allowed.
619	 */
620	if (rtm->rtm_type != RTM_GET) {
621		error = priv_check(curthread, PRIV_NET_ROUTE);
622		if (error)
623			senderr(error);
624	}
625
626	/*
627	 * The given gateway address may be an interface address.
628	 * For example, issuing a "route change" command on a route
629	 * entry that was created from a tunnel, and the gateway
630	 * address given is the local end point. In this case the
631	 * RTF_GATEWAY flag must be cleared or the destination will
632	 * not be reachable even though there is no error message.
633	 */
634	if (info.rti_info[RTAX_GATEWAY] != NULL &&
635	    info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
636		struct route gw_ro;
637
638		bzero(&gw_ro, sizeof(gw_ro));
639		gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY];
640		rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum);
641		/*
642		 * A host route through the loopback interface is
643		 * installed for each interface address. In pre 8.0
644		 * releases the interface address of a PPP link type
645		 * is not reachable locally. This behavior is fixed as
646		 * part of the new L2/L3 redesign and rewrite work. The
647		 * signature of this interface address route is the
648		 * AF_LINK sa_family type of the rt_gateway, and the
649		 * rt_ifp has the IFF_LOOPBACK flag set.
650		 */
651		if (gw_ro.ro_rt != NULL &&
652		    gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK &&
653		    gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) {
654			info.rti_flags &= ~RTF_GATEWAY;
655			info.rti_flags |= RTF_GWFLAG_COMPAT;
656		}
657		if (gw_ro.ro_rt != NULL)
658			RTFREE(gw_ro.ro_rt);
659	}
660
661	switch (rtm->rtm_type) {
662		struct rtentry *saved_nrt;
663
664	case RTM_ADD:
665		if (info.rti_info[RTAX_GATEWAY] == NULL)
666			senderr(EINVAL);
667		saved_nrt = NULL;
668
669		/* support for new ARP code */
670		if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
671		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
672			error = lla_rt_output(rtm, &info);
673#ifdef INET6
674			if (error == 0)
675				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
676#endif
677			break;
678		}
679		error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
680		    so->so_fibnum);
681		if (error == 0 && saved_nrt) {
682#ifdef INET6
683			rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
684#endif
685			RT_LOCK(saved_nrt);
686			rt_setmetrics(rtm, saved_nrt);
687			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
688			RT_REMREF(saved_nrt);
689			RT_UNLOCK(saved_nrt);
690		}
691		break;
692
693	case RTM_DELETE:
694		saved_nrt = NULL;
695		/* support for new ARP code */
696		if (info.rti_info[RTAX_GATEWAY] &&
697		    (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) &&
698		    (rtm->rtm_flags & RTF_LLDATA) != 0) {
699			error = lla_rt_output(rtm, &info);
700#ifdef INET6
701			if (error == 0)
702				rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
703#endif
704			break;
705		}
706		error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
707		    so->so_fibnum);
708		if (error == 0) {
			/* Report the deleted entry via the RTM_GET reply code. */
709			RT_LOCK(saved_nrt);
710			rt = saved_nrt;
711			goto report;
712		}
713#ifdef INET6
714		/* rt_msg2() will not be used when RTM_DELETE fails. */
715		rti_need_deembed = (V_deembed_scopeid) ? 1 : 0;
716#endif
717		break;
718
719	case RTM_GET:
720	case RTM_CHANGE:
721	case RTM_LOCK:
722		rnh = rt_tables_get_rnh(so->so_fibnum,
723		    info.rti_info[RTAX_DST]->sa_family);
724		if (rnh == NULL)
725			senderr(EAFNOSUPPORT);
726		RADIX_NODE_HEAD_RLOCK(rnh);
727		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
728			info.rti_info[RTAX_NETMASK], rnh);
729		if (rt == NULL) {	/* XXX looks bogus */
730			RADIX_NODE_HEAD_RUNLOCK(rnh);
731			senderr(ESRCH);
732		}
733#ifdef RADIX_MPATH
734		/*
735		 * for RTM_CHANGE/LOCK, if we got multipath routes,
736		 * we require users to specify a matching RTAX_GATEWAY.
737		 *
738		 * for RTM_GET, gate is optional even with multipath.
739		 * if gate == NULL the first match is returned.
740		 * (no need to call rt_mpath_matchgate if gate == NULL)
741		 */
742		if (rn_mpath_capable(rnh) &&
743		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
744			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
745			if (!rt) {
746				RADIX_NODE_HEAD_RUNLOCK(rnh);
747				senderr(ESRCH);
748			}
749		}
750#endif
751		/*
752		 * If performing proxied L2 entry insertion, and
753		 * the actual PPP host entry is found, perform
754		 * another search to retrieve the prefix route of
755		 * the local end point of the PPP link.
756		 */
757		if (rtm->rtm_flags & RTF_ANNOUNCE) {
			/*
			 * NOTE(review): laddr is a bare struct sockaddr and
			 * is left unset when ifa_ifwithnet() returns NULL
			 * below; confirm it cannot reach rnh_matchaddr()
			 * uninitialized.
			 */
758			struct sockaddr laddr;
759
760			if (rt->rt_ifp != NULL &&
761			    rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
762				struct ifaddr *ifa;
763
764				ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1);
765				if (ifa != NULL)
766					rt_maskedcopy(ifa->ifa_addr,
767						      &laddr,
768						      ifa->ifa_netmask);
769			} else
770				rt_maskedcopy(rt->rt_ifa->ifa_addr,
771					      &laddr,
772					      rt->rt_ifa->ifa_netmask);
773			/*
774			 * refactor rt and no lock operation necessary
775			 */
776			rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh);
777			if (rt == NULL) {
778				RADIX_NODE_HEAD_RUNLOCK(rnh);
779				senderr(ESRCH);
780			}
781		}
		/* Take a locked reference before dropping the table lock. */
782		RT_LOCK(rt);
783		RT_ADDREF(rt);
784		RADIX_NODE_HEAD_RUNLOCK(rnh);
785
786		/*
787		 * Fix for PR: 82974
788		 *
789		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
790		 * returns a perfect match in case a netmask is
791		 * specified.  For host routes only a longest prefix
792		 * match is returned so it is necessary to compare the
793		 * existence of the netmask.  If both have a netmask
794		 * rnh_lookup() did a perfect match and if none of them
795		 * have a netmask both are host routes which is also a
796		 * perfect match.
797		 */
798
799		if (rtm->rtm_type != RTM_GET &&
800		    (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
801			RT_UNLOCK(rt);
802			senderr(ESRCH);
803		}
804
805		switch(rtm->rtm_type) {
806
807		case RTM_GET:
		/* Also entered from RTM_DELETE above with rt locked. */
808		report:
809			RT_LOCK_ASSERT(rt);
			/*
			 * Hide the entry from jailed callers: network routes
			 * are refused to a jail without its own vnet, host
			 * routes are refused unless prison_if() accepts the
			 * key.
			 */
810			if ((rt->rt_flags & RTF_HOST) == 0
811			    ? jailed_without_vnet(curthread->td_ucred)
812			    : prison_if(curthread->td_ucred,
813			    rt_key(rt)) != 0) {
814				RT_UNLOCK(rt);
815				senderr(ESRCH);
816			}
817			info.rti_info[RTAX_DST] = rt_key(rt);
818			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
819			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
820			info.rti_info[RTAX_GENMASK] = 0;
821			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
822				ifp = rt->rt_ifp;
823				if (ifp) {
824					info.rti_info[RTAX_IFP] =
825					    ifp->if_addr->ifa_addr;
826					error = rtm_get_jailed(&info, ifp, rt,
827					    &saun, curthread->td_ucred);
828					if (error != 0) {
829						RT_UNLOCK(rt);
830						senderr(error);
831					}
832					if (ifp->if_flags & IFF_POINTOPOINT)
833						info.rti_info[RTAX_BRD] =
834						    rt->rt_ifa->ifa_dstaddr;
835					rtm->rtm_index = ifp->if_index;
836				} else {
837					info.rti_info[RTAX_IFP] = NULL;
838					info.rti_info[RTAX_IFA] = NULL;
839				}
840			} else if ((ifp = rt->rt_ifp) != NULL) {
841				rtm->rtm_index = ifp->if_index;
842			}
			/*
			 * First call sizes the reply; grow the buffer if the
			 * reply is larger than the request was.
			 */
843			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
844			if (len > rtm->rtm_msglen) {
845				struct rt_msghdr *new_rtm;
846				R_Malloc(new_rtm, struct rt_msghdr *, len);
847				if (new_rtm == NULL) {
848					RT_UNLOCK(rt);
849					senderr(ENOBUFS);
850				}
851				bcopy(rtm, new_rtm, rtm->rtm_msglen);
852				Free(rtm); rtm = new_rtm;
853			}
854			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
			/* Present RTF_GWFLAG_COMPAT routes as RTF_GATEWAY. */
855			if (rt->rt_flags & RTF_GWFLAG_COMPAT)
856				rtm->rtm_flags = RTF_GATEWAY |
857					(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
858			else
859				rtm->rtm_flags = rt->rt_flags;
860			rt_getmetrics(rt, &rtm->rtm_rmx);
861			rtm->rtm_addrs = info.rti_addrs;
862			break;
863
864		case RTM_CHANGE:
865			/*
866			 * New gateway could require new ifaddr, ifp;
867			 * flags may also be different; ifp may be specified
868			 * by ll sockaddr when protocol address is ambiguous
869			 */
870			if (((rt->rt_flags & RTF_GATEWAY) &&
871			     info.rti_info[RTAX_GATEWAY] != NULL) ||
872			    info.rti_info[RTAX_IFP] != NULL ||
873			    (info.rti_info[RTAX_IFA] != NULL &&
874			     !sa_equal(info.rti_info[RTAX_IFA],
875				       rt->rt_ifa->ifa_addr))) {
876				RT_UNLOCK(rt);
877				RADIX_NODE_HEAD_LOCK(rnh);
878				error = rt_getifa_fib(&info, rt->rt_fibnum);
879				/*
880				 * XXXRW: Really we should release this
881				 * reference later, but this maintains
882				 * historical behavior.
883				 */
884				if (info.rti_ifa != NULL)
885					ifa_free(info.rti_ifa);
886				RADIX_NODE_HEAD_UNLOCK(rnh);
887				if (error != 0)
888					senderr(error);
889				RT_LOCK(rt);
890			}
891			if (info.rti_ifa != NULL &&
892			    info.rti_ifa != rt->rt_ifa &&
893			    rt->rt_ifa != NULL &&
894			    rt->rt_ifa->ifa_rtrequest != NULL) {
895				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
896				    &info);
897				ifa_free(rt->rt_ifa);
898			}
899			if (info.rti_info[RTAX_GATEWAY] != NULL) {
				/*
				 * Drop and retake the route lock so the table
				 * lock is acquired first.
				 */
900				RT_UNLOCK(rt);
901				RADIX_NODE_HEAD_LOCK(rnh);
902				RT_LOCK(rt);
903
904				error = rt_setgate(rt, rt_key(rt),
905				    info.rti_info[RTAX_GATEWAY]);
906				RADIX_NODE_HEAD_UNLOCK(rnh);
907				if (error != 0) {
908					RT_UNLOCK(rt);
909					senderr(error);
910				}
911				rt->rt_flags &= ~RTF_GATEWAY;
912				rt->rt_flags |= (RTF_GATEWAY & info.rti_flags);
913			}
914			if (info.rti_ifa != NULL &&
915			    info.rti_ifa != rt->rt_ifa) {
916				ifa_ref(info.rti_ifa);
917				rt->rt_ifa = info.rti_ifa;
918				rt->rt_ifp = info.rti_ifp;
919			}
920			/* Allow some flags to be toggled on change. */
921			rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) |
922				    (rtm->rtm_flags & RTF_FMASK);
923			rt_setmetrics(rtm, rt);
924			rtm->rtm_index = rt->rt_ifp->if_index;
925			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
926			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
927			/* FALLTHROUGH */
928		case RTM_LOCK:
929			/* We don't support locks anymore */
930			break;
931		}
932		RT_UNLOCK(rt);
933		break;
934
935	default:
936		senderr(EOPNOTSUPP);
937	}
938
	/* Common exit: record the outcome and send the reply. */
939flush:
940	if (rtm) {
941		if (error)
942			rtm->rtm_errno = error;
943		else
944			rtm->rtm_flags |= RTF_DONE;
945	}
946	if (rt)		/* XXX can this be true? */
947		RTFREE(rt);
948    {
949	struct rawcb *rp = NULL;
950	/*
951	 * Check to see if we don't want our own messages.
952	 */
953	if ((so->so_options & SO_USELOOPBACK) == 0) {
		/* NOTE(review): any_count is read here without RTSOCK_LOCK(). */
954		if (route_cb.any_count <= 1) {
955			if (rtm)
956				Free(rtm);
957			m_freem(m);
958			return (error);
959		}
960		/* There is another listener, so construct message */
961		rp = sotorawcb(so);
962	}
963	if (rtm) {
964#ifdef INET6
965		if (rti_need_deembed) {
966			/* sin6_scope_id is recovered before sending rtm. */
967			sin6 = (struct sockaddr_in6 *)&ss;
968			for (i = 0; i < RTAX_MAX; i++) {
969				if (info.rti_info[i] == NULL)
970					continue;
971				if (info.rti_info[i]->sa_family != AF_INET6)
972					continue;
973				bcopy(info.rti_info[i], sin6, sizeof(*sin6));
974				if (sa6_recoverscope(sin6) == 0)
975					bcopy(sin6, info.rti_info[i],
976						    sizeof(*sin6));
977			}
978		}
979#endif
		/* Write the reply back into m and fix up the chain length. */
980		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
981		if (m->m_pkthdr.len < rtm->rtm_msglen) {
982			m_freem(m);
983			m = NULL;
984		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
985			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
986	}
987	if (m) {
		/* Tag the reply so listeners on other FIBs are skipped. */
988		M_SETFIB(m, so->so_fibnum);
989		m->m_flags |= RTS_FILTER_FIB;
990		if (rp) {
991			/*
992			 * XXX ensure we don't get a copy by
993			 * invalidating our protocol
994			 */
995			unsigned short family = rp->rcb_proto.sp_family;
996			rp->rcb_proto.sp_family = 0;
997			rt_dispatch(m, saf);
998			rp->rcb_proto.sp_family = family;
999		} else
1000			rt_dispatch(m, saf);
1001	}
1002	/* info.rti_info[RTAX_DST] (used above) can point inside of rtm */
1003	if (rtm)
1004		Free(rtm);
1005    }
1006	return (error);
1007#undef	sa_equal
1008}
1009
1010static void
1011rt_setmetrics(const struct rt_msghdr *rtm, struct rtentry *rt)
1012{
1013
1014	if (rtm->rtm_inits & RTV_MTU)
1015		rt->rt_mtu = rtm->rtm_rmx.rmx_mtu;
1016	if (rtm->rtm_inits & RTV_WEIGHT)
1017		rt->rt_weight = rtm->rtm_rmx.rmx_weight;
1018	/* Kernel -> userland timebase conversion. */
1019	if (rtm->rtm_inits & RTV_EXPIRE)
1020		rt->rt_expire = rtm->rtm_rmx.rmx_expire ?
1021		    rtm->rtm_rmx.rmx_expire - time_second + time_uptime : 0;
1022}
1023
1024static void
1025rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
1026{
1027
1028	bzero(out, sizeof(*out));
1029	out->rmx_mtu = rt->rt_mtu;
1030	out->rmx_weight = rt->rt_weight;
1031	out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
1032	/* Kernel -> userland timebase conversion. */
1033	out->rmx_expire = rt->rt_expire ?
1034	    rt->rt_expire - time_uptime + time_second : 0;
1035}
1036
1037/*
1038 * Extract the addresses of the passed sockaddrs.
1039 * Do a little sanity checking so as to avoid bad memory references.
1040 * This data is derived straight from userland.
1041 */
1042static int
1043rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1044{
1045	struct sockaddr *sa;
1046	int i;
1047
1048	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
1049		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1050			continue;
1051		sa = (struct sockaddr *)cp;
1052		/*
1053		 * It won't fit.
1054		 */
1055		if (cp + sa->sa_len > cplim)
1056			return (EINVAL);
1057		/*
1058		 * there are no more.. quit now
1059		 * If there are more bits, they are in error.
1060		 * I've seen this. route(1) can evidently generate these.
1061		 * This causes kernel to core dump.
1062		 * for compatibility, If we see this, point to a safe address.
1063		 */
1064		if (sa->sa_len == 0) {
1065			rtinfo->rti_info[i] = &sa_zero;
1066			return (0); /* should be EINVAL but for compat */
1067		}
1068		/* accept it */
1069#ifdef INET6
1070		if (sa->sa_family == AF_INET6)
1071			sa6_embedscope((struct sockaddr_in6 *)sa,
1072			    V_ip6_use_defzone);
1073#endif
1074		rtinfo->rti_info[i] = sa;
1075		cp += SA_SIZE(sa);
1076	}
1077	return (0);
1078}
1079
1080/*
1081 * Used by the routing socket.
1082 */
1083static struct mbuf *
1084rt_msg1(int type, struct rt_addrinfo *rtinfo)
1085{
1086	struct rt_msghdr *rtm;
1087	struct mbuf *m;
1088	int i;
1089	struct sockaddr *sa;
1090#ifdef INET6
1091	struct sockaddr_storage ss;
1092	struct sockaddr_in6 *sin6;
1093#endif
1094	int len, dlen;
1095
1096	switch (type) {
1097
1098	case RTM_DELADDR:
1099	case RTM_NEWADDR:
1100		len = sizeof(struct ifa_msghdr);
1101		break;
1102
1103	case RTM_DELMADDR:
1104	case RTM_NEWMADDR:
1105		len = sizeof(struct ifma_msghdr);
1106		break;
1107
1108	case RTM_IFINFO:
1109		len = sizeof(struct if_msghdr);
1110		break;
1111
1112	case RTM_IFANNOUNCE:
1113	case RTM_IEEE80211:
1114		len = sizeof(struct if_announcemsghdr);
1115		break;
1116
1117	default:
1118		len = sizeof(struct rt_msghdr);
1119	}
1120
1121	/* XXXGL: can we use MJUMPAGESIZE cluster here? */
1122	KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
1123	if (len > MHLEN)
1124		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1125	else
1126		m = m_gethdr(M_NOWAIT, MT_DATA);
1127	if (m == NULL)
1128		return (m);
1129
1130	m->m_pkthdr.len = m->m_len = len;
1131	rtm = mtod(m, struct rt_msghdr *);
1132	bzero((caddr_t)rtm, len);
1133	for (i = 0; i < RTAX_MAX; i++) {
1134		if ((sa = rtinfo->rti_info[i]) == NULL)
1135			continue;
1136		rtinfo->rti_addrs |= (1 << i);
1137		dlen = SA_SIZE(sa);
1138#ifdef INET6
1139		if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
1140			sin6 = (struct sockaddr_in6 *)&ss;
1141			bcopy(sa, sin6, sizeof(*sin6));
1142			if (sa6_recoverscope(sin6) == 0)
1143				sa = (struct sockaddr *)sin6;
1144		}
1145#endif
1146		m_copyback(m, len, dlen, (caddr_t)sa);
1147		len += dlen;
1148	}
1149	if (m->m_pkthdr.len != len) {
1150		m_freem(m);
1151		return (NULL);
1152	}
1153	rtm->rtm_msglen = len;
1154	rtm->rtm_version = RTM_VERSION;
1155	rtm->rtm_type = type;
1156	return (m);
1157}
1158
1159/*
1160 * Used by the sysctl code and routing socket.
1161 */
1162static int
1163rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
1164{
1165	int i;
1166	int len, dlen, second_time = 0;
1167	caddr_t cp0;
1168#ifdef INET6
1169	struct sockaddr_storage ss;
1170	struct sockaddr_in6 *sin6;
1171#endif
1172
1173	rtinfo->rti_addrs = 0;
1174again:
1175	switch (type) {
1176
1177	case RTM_DELADDR:
1178	case RTM_NEWADDR:
1179		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
1180#ifdef COMPAT_FREEBSD32
1181			if (w->w_req->flags & SCTL_MASK32)
1182				len = sizeof(struct ifa_msghdrl32);
1183			else
1184#endif
1185				len = sizeof(struct ifa_msghdrl);
1186		} else
1187			len = sizeof(struct ifa_msghdr);
1188		break;
1189
1190	case RTM_IFINFO:
1191#ifdef COMPAT_FREEBSD32
1192		if (w != NULL && w->w_req->flags & SCTL_MASK32) {
1193			if (w->w_op == NET_RT_IFLISTL)
1194				len = sizeof(struct if_msghdrl32);
1195			else
1196				len = sizeof(struct if_msghdr32);
1197			break;
1198		}
1199#endif
1200		if (w != NULL && w->w_op == NET_RT_IFLISTL)
1201			len = sizeof(struct if_msghdrl);
1202		else
1203			len = sizeof(struct if_msghdr);
1204		break;
1205
1206	case RTM_NEWMADDR:
1207		len = sizeof(struct ifma_msghdr);
1208		break;
1209
1210	default:
1211		len = sizeof(struct rt_msghdr);
1212	}
1213	cp0 = cp;
1214	if (cp0)
1215		cp += len;
1216	for (i = 0; i < RTAX_MAX; i++) {
1217		struct sockaddr *sa;
1218
1219		if ((sa = rtinfo->rti_info[i]) == NULL)
1220			continue;
1221		rtinfo->rti_addrs |= (1 << i);
1222		dlen = SA_SIZE(sa);
1223		if (cp) {
1224#ifdef INET6
1225			if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
1226				sin6 = (struct sockaddr_in6 *)&ss;
1227				bcopy(sa, sin6, sizeof(*sin6));
1228				if (sa6_recoverscope(sin6) == 0)
1229					sa = (struct sockaddr *)sin6;
1230			}
1231#endif
1232			bcopy((caddr_t)sa, cp, (unsigned)dlen);
1233			cp += dlen;
1234		}
1235		len += dlen;
1236	}
1237	len = ALIGN(len);
1238	if (cp == NULL && w != NULL && !second_time) {
1239		struct walkarg *rw = w;
1240
1241		if (rw->w_req) {
1242			if (rw->w_tmemsize < len) {
1243				if (rw->w_tmem)
1244					free(rw->w_tmem, M_RTABLE);
1245				rw->w_tmem = (caddr_t)
1246					malloc(len, M_RTABLE, M_NOWAIT);
1247				if (rw->w_tmem)
1248					rw->w_tmemsize = len;
1249			}
1250			if (rw->w_tmem) {
1251				cp = rw->w_tmem;
1252				second_time = 1;
1253				goto again;
1254			}
1255		}
1256	}
1257	if (cp) {
1258		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1259
1260		rtm->rtm_version = RTM_VERSION;
1261		rtm->rtm_type = type;
1262		rtm->rtm_msglen = len;
1263	}
1264	return (len);
1265}
1266
1267/*
1268 * This routine is called to generate a message from the routing
1269 * socket indicating that a redirect has occured, a routing lookup
1270 * has failed, or that a protocol has detected timeouts to a particular
1271 * destination.
1272 */
1273void
1274rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
1275    int fibnum)
1276{
1277	struct rt_msghdr *rtm;
1278	struct mbuf *m;
1279	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1280
1281	if (route_cb.any_count == 0)
1282		return;
1283	m = rt_msg1(type, rtinfo);
1284	if (m == NULL)
1285		return;
1286
1287	if (fibnum != RTS_ALLFIBS) {
1288		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
1289		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
1290		M_SETFIB(m, fibnum);
1291		m->m_flags |= RTS_FILTER_FIB;
1292	}
1293
1294	rtm = mtod(m, struct rt_msghdr *);
1295	rtm->rtm_flags = RTF_DONE | flags;
1296	rtm->rtm_errno = error;
1297	rtm->rtm_addrs = rtinfo->rti_addrs;
1298	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1299}
1300
/*
 * Convenience wrapper around rt_missmsg_fib() that passes RTS_ALLFIBS
 * as the FIB argument.
 */
void
rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
{

	rt_missmsg_fib(type, rtinfo, flags, error, RTS_ALLFIBS);
}
1307
1308/*
1309 * This routine is called to generate a message from the routing
1310 * socket indicating that the status of a network interface has changed.
1311 */
1312void
1313rt_ifmsg(struct ifnet *ifp)
1314{
1315	struct if_msghdr *ifm;
1316	struct mbuf *m;
1317	struct rt_addrinfo info;
1318
1319	if (route_cb.any_count == 0)
1320		return;
1321	bzero((caddr_t)&info, sizeof(info));
1322	m = rt_msg1(RTM_IFINFO, &info);
1323	if (m == NULL)
1324		return;
1325	ifm = mtod(m, struct if_msghdr *);
1326	ifm->ifm_index = ifp->if_index;
1327	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1328	ifm->ifm_data = ifp->if_data;
1329	ifm->ifm_addrs = 0;
1330	rt_dispatch(m, AF_UNSPEC);
1331}
1332
1333/*
1334 * This is called to generate messages from the routing socket
1335 * indicating a network interface has had addresses associated with it.
1336 * if we ever reverse the logic and replace messages TO the routing
1337 * socket indicate a request to configure interfaces, then it will
1338 * be unnecessary as the routing socket will automatically generate
1339 * copies of it.
1340 */
1341void
1342rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
1343    int fibnum)
1344{
1345	struct rt_addrinfo info;
1346	struct sockaddr *sa = NULL;
1347	int pass;
1348	struct mbuf *m = NULL;
1349	struct ifnet *ifp = ifa->ifa_ifp;
1350
1351	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1352		("unexpected cmd %u", cmd));
1353#if defined(INET) || defined(INET6)
1354#ifdef SCTP
1355	/*
1356	 * notify the SCTP stack
1357	 * this will only get called when an address is added/deleted
1358	 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
1359	 */
1360	sctp_addr_change(ifa, cmd);
1361#endif /* SCTP */
1362#endif
1363	if (route_cb.any_count == 0)
1364		return;
1365	for (pass = 1; pass < 3; pass++) {
1366		bzero((caddr_t)&info, sizeof(info));
1367		if ((cmd == RTM_ADD && pass == 1) ||
1368		    (cmd == RTM_DELETE && pass == 2)) {
1369			struct ifa_msghdr *ifam;
1370			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1371
1372			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1373			info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1374			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1375			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1376			if ((m = rt_msg1(ncmd, &info)) == NULL)
1377				continue;
1378			ifam = mtod(m, struct ifa_msghdr *);
1379			ifam->ifam_index = ifp->if_index;
1380			ifam->ifam_metric = ifa->ifa_metric;
1381			ifam->ifam_flags = ifa->ifa_flags;
1382			ifam->ifam_addrs = info.rti_addrs;
1383		}
1384		if ((cmd == RTM_ADD && pass == 2) ||
1385		    (cmd == RTM_DELETE && pass == 1)) {
1386			struct rt_msghdr *rtm;
1387
1388			if (rt == NULL)
1389				continue;
1390			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1391			info.rti_info[RTAX_DST] = sa = rt_key(rt);
1392			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1393			if ((m = rt_msg1(cmd, &info)) == NULL)
1394				continue;
1395			rtm = mtod(m, struct rt_msghdr *);
1396			rtm->rtm_index = ifp->if_index;
1397			rtm->rtm_flags |= rt->rt_flags;
1398			rtm->rtm_errno = error;
1399			rtm->rtm_addrs = info.rti_addrs;
1400		}
1401		if (fibnum != RTS_ALLFIBS) {
1402			KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: "
1403			    "fibnum out of range 0 <= %d < %d", __func__,
1404			     fibnum, rt_numfibs));
1405			M_SETFIB(m, fibnum);
1406			m->m_flags |= RTS_FILTER_FIB;
1407		}
1408		rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1409	}
1410}
1411
/*
 * Convenience wrapper around rt_newaddrmsg_fib() that passes RTS_ALLFIBS
 * as the FIB argument.
 */
void
rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
{

	rt_newaddrmsg_fib(cmd, ifa, error, rt, RTS_ALLFIBS);
}
1418
1419/*
1420 * This is the analogue to the rt_newaddrmsg which performs the same
1421 * function but for multicast group memberhips.  This is easier since
1422 * there is no route state to worry about.
1423 */
1424void
1425rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1426{
1427	struct rt_addrinfo info;
1428	struct mbuf *m = NULL;
1429	struct ifnet *ifp = ifma->ifma_ifp;
1430	struct ifma_msghdr *ifmam;
1431
1432	if (route_cb.any_count == 0)
1433		return;
1434
1435	bzero((caddr_t)&info, sizeof(info));
1436	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1437	info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
1438	/*
1439	 * If a link-layer address is present, present it as a ``gateway''
1440	 * (similarly to how ARP entries, e.g., are presented).
1441	 */
1442	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1443	m = rt_msg1(cmd, &info);
1444	if (m == NULL)
1445		return;
1446	ifmam = mtod(m, struct ifma_msghdr *);
1447	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
1448	    __func__));
1449	ifmam->ifmam_index = ifp->if_index;
1450	ifmam->ifmam_addrs = info.rti_addrs;
1451	rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
1452}
1453
1454static struct mbuf *
1455rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1456	struct rt_addrinfo *info)
1457{
1458	struct if_announcemsghdr *ifan;
1459	struct mbuf *m;
1460
1461	if (route_cb.any_count == 0)
1462		return NULL;
1463	bzero((caddr_t)info, sizeof(*info));
1464	m = rt_msg1(type, info);
1465	if (m != NULL) {
1466		ifan = mtod(m, struct if_announcemsghdr *);
1467		ifan->ifan_index = ifp->if_index;
1468		strlcpy(ifan->ifan_name, ifp->if_xname,
1469			sizeof(ifan->ifan_name));
1470		ifan->ifan_what = what;
1471	}
1472	return m;
1473}
1474
1475/*
1476 * This is called to generate routing socket messages indicating
1477 * IEEE80211 wireless events.
1478 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1479 */
1480void
1481rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1482{
1483	struct mbuf *m;
1484	struct rt_addrinfo info;
1485
1486	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1487	if (m != NULL) {
1488		/*
1489		 * Append the ieee80211 data.  Try to stick it in the
1490		 * mbuf containing the ifannounce msg; otherwise allocate
1491		 * a new mbuf and append.
1492		 *
1493		 * NB: we assume m is a single mbuf.
1494		 */
1495		if (data_len > M_TRAILINGSPACE(m)) {
1496			struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1497			if (n == NULL) {
1498				m_freem(m);
1499				return;
1500			}
1501			bcopy(data, mtod(n, void *), data_len);
1502			n->m_len = data_len;
1503			m->m_next = n;
1504		} else if (data_len > 0) {
1505			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1506			m->m_len += data_len;
1507		}
1508		if (m->m_flags & M_PKTHDR)
1509			m->m_pkthdr.len += data_len;
1510		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1511		rt_dispatch(m, AF_UNSPEC);
1512	}
1513}
1514
1515/*
1516 * This is called to generate routing socket messages indicating
1517 * network interface arrival and departure.
1518 */
1519void
1520rt_ifannouncemsg(struct ifnet *ifp, int what)
1521{
1522	struct mbuf *m;
1523	struct rt_addrinfo info;
1524
1525	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1526	if (m != NULL)
1527		rt_dispatch(m, AF_UNSPEC);
1528}
1529
1530static void
1531rt_dispatch(struct mbuf *m, sa_family_t saf)
1532{
1533	struct m_tag *tag;
1534
1535	/*
1536	 * Preserve the family from the sockaddr, if any, in an m_tag for
1537	 * use when injecting the mbuf into the routing socket buffer from
1538	 * the netisr.
1539	 */
1540	if (saf != AF_UNSPEC) {
1541		tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1542		    M_NOWAIT);
1543		if (tag == NULL) {
1544			m_freem(m);
1545			return;
1546		}
1547		*(unsigned short *)(tag + 1) = saf;
1548		m_tag_prepend(m, tag);
1549	}
1550#ifdef VIMAGE
1551	if (V_loif)
1552		m->m_pkthdr.rcvif = V_loif;
1553	else {
1554		m_freem(m);
1555		return;
1556	}
1557#endif
1558	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
1559}
1560
1561/*
1562 * This is used in dumping the kernel table via sysctl().
1563 */
1564static int
1565sysctl_dumpentry(struct radix_node *rn, void *vw)
1566{
1567	struct walkarg *w = vw;
1568	struct rtentry *rt = (struct rtentry *)rn;
1569	int error = 0, size;
1570	struct rt_addrinfo info;
1571
1572	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1573		return 0;
1574	if ((rt->rt_flags & RTF_HOST) == 0
1575	    ? jailed_without_vnet(w->w_req->td->td_ucred)
1576	    : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0)
1577		return (0);
1578	bzero((caddr_t)&info, sizeof(info));
1579	info.rti_info[RTAX_DST] = rt_key(rt);
1580	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1581	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1582	info.rti_info[RTAX_GENMASK] = 0;
1583	if (rt->rt_ifp) {
1584		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
1585		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1586		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1587			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1588	}
1589	size = rt_msg2(RTM_GET, &info, NULL, w);
1590	if (w->w_req && w->w_tmem) {
1591		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1592
1593		if (rt->rt_flags & RTF_GWFLAG_COMPAT)
1594			rtm->rtm_flags = RTF_GATEWAY |
1595				(rt->rt_flags & ~RTF_GWFLAG_COMPAT);
1596		else
1597			rtm->rtm_flags = rt->rt_flags;
1598		rt_getmetrics(rt, &rtm->rtm_rmx);
1599		rtm->rtm_index = rt->rt_ifp->if_index;
1600		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1601		rtm->rtm_addrs = info.rti_addrs;
1602		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1603		return (error);
1604	}
1605	return (error);
1606}
1607
1608#ifdef COMPAT_FREEBSD32
1609static void
1610copy_ifdata32(struct if_data *src, struct if_data32 *dst)
1611{
1612
1613	bzero(dst, sizeof(*dst));
1614	CP(*src, *dst, ifi_type);
1615	CP(*src, *dst, ifi_physical);
1616	CP(*src, *dst, ifi_addrlen);
1617	CP(*src, *dst, ifi_hdrlen);
1618	CP(*src, *dst, ifi_link_state);
1619	CP(*src, *dst, ifi_vhid);
1620	CP(*src, *dst, ifi_baudrate_pf);
1621	dst->ifi_datalen = sizeof(struct if_data32);
1622	CP(*src, *dst, ifi_mtu);
1623	CP(*src, *dst, ifi_metric);
1624	CP(*src, *dst, ifi_baudrate);
1625	CP(*src, *dst, ifi_ipackets);
1626	CP(*src, *dst, ifi_ierrors);
1627	CP(*src, *dst, ifi_opackets);
1628	CP(*src, *dst, ifi_oerrors);
1629	CP(*src, *dst, ifi_collisions);
1630	CP(*src, *dst, ifi_ibytes);
1631	CP(*src, *dst, ifi_obytes);
1632	CP(*src, *dst, ifi_imcasts);
1633	CP(*src, *dst, ifi_omcasts);
1634	CP(*src, *dst, ifi_iqdrops);
1635	CP(*src, *dst, ifi_noproto);
1636	CP(*src, *dst, ifi_hwassist);
1637	CP(*src, *dst, ifi_epoch);
1638	TV_CP(*src, *dst, ifi_lastchange);
1639}
1640#endif
1641
1642static int
1643sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info,
1644    struct walkarg *w, int len)
1645{
1646	struct if_msghdrl *ifm;
1647
1648#ifdef COMPAT_FREEBSD32
1649	if (w->w_req->flags & SCTL_MASK32) {
1650		struct if_msghdrl32 *ifm32;
1651
1652		ifm32 = (struct if_msghdrl32 *)w->w_tmem;
1653		ifm32->ifm_addrs = info->rti_addrs;
1654		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1655		ifm32->ifm_index = ifp->if_index;
1656		ifm32->_ifm_spare1 = 0;
1657		ifm32->ifm_len = sizeof(*ifm32);
1658		ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
1659
1660		copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
1661		/* Fixup if_data carp(4) vhid. */
1662		if (carp_get_vhid_p != NULL)
1663			ifm32->ifm_data.ifi_vhid =
1664			    (*carp_get_vhid_p)(ifp->if_addr);
1665
1666		return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
1667	}
1668#endif
1669	ifm = (struct if_msghdrl *)w->w_tmem;
1670	ifm->ifm_addrs = info->rti_addrs;
1671	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1672	ifm->ifm_index = ifp->if_index;
1673	ifm->_ifm_spare1 = 0;
1674	ifm->ifm_len = sizeof(*ifm);
1675	ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
1676
1677	ifm->ifm_data = ifp->if_data;
1678	/* Fixup if_data carp(4) vhid. */
1679	if (carp_get_vhid_p != NULL)
1680		ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr);
1681
1682	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
1683}
1684
1685static int
1686sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info,
1687    struct walkarg *w, int len)
1688{
1689	struct if_msghdr *ifm;
1690
1691#ifdef COMPAT_FREEBSD32
1692	if (w->w_req->flags & SCTL_MASK32) {
1693		struct if_msghdr32 *ifm32;
1694
1695		ifm32 = (struct if_msghdr32 *)w->w_tmem;
1696		ifm32->ifm_addrs = info->rti_addrs;
1697		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1698		ifm32->ifm_index = ifp->if_index;
1699
1700		copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
1701		/* Fixup if_data carp(4) vhid. */
1702		if (carp_get_vhid_p != NULL)
1703			ifm32->ifm_data.ifi_vhid =
1704			    (*carp_get_vhid_p)(ifp->if_addr);
1705
1706		return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len));
1707	}
1708#endif
1709	ifm = (struct if_msghdr *)w->w_tmem;
1710	ifm->ifm_addrs = info->rti_addrs;
1711	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1712	ifm->ifm_index = ifp->if_index;
1713
1714	ifm->ifm_data = ifp->if_data;
1715	/* Fixup if_data carp(4) vhid. */
1716	if (carp_get_vhid_p != NULL)
1717		ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr);
1718
1719	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
1720}
1721
1722static int
1723sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
1724    struct walkarg *w, int len)
1725{
1726	struct ifa_msghdrl *ifam;
1727
1728#ifdef COMPAT_FREEBSD32
1729	if (w->w_req->flags & SCTL_MASK32) {
1730		struct ifa_msghdrl32 *ifam32;
1731
1732		ifam32 = (struct ifa_msghdrl32 *)w->w_tmem;
1733		ifam32->ifam_addrs = info->rti_addrs;
1734		ifam32->ifam_flags = ifa->ifa_flags;
1735		ifam32->ifam_index = ifa->ifa_ifp->if_index;
1736		ifam32->_ifam_spare1 = 0;
1737		ifam32->ifam_len = sizeof(*ifam32);
1738		ifam32->ifam_data_off =
1739		    offsetof(struct ifa_msghdrl32, ifam_data);
1740		ifam32->ifam_metric = ifa->ifa_metric;
1741
1742		copy_ifdata32(&ifa->ifa_ifp->if_data, &ifam32->ifam_data);
1743		/* Fixup if_data carp(4) vhid. */
1744		if (carp_get_vhid_p != NULL)
1745			ifam32->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);
1746
1747		return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len));
1748	}
1749#endif
1750
1751	ifam = (struct ifa_msghdrl *)w->w_tmem;
1752	ifam->ifam_addrs = info->rti_addrs;
1753	ifam->ifam_flags = ifa->ifa_flags;
1754	ifam->ifam_index = ifa->ifa_ifp->if_index;
1755	ifam->_ifam_spare1 = 0;
1756	ifam->ifam_len = sizeof(*ifam);
1757	ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
1758	ifam->ifam_metric = ifa->ifa_metric;
1759
1760	ifam->ifam_data = ifa->if_data;
1761	/* Fixup if_data carp(4) vhid. */
1762	if (carp_get_vhid_p != NULL)
1763		ifam->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa);
1764
1765	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
1766}
1767
1768static int
1769sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
1770    struct walkarg *w, int len)
1771{
1772	struct ifa_msghdr *ifam;
1773
1774	ifam = (struct ifa_msghdr *)w->w_tmem;
1775	ifam->ifam_addrs = info->rti_addrs;
1776	ifam->ifam_flags = ifa->ifa_flags;
1777	ifam->ifam_index = ifa->ifa_ifp->if_index;
1778	ifam->ifam_metric = ifa->ifa_metric;
1779
1780	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
1781}
1782
1783static int
1784sysctl_iflist(int af, struct walkarg *w)
1785{
1786	struct ifnet *ifp;
1787	struct ifaddr *ifa;
1788	struct rt_addrinfo info;
1789	int len, error = 0;
1790
1791	bzero((caddr_t)&info, sizeof(info));
1792	IFNET_RLOCK_NOSLEEP();
1793	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1794		if (w->w_arg && w->w_arg != ifp->if_index)
1795			continue;
1796		IF_ADDR_RLOCK(ifp);
1797		ifa = ifp->if_addr;
1798		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1799		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1800		info.rti_info[RTAX_IFP] = NULL;
1801		if (w->w_req && w->w_tmem) {
1802			if (w->w_op == NET_RT_IFLISTL)
1803				error = sysctl_iflist_ifml(ifp, &info, w, len);
1804			else
1805				error = sysctl_iflist_ifm(ifp, &info, w, len);
1806			if (error)
1807				goto done;
1808		}
1809		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1810			if (af && af != ifa->ifa_addr->sa_family)
1811				continue;
1812			if (prison_if(w->w_req->td->td_ucred,
1813			    ifa->ifa_addr) != 0)
1814				continue;
1815			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1816			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1817			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1818			len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1819			if (w->w_req && w->w_tmem) {
1820				if (w->w_op == NET_RT_IFLISTL)
1821					error = sysctl_iflist_ifaml(ifa, &info,
1822					    w, len);
1823				else
1824					error = sysctl_iflist_ifam(ifa, &info,
1825					    w, len);
1826				if (error)
1827					goto done;
1828			}
1829		}
1830		IF_ADDR_RUNLOCK(ifp);
1831		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1832			info.rti_info[RTAX_BRD] = NULL;
1833	}
1834done:
1835	if (ifp != NULL)
1836		IF_ADDR_RUNLOCK(ifp);
1837	IFNET_RUNLOCK_NOSLEEP();
1838	return (error);
1839}
1840
1841static int
1842sysctl_ifmalist(int af, struct walkarg *w)
1843{
1844	struct ifnet *ifp;
1845	struct ifmultiaddr *ifma;
1846	struct	rt_addrinfo info;
1847	int	len, error = 0;
1848	struct ifaddr *ifa;
1849
1850	bzero((caddr_t)&info, sizeof(info));
1851	IFNET_RLOCK_NOSLEEP();
1852	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1853		if (w->w_arg && w->w_arg != ifp->if_index)
1854			continue;
1855		ifa = ifp->if_addr;
1856		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1857		IF_ADDR_RLOCK(ifp);
1858		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1859			if (af && af != ifma->ifma_addr->sa_family)
1860				continue;
1861			if (prison_if(w->w_req->td->td_ucred,
1862			    ifma->ifma_addr) != 0)
1863				continue;
1864			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1865			info.rti_info[RTAX_GATEWAY] =
1866			    (ifma->ifma_addr->sa_family != AF_LINK) ?
1867			    ifma->ifma_lladdr : NULL;
1868			len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1869			if (w->w_req && w->w_tmem) {
1870				struct ifma_msghdr *ifmam;
1871
1872				ifmam = (struct ifma_msghdr *)w->w_tmem;
1873				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1874				ifmam->ifmam_flags = 0;
1875				ifmam->ifmam_addrs = info.rti_addrs;
1876				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1877				if (error) {
1878					IF_ADDR_RUNLOCK(ifp);
1879					goto done;
1880				}
1881			}
1882		}
1883		IF_ADDR_RUNLOCK(ifp);
1884	}
1885done:
1886	IFNET_RUNLOCK_NOSLEEP();
1887	return (error);
1888}
1889
1890static int
1891sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1892{
1893	int	*name = (int *)arg1;
1894	u_int	namelen = arg2;
1895	struct radix_node_head *rnh = NULL; /* silence compiler. */
1896	int	i, lim, error = EINVAL;
1897	int	fib = 0;
1898	u_char	af;
1899	struct	walkarg w;
1900
1901	name ++;
1902	namelen--;
1903	if (req->newptr)
1904		return (EPERM);
1905	if (name[1] == NET_RT_DUMP) {
1906		if (namelen == 3)
1907			fib = req->td->td_proc->p_fibnum;
1908		else if (namelen == 4)
1909			fib = (name[3] == -1) ?
1910			    req->td->td_proc->p_fibnum : name[3];
1911		else
1912			return ((namelen < 3) ? EISDIR : ENOTDIR);
1913		if (fib < 0 || fib >= rt_numfibs)
1914			return (EINVAL);
1915	} else if (namelen != 3)
1916		return ((namelen < 3) ? EISDIR : ENOTDIR);
1917	af = name[0];
1918	if (af > AF_MAX)
1919		return (EINVAL);
1920	bzero(&w, sizeof(w));
1921	w.w_op = name[1];
1922	w.w_arg = name[2];
1923	w.w_req = req;
1924
1925	error = sysctl_wire_old_buffer(req, 0);
1926	if (error)
1927		return (error);
1928	switch (w.w_op) {
1929
1930	case NET_RT_DUMP:
1931	case NET_RT_FLAGS:
1932		if (af == 0) {			/* dump all tables */
1933			i = 1;
1934			lim = AF_MAX;
1935		} else				/* dump only one table */
1936			i = lim = af;
1937
1938		/*
1939		 * take care of llinfo entries, the caller must
1940		 * specify an AF
1941		 */
1942		if (w.w_op == NET_RT_FLAGS &&
1943		    (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
1944			if (af != 0)
1945				error = lltable_sysctl_dumparp(af, w.w_req);
1946			else
1947				error = EINVAL;
1948			break;
1949		}
1950		/*
1951		 * take care of routing entries
1952		 */
1953		for (error = 0; error == 0 && i <= lim; i++) {
1954			rnh = rt_tables_get_rnh(fib, i);
1955			if (rnh != NULL) {
1956				RADIX_NODE_HEAD_RLOCK(rnh);
1957			    	error = rnh->rnh_walktree(rnh,
1958				    sysctl_dumpentry, &w);
1959				RADIX_NODE_HEAD_RUNLOCK(rnh);
1960			} else if (af != 0)
1961				error = EAFNOSUPPORT;
1962		}
1963		break;
1964
1965	case NET_RT_IFLIST:
1966	case NET_RT_IFLISTL:
1967		error = sysctl_iflist(af, &w);
1968		break;
1969
1970	case NET_RT_IFMALIST:
1971		error = sysctl_ifmalist(af, &w);
1972		break;
1973	}
1974	if (w.w_tmem)
1975		free(w.w_tmem, M_RTABLE);
1976	return (error);
1977}
1978
/*
 * Register the "routetable" sysctl node under _net with number PF_ROUTE,
 * flagged CTLFLAG_RD and handled by sysctl_rtsock().
 */
static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1980
/*
 * Definitions of protocols supported in the ROUTE domain.
 */

static struct domain routedomain;		/* or at least forward */

/*
 * The single protocol switch entry of the domain: a SOCK_RAW protocol
 * whose output routine is route_output() and whose user requests are
 * served by route_usrreqs.
 */
static struct protosw routesw[] = {
{
	.pr_type =		SOCK_RAW,
	.pr_domain =		&routedomain,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_output =		route_output,
	.pr_ctlinput =		raw_ctlinput,
	.pr_init =		raw_init,
	.pr_usrreqs =		&route_usrreqs
}
};

/*
 * The PF_ROUTE domain itself.  dom_protoswNPROTOSW points one element
 * past the end of routesw[].
 */
static struct domain routedomain = {
	.dom_family =		PF_ROUTE,
	.dom_name =		 "route",
	.dom_protosw =		routesw,
	.dom_protoswNPROTOSW =	&routesw[sizeof(routesw)/sizeof(routesw[0])]
};

/* Hook the domain definition above in via the VNET_DOMAIN_SET() macro. */
VNET_DOMAIN_SET(route);
2007