1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1988, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31#include "opt_ddb.h"
32#include "opt_route.h"
33#include "opt_inet.h"
34#include "opt_inet6.h"
35
36#include <sys/param.h>
37#include <sys/jail.h>
38#include <sys/kernel.h>
39#include <sys/eventhandler.h>
40#include <sys/domain.h>
41#include <sys/lock.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/protosw.h>
47#include <sys/rmlock.h>
48#include <sys/rwlock.h>
49#include <sys/signalvar.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/sysctl.h>
53#include <sys/systm.h>
54
55#include <net/if.h>
56#include <net/if_var.h>
57#include <net/if_private.h>
58#include <net/if_dl.h>
59#include <net/if_llatbl.h>
60#include <net/if_types.h>
61#include <net/netisr.h>
62#include <net/route.h>
63#include <net/route/route_ctl.h>
64#include <net/route/route_var.h>
65#include <net/vnet.h>
66
67#include <netinet/in.h>
68#include <netinet/if_ether.h>
69#include <netinet/ip_carp.h>
70#ifdef INET6
71#include <netinet6/in6_var.h>
72#include <netinet6/ip6_var.h>
73#include <netinet6/scope6_var.h>
74#endif
75#include <net/route/nhop.h>
76
77#define	DEBUG_MOD_NAME	rtsock
78#define	DEBUG_MAX_LEVEL	LOG_DEBUG
79#include <net/route/route_debug.h>
80_DECLARE_DEBUG(LOG_INFO);
81
82#ifdef COMPAT_FREEBSD32
83#include <sys/mount.h>
84#include <compat/freebsd32/freebsd32.h>
85
86struct if_msghdr32 {
87	uint16_t ifm_msglen;
88	uint8_t	ifm_version;
89	uint8_t	ifm_type;
90	int32_t	ifm_addrs;
91	int32_t	ifm_flags;
92	uint16_t ifm_index;
93	uint16_t _ifm_spare1;
94	struct	if_data ifm_data;
95};
96
97struct if_msghdrl32 {
98	uint16_t ifm_msglen;
99	uint8_t	ifm_version;
100	uint8_t	ifm_type;
101	int32_t	ifm_addrs;
102	int32_t	ifm_flags;
103	uint16_t ifm_index;
104	uint16_t _ifm_spare1;
105	uint16_t ifm_len;
106	uint16_t ifm_data_off;
107	uint32_t _ifm_spare2;
108	struct	if_data ifm_data;
109};
110
111struct ifa_msghdrl32 {
112	uint16_t ifam_msglen;
113	uint8_t	ifam_version;
114	uint8_t	ifam_type;
115	int32_t	ifam_addrs;
116	int32_t	ifam_flags;
117	uint16_t ifam_index;
118	uint16_t _ifam_spare1;
119	uint16_t ifam_len;
120	uint16_t ifam_data_off;
121	int32_t	ifam_metric;
122	struct	if_data ifam_data;
123};
124
/*
 * Space taken by sockaddr @sa when rounded up to a multiple of sizeof(int).
 * A sockaddr whose sa_len is zero still counts as sizeof(int) bytes.
 */
#define	SA_SIZE32(sa)							\
	((((struct sockaddr *)(sa))->sa_len == 0) ? sizeof(int) :	\
	    1 + ((((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(int) - 1)))
129
130#endif /* COMPAT_FREEBSD32 */
131
/*
 * Bookkeeping for a memory region that is consumed front to back:
 * @offset bytes out of @size, starting at @base, are already in use.
 */
struct linear_buffer {
	char		*base;		/* Start of the backing memory */
	uint32_t	offset;		/* Bytes consumed so far */
	uint32_t	size;		/* Capacity of the backing memory */
};
/* Bytes of scratch space rts_send() appends to its message allocation. */
#define	SCRATCH_BUFFER_SIZE	1024
138
139#define	RTS_PID_LOG(_l, _fmt, ...)	RT_LOG_##_l(_l, "PID %d: " _fmt, curproc ? curproc->p_pid : 0, ## __VA_ARGS__)
140
141MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
142
143/* NB: these are not modified */
144static struct	sockaddr route_src = { 2, PF_ROUTE, };
145static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
146
147/* These are external hooks for CARP. */
148int	(*carp_get_vhid_p)(struct ifaddr *);
149
/*
 * mbuf flag checked by rts_input(): when set, the message is delivered
 * only to sockets whose so_fibnum equals the FIB recorded in the mbuf.
 */
#define	RTS_FILTER_FIB	M_PROTO8
/*
 * Packet-header byte holding the address family of the notification;
 * rts_input() compares it against each listener's rcb_family.
 */
#define	m_rtsock_family	m_pkthdr.PH_loc.eight[0]
159
160struct rcb {
161	LIST_ENTRY(rcb) list;
162	struct socket	*rcb_socket;
163	sa_family_t	rcb_family;
164};
165
166typedef struct {
167	LIST_HEAD(, rcb)	cblist;
168	int	ip_count;	/* attached w/ AF_INET */
169	int	ip6_count;	/* attached w/ AF_INET6 */
170	int	any_count;	/* total attached */
171} route_cb_t;
172VNET_DEFINE_STATIC(route_cb_t, route_cb);
173#define	V_route_cb VNET(route_cb)
174
175struct mtx rtsock_mtx;
176MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
177
178#define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
179#define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
180#define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
181
182SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
183
184struct walkarg {
185	int	family;
186	int	w_tmemsize;
187	int	w_op, w_arg;
188	caddr_t	w_tmem;
189	struct sysctl_req *w_req;
190	struct sockaddr *dst;
191	struct sockaddr *mask;
192};
193
194static void	rts_input(struct mbuf *m);
195static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
196static int	rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
197			struct walkarg *w, int *plen);
198static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
199			struct rt_addrinfo *rtinfo);
200static int	cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb);
201static int	sysctl_dumpentry(struct rtentry *rt, void *vw);
202static int	sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh,
203			uint32_t weight, struct walkarg *w);
204static int	sysctl_iflist(int af, struct walkarg *w);
205static int	sysctl_ifmalist(int af, struct walkarg *w);
206static void	rt_getmetrics(const struct rtentry *rt,
207			const struct nhop_object *nh, struct rt_metrics *out);
208static void	rt_dispatch(struct mbuf *, sa_family_t);
209static void	rt_ifannouncemsg(struct ifnet *ifp, int what);
210static int	handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
211			struct rt_msghdr *rtm, struct rib_cmd_info *rc);
212static int	update_rtm_from_rc(struct rt_addrinfo *info,
213			struct rt_msghdr **prtm, int alloc_len,
214			struct rib_cmd_info *rc, struct nhop_object *nh);
215static void	send_rtm_reply(struct socket *so, struct rt_msghdr *rtm,
216			struct mbuf *m, sa_family_t saf, u_int fibnum,
217			int rtm_errno);
218static void	rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc);
219static void	rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask);
220
221static struct netisr_handler rtsock_nh = {
222	.nh_name = "rtsock",
223	.nh_handler = rts_input,
224	.nh_proto = NETISR_ROUTE,
225	.nh_policy = NETISR_POLICY_SOURCE,
226};
227
228static int
229sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
230{
231	int error, qlimit;
232
233	netisr_getqlimit(&rtsock_nh, &qlimit);
234	error = sysctl_handle_int(oidp, &qlimit, 0, req);
235        if (error || !req->newptr)
236                return (error);
237	if (qlimit < 1)
238		return (EINVAL);
239	return (netisr_setqlimit(&rtsock_nh, qlimit));
240}
241SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen,
242    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
243    0, 0, sysctl_route_netisr_maxqlen, "I",
244    "maximum routing socket dispatch queue length");
245
246static void
247vnet_rts_init(void)
248{
249	int tmp;
250
251	if (IS_DEFAULT_VNET(curvnet)) {
252		if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
253			rtsock_nh.nh_qlimit = tmp;
254		netisr_register(&rtsock_nh);
255	}
256#ifdef VIMAGE
257	 else
258		netisr_register_vnet(&rtsock_nh);
259#endif
260}
261VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
262    vnet_rts_init, 0);
263
264#ifdef VIMAGE
265static void
266vnet_rts_uninit(void)
267{
268
269	netisr_unregister_vnet(&rtsock_nh);
270}
271VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
272    vnet_rts_uninit, 0);
273#endif
274
275static void
276report_route_event(const struct rib_cmd_info *rc, void *_cbdata)
277{
278	uint32_t fibnum = (uint32_t)(uintptr_t)_cbdata;
279	struct nhop_object *nh;
280
281	nh = rc->rc_cmd == RTM_DELETE ? rc->rc_nh_old : rc->rc_nh_new;
282	rt_routemsg(rc->rc_cmd, rc->rc_rt, nh, fibnum);
283}
284
285static void
286rts_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
287{
288#ifdef ROUTE_MPATH
289	if ((rc->rc_nh_new && NH_IS_NHGRP(rc->rc_nh_new)) ||
290	    (rc->rc_nh_old && NH_IS_NHGRP(rc->rc_nh_old))) {
291		rib_decompose_notification(rc, report_route_event,
292		    (void *)(uintptr_t)fibnum);
293	} else
294#endif
295		report_route_event(rc, (void *)(uintptr_t)fibnum);
296}
297static struct rtbridge rtsbridge = {
298	.route_f = rts_handle_route_event,
299	.ifmsg_f = rtsock_ifmsg,
300};
301static struct rtbridge *rtsbridge_orig_p;
302
303static void
304rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc)
305{
306	netlink_callback_p->route_f(fibnum, rc);
307}
308
309static void
310rtsock_init(void)
311{
312	rtsbridge_orig_p = rtsock_callback_p;
313	rtsock_callback_p = &rtsbridge;
314}
315SYSINIT(rtsock_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtsock_init, NULL);
316
317static void
318rts_handle_ifnet_arrival(void *arg __unused, struct ifnet *ifp)
319{
320	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
321}
322EVENTHANDLER_DEFINE(ifnet_arrival_event, rts_handle_ifnet_arrival, NULL, 0);
323
324static void
325rts_handle_ifnet_departure(void *arg __unused, struct ifnet *ifp)
326{
327	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
328}
329EVENTHANDLER_DEFINE(ifnet_departure_event, rts_handle_ifnet_departure, NULL, 0);
330
331static void
332rts_append_data(struct socket *so, struct mbuf *m)
333{
334
335	if (sbappendaddr(&so->so_rcv, &route_src, m, NULL) == 0) {
336		soroverflow(so);
337		m_freem(m);
338	} else
339		sorwakeup(so);
340}
341
342static void
343rts_input(struct mbuf *m)
344{
345	struct rcb *rcb;
346	struct socket *last;
347
348	last = NULL;
349	RTSOCK_LOCK();
350	LIST_FOREACH(rcb, &V_route_cb.cblist, list) {
351		if (rcb->rcb_family != AF_UNSPEC &&
352		    rcb->rcb_family != m->m_rtsock_family)
353			continue;
354		if ((m->m_flags & RTS_FILTER_FIB) &&
355		    M_GETFIB(m) != rcb->rcb_socket->so_fibnum)
356			continue;
357		if (last != NULL) {
358			struct mbuf *n;
359
360			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
361			if (n != NULL)
362				rts_append_data(last, n);
363		}
364		last = rcb->rcb_socket;
365	}
366	if (last != NULL)
367		rts_append_data(last, m);
368	else
369		m_freem(m);
370	RTSOCK_UNLOCK();
371}
372
/* Close handler: mark the socket as disconnected. */
static void
rts_close(struct socket *so)
{
	soisdisconnected(so);
}
379
380static SYSCTL_NODE(_net, OID_AUTO, rtsock, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
381    "Routing socket infrastructure");
382static u_long rts_sendspace = 8192;
383SYSCTL_ULONG(_net_rtsock, OID_AUTO, sendspace, CTLFLAG_RW, &rts_sendspace, 0,
384    "Default routing socket send space");
385static u_long rts_recvspace = 8192;
386SYSCTL_ULONG(_net_rtsock, OID_AUTO, recvspace, CTLFLAG_RW, &rts_recvspace, 0,
387    "Default routing socket receive space");
388
389static int
390rts_attach(struct socket *so, int proto, struct thread *td)
391{
392	struct rcb *rcb;
393	int error;
394
395	error = soreserve(so, rts_sendspace, rts_recvspace);
396	if (error)
397		return (error);
398
399	rcb = malloc(sizeof(*rcb), M_PCB, M_WAITOK);
400	rcb->rcb_socket = so;
401	rcb->rcb_family = proto;
402
403	so->so_pcb = rcb;
404	so->so_fibnum = td->td_proc->p_fibnum;
405	so->so_options |= SO_USELOOPBACK;
406
407	RTSOCK_LOCK();
408	LIST_INSERT_HEAD(&V_route_cb.cblist, rcb, list);
409	switch (proto) {
410	case AF_INET:
411		V_route_cb.ip_count++;
412		break;
413	case AF_INET6:
414		V_route_cb.ip6_count++;
415		break;
416	}
417	V_route_cb.any_count++;
418	RTSOCK_UNLOCK();
419	soisconnected(so);
420
421	return (0);
422}
423
424static void
425rts_detach(struct socket *so)
426{
427	struct rcb *rcb = so->so_pcb;
428
429	RTSOCK_LOCK();
430	LIST_REMOVE(rcb, list);
431	switch(rcb->rcb_family) {
432	case AF_INET:
433		V_route_cb.ip_count--;
434		break;
435	case AF_INET6:
436		V_route_cb.ip6_count--;
437		break;
438	}
439	V_route_cb.any_count--;
440	RTSOCK_UNLOCK();
441	free(rcb, M_PCB);
442	so->so_pcb = NULL;
443}
444
445static int
446rts_disconnect(struct socket *so)
447{
448
449	return (ENOTCONN);
450}
451
452static int
453rts_shutdown(struct socket *so, enum shutdown_how how)
454{
455	/*
456	 * Note: route socket marks itself as connected through its lifetime.
457	 */
458	switch (how) {
459	case SHUT_RD:
460		sorflush(so);
461		break;
462	case SHUT_RDWR:
463		sorflush(so);
464		/* FALLTHROUGH */
465	case SHUT_WR:
466		socantsendmore(so);
467	}
468
469	return (0);
470}
471
#ifndef _SOCKADDR_UNION_DEFINED
#define	_SOCKADDR_UNION_DEFINED
/*
 * Storage large enough for any of the address formats handled here,
 * viewable as the generic or the family-specific structure.
 */
union sockaddr_union {
	struct sockaddr		sa;	/* generic view */
	struct sockaddr_in	sin;	/* IPv4 view */
	struct sockaddr_in6	sin6;	/* IPv6 view */
};
#endif /* _SOCKADDR_UNION_DEFINED */
483
484static int
485rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
486    struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred)
487{
488#if defined(INET) || defined(INET6)
489	struct epoch_tracker et;
490#endif
491
492	/* First, see if the returned address is part of the jail. */
493	if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) {
494		info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
495		return (0);
496	}
497
498	switch (info->rti_info[RTAX_DST]->sa_family) {
499#ifdef INET
500	case AF_INET:
501	{
502		struct in_addr ia;
503		struct ifaddr *ifa;
504		int found;
505
506		found = 0;
507		/*
508		 * Try to find an address on the given outgoing interface
509		 * that belongs to the jail.
510		 */
511		NET_EPOCH_ENTER(et);
512		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
513			struct sockaddr *sa;
514			sa = ifa->ifa_addr;
515			if (sa->sa_family != AF_INET)
516				continue;
517			ia = ((struct sockaddr_in *)sa)->sin_addr;
518			if (prison_check_ip4(cred, &ia) == 0) {
519				found = 1;
520				break;
521			}
522		}
523		NET_EPOCH_EXIT(et);
524		if (!found) {
525			/*
526			 * As a last resort return the 'default' jail address.
527			 */
528			ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)->
529			    sin_addr;
530			if (prison_get_ip4(cred, &ia) != 0)
531				return (ESRCH);
532		}
533		bzero(&saun->sin, sizeof(struct sockaddr_in));
534		saun->sin.sin_len = sizeof(struct sockaddr_in);
535		saun->sin.sin_family = AF_INET;
536		saun->sin.sin_addr.s_addr = ia.s_addr;
537		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
538		break;
539	}
540#endif
541#ifdef INET6
542	case AF_INET6:
543	{
544		struct in6_addr ia6;
545		struct ifaddr *ifa;
546		int found;
547
548		found = 0;
549		/*
550		 * Try to find an address on the given outgoing interface
551		 * that belongs to the jail.
552		 */
553		NET_EPOCH_ENTER(et);
554		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
555			struct sockaddr *sa;
556			sa = ifa->ifa_addr;
557			if (sa->sa_family != AF_INET6)
558				continue;
559			bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
560			    &ia6, sizeof(struct in6_addr));
561			if (prison_check_ip6(cred, &ia6) == 0) {
562				found = 1;
563				break;
564			}
565		}
566		NET_EPOCH_EXIT(et);
567		if (!found) {
568			/*
569			 * As a last resort return the 'default' jail address.
570			 */
571			ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)->
572			    sin6_addr;
573			if (prison_get_ip6(cred, &ia6) != 0)
574				return (ESRCH);
575		}
576		bzero(&saun->sin6, sizeof(struct sockaddr_in6));
577		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
578		saun->sin6.sin6_family = AF_INET6;
579		bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
580		if (sa6_recoverscope(&saun->sin6) != 0)
581			return (ESRCH);
582		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
583		break;
584	}
585#endif
586	default:
587		return (ESRCH);
588	}
589	return (0);
590}
591
592static int
593fill_blackholeinfo(struct rt_addrinfo *info, union sockaddr_union *saun)
594{
595	struct ifaddr *ifa;
596	sa_family_t saf;
597
598	if (V_loif == NULL) {
599		RTS_PID_LOG(LOG_INFO, "Unable to add blackhole/reject nhop without loopback");
600		return (ENOTSUP);
601	}
602	info->rti_ifp = V_loif;
603
604	saf = info->rti_info[RTAX_DST]->sa_family;
605
606	CK_STAILQ_FOREACH(ifa, &info->rti_ifp->if_addrhead, ifa_link) {
607		if (ifa->ifa_addr->sa_family == saf) {
608			info->rti_ifa = ifa;
609			break;
610		}
611	}
612	if (info->rti_ifa == NULL) {
613		RTS_PID_LOG(LOG_INFO, "Unable to find ifa for blackhole/reject nhop");
614		return (ENOTSUP);
615	}
616
617	bzero(saun, sizeof(union sockaddr_union));
618	switch (saf) {
619#ifdef INET
620	case AF_INET:
621		saun->sin.sin_family = AF_INET;
622		saun->sin.sin_len = sizeof(struct sockaddr_in);
623		saun->sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
624		break;
625#endif
626#ifdef INET6
627	case AF_INET6:
628		saun->sin6.sin6_family = AF_INET6;
629		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
630		saun->sin6.sin6_addr = in6addr_loopback;
631		break;
632#endif
633	default:
634		RTS_PID_LOG(LOG_INFO, "unsupported family: %d", saf);
635		return (ENOTSUP);
636	}
637	info->rti_info[RTAX_GATEWAY] = &saun->sa;
638	info->rti_flags |= RTF_GATEWAY;
639
640	return (0);
641}
642
643/*
644 * Fills in @info based on userland-provided @rtm message.
645 *
646 * Returns 0 on success.
647 */
648static int
649fill_addrinfo(struct rt_msghdr *rtm, int len, struct linear_buffer *lb, u_int fibnum,
650    struct rt_addrinfo *info)
651{
652	int error;
653
654	rtm->rtm_pid = curproc->p_pid;
655	info->rti_addrs = rtm->rtm_addrs;
656
657	info->rti_mflags = rtm->rtm_inits;
658	info->rti_rmx = &rtm->rtm_rmx;
659
660	/*
661	 * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
662	 * link-local address because rtrequest requires addresses with
663	 * embedded scope id.
664	 */
665	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, info))
666		return (EINVAL);
667
668	info->rti_flags = rtm->rtm_flags;
669	error = cleanup_xaddrs(info, lb);
670	if (error != 0)
671		return (error);
672	/*
673	 * Verify that the caller has the appropriate privilege; RTM_GET
674	 * is the only operation the non-superuser is allowed.
675	 */
676	if (rtm->rtm_type != RTM_GET) {
677		error = priv_check(curthread, PRIV_NET_ROUTE);
678		if (error != 0)
679			return (error);
680	}
681
682	/*
683	 * The given gateway address may be an interface address.
684	 * For example, issuing a "route change" command on a route
685	 * entry that was created from a tunnel, and the gateway
686	 * address given is the local end point. In this case the
687	 * RTF_GATEWAY flag must be cleared or the destination will
688	 * not be reachable even though there is no error message.
689	 */
690	if (info->rti_info[RTAX_GATEWAY] != NULL &&
691	    info->rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
692		struct nhop_object *nh;
693
694		/*
695		 * A host route through the loopback interface is
696		 * installed for each interface address. In pre 8.0
697		 * releases the interface address of a PPP link type
698		 * is not reachable locally. This behavior is fixed as
699		 * part of the new L2/L3 redesign and rewrite work. The
700		 * signature of this interface address route is the
701		 * AF_LINK sa_family type of the gateway, and the
702		 * rt_ifp has the IFF_LOOPBACK flag set.
703		 */
704		nh = rib_lookup(fibnum, info->rti_info[RTAX_GATEWAY], NHR_NONE, 0);
705		if (nh != NULL && nh->gw_sa.sa_family == AF_LINK &&
706		    nh->nh_ifp->if_flags & IFF_LOOPBACK) {
707				info->rti_flags &= ~RTF_GATEWAY;
708				info->rti_flags |= RTF_GWFLAG_COMPAT;
709		}
710	}
711
712	return (0);
713}
714
715static struct nhop_object *
716select_nhop(struct nhop_object *nh, const struct sockaddr *gw)
717{
718	if (!NH_IS_NHGRP(nh))
719		return (nh);
720#ifdef ROUTE_MPATH
721	const struct weightened_nhop *wn;
722	uint32_t num_nhops;
723	wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
724	if (gw == NULL)
725		return (wn[0].nh);
726	for (int i = 0; i < num_nhops; i++) {
727		if (match_nhop_gw(wn[i].nh, gw))
728			return (wn[i].nh);
729	}
730#endif
731	return (NULL);
732}
733
734/*
735 * Handles RTM_GET message from routing socket, returning matching rt.
736 *
737 * Returns:
738 * 0 on success, with locked and referenced matching rt in @rt_nrt
739 * errno of failure
740 */
741static int
742handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
743    struct rt_msghdr *rtm, struct rib_cmd_info *rc)
744{
745	RIB_RLOCK_TRACKER;
746	struct rib_head *rnh;
747	struct nhop_object *nh;
748	sa_family_t saf;
749
750	saf = info->rti_info[RTAX_DST]->sa_family;
751
752	rnh = rt_tables_get_rnh(fibnum, saf);
753	if (rnh == NULL)
754		return (EAFNOSUPPORT);
755
756	RIB_RLOCK(rnh);
757
758	/*
759	 * By (implicit) convention host route (one without netmask)
760	 * means longest-prefix-match request and the route with netmask
761	 * means exact-match lookup.
762	 * As cleanup_xaddrs() cleans up info flags&addrs for the /32,/128
763	 * prefixes, use original data to check for the netmask presence.
764	 */
765	if ((rtm->rtm_addrs & RTA_NETMASK) == 0) {
766		/*
767		 * Provide longest prefix match for
768		 * address lookup (no mask).
769		 * 'route -n get addr'
770		 */
771		rc->rc_rt = (struct rtentry *) rnh->rnh_matchaddr(
772		    info->rti_info[RTAX_DST], &rnh->head);
773	} else
774		rc->rc_rt = (struct rtentry *) rnh->rnh_lookup(
775		    info->rti_info[RTAX_DST],
776		    info->rti_info[RTAX_NETMASK], &rnh->head);
777
778	if (rc->rc_rt == NULL) {
779		RIB_RUNLOCK(rnh);
780		return (ESRCH);
781	}
782
783	nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
784	if (nh == NULL) {
785		RIB_RUNLOCK(rnh);
786		return (ESRCH);
787	}
788	/*
789	 * If performing proxied L2 entry insertion, and
790	 * the actual PPP host entry is found, perform
791	 * another search to retrieve the prefix route of
792	 * the local end point of the PPP link.
793	 * TODO: move this logic to userland.
794	 */
795	if (rtm->rtm_flags & RTF_ANNOUNCE) {
796		struct sockaddr_storage laddr;
797
798		if (nh->nh_ifp != NULL &&
799		    nh->nh_ifp->if_type == IFT_PROPVIRTUAL) {
800			struct ifaddr *ifa;
801
802			ifa = ifa_ifwithnet(info->rti_info[RTAX_DST], 1,
803					RT_ALL_FIBS);
804			if (ifa != NULL)
805				rt_maskedcopy(ifa->ifa_addr,
806					      (struct sockaddr *)&laddr,
807					      ifa->ifa_netmask);
808		} else
809			rt_maskedcopy(nh->nh_ifa->ifa_addr,
810				      (struct sockaddr *)&laddr,
811				      nh->nh_ifa->ifa_netmask);
812		/*
813		 * refactor rt and no lock operation necessary
814		 */
815		rc->rc_rt = (struct rtentry *)rnh->rnh_matchaddr(
816		    (struct sockaddr *)&laddr, &rnh->head);
817		if (rc->rc_rt == NULL) {
818			RIB_RUNLOCK(rnh);
819			return (ESRCH);
820		}
821		nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
822		if (nh == NULL) {
823			RIB_RUNLOCK(rnh);
824			return (ESRCH);
825		}
826	}
827	rc->rc_nh_new = nh;
828	rc->rc_nh_weight = rc->rc_rt->rt_weight;
829	RIB_RUNLOCK(rnh);
830
831	return (0);
832}
833
/*
 * Zero @dst and @mask and stamp both with the length and family of an
 * AF_INET or AF_INET6 sockaddr, according to @family.  For any other
 * family (or one compiled out) both are left untouched.
 */
static void
init_sockaddrs_family(int family, struct sockaddr *dst, struct sockaddr *mask)
{
	switch (family) {
#ifdef INET
	case AF_INET:
	{
		struct sockaddr_in *d4 = (struct sockaddr_in *)dst;
		struct sockaddr_in *m4 = (struct sockaddr_in *)mask;

		bzero(d4, sizeof(*d4));
		bzero(m4, sizeof(*m4));

		d4->sin_len = sizeof(*d4);
		d4->sin_family = AF_INET;
		m4->sin_len = sizeof(*m4);
		m4->sin_family = AF_INET;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6:
	{
		struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;
		struct sockaddr_in6 *m6 = (struct sockaddr_in6 *)mask;

		bzero(d6, sizeof(*d6));
		bzero(m6, sizeof(*m6));

		d6->sin6_len = sizeof(*d6);
		d6->sin6_family = AF_INET6;
		m6->sin6_len = sizeof(*m6);
		m6->sin6_family = AF_INET6;
		break;
	}
#endif
	default:
		break;
	}
}
866
867static void
868export_rtaddrs(const struct rtentry *rt, struct sockaddr *dst,
869    struct sockaddr *mask)
870{
871#ifdef INET
872	if (dst->sa_family == AF_INET) {
873		struct sockaddr_in *dst4 = (struct sockaddr_in *)dst;
874		struct sockaddr_in *mask4 = (struct sockaddr_in *)mask;
875		uint32_t scopeid = 0;
876		rt_get_inet_prefix_pmask(rt, &dst4->sin_addr, &mask4->sin_addr,
877		    &scopeid);
878		return;
879	}
880#endif
881#ifdef INET6
882	if (dst->sa_family == AF_INET6) {
883		struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst;
884		struct sockaddr_in6 *mask6 = (struct sockaddr_in6 *)mask;
885		uint32_t scopeid = 0;
886		rt_get_inet6_prefix_pmask(rt, &dst6->sin6_addr,
887		    &mask6->sin6_addr, &scopeid);
888		dst6->sin6_scope_id = scopeid;
889		return;
890	}
891#endif
892}
893
894static int
895update_rtm_from_info(struct rt_addrinfo *info, struct rt_msghdr **prtm,
896    int alloc_len)
897{
898	struct rt_msghdr *rtm, *orig_rtm = NULL;
899	struct walkarg w;
900	int len;
901
902	rtm = *prtm;
903	/* Check if we need to realloc storage */
904	rtsock_msg_buffer(rtm->rtm_type, info, NULL, &len);
905	if (len > alloc_len) {
906		struct rt_msghdr *tmp_rtm;
907
908		tmp_rtm = malloc(len, M_TEMP, M_NOWAIT);
909		if (tmp_rtm == NULL)
910			return (ENOBUFS);
911		bcopy(rtm, tmp_rtm, rtm->rtm_msglen);
912		orig_rtm = rtm;
913		rtm = tmp_rtm;
914		alloc_len = len;
915
916		/*
917		 * Delay freeing original rtm as info contains
918		 * data referencing it.
919		 */
920	}
921
922	w.w_tmem = (caddr_t)rtm;
923	w.w_tmemsize = alloc_len;
924	rtsock_msg_buffer(rtm->rtm_type, info, &w, &len);
925	rtm->rtm_addrs = info->rti_addrs;
926
927	if (orig_rtm != NULL)
928		free(orig_rtm, M_TEMP);
929	*prtm = rtm;
930	return (0);
931}
932
933
934/*
935 * Update sockaddrs, flags, etc in @prtm based on @rc data.
936 * rtm can be reallocated.
937 *
938 * Returns 0 on success, along with pointer to (potentially reallocated)
939 *  rtm.
940 *
941 */
942static int
943update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm,
944    int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh)
945{
946	union sockaddr_union saun;
947	struct rt_msghdr *rtm;
948	struct ifnet *ifp;
949	int error;
950
951	rtm = *prtm;
952	union sockaddr_union sa_dst, sa_mask;
953	int family = info->rti_info[RTAX_DST]->sa_family;
954	init_sockaddrs_family(family, &sa_dst.sa, &sa_mask.sa);
955	export_rtaddrs(rc->rc_rt, &sa_dst.sa, &sa_mask.sa);
956
957	info->rti_info[RTAX_DST] = &sa_dst.sa;
958	info->rti_info[RTAX_NETMASK] = rt_is_host(rc->rc_rt) ? NULL : &sa_mask.sa;
959	info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
960	info->rti_info[RTAX_GENMASK] = 0;
961	ifp = nh->nh_ifp;
962	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
963		if (ifp) {
964			info->rti_info[RTAX_IFP] =
965			    ifp->if_addr->ifa_addr;
966			error = rtm_get_jailed(info, ifp, nh,
967			    &saun, curthread->td_ucred);
968			if (error != 0)
969				return (error);
970			if (ifp->if_flags & IFF_POINTOPOINT)
971				info->rti_info[RTAX_BRD] =
972				    nh->nh_ifa->ifa_dstaddr;
973			rtm->rtm_index = ifp->if_index;
974		} else {
975			info->rti_info[RTAX_IFP] = NULL;
976			info->rti_info[RTAX_IFA] = NULL;
977		}
978	} else if (ifp != NULL)
979		rtm->rtm_index = ifp->if_index;
980
981	if ((error = update_rtm_from_info(info, prtm, alloc_len)) != 0)
982		return (error);
983
984	rtm = *prtm;
985	rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh);
986	if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
987		rtm->rtm_flags = RTF_GATEWAY |
988			(rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
989	rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx);
990	rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight;
991
992	return (0);
993}
994
995#ifdef ROUTE_MPATH
996static void
997save_del_notification(const struct rib_cmd_info *rc, void *_cbdata)
998{
999	struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
1000
1001	if (rc->rc_cmd == RTM_DELETE)
1002		*rc_new = *rc;
1003}
1004
1005static void
1006save_add_notification(const struct rib_cmd_info *rc, void *_cbdata)
1007{
1008	struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
1009
1010	if (rc->rc_cmd == RTM_ADD)
1011		*rc_new = *rc;
1012}
1013#endif
1014
1015#if defined(INET6) || defined(INET)
1016static struct sockaddr *
1017alloc_sockaddr_aligned(struct linear_buffer *lb, int len)
1018{
1019	len = roundup2(len, sizeof(uint64_t));
1020	if (lb->offset + len > lb->size)
1021		return (NULL);
1022	struct sockaddr *sa = (struct sockaddr *)(lb->base + lb->offset);
1023	lb->offset += len;
1024	return (sa);
1025}
1026#endif
1027
1028static int
1029rts_send(struct socket *so, int flags, struct mbuf *m,
1030    struct sockaddr *nam, struct mbuf *control, struct thread *td)
1031{
1032	struct rt_msghdr *rtm = NULL;
1033	struct rt_addrinfo info;
1034	struct epoch_tracker et;
1035#ifdef INET6
1036	struct sockaddr_storage ss;
1037	struct sockaddr_in6 *sin6;
1038	int i, rti_need_deembed = 0;
1039#endif
1040	int alloc_len = 0, len, error = 0, fibnum;
1041	sa_family_t saf = AF_UNSPEC;
1042	struct rib_cmd_info rc;
1043	struct nhop_object *nh;
1044
1045	if ((flags & PRUS_OOB) || control != NULL) {
1046		m_freem(m);
1047		if (control != NULL)
1048			m_freem(control);
1049		return (EOPNOTSUPP);
1050	}
1051
1052	fibnum = so->so_fibnum;
1053#define senderr(e) { error = e; goto flush;}
1054	if (m == NULL || ((m->m_len < sizeof(long)) &&
1055		       (m = m_pullup(m, sizeof(long))) == NULL))
1056		return (ENOBUFS);
1057	if ((m->m_flags & M_PKTHDR) == 0)
1058		panic("route_output");
1059	NET_EPOCH_ENTER(et);
1060	len = m->m_pkthdr.len;
1061	if (len < sizeof(*rtm) ||
1062	    len != mtod(m, struct rt_msghdr *)->rtm_msglen)
1063		senderr(EINVAL);
1064
	/*
	 * Most of current messages are in range 200-240 bytes,
	 * minimize possible re-allocation on reply using larger size
	 * buffer aligned on 1k boundary.
	 */
1070	alloc_len = roundup2(len, 1024);
1071	int total_len = alloc_len + SCRATCH_BUFFER_SIZE;
1072	if ((rtm = malloc(total_len, M_TEMP, M_NOWAIT)) == NULL)
1073		senderr(ENOBUFS);
1074
1075	m_copydata(m, 0, len, (caddr_t)rtm);
1076	bzero(&info, sizeof(info));
1077	nh = NULL;
1078	struct linear_buffer lb = {
1079		.base = (char *)rtm + alloc_len,
1080		.size = SCRATCH_BUFFER_SIZE,
1081	};
1082
1083	if (rtm->rtm_version != RTM_VERSION) {
1084		/* Do not touch message since format is unknown */
1085		free(rtm, M_TEMP);
1086		rtm = NULL;
1087		senderr(EPROTONOSUPPORT);
1088	}
1089
1090	/*
1091	 * Starting from here, it is possible
1092	 * to alter original message and insert
1093	 * caller PID and error value.
1094	 */
1095
1096	if ((error = fill_addrinfo(rtm, len, &lb, fibnum, &info)) != 0) {
1097		senderr(error);
1098	}
	/* fill_addrinfo() embeds scope into IPv6 addresses */
1100#ifdef INET6
1101	rti_need_deembed = 1;
1102#endif
1103
1104	saf = info.rti_info[RTAX_DST]->sa_family;
1105
1106	/* support for new ARP code */
1107	if (rtm->rtm_flags & RTF_LLDATA) {
1108		error = lla_rt_output(rtm, &info);
1109		goto flush;
1110	}
1111
1112	union sockaddr_union gw_saun;
1113	int blackhole_flags = rtm->rtm_flags & (RTF_BLACKHOLE|RTF_REJECT);
1114	if (blackhole_flags != 0) {
1115		if (blackhole_flags != (RTF_BLACKHOLE | RTF_REJECT))
1116			error = fill_blackholeinfo(&info, &gw_saun);
1117		else {
1118			RTS_PID_LOG(LOG_DEBUG, "both BLACKHOLE and REJECT flags specifiied");
1119			error = EINVAL;
1120		}
1121		if (error != 0)
1122			senderr(error);
1123	}
1124
1125	switch (rtm->rtm_type) {
1126	case RTM_ADD:
1127	case RTM_CHANGE:
1128		if (rtm->rtm_type == RTM_ADD) {
1129			if (info.rti_info[RTAX_GATEWAY] == NULL) {
1130				RTS_PID_LOG(LOG_DEBUG, "RTM_ADD w/o gateway");
1131				senderr(EINVAL);
1132			}
1133		}
1134		error = rib_action(fibnum, rtm->rtm_type, &info, &rc);
1135		if (error == 0) {
1136			rtsock_notify_event(fibnum, &rc);
1137#ifdef ROUTE_MPATH
1138			if (NH_IS_NHGRP(rc.rc_nh_new) ||
1139			    (rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) {
1140				struct rib_cmd_info rc_simple = {};
1141				rib_decompose_notification(&rc,
1142				    save_add_notification, (void *)&rc_simple);
1143				rc = rc_simple;
1144			}
1145#endif
1146			/* nh MAY be empty if RTM_CHANGE request is no-op */
1147			nh = rc.rc_nh_new;
1148			if (nh != NULL) {
1149				rtm->rtm_index = nh->nh_ifp->if_index;
1150				rtm->rtm_flags = rc.rc_rt->rte_flags | nhop_get_rtflags(nh);
1151			}
1152		}
1153		break;
1154
1155	case RTM_DELETE:
1156		error = rib_action(fibnum, RTM_DELETE, &info, &rc);
1157		if (error == 0) {
1158			rtsock_notify_event(fibnum, &rc);
1159#ifdef ROUTE_MPATH
1160			if (NH_IS_NHGRP(rc.rc_nh_old) ||
1161			    (rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) {
1162				struct rib_cmd_info rc_simple = {};
1163				rib_decompose_notification(&rc,
1164				    save_del_notification, (void *)&rc_simple);
1165				rc = rc_simple;
1166			}
1167#endif
1168			nh = rc.rc_nh_old;
1169		}
1170		break;
1171
1172	case RTM_GET:
1173		error = handle_rtm_get(&info, fibnum, rtm, &rc);
1174		if (error != 0)
1175			senderr(error);
1176		nh = rc.rc_nh_new;
1177
1178		if (!rt_is_exportable(rc.rc_rt, curthread->td_ucred))
1179			senderr(ESRCH);
1180		break;
1181
1182	default:
1183		senderr(EOPNOTSUPP);
1184	}
1185
1186	if (error == 0 && nh != NULL) {
1187		error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh);
1188		/*
1189		 * Note that some sockaddr pointers may have changed to
		 * point to memory outside @rtm. Some may be pointing
1191		 * to the on-stack variables.
1192		 * Given that, any pointer in @info CANNOT BE USED.
1193		 */
1194
1195		/*
1196		 * scopeid deembedding has been performed while
1197		 * writing updated rtm in rtsock_msg_buffer().
1198		 * With that in mind, skip deembedding procedure below.
1199		 */
1200#ifdef INET6
1201		rti_need_deembed = 0;
1202#endif
1203	}
1204
1205flush:
1206	NET_EPOCH_EXIT(et);
1207
1208#ifdef INET6
1209	if (rtm != NULL) {
1210		if (rti_need_deembed) {
1211			/* sin6_scope_id is recovered before sending rtm. */
1212			sin6 = (struct sockaddr_in6 *)&ss;
1213			for (i = 0; i < RTAX_MAX; i++) {
1214				if (info.rti_info[i] == NULL)
1215					continue;
1216				if (info.rti_info[i]->sa_family != AF_INET6)
1217					continue;
1218				bcopy(info.rti_info[i], sin6, sizeof(*sin6));
1219				if (sa6_recoverscope(sin6) == 0)
1220					bcopy(sin6, info.rti_info[i],
1221						    sizeof(*sin6));
1222			}
1223			if (update_rtm_from_info(&info, &rtm, alloc_len) != 0) {
1224				if (error != 0)
1225					error = ENOBUFS;
1226			}
1227		}
1228	}
1229#endif
1230	send_rtm_reply(so, rtm, m, saf, fibnum, error);
1231
1232	return (error);
1233}
1234
1235/*
1236 * Sends the prepared reply message in @rtm to all rtsock clients.
1237 * Frees @m and @rtm.
1238 *
1239 */
1240static void
1241send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
1242    sa_family_t saf, u_int fibnum, int rtm_errno)
1243{
1244	struct rcb *rcb = NULL;
1245
1246	/*
1247	 * Check to see if we don't want our own messages.
1248	 */
1249	if ((so->so_options & SO_USELOOPBACK) == 0) {
1250		if (V_route_cb.any_count <= 1) {
1251			if (rtm != NULL)
1252				free(rtm, M_TEMP);
1253			m_freem(m);
1254			return;
1255		}
1256		/* There is another listener, so construct message */
1257		rcb = so->so_pcb;
1258	}
1259
1260	if (rtm != NULL) {
1261		if (rtm_errno!= 0)
1262			rtm->rtm_errno = rtm_errno;
1263		else
1264			rtm->rtm_flags |= RTF_DONE;
1265
1266		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
1267		if (m->m_pkthdr.len < rtm->rtm_msglen) {
1268			m_freem(m);
1269			m = NULL;
1270		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
1271			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
1272
1273		free(rtm, M_TEMP);
1274	}
1275	if (m != NULL) {
1276		M_SETFIB(m, fibnum);
1277		m->m_flags |= RTS_FILTER_FIB;
1278		if (rcb) {
1279			/*
1280			 * XXX insure we don't get a copy by
1281			 * invalidating our protocol
1282			 */
1283			sa_family_t family = rcb->rcb_family;
1284			rcb->rcb_family = AF_UNSPEC;
1285			rt_dispatch(m, saf);
1286			rcb->rcb_family = family;
1287		} else
1288			rt_dispatch(m, saf);
1289	}
1290}
1291
1292static void
1293rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh,
1294    struct rt_metrics *out)
1295{
1296
1297	bzero(out, sizeof(*out));
1298	out->rmx_mtu = nh->nh_mtu;
1299	out->rmx_weight = rt->rt_weight;
1300	out->rmx_nhidx = nhop_get_idx(nh);
1301	/* Kernel -> userland timebase conversion. */
1302	out->rmx_expire = nhop_get_expire(nh) ?
1303	    nhop_get_expire(nh) - time_uptime + time_second : 0;
1304}
1305
1306/*
1307 * Extract the addresses of the passed sockaddrs.
1308 * Do a little sanity checking so as to avoid bad memory references.
1309 * This data is derived straight from userland.
1310 */
1311static int
1312rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1313{
1314	struct sockaddr *sa;
1315	int i;
1316
1317	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
1318		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1319			continue;
1320		sa = (struct sockaddr *)cp;
1321		/*
1322		 * It won't fit.
1323		 */
1324		if (cp + sa->sa_len > cplim) {
1325			RTS_PID_LOG(LOG_DEBUG, "sa_len too big for sa type %d", i);
1326			return (EINVAL);
1327		}
1328		/*
1329		 * there are no more.. quit now
1330		 * If there are more bits, they are in error.
1331		 * I've seen this. route(1) can evidently generate these.
1332		 * This causes kernel to core dump.
1333		 * for compatibility, If we see this, point to a safe address.
1334		 */
1335		if (sa->sa_len == 0) {
1336			rtinfo->rti_info[i] = &sa_zero;
1337			return (0); /* should be EINVAL but for compat */
1338		}
1339		/* accept it */
1340#ifdef INET6
1341		if (sa->sa_family == AF_INET6)
1342			sa6_embedscope((struct sockaddr_in6 *)sa,
1343			    V_ip6_use_defzone);
1344#endif
1345		rtinfo->rti_info[i] = sa;
1346		cp += SA_SIZE(sa);
1347	}
1348	return (0);
1349}
1350
1351#ifdef INET
1352static inline void
1353fill_sockaddr_inet(struct sockaddr_in *sin, struct in_addr addr)
1354{
1355
1356	const struct sockaddr_in nsin = {
1357		.sin_family = AF_INET,
1358		.sin_len = sizeof(struct sockaddr_in),
1359		.sin_addr = addr,
1360	};
1361	*sin = nsin;
1362}
1363#endif
1364
1365#ifdef INET6
1366static inline void
1367fill_sockaddr_inet6(struct sockaddr_in6 *sin6, const struct in6_addr *addr6,
1368    uint32_t scopeid)
1369{
1370
1371	const struct sockaddr_in6 nsin6 = {
1372		.sin6_family = AF_INET6,
1373		.sin6_len = sizeof(struct sockaddr_in6),
1374		.sin6_addr = *addr6,
1375		.sin6_scope_id = scopeid,
1376	};
1377	*sin6 = nsin6;
1378}
1379#endif
1380
1381#if defined(INET6) || defined(INET)
1382/*
1383 * Checks if gateway is suitable for lltable operations.
1384 * Lltable code requires AF_LINK gateway with ifindex
1385 *  and mac address specified.
1386 * Returns 0 on success.
1387 */
1388static int
1389cleanup_xaddrs_lladdr(struct rt_addrinfo *info)
1390{
1391	struct sockaddr_dl *sdl = (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
1392
1393	if (sdl->sdl_family != AF_LINK)
1394		return (EINVAL);
1395
1396	if (sdl->sdl_index == 0) {
1397		RTS_PID_LOG(LOG_DEBUG, "AF_LINK gateway w/o ifindex");
1398		return (EINVAL);
1399	}
1400
1401	if (offsetof(struct sockaddr_dl, sdl_data) + sdl->sdl_nlen + sdl->sdl_alen > sdl->sdl_len) {
1402		RTS_PID_LOG(LOG_DEBUG, "AF_LINK gw: sdl_nlen/sdl_alen too large");
1403		return (EINVAL);
1404	}
1405
1406	return (0);
1407}
1408
1409static int
1410cleanup_xaddrs_gateway(struct rt_addrinfo *info, struct linear_buffer *lb)
1411{
1412	struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
1413	struct sockaddr *sa;
1414
1415	if (info->rti_flags & RTF_LLDATA)
1416		return (cleanup_xaddrs_lladdr(info));
1417
1418	switch (gw->sa_family) {
1419#ifdef INET
1420	case AF_INET:
1421		{
1422			struct sockaddr_in *gw_sin = (struct sockaddr_in *)gw;
1423
1424			/* Ensure reads do not go beyoud SA boundary */
1425			if (SA_SIZE(gw) < offsetof(struct sockaddr_in, sin_zero)) {
1426				RTS_PID_LOG(LOG_DEBUG, "gateway sin_len too small: %d",
1427				    gw->sa_len);
1428				return (EINVAL);
1429			}
1430			sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in));
1431			if (sa == NULL)
1432				return (ENOBUFS);
1433			fill_sockaddr_inet((struct sockaddr_in *)sa, gw_sin->sin_addr);
1434			info->rti_info[RTAX_GATEWAY] = sa;
1435		}
1436		break;
1437#endif
1438#ifdef INET6
1439	case AF_INET6:
1440		{
1441			struct sockaddr_in6 *gw_sin6 = (struct sockaddr_in6 *)gw;
1442			if (gw_sin6->sin6_len < sizeof(struct sockaddr_in6)) {
1443				RTS_PID_LOG(LOG_DEBUG, "gateway sin6_len too small: %d",
1444				    gw->sa_len);
1445				return (EINVAL);
1446			}
1447			fill_sockaddr_inet6(gw_sin6, &gw_sin6->sin6_addr, 0);
1448			break;
1449		}
1450#endif
1451	case AF_LINK:
1452		{
1453			struct sockaddr_dl *gw_sdl;
1454
1455			size_t sdl_min_len = offsetof(struct sockaddr_dl, sdl_data);
1456			gw_sdl = (struct sockaddr_dl *)gw;
1457			if (gw_sdl->sdl_len < sdl_min_len) {
1458				RTS_PID_LOG(LOG_DEBUG, "gateway sdl_len too small: %d",
1459				    gw_sdl->sdl_len);
1460				return (EINVAL);
1461			}
1462			sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_dl_short));
1463			if (sa == NULL)
1464				return (ENOBUFS);
1465
1466			const struct sockaddr_dl_short sdl = {
1467				.sdl_family = AF_LINK,
1468				.sdl_len = sizeof(struct sockaddr_dl_short),
1469				.sdl_index = gw_sdl->sdl_index,
1470			};
1471			*((struct sockaddr_dl_short *)sa) = sdl;
1472			info->rti_info[RTAX_GATEWAY] = sa;
1473			break;
1474		}
1475	}
1476
1477	return (0);
1478}
1479#endif
1480
1481static void
1482remove_netmask(struct rt_addrinfo *info)
1483{
1484	info->rti_info[RTAX_NETMASK] = NULL;
1485	info->rti_flags |= RTF_HOST;
1486	info->rti_addrs &= ~RTA_NETMASK;
1487}
1488
1489#ifdef INET
1490static int
1491cleanup_xaddrs_inet(struct rt_addrinfo *info, struct linear_buffer *lb)
1492{
1493	struct sockaddr_in *dst_sa, *mask_sa;
1494	const int sa_len = sizeof(struct sockaddr_in);
1495	struct in_addr dst, mask;
1496
1497	/* Check & fixup dst/netmask combination first */
1498	dst_sa = (struct sockaddr_in *)info->rti_info[RTAX_DST];
1499	mask_sa = (struct sockaddr_in *)info->rti_info[RTAX_NETMASK];
1500
1501	/* Ensure reads do not go beyound the buffer size */
1502	if (SA_SIZE(dst_sa) < offsetof(struct sockaddr_in, sin_zero)) {
1503		RTS_PID_LOG(LOG_DEBUG, "prefix dst sin_len too small: %d",
1504		    dst_sa->sin_len);
1505		return (EINVAL);
1506	}
1507
1508	if ((mask_sa != NULL) && mask_sa->sin_len < sizeof(struct sockaddr_in)) {
1509		/*
1510		 * Some older routing software encode mask length into the
1511		 * sin_len, thus resulting in "truncated" sockaddr.
1512		 */
1513		int len = mask_sa->sin_len - offsetof(struct sockaddr_in, sin_addr);
1514		if (len >= 0) {
1515			mask.s_addr = 0;
1516			if (len > sizeof(struct in_addr))
1517				len = sizeof(struct in_addr);
1518			memcpy(&mask, &mask_sa->sin_addr, len);
1519		} else {
1520			RTS_PID_LOG(LOG_DEBUG, "prefix mask sin_len too small: %d",
1521			    mask_sa->sin_len);
1522			return (EINVAL);
1523		}
1524	} else
1525		mask.s_addr = mask_sa ? mask_sa->sin_addr.s_addr : INADDR_BROADCAST;
1526
1527	dst.s_addr = htonl(ntohl(dst_sa->sin_addr.s_addr) & ntohl(mask.s_addr));
1528
1529	/* Construct new "clean" dst/mask sockaddresses */
1530	if ((dst_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1531		return (ENOBUFS);
1532	fill_sockaddr_inet(dst_sa, dst);
1533	info->rti_info[RTAX_DST] = (struct sockaddr *)dst_sa;
1534
1535	if (mask.s_addr != INADDR_BROADCAST) {
1536		if ((mask_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1537			return (ENOBUFS);
1538		fill_sockaddr_inet(mask_sa, mask);
1539		info->rti_info[RTAX_NETMASK] = (struct sockaddr *)mask_sa;
1540		info->rti_flags &= ~RTF_HOST;
1541	} else
1542		remove_netmask(info);
1543
1544	/* Check gateway */
1545	if (info->rti_info[RTAX_GATEWAY] != NULL)
1546		return (cleanup_xaddrs_gateway(info, lb));
1547
1548	return (0);
1549}
1550#endif
1551
1552#ifdef INET6
1553static int
1554cleanup_xaddrs_inet6(struct rt_addrinfo *info, struct linear_buffer *lb)
1555{
1556	struct sockaddr *sa;
1557	struct sockaddr_in6 *dst_sa, *mask_sa;
1558	struct in6_addr mask, *dst;
1559	const int sa_len = sizeof(struct sockaddr_in6);
1560
1561	/* Check & fixup dst/netmask combination first */
1562	dst_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_DST];
1563	mask_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_NETMASK];
1564
1565	if (dst_sa->sin6_len < sizeof(struct sockaddr_in6)) {
1566		RTS_PID_LOG(LOG_DEBUG, "prefix dst sin6_len too small: %d",
1567		    dst_sa->sin6_len);
1568		return (EINVAL);
1569	}
1570
1571	if (mask_sa && mask_sa->sin6_len < sizeof(struct sockaddr_in6)) {
1572		/*
1573		 * Some older routing software encode mask length into the
1574		 * sin6_len, thus resulting in "truncated" sockaddr.
1575		 */
1576		int len = mask_sa->sin6_len - offsetof(struct sockaddr_in6, sin6_addr);
1577		if (len >= 0) {
1578			bzero(&mask, sizeof(mask));
1579			if (len > sizeof(struct in6_addr))
1580				len = sizeof(struct in6_addr);
1581			memcpy(&mask, &mask_sa->sin6_addr, len);
1582		} else {
1583			RTS_PID_LOG(LOG_DEBUG, "rtsock: prefix mask sin6_len too small: %d",
1584			    mask_sa->sin6_len);
1585			return (EINVAL);
1586		}
1587	} else
1588		mask = mask_sa ? mask_sa->sin6_addr : in6mask128;
1589
1590	dst = &dst_sa->sin6_addr;
1591	IN6_MASK_ADDR(dst, &mask);
1592
1593	if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1594		return (ENOBUFS);
1595	fill_sockaddr_inet6((struct sockaddr_in6 *)sa, dst, 0);
1596	info->rti_info[RTAX_DST] = sa;
1597
1598	if (!IN6_ARE_ADDR_EQUAL(&mask, &in6mask128)) {
1599		if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1600			return (ENOBUFS);
1601		fill_sockaddr_inet6((struct sockaddr_in6 *)sa, &mask, 0);
1602		info->rti_info[RTAX_NETMASK] = sa;
1603		info->rti_flags &= ~RTF_HOST;
1604	} else
1605		remove_netmask(info);
1606
1607	/* Check gateway */
1608	if (info->rti_info[RTAX_GATEWAY] != NULL)
1609		return (cleanup_xaddrs_gateway(info, lb));
1610
1611	return (0);
1612}
1613#endif
1614
1615static int
1616cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb)
1617{
1618	int error = EAFNOSUPPORT;
1619
1620	if (info->rti_info[RTAX_DST] == NULL) {
1621		RTS_PID_LOG(LOG_DEBUG, "prefix dst is not set");
1622		return (EINVAL);
1623	}
1624
1625	if (info->rti_flags & RTF_LLDATA) {
1626		/*
1627		 * arp(8)/ndp(8) sends RTA_NETMASK for the associated
1628		 * prefix along with the actual address in RTA_DST.
1629		 * Remove netmask to avoid unnecessary address masking.
1630		 */
1631		remove_netmask(info);
1632	}
1633
1634	switch (info->rti_info[RTAX_DST]->sa_family) {
1635#ifdef INET
1636	case AF_INET:
1637		error = cleanup_xaddrs_inet(info, lb);
1638		break;
1639#endif
1640#ifdef INET6
1641	case AF_INET6:
1642		error = cleanup_xaddrs_inet6(info, lb);
1643		break;
1644#endif
1645	}
1646
1647	return (error);
1648}
1649
1650/*
1651 * Fill in @dmask with valid netmask leaving original @smask
1652 * intact. Mostly used with radix netmasks.
1653 */
1654struct sockaddr *
1655rtsock_fix_netmask(const struct sockaddr *dst, const struct sockaddr *smask,
1656    struct sockaddr_storage *dmask)
1657{
1658	if (dst == NULL || smask == NULL)
1659		return (NULL);
1660
1661	memset(dmask, 0, dst->sa_len);
1662	memcpy(dmask, smask, smask->sa_len);
1663	dmask->ss_len = dst->sa_len;
1664	dmask->ss_family = dst->sa_family;
1665
1666	return ((struct sockaddr *)dmask);
1667}
1668
1669/*
1670 * Writes information related to @rtinfo object to newly-allocated mbuf.
1671 * Assumes MCLBYTES is enough to construct any message.
1672 * Used for OS notifications of vaious events (if/ifa announces,etc)
1673 *
1674 * Returns allocated mbuf or NULL on failure.
1675 */
1676static struct mbuf *
1677rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
1678{
1679	struct sockaddr_storage ss;
1680	struct rt_msghdr *rtm;
1681	struct mbuf *m;
1682	int i;
1683	struct sockaddr *sa;
1684#ifdef INET6
1685	struct sockaddr_in6 *sin6;
1686#endif
1687	int len, dlen;
1688
1689	switch (type) {
1690	case RTM_DELADDR:
1691	case RTM_NEWADDR:
1692		len = sizeof(struct ifa_msghdr);
1693		break;
1694
1695	case RTM_DELMADDR:
1696	case RTM_NEWMADDR:
1697		len = sizeof(struct ifma_msghdr);
1698		break;
1699
1700	case RTM_IFINFO:
1701		len = sizeof(struct if_msghdr);
1702		break;
1703
1704	case RTM_IFANNOUNCE:
1705	case RTM_IEEE80211:
1706		len = sizeof(struct if_announcemsghdr);
1707		break;
1708
1709	default:
1710		len = sizeof(struct rt_msghdr);
1711	}
1712
1713	/* XXXGL: can we use MJUMPAGESIZE cluster here? */
1714	KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
1715	if (len > MHLEN)
1716		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1717	else
1718		m = m_gethdr(M_NOWAIT, MT_DATA);
1719	if (m == NULL)
1720		return (m);
1721
1722	m->m_pkthdr.len = m->m_len = len;
1723	rtm = mtod(m, struct rt_msghdr *);
1724	bzero((caddr_t)rtm, len);
1725	for (i = 0; i < RTAX_MAX; i++) {
1726		if ((sa = rtinfo->rti_info[i]) == NULL)
1727			continue;
1728		rtinfo->rti_addrs |= (1 << i);
1729
1730		dlen = SA_SIZE(sa);
1731		KASSERT(dlen <= sizeof(ss),
1732		    ("%s: sockaddr size overflow", __func__));
1733		bzero(&ss, sizeof(ss));
1734		bcopy(sa, &ss, sa->sa_len);
1735		sa = (struct sockaddr *)&ss;
1736#ifdef INET6
1737		if (sa->sa_family == AF_INET6) {
1738			sin6 = (struct sockaddr_in6 *)sa;
1739			(void)sa6_recoverscope(sin6);
1740		}
1741#endif
1742		m_copyback(m, len, dlen, (caddr_t)sa);
1743		len += dlen;
1744	}
1745	if (m->m_pkthdr.len != len) {
1746		m_freem(m);
1747		return (NULL);
1748	}
1749	rtm->rtm_msglen = len;
1750	rtm->rtm_version = RTM_VERSION;
1751	rtm->rtm_type = type;
1752	return (m);
1753}
1754
/*
 * Writes information related to @rtinfo object to preallocated buffer.
 * Stores needed size in @plen. If @w is NULL, calculates size without
 * writing.
 * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
 *
 * When @w is not NULL the message is assembled in w->w_tmem, which
 * holds w->w_tmemsize bytes.  @rtinfo->rti_addrs is rebuilt from the
 * non-NULL entries of @rtinfo->rti_info[].  Only rtm_version, rtm_type
 * and rtm_msglen are filled in the header.
 *
 * Returns 0 on success.
 * Returns ENOBUFS if @w is not NULL and its buffer is too small; @plen
 * still receives the needed size in that case.
 *
 */
static int
rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
{
	struct sockaddr_storage ss;
	int len, buflen = 0, dlen, i;
	/* Write cursor; NULL means "only count, do not write". */
	caddr_t cp = NULL;
	struct rt_msghdr *rtm = NULL;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif
#ifdef COMPAT_FREEBSD32
	bool compat32 = false;
#endif

	/* Pick the header size for this message type and request kind. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
#ifdef COMPAT_FREEBSD32
			if (w->w_req->flags & SCTL_MASK32) {
				len = sizeof(struct ifa_msghdrl32);
				compat32 = true;
			} else
#endif
				len = sizeof(struct ifa_msghdrl);
		} else
			len = sizeof(struct ifa_msghdr);
		break;

	case RTM_IFINFO:
#ifdef COMPAT_FREEBSD32
		if (w != NULL && w->w_req->flags & SCTL_MASK32) {
			if (w->w_op == NET_RT_IFLISTL)
				len = sizeof(struct if_msghdrl32);
			else
				len = sizeof(struct if_msghdr32);
			compat32 = true;
			break;
		}
#endif
		if (w != NULL && w->w_op == NET_RT_IFLISTL)
			len = sizeof(struct if_msghdrl);
		else
			len = sizeof(struct if_msghdr);
		break;

	case RTM_NEWMADDR:
		len = sizeof(struct ifma_msghdr);
		break;

	default:
		len = sizeof(struct rt_msghdr);
	}

	if (w != NULL) {
		/* Sockaddrs go right after the header in the walker's buffer. */
		rtm = (struct rt_msghdr *)w->w_tmem;
		buflen = w->w_tmemsize - len;
		cp = (caddr_t)w->w_tmem + len;
	}

	rtinfo->rti_addrs = 0;
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa;

		if ((sa = rtinfo->rti_info[i]) == NULL)
			continue;
		rtinfo->rti_addrs |= (1 << i);
#ifdef COMPAT_FREEBSD32
		if (compat32)
			dlen = SA_SIZE32(sa);
		else
#endif
			dlen = SA_SIZE(sa);
		if (cp != NULL && buflen >= dlen) {
			KASSERT(dlen <= sizeof(ss),
			    ("%s: sockaddr size overflow", __func__));
			/* Copy via a zeroed scratch sockaddr; the source is not modified. */
			bzero(&ss, sizeof(ss));
			bcopy(sa, &ss, sa->sa_len);
			sa = (struct sockaddr *)&ss;
#ifdef INET6
			if (sa->sa_family == AF_INET6) {
				sin6 = (struct sockaddr_in6 *)sa;
				(void)sa6_recoverscope(sin6);
			}
#endif
			bcopy((caddr_t)sa, cp, (unsigned)dlen);
			cp += dlen;
			buflen -= dlen;
		} else if (cp != NULL) {
			/*
			 * Buffer too small. Count needed size
			 * and return with error.
			 */
			cp = NULL;
		}

		/* The size is accumulated whether or not anything was written. */
		len += dlen;
	}

	if (cp != NULL) {
		/* Zero the padding that rounds the message up to ALIGN(). */
		dlen = ALIGN(len) - len;
		if (buflen < dlen)
			cp = NULL;
		else {
			bzero(cp, dlen);
			cp += dlen;
			buflen -= dlen;
		}
	}
	len = ALIGN(len);

	if (cp != NULL) {
		/* fill header iff buffer is large enough */
		rtm->rtm_version = RTM_VERSION;
		rtm->rtm_type = type;
		rtm->rtm_msglen = len;
	}

	*plen = len;

	/* A buffer was supplied but the message did not fit into it. */
	if (w != NULL && cp == NULL)
		return (ENOBUFS);

	return (0);
}
1889
1890/*
1891 * This routine is called to generate a message from the routing
1892 * socket indicating that a redirect has occurred, a routing lookup
1893 * has failed, or that a protocol has detected timeouts to a particular
1894 * destination.
1895 */
1896void
1897rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
1898    int fibnum)
1899{
1900	struct rt_msghdr *rtm;
1901	struct mbuf *m;
1902	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1903
1904	if (V_route_cb.any_count == 0)
1905		return;
1906	m = rtsock_msg_mbuf(type, rtinfo);
1907	if (m == NULL)
1908		return;
1909
1910	if (fibnum != RT_ALL_FIBS) {
1911		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
1912		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
1913		M_SETFIB(m, fibnum);
1914		m->m_flags |= RTS_FILTER_FIB;
1915	}
1916
1917	rtm = mtod(m, struct rt_msghdr *);
1918	rtm->rtm_flags = RTF_DONE | flags;
1919	rtm->rtm_errno = error;
1920	rtm->rtm_addrs = rtinfo->rti_addrs;
1921	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1922}
1923
1924void
1925rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1926{
1927
1928	rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
1929}
1930
1931/*
1932 * This routine is called to generate a message from the routing
1933 * socket indicating that the status of a network interface has changed.
1934 */
1935static void
1936rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask __unused)
1937{
1938	struct if_msghdr *ifm;
1939	struct mbuf *m;
1940	struct rt_addrinfo info;
1941
1942	if (V_route_cb.any_count == 0)
1943		return;
1944	bzero((caddr_t)&info, sizeof(info));
1945	m = rtsock_msg_mbuf(RTM_IFINFO, &info);
1946	if (m == NULL)
1947		return;
1948	ifm = mtod(m, struct if_msghdr *);
1949	ifm->ifm_index = ifp->if_index;
1950	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1951	if_data_copy(ifp, &ifm->ifm_data);
1952	ifm->ifm_addrs = 0;
1953	rt_dispatch(m, AF_UNSPEC);
1954}
1955
1956/*
1957 * Announce interface address arrival/withdraw.
1958 * Please do not call directly, use rt_addrmsg().
1959 * Assume input data to be valid.
1960 * Returns 0 on success.
1961 */
1962int
1963rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
1964{
1965	struct rt_addrinfo info;
1966	struct sockaddr *sa;
1967	int ncmd;
1968	struct mbuf *m;
1969	struct ifa_msghdr *ifam;
1970	struct ifnet *ifp = ifa->ifa_ifp;
1971	struct sockaddr_storage ss;
1972
1973	if (V_route_cb.any_count == 0)
1974		return (0);
1975
1976	ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1977
1978	bzero((caddr_t)&info, sizeof(info));
1979	info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1980	info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1981	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
1982	    info.rti_info[RTAX_IFA], ifa->ifa_netmask, &ss);
1983	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1984	if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
1985		return (ENOBUFS);
1986	ifam = mtod(m, struct ifa_msghdr *);
1987	ifam->ifam_index = ifp->if_index;
1988	ifam->ifam_metric = ifa->ifa_ifp->if_metric;
1989	ifam->ifam_flags = ifa->ifa_flags;
1990	ifam->ifam_addrs = info.rti_addrs;
1991
1992	if (fibnum != RT_ALL_FIBS) {
1993		M_SETFIB(m, fibnum);
1994		m->m_flags |= RTS_FILTER_FIB;
1995	}
1996
1997	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1998
1999	return (0);
2000}
2001
2002/*
2003 * Announce route addition/removal to rtsock based on @rt data.
2004 * Callers are advives to use rt_routemsg() instead of using this
2005 *  function directly.
2006 * Assume @rt data is consistent.
2007 *
2008 * Returns 0 on success.
2009 */
2010int
2011rtsock_routemsg(int cmd, struct rtentry *rt, struct nhop_object *nh,
2012    int fibnum)
2013{
2014	union sockaddr_union dst, mask;
2015	struct rt_addrinfo info;
2016
2017	if (V_route_cb.any_count == 0)
2018		return (0);
2019
2020	int family = rt_get_family(rt);
2021	init_sockaddrs_family(family, &dst.sa, &mask.sa);
2022	export_rtaddrs(rt, &dst.sa, &mask.sa);
2023
2024	bzero((caddr_t)&info, sizeof(info));
2025	info.rti_info[RTAX_DST] = &dst.sa;
2026	info.rti_info[RTAX_NETMASK] = &mask.sa;
2027	info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
2028	info.rti_flags = rt->rte_flags | nhop_get_rtflags(nh);
2029	info.rti_ifp = nh->nh_ifp;
2030
2031	return (rtsock_routemsg_info(cmd, &info, fibnum));
2032}
2033
2034int
2035rtsock_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
2036{
2037	struct rt_msghdr *rtm;
2038	struct sockaddr *sa;
2039	struct mbuf *m;
2040
2041	if (V_route_cb.any_count == 0)
2042		return (0);
2043
2044	if (info->rti_flags & RTF_HOST)
2045		info->rti_info[RTAX_NETMASK] = NULL;
2046
2047	m = rtsock_msg_mbuf(cmd, info);
2048	if (m == NULL)
2049		return (ENOBUFS);
2050
2051	if (fibnum != RT_ALL_FIBS) {
2052		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
2053		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
2054		M_SETFIB(m, fibnum);
2055		m->m_flags |= RTS_FILTER_FIB;
2056	}
2057
2058	rtm = mtod(m, struct rt_msghdr *);
2059	rtm->rtm_addrs = info->rti_addrs;
2060	if (info->rti_ifp != NULL)
2061		rtm->rtm_index = info->rti_ifp->if_index;
2062	/* Add RTF_DONE to indicate command 'completion' required by API */
2063	info->rti_flags |= RTF_DONE;
2064	/* Reported routes has to be up */
2065	if (cmd == RTM_ADD || cmd == RTM_CHANGE)
2066		info->rti_flags |= RTF_UP;
2067	rtm->rtm_flags = info->rti_flags;
2068
2069	sa = info->rti_info[RTAX_DST];
2070	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
2071
2072	return (0);
2073}
2074
2075/*
2076 * This is the analogue to the rt_newaddrmsg which performs the same
2077 * function but for multicast group memberhips.  This is easier since
2078 * there is no route state to worry about.
2079 */
2080void
2081rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
2082{
2083	struct rt_addrinfo info;
2084	struct mbuf *m = NULL;
2085	struct ifnet *ifp = ifma->ifma_ifp;
2086	struct ifma_msghdr *ifmam;
2087
2088	if (V_route_cb.any_count == 0)
2089		return;
2090
2091	bzero((caddr_t)&info, sizeof(info));
2092	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2093	if (ifp && ifp->if_addr)
2094		info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
2095	else
2096		info.rti_info[RTAX_IFP] = NULL;
2097	/*
2098	 * If a link-layer address is present, present it as a ``gateway''
2099	 * (similarly to how ARP entries, e.g., are presented).
2100	 */
2101	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
2102	m = rtsock_msg_mbuf(cmd, &info);
2103	if (m == NULL)
2104		return;
2105	ifmam = mtod(m, struct ifma_msghdr *);
2106	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
2107	    __func__));
2108	ifmam->ifmam_index = ifp->if_index;
2109	ifmam->ifmam_addrs = info.rti_addrs;
2110	rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
2111}
2112
2113static struct mbuf *
2114rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
2115	struct rt_addrinfo *info)
2116{
2117	struct if_announcemsghdr *ifan;
2118	struct mbuf *m;
2119
2120	if (V_route_cb.any_count == 0)
2121		return NULL;
2122	bzero((caddr_t)info, sizeof(*info));
2123	m = rtsock_msg_mbuf(type, info);
2124	if (m != NULL) {
2125		ifan = mtod(m, struct if_announcemsghdr *);
2126		ifan->ifan_index = ifp->if_index;
2127		strlcpy(ifan->ifan_name, ifp->if_xname,
2128			sizeof(ifan->ifan_name));
2129		ifan->ifan_what = what;
2130	}
2131	return m;
2132}
2133
2134/*
2135 * This is called to generate routing socket messages indicating
2136 * IEEE80211 wireless events.
2137 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
2138 */
2139void
2140rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
2141{
2142	struct mbuf *m;
2143	struct rt_addrinfo info;
2144
2145	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
2146	if (m != NULL) {
2147		/*
2148		 * Append the ieee80211 data.  Try to stick it in the
2149		 * mbuf containing the ifannounce msg; otherwise allocate
2150		 * a new mbuf and append.
2151		 *
2152		 * NB: we assume m is a single mbuf.
2153		 */
2154		if (data_len > M_TRAILINGSPACE(m)) {
2155			struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
2156			if (n == NULL) {
2157				m_freem(m);
2158				return;
2159			}
2160			bcopy(data, mtod(n, void *), data_len);
2161			n->m_len = data_len;
2162			m->m_next = n;
2163		} else if (data_len > 0) {
2164			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
2165			m->m_len += data_len;
2166		}
2167		if (m->m_flags & M_PKTHDR)
2168			m->m_pkthdr.len += data_len;
2169		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
2170		rt_dispatch(m, AF_UNSPEC);
2171	}
2172}
2173
2174/*
2175 * This is called to generate routing socket messages indicating
2176 * network interface arrival and departure.
2177 */
2178static void
2179rt_ifannouncemsg(struct ifnet *ifp, int what)
2180{
2181	struct mbuf *m;
2182	struct rt_addrinfo info;
2183
2184	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
2185	if (m != NULL)
2186		rt_dispatch(m, AF_UNSPEC);
2187}
2188
2189static void
2190rt_dispatch(struct mbuf *m, sa_family_t saf)
2191{
2192
2193	M_ASSERTPKTHDR(m);
2194
2195	m->m_rtsock_family = saf;
2196	if (V_loif)
2197		m->m_pkthdr.rcvif = V_loif;
2198	else {
2199		m_freem(m);
2200		return;
2201	}
2202	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
2203}
2204
2205/*
2206 * This is used in dumping the kernel table via sysctl().
2207 */
2208static int
2209sysctl_dumpentry(struct rtentry *rt, void *vw)
2210{
2211	struct walkarg *w = vw;
2212	struct nhop_object *nh;
2213
2214	NET_EPOCH_ASSERT();
2215
2216	if (!rt_is_exportable(rt, w->w_req->td->td_ucred))
2217		return (0);
2218
2219	export_rtaddrs(rt, w->dst, w->mask);
2220	nh = rt_get_raw_nhop(rt);
2221#ifdef ROUTE_MPATH
2222	if (NH_IS_NHGRP(nh)) {
2223		const struct weightened_nhop *wn;
2224		uint32_t num_nhops;
2225		int error;
2226		wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
2227		for (int i = 0; i < num_nhops; i++) {
2228			error = sysctl_dumpnhop(rt, wn[i].nh, wn[i].weight, w);
2229			if (error != 0)
2230				return (error);
2231		}
2232	} else
2233#endif
2234		sysctl_dumpnhop(rt, nh, rt->rt_weight, w);
2235
2236	return (0);
2237}
2238
2239
2240static int
2241sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight,
2242    struct walkarg *w)
2243{
2244	struct rt_addrinfo info;
2245	int error = 0, size;
2246	uint32_t rtflags;
2247
2248	rtflags = nhop_get_rtflags(nh);
2249
2250	if (w->w_op == NET_RT_FLAGS && !(rtflags & w->w_arg))
2251		return (0);
2252
2253	bzero((caddr_t)&info, sizeof(info));
2254	info.rti_info[RTAX_DST] = w->dst;
2255	info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
2256	info.rti_info[RTAX_NETMASK] = (rtflags & RTF_HOST) ? NULL : w->mask;
2257	info.rti_info[RTAX_GENMASK] = 0;
2258	if (nh->nh_ifp && !(nh->nh_ifp->if_flags & IFF_DYING)) {
2259		info.rti_info[RTAX_IFP] = nh->nh_ifp->if_addr->ifa_addr;
2260		info.rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
2261		if (nh->nh_ifp->if_flags & IFF_POINTOPOINT)
2262			info.rti_info[RTAX_BRD] = nh->nh_ifa->ifa_dstaddr;
2263	}
2264	if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
2265		return (error);
2266	if (w->w_req && w->w_tmem) {
2267		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
2268
2269		bzero(&rtm->rtm_index,
2270		    sizeof(*rtm) - offsetof(struct rt_msghdr, rtm_index));
2271
2272		/*
2273		 * rte flags may consist of RTF_HOST (duplicated in nhop rtflags)
2274		 * and RTF_UP (if entry is linked, which is always true here).
2275		 * Given that, use nhop rtflags & add RTF_UP.
2276		 */
2277		rtm->rtm_flags = rtflags | RTF_UP;
2278		if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
2279			rtm->rtm_flags = RTF_GATEWAY |
2280				(rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
2281		rt_getmetrics(rt, nh, &rtm->rtm_rmx);
2282		rtm->rtm_rmx.rmx_weight = weight;
2283		rtm->rtm_index = nh->nh_ifp->if_index;
2284		rtm->rtm_addrs = info.rti_addrs;
2285		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
2286		return (error);
2287	}
2288	return (error);
2289}
2290
2291static int
2292sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
2293    struct rt_addrinfo *info, struct walkarg *w, int len)
2294{
2295	struct if_msghdrl *ifm;
2296	struct if_data *ifd;
2297
2298	ifm = (struct if_msghdrl *)w->w_tmem;
2299
2300#ifdef COMPAT_FREEBSD32
2301	if (w->w_req->flags & SCTL_MASK32) {
2302		struct if_msghdrl32 *ifm32;
2303
2304		ifm32 = (struct if_msghdrl32 *)ifm;
2305		ifm32->ifm_addrs = info->rti_addrs;
2306		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2307		ifm32->ifm_index = ifp->if_index;
2308		ifm32->_ifm_spare1 = 0;
2309		ifm32->ifm_len = sizeof(*ifm32);
2310		ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
2311		ifm32->_ifm_spare2 = 0;
2312		ifd = &ifm32->ifm_data;
2313	} else
2314#endif
2315	{
2316		ifm->ifm_addrs = info->rti_addrs;
2317		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2318		ifm->ifm_index = ifp->if_index;
2319		ifm->_ifm_spare1 = 0;
2320		ifm->ifm_len = sizeof(*ifm);
2321		ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
2322		ifm->_ifm_spare2 = 0;
2323		ifd = &ifm->ifm_data;
2324	}
2325
2326	memcpy(ifd, src_ifd, sizeof(*ifd));
2327
2328	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
2329}
2330
2331static int
2332sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
2333    struct rt_addrinfo *info, struct walkarg *w, int len)
2334{
2335	struct if_msghdr *ifm;
2336	struct if_data *ifd;
2337
2338	ifm = (struct if_msghdr *)w->w_tmem;
2339
2340#ifdef COMPAT_FREEBSD32
2341	if (w->w_req->flags & SCTL_MASK32) {
2342		struct if_msghdr32 *ifm32;
2343
2344		ifm32 = (struct if_msghdr32 *)ifm;
2345		ifm32->ifm_addrs = info->rti_addrs;
2346		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2347		ifm32->ifm_index = ifp->if_index;
2348		ifm32->_ifm_spare1 = 0;
2349		ifd = &ifm32->ifm_data;
2350	} else
2351#endif
2352	{
2353		ifm->ifm_addrs = info->rti_addrs;
2354		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2355		ifm->ifm_index = ifp->if_index;
2356		ifm->_ifm_spare1 = 0;
2357		ifd = &ifm->ifm_data;
2358	}
2359
2360	memcpy(ifd, src_ifd, sizeof(*ifd));
2361
2362	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
2363}
2364
2365static int
2366sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
2367    struct walkarg *w, int len)
2368{
2369	struct ifa_msghdrl *ifam;
2370	struct if_data *ifd;
2371
2372	ifam = (struct ifa_msghdrl *)w->w_tmem;
2373
2374#ifdef COMPAT_FREEBSD32
2375	if (w->w_req->flags & SCTL_MASK32) {
2376		struct ifa_msghdrl32 *ifam32;
2377
2378		ifam32 = (struct ifa_msghdrl32 *)ifam;
2379		ifam32->ifam_addrs = info->rti_addrs;
2380		ifam32->ifam_flags = ifa->ifa_flags;
2381		ifam32->ifam_index = ifa->ifa_ifp->if_index;
2382		ifam32->_ifam_spare1 = 0;
2383		ifam32->ifam_len = sizeof(*ifam32);
2384		ifam32->ifam_data_off =
2385		    offsetof(struct ifa_msghdrl32, ifam_data);
2386		ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
2387		ifd = &ifam32->ifam_data;
2388	} else
2389#endif
2390	{
2391		ifam->ifam_addrs = info->rti_addrs;
2392		ifam->ifam_flags = ifa->ifa_flags;
2393		ifam->ifam_index = ifa->ifa_ifp->if_index;
2394		ifam->_ifam_spare1 = 0;
2395		ifam->ifam_len = sizeof(*ifam);
2396		ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
2397		ifam->ifam_metric = ifa->ifa_ifp->if_metric;
2398		ifd = &ifam->ifam_data;
2399	}
2400
2401	bzero(ifd, sizeof(*ifd));
2402	ifd->ifi_datalen = sizeof(struct if_data);
2403	ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
2404	ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
2405	ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
2406	ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
2407
2408	/* Fixup if_data carp(4) vhid. */
2409	if (carp_get_vhid_p != NULL)
2410		ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
2411
2412	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
2413}
2414
2415static int
2416sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
2417    struct walkarg *w, int len)
2418{
2419	struct ifa_msghdr *ifam;
2420
2421	ifam = (struct ifa_msghdr *)w->w_tmem;
2422	ifam->ifam_addrs = info->rti_addrs;
2423	ifam->ifam_flags = ifa->ifa_flags;
2424	ifam->ifam_index = ifa->ifa_ifp->if_index;
2425	ifam->_ifam_spare1 = 0;
2426	ifam->ifam_metric = ifa->ifa_ifp->if_metric;
2427
2428	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
2429}
2430
2431static int
2432sysctl_iflist(int af, struct walkarg *w)
2433{
2434	struct ifnet *ifp;
2435	struct ifaddr *ifa;
2436	struct if_data ifd;
2437	struct rt_addrinfo info;
2438	int len, error = 0;
2439	struct sockaddr_storage ss;
2440
2441	bzero((caddr_t)&info, sizeof(info));
2442	bzero(&ifd, sizeof(ifd));
2443	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2444		if (w->w_arg && w->w_arg != ifp->if_index)
2445			continue;
2446		if_data_copy(ifp, &ifd);
2447		ifa = ifp->if_addr;
2448		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2449		error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
2450		if (error != 0)
2451			goto done;
2452		info.rti_info[RTAX_IFP] = NULL;
2453		if (w->w_req && w->w_tmem) {
2454			if (w->w_op == NET_RT_IFLISTL)
2455				error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
2456				    len);
2457			else
2458				error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
2459				    len);
2460			if (error)
2461				goto done;
2462		}
2463		while ((ifa = CK_STAILQ_NEXT(ifa, ifa_link)) != NULL) {
2464			if (af && af != ifa->ifa_addr->sa_family)
2465				continue;
2466			if (prison_if(w->w_req->td->td_ucred,
2467			    ifa->ifa_addr) != 0)
2468				continue;
2469			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2470			info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
2471			    ifa->ifa_addr, ifa->ifa_netmask, &ss);
2472			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2473			error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
2474			if (error != 0)
2475				goto done;
2476			if (w->w_req && w->w_tmem) {
2477				if (w->w_op == NET_RT_IFLISTL)
2478					error = sysctl_iflist_ifaml(ifa, &info,
2479					    w, len);
2480				else
2481					error = sysctl_iflist_ifam(ifa, &info,
2482					    w, len);
2483				if (error)
2484					goto done;
2485			}
2486		}
2487		info.rti_info[RTAX_IFA] = NULL;
2488		info.rti_info[RTAX_NETMASK] = NULL;
2489		info.rti_info[RTAX_BRD] = NULL;
2490	}
2491done:
2492	return (error);
2493}
2494
2495static int
2496sysctl_ifmalist(int af, struct walkarg *w)
2497{
2498	struct rt_addrinfo info;
2499	struct ifaddr *ifa;
2500	struct ifmultiaddr *ifma;
2501	struct ifnet *ifp;
2502	int error, len;
2503
2504	NET_EPOCH_ASSERT();
2505
2506	error = 0;
2507	bzero((caddr_t)&info, sizeof(info));
2508
2509	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2510		if (w->w_arg && w->w_arg != ifp->if_index)
2511			continue;
2512		ifa = ifp->if_addr;
2513		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
2514		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2515			if (af && af != ifma->ifma_addr->sa_family)
2516				continue;
2517			if (prison_if(w->w_req->td->td_ucred,
2518			    ifma->ifma_addr) != 0)
2519				continue;
2520			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2521			info.rti_info[RTAX_GATEWAY] =
2522			    (ifma->ifma_addr->sa_family != AF_LINK) ?
2523			    ifma->ifma_lladdr : NULL;
2524			error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
2525			if (error != 0)
2526				break;
2527			if (w->w_req && w->w_tmem) {
2528				struct ifma_msghdr *ifmam;
2529
2530				ifmam = (struct ifma_msghdr *)w->w_tmem;
2531				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
2532				ifmam->ifmam_flags = 0;
2533				ifmam->ifmam_addrs = info.rti_addrs;
2534				ifmam->_ifmam_spare1 = 0;
2535				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
2536				if (error != 0)
2537					break;
2538			}
2539		}
2540		if (error != 0)
2541			break;
2542	}
2543	return (error);
2544}
2545
2546static void
2547rtable_sysctl_dump(uint32_t fibnum, int family, struct walkarg *w)
2548{
2549	union sockaddr_union sa_dst, sa_mask;
2550
2551	w->family = family;
2552	w->dst = (struct sockaddr *)&sa_dst;
2553	w->mask = (struct sockaddr *)&sa_mask;
2554
2555	init_sockaddrs_family(family, w->dst, w->mask);
2556
2557	rib_walk(fibnum, family, false, sysctl_dumpentry, w);
2558}
2559
2560static int
2561sysctl_rtsock(SYSCTL_HANDLER_ARGS)
2562{
2563	struct epoch_tracker et;
2564	int	*name = (int *)arg1;
2565	u_int	namelen = arg2;
2566	struct rib_head *rnh = NULL; /* silence compiler. */
2567	int	i, lim, error = EINVAL;
2568	int	fib = 0;
2569	u_char	af;
2570	struct	walkarg w;
2571
2572	if (namelen < 3)
2573		return (EINVAL);
2574
2575	name++;
2576	namelen--;
2577	if (req->newptr)
2578		return (EPERM);
2579	if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP || name[1] == NET_RT_NHGRP) {
2580		if (namelen == 3)
2581			fib = req->td->td_proc->p_fibnum;
2582		else if (namelen == 4)
2583			fib = (name[3] == RT_ALL_FIBS) ?
2584			    req->td->td_proc->p_fibnum : name[3];
2585		else
2586			return ((namelen < 3) ? EISDIR : ENOTDIR);
2587		if (fib < 0 || fib >= rt_numfibs)
2588			return (EINVAL);
2589	} else if (namelen != 3)
2590		return ((namelen < 3) ? EISDIR : ENOTDIR);
2591	af = name[0];
2592	if (af > AF_MAX)
2593		return (EINVAL);
2594	bzero(&w, sizeof(w));
2595	w.w_op = name[1];
2596	w.w_arg = name[2];
2597	w.w_req = req;
2598
2599	error = sysctl_wire_old_buffer(req, 0);
2600	if (error)
2601		return (error);
2602
2603	/*
2604	 * Allocate reply buffer in advance.
2605	 * All rtsock messages has maximum length of u_short.
2606	 */
2607	w.w_tmemsize = 65536;
2608	w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
2609
2610	NET_EPOCH_ENTER(et);
2611	switch (w.w_op) {
2612	case NET_RT_DUMP:
2613	case NET_RT_FLAGS:
2614		if (af == 0) {			/* dump all tables */
2615			i = 1;
2616			lim = AF_MAX;
2617		} else				/* dump only one table */
2618			i = lim = af;
2619
2620		/*
2621		 * take care of llinfo entries, the caller must
2622		 * specify an AF
2623		 */
2624		if (w.w_op == NET_RT_FLAGS &&
2625		    (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
2626			if (af != 0)
2627				error = lltable_sysctl_dumparp(af, w.w_req);
2628			else
2629				error = EINVAL;
2630			break;
2631		}
2632		/*
2633		 * take care of routing entries
2634		 */
2635		for (error = 0; error == 0 && i <= lim; i++) {
2636			rnh = rt_tables_get_rnh(fib, i);
2637			if (rnh != NULL) {
2638				rtable_sysctl_dump(fib, i, &w);
2639			} else if (af != 0)
2640				error = EAFNOSUPPORT;
2641		}
2642		break;
2643	case NET_RT_NHOP:
2644	case NET_RT_NHGRP:
2645		/* Allow dumping one specific af/fib at a time */
2646		if (namelen < 4) {
2647			error = EINVAL;
2648			break;
2649		}
2650		fib = name[3];
2651		if (fib < 0 || fib > rt_numfibs) {
2652			error = EINVAL;
2653			break;
2654		}
2655		rnh = rt_tables_get_rnh(fib, af);
2656		if (rnh == NULL) {
2657			error = EAFNOSUPPORT;
2658			break;
2659		}
2660		if (w.w_op == NET_RT_NHOP)
2661			error = nhops_dump_sysctl(rnh, w.w_req);
2662		else
2663#ifdef ROUTE_MPATH
2664			error = nhgrp_dump_sysctl(rnh, w.w_req);
2665#else
2666			error = ENOTSUP;
2667#endif
2668		break;
2669	case NET_RT_IFLIST:
2670	case NET_RT_IFLISTL:
2671		error = sysctl_iflist(af, &w);
2672		break;
2673
2674	case NET_RT_IFMALIST:
2675		error = sysctl_ifmalist(af, &w);
2676		break;
2677	}
2678	NET_EPOCH_EXIT(et);
2679
2680	free(w.w_tmem, M_TEMP);
2681	return (error);
2682}
2683
/*
 * Declare the "routetable" sysctl node, numbered PF_ROUTE, under _net.
 * The node is read-only and MP-safe (CTLFLAG_RD | CTLFLAG_MPSAFE) and is
 * served by sysctl_rtsock().
 */
static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_MPSAFE,
    sysctl_rtsock, "Return route tables and interface/address lists");
2686
2687/*
2688 * Definitions of protocols supported in the ROUTE domain.
2689 */
2690
2691static struct domain routedomain;		/* or at least forward */
2692
2693static struct protosw routesw = {
2694	.pr_type =		SOCK_RAW,
2695	.pr_flags =		PR_ATOMIC|PR_ADDR,
2696	.pr_abort =		rts_close,
2697	.pr_attach =		rts_attach,
2698	.pr_detach =		rts_detach,
2699	.pr_send =		rts_send,
2700	.pr_shutdown =		rts_shutdown,
2701	.pr_disconnect =	rts_disconnect,
2702	.pr_close =		rts_close,
2703};
2704
2705static struct domain routedomain = {
2706	.dom_family =		PF_ROUTE,
2707	.dom_name =		"route",
2708	.dom_nprotosw =		1,
2709	.dom_protosw =		{ &routesw },
2710};
2711
2712DOMAIN_SET(route);
2713