if_gif.c revision 330897
1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/11/sys/net/if_gif.c 330897 2018-03-14 03:19:51Z eadler $");
36
37#include "opt_inet.h"
38#include "opt_inet6.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/jail.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mbuf.h>
47#include <sys/module.h>
48#include <sys/rmlock.h>
49#include <sys/socket.h>
50#include <sys/sockio.h>
51#include <sys/sx.h>
52#include <sys/errno.h>
53#include <sys/time.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/priv.h>
57#include <sys/proc.h>
58#include <sys/protosw.h>
59#include <sys/conf.h>
60#include <machine/cpu.h>
61
62#include <net/if.h>
63#include <net/if_var.h>
64#include <net/if_clone.h>
65#include <net/if_types.h>
66#include <net/netisr.h>
67#include <net/route.h>
68#include <net/bpf.h>
69#include <net/vnet.h>
70
71#include <netinet/in.h>
72#include <netinet/in_systm.h>
73#include <netinet/ip.h>
74#include <netinet/ip_ecn.h>
75#ifdef	INET
76#include <netinet/in_var.h>
77#include <netinet/ip_var.h>
78#endif	/* INET */
79
80#ifdef INET6
81#ifndef INET
82#include <netinet/in.h>
83#endif
84#include <netinet6/in6_var.h>
85#include <netinet/ip6.h>
86#include <netinet6/ip6_ecn.h>
87#include <netinet6/ip6_var.h>
88#include <netinet6/scope6_var.h>
89#include <netinet6/ip6protosw.h>
90#endif /* INET6 */
91
92#include <netinet/ip_encap.h>
93#include <net/ethernet.h>
94#include <net/if_bridgevar.h>
95#include <net/if_gif.h>
96
97#include <security/mac/mac_framework.h>
98
99static const char gifname[] = "gif";
100
101/*
102 * gif_mtx protects a per-vnet gif_softc_list.
103 */
104static VNET_DEFINE(struct mtx, gif_mtx);
105#define	V_gif_mtx		VNET(gif_mtx)
106static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
107static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
108#define	V_gif_softc_list	VNET(gif_softc_list)
109static struct sx gif_ioctl_sx;
110SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
111
112#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
113					    NULL, MTX_DEF)
114#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
115#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
116#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)
117
118void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
119void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
120void	(*ng_gif_attach_p)(struct ifnet *ifp);
121void	(*ng_gif_detach_p)(struct ifnet *ifp);
122
123static int	gif_check_nesting(struct ifnet *, struct mbuf *);
124static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
125    struct sockaddr *);
126static void	gif_delete_tunnel(struct ifnet *);
127static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
128static int	gif_transmit(struct ifnet *, struct mbuf *);
129static void	gif_qflush(struct ifnet *);
130static int	gif_clone_create(struct if_clone *, int, caddr_t);
131static void	gif_clone_destroy(struct ifnet *);
132static VNET_DEFINE(struct if_clone *, gif_cloner);
133#define	V_gif_cloner	VNET(gif_cloner)
134
135static int gifmodevent(module_t, int, void *);
136
137SYSCTL_DECL(_net_link);
138static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
139    "Generic Tunnel Interface");
140#ifndef MAX_GIF_NEST
/*
 * This macro sets the default upper limit on the nesting of gif tunnels.
 * Because a large value combined with a careless configuration may crash
 * the system, we do not allow any nesting by default.
 * If you need to configure nested gif tunnels, you can define this macro
 * in your kernel configuration file.  If you do so, please be careful to
 * configure the tunnels so that they do not form a loop.
 */
149#define MAX_GIF_NEST 1
150#endif
151static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
152#define	V_max_gif_nesting	VNET(max_gif_nesting)
153SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
154    &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
155
156/*
157 * By default, we disallow creation of multiple tunnels between the same
158 * pair of addresses.  Some applications require this functionality so
159 * we allow control over this check here.
160 */
161#ifdef XBONEHACK
162static VNET_DEFINE(int, parallel_tunnels) = 1;
163#else
164static VNET_DEFINE(int, parallel_tunnels) = 0;
165#endif
166#define	V_parallel_tunnels	VNET(parallel_tunnels)
167SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
168    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
169    "Allow parallel tunnels?");
170
171/* copy from src/sys/net/if_ethersubr.c */
172static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
173			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
174#ifndef ETHER_IS_BROADCAST
175#define ETHER_IS_BROADCAST(addr) \
176	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
177#endif
178
179static int
180gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
181{
182	struct gif_softc *sc;
183
184	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
185	sc->gif_fibnum = curthread->td_proc->p_fibnum;
186	GIF2IFP(sc) = if_alloc(IFT_GIF);
187	GIF_LOCK_INIT(sc);
188	GIF2IFP(sc)->if_softc = sc;
189	if_initname(GIF2IFP(sc), gifname, unit);
190
191	GIF2IFP(sc)->if_addrlen = 0;
192	GIF2IFP(sc)->if_mtu    = GIF_MTU;
193	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
194#if 0
195	/* turn off ingress filter */
196	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
197#endif
198	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
199	GIF2IFP(sc)->if_transmit  = gif_transmit;
200	GIF2IFP(sc)->if_qflush  = gif_qflush;
201	GIF2IFP(sc)->if_output = gif_output;
202	GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
203	GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
204	if_attach(GIF2IFP(sc));
205	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
206	if (ng_gif_attach_p != NULL)
207		(*ng_gif_attach_p)(GIF2IFP(sc));
208
209	GIF_LIST_LOCK();
210	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
211	GIF_LIST_UNLOCK();
212	return (0);
213}
214
215static void
216gif_clone_destroy(struct ifnet *ifp)
217{
218	struct gif_softc *sc;
219
220	sx_xlock(&gif_ioctl_sx);
221	sc = ifp->if_softc;
222	gif_delete_tunnel(ifp);
223	GIF_LIST_LOCK();
224	LIST_REMOVE(sc, gif_list);
225	GIF_LIST_UNLOCK();
226	if (ng_gif_detach_p != NULL)
227		(*ng_gif_detach_p)(ifp);
228	bpfdetach(ifp);
229	if_detach(ifp);
230	ifp->if_softc = NULL;
231	sx_xunlock(&gif_ioctl_sx);
232
233	if_free(ifp);
234	GIF_LOCK_DESTROY(sc);
235	free(sc, M_GIF);
236}
237
238static void
239vnet_gif_init(const void *unused __unused)
240{
241
242	LIST_INIT(&V_gif_softc_list);
243	GIF_LIST_LOCK_INIT();
244	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
245	    gif_clone_destroy, 0);
246}
247VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
248    vnet_gif_init, NULL);
249
250static void
251vnet_gif_uninit(const void *unused __unused)
252{
253
254	if_clone_detach(V_gif_cloner);
255	GIF_LIST_LOCK_DESTROY();
256}
257VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
258    vnet_gif_uninit, NULL);
259
260static int
261gifmodevent(module_t mod, int type, void *data)
262{
263
264	switch (type) {
265	case MOD_LOAD:
266	case MOD_UNLOAD:
267		break;
268	default:
269		return (EOPNOTSUPP);
270	}
271	return (0);
272}
273
274static moduledata_t gif_mod = {
275	"if_gif",
276	gifmodevent,
277	0
278};
279
280DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
281MODULE_VERSION(if_gif, 1);
282
283int
284gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
285{
286	GIF_RLOCK_TRACKER;
287	const struct ip *ip;
288	struct gif_softc *sc;
289	int ret;
290
291	sc = (struct gif_softc *)arg;
292	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
293		return (0);
294
295	ret = 0;
296	GIF_RLOCK(sc);
297
298	/* no physical address */
299	if (sc->gif_family == 0)
300		goto done;
301
302	switch (proto) {
303#ifdef INET
304	case IPPROTO_IPV4:
305#endif
306#ifdef INET6
307	case IPPROTO_IPV6:
308#endif
309	case IPPROTO_ETHERIP:
310		break;
311	default:
312		goto done;
313	}
314
315	/* Bail on short packets */
316	M_ASSERTPKTHDR(m);
317	if (m->m_pkthdr.len < sizeof(struct ip))
318		goto done;
319
320	ip = mtod(m, const struct ip *);
321	switch (ip->ip_v) {
322#ifdef INET
323	case 4:
324		if (sc->gif_family != AF_INET)
325			goto done;
326		ret = in_gif_encapcheck(m, off, proto, arg);
327		break;
328#endif
329#ifdef INET6
330	case 6:
331		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
332			goto done;
333		if (sc->gif_family != AF_INET6)
334			goto done;
335		ret = in6_gif_encapcheck(m, off, proto, arg);
336		break;
337#endif
338	}
339done:
340	GIF_RUNLOCK(sc);
341	return (ret);
342}
343
344static int
345gif_transmit(struct ifnet *ifp, struct mbuf *m)
346{
347	struct gif_softc *sc;
348	struct etherip_header *eth;
349#ifdef INET
350	struct ip *ip;
351#endif
352#ifdef INET6
353	struct ip6_hdr *ip6;
354	uint32_t t;
355#endif
356	uint32_t af;
357	uint8_t proto, ecn;
358	int error;
359
360#ifdef MAC
361	error = mac_ifnet_check_transmit(ifp, m);
362	if (error) {
363		m_freem(m);
364		goto err;
365	}
366#endif
367	error = ENETDOWN;
368	sc = ifp->if_softc;
369	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
370	    (ifp->if_flags & IFF_UP) == 0 ||
371	    sc->gif_family == 0 ||
372	    (error = gif_check_nesting(ifp, m)) != 0) {
373		m_freem(m);
374		goto err;
375	}
376	/* Now pull back the af that we stashed in the csum_data. */
377	if (ifp->if_bridge)
378		af = AF_LINK;
379	else
380		af = m->m_pkthdr.csum_data;
381	m->m_flags &= ~(M_BCAST|M_MCAST);
382	M_SETFIB(m, sc->gif_fibnum);
383	BPF_MTAP2(ifp, &af, sizeof(af), m);
384	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
385	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
386	/* inner AF-specific encapsulation */
387	ecn = 0;
388	switch (af) {
389#ifdef INET
390	case AF_INET:
391		proto = IPPROTO_IPV4;
392		if (m->m_len < sizeof(struct ip))
393			m = m_pullup(m, sizeof(struct ip));
394		if (m == NULL) {
395			error = ENOBUFS;
396			goto err;
397		}
398		ip = mtod(m, struct ip *);
399		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
400		    ECN_NOCARE, &ecn, &ip->ip_tos);
401		break;
402#endif
403#ifdef INET6
404	case AF_INET6:
405		proto = IPPROTO_IPV6;
406		if (m->m_len < sizeof(struct ip6_hdr))
407			m = m_pullup(m, sizeof(struct ip6_hdr));
408		if (m == NULL) {
409			error = ENOBUFS;
410			goto err;
411		}
412		t = 0;
413		ip6 = mtod(m, struct ip6_hdr *);
414		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
415		    ECN_NOCARE, &t, &ip6->ip6_flow);
416		ecn = (ntohl(t) >> 20) & 0xff;
417		break;
418#endif
419	case AF_LINK:
420		proto = IPPROTO_ETHERIP;
421		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
422		if (m == NULL) {
423			error = ENOBUFS;
424			goto err;
425		}
426		eth = mtod(m, struct etherip_header *);
427		eth->eip_resvh = 0;
428		eth->eip_ver = ETHERIP_VERSION;
429		eth->eip_resvl = 0;
430		break;
431	default:
432		error = EAFNOSUPPORT;
433		m_freem(m);
434		goto err;
435	}
436	/* XXX should we check if our outer source is legal? */
437	/* dispatch to output logic based on outer AF */
438	switch (sc->gif_family) {
439#ifdef INET
440	case AF_INET:
441		error = in_gif_output(ifp, m, proto, ecn);
442		break;
443#endif
444#ifdef INET6
445	case AF_INET6:
446		error = in6_gif_output(ifp, m, proto, ecn);
447		break;
448#endif
449	default:
450		m_freem(m);
451	}
452err:
453	if (error)
454		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
455	return (error);
456}
457
458static void
459gif_qflush(struct ifnet *ifp __unused)
460{
461
462}
463
464#define	MTAG_GIF	1080679712
465static int
466gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
467{
468	struct m_tag *mtag;
469	int count;
470
471	/*
472	 * gif may cause infinite recursion calls when misconfigured.
473	 * We'll prevent this by detecting loops.
474	 *
475	 * High nesting level may cause stack exhaustion.
476	 * We'll prevent this by introducing upper limit.
477	 */
478	count = 1;
479	mtag = NULL;
480	while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) {
481		if (*(struct ifnet **)(mtag + 1) == ifp) {
482			log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp));
483			return (EIO);
484		}
485		count++;
486	}
487	if (count > V_max_gif_nesting) {
488		log(LOG_NOTICE,
489		    "%s: if_output recursively called too many times(%d)\n",
490		    if_name(ifp), count);
491		return (EIO);
492	}
493	mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT);
494	if (mtag == NULL)
495		return (ENOMEM);
496	*(struct ifnet **)(mtag + 1) = ifp;
497	m_tag_prepend(m, mtag);
498	return (0);
499}
500
501int
502gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
503	struct route *ro)
504{
505	uint32_t af;
506
507	if (dst->sa_family == AF_UNSPEC)
508		bcopy(dst->sa_data, &af, sizeof(af));
509	else
510		af = dst->sa_family;
511	/*
512	 * Now save the af in the inbound pkt csum data, this is a cheat since
513	 * we are using the inbound csum_data field to carry the af over to
514	 * the gif_transmit() routine, avoiding using yet another mtag.
515	 */
516	m->m_pkthdr.csum_data = af;
517	return (ifp->if_transmit(ifp, m));
518}
519
520void
521gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
522{
523	struct etherip_header *eip;
524#ifdef INET
525	struct ip *ip;
526#endif
527#ifdef INET6
528	struct ip6_hdr *ip6;
529	uint32_t t;
530#endif
531	struct gif_softc *sc;
532	struct ether_header *eh;
533	struct ifnet *oldifp;
534	int isr, n, af;
535
536	if (ifp == NULL) {
537		/* just in case */
538		m_freem(m);
539		return;
540	}
541	sc = ifp->if_softc;
542	m->m_pkthdr.rcvif = ifp;
543	m_clrprotoflags(m);
544	switch (proto) {
545#ifdef INET
546	case IPPROTO_IPV4:
547		af = AF_INET;
548		if (m->m_len < sizeof(struct ip))
549			m = m_pullup(m, sizeof(struct ip));
550		if (m == NULL)
551			goto drop;
552		ip = mtod(m, struct ip *);
553		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
554		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
555			m_freem(m);
556			goto drop;
557		}
558		break;
559#endif
560#ifdef INET6
561	case IPPROTO_IPV6:
562		af = AF_INET6;
563		if (m->m_len < sizeof(struct ip6_hdr))
564			m = m_pullup(m, sizeof(struct ip6_hdr));
565		if (m == NULL)
566			goto drop;
567		t = htonl((uint32_t)ecn << 20);
568		ip6 = mtod(m, struct ip6_hdr *);
569		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
570		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
571			m_freem(m);
572			goto drop;
573		}
574		break;
575#endif
576	case IPPROTO_ETHERIP:
577		af = AF_LINK;
578		break;
579	default:
580		m_freem(m);
581		goto drop;
582	}
583
584#ifdef MAC
585	mac_ifnet_create_mbuf(ifp, m);
586#endif
587
588	if (bpf_peers_present(ifp->if_bpf)) {
589		uint32_t af1 = af;
590		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
591	}
592
593	if ((ifp->if_flags & IFF_MONITOR) != 0) {
594		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
595		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
596		m_freem(m);
597		return;
598	}
599
600	if (ng_gif_input_p != NULL) {
601		(*ng_gif_input_p)(ifp, &m, af);
602		if (m == NULL)
603			goto drop;
604	}
605
606	/*
607	 * Put the packet to the network layer input queue according to the
608	 * specified address family.
609	 * Note: older versions of gif_input directly called network layer
610	 * input functions, e.g. ip6_input, here.  We changed the policy to
611	 * prevent too many recursive calls of such input functions, which
612	 * might cause kernel panic.  But the change may introduce another
613	 * problem; if the input queue is full, packets are discarded.
614	 * The kernel stack overflow really happened, and we believed
615	 * queue-full rarely occurs, so we changed the policy.
616	 */
617	switch (af) {
618#ifdef INET
619	case AF_INET:
620		isr = NETISR_IP;
621		break;
622#endif
623#ifdef INET6
624	case AF_INET6:
625		isr = NETISR_IPV6;
626		break;
627#endif
628	case AF_LINK:
629		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
630		if (n > m->m_len)
631			m = m_pullup(m, n);
632		if (m == NULL)
633			goto drop;
634		eip = mtod(m, struct etherip_header *);
635		if (eip->eip_ver != ETHERIP_VERSION) {
636			/* discard unknown versions */
637			m_freem(m);
638			goto drop;
639		}
640		m_adj(m, sizeof(struct etherip_header));
641
642		m->m_flags &= ~(M_BCAST|M_MCAST);
643		m->m_pkthdr.rcvif = ifp;
644
645		if (ifp->if_bridge) {
646			oldifp = ifp;
647			eh = mtod(m, struct ether_header *);
648			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
649				if (ETHER_IS_BROADCAST(eh->ether_dhost))
650					m->m_flags |= M_BCAST;
651				else
652					m->m_flags |= M_MCAST;
653				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
654			}
655			BRIDGE_INPUT(ifp, m);
656
657			if (m != NULL && ifp != oldifp) {
658				/*
659				 * The bridge gave us back itself or one of the
660				 * members for which the frame is addressed.
661				 */
662				ether_demux(ifp, m);
663				return;
664			}
665		}
666		if (m != NULL)
667			m_freem(m);
668		return;
669
670	default:
671		if (ng_gif_input_orphan_p != NULL)
672			(*ng_gif_input_orphan_p)(ifp, m, af);
673		else
674			m_freem(m);
675		return;
676	}
677
678	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
679	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
680	M_SETFIB(m, ifp->if_fib);
681	netisr_dispatch(isr, m);
682	return;
683drop:
684	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
685}
686
687/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
688int
689gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
690{
691	GIF_RLOCK_TRACKER;
692	struct ifreq *ifr = (struct ifreq*)data;
693	struct sockaddr *dst, *src;
694	struct gif_softc *sc;
695#ifdef INET
696	struct sockaddr_in *sin = NULL;
697#endif
698#ifdef INET6
699	struct sockaddr_in6 *sin6 = NULL;
700#endif
701	u_int options;
702	int error;
703
704	switch (cmd) {
705	case SIOCSIFADDR:
706		ifp->if_flags |= IFF_UP;
707	case SIOCADDMULTI:
708	case SIOCDELMULTI:
709	case SIOCGIFMTU:
710	case SIOCSIFFLAGS:
711		return (0);
712	case SIOCSIFMTU:
713		if (ifr->ifr_mtu < GIF_MTU_MIN ||
714		    ifr->ifr_mtu > GIF_MTU_MAX)
715			return (EINVAL);
716		else
717			ifp->if_mtu = ifr->ifr_mtu;
718		return (0);
719	}
720	sx_xlock(&gif_ioctl_sx);
721	sc = ifp->if_softc;
722	if (sc == NULL) {
723		error = ENXIO;
724		goto bad;
725	}
726	error = 0;
727	switch (cmd) {
728	case SIOCSIFPHYADDR:
729#ifdef INET6
730	case SIOCSIFPHYADDR_IN6:
731#endif
732		error = EINVAL;
733		switch (cmd) {
734#ifdef INET
735		case SIOCSIFPHYADDR:
736			src = (struct sockaddr *)
737				&(((struct in_aliasreq *)data)->ifra_addr);
738			dst = (struct sockaddr *)
739				&(((struct in_aliasreq *)data)->ifra_dstaddr);
740			break;
741#endif
742#ifdef INET6
743		case SIOCSIFPHYADDR_IN6:
744			src = (struct sockaddr *)
745				&(((struct in6_aliasreq *)data)->ifra_addr);
746			dst = (struct sockaddr *)
747				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
748			break;
749#endif
750		default:
751			goto bad;
752		}
753		/* sa_family must be equal */
754		if (src->sa_family != dst->sa_family ||
755		    src->sa_len != dst->sa_len)
756			goto bad;
757
758		/* validate sa_len */
759		/* check sa_family looks sane for the cmd */
760		switch (src->sa_family) {
761#ifdef INET
762		case AF_INET:
763			if (src->sa_len != sizeof(struct sockaddr_in))
764				goto bad;
765			if (cmd != SIOCSIFPHYADDR) {
766				error = EAFNOSUPPORT;
767				goto bad;
768			}
769			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
770			    satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
771				error = EADDRNOTAVAIL;
772				goto bad;
773			}
774			break;
775#endif
776#ifdef INET6
777		case AF_INET6:
778			if (src->sa_len != sizeof(struct sockaddr_in6))
779				goto bad;
780			if (cmd != SIOCSIFPHYADDR_IN6) {
781				error = EAFNOSUPPORT;
782				goto bad;
783			}
784			error = EADDRNOTAVAIL;
785			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
786			    ||
787			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
788				goto bad;
789			/*
790			 * Check validity of the scope zone ID of the
791			 * addresses, and convert it into the kernel
792			 * internal form if necessary.
793			 */
794			error = sa6_embedscope(satosin6(src), 0);
795			if (error != 0)
796				goto bad;
797			error = sa6_embedscope(satosin6(dst), 0);
798			if (error != 0)
799				goto bad;
800			break;
801#endif
802		default:
803			error = EAFNOSUPPORT;
804			goto bad;
805		}
806		error = gif_set_tunnel(ifp, src, dst);
807		break;
808	case SIOCDIFPHYADDR:
809		gif_delete_tunnel(ifp);
810		break;
811	case SIOCGIFPSRCADDR:
812	case SIOCGIFPDSTADDR:
813#ifdef INET6
814	case SIOCGIFPSRCADDR_IN6:
815	case SIOCGIFPDSTADDR_IN6:
816#endif
817		if (sc->gif_family == 0) {
818			error = EADDRNOTAVAIL;
819			break;
820		}
821		GIF_RLOCK(sc);
822		switch (cmd) {
823#ifdef INET
824		case SIOCGIFPSRCADDR:
825		case SIOCGIFPDSTADDR:
826			if (sc->gif_family != AF_INET) {
827				error = EADDRNOTAVAIL;
828				break;
829			}
830			sin = (struct sockaddr_in *)&ifr->ifr_addr;
831			memset(sin, 0, sizeof(*sin));
832			sin->sin_family = AF_INET;
833			sin->sin_len = sizeof(*sin);
834			break;
835#endif
836#ifdef INET6
837		case SIOCGIFPSRCADDR_IN6:
838		case SIOCGIFPDSTADDR_IN6:
839			if (sc->gif_family != AF_INET6) {
840				error = EADDRNOTAVAIL;
841				break;
842			}
843			sin6 = (struct sockaddr_in6 *)
844				&(((struct in6_ifreq *)data)->ifr_addr);
845			memset(sin6, 0, sizeof(*sin6));
846			sin6->sin6_family = AF_INET6;
847			sin6->sin6_len = sizeof(*sin6);
848			break;
849#endif
850		default:
851			error = EAFNOSUPPORT;
852		}
853		if (error == 0) {
854			switch (cmd) {
855#ifdef INET
856			case SIOCGIFPSRCADDR:
857				sin->sin_addr = sc->gif_iphdr->ip_src;
858				break;
859			case SIOCGIFPDSTADDR:
860				sin->sin_addr = sc->gif_iphdr->ip_dst;
861				break;
862#endif
863#ifdef INET6
864			case SIOCGIFPSRCADDR_IN6:
865				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
866				break;
867			case SIOCGIFPDSTADDR_IN6:
868				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
869				break;
870#endif
871			}
872		}
873		GIF_RUNLOCK(sc);
874		if (error != 0)
875			break;
876		switch (cmd) {
877#ifdef INET
878		case SIOCGIFPSRCADDR:
879		case SIOCGIFPDSTADDR:
880			error = prison_if(curthread->td_ucred,
881			    (struct sockaddr *)sin);
882			if (error != 0)
883				memset(sin, 0, sizeof(*sin));
884			break;
885#endif
886#ifdef INET6
887		case SIOCGIFPSRCADDR_IN6:
888		case SIOCGIFPDSTADDR_IN6:
889			error = prison_if(curthread->td_ucred,
890			    (struct sockaddr *)sin6);
891			if (error == 0)
892				error = sa6_recoverscope(sin6);
893			if (error != 0)
894				memset(sin6, 0, sizeof(*sin6));
895#endif
896		}
897		break;
898	case SIOCGTUNFIB:
899		ifr->ifr_fib = sc->gif_fibnum;
900		break;
901	case SIOCSTUNFIB:
902		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
903			break;
904		if (ifr->ifr_fib >= rt_numfibs)
905			error = EINVAL;
906		else
907			sc->gif_fibnum = ifr->ifr_fib;
908		break;
909	case GIFGOPTS:
910		options = sc->gif_options;
911		error = copyout(&options, ifr->ifr_data, sizeof(options));
912		break;
913	case GIFSOPTS:
914		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
915			break;
916		error = copyin(ifr->ifr_data, &options, sizeof(options));
917		if (error)
918			break;
919		if (options & ~GIF_OPTMASK)
920			error = EINVAL;
921		else
922			sc->gif_options = options;
923		break;
924	default:
925		error = EINVAL;
926		break;
927	}
928bad:
929	sx_xunlock(&gif_ioctl_sx);
930	return (error);
931}
932
933static void
934gif_detach(struct gif_softc *sc)
935{
936
937	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
938	if (sc->gif_ecookie != NULL)
939		encap_detach(sc->gif_ecookie);
940	sc->gif_ecookie = NULL;
941}
942
943static int
944gif_attach(struct gif_softc *sc, int af)
945{
946
947	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
948	switch (af) {
949#ifdef INET
950	case AF_INET:
951		return (in_gif_attach(sc));
952#endif
953#ifdef INET6
954	case AF_INET6:
955		return (in6_gif_attach(sc));
956#endif
957	}
958	return (EAFNOSUPPORT);
959}
960
961static int
962gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
963{
964	struct gif_softc *sc = ifp->if_softc;
965	struct gif_softc *tsc;
966#ifdef INET
967	struct ip *ip;
968#endif
969#ifdef INET6
970	struct ip6_hdr *ip6;
971#endif
972	void *hdr;
973	int error = 0;
974
975	if (sc == NULL)
976		return (ENXIO);
977	/* Disallow parallel tunnels unless instructed otherwise. */
978	if (V_parallel_tunnels == 0) {
979		GIF_LIST_LOCK();
980		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
981			if (tsc == sc || tsc->gif_family != src->sa_family)
982				continue;
983#ifdef INET
984			if (tsc->gif_family == AF_INET &&
985			    tsc->gif_iphdr->ip_src.s_addr ==
986			    satosin(src)->sin_addr.s_addr &&
987			    tsc->gif_iphdr->ip_dst.s_addr ==
988			    satosin(dst)->sin_addr.s_addr) {
989				error = EADDRNOTAVAIL;
990				GIF_LIST_UNLOCK();
991				goto bad;
992			}
993#endif
994#ifdef INET6
995			if (tsc->gif_family == AF_INET6 &&
996			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
997			    &satosin6(src)->sin6_addr) &&
998			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
999			    &satosin6(dst)->sin6_addr)) {
1000				error = EADDRNOTAVAIL;
1001				GIF_LIST_UNLOCK();
1002				goto bad;
1003			}
1004#endif
1005		}
1006		GIF_LIST_UNLOCK();
1007	}
1008	switch (src->sa_family) {
1009#ifdef INET
1010	case AF_INET:
1011		hdr = ip = malloc(sizeof(struct ip), M_GIF,
1012		    M_WAITOK | M_ZERO);
1013		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
1014		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
1015		break;
1016#endif
1017#ifdef INET6
1018	case AF_INET6:
1019		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
1020		    M_WAITOK | M_ZERO);
1021		ip6->ip6_src = satosin6(src)->sin6_addr;
1022		ip6->ip6_dst = satosin6(dst)->sin6_addr;
1023		ip6->ip6_vfc = IPV6_VERSION;
1024		break;
1025#endif
1026	default:
1027		return (EAFNOSUPPORT);
1028	}
1029
1030	if (sc->gif_family != src->sa_family)
1031		gif_detach(sc);
1032	if (sc->gif_family == 0 ||
1033	    sc->gif_family != src->sa_family)
1034		error = gif_attach(sc, src->sa_family);
1035
1036	GIF_WLOCK(sc);
1037	if (sc->gif_family != 0)
1038		free(sc->gif_hdr, M_GIF);
1039	sc->gif_family = src->sa_family;
1040	sc->gif_hdr = hdr;
1041	GIF_WUNLOCK(sc);
1042#if defined(INET) || defined(INET6)
1043bad:
1044#endif
1045	if (error == 0 && sc->gif_family != 0) {
1046		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1047		if_link_state_change(ifp, LINK_STATE_UP);
1048	} else {
1049		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1050		if_link_state_change(ifp, LINK_STATE_DOWN);
1051	}
1052	return (error);
1053}
1054
1055static void
1056gif_delete_tunnel(struct ifnet *ifp)
1057{
1058	struct gif_softc *sc = ifp->if_softc;
1059	int family;
1060
1061	if (sc == NULL)
1062		return;
1063
1064	GIF_WLOCK(sc);
1065	family = sc->gif_family;
1066	sc->gif_family = 0;
1067	GIF_WUNLOCK(sc);
1068	if (family != 0) {
1069		gif_detach(sc);
1070		free(sc->gif_hdr, M_GIF);
1071	}
1072	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1073	if_link_state_change(ifp, LINK_STATE_DOWN);
1074}
1075