if_gif.c revision 276149
1/*-
2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the project nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	$KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/net/if_gif.c 276149 2014-12-23 16:33:44Z ae $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/jail.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/module.h>
46#include <sys/rmlock.h>
47#include <sys/socket.h>
48#include <sys/sockio.h>
49#include <sys/sx.h>
50#include <sys/errno.h>
51#include <sys/time.h>
52#include <sys/sysctl.h>
53#include <sys/syslog.h>
54#include <sys/priv.h>
55#include <sys/proc.h>
56#include <sys/protosw.h>
57#include <sys/conf.h>
58#include <machine/cpu.h>
59
60#include <net/if.h>
61#include <net/if_var.h>
62#include <net/if_clone.h>
63#include <net/if_types.h>
64#include <net/netisr.h>
65#include <net/route.h>
66#include <net/bpf.h>
67#include <net/vnet.h>
68
69#include <netinet/in.h>
70#include <netinet/in_systm.h>
71#include <netinet/ip.h>
72#include <netinet/ip_ecn.h>
73#ifdef	INET
74#include <netinet/in_var.h>
75#include <netinet/in_gif.h>
76#include <netinet/ip_var.h>
77#endif	/* INET */
78
79#ifdef INET6
80#ifndef INET
81#include <netinet/in.h>
82#endif
83#include <netinet6/in6_var.h>
84#include <netinet/ip6.h>
85#include <netinet6/ip6_ecn.h>
86#include <netinet6/ip6_var.h>
87#include <netinet6/scope6_var.h>
88#include <netinet6/in6_gif.h>
89#include <netinet6/ip6protosw.h>
90#endif /* INET6 */
91
92#include <netinet/ip_encap.h>
93#include <net/ethernet.h>
94#include <net/if_bridgevar.h>
95#include <net/if_gif.h>
96
97#include <security/mac/mac_framework.h>
98
99static const char gifname[] = "gif";
100
101/*
102 * gif_mtx protects a per-vnet gif_softc_list.
103 */
104static VNET_DEFINE(struct mtx, gif_mtx);
105#define	V_gif_mtx		VNET(gif_mtx)
106static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
107static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
108#define	V_gif_softc_list	VNET(gif_softc_list)
109static struct sx gif_ioctl_sx;
110SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
111
112#define	GIF_LIST_LOCK_INIT(x)		mtx_init(&V_gif_mtx, "gif_mtx", \
113					    NULL, MTX_DEF)
114#define	GIF_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gif_mtx)
115#define	GIF_LIST_LOCK(x)		mtx_lock(&V_gif_mtx)
116#define	GIF_LIST_UNLOCK(x)		mtx_unlock(&V_gif_mtx)
117
118void	(*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
119void	(*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
120void	(*ng_gif_attach_p)(struct ifnet *ifp);
121void	(*ng_gif_detach_p)(struct ifnet *ifp);
122
123static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
124    struct sockaddr *);
125static void	gif_delete_tunnel(struct ifnet *);
126static int	gif_ioctl(struct ifnet *, u_long, caddr_t);
127static int	gif_transmit(struct ifnet *, struct mbuf *);
128static void	gif_qflush(struct ifnet *);
129static int	gif_clone_create(struct if_clone *, int, caddr_t);
130static void	gif_clone_destroy(struct ifnet *);
131static VNET_DEFINE(struct if_clone *, gif_cloner);
132#define	V_gif_cloner	VNET(gif_cloner)
133
134static int gifmodevent(module_t, int, void *);
135
136SYSCTL_DECL(_net_link);
137static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
138    "Generic Tunnel Interface");
139#ifndef MAX_GIF_NEST
140/*
141 * This macro controls the default upper limitation on nesting of gif tunnels.
142 * Since, setting a large value to this macro with a careless configuration
143 * may introduce system crash, we don't allow any nestings by default.
144 * If you need to configure nested gif tunnels, you can define this macro
145 * in your kernel configuration file.  However, if you do so, please be
146 * careful to configure the tunnels so that it won't make a loop.
147 */
148#define MAX_GIF_NEST 1
149#endif
150static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
151#define	V_max_gif_nesting	VNET(max_gif_nesting)
152SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_RW,
153    &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
154
155/*
156 * By default, we disallow creation of multiple tunnels between the same
157 * pair of addresses.  Some applications require this functionality so
158 * we allow control over this check here.
159 */
160#ifdef XBONEHACK
161static VNET_DEFINE(int, parallel_tunnels) = 1;
162#else
163static VNET_DEFINE(int, parallel_tunnels) = 0;
164#endif
165#define	V_parallel_tunnels	VNET(parallel_tunnels)
166SYSCTL_VNET_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_RW,
167    &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?");
168
169/* copy from src/sys/net/if_ethersubr.c */
170static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
171			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
172#ifndef ETHER_IS_BROADCAST
173#define ETHER_IS_BROADCAST(addr) \
174	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
175#endif
176
177static int
178gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
179{
180	struct gif_softc *sc;
181
182	sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
183	sc->gif_fibnum = curthread->td_proc->p_fibnum;
184	GIF2IFP(sc) = if_alloc(IFT_GIF);
185	GIF_LOCK_INIT(sc);
186	GIF2IFP(sc)->if_softc = sc;
187	if_initname(GIF2IFP(sc), gifname, unit);
188
189	GIF2IFP(sc)->if_addrlen = 0;
190	GIF2IFP(sc)->if_mtu    = GIF_MTU;
191	GIF2IFP(sc)->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
192#if 0
193	/* turn off ingress filter */
194	GIF2IFP(sc)->if_flags  |= IFF_LINK2;
195#endif
196	GIF2IFP(sc)->if_ioctl  = gif_ioctl;
197	GIF2IFP(sc)->if_transmit  = gif_transmit;
198	GIF2IFP(sc)->if_qflush  = gif_qflush;
199	GIF2IFP(sc)->if_output = gif_output;
200	if_attach(GIF2IFP(sc));
201	bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t));
202	if (ng_gif_attach_p != NULL)
203		(*ng_gif_attach_p)(GIF2IFP(sc));
204
205	GIF_LIST_LOCK();
206	LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
207	GIF_LIST_UNLOCK();
208	return (0);
209}
210
211static void
212gif_clone_destroy(struct ifnet *ifp)
213{
214	struct gif_softc *sc;
215
216	sx_xlock(&gif_ioctl_sx);
217	sc = ifp->if_softc;
218	gif_delete_tunnel(ifp);
219	GIF_LIST_LOCK();
220	LIST_REMOVE(sc, gif_list);
221	GIF_LIST_UNLOCK();
222	if (ng_gif_detach_p != NULL)
223		(*ng_gif_detach_p)(ifp);
224	bpfdetach(ifp);
225	if_detach(ifp);
226	ifp->if_softc = NULL;
227	sx_xunlock(&gif_ioctl_sx);
228
229	if_free(ifp);
230	GIF_LOCK_DESTROY(sc);
231	free(sc, M_GIF);
232}
233
234static void
235vnet_gif_init(const void *unused __unused)
236{
237
238	LIST_INIT(&V_gif_softc_list);
239	GIF_LIST_LOCK_INIT();
240	V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
241	    gif_clone_destroy, 0);
242}
243VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
244    vnet_gif_init, NULL);
245
246static void
247vnet_gif_uninit(const void *unused __unused)
248{
249
250	if_clone_detach(V_gif_cloner);
251	GIF_LIST_LOCK_DESTROY();
252}
253VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
254    vnet_gif_uninit, NULL);
255
256static int
257gifmodevent(module_t mod, int type, void *data)
258{
259
260	switch (type) {
261	case MOD_LOAD:
262	case MOD_UNLOAD:
263		break;
264	default:
265		return (EOPNOTSUPP);
266	}
267	return (0);
268}
269
270static moduledata_t gif_mod = {
271	"if_gif",
272	gifmodevent,
273	0
274};
275
276DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
277MODULE_VERSION(if_gif, 1);
278
279int
280gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
281{
282	GIF_RLOCK_TRACKER;
283	struct gif_softc *sc;
284	int ret;
285	uint8_t ver;
286
287	sc = (struct gif_softc *)arg;
288	if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
289		return (0);
290
291	ret = 0;
292	GIF_RLOCK(sc);
293
294	/* no physical address */
295	if (sc->gif_family == 0)
296		goto done;
297
298	switch (proto) {
299#ifdef INET
300	case IPPROTO_IPV4:
301#endif
302#ifdef INET6
303	case IPPROTO_IPV6:
304#endif
305	case IPPROTO_ETHERIP:
306		break;
307	default:
308		goto done;
309	}
310
311	/* Bail on short packets */
312	if (m->m_pkthdr.len < sizeof(struct ip))
313		goto done;
314
315	m_copydata(m, 0, 1, &ver);
316	switch (ver >> 4) {
317#ifdef INET
318	case 4:
319		if (sc->gif_family != AF_INET)
320			goto done;
321		ret = in_gif_encapcheck(m, off, proto, arg);
322		break;
323#endif
324#ifdef INET6
325	case 6:
326		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
327			goto done;
328		if (sc->gif_family != AF_INET6)
329			goto done;
330		ret = in6_gif_encapcheck(m, off, proto, arg);
331		break;
332#endif
333	}
334done:
335	GIF_RUNLOCK(sc);
336	return (ret);
337}
338
339static int
340gif_transmit(struct ifnet *ifp, struct mbuf *m)
341{
342	struct gif_softc *sc;
343	struct etherip_header *eth;
344#ifdef INET
345	struct ip *ip;
346#endif
347#ifdef INET6
348	struct ip6_hdr *ip6;
349	uint32_t t;
350#endif
351	uint32_t af;
352	uint8_t proto, ecn;
353	int error;
354
355	error = ENETDOWN;
356	sc = ifp->if_softc;
357	if (sc->gif_family == 0) {
358		m_freem(m);
359		goto err;
360	}
361	/* Now pull back the af that we stashed in the csum_data. */
362	af = m->m_pkthdr.csum_data;
363	BPF_MTAP2(ifp, &af, sizeof(af), m);
364	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
365	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
366	M_SETFIB(m, sc->gif_fibnum);
367	/* inner AF-specific encapsulation */
368	ecn = 0;
369	switch (af) {
370#ifdef INET
371	case AF_INET:
372		proto = IPPROTO_IPV4;
373		if (m->m_len < sizeof(struct ip))
374			m = m_pullup(m, sizeof(struct ip));
375		if (m == NULL) {
376			error = ENOBUFS;
377			goto err;
378		}
379		ip = mtod(m, struct ip *);
380		ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
381		    ECN_NOCARE, &ecn, &ip->ip_tos);
382		break;
383#endif
384#ifdef INET6
385	case AF_INET6:
386		proto = IPPROTO_IPV6;
387		if (m->m_len < sizeof(struct ip6_hdr))
388			m = m_pullup(m, sizeof(struct ip6_hdr));
389		if (m == NULL) {
390			error = ENOBUFS;
391			goto err;
392		}
393		t = 0;
394		ip6 = mtod(m, struct ip6_hdr *);
395		ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
396		    ECN_NOCARE, &t, &ip6->ip6_flow);
397		ecn = (ntohl(t) >> 20) & 0xff;
398		break;
399#endif
400	case AF_LINK:
401		proto = IPPROTO_ETHERIP;
402		M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT);
403		if (m == NULL) {
404			error = ENOBUFS;
405			goto err;
406		}
407		eth = mtod(m, struct etherip_header *);
408		eth->eip_resvh = 0;
409		if ((sc->gif_options & GIF_SEND_REVETHIP) != 0) {
410			eth->eip_ver = 0;
411			eth->eip_resvl = ETHERIP_VERSION;
412		} else {
413			eth->eip_ver = ETHERIP_VERSION;
414			eth->eip_resvl = 0;
415		}
416		break;
417	default:
418		error = EAFNOSUPPORT;
419		m_freem(m);
420		goto err;
421	}
422	/* XXX should we check if our outer source is legal? */
423	/* dispatch to output logic based on outer AF */
424	switch (sc->gif_family) {
425#ifdef INET
426	case AF_INET:
427		error = in_gif_output(ifp, m, proto, ecn);
428		break;
429#endif
430#ifdef INET6
431	case AF_INET6:
432		error = in6_gif_output(ifp, m, proto, ecn);
433		break;
434#endif
435	default:
436		m_freem(m);
437	}
438err:
439	if (error)
440		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
441	return (error);
442}
443
444static void
445gif_qflush(struct ifnet *ifp __unused)
446{
447
448}
449
450int
451gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
452	struct route *ro)
453{
454	struct m_tag *mtag;
455	uint32_t af;
456	int gif_called;
457	int error = 0;
458#ifdef MAC
459	error = mac_ifnet_check_transmit(ifp, m);
460	if (error)
461		goto err;
462#endif
463	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
464	    (ifp->if_flags & IFF_UP) == 0) {
465		error = ENETDOWN;
466		goto err;
467	}
468
469	/*
470	 * gif may cause infinite recursion calls when misconfigured.
471	 * We'll prevent this by detecting loops.
472	 *
473	 * High nesting level may cause stack exhaustion.
474	 * We'll prevent this by introducing upper limit.
475	 */
476	gif_called = 1;
477	mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, NULL);
478	while (mtag != NULL) {
479		if (*(struct ifnet **)(mtag + 1) == ifp) {
480			log(LOG_NOTICE,
481			    "gif_output: loop detected on %s\n",
482			    (*(struct ifnet **)(mtag + 1))->if_xname);
483			error = EIO;	/* is there better errno? */
484			goto err;
485		}
486		mtag = m_tag_locate(m, MTAG_GIF, MTAG_GIF_CALLED, mtag);
487		gif_called++;
488	}
489	if (gif_called > V_max_gif_nesting) {
490		log(LOG_NOTICE,
491		    "gif_output: recursively called too many times(%d)\n",
492		    gif_called);
493		error = EIO;	/* is there better errno? */
494		goto err;
495	}
496	mtag = m_tag_alloc(MTAG_GIF, MTAG_GIF_CALLED, sizeof(struct ifnet *),
497	    M_NOWAIT);
498	if (mtag == NULL) {
499		error = ENOMEM;
500		goto err;
501	}
502	*(struct ifnet **)(mtag + 1) = ifp;
503	m_tag_prepend(m, mtag);
504
505	m->m_flags &= ~(M_BCAST|M_MCAST);
506	if (dst->sa_family == AF_UNSPEC)
507		bcopy(dst->sa_data, &af, sizeof(af));
508	else
509		af = dst->sa_family;
510	if (ifp->if_bridge)
511		af = AF_LINK;
512	/*
513	 * Now save the af in the inbound pkt csum data, this is a cheat since
514	 * we are using the inbound csum_data field to carry the af over to
515	 * the gif_transmit() routine, avoiding using yet another mtag.
516	 */
517	m->m_pkthdr.csum_data = af;
518	return (ifp->if_transmit(ifp, m));
519err:
520	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
521	m_freem(m);
522	return (error);
523}
524
525void
526gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
527{
528	struct etherip_header *eip;
529#ifdef INET
530	struct ip *ip;
531#endif
532#ifdef INET6
533	struct ip6_hdr *ip6;
534	uint32_t t;
535#endif
536	struct gif_softc *sc;
537	struct ether_header *eh;
538	struct ifnet *oldifp;
539	uint32_t gif_options;
540	int isr, n, af;
541
542	if (ifp == NULL) {
543		/* just in case */
544		m_freem(m);
545		return;
546	}
547	sc = ifp->if_softc;
548	gif_options = sc->gif_options;
549	m->m_pkthdr.rcvif = ifp;
550	m_clrprotoflags(m);
551	switch (proto) {
552#ifdef INET
553	case IPPROTO_IPV4:
554		af = AF_INET;
555		if (m->m_len < sizeof(struct ip))
556			m = m_pullup(m, sizeof(struct ip));
557		if (m == NULL)
558			goto drop;
559		ip = mtod(m, struct ip *);
560		if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
561		    ECN_NOCARE, &ecn, &ip->ip_tos) == 0) {
562			m_freem(m);
563			goto drop;
564		}
565		break;
566#endif
567#ifdef INET6
568	case IPPROTO_IPV6:
569		af = AF_INET6;
570		if (m->m_len < sizeof(struct ip6_hdr))
571			m = m_pullup(m, sizeof(struct ip6_hdr));
572		if (m == NULL)
573			goto drop;
574		t = htonl((uint32_t)ecn << 20);
575		ip6 = mtod(m, struct ip6_hdr *);
576		if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED:
577		    ECN_NOCARE, &t, &ip6->ip6_flow) == 0) {
578			m_freem(m);
579			goto drop;
580		}
581		break;
582#endif
583	case IPPROTO_ETHERIP:
584		af = AF_LINK;
585		break;
586	default:
587		m_freem(m);
588		goto drop;
589	}
590
591#ifdef MAC
592	mac_ifnet_create_mbuf(ifp, m);
593#endif
594
595	if (bpf_peers_present(ifp->if_bpf)) {
596		uint32_t af1 = af;
597		bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m);
598	}
599
600	if ((ifp->if_flags & IFF_MONITOR) != 0) {
601		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
602		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
603		m_freem(m);
604		return;
605	}
606
607	if (ng_gif_input_p != NULL) {
608		(*ng_gif_input_p)(ifp, &m, af);
609		if (m == NULL)
610			goto drop;
611	}
612
613	/*
614	 * Put the packet to the network layer input queue according to the
615	 * specified address family.
616	 * Note: older versions of gif_input directly called network layer
617	 * input functions, e.g. ip6_input, here.  We changed the policy to
618	 * prevent too many recursive calls of such input functions, which
619	 * might cause kernel panic.  But the change may introduce another
620	 * problem; if the input queue is full, packets are discarded.
621	 * The kernel stack overflow really happened, and we believed
622	 * queue-full rarely occurs, so we changed the policy.
623	 */
624	switch (af) {
625#ifdef INET
626	case AF_INET:
627		isr = NETISR_IP;
628		break;
629#endif
630#ifdef INET6
631	case AF_INET6:
632		isr = NETISR_IPV6;
633		break;
634#endif
635	case AF_LINK:
636		n = sizeof(struct etherip_header) + sizeof(struct ether_header);
637		if (n > m->m_len)
638			m = m_pullup(m, n);
639		if (m == NULL)
640			goto drop;
641		eip = mtod(m, struct etherip_header *);
642		/*
643		 * GIF_ACCEPT_REVETHIP (enabled by default) intentionally
644		 * accepts an EtherIP packet with revered version field in
645		 * the header.  This is a knob for backward compatibility
646		 * with FreeBSD 7.2R or prior.
647		 */
648		if (eip->eip_ver != ETHERIP_VERSION) {
649			if ((gif_options & GIF_ACCEPT_REVETHIP) == 0 ||
650			    eip->eip_resvl != ETHERIP_VERSION) {
651				/* discard unknown versions */
652				m_freem(m);
653				goto drop;
654			}
655		}
656		m_adj(m, sizeof(struct etherip_header));
657
658		m->m_flags &= ~(M_BCAST|M_MCAST);
659		m->m_pkthdr.rcvif = ifp;
660
661		if (ifp->if_bridge) {
662			oldifp = ifp;
663			eh = mtod(m, struct ether_header *);
664			if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
665				if (ETHER_IS_BROADCAST(eh->ether_dhost))
666					m->m_flags |= M_BCAST;
667				else
668					m->m_flags |= M_MCAST;
669				if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
670			}
671			BRIDGE_INPUT(ifp, m);
672
673			if (m != NULL && ifp != oldifp) {
674				/*
675				 * The bridge gave us back itself or one of the
676				 * members for which the frame is addressed.
677				 */
678				ether_demux(ifp, m);
679				return;
680			}
681		}
682		if (m != NULL)
683			m_freem(m);
684		return;
685
686	default:
687		if (ng_gif_input_orphan_p != NULL)
688			(*ng_gif_input_orphan_p)(ifp, m, af);
689		else
690			m_freem(m);
691		return;
692	}
693
694	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
695	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
696	M_SETFIB(m, ifp->if_fib);
697	netisr_dispatch(isr, m);
698	return;
699drop:
700	if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
701}
702
703/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
704int
705gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
706{
707	GIF_RLOCK_TRACKER;
708	struct ifreq *ifr = (struct ifreq*)data;
709	struct sockaddr *dst, *src;
710	struct gif_softc *sc;
711#ifdef INET
712	struct sockaddr_in *sin = NULL;
713#endif
714#ifdef INET6
715	struct sockaddr_in6 *sin6 = NULL;
716#endif
717	u_int options;
718	int error;
719
720	switch (cmd) {
721	case SIOCSIFADDR:
722		ifp->if_flags |= IFF_UP;
723	case SIOCADDMULTI:
724	case SIOCDELMULTI:
725	case SIOCGIFMTU:
726	case SIOCSIFFLAGS:
727		return (0);
728	case SIOCSIFMTU:
729		if (ifr->ifr_mtu < GIF_MTU_MIN ||
730		    ifr->ifr_mtu > GIF_MTU_MAX)
731			return (EINVAL);
732		else
733			ifp->if_mtu = ifr->ifr_mtu;
734		return (0);
735	}
736	sx_xlock(&gif_ioctl_sx);
737	sc = ifp->if_softc;
738	if (sc == NULL) {
739		error = ENXIO;
740		goto bad;
741	}
742	error = 0;
743	switch (cmd) {
744	case SIOCSIFPHYADDR:
745#ifdef INET6
746	case SIOCSIFPHYADDR_IN6:
747#endif
748		error = EINVAL;
749		switch (cmd) {
750#ifdef INET
751		case SIOCSIFPHYADDR:
752			src = (struct sockaddr *)
753				&(((struct in_aliasreq *)data)->ifra_addr);
754			dst = (struct sockaddr *)
755				&(((struct in_aliasreq *)data)->ifra_dstaddr);
756			break;
757#endif
758#ifdef INET6
759		case SIOCSIFPHYADDR_IN6:
760			src = (struct sockaddr *)
761				&(((struct in6_aliasreq *)data)->ifra_addr);
762			dst = (struct sockaddr *)
763				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
764			break;
765#endif
766		default:
767			goto bad;
768		}
769		/* sa_family must be equal */
770		if (src->sa_family != dst->sa_family ||
771		    src->sa_len != dst->sa_len)
772			goto bad;
773
774		/* validate sa_len */
775		switch (src->sa_family) {
776#ifdef INET
777		case AF_INET:
778			if (src->sa_len != sizeof(struct sockaddr_in))
779				goto bad;
780			break;
781#endif
782#ifdef INET6
783		case AF_INET6:
784			if (src->sa_len != sizeof(struct sockaddr_in6))
785				goto bad;
786			break;
787#endif
788		default:
789			error = EAFNOSUPPORT;
790			goto bad;
791		}
792		/* check sa_family looks sane for the cmd */
793		error = EAFNOSUPPORT;
794		switch (cmd) {
795#ifdef INET
796		case SIOCSIFPHYADDR:
797			if (src->sa_family == AF_INET)
798				break;
799			goto bad;
800#endif
801#ifdef INET6
802		case SIOCSIFPHYADDR_IN6:
803			if (src->sa_family == AF_INET6)
804				break;
805			goto bad;
806#endif
807		}
808		error = EADDRNOTAVAIL;
809		switch (src->sa_family) {
810#ifdef INET
811		case AF_INET:
812			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
813			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
814				goto bad;
815			break;
816#endif
817#ifdef INET6
818		case AF_INET6:
819			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
820			    ||
821			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
822				goto bad;
823			/*
824			 * Check validity of the scope zone ID of the
825			 * addresses, and convert it into the kernel
826			 * internal form if necessary.
827			 */
828			error = sa6_embedscope(satosin6(src), 0);
829			if (error != 0)
830				goto bad;
831			error = sa6_embedscope(satosin6(dst), 0);
832			if (error != 0)
833				goto bad;
834#endif
835		};
836		error = gif_set_tunnel(ifp, src, dst);
837		break;
838	case SIOCDIFPHYADDR:
839		gif_delete_tunnel(ifp);
840		break;
841	case SIOCGIFPSRCADDR:
842	case SIOCGIFPDSTADDR:
843#ifdef INET6
844	case SIOCGIFPSRCADDR_IN6:
845	case SIOCGIFPDSTADDR_IN6:
846#endif
847		if (sc->gif_family == 0) {
848			error = EADDRNOTAVAIL;
849			break;
850		}
851		GIF_RLOCK(sc);
852		switch (cmd) {
853#ifdef INET
854		case SIOCGIFPSRCADDR:
855		case SIOCGIFPDSTADDR:
856			if (sc->gif_family != AF_INET) {
857				error = EADDRNOTAVAIL;
858				break;
859			}
860			sin = (struct sockaddr_in *)&ifr->ifr_addr;
861			memset(sin, 0, sizeof(*sin));
862			sin->sin_family = AF_INET;
863			sin->sin_len = sizeof(*sin);
864			break;
865#endif
866#ifdef INET6
867		case SIOCGIFPSRCADDR_IN6:
868		case SIOCGIFPDSTADDR_IN6:
869			if (sc->gif_family != AF_INET6) {
870				error = EADDRNOTAVAIL;
871				break;
872			}
873			sin6 = (struct sockaddr_in6 *)
874				&(((struct in6_ifreq *)data)->ifr_addr);
875			memset(sin6, 0, sizeof(*sin6));
876			sin6->sin6_family = AF_INET6;
877			sin6->sin6_len = sizeof(*sin6);
878			break;
879#endif
880		default:
881			error = EAFNOSUPPORT;
882		}
883		if (error == 0) {
884			switch (cmd) {
885#ifdef INET
886			case SIOCGIFPSRCADDR:
887				sin->sin_addr = sc->gif_iphdr->ip_src;
888				break;
889			case SIOCGIFPDSTADDR:
890				sin->sin_addr = sc->gif_iphdr->ip_dst;
891				break;
892#endif
893#ifdef INET6
894			case SIOCGIFPSRCADDR_IN6:
895				sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
896				break;
897			case SIOCGIFPDSTADDR_IN6:
898				sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
899				break;
900#endif
901			}
902		}
903		GIF_RUNLOCK(sc);
904		if (error != 0)
905			break;
906		switch (cmd) {
907#ifdef INET
908		case SIOCGIFPSRCADDR:
909		case SIOCGIFPDSTADDR:
910			error = prison_if(curthread->td_ucred,
911			    (struct sockaddr *)sin);
912			if (error != 0)
913				memset(sin, 0, sizeof(*sin));
914			break;
915#endif
916#ifdef INET6
917		case SIOCGIFPSRCADDR_IN6:
918		case SIOCGIFPDSTADDR_IN6:
919			error = prison_if(curthread->td_ucred,
920			    (struct sockaddr *)sin6);
921			if (error == 0)
922				error = sa6_recoverscope(sin6);
923			if (error != 0)
924				memset(sin6, 0, sizeof(*sin6));
925#endif
926		}
927		break;
928	case GIFGOPTS:
929		options = sc->gif_options;
930		error = copyout(&options, ifr->ifr_data, sizeof(options));
931		break;
932	case GIFSOPTS:
933		if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0)
934			break;
935		error = copyin(ifr->ifr_data, &options, sizeof(options));
936		if (error)
937			break;
938		if (options & ~GIF_OPTMASK)
939			error = EINVAL;
940		else
941			sc->gif_options = options;
942		break;
943
944	default:
945		error = EINVAL;
946		break;
947	}
948bad:
949	sx_xunlock(&gif_ioctl_sx);
950	return (error);
951}
952
953static void
954gif_detach(struct gif_softc *sc)
955{
956
957	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
958	if (sc->gif_ecookie != NULL)
959		encap_detach(sc->gif_ecookie);
960	sc->gif_ecookie = NULL;
961}
962
963static int
964gif_attach(struct gif_softc *sc, int af)
965{
966
967	sx_assert(&gif_ioctl_sx, SA_XLOCKED);
968	switch (af) {
969#ifdef INET
970	case AF_INET:
971		return (in_gif_attach(sc));
972#endif
973#ifdef INET6
974	case AF_INET6:
975		return (in6_gif_attach(sc));
976#endif
977	}
978	return (EAFNOSUPPORT);
979}
980
981static int
982gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
983{
984	struct gif_softc *sc = ifp->if_softc;
985	struct gif_softc *tsc;
986#ifdef INET
987	struct ip *ip;
988#endif
989#ifdef INET6
990	struct ip6_hdr *ip6;
991#endif
992	void *hdr;
993	int error = 0;
994
995	if (sc == NULL)
996		return (ENXIO);
997	/* Disallow parallel tunnels unless instructed otherwise. */
998	if (V_parallel_tunnels == 0) {
999		GIF_LIST_LOCK();
1000		LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
1001			if (tsc == sc || tsc->gif_family != src->sa_family)
1002				continue;
1003#ifdef INET
1004			if (tsc->gif_family == AF_INET &&
1005			    tsc->gif_iphdr->ip_src.s_addr ==
1006			    satosin(src)->sin_addr.s_addr &&
1007			    tsc->gif_iphdr->ip_dst.s_addr ==
1008			    satosin(dst)->sin_addr.s_addr) {
1009				error = EADDRNOTAVAIL;
1010				GIF_LIST_UNLOCK();
1011				goto bad;
1012			}
1013#endif
1014#ifdef INET6
1015			if (tsc->gif_family == AF_INET6 &&
1016			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
1017			    &satosin6(src)->sin6_addr) &&
1018			    IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
1019			    &satosin6(dst)->sin6_addr)) {
1020				error = EADDRNOTAVAIL;
1021				GIF_LIST_UNLOCK();
1022				goto bad;
1023			}
1024#endif
1025		}
1026		GIF_LIST_UNLOCK();
1027	}
1028	switch (src->sa_family) {
1029#ifdef INET
1030	case AF_INET:
1031		hdr = ip = malloc(sizeof(struct ip), M_GIF,
1032		    M_WAITOK | M_ZERO);
1033		ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
1034		ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
1035		break;
1036#endif
1037#ifdef INET6
1038	case AF_INET6:
1039		hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
1040		    M_WAITOK | M_ZERO);
1041		ip6->ip6_src = satosin6(src)->sin6_addr;
1042		ip6->ip6_dst = satosin6(dst)->sin6_addr;
1043		ip6->ip6_vfc = IPV6_VERSION;
1044		break;
1045#endif
1046	default:
1047		return (EAFNOSUPPORT);
1048	};
1049
1050	if (sc->gif_family != src->sa_family)
1051		gif_detach(sc);
1052	if (sc->gif_family == 0 ||
1053	    sc->gif_family != src->sa_family)
1054		error = gif_attach(sc, src->sa_family);
1055
1056	GIF_WLOCK(sc);
1057	if (sc->gif_family != 0)
1058		free(sc->gif_hdr, M_GIF);
1059	sc->gif_family = src->sa_family;
1060	sc->gif_hdr = hdr;
1061	GIF_WUNLOCK(sc);
1062#if defined(INET) || defined(INET6)
1063bad:
1064#endif
1065	if (error == 0 && sc->gif_family != 0)
1066		ifp->if_drv_flags |= IFF_DRV_RUNNING;
1067	else
1068		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1069	return (error);
1070}
1071
1072static void
1073gif_delete_tunnel(struct ifnet *ifp)
1074{
1075	struct gif_softc *sc = ifp->if_softc;
1076	int family;
1077
1078	if (sc == NULL)
1079		return;
1080
1081	GIF_WLOCK(sc);
1082	family = sc->gif_family;
1083	sc->gif_family = 0;
1084	GIF_WUNLOCK(sc);
1085	if (family != 0) {
1086		gif_detach(sc);
1087		free(sc->gif_hdr, M_GIF);
1088	}
1089	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1090}
1091