if_gre.c revision 284074
1/*-
2 * Copyright (c) 1998 The NetBSD Foundation, Inc.
3 * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Heiko W.Rupp <hwr@pilhuhn.de>
8 *
9 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: stable/10/sys/net/if_gre.c 284074 2015-06-06 13:37:11Z ae $");
37
38#include "opt_inet.h"
39#include "opt_inet6.h"
40
41#include <sys/param.h>
42#include <sys/jail.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/libkern.h>
46#include <sys/malloc.h>
47#include <sys/module.h>
48#include <sys/mbuf.h>
49#include <sys/priv.h>
50#include <sys/proc.h>
51#include <sys/protosw.h>
52#include <sys/rmlock.h>
53#include <sys/socket.h>
54#include <sys/sockio.h>
55#include <sys/sx.h>
56#include <sys/sysctl.h>
57#include <sys/syslog.h>
58#include <sys/systm.h>
59
60#include <net/ethernet.h>
61#include <net/if.h>
62#include <net/if_clone.h>
63#include <net/if_var.h>
64#include <net/if_types.h>
65#include <net/netisr.h>
66#include <net/vnet.h>
67#include <net/route.h>
68
69#include <netinet/in.h>
70#ifdef INET
71#include <netinet/in_systm.h>
72#include <netinet/in_var.h>
73#include <netinet/ip.h>
74#include <netinet/ip_var.h>
75#endif
76
77#ifdef INET6
78#include <netinet/ip6.h>
79#include <netinet6/in6_var.h>
80#include <netinet6/ip6_var.h>
81#include <netinet6/scope6_var.h>
82#endif
83
84#include <netinet/ip_encap.h>
85#include <net/bpf.h>
86#include <net/if_gre.h>
87
88#include <machine/in_cksum.h>
89
90#include <security/mac/mac_framework.h>
91#define	GREMTU			1500
92static const char grename[] = "gre";
93static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation");
94static VNET_DEFINE(struct mtx, gre_mtx);
95#define	V_gre_mtx	VNET(gre_mtx)
96#define	GRE_LIST_LOCK_INIT(x)		mtx_init(&V_gre_mtx, "gre_mtx", NULL, \
97					    MTX_DEF)
98#define	GRE_LIST_LOCK_DESTROY(x)	mtx_destroy(&V_gre_mtx)
99#define	GRE_LIST_LOCK(x)		mtx_lock(&V_gre_mtx)
100#define	GRE_LIST_UNLOCK(x)		mtx_unlock(&V_gre_mtx)
101
102static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list);
103#define	V_gre_softc_list	VNET(gre_softc_list)
104static struct sx gre_ioctl_sx;
105SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl");
106
107static int	gre_clone_create(struct if_clone *, int, caddr_t);
108static void	gre_clone_destroy(struct ifnet *);
109static VNET_DEFINE(struct if_clone *, gre_cloner);
110#define	V_gre_cloner	VNET(gre_cloner)
111
112static void	gre_qflush(struct ifnet *);
113static int	gre_transmit(struct ifnet *, struct mbuf *);
114static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
115static int	gre_output(struct ifnet *, struct mbuf *,
116		    const struct sockaddr *, struct route *);
117
118static void	gre_updatehdr(struct gre_softc *);
119static int	gre_set_tunnel(struct ifnet *, struct sockaddr *,
120    struct sockaddr *);
121static void	gre_delete_tunnel(struct ifnet *);
122
123SYSCTL_DECL(_net_link);
124static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0,
125    "Generic Routing Encapsulation");
126#ifndef MAX_GRE_NEST
127/*
128 * This macro controls the default upper limitation on nesting of gre tunnels.
129 * Since, setting a large value to this macro with a careless configuration
130 * may introduce system crash, we don't allow any nestings by default.
131 * If you need to configure nested gre tunnels, you can define this macro
132 * in your kernel configuration file.  However, if you do so, please be
133 * careful to configure the tunnels so that it won't make a loop.
134 */
135#define MAX_GRE_NEST 1
136#endif
137
138static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST;
139#define	V_max_gre_nesting	VNET(max_gre_nesting)
140SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
141    &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels");
142
143static void
144vnet_gre_init(const void *unused __unused)
145{
146	LIST_INIT(&V_gre_softc_list);
147	GRE_LIST_LOCK_INIT();
148	V_gre_cloner = if_clone_simple(grename, gre_clone_create,
149	    gre_clone_destroy, 0);
150}
151VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
152    vnet_gre_init, NULL);
153
154static void
155vnet_gre_uninit(const void *unused __unused)
156{
157
158	if_clone_detach(V_gre_cloner);
159	GRE_LIST_LOCK_DESTROY();
160}
161VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
162    vnet_gre_uninit, NULL);
163
164static int
165gre_clone_create(struct if_clone *ifc, int unit, caddr_t params)
166{
167	struct gre_softc *sc;
168
169	sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO);
170	sc->gre_fibnum = curthread->td_proc->p_fibnum;
171	GRE2IFP(sc) = if_alloc(IFT_TUNNEL);
172	GRE_LOCK_INIT(sc);
173	GRE2IFP(sc)->if_softc = sc;
174	if_initname(GRE2IFP(sc), grename, unit);
175
176	GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU;
177	GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
178	GRE2IFP(sc)->if_output = gre_output;
179	GRE2IFP(sc)->if_ioctl = gre_ioctl;
180	GRE2IFP(sc)->if_transmit = gre_transmit;
181	GRE2IFP(sc)->if_qflush = gre_qflush;
182	if_attach(GRE2IFP(sc));
183	bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t));
184	GRE_LIST_LOCK();
185	LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list);
186	GRE_LIST_UNLOCK();
187	return (0);
188}
189
190static void
191gre_clone_destroy(struct ifnet *ifp)
192{
193	struct gre_softc *sc;
194
195	sx_xlock(&gre_ioctl_sx);
196	sc = ifp->if_softc;
197	gre_delete_tunnel(ifp);
198	GRE_LIST_LOCK();
199	LIST_REMOVE(sc, gre_list);
200	GRE_LIST_UNLOCK();
201	bpfdetach(ifp);
202	if_detach(ifp);
203	ifp->if_softc = NULL;
204	sx_xunlock(&gre_ioctl_sx);
205
206	if_free(ifp);
207	GRE_LOCK_DESTROY(sc);
208	free(sc, M_GRE);
209}
210
211static int
212gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
213{
214	GRE_RLOCK_TRACKER;
215	struct ifreq *ifr = (struct ifreq *)data;
216	struct sockaddr *src, *dst;
217	struct gre_softc *sc;
218#ifdef INET
219	struct sockaddr_in *sin = NULL;
220#endif
221#ifdef INET6
222	struct sockaddr_in6 *sin6 = NULL;
223#endif
224	uint32_t opt;
225	int error;
226
227	switch (cmd) {
228	case SIOCSIFMTU:
229		 /* XXX: */
230		if (ifr->ifr_mtu < 576)
231			return (EINVAL);
232		break;
233	case SIOCSIFADDR:
234		ifp->if_flags |= IFF_UP;
235	case SIOCSIFFLAGS:
236	case SIOCADDMULTI:
237	case SIOCDELMULTI:
238		return (0);
239	case GRESADDRS:
240	case GRESADDRD:
241	case GREGADDRS:
242	case GREGADDRD:
243	case GRESPROTO:
244	case GREGPROTO:
245		return (EOPNOTSUPP);
246	}
247	src = dst = NULL;
248	sx_xlock(&gre_ioctl_sx);
249	sc = ifp->if_softc;
250	if (sc == NULL) {
251		error = ENXIO;
252		goto end;
253	}
254	error = 0;
255	switch (cmd) {
256	case SIOCSIFMTU:
257		GRE_WLOCK(sc);
258		sc->gre_mtu = ifr->ifr_mtu;
259		gre_updatehdr(sc);
260		GRE_WUNLOCK(sc);
261		goto end;
262	case SIOCSIFPHYADDR:
263#ifdef INET6
264	case SIOCSIFPHYADDR_IN6:
265#endif
266		error = EINVAL;
267		switch (cmd) {
268#ifdef INET
269		case SIOCSIFPHYADDR:
270			src = (struct sockaddr *)
271				&(((struct in_aliasreq *)data)->ifra_addr);
272			dst = (struct sockaddr *)
273				&(((struct in_aliasreq *)data)->ifra_dstaddr);
274			break;
275#endif
276#ifdef INET6
277		case SIOCSIFPHYADDR_IN6:
278			src = (struct sockaddr *)
279				&(((struct in6_aliasreq *)data)->ifra_addr);
280			dst = (struct sockaddr *)
281				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
282			break;
283#endif
284		default:
285			error = EAFNOSUPPORT;
286			goto end;
287		}
288		/* sa_family must be equal */
289		if (src->sa_family != dst->sa_family ||
290		    src->sa_len != dst->sa_len)
291			goto end;
292
293		/* validate sa_len */
294		switch (src->sa_family) {
295#ifdef INET
296		case AF_INET:
297			if (src->sa_len != sizeof(struct sockaddr_in))
298				goto end;
299			break;
300#endif
301#ifdef INET6
302		case AF_INET6:
303			if (src->sa_len != sizeof(struct sockaddr_in6))
304				goto end;
305			break;
306#endif
307		default:
308			error = EAFNOSUPPORT;
309			goto end;
310		}
311		/* check sa_family looks sane for the cmd */
312		error = EAFNOSUPPORT;
313		switch (cmd) {
314#ifdef INET
315		case SIOCSIFPHYADDR:
316			if (src->sa_family == AF_INET)
317				break;
318			goto end;
319#endif
320#ifdef INET6
321		case SIOCSIFPHYADDR_IN6:
322			if (src->sa_family == AF_INET6)
323				break;
324			goto end;
325#endif
326		}
327		error = EADDRNOTAVAIL;
328		switch (src->sa_family) {
329#ifdef INET
330		case AF_INET:
331			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
332			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
333				goto end;
334			break;
335#endif
336#ifdef INET6
337		case AF_INET6:
338			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
339			    ||
340			    IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
341				goto end;
342			/*
343			 * Check validity of the scope zone ID of the
344			 * addresses, and convert it into the kernel
345			 * internal form if necessary.
346			 */
347			error = sa6_embedscope(satosin6(src), 0);
348			if (error != 0)
349				goto end;
350			error = sa6_embedscope(satosin6(dst), 0);
351			if (error != 0)
352				goto end;
353#endif
354		};
355		error = gre_set_tunnel(ifp, src, dst);
356		break;
357	case SIOCDIFPHYADDR:
358		gre_delete_tunnel(ifp);
359		break;
360	case SIOCGIFPSRCADDR:
361	case SIOCGIFPDSTADDR:
362#ifdef INET6
363	case SIOCGIFPSRCADDR_IN6:
364	case SIOCGIFPDSTADDR_IN6:
365#endif
366		if (sc->gre_family == 0) {
367			error = EADDRNOTAVAIL;
368			break;
369		}
370		GRE_RLOCK(sc);
371		switch (cmd) {
372#ifdef INET
373		case SIOCGIFPSRCADDR:
374		case SIOCGIFPDSTADDR:
375			if (sc->gre_family != AF_INET) {
376				error = EADDRNOTAVAIL;
377				break;
378			}
379			sin = (struct sockaddr_in *)&ifr->ifr_addr;
380			memset(sin, 0, sizeof(*sin));
381			sin->sin_family = AF_INET;
382			sin->sin_len = sizeof(*sin);
383			break;
384#endif
385#ifdef INET6
386		case SIOCGIFPSRCADDR_IN6:
387		case SIOCGIFPDSTADDR_IN6:
388			if (sc->gre_family != AF_INET6) {
389				error = EADDRNOTAVAIL;
390				break;
391			}
392			sin6 = (struct sockaddr_in6 *)
393				&(((struct in6_ifreq *)data)->ifr_addr);
394			memset(sin6, 0, sizeof(*sin6));
395			sin6->sin6_family = AF_INET6;
396			sin6->sin6_len = sizeof(*sin6);
397			break;
398#endif
399		}
400		if (error == 0) {
401			switch (cmd) {
402#ifdef INET
403			case SIOCGIFPSRCADDR:
404				sin->sin_addr = sc->gre_oip.ip_src;
405				break;
406			case SIOCGIFPDSTADDR:
407				sin->sin_addr = sc->gre_oip.ip_dst;
408				break;
409#endif
410#ifdef INET6
411			case SIOCGIFPSRCADDR_IN6:
412				sin6->sin6_addr = sc->gre_oip6.ip6_src;
413				break;
414			case SIOCGIFPDSTADDR_IN6:
415				sin6->sin6_addr = sc->gre_oip6.ip6_dst;
416				break;
417#endif
418			}
419		}
420		GRE_RUNLOCK(sc);
421		if (error != 0)
422			break;
423		switch (cmd) {
424#ifdef INET
425		case SIOCGIFPSRCADDR:
426		case SIOCGIFPDSTADDR:
427			error = prison_if(curthread->td_ucred,
428			    (struct sockaddr *)sin);
429			if (error != 0)
430				memset(sin, 0, sizeof(*sin));
431			break;
432#endif
433#ifdef INET6
434		case SIOCGIFPSRCADDR_IN6:
435		case SIOCGIFPDSTADDR_IN6:
436			error = prison_if(curthread->td_ucred,
437			    (struct sockaddr *)sin6);
438			if (error == 0)
439				error = sa6_recoverscope(sin6);
440			if (error != 0)
441				memset(sin6, 0, sizeof(*sin6));
442#endif
443		}
444		break;
445	case SIOCGTUNFIB:
446		ifr->ifr_fib = sc->gre_fibnum;
447		break;
448	case SIOCSTUNFIB:
449		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
450			break;
451		if (ifr->ifr_fib >= rt_numfibs)
452			error = EINVAL;
453		else
454			sc->gre_fibnum = ifr->ifr_fib;
455		break;
456	case GRESKEY:
457		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
458			break;
459		if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
460			break;
461		if (sc->gre_key != opt) {
462			GRE_WLOCK(sc);
463			sc->gre_key = opt;
464			gre_updatehdr(sc);
465			GRE_WUNLOCK(sc);
466		}
467		break;
468	case GREGKEY:
469		error = copyout(&sc->gre_key, ifr->ifr_data,
470		    sizeof(sc->gre_key));
471		break;
472	case GRESOPTS:
473		if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
474			break;
475		if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0)
476			break;
477		if (opt & ~GRE_OPTMASK)
478			error = EINVAL;
479		else {
480			if (sc->gre_options != opt) {
481				GRE_WLOCK(sc);
482				sc->gre_options = opt;
483				gre_updatehdr(sc);
484				GRE_WUNLOCK(sc);
485			}
486		}
487		break;
488
489	case GREGOPTS:
490		error = copyout(&sc->gre_options, ifr->ifr_data,
491		    sizeof(sc->gre_options));
492		break;
493	default:
494		error = EINVAL;
495		break;
496	}
497end:
498	sx_xunlock(&gre_ioctl_sx);
499	return (error);
500}
501
502static void
503gre_updatehdr(struct gre_softc *sc)
504{
505	struct grehdr *gh = NULL;
506	uint32_t *opts;
507	uint16_t flags;
508
509	GRE_WLOCK_ASSERT(sc);
510	switch (sc->gre_family) {
511#ifdef INET
512	case AF_INET:
513		sc->gre_hlen = sizeof(struct greip);
514		sc->gre_oip.ip_v = IPPROTO_IPV4;
515		sc->gre_oip.ip_hl = sizeof(struct ip) >> 2;
516		sc->gre_oip.ip_p = IPPROTO_GRE;
517		gh = &sc->gre_gihdr->gi_gre;
518		break;
519#endif
520#ifdef INET6
521	case AF_INET6:
522		sc->gre_hlen = sizeof(struct greip6);
523		sc->gre_oip6.ip6_vfc = IPV6_VERSION;
524		sc->gre_oip6.ip6_nxt = IPPROTO_GRE;
525		gh = &sc->gre_gi6hdr->gi6_gre;
526		break;
527#endif
528	default:
529		return;
530	}
531	flags = 0;
532	opts = gh->gre_opts;
533	if (sc->gre_options & GRE_ENABLE_CSUM) {
534		flags |= GRE_FLAGS_CP;
535		sc->gre_hlen += 2 * sizeof(uint16_t);
536		*opts++ = 0;
537	}
538	if (sc->gre_key != 0) {
539		flags |= GRE_FLAGS_KP;
540		sc->gre_hlen += sizeof(uint32_t);
541		*opts++ = htonl(sc->gre_key);
542	}
543	if (sc->gre_options & GRE_ENABLE_SEQ) {
544		flags |= GRE_FLAGS_SP;
545		sc->gre_hlen += sizeof(uint32_t);
546		*opts++ = 0;
547	} else
548		sc->gre_oseq = 0;
549	gh->gre_flags = htons(flags);
550	GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen;
551}
552
553static void
554gre_detach(struct gre_softc *sc)
555{
556
557	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
558	if (sc->gre_ecookie != NULL)
559		encap_detach(sc->gre_ecookie);
560	sc->gre_ecookie = NULL;
561}
562
563static int
564gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src,
565    struct sockaddr *dst)
566{
567	struct gre_softc *sc, *tsc;
568#ifdef INET6
569	struct ip6_hdr *ip6;
570#endif
571#ifdef INET
572	struct ip *ip;
573#endif
574	void *hdr;
575	int error;
576
577	sx_assert(&gre_ioctl_sx, SA_XLOCKED);
578	GRE_LIST_LOCK();
579	sc = ifp->if_softc;
580	LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) {
581		if (tsc == sc || tsc->gre_family != src->sa_family)
582			continue;
583#ifdef INET
584		if (tsc->gre_family == AF_INET &&
585		    tsc->gre_oip.ip_src.s_addr ==
586		    satosin(src)->sin_addr.s_addr &&
587		    tsc->gre_oip.ip_dst.s_addr ==
588		    satosin(dst)->sin_addr.s_addr) {
589			GRE_LIST_UNLOCK();
590			return (EADDRNOTAVAIL);
591		}
592#endif
593#ifdef INET6
594		if (tsc->gre_family == AF_INET6 &&
595		    IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src,
596		    &satosin6(src)->sin6_addr) &&
597		    IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst,
598			&satosin6(dst)->sin6_addr)) {
599			GRE_LIST_UNLOCK();
600			return (EADDRNOTAVAIL);
601		}
602#endif
603	}
604	GRE_LIST_UNLOCK();
605
606	error = 0;
607	switch (src->sa_family) {
608#ifdef INET
609	case AF_INET:
610		hdr = ip = malloc(sizeof(struct greip) +
611		    3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
612		ip->ip_src = satosin(src)->sin_addr;
613		ip->ip_dst = satosin(dst)->sin_addr;
614		break;
615#endif
616#ifdef INET6
617	case AF_INET6:
618		hdr = ip6 = malloc(sizeof(struct greip6) +
619		    3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO);
620		ip6->ip6_src = satosin6(src)->sin6_addr;
621		ip6->ip6_dst = satosin6(dst)->sin6_addr;
622		break;
623#endif
624	default:
625		return (EAFNOSUPPORT);
626	}
627	if (sc->gre_family != src->sa_family)
628		gre_detach(sc);
629	GRE_WLOCK(sc);
630	if (sc->gre_family != 0)
631		free(sc->gre_hdr, M_GRE);
632	sc->gre_family = src->sa_family;
633	sc->gre_hdr = hdr;
634	sc->gre_oseq = 0;
635	sc->gre_iseq = UINT32_MAX;
636	gre_updatehdr(sc);
637	GRE_WUNLOCK(sc);
638
639	switch (src->sa_family) {
640#ifdef INET
641	case AF_INET:
642		error = in_gre_attach(sc);
643		break;
644#endif
645#ifdef INET6
646	case AF_INET6:
647		error = in6_gre_attach(sc);
648		break;
649#endif
650	}
651	if (error == 0)
652		ifp->if_drv_flags |= IFF_DRV_RUNNING;
653	return (error);
654}
655
656static void
657gre_delete_tunnel(struct ifnet *ifp)
658{
659	struct gre_softc *sc = ifp->if_softc;
660	int family;
661
662	GRE_WLOCK(sc);
663	family = sc->gre_family;
664	sc->gre_family = 0;
665	GRE_WUNLOCK(sc);
666	if (family != 0) {
667		gre_detach(sc);
668		free(sc->gre_hdr, M_GRE);
669	}
670	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
671}
672
673int
674gre_input(struct mbuf **mp, int *offp, int proto)
675{
676	struct gre_softc *sc;
677	struct grehdr *gh;
678	struct ifnet *ifp;
679	struct mbuf *m;
680	uint32_t *opts, key;
681	uint16_t flags;
682	int hlen, isr, af;
683
684	m = *mp;
685	sc = encap_getarg(m);
686	KASSERT(sc != NULL, ("encap_getarg returned NULL"));
687
688	ifp = GRE2IFP(sc);
689	gh = (struct grehdr *)mtodo(m, *offp);
690	flags = ntohs(gh->gre_flags);
691	if (flags & ~GRE_FLAGS_MASK)
692		goto drop;
693	opts = gh->gre_opts;
694	hlen = 2 * sizeof(uint16_t);
695	if (flags & GRE_FLAGS_CP) {
696		/* reserved1 field must be zero */
697		if (((uint16_t *)opts)[1] != 0)
698			goto drop;
699		if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0)
700			goto drop;
701		hlen += 2 * sizeof(uint16_t);
702		opts++;
703	}
704	if (flags & GRE_FLAGS_KP) {
705		key = ntohl(*opts);
706		hlen += sizeof(uint32_t);
707		opts++;
708	} else
709		key = 0;
710	/*
711	if (sc->gre_key != 0 && (key != sc->gre_key || key != 0))
712		goto drop;
713	*/
714	if (flags & GRE_FLAGS_SP) {
715		/* seq = ntohl(*opts); */
716		hlen += sizeof(uint32_t);
717	}
718	switch (ntohs(gh->gre_proto)) {
719	case ETHERTYPE_WCCP:
720		/*
721		 * For WCCP skip an additional 4 bytes if after GRE header
722		 * doesn't follow an IP header.
723		 */
724		if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40)
725			hlen += sizeof(uint32_t);
726		/* FALLTHROUGH */
727	case ETHERTYPE_IP:
728		isr = NETISR_IP;
729		af = AF_INET;
730		break;
731	case ETHERTYPE_IPV6:
732		isr = NETISR_IPV6;
733		af = AF_INET6;
734		break;
735	default:
736		goto drop;
737	}
738	m_adj(m, *offp + hlen);
739	m_clrprotoflags(m);
740	m->m_pkthdr.rcvif = ifp;
741	M_SETFIB(m, ifp->if_fib);
742#ifdef MAC
743	mac_ifnet_create_mbuf(ifp, m);
744#endif
745	BPF_MTAP2(ifp, &af, sizeof(af), m);
746	ifp->if_ipackets++;
747	ifp->if_ibytes += m->m_pkthdr.len;
748	if ((ifp->if_flags & IFF_MONITOR) != 0)
749		m_freem(m);
750	else
751		netisr_dispatch(isr, m);
752	return (IPPROTO_DONE);
753drop:
754	ifp->if_ierrors++;
755	m_freem(m);
756	return (IPPROTO_DONE);
757}
758
759#define	MTAG_GRE	1307983903
760static int
761gre_check_nesting(struct ifnet *ifp, struct mbuf *m)
762{
763	struct m_tag *mtag;
764	int count;
765
766	count = 1;
767	mtag = NULL;
768	while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) {
769		if (*(struct ifnet **)(mtag + 1) == ifp) {
770			log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname);
771			return (EIO);
772		}
773		count++;
774	}
775	if (count > V_max_gre_nesting) {
776		log(LOG_NOTICE,
777		    "%s: if_output recursively called too many times(%d)\n",
778		    ifp->if_xname, count);
779		return (EIO);
780	}
781	mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT);
782	if (mtag == NULL)
783		return (ENOMEM);
784	*(struct ifnet **)(mtag + 1) = ifp;
785	m_tag_prepend(m, mtag);
786	return (0);
787}
788
789static int
790gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
791   struct route *ro)
792{
793	uint32_t af;
794	int error;
795
796#ifdef MAC
797	error = mac_ifnet_check_transmit(ifp, m);
798	if (error != 0)
799		goto drop;
800#endif
801	if ((ifp->if_flags & IFF_MONITOR) != 0 ||
802	    (ifp->if_flags & IFF_UP) == 0) {
803		error = ENETDOWN;
804		goto drop;
805	}
806
807	error = gre_check_nesting(ifp, m);
808	if (error != 0)
809		goto drop;
810
811	m->m_flags &= ~(M_BCAST|M_MCAST);
812	if (dst->sa_family == AF_UNSPEC)
813		bcopy(dst->sa_data, &af, sizeof(af));
814	else
815		af = dst->sa_family;
816	BPF_MTAP2(ifp, &af, sizeof(af), m);
817	m->m_pkthdr.csum_data = af;	/* save af for if_transmit */
818	return (ifp->if_transmit(ifp, m));
819drop:
820	m_freem(m);
821	ifp->if_oerrors++;
822	return (error);
823}
824
825static void
826gre_setseqn(struct grehdr *gh, uint32_t seq)
827{
828	uint32_t *opts;
829	uint16_t flags;
830
831	opts = gh->gre_opts;
832	flags = ntohs(gh->gre_flags);
833	KASSERT((flags & GRE_FLAGS_SP) != 0,
834	    ("gre_setseqn called, but GRE_FLAGS_SP isn't set "));
835	if (flags & GRE_FLAGS_CP)
836		opts++;
837	if (flags & GRE_FLAGS_KP)
838		opts++;
839	*opts = htonl(seq);
840}
841
842static int
843gre_transmit(struct ifnet *ifp, struct mbuf *m)
844{
845	GRE_RLOCK_TRACKER;
846	struct gre_softc *sc;
847	struct grehdr *gh;
848	uint32_t iaf, oaf, oseq;
849	int error, hlen, olen, plen;
850	int want_seq, want_csum;
851
852	plen = 0;
853	sc = ifp->if_softc;
854	if (sc == NULL) {
855		error = ENETDOWN;
856		m_freem(m);
857		goto drop;
858	}
859	GRE_RLOCK(sc);
860	if (sc->gre_family == 0) {
861		GRE_RUNLOCK(sc);
862		error = ENETDOWN;
863		m_freem(m);
864		goto drop;
865	}
866	iaf = m->m_pkthdr.csum_data;
867	oaf = sc->gre_family;
868	hlen = sc->gre_hlen;
869	want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0;
870	if (want_seq)
871		oseq = sc->gre_oseq++;
872	else
873		oseq = 0;	/* Make compiler happy. */
874	want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0;
875	M_SETFIB(m, sc->gre_fibnum);
876	M_PREPEND(m, hlen, M_NOWAIT);
877	if (m == NULL) {
878		GRE_RUNLOCK(sc);
879		error = ENOBUFS;
880		goto drop;
881	}
882	bcopy(sc->gre_hdr, mtod(m, void *), hlen);
883	GRE_RUNLOCK(sc);
884	switch (oaf) {
885#ifdef INET
886	case AF_INET:
887		olen = sizeof(struct ip);
888		break;
889#endif
890#ifdef INET6
891	case AF_INET6:
892		olen = sizeof(struct ip6_hdr);
893		break;
894#endif
895	default:
896		error = ENETDOWN;
897		goto drop;
898	}
899	gh = (struct grehdr *)mtodo(m, olen);
900	switch (iaf) {
901#ifdef INET
902	case AF_INET:
903		gh->gre_proto = htons(ETHERTYPE_IP);
904		break;
905#endif
906#ifdef INET6
907	case AF_INET6:
908		gh->gre_proto = htons(ETHERTYPE_IPV6);
909		break;
910#endif
911	default:
912		error = ENETDOWN;
913		goto drop;
914	}
915	if (want_seq)
916		gre_setseqn(gh, oseq);
917	if (want_csum) {
918		*(uint16_t *)gh->gre_opts = in_cksum_skip(m,
919		    m->m_pkthdr.len, olen);
920	}
921	plen = m->m_pkthdr.len - hlen;
922	switch (oaf) {
923#ifdef INET
924	case AF_INET:
925		error = in_gre_output(m, iaf, hlen);
926		break;
927#endif
928#ifdef INET6
929	case AF_INET6:
930		error = in6_gre_output(m, iaf, hlen);
931		break;
932#endif
933	default:
934		m_freem(m);
935		error = ENETDOWN;
936	};
937drop:
938	if (error)
939		ifp->if_oerrors++;
940	else {
941		ifp->if_opackets++;
942		ifp->if_obytes += plen;
943	}
944	return (error);
945}
946
947static void
948gre_qflush(struct ifnet *ifp __unused)
949{
950
951}
952
953static int
954gremodevent(module_t mod, int type, void *data)
955{
956
957	switch (type) {
958	case MOD_LOAD:
959	case MOD_UNLOAD:
960		break;
961	default:
962		return (EOPNOTSUPP);
963	}
964	return (0);
965}
966
967static moduledata_t gre_mod = {
968	"if_gre",
969	gremodevent,
970	0
971};
972
973DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
974MODULE_VERSION(if_gre, 1);
975