1/*	$FreeBSD$	*/
2/*	$OpenBSD: ip_ipip.c,v 1.25 2002/06/10 18:04:55 itojun Exp $ */
3/*-
4 * The authors of this code are John Ioannidis (ji@tla.org),
5 * Angelos D. Keromytis (kermit@csd.uch.gr) and
6 * Niels Provos (provos@physnet.uni-hamburg.de).
7 *
8 * The original version of this code was written by John Ioannidis
9 * for BSD/OS in Athens, Greece, in November 1995.
10 *
11 * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
12 * by Angelos D. Keromytis.
13 *
14 * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
15 * and Niels Provos.
16 *
17 * Additional features in 1999 by Angelos D. Keromytis.
18 *
19 * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
20 * Angelos D. Keromytis and Niels Provos.
21 * Copyright (c) 2001, Angelos D. Keromytis.
22 *
23 * Permission to use, copy, and modify this software with or without fee
24 * is hereby granted, provided that this entire notice is included in
25 * all copies of any software which is or includes a copy or
26 * modification of this software.
27 * You may use this code under the GNU public license if you so wish. Please
28 * contribute changes back to the authors under this freer than GPL license
29 * so that we may further the use of strong encryption without limitations to
30 * all.
31 *
32 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
33 * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
34 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
35 * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
36 * PURPOSE.
37 */
38
39/*
40 * IP-inside-IP processing
41 */
42#include "opt_inet.h"
43#include "opt_inet6.h"
44#include "opt_enc.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/mbuf.h>
49#include <sys/socket.h>
50#include <sys/kernel.h>
51#include <sys/protosw.h>
52#include <sys/sysctl.h>
53
54#include <net/if.h>
55#include <net/pfil.h>
56#include <net/netisr.h>
57#include <net/vnet.h>
58
59#include <netinet/in.h>
60#include <netinet/in_systm.h>
61#include <netinet/in_var.h>
62#include <netinet/ip.h>
63#include <netinet/ip_ecn.h>
64#include <netinet/ip_var.h>
65#include <netinet/ip_encap.h>
66
67#include <netipsec/ipsec.h>
68#include <netipsec/xform.h>
69
70#include <netipsec/ipip_var.h>
71
72#ifdef INET6
73#include <netinet/ip6.h>
74#include <netipsec/ipsec6.h>
75#include <netinet6/ip6_ecn.h>
76#include <netinet6/in6_var.h>
77#include <netinet6/ip6protosw.h>
78#endif
79
80#include <netipsec/key.h>
81#include <netipsec/key_debug.h>
82
83#include <machine/stdarg.h>
84
/*
 * ipip_allow is a per-VNET integer, initially 0, exported read-write by
 * the SYSCTL_VNET_INT() declaration below under the _net_inet_ipip node.
 *
 * Historically zero meant "drop IP-in-IP packets that did not come out of
 * IPsec processing" and anything else meant "accept".  In this file that
 * policy check is compiled out (#if 0) in both ip4_input() and
 * ip4_input6(); the only live use is in _ipip_input(), where the value 2
 * disables the local-address spoofing check.
 */
VNET_DEFINE(int, ipip_allow) = 0;
/* IPIP statistics counters (struct ipipstat), updated via IPIPSTAT_*(). */
VNET_PCPUSTAT_DEFINE(struct ipipstat, ipipstat);
VNET_PCPUSTAT_SYSINIT(ipipstat);

#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(ipipstat);
#endif /* VIMAGE */

SYSCTL_DECL(_net_inet_ipip);
/* NOTE(review): the description string for this knob is empty. */
SYSCTL_VNET_INT(_net_inet_ipip, OID_AUTO,
	ipip_allow,	CTLFLAG_RW,	&VNET_NAME(ipip_allow),	0, "");
SYSCTL_VNET_PCPUSTAT(_net_inet_ipip, IPSECCTL_STATS, stats,
    struct ipipstat, ipipstat,
    "IPIP statistics (struct ipipstat, netipsec/ipip_var.h)");

/*
 * mbuf flags that identify a packet as coming out of IPsec processing;
 * tested by ipe4_encapcheck() to decide whether to claim a packet.
 */
/* XXX IPCOMP */
#define	M_IPSEC	(M_AUTHIPHDR|M_AUTHIPDGM|M_DECRYPTED)

/* Common decapsulation routine shared by the IPv4 and IPv6 entry points. */
static void _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp);
108
109#ifdef INET6
110/*
111 * Really only a wrapper for ipip_input(), for use with IPv6.
112 */
113int
114ip4_input6(struct mbuf **m, int *offp, int proto)
115{
116#if 0
117	/* If we do not accept IP-in-IP explicitly, drop.  */
118	if (!V_ipip_allow && ((*m)->m_flags & M_IPSEC) == 0) {
119		DPRINTF(("%s: dropped due to policy\n", __func__));
120		IPIPSTAT_INC(ipips_pdrops);
121		m_freem(*m);
122		return IPPROTO_DONE;
123	}
124#endif
125	_ipip_input(*m, *offp, NULL);
126	return IPPROTO_DONE;
127}
128#endif /* INET6 */
129
130#ifdef INET
/*
 * IPv4 entry point for encapsulated packets: a thin wrapper that forwards
 * the packet and the header offset to _ipip_input().
 */
void
ip4_input(struct mbuf *m, int off)
{
	/* No gif interface is associated with packets arriving this way. */
	struct ifnet *no_gif = NULL;

#if 0
	/* If we do not accept IP-in-IP explicitly, drop.  */
	if (!V_ipip_allow && (m->m_flags & M_IPSEC) == 0) {
		DPRINTF(("%s: dropped due to policy\n", __func__));
		IPIPSTAT_INC(ipips_pdrops);
		m_freem(m);
		return;
	}
#endif
	_ipip_input(m, off, no_gif);
}
148#endif /* INET */
149
150/*
151 * ipip_input gets called when we receive an IP{46} encapsulated packet,
152 * either because we got it at a real interface, or because AH or ESP
153 * were being used in tunnel mode (in which case the rcvif element will
154 * contain the address of the encX interface associated with the tunnel.
155 */
156
157static void
158_ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp)
159{
160	struct ip *ipo;
161#ifdef INET6
162	struct ip6_hdr *ip6 = NULL;
163	u_int8_t itos;
164#endif
165	int isr;
166	u_int8_t otos;
167	u_int8_t v;
168	int hlen;
169
170	IPIPSTAT_INC(ipips_ipackets);
171
172	m_copydata(m, 0, 1, &v);
173
174	switch (v >> 4) {
175#ifdef INET
176        case 4:
177		hlen = sizeof(struct ip);
178		break;
179#endif /* INET */
180#ifdef INET6
181        case 6:
182		hlen = sizeof(struct ip6_hdr);
183		break;
184#endif
185        default:
186		IPIPSTAT_INC(ipips_family);
187		m_freem(m);
188		return /* EAFNOSUPPORT */;
189	}
190
191	/* Bring the IP header in the first mbuf, if not there already */
192	if (m->m_len < hlen) {
193		if ((m = m_pullup(m, hlen)) == NULL) {
194			DPRINTF(("%s: m_pullup (1) failed\n", __func__));
195			IPIPSTAT_INC(ipips_hdrops);
196			return;
197		}
198	}
199	ipo = mtod(m, struct ip *);
200
201	/* Keep outer ecn field. */
202	switch (v >> 4) {
203#ifdef INET
204	case 4:
205		otos = ipo->ip_tos;
206		break;
207#endif /* INET */
208#ifdef INET6
209	case 6:
210		otos = (ntohl(mtod(m, struct ip6_hdr *)->ip6_flow) >> 20) & 0xff;
211		break;
212#endif
213	default:
214		panic("ipip_input: unknown ip version %u (outer)", v>>4);
215	}
216
217	/* Remove outer IP header */
218	m_adj(m, iphlen);
219
220	/* Sanity check */
221	if (m->m_pkthdr.len < sizeof(struct ip))  {
222		IPIPSTAT_INC(ipips_hdrops);
223		m_freem(m);
224		return;
225	}
226
227	m_copydata(m, 0, 1, &v);
228
229	switch (v >> 4) {
230#ifdef INET
231        case 4:
232		hlen = sizeof(struct ip);
233		break;
234#endif /* INET */
235
236#ifdef INET6
237        case 6:
238		hlen = sizeof(struct ip6_hdr);
239		break;
240#endif
241	default:
242		IPIPSTAT_INC(ipips_family);
243		m_freem(m);
244		return; /* EAFNOSUPPORT */
245	}
246
247	/*
248	 * Bring the inner IP header in the first mbuf, if not there already.
249	 */
250	if (m->m_len < hlen) {
251		if ((m = m_pullup(m, hlen)) == NULL) {
252			DPRINTF(("%s: m_pullup (2) failed\n", __func__));
253			IPIPSTAT_INC(ipips_hdrops);
254			return;
255		}
256	}
257
258	/*
259	 * RFC 1853 specifies that the inner TTL should not be touched on
260	 * decapsulation. There's no reason this comment should be here, but
261	 * this is as good as any a position.
262	 */
263
264	/* Some sanity checks in the inner IP header */
265	switch (v >> 4) {
266#ifdef INET
267    	case 4:
268                ipo = mtod(m, struct ip *);
269		ip_ecn_egress(V_ip4_ipsec_ecn, &otos, &ipo->ip_tos);
270                break;
271#endif /* INET */
272#ifdef INET6
273    	case 6:
274                ip6 = (struct ip6_hdr *) ipo;
275		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
276		ip_ecn_egress(V_ip6_ipsec_ecn, &otos, &itos);
277		ip6->ip6_flow &= ~htonl(0xff << 20);
278		ip6->ip6_flow |= htonl((u_int32_t) itos << 20);
279                break;
280#endif
281	default:
282		panic("ipip_input: unknown ip version %u (inner)", v>>4);
283	}
284
285	/* Check for local address spoofing. */
286	if ((m->m_pkthdr.rcvif == NULL ||
287	    !(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK)) &&
288	    V_ipip_allow != 2) {
289#ifdef INET
290		if ((v >> 4) == IPVERSION &&
291		    in_localip(ipo->ip_src) != 0) {
292			IPIPSTAT_INC(ipips_spoof);
293			m_freem(m);
294			return;
295		}
296#endif
297#ifdef INET6
298		if ((v & IPV6_VERSION_MASK) == IPV6_VERSION &&
299		    in6_localip(&ip6->ip6_src) != 0) {
300			IPIPSTAT_INC(ipips_spoof);
301			m_freem(m);
302			return;
303		}
304#endif
305	}
306
307	/* Statistics */
308	IPIPSTAT_ADD(ipips_ibytes, m->m_pkthdr.len - iphlen);
309
310	/*
311	 * Interface pointer stays the same; if no IPsec processing has
312	 * been done (or will be done), this will point to a normal
313	 * interface. Otherwise, it'll point to an enc interface, which
314	 * will allow a packet filter to distinguish between secure and
315	 * untrusted packets.
316	 */
317
318	switch (v >> 4) {
319#ifdef INET
320	case 4:
321		isr = NETISR_IP;
322		break;
323#endif
324#ifdef INET6
325	case 6:
326		isr = NETISR_IPV6;
327		break;
328#endif
329	default:
330		panic("%s: bogus ip version %u", __func__, v>>4);
331	}
332
333	if (netisr_queue(isr, m)) {	/* (0) on success. */
334		IPIPSTAT_INC(ipips_qfull);
335		DPRINTF(("%s: packet dropped because of full queue\n",
336			__func__));
337	}
338}
339
340int
341ipip_output(
342	struct mbuf *m,
343	struct ipsecrequest *isr,
344	struct mbuf **mp,
345	int skip,
346	int protoff
347)
348{
349	struct secasvar *sav;
350	u_int8_t tp, otos;
351	struct secasindex *saidx;
352	int error;
353#if defined(INET) || defined(INET6)
354	u_int8_t itos;
355#endif
356#ifdef INET
357	struct ip *ipo;
358#endif /* INET */
359#ifdef INET6
360	struct ip6_hdr *ip6, *ip6o;
361#endif /* INET6 */
362
363	sav = isr->sav;
364	IPSEC_ASSERT(sav != NULL, ("null SA"));
365	IPSEC_ASSERT(sav->sah != NULL, ("null SAH"));
366
367	/* XXX Deal with empty TDB source/destination addresses. */
368
369	m_copydata(m, 0, 1, &tp);
370	tp = (tp >> 4) & 0xff;  /* Get the IP version number. */
371
372	saidx = &sav->sah->saidx;
373	switch (saidx->dst.sa.sa_family) {
374#ifdef INET
375	case AF_INET:
376		if (saidx->src.sa.sa_family != AF_INET ||
377		    saidx->src.sin.sin_addr.s_addr == INADDR_ANY ||
378		    saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) {
379			DPRINTF(("%s: unspecified tunnel endpoint "
380			    "address in SA %s/%08lx\n", __func__,
381			    ipsec_address(&saidx->dst),
382			    (u_long) ntohl(sav->spi)));
383			IPIPSTAT_INC(ipips_unspec);
384			error = EINVAL;
385			goto bad;
386		}
387
388		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
389		if (m == 0) {
390			DPRINTF(("%s: M_PREPEND failed\n", __func__));
391			IPIPSTAT_INC(ipips_hdrops);
392			error = ENOBUFS;
393			goto bad;
394		}
395
396		ipo = mtod(m, struct ip *);
397
398		ipo->ip_v = IPVERSION;
399		ipo->ip_hl = 5;
400		ipo->ip_len = htons(m->m_pkthdr.len);
401		ipo->ip_ttl = V_ip_defttl;
402		ipo->ip_sum = 0;
403		ipo->ip_src = saidx->src.sin.sin_addr;
404		ipo->ip_dst = saidx->dst.sin.sin_addr;
405
406		ipo->ip_id = ip_newid();
407
408		/* If the inner protocol is IP... */
409		switch (tp) {
410		case IPVERSION:
411			/* Save ECN notification */
412			m_copydata(m, sizeof(struct ip) +
413			    offsetof(struct ip, ip_tos),
414			    sizeof(u_int8_t), (caddr_t) &itos);
415
416			ipo->ip_p = IPPROTO_IPIP;
417
418			/*
419			 * We should be keeping tunnel soft-state and
420			 * send back ICMPs if needed.
421			 */
422			m_copydata(m, sizeof(struct ip) +
423			    offsetof(struct ip, ip_off),
424			    sizeof(u_int16_t), (caddr_t) &ipo->ip_off);
425			ipo->ip_off = ntohs(ipo->ip_off);
426			ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK);
427			ipo->ip_off = htons(ipo->ip_off);
428			break;
429#ifdef INET6
430		case (IPV6_VERSION >> 4):
431		{
432			u_int32_t itos32;
433
434			/* Save ECN notification. */
435			m_copydata(m, sizeof(struct ip) +
436			    offsetof(struct ip6_hdr, ip6_flow),
437			    sizeof(u_int32_t), (caddr_t) &itos32);
438			itos = ntohl(itos32) >> 20;
439			ipo->ip_p = IPPROTO_IPV6;
440			ipo->ip_off = 0;
441			break;
442		}
443#endif /* INET6 */
444		default:
445			goto nofamily;
446		}
447
448		otos = 0;
449		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
450		ipo->ip_tos = otos;
451		break;
452#endif /* INET */
453
454#ifdef INET6
455	case AF_INET6:
456		if (IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr) ||
457		    saidx->src.sa.sa_family != AF_INET6 ||
458		    IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr)) {
459			DPRINTF(("%s: unspecified tunnel endpoint "
460			    "address in SA %s/%08lx\n", __func__,
461			    ipsec_address(&saidx->dst),
462			    (u_long) ntohl(sav->spi)));
463			IPIPSTAT_INC(ipips_unspec);
464			error = ENOBUFS;
465			goto bad;
466		}
467
468		/* scoped address handling */
469		ip6 = mtod(m, struct ip6_hdr *);
470		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
471			ip6->ip6_src.s6_addr16[1] = 0;
472		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
473			ip6->ip6_dst.s6_addr16[1] = 0;
474
475		M_PREPEND(m, sizeof(struct ip6_hdr), M_NOWAIT);
476		if (m == 0) {
477			DPRINTF(("%s: M_PREPEND failed\n", __func__));
478			IPIPSTAT_INC(ipips_hdrops);
479			error = ENOBUFS;
480			goto bad;
481		}
482
483		/* Initialize IPv6 header */
484		ip6o = mtod(m, struct ip6_hdr *);
485		ip6o->ip6_flow = 0;
486		ip6o->ip6_vfc &= ~IPV6_VERSION_MASK;
487		ip6o->ip6_vfc |= IPV6_VERSION;
488		ip6o->ip6_hlim = IPV6_DEFHLIM;
489		ip6o->ip6_dst = saidx->dst.sin6.sin6_addr;
490		ip6o->ip6_src = saidx->src.sin6.sin6_addr;
491		ip6o->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
492
493		switch (tp) {
494#ifdef INET
495		case IPVERSION:
496			/* Save ECN notification */
497			m_copydata(m, sizeof(struct ip6_hdr) +
498			    offsetof(struct ip, ip_tos), sizeof(u_int8_t),
499			    (caddr_t) &itos);
500
501			/* This is really IPVERSION. */
502			ip6o->ip6_nxt = IPPROTO_IPIP;
503			break;
504#endif /* INET */
505		case (IPV6_VERSION >> 4):
506		{
507			u_int32_t itos32;
508
509			/* Save ECN notification. */
510			m_copydata(m, sizeof(struct ip6_hdr) +
511			    offsetof(struct ip6_hdr, ip6_flow),
512			    sizeof(u_int32_t), (caddr_t) &itos32);
513			itos = ntohl(itos32) >> 20;
514
515			ip6o->ip6_nxt = IPPROTO_IPV6;
516			break;
517		}
518		default:
519			goto nofamily;
520		}
521
522		otos = 0;
523		ip_ecn_ingress(V_ip6_ipsec_ecn, &otos, &itos);
524		ip6o->ip6_flow |= htonl((u_int32_t) otos << 20);
525		break;
526#endif /* INET6 */
527
528	default:
529nofamily:
530		DPRINTF(("%s: unsupported protocol family %u\n", __func__,
531		    saidx->dst.sa.sa_family));
532		IPIPSTAT_INC(ipips_family);
533		error = EAFNOSUPPORT;		/* XXX diffs from openbsd */
534		goto bad;
535	}
536
537	IPIPSTAT_INC(ipips_opackets);
538	*mp = m;
539
540#ifdef INET
541	if (saidx->dst.sa.sa_family == AF_INET) {
542#if 0
543		if (sav->tdb_xform->xf_type == XF_IP4)
544			tdb->tdb_cur_bytes +=
545			    m->m_pkthdr.len - sizeof(struct ip);
546#endif
547		IPIPSTAT_ADD(ipips_obytes,
548		    m->m_pkthdr.len - sizeof(struct ip));
549	}
550#endif /* INET */
551
552#ifdef INET6
553	if (saidx->dst.sa.sa_family == AF_INET6) {
554#if 0
555		if (sav->tdb_xform->xf_type == XF_IP4)
556			tdb->tdb_cur_bytes +=
557			    m->m_pkthdr.len - sizeof(struct ip6_hdr);
558#endif
559		IPIPSTAT_ADD(ipips_obytes,
560		    m->m_pkthdr.len - sizeof(struct ip6_hdr));
561	}
562#endif /* INET6 */
563
564	return 0;
565bad:
566	if (m)
567		m_freem(m);
568	*mp = NULL;
569	return (error);
570}
571
572#ifdef IPSEC
573#if defined(INET) || defined(INET6)
574static int
575ipe4_init(struct secasvar *sav, struct xformsw *xsp)
576{
577	sav->tdb_xform = xsp;
578	return 0;
579}
580
581static int
582ipe4_zeroize(struct secasvar *sav)
583{
584	sav->tdb_xform = NULL;
585	return 0;
586}
587
/*
 * Transform input hook.  It is not expected to be invoked: it logs
 * unconditionally, frees the packet if one was passed in, and returns
 * EOPNOTSUPP.
 */
static int
ipe4_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
{

	/* This is a rather serious mistake, so no conditional printing. */
	printf("%s: should never be called\n", __func__);

	if (m != NULL)
		m_freem(m);

	return (EOPNOTSUPP);
}
597
/*
 * Transform switch entry registered by ipe4_attach().  Positional
 * initializer: the XF_IP4 type, a zero flags word (presumably -- confirm
 * against struct xformsw), the descriptive name, then the init, zeroize,
 * input and output hooks.
 */
static struct xformsw ipe4_xformsw = {
	XF_IP4,		0,		"IPv4 Simple Encapsulation",
	ipe4_init,	ipe4_zeroize,	ipe4_input,	ipip_output,
};

/* Defined elsewhere; referenced by the protocol switch entries below. */
extern struct domain inetdomain;
604#endif /* INET || INET6 */
605#ifdef INET
/*
 * Protocol switch entry passed to encap_attach_func() for AF_INET:
 * packets claimed by ipe4_encapcheck() are delivered to ip4_input().
 */
static struct protosw ipe4_protosw = {
	.pr_type =	SOCK_RAW,
	.pr_domain =	&inetdomain,
	.pr_protocol =	IPPROTO_IPV4,
	.pr_flags =	PR_ATOMIC|PR_ADDR|PR_LASTHDR,
	.pr_input =	ip4_input,
	.pr_ctloutput =	rip_ctloutput,
	.pr_usrreqs =	&rip_usrreqs
};
615#endif /* INET */
616#if defined(INET6) && defined(INET)
/*
 * Protocol switch entry passed to encap_attach_func() for AF_INET6:
 * packets claimed by ipe4_encapcheck() are delivered to ip4_input6().
 * NOTE(review): this entry points at inetdomain and the rip_* handlers
 * rather than IPv6-specific ones -- confirm that is intended.
 */
static struct ip6protosw ipe6_protosw = {
	.pr_type =	SOCK_RAW,
	.pr_domain =	&inetdomain,
	.pr_protocol =	IPPROTO_IPV6,
	.pr_flags =	PR_ATOMIC|PR_ADDR|PR_LASTHDR,
	.pr_input =	ip4_input6,
	.pr_ctloutput =	rip_ctloutput,
	.pr_usrreqs =	&rip_usrreqs
};
626#endif /* INET6 && INET */
627
628#ifdef INET
629/*
630 * Check the encapsulated packet to see if we want it
631 */
632static int
633ipe4_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
634{
635	/*
636	 * Only take packets coming from IPSEC tunnels; the rest
637	 * must be handled by the gif tunnel code.  Note that we
638	 * also return a minimum priority when we want the packet
639	 * so any explicit gif tunnels take precedence.
640	 */
641	return ((m->m_flags & M_IPSEC) != 0 ? 1 : 0);
642}
643#endif /* INET */
644
645static void
646ipe4_attach(void)
647{
648
649	xform_register(&ipe4_xformsw);
650	/* attach to encapsulation framework */
651	/* XXX save return cookie for detach on module remove */
652#ifdef INET
653	(void) encap_attach_func(AF_INET, -1,
654		ipe4_encapcheck, &ipe4_protosw, NULL);
655#endif
656#if defined(INET6) && defined(INET)
657	(void) encap_attach_func(AF_INET6, -1,
658		ipe4_encapcheck, (struct protosw *)&ipe6_protosw, NULL);
659#endif
660}
661SYSINIT(ipe4_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipe4_attach, NULL);
662#endif	/* IPSEC */
663