1/* $FreeBSD$ */ 2/* $OpenBSD: ip_ipip.c,v 1.25 2002/06/10 18:04:55 itojun Exp $ */ 3/*- 4 * The authors of this code are John Ioannidis (ji@tla.org), 5 * Angelos D. Keromytis (kermit@csd.uch.gr) and 6 * Niels Provos (provos@physnet.uni-hamburg.de). 7 * 8 * The original version of this code was written by John Ioannidis 9 * for BSD/OS in Athens, Greece, in November 1995. 10 * 11 * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, 12 * by Angelos D. Keromytis. 13 * 14 * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis 15 * and Niels Provos. 16 * 17 * Additional features in 1999 by Angelos D. Keromytis. 18 * 19 * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, 20 * Angelos D. Keromytis and Niels Provos. 21 * Copyright (c) 2001, Angelos D. Keromytis. 22 * 23 * Permission to use, copy, and modify this software with or without fee 24 * is hereby granted, provided that this entire notice is included in 25 * all copies of any software which is or includes a copy or 26 * modification of this software. 27 * You may use this code under the GNU public license if you so wish. Please 28 * contribute changes back to the authors under this freer than GPL license 29 * so that we may further the use of strong encryption without limitations to 30 * all. 31 * 32 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR 33 * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY 34 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE 35 * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR 36 * PURPOSE. 37 */ 38 39/* 40 * IP-inside-IP processing 41 */ 42#include "opt_inet.h" 43#include "opt_inet6.h" 44#include "opt_enc.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/mbuf.h> 49#include <sys/socket.h> 50#include <sys/kernel.h> 51#include <sys/protosw.h> 52#include <sys/sysctl.h> 53 54#include <net/if.h> 55#include <net/pfil.h> 56#include <net/netisr.h> 57#include <net/vnet.h> 58 59#include <netinet/in.h> 60#include <netinet/in_systm.h> 61#include <netinet/in_var.h> 62#include <netinet/ip.h> 63#include <netinet/ip_ecn.h> 64#include <netinet/ip_var.h> 65#include <netinet/ip_encap.h> 66 67#include <netipsec/ipsec.h> 68#include <netipsec/xform.h> 69 70#include <netipsec/ipip_var.h> 71 72#ifdef INET6 73#include <netinet/ip6.h> 74#include <netipsec/ipsec6.h> 75#include <netinet6/ip6_ecn.h> 76#include <netinet6/in6_var.h> 77#include <netinet6/ip6protosw.h> 78#endif 79 80#include <netipsec/key.h> 81#include <netipsec/key_debug.h> 82 83#include <machine/stdarg.h> 84 85/* 86 * We can control the acceptance of IP4 packets by altering the sysctl 87 * net.inet.ipip.allow value. Zero means drop them, all else is acceptance. 88 */ 89VNET_DEFINE(int, ipip_allow) = 0; 90VNET_PCPUSTAT_DEFINE(struct ipipstat, ipipstat); 91VNET_PCPUSTAT_SYSINIT(ipipstat); 92 93#ifdef VIMAGE 94VNET_PCPUSTAT_SYSUNINIT(ipipstat); 95#endif /* VIMAGE */ 96 97SYSCTL_DECL(_net_inet_ipip); 98SYSCTL_VNET_INT(_net_inet_ipip, OID_AUTO, 99 ipip_allow, CTLFLAG_RW, &VNET_NAME(ipip_allow), 0, ""); 100SYSCTL_VNET_PCPUSTAT(_net_inet_ipip, IPSECCTL_STATS, stats, 101 struct ipipstat, ipipstat, 102 "IPIP statistics (struct ipipstat, netipsec/ipip_var.h)"); 103 104/* XXX IPCOMP */ 105#define M_IPSEC (M_AUTHIPHDR|M_AUTHIPDGM|M_DECRYPTED) 106 107static void _ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp); 108 109#ifdef INET6 110/* 111 * Really only a wrapper for ipip_input(), for use with IPv6. 112 */ 113int 114ip4_input6(struct mbuf **m, int *offp, int proto) 115{ 116#if 0 117 /* If we do not accept IP-in-IP explicitly, drop. */ 118 if (!V_ipip_allow && ((*m)->m_flags & M_IPSEC) == 0) { 119 DPRINTF(("%s: dropped due to policy\n", __func__)); 120 IPIPSTAT_INC(ipips_pdrops); 121 m_freem(*m); 122 return IPPROTO_DONE; 123 } 124#endif 125 _ipip_input(*m, *offp, NULL); 126 return IPPROTO_DONE; 127} 128#endif /* INET6 */ 129 130#ifdef INET 131/* 132 * Really only a wrapper for ipip_input(), for use with IPv4. 133 */ 134void 135ip4_input(struct mbuf *m, int off) 136{ 137#if 0 138 /* If we do not accept IP-in-IP explicitly, drop. */ 139 if (!V_ipip_allow && (m->m_flags & M_IPSEC) == 0) { 140 DPRINTF(("%s: dropped due to policy\n", __func__)); 141 IPIPSTAT_INC(ipips_pdrops); 142 m_freem(m); 143 return; 144 } 145#endif 146 _ipip_input(m, off, NULL); 147} 148#endif /* INET */ 149 150/* 151 * ipip_input gets called when we receive an IP{46} encapsulated packet, 152 * either because we got it at a real interface, or because AH or ESP 153 * were being used in tunnel mode (in which case the rcvif element will 154 * contain the address of the encX interface associated with the tunnel. 155 */ 156 157static void 158_ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp) 159{ 160 struct ip *ipo; 161#ifdef INET6 162 struct ip6_hdr *ip6 = NULL; 163 u_int8_t itos; 164#endif 165 int isr; 166 u_int8_t otos; 167 u_int8_t v; 168 int hlen; 169 170 IPIPSTAT_INC(ipips_ipackets); 171 172 m_copydata(m, 0, 1, &v); 173 174 switch (v >> 4) { 175#ifdef INET 176 case 4: 177 hlen = sizeof(struct ip); 178 break; 179#endif /* INET */ 180#ifdef INET6 181 case 6: 182 hlen = sizeof(struct ip6_hdr); 183 break; 184#endif 185 default: 186 IPIPSTAT_INC(ipips_family); 187 m_freem(m); 188 return /* EAFNOSUPPORT */; 189 } 190 191 /* Bring the IP header in the first mbuf, if not there already */ 192 if (m->m_len < hlen) { 193 if ((m = m_pullup(m, hlen)) == NULL) { 194 DPRINTF(("%s: m_pullup (1) failed\n", __func__)); 195 IPIPSTAT_INC(ipips_hdrops); 196 return; 197 } 198 } 199 ipo = mtod(m, struct ip *); 200 201 /* Keep outer ecn field. */ 202 switch (v >> 4) { 203#ifdef INET 204 case 4: 205 otos = ipo->ip_tos; 206 break; 207#endif /* INET */ 208#ifdef INET6 209 case 6: 210 otos = (ntohl(mtod(m, struct ip6_hdr *)->ip6_flow) >> 20) & 0xff; 211 break; 212#endif 213 default: 214 panic("ipip_input: unknown ip version %u (outer)", v>>4); 215 } 216 217 /* Remove outer IP header */ 218 m_adj(m, iphlen); 219 220 /* Sanity check */ 221 if (m->m_pkthdr.len < sizeof(struct ip)) { 222 IPIPSTAT_INC(ipips_hdrops); 223 m_freem(m); 224 return; 225 } 226 227 m_copydata(m, 0, 1, &v); 228 229 switch (v >> 4) { 230#ifdef INET 231 case 4: 232 hlen = sizeof(struct ip); 233 break; 234#endif /* INET */ 235 236#ifdef INET6 237 case 6: 238 hlen = sizeof(struct ip6_hdr); 239 break; 240#endif 241 default: 242 IPIPSTAT_INC(ipips_family); 243 m_freem(m); 244 return; /* EAFNOSUPPORT */ 245 } 246 247 /* 248 * Bring the inner IP header in the first mbuf, if not there already. 249 */ 250 if (m->m_len < hlen) { 251 if ((m = m_pullup(m, hlen)) == NULL) { 252 DPRINTF(("%s: m_pullup (2) failed\n", __func__)); 253 IPIPSTAT_INC(ipips_hdrops); 254 return; 255 } 256 } 257 258 /* 259 * RFC 1853 specifies that the inner TTL should not be touched on 260 * decapsulation. There's no reason this comment should be here, but 261 * this is as good as any a position. 262 */ 263 264 /* Some sanity checks in the inner IP header */ 265 switch (v >> 4) { 266#ifdef INET 267 case 4: 268 ipo = mtod(m, struct ip *); 269 ip_ecn_egress(V_ip4_ipsec_ecn, &otos, &ipo->ip_tos); 270 break; 271#endif /* INET */ 272#ifdef INET6 273 case 6: 274 ip6 = (struct ip6_hdr *) ipo; 275 itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 276 ip_ecn_egress(V_ip6_ipsec_ecn, &otos, &itos); 277 ip6->ip6_flow &= ~htonl(0xff << 20); 278 ip6->ip6_flow |= htonl((u_int32_t) itos << 20); 279 break; 280#endif 281 default: 282 panic("ipip_input: unknown ip version %u (inner)", v>>4); 283 } 284 285 /* Check for local address spoofing. */ 286 if ((m->m_pkthdr.rcvif == NULL || 287 !(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK)) && 288 V_ipip_allow != 2) { 289#ifdef INET 290 if ((v >> 4) == IPVERSION && 291 in_localip(ipo->ip_src) != 0) { 292 IPIPSTAT_INC(ipips_spoof); 293 m_freem(m); 294 return; 295 } 296#endif 297#ifdef INET6 298 if ((v & IPV6_VERSION_MASK) == IPV6_VERSION && 299 in6_localip(&ip6->ip6_src) != 0) { 300 IPIPSTAT_INC(ipips_spoof); 301 m_freem(m); 302 return; 303 } 304#endif 305 } 306 307 /* Statistics */ 308 IPIPSTAT_ADD(ipips_ibytes, m->m_pkthdr.len - iphlen); 309 310 /* 311 * Interface pointer stays the same; if no IPsec processing has 312 * been done (or will be done), this will point to a normal 313 * interface. Otherwise, it'll point to an enc interface, which 314 * will allow a packet filter to distinguish between secure and 315 * untrusted packets. 316 */ 317 318 switch (v >> 4) { 319#ifdef INET 320 case 4: 321 isr = NETISR_IP; 322 break; 323#endif 324#ifdef INET6 325 case 6: 326 isr = NETISR_IPV6; 327 break; 328#endif 329 default: 330 panic("%s: bogus ip version %u", __func__, v>>4); 331 } 332 333 if (netisr_queue(isr, m)) { /* (0) on success. */ 334 IPIPSTAT_INC(ipips_qfull); 335 DPRINTF(("%s: packet dropped because of full queue\n", 336 __func__)); 337 } 338} 339 340int 341ipip_output( 342 struct mbuf *m, 343 struct ipsecrequest *isr, 344 struct mbuf **mp, 345 int skip, 346 int protoff 347) 348{ 349 struct secasvar *sav; 350 u_int8_t tp, otos; 351 struct secasindex *saidx; 352 int error; 353#if defined(INET) || defined(INET6) 354 u_int8_t itos; 355#endif 356#ifdef INET 357 struct ip *ipo; 358#endif /* INET */ 359#ifdef INET6 360 struct ip6_hdr *ip6, *ip6o; 361#endif /* INET6 */ 362 363 sav = isr->sav; 364 IPSEC_ASSERT(sav != NULL, ("null SA")); 365 IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); 366 367 /* XXX Deal with empty TDB source/destination addresses. */ 368 369 m_copydata(m, 0, 1, &tp); 370 tp = (tp >> 4) & 0xff; /* Get the IP version number. */ 371 372 saidx = &sav->sah->saidx; 373 switch (saidx->dst.sa.sa_family) { 374#ifdef INET 375 case AF_INET: 376 if (saidx->src.sa.sa_family != AF_INET || 377 saidx->src.sin.sin_addr.s_addr == INADDR_ANY || 378 saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) { 379 DPRINTF(("%s: unspecified tunnel endpoint " 380 "address in SA %s/%08lx\n", __func__, 381 ipsec_address(&saidx->dst), 382 (u_long) ntohl(sav->spi))); 383 IPIPSTAT_INC(ipips_unspec); 384 error = EINVAL; 385 goto bad; 386 } 387 388 M_PREPEND(m, sizeof(struct ip), M_NOWAIT); 389 if (m == 0) { 390 DPRINTF(("%s: M_PREPEND failed\n", __func__)); 391 IPIPSTAT_INC(ipips_hdrops); 392 error = ENOBUFS; 393 goto bad; 394 } 395 396 ipo = mtod(m, struct ip *); 397 398 ipo->ip_v = IPVERSION; 399 ipo->ip_hl = 5; 400 ipo->ip_len = htons(m->m_pkthdr.len); 401 ipo->ip_ttl = V_ip_defttl; 402 ipo->ip_sum = 0; 403 ipo->ip_src = saidx->src.sin.sin_addr; 404 ipo->ip_dst = saidx->dst.sin.sin_addr; 405 406 ipo->ip_id = ip_newid(); 407 408 /* If the inner protocol is IP... */ 409 switch (tp) { 410 case IPVERSION: 411 /* Save ECN notification */ 412 m_copydata(m, sizeof(struct ip) + 413 offsetof(struct ip, ip_tos), 414 sizeof(u_int8_t), (caddr_t) &itos); 415 416 ipo->ip_p = IPPROTO_IPIP; 417 418 /* 419 * We should be keeping tunnel soft-state and 420 * send back ICMPs if needed. 421 */ 422 m_copydata(m, sizeof(struct ip) + 423 offsetof(struct ip, ip_off), 424 sizeof(u_int16_t), (caddr_t) &ipo->ip_off); 425 ipo->ip_off = ntohs(ipo->ip_off); 426 ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK); 427 ipo->ip_off = htons(ipo->ip_off); 428 break; 429#ifdef INET6 430 case (IPV6_VERSION >> 4): 431 { 432 u_int32_t itos32; 433 434 /* Save ECN notification. */ 435 m_copydata(m, sizeof(struct ip) + 436 offsetof(struct ip6_hdr, ip6_flow), 437 sizeof(u_int32_t), (caddr_t) &itos32); 438 itos = ntohl(itos32) >> 20; 439 ipo->ip_p = IPPROTO_IPV6; 440 ipo->ip_off = 0; 441 break; 442 } 443#endif /* INET6 */ 444 default: 445 goto nofamily; 446 } 447 448 otos = 0; 449 ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); 450 ipo->ip_tos = otos; 451 break; 452#endif /* INET */ 453 454#ifdef INET6 455 case AF_INET6: 456 if (IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr) || 457 saidx->src.sa.sa_family != AF_INET6 || 458 IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr)) { 459 DPRINTF(("%s: unspecified tunnel endpoint " 460 "address in SA %s/%08lx\n", __func__, 461 ipsec_address(&saidx->dst), 462 (u_long) ntohl(sav->spi))); 463 IPIPSTAT_INC(ipips_unspec); 464 error = ENOBUFS; 465 goto bad; 466 } 467 468 /* scoped address handling */ 469 ip6 = mtod(m, struct ip6_hdr *); 470 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) 471 ip6->ip6_src.s6_addr16[1] = 0; 472 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) 473 ip6->ip6_dst.s6_addr16[1] = 0; 474 475 M_PREPEND(m, sizeof(struct ip6_hdr), M_NOWAIT); 476 if (m == 0) { 477 DPRINTF(("%s: M_PREPEND failed\n", __func__)); 478 IPIPSTAT_INC(ipips_hdrops); 479 error = ENOBUFS; 480 goto bad; 481 } 482 483 /* Initialize IPv6 header */ 484 ip6o = mtod(m, struct ip6_hdr *); 485 ip6o->ip6_flow = 0; 486 ip6o->ip6_vfc &= ~IPV6_VERSION_MASK; 487 ip6o->ip6_vfc |= IPV6_VERSION; 488 ip6o->ip6_hlim = IPV6_DEFHLIM; 489 ip6o->ip6_dst = saidx->dst.sin6.sin6_addr; 490 ip6o->ip6_src = saidx->src.sin6.sin6_addr; 491 ip6o->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); 492 493 switch (tp) { 494#ifdef INET 495 case IPVERSION: 496 /* Save ECN notification */ 497 m_copydata(m, sizeof(struct ip6_hdr) + 498 offsetof(struct ip, ip_tos), sizeof(u_int8_t), 499 (caddr_t) &itos); 500 501 /* This is really IPVERSION. */ 502 ip6o->ip6_nxt = IPPROTO_IPIP; 503 break; 504#endif /* INET */ 505 case (IPV6_VERSION >> 4): 506 { 507 u_int32_t itos32; 508 509 /* Save ECN notification. */ 510 m_copydata(m, sizeof(struct ip6_hdr) + 511 offsetof(struct ip6_hdr, ip6_flow), 512 sizeof(u_int32_t), (caddr_t) &itos32); 513 itos = ntohl(itos32) >> 20; 514 515 ip6o->ip6_nxt = IPPROTO_IPV6; 516 break; 517 } 518 default: 519 goto nofamily; 520 } 521 522 otos = 0; 523 ip_ecn_ingress(V_ip6_ipsec_ecn, &otos, &itos); 524 ip6o->ip6_flow |= htonl((u_int32_t) otos << 20); 525 break; 526#endif /* INET6 */ 527 528 default: 529nofamily: 530 DPRINTF(("%s: unsupported protocol family %u\n", __func__, 531 saidx->dst.sa.sa_family)); 532 IPIPSTAT_INC(ipips_family); 533 error = EAFNOSUPPORT; /* XXX diffs from openbsd */ 534 goto bad; 535 } 536 537 IPIPSTAT_INC(ipips_opackets); 538 *mp = m; 539 540#ifdef INET 541 if (saidx->dst.sa.sa_family == AF_INET) { 542#if 0 543 if (sav->tdb_xform->xf_type == XF_IP4) 544 tdb->tdb_cur_bytes += 545 m->m_pkthdr.len - sizeof(struct ip); 546#endif 547 IPIPSTAT_ADD(ipips_obytes, 548 m->m_pkthdr.len - sizeof(struct ip)); 549 } 550#endif /* INET */ 551 552#ifdef INET6 553 if (saidx->dst.sa.sa_family == AF_INET6) { 554#if 0 555 if (sav->tdb_xform->xf_type == XF_IP4) 556 tdb->tdb_cur_bytes += 557 m->m_pkthdr.len - sizeof(struct ip6_hdr); 558#endif 559 IPIPSTAT_ADD(ipips_obytes, 560 m->m_pkthdr.len - sizeof(struct ip6_hdr)); 561 } 562#endif /* INET6 */ 563 564 return 0; 565bad: 566 if (m) 567 m_freem(m); 568 *mp = NULL; 569 return (error); 570} 571 572#ifdef IPSEC 573#if defined(INET) || defined(INET6) 574static int 575ipe4_init(struct secasvar *sav, struct xformsw *xsp) 576{ 577 sav->tdb_xform = xsp; 578 return 0; 579} 580 581static int 582ipe4_zeroize(struct secasvar *sav) 583{ 584 sav->tdb_xform = NULL; 585 return 0; 586} 587 588static int 589ipe4_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) 590{ 591 /* This is a rather serious mistake, so no conditional printing. */ 592 printf("%s: should never be called\n", __func__); 593 if (m) 594 m_freem(m); 595 return EOPNOTSUPP; 596} 597 598static struct xformsw ipe4_xformsw = { 599 XF_IP4, 0, "IPv4 Simple Encapsulation", 600 ipe4_init, ipe4_zeroize, ipe4_input, ipip_output, 601}; 602 603extern struct domain inetdomain; 604#endif /* INET || INET6 */ 605#ifdef INET 606static struct protosw ipe4_protosw = { 607 .pr_type = SOCK_RAW, 608 .pr_domain = &inetdomain, 609 .pr_protocol = IPPROTO_IPV4, 610 .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, 611 .pr_input = ip4_input, 612 .pr_ctloutput = rip_ctloutput, 613 .pr_usrreqs = &rip_usrreqs 614}; 615#endif /* INET */ 616#if defined(INET6) && defined(INET) 617static struct ip6protosw ipe6_protosw = { 618 .pr_type = SOCK_RAW, 619 .pr_domain = &inetdomain, 620 .pr_protocol = IPPROTO_IPV6, 621 .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, 622 .pr_input = ip4_input6, 623 .pr_ctloutput = rip_ctloutput, 624 .pr_usrreqs = &rip_usrreqs 625}; 626#endif /* INET6 && INET */ 627 628#ifdef INET 629/* 630 * Check the encapsulated packet to see if we want it 631 */ 632static int 633ipe4_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 634{ 635 /* 636 * Only take packets coming from IPSEC tunnels; the rest 637 * must be handled by the gif tunnel code. Note that we 638 * also return a minimum priority when we want the packet 639 * so any explicit gif tunnels take precedence. 640 */ 641 return ((m->m_flags & M_IPSEC) != 0 ? 1 : 0); 642} 643#endif /* INET */ 644 645static void 646ipe4_attach(void) 647{ 648 649 xform_register(&ipe4_xformsw); 650 /* attach to encapsulation framework */ 651 /* XXX save return cookie for detach on module remove */ 652#ifdef INET 653 (void) encap_attach_func(AF_INET, -1, 654 ipe4_encapcheck, &ipe4_protosw, NULL); 655#endif 656#if defined(INET6) && defined(INET) 657 (void) encap_attach_func(AF_INET6, -1, 658 ipe4_encapcheck, (struct protosw *)&ipe6_protosw, NULL); 659#endif 660} 661SYSINIT(ipe4_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipe4_attach, NULL); 662#endif /* IPSEC */ 663