ip_input.c revision 167721
1164640Sflz/*- 298186Sgordon * Copyright (c) 1982, 1986, 1988, 1993 378344Sobrien * The Regents of the University of California. All rights reserved. 4157473Sflz * 578344Sobrien * Redistribution and use in source and binary forms, with or without 678344Sobrien * modification, are permitted provided that the following conditions 778344Sobrien * are met: 878344Sobrien * 1. Redistributions of source code must retain the above copyright 978344Sobrien * notice, this list of conditions and the following disclaimer. 1078344Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1178344Sobrien * notice, this list of conditions and the following disclaimer in the 1278344Sobrien * documentation and/or other materials provided with the distribution. 1378344Sobrien * 4. Neither the name of the University nor the names of its contributors 1478344Sobrien * may be used to endorse or promote products derived from this software 1578344Sobrien * without specific prior written permission. 1678344Sobrien * 1778344Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1878344Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1978344Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2078344Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2178344Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2278344Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2378344Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2478344Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2578344Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2678344Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2778344Sobrien * SUCH DAMAGE. 2878344Sobrien * 2978344Sobrien * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 3078344Sobrien * $FreeBSD: head/sys/netinet/ip_input.c 167721 2007-03-19 19:00:51Z andre $ 3178344Sobrien */ 3278344Sobrien 3378344Sobrien#include "opt_bootp.h" 3478344Sobrien#include "opt_ipfw.h" 35169668Smtm#include "opt_ipstealth.h" 36157473Sflz#include "opt_ipsec.h" 3778344Sobrien#include "opt_mac.h" 3898186Sgordon#include "opt_carp.h" 3998186Sgordon 4098186Sgordon#include <sys/param.h> 41131550Scperciva#include <sys/systm.h> 42131550Scperciva#include <sys/callout.h> 43131550Scperciva#include <sys/mbuf.h> 44131550Scperciva#include <sys/malloc.h> 4598186Sgordon#include <sys/domain.h> 4698186Sgordon#include <sys/protosw.h> 47202988Semaste#include <sys/socket.h> 48295949Saraujo#include <sys/time.h> 49124832Smtm#include <sys/kernel.h> 50124832Smtm#include <sys/syslog.h> 51161435Syar#include <sys/sysctl.h> 52332367Skevans 5398186Sgordon#include <net/pfil.h> 5498186Sgordon#include <net/if.h> 5578344Sobrien#include <net/if_types.h> 5678344Sobrien#include <net/if_var.h> 5778344Sobrien#include <net/if_dl.h> 58264243Sdteske#include <net/route.h> 59264243Sdteske#include <net/netisr.h> 60264243Sdteske 61264243Sdteske#include <netinet/in.h> 62264243Sdteske#include <netinet/in_systm.h> 63264243Sdteske#include <netinet/in_var.h> 64264243Sdteske#include <netinet/ip.h> 65264243Sdteske#include <netinet/in_pcb.h> 66264243Sdteske#include <netinet/ip_var.h> 67264243Sdteske#include <netinet/ip_icmp.h> 68264243Sdteske#include <netinet/ip_options.h> 69264243Sdteske#include <machine/in_cksum.h> 70264243Sdteske#ifdef DEV_CARP 71264243Sdteske#include <netinet/ip_carp.h> 72272393Shrs#endif 73272393Shrs#if defined(IPSEC) || defined(FAST_IPSEC) 74272393Shrs#include <netinet/ip_ipsec.h> 75272393Shrs#endif /* IPSEC */ 76272393Shrs 77272393Shrs#include <sys/socketvar.h> 78272393Shrs 79272393Shrs/* XXX: Temporary until ipfw_ether and ipfw_bridge are converted. */ 80272393Shrs#include <netinet/ip_fw.h> 81272393Shrs#include <netinet/ip_dummynet.h> 82272393Shrs 83272393Shrs#include <security/mac/mac_framework.h> 84272393Shrs 85272393Shrsint rsvp_on = 0; 86272393Shrs 87272393Shrsint ipforwarding = 0; 88272393ShrsSYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, 89272393Shrs &ipforwarding, 0, "Enable IP forwarding between interfaces"); 90272393Shrs 91272393Shrsstatic int ipsendredirects = 1; /* XXX */ 92272393ShrsSYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, 93272393Shrs &ipsendredirects, 0, "Enable sending IP redirects"); 94272393Shrs 95272393Shrsint ip_defttl = IPDEFTTL; 96272393ShrsSYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, 97272393Shrs &ip_defttl, 0, "Maximum TTL on IP packets"); 98272393Shrs 99272393Shrsstatic int ip_keepfaith = 0; 100272393ShrsSYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, 101272393Shrs &ip_keepfaith, 0, 102272393Shrs "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); 103272393Shrs 104272393Shrsstatic int ip_sendsourcequench = 0; 105197144ShrsSYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, 106197144Shrs &ip_sendsourcequench, 0, 107197144Shrs "Enable the transmission of source quench packets"); 10898186Sgordon 109197144Shrsint ip_do_randomid = 0; 110197144ShrsSYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, 111197144Shrs &ip_do_randomid, 0, 112197144Shrs "Assign random ip_id values"); 113272393Shrs 114197144Shrs/* 115197144Shrs * XXX - Setting ip_checkinterface mostly implements the receive side of 116197144Shrs * the Strong ES model described in RFC 1122, but since the routing table 117197144Shrs * and transmit implementation do not implement the Strong ES model, 118197144Shrs * setting this to 1 results in an odd hybrid. 119197144Shrs * 120197144Shrs * XXX - ip_checkinterface currently must be disabled if you use ipnat 121197144Shrs * to translate the destination address to another local interface. 122231667Sdougb * 12398186Sgordon * XXX - ip_checkinterface must be disabled if you add IP aliases 124231667Sdougb * to the loopback interface instead of the interface where the 12598186Sgordon * packets for those addresses are received. 126231667Sdougb */ 12798186Sgordonstatic int ip_checkinterface = 0; 12898186SgordonSYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, 12998186Sgordon &ip_checkinterface, 0, "Verify packet arrives on correct interface"); 130231667Sdougb 131231667Sdougbstruct pfil_head inet_pfil_hook; /* Packet filter hooks */ 13298186Sgordon 133231667Sdougbstatic struct ifqueue ipintrq; 13498186Sgordonstatic int ipqmaxlen = IFQ_MAXLEN; 135231667Sdougb 136231667Sdougbextern struct domain inetdomain; 137231667Sdougbextern struct protosw inetsw[]; 138231667Sdougbu_char ip_protox[IPPROTO_MAX]; 139231667Sdougbstruct in_ifaddrhead in_ifaddrhead; /* first inet address */ 14098186Sgordonstruct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ 141146490Sschweikhu_long in_ifaddrhmask; /* mask for hash table */ 14298186Sgordon 14398186SgordonSYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, 14498186Sgordon &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); 14598186SgordonSYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, 14698186Sgordon &ipintrq.ifq_drops, 0, 14798186Sgordon "Number of packets dropped from the IP input queue"); 14878344Sobrien 14978344Sobrienstruct ipstat ipstat; 15078344SobrienSYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, 15178344Sobrien &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); 15278344Sobrien 15378344Sobrien/* 15478344Sobrien * IP datagram reassembly. 15598186Sgordon */ 15678344Sobrien#define IPREASS_NHASH_LOG2 6 15778344Sobrien#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) 15878344Sobrien#define IPREASS_HMASK (IPREASS_NHASH - 1) 15978344Sobrien#define IPREASS_HASH(x,y) \ 16078344Sobrien (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) 16178344Sobrien 16278344Sobrienstatic uma_zone_t ipq_zone; 16378344Sobrienstatic TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; 16478344Sobrienstatic struct mtx ipqlock; 16578344Sobrien 16678344Sobrien#define IPQ_LOCK() mtx_lock(&ipqlock) 16778344Sobrien#define IPQ_UNLOCK() mtx_unlock(&ipqlock) 168229822Sdougb#define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF) 16978344Sobrien#define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED) 17078344Sobrien 17178344Sobrienstatic void maxnipq_update(void); 17278344Sobrienstatic void ipq_zone_change(void *); 17378344Sobrien 174157473Sflzstatic int maxnipq; /* Administrative limit on # reass queues. */ 17598186Sgordonstatic int nipq = 0; /* Total # of reass queues */ 17698186SgordonSYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, 17778344Sobrien &nipq, 0, "Current number of IPv4 fragment reassembly queue entries"); 17898186Sgordon 17998186Sgordonstatic int maxfragsperpacket; 18098186SgordonSYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, 181126286Smtm &maxfragsperpacket, 0, 18298186Sgordon "Maximum number of IPv4 fragments allowed per packet"); 18398186Sgordon 18498186Sgordonstruct callout ipport_tick_callout; 18598186Sgordon 18698186Sgordon#ifdef IPCTL_DEFMTU 187169668SmtmSYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 188169668Smtm &ip_mtu, 0, "Default MTU"); 189169668Smtm#endif 190169668Smtm 19178344Sobrien#ifdef IPSTEALTH 192169668Smtmint ipstealth = 0; 193169668SmtmSYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, 194169668Smtm &ipstealth, 0, "IP stealth mode, no TTL decrementation on forwarding"); 195169668Smtm#endif 196178776Smaxim 197178776Smaxim/* 198178770Smtm * ipfw_ether and ipfw_bridge hooks. 199169668Smtm * XXX: Temporary until those are converted to pfil_hooks as well. 200178770Smtm */ 201178770Smtmip_fw_chk_t *ip_fw_chk_ptr = NULL; 202169668Smtmip_dn_io_t *ip_dn_io_ptr = NULL; 203178770Smtmint fw_one_pass = 1; 204178775Smaxim 205169668Smtmstatic void ip_freef(struct ipqhead *, struct ipq *); 206169668Smtm 207169668Smtm/* 208169668Smtm * IP initialization: fill in IP protocol switch table. 209169668Smtm * All protocols not implemented in kernel go to raw IP protocol handler. 210169668Smtm */ 211169668Smtmvoid 212169668Smtmip_init() 21398186Sgordon{ 21498186Sgordon register struct protosw *pr; 21598186Sgordon register int i; 21698186Sgordon 21798186Sgordon TAILQ_INIT(&in_ifaddrhead); 21878344Sobrien in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask); 21978344Sobrien pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 22098186Sgordon if (pr == NULL) 22178344Sobrien panic("ip_init: PF_INET not found"); 22278344Sobrien 223126285Smtm /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 22478344Sobrien for (i = 0; i < IPPROTO_MAX; i++) 22578344Sobrien ip_protox[i] = pr - inetsw; 226126285Smtm /* 22778344Sobrien * Cycle through IP protocols and put them into the appropriate place 22878344Sobrien * in ip_protox[]. 229126285Smtm */ 230126285Smtm for (pr = inetdomain.dom_protosw; 231126285Smtm pr < inetdomain.dom_protoswNPROTOSW; pr++) 23278344Sobrien if (pr->pr_domain->dom_family == PF_INET && 23378344Sobrien pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 23498186Sgordon /* Be careful to only index valid IP protocols. */ 23578344Sobrien if (pr->pr_protocol < IPPROTO_MAX) 23678344Sobrien ip_protox[pr->pr_protocol] = pr - inetsw; 23778344Sobrien } 23878344Sobrien 23998186Sgordon /* Initialize packet filter hooks. */ 24098186Sgordon inet_pfil_hook.ph_type = PFIL_TYPE_AF; 24178344Sobrien inet_pfil_hook.ph_af = AF_INET; 24298186Sgordon if ((i = pfil_head_register(&inet_pfil_hook)) != 0) 24398186Sgordon printf("%s: WARNING: unable to register pfil hook, " 24478344Sobrien "error %d\n", __func__, i); 24578344Sobrien 24678344Sobrien /* Initialize IP reassembly queue. */ 24778344Sobrien IPQ_LOCK_INIT(); 24878344Sobrien for (i = 0; i < IPREASS_NHASH; i++) 24998186Sgordon TAILQ_INIT(&ipq[i]); 25078344Sobrien maxnipq = nmbclusters / 32; 25198186Sgordon maxfragsperpacket = 16; 25278344Sobrien ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL, 25378344Sobrien NULL, UMA_ALIGN_PTR, 0); 254131061Smtm maxnipq_update(); 25578344Sobrien 25678344Sobrien /* Start ipport_tick. */ 25778344Sobrien callout_init(&ipport_tick_callout, CALLOUT_MPSAFE); 25878344Sobrien ipport_tick(NULL); 259139949Skeramida EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, 26078344Sobrien SHUTDOWN_PRI_DEFAULT); 26178344Sobrien EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change, 26298186Sgordon NULL, EVENTHANDLER_PRI_ANY); 26378344Sobrien 26478344Sobrien /* Initialize various other remaining things. */ 26578344Sobrien ip_id = time_second & 0xffff; 26698186Sgordon ipintrq.ifq_maxlen = ipqmaxlen; 26778344Sobrien mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); 26898186Sgordon netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE); 26998186Sgordon} 27078344Sobrien 27178344Sobrienvoid ip_fini(xtp) 27278344Sobrien void *xtp; 27378344Sobrien{ 27498186Sgordon callout_stop(&ipport_tick_callout); 27578344Sobrien} 27698186Sgordon 27778344Sobrien/* 27898186Sgordon * Ip input routine. Checksum and byte swap header. If fragmented 27998186Sgordon * try to reassemble. Process options. Pass to next level. 28098186Sgordon */ 28198186Sgordonvoid 28298186Sgordonip_input(struct mbuf *m) 28398186Sgordon{ 28498186Sgordon struct ip *ip = NULL; 28598186Sgordon struct in_ifaddr *ia = NULL; 28698186Sgordon struct ifaddr *ifa; 28798186Sgordon int checkif, hlen = 0; 28898186Sgordon u_short sum; 28998186Sgordon int dchg = 0; /* dest changed after fw */ 29098186Sgordon struct in_addr odst; /* original dst address */ 29198186Sgordon 292155719Sceri M_ASSERTPKTHDR(m); 29398186Sgordon 29498186Sgordon if (m->m_flags & M_FASTFWD_OURS) { 29598186Sgordon /* 29698186Sgordon * Firewall or NAT changed destination to local. 297157841Sflz * We expect ip_len and ip_off to be in host byte order. 298157841Sflz */ 299157841Sflz m->m_flags &= ~M_FASTFWD_OURS; 30098186Sgordon /* Set up some basics that will be used later. */ 30198186Sgordon ip = mtod(m, struct ip *); 30298186Sgordon hlen = ip->ip_hl << 2; 30398186Sgordon goto ours; 30498186Sgordon } 30598186Sgordon 30698186Sgordon ipstat.ips_total++; 30798186Sgordon 30898186Sgordon if (m->m_pkthdr.len < sizeof(struct ip)) 30998186Sgordon goto tooshort; 31078344Sobrien 31198186Sgordon if (m->m_len < sizeof (struct ip) && 312242183Screes (m = m_pullup(m, sizeof (struct ip))) == NULL) { 313242183Screes ipstat.ips_toosmall++; 314170282Syar return; 315170282Syar } 316170282Syar ip = mtod(m, struct ip *); 317170282Syar 318170282Syar if (ip->ip_v != IPVERSION) { 319170282Syar ipstat.ips_badvers++; 320170282Syar goto bad; 321170282Syar } 322170282Syar 323170282Syar hlen = ip->ip_hl << 2; 324170282Syar if (hlen < sizeof(struct ip)) { /* minimum header length */ 325170282Syar ipstat.ips_badhlen++; 326170282Syar goto bad; 327170282Syar } 328170282Syar if (hlen > m->m_len) { 329170282Syar if ((m = m_pullup(m, hlen)) == NULL) { 330170282Syar ipstat.ips_badhlen++; 331170282Syar return; 332170282Syar } 333170282Syar ip = mtod(m, struct ip *); 334170282Syar } 335170282Syar 33678344Sobrien /* 127/8 must not appear on wire - RFC1122 */ 33798186Sgordon if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 338157841Sflz (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 33998186Sgordon if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { 34098186Sgordon ipstat.ips_badaddr++; 341245250Ssmh goto bad; 34298186Sgordon } 34398186Sgordon } 34498186Sgordon 34598186Sgordon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 346151426Sjhb sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 34798186Sgordon } else { 34898186Sgordon if (hlen == sizeof(struct ip)) { 349161435Syar sum = in_cksum_hdr(ip); 350161436Syar } else { 351157657Sflz sum = in_cksum(m, hlen); 352161436Syar } 353157657Sflz } 354157657Sflz if (sum) { 355157657Sflz ipstat.ips_badsum++; 356157657Sflz goto bad; 35798186Sgordon } 35898186Sgordon 35998186Sgordon#ifdef ALTQ 36098186Sgordon if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 361114272Smtm /* packet is dropped by traffic conditioner */ 36298186Sgordon return; 36398186Sgordon#endif 36498186Sgordon 365264243Sdteske /* 366264243Sdteske * Convert fields to host representation. 367264243Sdteske */ 368264243Sdteske ip->ip_len = ntohs(ip->ip_len); 36998186Sgordon if (ip->ip_len < hlen) { 370264243Sdteske ipstat.ips_badlen++; 371264243Sdteske goto bad; 372264243Sdteske } 373264243Sdteske ip->ip_off = ntohs(ip->ip_off); 374264243Sdteske 375264243Sdteske /* 376264243Sdteske * Check that the amount of data in the buffers 377264243Sdteske * is as at least much as the IP header would have us expect. 378264243Sdteske * Trim mbufs if longer than we expect. 379264243Sdteske * Drop packet if shorter than we expect. 380264243Sdteske */ 381264243Sdteske if (m->m_pkthdr.len < ip->ip_len) { 382264243Sdtesketooshort: 383264243Sdteske ipstat.ips_tooshort++; 384264243Sdteske goto bad; 385264243Sdteske } 386264243Sdteske if (m->m_pkthdr.len > ip->ip_len) { 387264243Sdteske if (m->m_len == m->m_pkthdr.len) { 388264243Sdteske m->m_len = ip->ip_len; 389264243Sdteske m->m_pkthdr.len = ip->ip_len; 390264243Sdteske } else 391264243Sdteske m_adj(m, ip->ip_len - m->m_pkthdr.len); 392264243Sdteske } 393264243Sdteske#if defined(IPSEC) || defined(FAST_IPSEC) 394264243Sdteske /* 395264243Sdteske * Bypass packet filtering for packets from a tunnel (gif). 396264243Sdteske */ 397264243Sdteske if (ip_ipsec_filtergif(m)) 398264243Sdteske goto passin; 399264243Sdteske#endif /* IPSEC */ 400264243Sdteske 401264243Sdteske /* 402264243Sdteske * Run through list of hooks for input packets. 403264243Sdteske * 404264243Sdteske * NB: Beware of the destination address changing (e.g. 405264243Sdteske * by NAT rewriting). When this happens, tell 406264243Sdteske * ip_forward to do the right thing. 407264243Sdteske */ 408264243Sdteske 409264243Sdteske /* Jump over all PFIL processing if hooks are not active. */ 410264243Sdteske if (!PFIL_HOOKED(&inet_pfil_hook)) 411264243Sdteske goto passin; 412264243Sdteske 413264243Sdteske odst = ip->ip_dst; 414264243Sdteske if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, 415264243Sdteske PFIL_IN, NULL) != 0) 416264243Sdteske return; 417264243Sdteske if (m == NULL) /* consumed by filter */ 418264243Sdteske return; 419264243Sdteske 420264243Sdteske ip = mtod(m, struct ip *); 421264243Sdteske dchg = (odst.s_addr != ip->ip_dst.s_addr); 422264243Sdteske 423264243Sdteske#ifdef IPFIREWALL_FORWARD 424264243Sdteske if (m->m_flags & M_FASTFWD_OURS) { 425264243Sdteske m->m_flags &= ~M_FASTFWD_OURS; 426264243Sdteske goto ours; 427264243Sdteske } 428264243Sdteske if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) { 429264243Sdteske /* 430264243Sdteske * Directly ship on the packet. This allows to forward packets 431264243Sdteske * that were destined for us to some other directly connected 432264243Sdteske * host. 433264243Sdteske */ 434264243Sdteske ip_forward(m, dchg); 435264243Sdteske return; 436264243Sdteske } 437264243Sdteske#endif /* IPFIREWALL_FORWARD */ 438264243Sdteske 439264243Sdteskepassin: 440264243Sdteske /* 441264243Sdteske * Process options and, if not destined for us, 442264243Sdteske * ship it on. ip_dooptions returns 1 when an 443264243Sdteske * error was detected (causing an icmp message 444264243Sdteske * to be sent and the original packet to be freed). 445264243Sdteske */ 446264243Sdteske if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 447264243Sdteske return; 448264243Sdteske 449264243Sdteske /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 450264243Sdteske * matter if it is destined to another node, or whether it is 451264243Sdteske * a multicast one, RSVP wants it! and prevents it from being forwarded 452264243Sdteske * anywhere else. Also checks if the rsvp daemon is running before 453264243Sdteske * grabbing the packet. 454264243Sdteske */ 455264243Sdteske if (rsvp_on && ip->ip_p==IPPROTO_RSVP) 456264243Sdteske goto ours; 457264243Sdteske 458264243Sdteske /* 459264243Sdteske * Check our list of addresses, to see if the packet is for us. 460264243Sdteske * If we don't have any addresses, assume any unicast packet 461264243Sdteske * we receive might be for us (and let the upper layers deal 462264243Sdteske * with it). 463264243Sdteske */ 464264243Sdteske if (TAILQ_EMPTY(&in_ifaddrhead) && 465264243Sdteske (m->m_flags & (M_MCAST|M_BCAST)) == 0) 466264243Sdteske goto ours; 467264243Sdteske 468264243Sdteske /* 469264243Sdteske * Enable a consistency check between the destination address 470264243Sdteske * and the arrival interface for a unicast packet (the RFC 1122 471264243Sdteske * strong ES model) if IP forwarding is disabled and the packet 472264243Sdteske * is not locally generated and the packet is not subject to 473264243Sdteske * 'ipfw fwd'. 474264243Sdteske * 475264243Sdteske * XXX - Checking also should be disabled if the destination 476264243Sdteske * address is ipnat'ed to a different interface. 477264243Sdteske * 478264243Sdteske * XXX - Checking is incompatible with IP aliases added 479264243Sdteske * to the loopback interface instead of the interface where 480264243Sdteske * the packets are received. 481264243Sdteske * 482264243Sdteske * XXX - This is the case for carp vhost IPs as well so we 483264243Sdteske * insert a workaround. If the packet got here, we already 484264243Sdteske * checked with carp_iamatch() and carp_forus(). 485264243Sdteske */ 486264243Sdteske checkif = ip_checkinterface && (ipforwarding == 0) && 487264243Sdteske m->m_pkthdr.rcvif != NULL && 488264243Sdteske ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) && 489264243Sdteske#ifdef DEV_CARP 490264243Sdteske !m->m_pkthdr.rcvif->if_carp && 491264243Sdteske#endif 492264243Sdteske (dchg == 0); 493264243Sdteske 494264243Sdteske /* 495264243Sdteske * Check for exact addresses in the hash bucket. 496264243Sdteske */ 497264243Sdteske LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 498264243Sdteske /* 499264243Sdteske * If the address matches, verify that the packet 500264243Sdteske * arrived via the correct interface if checking is 501264243Sdteske * enabled. 502264243Sdteske */ 503264243Sdteske if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 504264243Sdteske (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) 505264243Sdteske goto ours; 506264243Sdteske } 507264243Sdteske /* 508264243Sdteske * Check for broadcast addresses. 509264243Sdteske * 510264243Sdteske * Only accept broadcast packets that arrive via the matching 511264243Sdteske * interface. Reception of forwarded directed broadcasts would 512264243Sdteske * be handled via ip_forward() and ether_output() with the loopback 513264243Sdteske * into the stack for SIMPLEX interfaces handled by ether_output(). 514264243Sdteske */ 515264243Sdteske if (m->m_pkthdr.rcvif != NULL && 516264243Sdteske m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { 517264243Sdteske TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { 518264243Sdteske if (ifa->ifa_addr->sa_family != AF_INET) 519264243Sdteske continue; 520264243Sdteske ia = ifatoia(ifa); 521264243Sdteske if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 522264243Sdteske ip->ip_dst.s_addr) 523264243Sdteske goto ours; 524264243Sdteske if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) 525264243Sdteske goto ours; 526264243Sdteske#ifdef BOOTP_COMPAT 527264243Sdteske if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) 528264243Sdteske goto ours; 529264243Sdteske#endif 530264243Sdteske } 531264243Sdteske } 532264243Sdteske /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 533264243Sdteske if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 534264243Sdteske ipstat.ips_cantforward++; 535264243Sdteske m_freem(m); 536264243Sdteske return; 537264243Sdteske } 538264243Sdteske if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 539264243Sdteske struct in_multi *inm; 540264243Sdteske if (ip_mrouter) { 541264243Sdteske /* 542264243Sdteske * If we are acting as a multicast router, all 543264243Sdteske * incoming multicast packets are passed to the 544264243Sdteske * kernel-level multicast forwarding function. 545264243Sdteske * The packet is returned (relatively) intact; if 546264243Sdteske * ip_mforward() returns a non-zero value, the packet 547264243Sdteske * must be discarded, else it may be accepted below. 548264243Sdteske */ 549264243Sdteske if (ip_mforward && 550264243Sdteske ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { 551264243Sdteske ipstat.ips_cantforward++; 552264243Sdteske m_freem(m); 553264243Sdteske return; 554264243Sdteske } 555264243Sdteske 556264243Sdteske /* 557264243Sdteske * The process-level routing daemon needs to receive 558264243Sdteske * all multicast IGMP packets, whether or not this 559264243Sdteske * host belongs to their destination groups. 560264243Sdteske */ 561264243Sdteske if (ip->ip_p == IPPROTO_IGMP) 562264243Sdteske goto ours; 563264243Sdteske ipstat.ips_forward++; 564264243Sdteske } 565264243Sdteske /* 566264243Sdteske * See if we belong to the destination multicast group on the 567264243Sdteske * arrival interface. 568264243Sdteske */ 569264243Sdteske IN_MULTI_LOCK(); 570264243Sdteske IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); 571264243Sdteske IN_MULTI_UNLOCK(); 572264243Sdteske if (inm == NULL) { 573264243Sdteske ipstat.ips_notmember++; 574264243Sdteske m_freem(m); 575264243Sdteske return; 576264243Sdteske } 577264243Sdteske goto ours; 578264243Sdteske } 579264243Sdteske if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 580264243Sdteske goto ours; 581264243Sdteske if (ip->ip_dst.s_addr == INADDR_ANY) 582264243Sdteske goto ours; 583264243Sdteske 584264243Sdteske /* 585264243Sdteske * FAITH(Firewall Aided Internet Translator) 586264243Sdteske */ 587264243Sdteske if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) { 588264243Sdteske if (ip_keepfaith) { 589264243Sdteske if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 590264243Sdteske goto ours; 591264243Sdteske } 592264243Sdteske m_freem(m); 593264243Sdteske return; 594264243Sdteske } 595264243Sdteske 596264243Sdteske /* 597264243Sdteske * Not for us; forward if possible and desirable. 598264243Sdteske */ 599264243Sdteske if (ipforwarding == 0) { 600264243Sdteske ipstat.ips_cantforward++; 601264243Sdteske m_freem(m); 602264243Sdteske } else { 603264243Sdteske#if defined(IPSEC) || defined(FAST_IPSEC) 604264243Sdteske if (ip_ipsec_fwd(m)) 605264243Sdteske goto bad; 60698186Sgordon#endif /* IPSEC */ 60798186Sgordon ip_forward(m, dchg); 60898186Sgordon } 60998186Sgordon return; 61098186Sgordon 611206248Sdougbours: 612206248Sdougb#ifdef IPSTEALTH 613126286Smtm /* 61498186Sgordon * IPSTEALTH: Process non-routing options only 61598186Sgordon * if the packet is destined for us. 61698186Sgordon */ 61798186Sgordon if (ipstealth && hlen > sizeof (struct ip) && 61898186Sgordon ip_dooptions(m, 1)) 61998186Sgordon return; 62098186Sgordon#endif /* IPSTEALTH */ 62198186Sgordon 62298186Sgordon /* Count the packet in the ip address stats */ 623206248Sdougb if (ia != NULL) { 62498186Sgordon ia->ia_ifa.if_ipackets++; 62598186Sgordon ia->ia_ifa.if_ibytes += m->m_pkthdr.len; 62698186Sgordon } 62798186Sgordon 62878344Sobrien /* 62998186Sgordon * Attempt reassembly; if it succeeds, proceed. 63098186Sgordon * ip_reass() will return a different mbuf. 63198186Sgordon */ 632206248Sdougb if (ip->ip_off & (IP_MF | IP_OFFMASK)) { 63378344Sobrien m = ip_reass(m); 63498186Sgordon if (m == NULL) 63598186Sgordon return; 63698186Sgordon ip = mtod(m, struct ip *); 63778344Sobrien /* Get the header length of the reassembled packet */ 63878344Sobrien hlen = ip->ip_hl << 2; 63978344Sobrien } 640220962Sdougb 641220962Sdougb /* 642220962Sdougb * Further protocols expect the packet length to be w/o the 643220962Sdougb * IP header. 644220962Sdougb */ 645220962Sdougb ip->ip_len -= hlen; 646220962Sdougb 647220962Sdougb#if defined(IPSEC) || defined(FAST_IPSEC) 648220962Sdougb /* 649220962Sdougb * enforce IPsec policy checking if we are seeing last header. 650220962Sdougb * note that we do not visit this with protocols with pcb layer 651220962Sdougb * code - like udp/tcp/raw ip. 652220962Sdougb */ 653220962Sdougb if (ip_ipsec_input(m)) 654220962Sdougb goto bad; 655220963Sdougb#endif /* IPSEC */ 656220963Sdougb 657220963Sdougb /* 658220963Sdougb * Switch out to protocol's input routine. 659220962Sdougb */ 660220962Sdougb ipstat.ips_delivered++; 661220962Sdougb 662220962Sdougb (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); 663220963Sdougb return; 664220963Sdougbbad: 665220962Sdougb m_freem(m); 666220962Sdougb} 667220962Sdougb 668220962Sdougb/* 669220962Sdougb * After maxnipq has been updated, propagate the change to UMA. The UMA zone 670220962Sdougb * max has slightly different semantics than the sysctl, for historical 671220962Sdougb * reasons. 672220962Sdougb */ 673220962Sdougbstatic void 674220962Sdougbmaxnipq_update(void) 675220962Sdougb{ 676220962Sdougb 677220962Sdougb /* 678220962Sdougb * -1 for unlimited allocation. 679220962Sdougb */ 680220962Sdougb if (maxnipq < 0) 681220962Sdougb uma_zone_set_max(ipq_zone, 0); 682220962Sdougb /* 683197947Sdougb * Positive number for specific bound. 684197947Sdougb */ 685197947Sdougb if (maxnipq > 0) 686197947Sdougb uma_zone_set_max(ipq_zone, maxnipq); 687197947Sdougb /* 688197947Sdougb * Zero specifies no further fragment queue allocation -- set the 689197947Sdougb * bound very low, but rely on implementation elsewhere to actually 690197947Sdougb * prevent allocation and reclaim current queues. 691197947Sdougb */ 692197947Sdougb if (maxnipq == 0) 693197947Sdougb uma_zone_set_max(ipq_zone, 1); 694197947Sdougb} 695197947Sdougb 696197947Sdougbstatic void 69798186Sgordonipq_zone_change(void *tag) 69898186Sgordon{ 69998186Sgordon 70098186Sgordon if (maxnipq > 0 && maxnipq < (nmbclusters / 32)) { 70198186Sgordon maxnipq = nmbclusters / 32; 70278344Sobrien maxnipq_update(); 70398186Sgordon } 70498186Sgordon} 70578344Sobrien 706175676Smtmstatic int 70798186Sgordonsysctl_maxnipq(SYSCTL_HANDLER_ARGS) 708126303Smtm{ 709175676Smtm int error, i; 71078344Sobrien 71178344Sobrien i = maxnipq; 71278344Sobrien error = sysctl_handle_int(oidp, &i, 0, req); 71398186Sgordon if (error || !req->newptr) 71498186Sgordon return (error); 71578344Sobrien 71678344Sobrien /* 71778344Sobrien * XXXRW: Might be a good idea to sanity check the argument and place 71898186Sgordon * an extreme upper bound. 71978344Sobrien */ 72078344Sobrien if (i < -1) 72178344Sobrien return (EINVAL); 72278344Sobrien maxnipq = i; 72398186Sgordon maxnipq_update(); 72498186Sgordon return (0); 72598186Sgordon} 726197144Shrs 727197144ShrsSYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW, 72878344Sobrien NULL, 0, sysctl_maxnipq, "I", 72978344Sobrien "Maximum number of IPv4 fragment reassembly queue entries"); 73098186Sgordon 73198186Sgordon/* 73298186Sgordon * Take incoming datagram fragment and try to reassemble it into 73378344Sobrien * whole datagram. If the argument is the first fragment or one 73498186Sgordon * in between the function will return NULL and store the mbuf 73598186Sgordon * in the fragment chain. If the argument is the last fragment 73678344Sobrien * the packet will be reassembled and the pointer to the new 73778344Sobrien * mbuf returned for further processing. Only m_tags attached 73878344Sobrien * to the first packet/fragment are preserved. 739157653Sflz * The IP header is *NOT* adjusted out of iplen. 740157653Sflz */ 741157653Sflz 742157653Sflzstruct mbuf * 74378344Sobrienip_reass(struct mbuf *m) 74498186Sgordon{ 74578344Sobrien struct ip *ip; 74678344Sobrien struct mbuf *p, *q, *nq, *t; 74778344Sobrien struct ipq *fp = NULL; 74878344Sobrien struct ipqhead *head; 74978344Sobrien int i, hlen, next; 75078344Sobrien u_int8_t ecn, ecn0; 75178344Sobrien u_short hash; 75278344Sobrien 753272974Shrs /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */ 754272974Shrs if (maxnipq == 0 || maxfragsperpacket == 0) { 755242184Shrs ipstat.ips_fragments++; 756242184Shrs ipstat.ips_fragdropped++; 75778344Sobrien m_freem(m); 75878344Sobrien return (NULL); 759295949Saraujo } 760295949Saraujo 76178344Sobrien ip = mtod(m, struct ip *); 76278344Sobrien hlen = ip->ip_hl << 2; 76398186Sgordon 76478344Sobrien hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); 76578344Sobrien head = &ipq[hash]; 76698186Sgordon IPQ_LOCK(); 76778344Sobrien 76898186Sgordon /* 76998186Sgordon * Look for queue of fragments 77098186Sgordon * of this datagram. 77178344Sobrien */ 772272974Shrs TAILQ_FOREACH(fp, head, ipq_list) 773272974Shrs if (ip->ip_id == fp->ipq_id && 774288291Sadrian ip->ip_src.s_addr == fp->ipq_src.s_addr && 775288291Sadrian ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 77698186Sgordon#ifdef MAC 77778344Sobrien mac_fragment_match(m, fp) && 77878344Sobrien#endif 77998186Sgordon ip->ip_p == fp->ipq_p) 78098186Sgordon goto found; 78198186Sgordon 78298186Sgordon fp = NULL; 78378344Sobrien 78498186Sgordon /* 78578344Sobrien * Attempt to trim the number of allocated fragment queues if it 78698186Sgordon * exceeds the administrative limit. 78798186Sgordon */ 78898186Sgordon if ((nipq > maxnipq) && (maxnipq > 0)) { 78998186Sgordon /* 79078344Sobrien * drop something from the tail of the current queue 791165565Syar * before proceeding further 79278344Sobrien */ 79378344Sobrien struct ipq *q = TAILQ_LAST(head, ipqhead); 794165565Syar if (q == NULL) { /* gak */ 79578344Sobrien for (i = 0; i < IPREASS_NHASH; i++) { 796165565Syar struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead); 797165565Syar if (r) { 798165565Syar ipstat.ips_fragtimeout += r->ipq_nfrags; 799165565Syar ip_freef(&ipq[i], r); 800165565Syar break; 801165565Syar } 802165565Syar } 803165565Syar } else { 804165565Syar ipstat.ips_fragtimeout += q->ipq_nfrags; 805165565Syar ip_freef(head, q); 806165565Syar } 807165565Syar } 808165565Syar 809165565Syarfound: 810165565Syar /* 81178344Sobrien * Adjust ip_len to not reflect header, 81278344Sobrien * convert offset of this to bytes. 81378344Sobrien */ 81478344Sobrien ip->ip_len -= hlen; 81598186Sgordon if (ip->ip_off & IP_MF) { 81698186Sgordon /* 81778344Sobrien * Make sure that fragments have a data length 81898186Sgordon * that's a non-zero multiple of 8 bytes. 81998186Sgordon */ 82078344Sobrien if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { 82178344Sobrien ipstat.ips_toosmall++; /* XXX */ 82278344Sobrien goto dropfrag; 82378344Sobrien } 82498186Sgordon m->m_flags |= M_FRAG; 82578344Sobrien } else 82698186Sgordon m->m_flags &= ~M_FRAG; 82798186Sgordon ip->ip_off <<= 3; 82898186Sgordon 82998186Sgordon 83078344Sobrien /* 83198186Sgordon * Attempt reassembly; if it succeeds, proceed. 83298186Sgordon * ip_reass() will return a different mbuf. 83378344Sobrien */ 834151685Syar ipstat.ips_fragments++; 835151685Syar m->m_pkthdr.header = ip; 83678344Sobrien 83778344Sobrien /* Previous ip_reass() started here. */ 83878344Sobrien /* 83998186Sgordon * Presence of header sizes in mbufs 84078344Sobrien * would confuse code below. 84198186Sgordon */ 84298186Sgordon m->m_data += hlen; 84398186Sgordon m->m_len -= hlen; 84498186Sgordon 845255809Sdes /* 846255809Sdes * If first fragment to arrive, create a reassembly queue. 847298515Slme */ 848298515Slme if (fp == NULL) { 849298515Slme fp = uma_zalloc(ipq_zone, M_NOWAIT); 850298515Slme if (fp == NULL) 85198186Sgordon goto dropfrag; 85298186Sgordon#ifdef MAC 85398186Sgordon if (mac_init_ipq(fp, M_NOWAIT) != 0) { 85498186Sgordon uma_zfree(ipq_zone, fp); 85598186Sgordon fp = NULL; 856126303Smtm goto dropfrag; 85798186Sgordon } 85898186Sgordon mac_create_ipq(m, fp); 85998186Sgordon#endif 86098186Sgordon TAILQ_INSERT_HEAD(head, fp, ipq_list); 86198186Sgordon nipq++; 86298186Sgordon fp->ipq_nfrags = 1; 86398186Sgordon fp->ipq_ttl = IPFRAGTTL; 86498186Sgordon fp->ipq_p = ip->ip_p; 86598186Sgordon fp->ipq_id = ip->ip_id; 86698186Sgordon fp->ipq_src = ip->ip_src; 86798186Sgordon fp->ipq_dst = ip->ip_dst; 86898186Sgordon fp->ipq_frags = m; 86998186Sgordon m->m_nextpkt = NULL; 870175676Smtm goto done; 87198186Sgordon } else { 872175676Smtm fp->ipq_nfrags++; 87378344Sobrien#ifdef MAC 87478344Sobrien mac_update_ipq(m, fp); 875116097Smtm#endif 87698186Sgordon } 87778344Sobrien 87898186Sgordon#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) 87978344Sobrien 88078344Sobrien /* 881132892Smtm * Handle ECN by comparing this segment with the first one; 882132892Smtm * if CE is set, do not lose CE. 883132892Smtm * drop if CE and not-ECT are mixed for the same packet. 884132892Smtm */ 885132892Smtm ecn = ip->ip_tos & IPTOS_ECN_MASK; 886132892Smtm ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK; 887126303Smtm if (ecn == IPTOS_ECN_CE) { 88898186Sgordon if (ecn0 == IPTOS_ECN_NOTECT) 88978344Sobrien goto dropfrag; 89098186Sgordon if (ecn0 != IPTOS_ECN_CE) 89198186Sgordon GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE; 892175676Smtm } 89378344Sobrien if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) 894198216Sed goto dropfrag; 89598186Sgordon 896126303Smtm /* 897126303Smtm * Find a segment which begins after this one does. 89878344Sobrien */ 89978344Sobrien for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) 90078344Sobrien if (GETIP(q)->ip_off > ip->ip_off) 90178344Sobrien break; 902126303Smtm 903126303Smtm /* 904126303Smtm * If there is a preceding segment, it may provide some of 905126303Smtm * our data already. If so, drop the data from the incoming 906126303Smtm * segment. If it provides all of our data, drop us, otherwise 907126303Smtm * stick new segment in the proper place. 908126303Smtm * 909175676Smtm * If some of the data is dropped from the the preceding 910175676Smtm * segment, then it's checksum is invalidated. 911175676Smtm */ 912175676Smtm if (p) { 913175676Smtm i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off; 91478344Sobrien if (i > 0) { 91578344Sobrien if (i >= ip->ip_len) 916161530Sflz goto dropfrag; 917198162Sdougb m_adj(m, i); 918161530Sflz m->m_pkthdr.csum_flags = 0; 919298515Slme ip->ip_off += i; 92098186Sgordon ip->ip_len -= i; 92178344Sobrien } 92298186Sgordon m->m_nextpkt = p->m_nextpkt; 92398186Sgordon p->m_nextpkt = m; 924131135Smtm } else { 925131135Smtm m->m_nextpkt = fp->ipq_frags; 92678344Sobrien fp->ipq_frags = m; 92798186Sgordon } 92898186Sgordon 92998186Sgordon /* 93078344Sobrien * While we overlap succeeding segments trim them or, 93178344Sobrien * if they are completely covered, dequeue them. 93298186Sgordon */ 93378344Sobrien for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; 93478344Sobrien q = nq) { 93578344Sobrien i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; 93698186Sgordon if (i < GETIP(q)->ip_len) { 937150796Syar GETIP(q)->ip_len -= i; 93878344Sobrien GETIP(q)->ip_off += i; 93978344Sobrien m_adj(q, i); 940255809Sdes q->m_pkthdr.csum_flags = 0; 941255809Sdes break; 942255809Sdes } 943255809Sdes nq = q->m_nextpkt; 944255809Sdes m->m_nextpkt = nq; 94578344Sobrien ipstat.ips_fragdropped++; 94698186Sgordon fp->ipq_nfrags--; 94778344Sobrien m_freem(q); 94898186Sgordon } 94978344Sobrien 95098186Sgordon /* 95198186Sgordon * Check for complete reassembly and perform frag per packet 952242184Shrs * limiting. 953272974Shrs * 954295949Saraujo * Frag limiting is performed here so that the nth frag has 955332363Skevans * a chance to complete the packet before we drop the packet. 95678344Sobrien * As a result, n+1 frags are actually allowed per packet, but 95798186Sgordon * only n will ever be stored. (n = maxfragsperpacket.) 958124832Smtm * 95998186Sgordon */ 96098186Sgordon next = 0; 96198186Sgordon for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { 96298186Sgordon if (GETIP(q)->ip_off != next) { 963230374Sdougb if (fp->ipq_nfrags > maxfragsperpacket) { 964179870Smtm ipstat.ips_fragdropped += fp->ipq_nfrags; 965179870Smtm ip_freef(head, fp); 966179870Smtm } 967179870Smtm goto done; 968179870Smtm } 969298515Slme next += GETIP(q)->ip_len; 970206686Sdougb } 97178344Sobrien /* Make sure the last packet didn't have the IP_MF flag */ 97278344Sobrien if (p->m_flags & M_FRAG) { 97378344Sobrien if (fp->ipq_nfrags > maxfragsperpacket) { 974298515Slme ipstat.ips_fragdropped += fp->ipq_nfrags; 975298515Slme ip_freef(head, fp); 976220760Sdougb } 977179870Smtm goto done; 978179870Smtm } 979179870Smtm 980179870Smtm /* 981179870Smtm * Reassembly is complete. Make sure the packet is a sane size. 982179870Smtm */ 983179870Smtm q = fp->ipq_frags; 984175676Smtm ip = GETIP(q); 985175676Smtm if (next + (ip->ip_hl << 2) > IP_MAXPACKET) { 98678344Sobrien ipstat.ips_toolong++; 98778344Sobrien ipstat.ips_fragdropped += fp->ipq_nfrags; 988291770Sjilles ip_freef(head, fp); 989291770Sjilles goto done; 990291770Sjilles } 991291770Sjilles 992291770Sjilles /* 993291770Sjilles * Concatenate fragments. 994291770Sjilles */ 995291770Sjilles m = q; 99678344Sobrien t = m->m_next; 99778344Sobrien m->m_next = NULL; 99878344Sobrien m_cat(m, t); 999165565Syar nq = q->m_nextpkt; 1000165565Syar q->m_nextpkt = NULL; 1001165565Syar for (q = nq; q != NULL; q = nq) { 1002165565Syar nq = q->m_nextpkt; 100378344Sobrien q->m_nextpkt = NULL; 1004165565Syar m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; 1005165565Syar m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; 1006165565Syar m_cat(m, q); 1007116097Smtm } 100878344Sobrien /* 100978344Sobrien * In order to do checksumming faster we do 'end-around carry' here 101098186Sgordon * (and not in for{} loop), though it implies we are not going to 101178344Sobrien * reassemble more than 64k fragments. 1012298515Slme */ 1013298515Slme m->m_pkthdr.csum_data = 1014298515Slme (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); 1015298515Slme#ifdef MAC 1016298515Slme mac_create_datagram_from_ipq(fp, m); 1017298515Slme mac_destroy_ipq(fp); 1018298515Slme#endif 1019298515Slme 1020298515Slme /* 1021298515Slme * Create header for new ip packet by modifying header of first 102278344Sobrien * packet; dequeue and discard fragment reassembly header. 1023165565Syar * Make header visible. 102498186Sgordon */ 102598186Sgordon ip->ip_len = (ip->ip_hl << 2) + next; 102678344Sobrien ip->ip_src = fp->ipq_src; 102778344Sobrien ip->ip_dst = fp->ipq_dst; 102878344Sobrien TAILQ_REMOVE(head, fp, ipq_list); 102978344Sobrien nipq--; 1030165565Syar uma_zfree(ipq_zone, fp); 103178344Sobrien m->m_len += (ip->ip_hl << 2); 103278344Sobrien m->m_data -= (ip->ip_hl << 2); 103378344Sobrien /* some debugging cruft by sklower, below, will go away soon */ 1034242183Screes if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */ 1035160667Syar m_fixhdr(m); 1036153152Syar ipstat.ips_reassembled++; 103778344Sobrien IPQ_UNLOCK(); 103878344Sobrien return (m); 1039179946Smtm 1040179946Smtmdropfrag: 1041179946Smtm ipstat.ips_fragdropped++; 1042179946Smtm if (fp != NULL) 104378344Sobrien fp->ipq_nfrags--; 1044160668Syar m_freem(m); 104578344Sobriendone: 1046197947Sdougb IPQ_UNLOCK(); 104778344Sobrien return (NULL); 1048310010Sdteske 104978344Sobrien#undef GETIP 105078344Sobrien} 1051242184Shrs 1052272974Shrs/* 105378344Sobrien * Free a fragment reassembly header and all 105498186Sgordon * associated datagrams. 105578344Sobrien */ 1056310010Sdteskestatic void 105778344Sobrienip_freef(fhp, fp) 1058242184Shrs struct ipqhead *fhp; 1059272974Shrs struct ipq *fp; 106098186Sgordon{ 106198186Sgordon register struct mbuf *q; 106298186Sgordon 106398186Sgordon IPQ_LOCK_ASSERT(); 1064161396Syar 1065161396Syar while (fp->ipq_frags) { 1066161396Syar q = fp->ipq_frags; 1067201036Sdougb fp->ipq_frags = q->m_nextpkt; 1068161396Syar m_freem(q); 1069161396Syar } 1070272974Shrs TAILQ_REMOVE(fhp, fp, ipq_list); 1071272974Shrs uma_zfree(ipq_zone, fp); 1072272974Shrs nipq--; 107378344Sobrien} 107498186Sgordon 1075288291Sadrian/* 1076332363Skevans * IP timer processing; 1077288291Sadrian * if a timer expires on a reassembly 1078165565Syar * queue, discard it. 107998186Sgordon */ 1080179946Smtmvoid 1081179946Smtmip_slowtimo() 1082179946Smtm{ 1083179946Smtm register struct ipq *fp; 108498186Sgordon int i; 108598186Sgordon 108698186Sgordon IPQ_LOCK(); 1087165565Syar for (i = 0; i < IPREASS_NHASH; i++) { 108878344Sobrien for(fp = TAILQ_FIRST(&ipq[i]); fp;) { 108978344Sobrien struct ipq *fpp; 109078344Sobrien 109198186Sgordon fpp = fp; 1092153152Syar fp = TAILQ_NEXT(fp, ipq_list); 1093165565Syar if(--fpp->ipq_ttl == 0) { 1094153152Syar ipstat.ips_fragtimeout += fpp->ipq_nfrags; 109578344Sobrien ip_freef(&ipq[i], fpp); 109678344Sobrien } 1097165565Syar } 109898186Sgordon } 109998186Sgordon /* 110098186Sgordon * If we are over the maximum number of fragments 110178344Sobrien * (due to the limit being lowered), drain off 1102165565Syar * enough to get down to the new limit. 1103165565Syar */ 110498186Sgordon if (maxnipq >= 0 && nipq > maxnipq) { 110598186Sgordon for (i = 0; i < IPREASS_NHASH; i++) { 110698186Sgordon while (nipq > maxnipq && !TAILQ_EMPTY(&ipq[i])) { 110798186Sgordon ipstat.ips_fragdropped += 1108165565Syar TAILQ_FIRST(&ipq[i])->ipq_nfrags; 1109165565Syar ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i])); 111078344Sobrien } 111178344Sobrien } 111278344Sobrien } 111398186Sgordon IPQ_UNLOCK(); 1114165565Syar} 1115153152Syar 111678344Sobrien/* 1117165565Syar * Drain off all datagram fragments. 1118165565Syar */ 1119165565Syarvoid 1120165565Syarip_drain() 1121165565Syar{ 1122165565Syar int i; 1123165565Syar 112478344Sobrien IPQ_LOCK(); 112578344Sobrien for (i = 0; i < IPREASS_NHASH; i++) { 112678344Sobrien while(!TAILQ_EMPTY(&ipq[i])) { 112778344Sobrien ipstat.ips_fragdropped += 112878344Sobrien TAILQ_FIRST(&ipq[i])->ipq_nfrags; 112978344Sobrien ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i])); 1130126285Smtm } 113178344Sobrien } 113278344Sobrien IPQ_UNLOCK(); 1133126285Smtm in_rtqdrain(); 113478344Sobrien} 1135165565Syar 1136165565Syar/* 1137165565Syar * The protocol to be inserted into ip_protox[] must be already registered 1138152519Syar * in inetsw[], either statically or through pf_proto_register(). 1139165565Syar */ 1140165565Syarint 1141165565Syaripproto_register(u_char ipproto) 114298186Sgordon{ 1143165565Syar struct protosw *pr; 114478344Sobrien 114578344Sobrien /* Sanity checks. */ 114698186Sgordon if (ipproto == 0) 1147165565Syar return (EPROTONOSUPPORT); 114898186Sgordon 114998186Sgordon /* 115098186Sgordon * The protocol slot must not be occupied by another protocol 1151165565Syar * already. An index pointing to IPPROTO_RAW is unused. 115298186Sgordon */ 115398186Sgordon pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 115478344Sobrien if (pr == NULL) 1155197144Shrs return (EPFNOSUPPORT); 1156197144Shrs if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 1157197144Shrs return (EEXIST); 1158197144Shrs 1159197144Shrs /* Find the protocol position in inetsw[] and set the index. */ 1160197144Shrs for (pr = inetdomain.dom_protosw; 1161197144Shrs pr < inetdomain.dom_protoswNPROTOSW; pr++) { 1162272393Shrs if (pr->pr_domain->dom_family == PF_INET && 1163272393Shrs pr->pr_protocol && pr->pr_protocol == ipproto) { 1164197144Shrs /* Be careful to only index valid IP protocols. */ 1165197144Shrs if (pr->pr_protocol < IPPROTO_MAX) { 1166197144Shrs ip_protox[pr->pr_protocol] = pr - inetsw; 1167197144Shrs return (0); 1168197144Shrs } else 1169197144Shrs return (EINVAL); 1170197144Shrs } 1171197144Shrs } 1172197144Shrs return (EPROTONOSUPPORT); 1173197144Shrs} 117478344Sobrien 1175197144Shrsint 1176197144Shrsipproto_unregister(u_char ipproto) 1177197144Shrs{ 1178197144Shrs struct protosw *pr; 1179197144Shrs 1180197144Shrs /* Sanity checks. */ 1181197144Shrs if (ipproto == 0) 1182197144Shrs return (EPROTONOSUPPORT); 1183197144Shrs 1184197144Shrs /* Check if the protocol was indeed registered. */ 1185197144Shrs pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 1186197144Shrs if (pr == NULL) 1187197144Shrs return (EPFNOSUPPORT); 1188197144Shrs if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 1189197144Shrs return (ENOENT); 1190197144Shrs 1191197144Shrs /* Reset the protocol slot to IPPROTO_RAW. */ 1192197144Shrs ip_protox[ipproto] = pr - inetsw; 1193197144Shrs return (0); 1194197144Shrs} 1195197144Shrs 1196197144Shrs/* 1197197144Shrs * Given address of next destination (final or next hop), 119878344Sobrien * return internet address info of interface to be used to get there. 119978344Sobrien */ 120078344Sobrienstruct in_ifaddr * 1201150796Syarip_rtaddr(dst) 120278344Sobrien struct in_addr dst; 120378344Sobrien{ 120478344Sobrien struct route sro; 1205295949Saraujo struct sockaddr_in *sin; 1206295949Saraujo struct in_ifaddr *ifa; 1207295949Saraujo 1208295949Saraujo bzero(&sro, sizeof(sro)); 1209309130Saraujo sin = (struct sockaddr_in *)&sro.ro_dst; 1210309130Saraujo sin->sin_family = AF_INET; 1211309130Saraujo sin->sin_len = sizeof(*sin); 1212309130Saraujo sin->sin_addr = dst; 1213309130Saraujo rtalloc_ign(&sro, RTF_CLONING); 1214309130Saraujo 1215309130Saraujo if (sro.ro_rt == NULL) 1216309130Saraujo return (NULL); 1217309130Saraujo 1218309130Saraujo ifa = ifatoia(sro.ro_rt->rt_ifa); 1219309130Saraujo RTFREE(sro.ro_rt); 1220309130Saraujo return (ifa); 1221295949Saraujo} 1222295949Saraujo 1223295949Saraujou_char inetctlerrmap[PRC_NCMDS] = { 1224295949Saraujo 0, 0, 0, 0, 1225116097Smtm 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 122678344Sobrien EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 122778344Sobrien EMSGSIZE, EHOSTUNREACH, 0, 0, 122898186Sgordon 0, 0, EHOSTUNREACH, 0, 1229150796Syar ENOPROTOOPT, ECONNREFUSED 1230153152Syar}; 123178344Sobrien 123278344Sobrien/* 123378344Sobrien * Forward a packet. If some error occurs return the sender 1234165565Syar * an icmp packet. Note we can't always generate a meaningful 1235165565Syar * icmp message because icmp doesn't have a large enough repertoire 1236165565Syar * of codes and types. 1237165565Syar * 1238165565Syar * If not forwarding, just drop the packet. This could be confusing 1239165565Syar * if ipforwarding was zero but some routing protocol was advancing 1240165565Syar * us as a gateway to somewhere. However, we must let the routing 1241165565Syar * protocol deal with that. 1242165565Syar * 1243165565Syar * The srcrt parameter indicates whether the packet is being forwarded 1244165565Syar * via a source route. 1245165565Syar */ 1246165565Syarvoid 1247165565Syarip_forward(struct mbuf *m, int srcrt) 1248165565Syar{ 1249165565Syar struct ip *ip = mtod(m, struct ip *); 1250165565Syar struct in_ifaddr *ia = NULL; 1251165565Syar struct mbuf *mcopy; 1252165565Syar struct in_addr dest; 1253165565Syar int error, type = 0, code = 0, mtu = 0; 1254165565Syar 1255165565Syar if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 1256165565Syar ipstat.ips_cantforward++; 1257165565Syar m_freem(m); 1258165565Syar return; 1259165565Syar } 1260165565Syar#ifdef IPSTEALTH 1261165565Syar if (!ipstealth) { 1262165565Syar#endif 1263165565Syar if (ip->ip_ttl <= IPTTLDEC) { 1264165565Syar icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 1265165565Syar 0, 0); 1266165565Syar return; 1267165565Syar } 1268165565Syar#ifdef IPSTEALTH 1269165565Syar } 1270165565Syar#endif 1271165565Syar 1272165565Syar if (!srcrt && (ia = ip_rtaddr(ip->ip_dst)) == NULL) { 1273165565Syar icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 1274165565Syar return; 1275165565Syar } 1276165565Syar 1277165565Syar /* 1278165565Syar * Save the IP header and at most 8 bytes of the payload, 1279165565Syar * in case we need to generate an ICMP message to the src. 1280165565Syar * 1281165565Syar * XXX this can be optimized a lot by saving the data in a local 1282165565Syar * buffer on the stack (72 bytes at most), and only allocating the 1283165565Syar * mbuf if really necessary. The vast majority of the packets 1284165565Syar * are forwarded without having to send an ICMP back (either 1285165565Syar * because unnecessary, or because rate limited), so we are 1286165565Syar * really we are wasting a lot of work here. 1287165565Syar * 1288165565Syar * We don't use m_copy() because it might return a reference 1289165565Syar * to a shared cluster. Both this function and ip_output() 1290165565Syar * assume exclusive access to the IP header in `m', so any 1291165565Syar * data in a cluster may change before we reach icmp_error(). 1292165565Syar */ 1293165565Syar MGETHDR(mcopy, M_DONTWAIT, m->m_type); 1294165565Syar if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) { 1295165565Syar /* 1296165565Syar * It's probably ok if the pkthdr dup fails (because 1297165565Syar * the deep copy of the tag chain failed), but for now 1298165565Syar * be conservative and just discard the copy since 1299165565Syar * code below may some day want the tags. 1300165565Syar */ 1301165565Syar m_free(mcopy); 1302165565Syar mcopy = NULL; 1303165565Syar } 1304165565Syar if (mcopy != NULL) { 1305165565Syar mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy)); 1306165565Syar mcopy->m_pkthdr.len = mcopy->m_len; 1307165565Syar m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1308165565Syar } 1309165565Syar 1310165565Syar#ifdef IPSTEALTH 131178344Sobrien if (!ipstealth) { 131278344Sobrien#endif 1313201038Sdougb ip->ip_ttl -= IPTTLDEC; 1314286303Sngie#ifdef IPSTEALTH 1315286303Sngie } 1316286303Sngie#endif 1317286303Sngie 1318286303Sngie /* 1319201038Sdougb * If forwarding packet using same interface that it came in on, 1320201038Sdougb * perhaps should send a redirect to sender to shortcut a hop. 132178344Sobrien * Only send redirect if source is sending directly to us, 132278344Sobrien * and if packet was not source routed (or has any options). 132378344Sobrien * Also, don't send redirect if forwarding using a default route 132478344Sobrien * or a route modified by a redirect. 132578344Sobrien */ 132678344Sobrien dest.s_addr = 0; 132778344Sobrien if (!srcrt && ipsendredirects && ia->ia_ifp == m->m_pkthdr.rcvif) { 132878344Sobrien struct sockaddr_in *sin; 132978344Sobrien struct route ro; 133098186Sgordon struct rtentry *rt; 133198186Sgordon 1332272393Shrs bzero(&ro, sizeof(ro)); 1333197144Shrs sin = (struct sockaddr_in *)&ro.ro_dst; 133498186Sgordon sin->sin_family = AF_INET; 133598186Sgordon sin->sin_len = sizeof(*sin); 133678344Sobrien sin->sin_addr = ip->ip_dst; 1337193118Sdougb rtalloc_ign(&ro, RTF_CLONING); 1338193118Sdougb 133978344Sobrien rt = ro.ro_rt; 1340153105Sdougb 134198186Sgordon if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && 134298186Sgordon satosin(rt_key(rt))->sin_addr.s_addr != 0) { 134378344Sobrien#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 134498186Sgordon u_long src = ntohl(ip->ip_src.s_addr); 134598186Sgordon 1346146490Sschweikh if (RTA(rt) && 134798186Sgordon (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1348231888Sdelphij if (rt->rt_flags & RTF_GATEWAY) 1349231888Sdelphij dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; 1350231888Sdelphij else 1351146490Sschweikh dest.s_addr = ip->ip_dst.s_addr; 135298186Sgordon /* Router requirements says to only send host redirects */ 135398186Sgordon type = ICMP_REDIRECT; 135478344Sobrien code = ICMP_REDIRECT_HOST; 135578344Sobrien } 135678344Sobrien } 135778344Sobrien if (rt) 135878344Sobrien RTFREE(rt); 1359290007Sdes } 1360290007Sdes 1361290007Sdes error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); 1362290007Sdes if (error) 136378344Sobrien ipstat.ips_cantforward++; 136478344Sobrien else { 136578344Sobrien ipstat.ips_forward++; 1366270698Shrs if (type) 1367157653Sflz ipstat.ips_redirectsent++; 136878344Sobrien else { 1369219612Sdougb if (mcopy) 1370219612Sdougb m_freem(mcopy); 1371219612Sdougb return; 137298186Sgordon } 137398186Sgordon } 137498186Sgordon if (mcopy == NULL) 137598186Sgordon return; 137698186Sgordon 137798186Sgordon switch (error) { 137898186Sgordon 137998186Sgordon case 0: /* forwarded, but need redirect */ 1380126285Smtm /* type, code set above */ 138198186Sgordon break; 1382179872Smtm 1383275359Sdes case ENETUNREACH: /* shouldn't happen, checked above */ 1384275359Sdes case EHOSTUNREACH: 1385275359Sdes case ENETDOWN: 1386286163Sjilles case EHOSTDOWN: 1387286163Sjilles default: 1388275359Sdes type = ICMP_UNREACH; 1389275359Sdes code = ICMP_UNREACH_HOST; 1390275359Sdes break; 1391275359Sdes 1392275359Sdes case EMSGSIZE: 1393275359Sdes type = ICMP_UNREACH; 1394275359Sdes code = ICMP_UNREACH_NEEDFRAG; 1395275359Sdes 1396275359Sdes#if defined(IPSEC) || defined(FAST_IPSEC) 1397275359Sdes mtu = ip_ipsec_mtu(m); 1398275359Sdes#endif /* IPSEC */ 1399275359Sdes /* 1400275359Sdes * If the MTU wasn't set before use the interface mtu or 1401275359Sdes * fall back to the next smaller mtu step compared to the 1402270698Shrs * current packet size. 1403197144Shrs */ 1404272393Shrs if (mtu == 0) { 1405223227Sjilles if (ia != NULL) 1406197144Shrs mtu = ia->ia_ifp->if_mtu; 1407197144Shrs else 1408197144Shrs mtu = ip_next_mtu(ip->ip_len, 0); 1409197144Shrs } 1410197144Shrs ipstat.ips_cantfrag++; 1411197144Shrs break; 1412197144Shrs 1413223227Sjilles case ENOBUFS: 1414223227Sjilles /* 1415223227Sjilles * A router should not generate ICMP_SOURCEQUENCH as 1416197144Shrs * required in RFC1812 Requirements for IP Version 4 Routers. 1417197144Shrs * Source quench could be a big problem under DoS attacks, 1418197144Shrs * or if the underlying interface is rate-limited. 1419197144Shrs * Those who need source quench packets may re-enable them 1420197144Shrs * via the net.inet.ip.sendsourcequench sysctl. 1421197144Shrs */ 1422197144Shrs if (ip_sendsourcequench == 0) { 1423197144Shrs m_freem(mcopy); 1424197144Shrs return; 1425197144Shrs } else { 1426197144Shrs type = ICMP_SOURCEQUENCH; 1427197144Shrs code = 0; 1428197144Shrs } 1429197144Shrs break; 1430197144Shrs 1431197144Shrs case EACCES: /* ipfw denied packet */ 143278344Sobrien m_freem(mcopy); 1433201036Sdougb return; 1434157473Sflz } 1435157653Sflz icmp_error(mcopy, type, code, dest.s_addr, mtu); 1436157653Sflz} 1437157473Sflz 1438157473Sflzvoid 1439157473Sflzip_savecontrol(inp, mp, ip, m) 1440157473Sflz register struct inpcb *inp; 1441157473Sflz register struct mbuf **mp; 1442157473Sflz register struct ip *ip; 1443157653Sflz register struct mbuf *m; 1444157473Sflz{ 1445157473Sflz if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1446157473Sflz struct bintime bt; 1447157473Sflz 1448157473Sflz bintime(&bt); 1449157473Sflz if (inp->inp_socket->so_options & SO_BINTIME) { 1450157473Sflz *mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt), 1451157473Sflz SCM_BINTIME, SOL_SOCKET); 145278344Sobrien if (*mp) 145378344Sobrien mp = &(*mp)->m_next; 145478344Sobrien } 145578344Sobrien if (inp->inp_socket->so_options & SO_TIMESTAMP) { 145678344Sobrien struct timeval tv; 145778344Sobrien 145878344Sobrien bintime2timeval(&bt, &tv); 145978344Sobrien *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), 1460230007Srea SCM_TIMESTAMP, SOL_SOCKET); 146178344Sobrien if (*mp) 146278344Sobrien mp = &(*mp)->m_next; 1463126286Smtm } 146478344Sobrien } 146578344Sobrien if (inp->inp_flags & INP_RECVDSTADDR) { 146678344Sobrien *mp = sbcreatecontrol((caddr_t) &ip->ip_dst, 146778344Sobrien sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 146878344Sobrien if (*mp) 146978344Sobrien mp = &(*mp)->m_next; 147078344Sobrien } 147178344Sobrien if (inp->inp_flags & INP_RECVTTL) { 147278344Sobrien *mp = sbcreatecontrol((caddr_t) &ip->ip_ttl, 147378344Sobrien sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 147478344Sobrien if (*mp) 147578344Sobrien mp = &(*mp)->m_next; 147678344Sobrien } 147778344Sobrien#ifdef notyet 147878344Sobrien /* XXX 147978344Sobrien * Moving these out of udp_input() made them even more broken 1480106643Sgordon * than they already were. 1481106643Sgordon */ 1482106643Sgordon /* options were tossed already */ 1483106643Sgordon if (inp->inp_flags & INP_RECVOPTS) { 148478344Sobrien *mp = sbcreatecontrol((caddr_t) opts_deleted_above, 148578344Sobrien sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 148678344Sobrien if (*mp) 148778344Sobrien mp = &(*mp)->m_next; 148878344Sobrien } 148978344Sobrien /* ip_srcroute doesn't do what we want here, need to fix */ 149078344Sobrien if (inp->inp_flags & INP_RECVRETOPTS) { 149178344Sobrien *mp = sbcreatecontrol((caddr_t) ip_srcroute(m), 149278344Sobrien sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 1493106643Sgordon if (*mp) 1494106643Sgordon mp = &(*mp)->m_next; 1495106643Sgordon } 1496106643Sgordon#endif 149778344Sobrien if (inp->inp_flags & INP_RECVIF) { 149898186Sgordon struct ifnet *ifp; 149998186Sgordon struct sdlbuf { 150098186Sgordon struct sockaddr_dl sdl; 150198186Sgordon u_char pad[32]; 150298186Sgordon } sdlbuf; 150398186Sgordon struct sockaddr_dl *sdp; 150498186Sgordon struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 1505119170Smtm 1506119170Smtm if (((ifp = m->m_pkthdr.rcvif)) 1507119170Smtm && ( ifp->if_index && (ifp->if_index <= if_index))) { 1508119170Smtm sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1509119170Smtm /* 1510119170Smtm * Change our mind and don't try copy. 1511119170Smtm */ 1512119170Smtm if ((sdp->sdl_family != AF_LINK) 151398186Sgordon || (sdp->sdl_len > sizeof(sdlbuf))) { 151498186Sgordon goto makedummy; 151598186Sgordon } 151698186Sgordon bcopy(sdp, sdl2, sdp->sdl_len); 1517106643Sgordon } else { 151898186Sgordonmakedummy: 151998186Sgordon sdl2->sdl_len 152098186Sgordon = offsetof(struct sockaddr_dl, sdl_data[0]); 152198186Sgordon sdl2->sdl_family = AF_LINK; 152298186Sgordon sdl2->sdl_index = 0; 152398186Sgordon sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 152498186Sgordon } 1525106700Sgordon *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len, 1526162947Syar IP_RECVIF, IPPROTO_IP); 1527106700Sgordon if (*mp) 1528146490Sschweikh mp = &(*mp)->m_next; 152998186Sgordon } 153098186Sgordon} 153198186Sgordon 153298186Sgordon/* 153398186Sgordon * XXX these routines are called from the upper part of the kernel. 153498186Sgordon * They need to be locked when we remove Giant. 153598186Sgordon * 153698186Sgordon * They could also be moved to ip_mroute.c, since all the RSVP 153798186Sgordon * handling is done there already. 153898186Sgordon */ 153998186Sgordonstatic int ip_rsvp_on; 154098186Sgordonstruct socket *ip_rsvpd; 154198186Sgordonint 154298186Sgordonip_rsvp_init(struct socket *so) 154398186Sgordon{ 154498186Sgordon if (so->so_type != SOCK_RAW || 154598186Sgordon so->so_proto->pr_protocol != IPPROTO_RSVP) 154698186Sgordon return EOPNOTSUPP; 154798186Sgordon 154898186Sgordon if (ip_rsvpd != NULL) 154998186Sgordon return EADDRINUSE; 155098186Sgordon 155198186Sgordon ip_rsvpd = so; 155298186Sgordon /* 155398186Sgordon * This may seem silly, but we need to be sure we don't over-increment 155498186Sgordon * the RSVP counter, in case something slips up. 155598186Sgordon */ 155698186Sgordon if (!ip_rsvp_on) { 155798186Sgordon ip_rsvp_on = 1; 155898186Sgordon rsvp_on++; 155998186Sgordon } 156098186Sgordon 156198186Sgordon return 0; 156298186Sgordon} 156398186Sgordon 156498186Sgordonint 156598186Sgordonip_rsvp_done(void) 156698186Sgordon{ 156798186Sgordon ip_rsvpd = NULL; 156898186Sgordon /* 156998186Sgordon * This may seem silly, but we need to be sure we don't over-decrement 157098186Sgordon * the RSVP counter, in case something slips up. 157198186Sgordon */ 157298186Sgordon if (ip_rsvp_on) { 157398186Sgordon ip_rsvp_on = 0; 157498186Sgordon rsvp_on--; 157598186Sgordon } 157698186Sgordon return 0; 157798186Sgordon} 157898186Sgordon 157998186Sgordonvoid 158098186Sgordonrsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */ 158198186Sgordon{ 158298186Sgordon if (rsvp_input_p) { /* call the real one if loaded */ 158398186Sgordon rsvp_input_p(m, off); 158498186Sgordon return; 158598186Sgordon } 158698186Sgordon 158798186Sgordon /* Can still get packets with rsvp_on = 0 if there is a local member 158898186Sgordon * of the group to which the RSVP packet is addressed. But in this 158998186Sgordon * case we want to throw the packet away. 159098186Sgordon */ 159198186Sgordon 159298186Sgordon if (!rsvp_on) { 159398186Sgordon m_freem(m); 159498186Sgordon return; 159598186Sgordon } 159698186Sgordon 159798186Sgordon if (ip_rsvpd != NULL) { 159898186Sgordon rip_input(m, off); 159998186Sgordon return; 160098186Sgordon } 160198186Sgordon /* Drop the packet */ 160298186Sgordon m_freem(m); 160398186Sgordon} 160498186Sgordon