1139823Simp/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1993
3180305Srwatson *	The Regents of the University of California.
4180305Srwatson * All rights reserved.
51541Srgrimes *
61541Srgrimes * Redistribution and use in source and binary forms, with or without
71541Srgrimes * modification, are permitted provided that the following conditions
81541Srgrimes * are met:
91541Srgrimes * 1. Redistributions of source code must retain the above copyright
101541Srgrimes *    notice, this list of conditions and the following disclaimer.
111541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer in the
131541Srgrimes *    documentation and/or other materials provided with the distribution.
141541Srgrimes * 4. Neither the name of the University nor the names of its contributors
151541Srgrimes *    may be used to endorse or promote products derived from this software
161541Srgrimes *    without specific prior written permission.
171541Srgrimes *
181541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
191541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
201541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
211541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
221541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
231541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
241541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
251541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
261541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
271541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
281541Srgrimes * SUCH DAMAGE.
291541Srgrimes *
3010944Swollman *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
311541Srgrimes */
321541Srgrimes
33172467Ssilby#include <sys/cdefs.h>
34172467Ssilby__FBSDID("$FreeBSD$");
35172467Ssilby
36220880Sbz#include "opt_inet.h"
3755009Sshin#include "opt_inet6.h"
3855009Sshin#include "opt_ipsec.h"
3955009Sshin
401541Srgrimes#include <sys/param.h>
41128664Sbmilekic#include <sys/jail.h>
4222900Swollman#include <sys/kernel.h>
4395759Stanimura#include <sys/lock.h>
441541Srgrimes#include <sys/malloc.h>
451541Srgrimes#include <sys/mbuf.h>
46164033Srwatson#include <sys/priv.h>
4784527Sps#include <sys/proc.h>
4822900Swollman#include <sys/protosw.h>
49185895Szec#include <sys/rwlock.h>
5095759Stanimura#include <sys/signalvar.h>
511541Srgrimes#include <sys/socket.h>
521541Srgrimes#include <sys/socketvar.h>
5395759Stanimura#include <sys/sx.h>
5422900Swollman#include <sys/sysctl.h>
5595759Stanimura#include <sys/systm.h>
5634923Sbde
5792760Sjeff#include <vm/uma.h>
581541Srgrimes
591541Srgrimes#include <net/if.h>
601541Srgrimes#include <net/route.h>
61185571Sbz#include <net/vnet.h>
621541Srgrimes
631541Srgrimes#include <netinet/in.h>
641541Srgrimes#include <netinet/in_systm.h>
6511603Sdg#include <netinet/in_pcb.h>
6611603Sdg#include <netinet/in_var.h>
67222143Sqingli#include <netinet/if_ether.h>
6895759Stanimura#include <netinet/ip.h>
691541Srgrimes#include <netinet/ip_var.h>
701541Srgrimes#include <netinet/ip_mroute.h>
711541Srgrimes
72171167Sgnn#ifdef IPSEC
73105199Ssam#include <netipsec/ipsec.h>
74171167Sgnn#endif /*IPSEC*/
75105199Ssam
76163606Srwatson#include <security/mac/mac_framework.h>
77163606Srwatson
78221131SbzVNET_DEFINE(int, ip_defttl) = IPDEFTTL;
79221131SbzSYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
80221131Sbz    &VNET_NAME(ip_defttl), 0,
81221131Sbz    "Maximum TTL on IP packets");
82221131Sbz
83195699SrwatsonVNET_DEFINE(struct inpcbhead, ripcb);
84195699SrwatsonVNET_DEFINE(struct inpcbinfo, ripcbinfo);
851541Srgrimes
86195727Srwatson#define	V_ripcb			VNET(ripcb)
87195727Srwatson#define	V_ripcbinfo		VNET(ripcbinfo)
88195699Srwatson
89193502Sluigi/*
90201735Sluigi * Control and data hooks for ipfw, dummynet, divert and so on.
91193502Sluigi * The data hooks are not used here but it is convenient
92193502Sluigi * to keep them all in one place.
93193502Sluigi */
94197952SjulianVNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
95197952SjulianVNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
9684516Sps
97201735Sluigiint	(*ip_dn_ctl_ptr)(struct sockopt *);
98201735Sluigiint	(*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
99201735Sluigivoid	(*ip_divert_ptr)(struct mbuf *, int);
100201735Sluigiint	(*ng_ipfw_input_p)(struct mbuf **, int,
101201735Sluigi			struct ip_fw_args *, int);
102201735Sluigi
#ifdef INET
/*
 * Multicast routing and RSVP hooks.
 *
 * None of the pointers below has an initializer: they are expected to be
 * NULL by virtue of living in zero-filled BSS.  Callers in this file test
 * them for NULL before use.
 */

/* The socket used to communicate with the multicast routing daemon. */
VNET_DEFINE(struct socket *, ip_mrouter);

/* Multicast router entry points. */
int	(*ip_mrouter_set)(struct socket *, struct sockopt *);
int	(*ip_mrouter_get)(struct socket *, struct sockopt *);
int	(*ip_mrouter_done)(void);
int	(*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
	    struct ip_moptions *);
int	(*mrt_ioctl)(u_long, caddr_t, int);
int	(*legal_vif_num)(int);
u_long	(*ip_mcast_src)(int);

/* RSVP entry points. */
void	(*rsvp_input_p)(struct mbuf *m, int off);
int	(*ip_rsvp_vif)(struct socket *, struct sockopt *);
void	(*ip_rsvp_force_done)(struct socket *);
#endif /* INET */
130106968Sluigi
131220880Sbzu_long	rip_sendspace = 9216;
132220880SbzSYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
133220880Sbz    &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
134220880Sbz
135220880Sbzu_long	rip_recvspace = 9216;
136220880SbzSYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
137220880Sbz    &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
138220880Sbz
/*
 * Hash functions
 *
 * INP_PCBHASH_RAW_SIZE is the number of buckets requested for the raw PCB
 * hash.  INP_PCBHASH_RAW() folds protocol and both addresses into a bucket
 * index in the range 1..mask; the result is never 0, and bucket 0 is the
 * one rip_inshash() assigns to PCBs that are not fully specified.
 */

#define	INP_PCBHASH_RAW_SIZE	256
#define	INP_PCBHASH_RAW(proto, laddr, faddr, mask)			\
	((((proto) + (laddr) + (faddr)) % (mask)) + 1)
146180828Smav
147220880Sbz#ifdef INET
148180828Smavstatic void
149180828Smavrip_inshash(struct inpcb *inp)
150180828Smav{
151180828Smav	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
152180828Smav	struct inpcbhead *pcbhash;
153180828Smav	int hash;
154180828Smav
155180828Smav	INP_INFO_WLOCK_ASSERT(pcbinfo);
156180828Smav	INP_WLOCK_ASSERT(inp);
157180828Smav
158180874Smav	if (inp->inp_ip_p != 0 &&
159180874Smav	    inp->inp_laddr.s_addr != INADDR_ANY &&
160180874Smav	    inp->inp_faddr.s_addr != INADDR_ANY) {
161180828Smav		hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
162180828Smav		    inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask);
163180874Smav	} else
164180828Smav		hash = 0;
165180828Smav	pcbhash = &pcbinfo->ipi_hashbase[hash];
166180828Smav	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
167180828Smav}
168180828Smav
169180828Smavstatic void
170180828Smavrip_delhash(struct inpcb *inp)
171180828Smav{
172180874Smav
173180874Smav	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
174180828Smav	INP_WLOCK_ASSERT(inp);
175180874Smav
176180828Smav	LIST_REMOVE(inp, inp_hash);
177180828Smav}
178220880Sbz#endif /* INET */
179180828Smav
180180828Smav/*
1811541Srgrimes * Raw interface to IP protocol.
1821541Srgrimes */
1831541Srgrimes
1841541Srgrimes/*
185107113Sluigi * Initialize raw connection block q.
1861541Srgrimes */
187157927Spsstatic void
188157927Spsrip_zone_change(void *tag)
189157927Sps{
190157927Sps
191181803Sbz	uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
192157927Sps}
193157927Sps
/*
 * Item-init callback handed to in_pcbinfo_init() by rip_init(): sets up the
 * lock embedded in a raw PCB.  size and flags are ignored; always returns 0.
 */
static int
rip_inpcb_init(void *mem, int size, int flags)
{
	struct inpcb *inp;

	inp = (struct inpcb *)mem;
	INP_LOCK_INIT(inp, "inp", "rawinp");
	return (0);
}
202160491Sups
2031541Srgrimesvoid
204169454Srwatsonrip_init(void)
2051541Srgrimes{
206169454Srwatson
207205157Srwatson	in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
208222748Srwatson	    1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
209222748Srwatson	    IPI_HASHFIELDS_NONE);
210180305Srwatson	EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
211180305Srwatson	    EVENTHANDLER_PRI_ANY);
2121541Srgrimes}
2131541Srgrimes
#ifdef VIMAGE
/*
 * Counterpart of rip_init(), built only with VIMAGE: tears down the raw IP
 * pcbinfo.
 */
void
rip_destroy(void)
{

	in_pcbinfo_destroy(&V_ripcbinfo);
}
#endif /* VIMAGE */
222193731Szec
223220880Sbz#ifdef INET
224119634Ssamstatic int
225180589Srwatsonrip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
226180589Srwatson    struct sockaddr_in *ripsrc)
227119634Ssam{
228119634Ssam	int policyfail = 0;
229119634Ssam
230222488Srwatson	INP_LOCK_ASSERT(last);
231128903Srwatson
232171167Sgnn#ifdef IPSEC
233119634Ssam	/* check AH/ESP integrity. */
234125396Sume	if (ipsec4_in_reject(n, last)) {
235119634Ssam		policyfail = 1;
236119634Ssam	}
237171167Sgnn#endif /* IPSEC */
238119634Ssam#ifdef MAC
239172930Srwatson	if (!policyfail && mac_inpcb_check_deliver(last, n) != 0)
240119634Ssam		policyfail = 1;
241119634Ssam#endif
242149371Sandre	/* Check the minimum TTL for socket. */
243149371Sandre	if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl)
244149371Sandre		policyfail = 1;
245119634Ssam	if (!policyfail) {
246119634Ssam		struct mbuf *opts = NULL;
247131151Srwatson		struct socket *so;
248119634Ssam
249131151Srwatson		so = last->inp_socket;
250119634Ssam		if ((last->inp_flags & INP_CONTROLOPTS) ||
251150941Sandre		    (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
252119634Ssam			ip_savecontrol(last, &opts, ip, n);
253131151Srwatson		SOCKBUF_LOCK(&so->so_rcv);
254131151Srwatson		if (sbappendaddr_locked(&so->so_rcv,
255180589Srwatson		    (struct sockaddr *)ripsrc, n, opts) == 0) {
256119634Ssam			/* should notify about lost packet */
257119634Ssam			m_freem(n);
258119634Ssam			if (opts)
259119634Ssam				m_freem(opts);
260131151Srwatson			SOCKBUF_UNLOCK(&so->so_rcv);
261119634Ssam		} else
262131151Srwatson			sorwakeup_locked(so);
263119634Ssam	} else
264119634Ssam		m_freem(n);
265180305Srwatson	return (policyfail);
266119634Ssam}
267119634Ssam
2681541Srgrimes/*
269180305Srwatson * Setup generic address and protocol structures for raw_input routine, then
270180305Srwatson * pass them along with mbuf chain.
2711541Srgrimes */
2721541Srgrimesvoid
273119634Ssamrip_input(struct mbuf *m, int off)
2741541Srgrimes{
275189592Sbms	struct ifnet *ifp;
276119634Ssam	struct ip *ip = mtod(m, struct ip *);
27782884Sjulian	int proto = ip->ip_p;
278119634Ssam	struct inpcb *inp, *last;
279180589Srwatson	struct sockaddr_in ripsrc;
280180828Smav	int hash;
2811541Srgrimes
282180589Srwatson	bzero(&ripsrc, sizeof(ripsrc));
283180589Srwatson	ripsrc.sin_len = sizeof(ripsrc);
284180589Srwatson	ripsrc.sin_family = AF_INET;
2851541Srgrimes	ripsrc.sin_addr = ip->ip_src;
286119634Ssam	last = NULL;
287189592Sbms
288189592Sbms	ifp = m->m_pkthdr.rcvif;
289226105Sandre	/*
290241923Sglebius	 * Applications on raw sockets expect host byte order.
291226105Sandre	 */
292241923Sglebius	ip->ip_len = ntohs(ip->ip_len);
293241913Sglebius	ip->ip_off = ntohs(ip->ip_off);
294189592Sbms
295180828Smav	hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
296181803Sbz	    ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
297181803Sbz	INP_INFO_RLOCK(&V_ripcbinfo);
298181803Sbz	LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
299180833Smav		if (inp->inp_ip_p != proto)
300180828Smav			continue;
301180828Smav#ifdef INET6
302183606Sbz		/* XXX inp locking */
303180828Smav		if ((inp->inp_vflag & INP_IPV4) == 0)
304180833Smav			continue;
305180828Smav#endif
306180828Smav		if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
307180833Smav			continue;
308180828Smav		if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
309180833Smav			continue;
310200473Sbz		if (jailed_without_vnet(inp->inp_cred)) {
311189592Sbms			/*
312189592Sbms			 * XXX: If faddr was bound to multicast group,
313189592Sbms			 * jailed raw socket will drop datagram.
314189592Sbms			 */
315189592Sbms			if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
316189592Sbms				continue;
317189592Sbms		}
318186164Skmacy		if (last != NULL) {
319180828Smav			struct mbuf *n;
320180828Smav
321180828Smav			n = m_copy(m, 0, (int)M_COPYALL);
322180828Smav			if (n != NULL)
323180828Smav		    	    (void) rip_append(last, ip, n, &ripsrc);
324180828Smav			/* XXX count dropped packet */
325180828Smav			INP_RUNLOCK(last);
326180828Smav		}
327183606Sbz		INP_RLOCK(inp);
328180828Smav		last = inp;
329180828Smav	}
330181803Sbz	LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
331180833Smav		if (inp->inp_ip_p && inp->inp_ip_p != proto)
332119634Ssam			continue;
33355009Sshin#ifdef INET6
334183606Sbz		/* XXX inp locking */
33555009Sshin		if ((inp->inp_vflag & INP_IPV4) == 0)
336180833Smav			continue;
33755009Sshin#endif
338189592Sbms		if (!in_nullhost(inp->inp_laddr) &&
339189592Sbms		    !in_hosteq(inp->inp_laddr, ip->ip_dst))
340180833Smav			continue;
341189592Sbms		if (!in_nullhost(inp->inp_faddr) &&
342189592Sbms		    !in_hosteq(inp->inp_faddr, ip->ip_src))
343180833Smav			continue;
344200473Sbz		if (jailed_without_vnet(inp->inp_cred)) {
345189592Sbms			/*
346189592Sbms			 * Allow raw socket in jail to receive multicast;
347189592Sbms			 * assume process had PRIV_NETINET_RAW at attach,
348189592Sbms			 * and fall through into normal filter path if so.
349189592Sbms			 */
350189592Sbms			if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
351189592Sbms			    prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
352189592Sbms				continue;
353189592Sbms		}
354189592Sbms		/*
355189592Sbms		 * If this raw socket has multicast state, and we
356189592Sbms		 * have received a multicast, check if this socket
357189592Sbms		 * should receive it, as multicast filtering is now
358189592Sbms		 * the responsibility of the transport layer.
359189592Sbms		 */
360189592Sbms		if (inp->inp_moptions != NULL &&
361189592Sbms		    IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
362199287Sbms			/*
363199287Sbms			 * If the incoming datagram is for IGMP, allow it
364199287Sbms			 * through unconditionally to the raw socket.
365199287Sbms			 *
366199287Sbms			 * In the case of IGMPv2, we may not have explicitly
367199287Sbms			 * joined the group, and may have set IFF_ALLMULTI
368199287Sbms			 * on the interface. imo_multi_filter() may discard
369199287Sbms			 * control traffic we actually need to see.
370199287Sbms			 *
371199287Sbms			 * Userland multicast routing daemons should continue
372199287Sbms			 * filter the control traffic appropriately.
373199287Sbms			 */
374189592Sbms			int blocked;
375189592Sbms
376199287Sbms			blocked = MCAST_PASS;
377199287Sbms			if (proto != IPPROTO_IGMP) {
378199287Sbms				struct sockaddr_in group;
379189592Sbms
380199287Sbms				bzero(&group, sizeof(struct sockaddr_in));
381199287Sbms				group.sin_len = sizeof(struct sockaddr_in);
382199287Sbms				group.sin_family = AF_INET;
383199287Sbms				group.sin_addr = ip->ip_dst;
384199287Sbms
385199287Sbms				blocked = imo_multi_filter(inp->inp_moptions,
386199287Sbms				    ifp,
387199287Sbms				    (struct sockaddr *)&group,
388199287Sbms				    (struct sockaddr *)&ripsrc);
389199287Sbms			}
390199287Sbms
391189592Sbms			if (blocked != MCAST_PASS) {
392190951Srwatson				IPSTAT_INC(ips_notmember);
393189592Sbms				continue;
394189592Sbms			}
395189592Sbms		}
396186164Skmacy		if (last != NULL) {
397119634Ssam			struct mbuf *n;
39878064Sume
399119634Ssam			n = m_copy(m, 0, (int)M_COPYALL);
400119634Ssam			if (n != NULL)
401180589Srwatson				(void) rip_append(last, ip, n, &ripsrc);
402119634Ssam			/* XXX count dropped packet */
403178377Srwatson			INP_RUNLOCK(last);
4041541Srgrimes		}
405183606Sbz		INP_RLOCK(inp);
40619622Sfenner		last = inp;
4071541Srgrimes	}
408181803Sbz	INP_INFO_RUNLOCK(&V_ripcbinfo);
409119634Ssam	if (last != NULL) {
410180589Srwatson		if (rip_append(last, ip, m, &ripsrc) != 0)
411190951Srwatson			IPSTAT_INC(ips_delivered);
412178377Srwatson		INP_RUNLOCK(last);
4131541Srgrimes	} else {
4141541Srgrimes		m_freem(m);
415190951Srwatson		IPSTAT_INC(ips_noproto);
416190951Srwatson		IPSTAT_DEC(ips_delivered);
41778064Sume	}
4187083Swollman}
4197083Swollman
4201541Srgrimes/*
421180305Srwatson * Generate IP header and pass packet to ip_output.  Tack on options user may
422180305Srwatson * have setup with control call.
4231541Srgrimes */
4241541Srgrimesint
425119634Ssamrip_output(struct mbuf *m, struct socket *so, u_long dst)
4261541Srgrimes{
427119634Ssam	struct ip *ip;
428130024Sbmilekic	int error;
429119634Ssam	struct inpcb *inp = sotoinpcb(so);
430134793Sjmg	int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
431134793Sjmg	    IP_ALLOWBROADCAST;
4321541Srgrimes
4331541Srgrimes	/*
434180305Srwatson	 * If the user handed us a complete IP packet, use it.  Otherwise,
435180305Srwatson	 * allocate an mbuf for a header and fill it in.
4361541Srgrimes	 */
4371541Srgrimes	if ((inp->inp_flags & INP_HDRINCL) == 0) {
43819183Sfenner		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
43919183Sfenner			m_freem(m);
44019183Sfenner			return(EMSGSIZE);
44119183Sfenner		}
442243882Sglebius		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
443119489Srwatson		if (m == NULL)
444119489Srwatson			return(ENOBUFS);
445130024Sbmilekic
446178377Srwatson		INP_RLOCK(inp);
4471541Srgrimes		ip = mtod(m, struct ip *);
44874024Sru		ip->ip_tos = inp->inp_ip_tos;
449150594Sandre		if (inp->inp_flags & INP_DONTFRAG)
450241913Sglebius			ip->ip_off = htons(IP_DF);
451150594Sandre		else
452241913Sglebius			ip->ip_off = htons(0);
45324570Sdg		ip->ip_p = inp->inp_ip_p;
454241913Sglebius		ip->ip_len = htons(m->m_pkthdr.len);
455188144Sjamie		ip->ip_src = inp->inp_laddr;
456266718Ssmh		ip->ip_dst.s_addr = dst;
457207277Sbz		if (jailed(inp->inp_cred)) {
458207277Sbz			/*
459207277Sbz			 * prison_local_ip4() would be good enough but would
460207277Sbz			 * let a source of INADDR_ANY pass, which we do not
461266718Ssmh			 * want to see from jails.
462207277Sbz			 */
463266718Ssmh			if (ip->ip_src.s_addr == INADDR_ANY) {
464266718Ssmh				error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
465266718Ssmh				    inp->inp_cred);
466266718Ssmh			} else {
467207277Sbz				error = prison_local_ip4(inp->inp_cred,
468207277Sbz				    &ip->ip_src);
469266718Ssmh			}
470207277Sbz			if (error != 0) {
471207277Sbz				INP_RUNLOCK(inp);
472207277Sbz				m_freem(m);
473207277Sbz				return (error);
474207277Sbz			}
475185435Sbz		}
47674024Sru		ip->ip_ttl = inp->inp_ip_ttl;
4771541Srgrimes	} else {
47819183Sfenner		if (m->m_pkthdr.len > IP_MAXPACKET) {
47919183Sfenner			m_freem(m);
48019183Sfenner			return(EMSGSIZE);
48119183Sfenner		}
482178377Srwatson		INP_RLOCK(inp);
4831541Srgrimes		ip = mtod(m, struct ip *);
484188144Sjamie		error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
485188144Sjamie		if (error != 0) {
486185435Sbz			INP_RUNLOCK(inp);
487185435Sbz			m_freem(m);
488188144Sjamie			return (error);
489128664Sbmilekic		}
490180305Srwatson
491180305Srwatson		/*
492180305Srwatson		 * Don't allow both user specified and setsockopt options,
493180305Srwatson		 * and don't allow packet length sizes that will crash.
494180305Srwatson		 */
495180305Srwatson		if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
49626008Sfenner		    || (ip->ip_len > m->m_pkthdr.len)
497105586Sphk		    || (ip->ip_len < (ip->ip_hl << 2))) {
498178377Srwatson			INP_RUNLOCK(inp);
49914611Spst			m_freem(m);
500180305Srwatson			return (EINVAL);
50114611Spst		}
5021541Srgrimes		if (ip->ip_id == 0)
503133720Sdwmalone			ip->ip_id = ip_newid();
504180305Srwatson
505180305Srwatson		/*
506241923Sglebius		 * Applications on raw sockets pass us packets
507241923Sglebius		 * in host byte order.
508241913Sglebius		 */
509241913Sglebius		ip->ip_len = htons(ip->ip_len);
510241913Sglebius		ip->ip_off = htons(ip->ip_off);
511241913Sglebius
512241913Sglebius		/*
513180305Srwatson		 * XXX prevent ip_output from overwriting header fields.
514180305Srwatson		 */
5151541Srgrimes		flags |= IP_RAWOUTPUT;
516190951Srwatson		IPSTAT_INC(ips_rawout);
5171541Srgrimes	}
51855009Sshin
519162071Sandre	if (inp->inp_flags & INP_ONESBCAST)
520119178Sbms		flags |= IP_SENDONES;
521119178Sbms
522130024Sbmilekic#ifdef MAC
523172930Srwatson	mac_inpcb_create_mbuf(inp, m);
524130024Sbmilekic#endif
525130024Sbmilekic
526130024Sbmilekic	error = ip_output(m, inp->inp_options, NULL, flags,
527130024Sbmilekic	    inp->inp_moptions, inp);
528178377Srwatson	INP_RUNLOCK(inp);
529180305Srwatson	return (error);
5301541Srgrimes}
5311541Srgrimes
5321541Srgrimes/*
5331541Srgrimes * Raw IP socket option processing.
534117737Srwatson *
535136440Srwatson * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
536136440Srwatson * only be created by a privileged process, and as such, socket option
537136440Srwatson * operations to manage system properties on any raw socket were allowed to
538136440Srwatson * take place without explicit additional access control checks.  However,
539136440Srwatson * raw sockets can now also be created in jail(), and therefore explicit
540136440Srwatson * checks are now required.  Likewise, raw sockets can be used by a process
541136440Srwatson * after it gives up privilege, so some caution is required.  For options
542136440Srwatson * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
543136440Srwatson * performed in ip_ctloutput() and therefore no check occurs here.
544171157Srwatson * Unilaterally checking priv_check() here breaks normal IP socket option
545136440Srwatson * operations on raw sockets.
546136440Srwatson *
547136440Srwatson * When adding new socket options here, make sure to add access control
548136440Srwatson * checks here as necessary.
549229265Sbz *
550229265Sbz * XXX-BZ inp locking?
5511541Srgrimes */
5521541Srgrimesint
553119634Ssamrip_ctloutput(struct socket *so, struct sockopt *sopt)
5541541Srgrimes{
55538482Swollman	struct	inpcb *inp = sotoinpcb(so);
55638482Swollman	int	error, optval;
5571541Srgrimes
558185101Sjulian	if (sopt->sopt_level != IPPROTO_IP) {
559185101Sjulian		if ((sopt->sopt_level == SOL_SOCKET) &&
560185101Sjulian		    (sopt->sopt_name == SO_SETFIB)) {
561185101Sjulian			inp->inp_inc.inc_fibnum = so->so_fibnum;
562185101Sjulian			return (0);
563185101Sjulian		}
5641541Srgrimes		return (EINVAL);
565185101Sjulian	}
5661541Srgrimes
56738482Swollman	error = 0;
56838482Swollman	switch (sopt->sopt_dir) {
56938482Swollman	case SOPT_GET:
57038482Swollman		switch (sopt->sopt_name) {
57138482Swollman		case IP_HDRINCL:
57238482Swollman			optval = inp->inp_flags & INP_HDRINCL;
57338482Swollman			error = sooptcopyout(sopt, &optval, sizeof optval);
57438482Swollman			break;
57538482Swollman
576200034Sluigi		case IP_FW3:	/* generic ipfw v.3 functions */
57786047Sluigi		case IP_FW_ADD:	/* ADD actually returns the body... */
57838482Swollman		case IP_FW_GET:
579130281Sru		case IP_FW_TABLE_GETSIZE:
580130281Sru		case IP_FW_TABLE_LIST:
581165648Spiso		case IP_FW_NAT_GET_CONFIG:
582165648Spiso		case IP_FW_NAT_GET_LOG:
583197952Sjulian			if (V_ip_fw_ctl_ptr != NULL)
584197952Sjulian				error = V_ip_fw_ctl_ptr(sopt);
58586047Sluigi			else
58638482Swollman				error = ENOPROTOOPT;
58738482Swollman			break;
58838482Swollman
589200034Sluigi		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
59041793Sluigi		case IP_DUMMYNET_GET:
591133920Sandre			if (ip_dn_ctl_ptr != NULL)
59286047Sluigi				error = ip_dn_ctl_ptr(sopt);
59386047Sluigi			else
59484516Sps				error = ENOPROTOOPT;
59541793Sluigi			break ;
59638482Swollman
59738482Swollman		case MRT_INIT:
59838482Swollman		case MRT_DONE:
59938482Swollman		case MRT_ADD_VIF:
60038482Swollman		case MRT_DEL_VIF:
60138482Swollman		case MRT_ADD_MFC:
60238482Swollman		case MRT_DEL_MFC:
60338482Swollman		case MRT_VERSION:
60438482Swollman		case MRT_ASSERT:
605118622Shsu		case MRT_API_SUPPORT:
606118622Shsu		case MRT_API_CONFIG:
607118622Shsu		case MRT_ADD_BW_UPCALL:
608118622Shsu		case MRT_DEL_BW_UPCALL:
609164033Srwatson			error = priv_check(curthread, PRIV_NETINET_MROUTE);
610136440Srwatson			if (error != 0)
611136440Srwatson				return (error);
612106968Sluigi			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
613106968Sluigi				EOPNOTSUPP;
61438482Swollman			break;
61538482Swollman
61638482Swollman		default:
61738482Swollman			error = ip_ctloutput(so, sopt);
61838482Swollman			break;
61938482Swollman		}
62038482Swollman		break;
62138482Swollman
62238482Swollman	case SOPT_SET:
62338482Swollman		switch (sopt->sopt_name) {
62438482Swollman		case IP_HDRINCL:
62538482Swollman			error = sooptcopyin(sopt, &optval, sizeof optval,
62638482Swollman					    sizeof optval);
62738482Swollman			if (error)
62838482Swollman				break;
62938482Swollman			if (optval)
63010944Swollman				inp->inp_flags |= INP_HDRINCL;
63110944Swollman			else
63210944Swollman				inp->inp_flags &= ~INP_HDRINCL;
63338482Swollman			break;
6341541Srgrimes
635200034Sluigi		case IP_FW3:	/* generic ipfw v.3 functions */
63686910Sru		case IP_FW_ADD:
63738482Swollman		case IP_FW_DEL:
63838482Swollman		case IP_FW_FLUSH:
63938482Swollman		case IP_FW_ZERO:
64049350Sgreen		case IP_FW_RESETLOG:
641130281Sru		case IP_FW_TABLE_ADD:
642130281Sru		case IP_FW_TABLE_DEL:
643130281Sru		case IP_FW_TABLE_FLUSH:
644165648Spiso		case IP_FW_NAT_CFG:
645165648Spiso		case IP_FW_NAT_DEL:
646197952Sjulian			if (V_ip_fw_ctl_ptr != NULL)
647197952Sjulian				error = V_ip_fw_ctl_ptr(sopt);
64886047Sluigi			else
64938482Swollman				error = ENOPROTOOPT;
65038482Swollman			break;
65117758Ssos
652200034Sluigi		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
65341793Sluigi		case IP_DUMMYNET_CONFIGURE:
65441793Sluigi		case IP_DUMMYNET_DEL:
65541793Sluigi		case IP_DUMMYNET_FLUSH:
656133920Sandre			if (ip_dn_ctl_ptr != NULL)
65786047Sluigi				error = ip_dn_ctl_ptr(sopt);
65886047Sluigi			else
65941793Sluigi				error = ENOPROTOOPT ;
66041793Sluigi			break ;
6615543Sugen
66238482Swollman		case IP_RSVP_ON:
663164033Srwatson			error = priv_check(curthread, PRIV_NETINET_MROUTE);
664136440Srwatson			if (error != 0)
665136440Srwatson				return (error);
66638482Swollman			error = ip_rsvp_init(so);
66738482Swollman			break;
66817758Ssos
66938482Swollman		case IP_RSVP_OFF:
670164033Srwatson			error = priv_check(curthread, PRIV_NETINET_MROUTE);
671136440Srwatson			if (error != 0)
672136440Srwatson				return (error);
67338482Swollman			error = ip_rsvp_done();
67438482Swollman			break;
6752531Swollman
67638482Swollman		case IP_RSVP_VIF_ON:
67738482Swollman		case IP_RSVP_VIF_OFF:
678164033Srwatson			error = priv_check(curthread, PRIV_NETINET_MROUTE);
679136440Srwatson			if (error != 0)
680136440Srwatson				return (error);
681106968Sluigi			error = ip_rsvp_vif ?
682106968Sluigi				ip_rsvp_vif(so, sopt) : EINVAL;
68338482Swollman			break;
6842531Swollman
68538482Swollman		case MRT_INIT:
68638482Swollman		case MRT_DONE:
68738482Swollman		case MRT_ADD_VIF:
68838482Swollman		case MRT_DEL_VIF:
68938482Swollman		case MRT_ADD_MFC:
69038482Swollman		case MRT_DEL_MFC:
69138482Swollman		case MRT_VERSION:
69238482Swollman		case MRT_ASSERT:
693118622Shsu		case MRT_API_SUPPORT:
694118622Shsu		case MRT_API_CONFIG:
695118622Shsu		case MRT_ADD_BW_UPCALL:
696118622Shsu		case MRT_DEL_BW_UPCALL:
697164033Srwatson			error = priv_check(curthread, PRIV_NETINET_MROUTE);
698136440Srwatson			if (error != 0)
699136440Srwatson				return (error);
700106968Sluigi			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
701106968Sluigi					EOPNOTSUPP;
70238482Swollman			break;
7039209Swollman
70438482Swollman		default:
70538482Swollman			error = ip_ctloutput(so, sopt);
70638482Swollman			break;
70738482Swollman		}
70838482Swollman		break;
70938482Swollman	}
7109209Swollman
71138482Swollman	return (error);
7121541Srgrimes}
7131541Srgrimes
71422672Swollman/*
715180305Srwatson * This function exists solely to receive the PRC_IFDOWN messages which are
716180305Srwatson * sent by if_down().  It looks for an ifaddr whose ifa_addr is sa, and calls
717180305Srwatson * in_ifadown() to remove all routes corresponding to that address.  It also
718180305Srwatson * receives the PRC_IFUP messages from if_up() and reinstalls the interface
719180305Srwatson * routes.
72022672Swollman */
72122672Swollmanvoid
722119634Ssamrip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
72322672Swollman{
72422672Swollman	struct in_ifaddr *ia;
72522672Swollman	struct ifnet *ifp;
72622672Swollman	int err;
72722672Swollman	int flags;
72822672Swollman
72938482Swollman	switch (cmd) {
73022672Swollman	case PRC_IFDOWN:
731194951Srwatson		IN_IFADDR_RLOCK();
732181803Sbz		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
73322672Swollman			if (ia->ia_ifa.ifa_addr == sa
73422672Swollman			    && (ia->ia_flags & IFA_ROUTE)) {
735194951Srwatson				ifa_ref(&ia->ia_ifa);
736194951Srwatson				IN_IFADDR_RUNLOCK();
73722672Swollman				/*
73822672Swollman				 * in_ifscrub kills the interface route.
73922672Swollman				 */
740222143Sqingli				in_ifscrub(ia->ia_ifp, ia, 0);
74122672Swollman				/*
742180305Srwatson				 * in_ifadown gets rid of all the rest of the
743180305Srwatson				 * routes.  This is not quite the right thing
744180305Srwatson				 * to do, but at least if we are running a
745180305Srwatson				 * routing process they will come back.
74622672Swollman				 */
74776469Sru				in_ifadown(&ia->ia_ifa, 0);
748194951Srwatson				ifa_free(&ia->ia_ifa);
74922672Swollman				break;
75022672Swollman			}
75122672Swollman		}
752194951Srwatson		if (ia == NULL)		/* If ia matched, already unlocked. */
753194951Srwatson			IN_IFADDR_RUNLOCK();
75422672Swollman		break;
75522672Swollman
75622672Swollman	case PRC_IFUP:
757194951Srwatson		IN_IFADDR_RLOCK();
758181803Sbz		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
75922672Swollman			if (ia->ia_ifa.ifa_addr == sa)
76022672Swollman				break;
76122672Swollman		}
762194951Srwatson		if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
763194951Srwatson			IN_IFADDR_RUNLOCK();
76422672Swollman			return;
765194951Srwatson		}
766194951Srwatson		ifa_ref(&ia->ia_ifa);
767194951Srwatson		IN_IFADDR_RUNLOCK();
76822672Swollman		flags = RTF_UP;
76922672Swollman		ifp = ia->ia_ifa.ifa_ifp;
77022672Swollman
77122672Swollman		if ((ifp->if_flags & IFF_LOOPBACK)
77222672Swollman		    || (ifp->if_flags & IFF_POINTOPOINT))
77322672Swollman			flags |= RTF_HOST;
77422672Swollman
775222143Sqingli		err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
776222143Sqingli		if (err == 0)
777222143Sqingli			ia->ia_flags &= ~IFA_RTSELF;
778222143Sqingli
77922672Swollman		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
78022672Swollman		if (err == 0)
78122672Swollman			ia->ia_flags |= IFA_ROUTE;
782222143Sqingli
783197227Sqingli		err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
784212502Sqingli		if (err == 0)
785212502Sqingli			ia->ia_flags |= IFA_RTSELF;
786222143Sqingli
787194951Srwatson		ifa_free(&ia->ia_ifa);
78822672Swollman		break;
78922672Swollman	}
79022672Swollman}
79122672Swollman
79222900Swollmanstatic int
79383366Sjulianrip_attach(struct socket *so, int proto, struct thread *td)
7941541Srgrimes{
79522900Swollman	struct inpcb *inp;
796119634Ssam	int error;
79711603Sdg
79822900Swollman	inp = sotoinpcb(so);
799157374Srwatson	KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
800170587Srwatson
801170587Srwatson	error = priv_check(td, PRIV_NETINET_RAW);
802164033Srwatson	if (error)
803180305Srwatson		return (error);
804157374Srwatson	if (proto >= IPPROTO_MAX || proto < 0)
805106152Sfenner		return EPROTONOSUPPORT;
80655009Sshin	error = soreserve(so, rip_sendspace, rip_recvspace);
807157374Srwatson	if (error)
808180305Srwatson		return (error);
809181803Sbz	INP_INFO_WLOCK(&V_ripcbinfo);
810181803Sbz	error = in_pcballoc(so, &V_ripcbinfo);
811119634Ssam	if (error) {
812181803Sbz		INP_INFO_WUNLOCK(&V_ripcbinfo);
813180305Srwatson		return (error);
814119634Ssam	}
81522900Swollman	inp = (struct inpcb *)so->so_pcb;
81655009Sshin	inp->inp_vflag |= INP_IPV4;
81724570Sdg	inp->inp_ip_p = proto;
818181803Sbz	inp->inp_ip_ttl = V_ip_defttl;
819180828Smav	rip_inshash(inp);
820181803Sbz	INP_INFO_WUNLOCK(&V_ripcbinfo);
821178285Srwatson	INP_WUNLOCK(inp);
822180305Srwatson	return (0);
82322900Swollman}
8241541Srgrimes
825122324Ssamstatic void
826160549Srwatsonrip_detach(struct socket *so)
827122324Ssam{
828160549Srwatson	struct inpcb *inp;
829146858Srwatson
830160549Srwatson	inp = sotoinpcb(so);
831160549Srwatson	KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
832160549Srwatson	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
833160549Srwatson	    ("rip_detach: not closed"));
834122324Ssam
835181803Sbz	INP_INFO_WLOCK(&V_ripcbinfo);
836178285Srwatson	INP_WLOCK(inp);
837180828Smav	rip_delhash(inp);
838181803Sbz	if (so == V_ip_mrouter && ip_mrouter_done)
839122324Ssam		ip_mrouter_done();
840122324Ssam	if (ip_rsvp_force_done)
841122324Ssam		ip_rsvp_force_done(so);
842181803Sbz	if (so == V_ip_rsvpd)
843122324Ssam		ip_rsvp_done();
844122324Ssam	in_pcbdetach(inp);
845157374Srwatson	in_pcbfree(inp);
846181803Sbz	INP_INFO_WUNLOCK(&V_ripcbinfo);
847122324Ssam}
848122324Ssam
849157370Srwatsonstatic void
850160549Srwatsonrip_dodisconnect(struct socket *so, struct inpcb *inp)
85122900Swollman{
852222488Srwatson	struct inpcbinfo *pcbinfo;
853180874Smav
854222488Srwatson	pcbinfo = inp->inp_pcbinfo;
855222488Srwatson	INP_INFO_WLOCK(pcbinfo);
856222488Srwatson	INP_WLOCK(inp);
857180828Smav	rip_delhash(inp);
858160549Srwatson	inp->inp_faddr.s_addr = INADDR_ANY;
859180828Smav	rip_inshash(inp);
860160549Srwatson	SOCK_LOCK(so);
861160549Srwatson	so->so_state &= ~SS_ISCONNECTED;
862160549Srwatson	SOCK_UNLOCK(so);
863222488Srwatson	INP_WUNLOCK(inp);
864222488Srwatson	INP_INFO_WUNLOCK(pcbinfo);
865160549Srwatson}
866160549Srwatson
867160549Srwatsonstatic void
868160549Srwatsonrip_abort(struct socket *so)
869160549Srwatson{
87022900Swollman	struct inpcb *inp;
8711541Srgrimes
872157374Srwatson	inp = sotoinpcb(so);
873160549Srwatson	KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
874160549Srwatson
875160549Srwatson	rip_dodisconnect(so, inp);
87622900Swollman}
8771541Srgrimes
878157366Srwatsonstatic void
879160549Srwatsonrip_close(struct socket *so)
88022900Swollman{
881122324Ssam	struct inpcb *inp;
882122324Ssam
883157374Srwatson	inp = sotoinpcb(so);
884160549Srwatson	KASSERT(inp != NULL, ("rip_close: inp == NULL"));
885160549Srwatson
886160549Srwatson	rip_dodisconnect(so, inp);
88722900Swollman}
8881541Srgrimes
88922900Swollmanstatic int
89022900Swollmanrip_disconnect(struct socket *so)
89122900Swollman{
892158588Smaxim	struct inpcb *inp;
893158588Smaxim
89497658Stanimura	if ((so->so_state & SS_ISCONNECTED) == 0)
895180305Srwatson		return (ENOTCONN);
896158588Smaxim
897158588Smaxim	inp = sotoinpcb(so);
898158588Smaxim	KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
899180305Srwatson
900160549Srwatson	rip_dodisconnect(so, inp);
901157374Srwatson	return (0);
90222900Swollman}
9031541Srgrimes
90422900Swollmanstatic int
90583366Sjulianrip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
90622900Swollman{
90728270Swollman	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
908122324Ssam	struct inpcb *inp;
909188144Sjamie	int error;
9101541Srgrimes
91128270Swollman	if (nam->sa_len != sizeof(*addr))
912180305Srwatson		return (EINVAL);
9131541Srgrimes
914188144Sjamie	error = prison_check_ip4(td->td_ucred, &addr->sin_addr);
915188144Sjamie	if (error != 0)
916188144Sjamie		return (error);
917128664Sbmilekic
918193217Spjd	inp = sotoinpcb(so);
919193217Spjd	KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
920193217Spjd
921181803Sbz	if (TAILQ_EMPTY(&V_ifnet) ||
922122324Ssam	    (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) ||
923107113Sluigi	    (addr->sin_addr.s_addr &&
924193217Spjd	     (inp->inp_flags & INP_BINDANY) == 0 &&
925194622Srwatson	     ifa_ifwithaddr_check((struct sockaddr *)addr) == 0))
926180305Srwatson		return (EADDRNOTAVAIL);
927122324Ssam
928181803Sbz	INP_INFO_WLOCK(&V_ripcbinfo);
929178285Srwatson	INP_WLOCK(inp);
930180828Smav	rip_delhash(inp);
93122900Swollman	inp->inp_laddr = addr->sin_addr;
932180828Smav	rip_inshash(inp);
933178285Srwatson	INP_WUNLOCK(inp);
934181803Sbz	INP_INFO_WUNLOCK(&V_ripcbinfo);
935180305Srwatson	return (0);
93622900Swollman}
9371541Srgrimes
93822900Swollmanstatic int
93983366Sjulianrip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
94022900Swollman{
94128270Swollman	struct sockaddr_in *addr = (struct sockaddr_in *)nam;
942122324Ssam	struct inpcb *inp;
9431541Srgrimes
94428270Swollman	if (nam->sa_len != sizeof(*addr))
945180305Srwatson		return (EINVAL);
946181803Sbz	if (TAILQ_EMPTY(&V_ifnet))
947180305Srwatson		return (EADDRNOTAVAIL);
948122324Ssam	if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
949180305Srwatson		return (EAFNOSUPPORT);
950122324Ssam
951157374Srwatson	inp = sotoinpcb(so);
952157374Srwatson	KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
953180305Srwatson
954181803Sbz	INP_INFO_WLOCK(&V_ripcbinfo);
955178285Srwatson	INP_WLOCK(inp);
956180828Smav	rip_delhash(inp);
95722900Swollman	inp->inp_faddr = addr->sin_addr;
958180828Smav	rip_inshash(inp);
95922900Swollman	soisconnected(so);
960178285Srwatson	INP_WUNLOCK(inp);
961181803Sbz	INP_INFO_WUNLOCK(&V_ripcbinfo);
962180305Srwatson	return (0);
96322900Swollman}
9641541Srgrimes
96522900Swollmanstatic int
96622900Swollmanrip_shutdown(struct socket *so)
96722900Swollman{
968122324Ssam	struct inpcb *inp;
969122324Ssam
970122324Ssam	inp = sotoinpcb(so);
971157374Srwatson	KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
972180305Srwatson
973178285Srwatson	INP_WLOCK(inp);
97422900Swollman	socantsendmore(so);
975178285Srwatson	INP_WUNLOCK(inp);
976180305Srwatson	return (0);
97722900Swollman}
9781541Srgrimes
97922900Swollmanstatic int
98028270Swollmanrip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
981169454Srwatson    struct mbuf *control, struct thread *td)
98222900Swollman{
983122324Ssam	struct inpcb *inp;
984122324Ssam	u_long dst;
9851541Srgrimes
986122324Ssam	inp = sotoinpcb(so);
987157374Srwatson	KASSERT(inp != NULL, ("rip_send: inp == NULL"));
988180305Srwatson
989157374Srwatson	/*
990157374Srwatson	 * Note: 'dst' reads below are unlocked.
991157374Srwatson	 */
99222900Swollman	if (so->so_state & SS_ISCONNECTED) {
99322900Swollman		if (nam) {
99422900Swollman			m_freem(m);
995180305Srwatson			return (EISCONN);
99622900Swollman		}
997157374Srwatson		dst = inp->inp_faddr.s_addr;	/* Unlocked read. */
99822900Swollman	} else {
99922900Swollman		if (nam == NULL) {
100022900Swollman			m_freem(m);
1001180305Srwatson			return (ENOTCONN);
100222900Swollman		}
100328270Swollman		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
10041541Srgrimes	}
1005180305Srwatson	return (rip_output(m, so, dst));
10061541Srgrimes}
1007220880Sbz#endif /* INET */
100822900Swollman
100936079Swollmanstatic int
101062573Sphkrip_pcblist(SYSCTL_HANDLER_ARGS)
101136079Swollman{
1012119634Ssam	int error, i, n;
101336079Swollman	struct inpcb *inp, **inp_list;
101436079Swollman	inp_gen_t gencnt;
101536079Swollman	struct xinpgen xig;
101636079Swollman
101736079Swollman	/*
101836079Swollman	 * The process of preparing the TCB list is too time-consuming and
101936079Swollman	 * resource-intensive to repeat twice on every request.
102036079Swollman	 */
102136079Swollman	if (req->oldptr == 0) {
1022181803Sbz		n = V_ripcbinfo.ipi_count;
1023211433Sjhb		n += imax(n / 8, 10);
1024211433Sjhb		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
1025180305Srwatson		return (0);
102636079Swollman	}
102736079Swollman
102836079Swollman	if (req->newptr != 0)
1029180305Srwatson		return (EPERM);
103036079Swollman
103136079Swollman	/*
103236079Swollman	 * OK, now we're committed to doing something.
103336079Swollman	 */
1034181803Sbz	INP_INFO_RLOCK(&V_ripcbinfo);
1035181803Sbz	gencnt = V_ripcbinfo.ipi_gencnt;
1036181803Sbz	n = V_ripcbinfo.ipi_count;
1037181803Sbz	INP_INFO_RUNLOCK(&V_ripcbinfo);
103836079Swollman
103936079Swollman	xig.xig_len = sizeof xig;
104036079Swollman	xig.xig_count = n;
104136079Swollman	xig.xig_gen = gencnt;
104236079Swollman	xig.xig_sogen = so_gencnt;
104336079Swollman	error = SYSCTL_OUT(req, &xig, sizeof xig);
104436079Swollman	if (error)
1045180305Srwatson		return (error);
104636079Swollman
1047111119Simp	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
104836079Swollman	if (inp_list == 0)
1049180305Srwatson		return (ENOMEM);
1050178285Srwatson
1051181803Sbz	INP_INFO_RLOCK(&V_ripcbinfo);
1052181803Sbz	for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
105371999Sphk	     inp = LIST_NEXT(inp, inp_list)) {
1054205251Sbz		INP_WLOCK(inp);
1055120181Ssam		if (inp->inp_gencnt <= gencnt &&
1056183982Sbz		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
1057205251Sbz			in_pcbref(inp);
105836079Swollman			inp_list[i++] = inp;
105984527Sps		}
1060205251Sbz		INP_WUNLOCK(inp);
106136079Swollman	}
1062181803Sbz	INP_INFO_RUNLOCK(&V_ripcbinfo);
106336079Swollman	n = i;
106436079Swollman
106536079Swollman	error = 0;
106636079Swollman	for (i = 0; i < n; i++) {
106736079Swollman		inp = inp_list[i];
1068178377Srwatson		INP_RLOCK(inp);
106936079Swollman		if (inp->inp_gencnt <= gencnt) {
107036079Swollman			struct xinpcb xi;
1071186164Skmacy
1072145953Scperciva			bzero(&xi, sizeof(xi));
107336079Swollman			xi.xi_len = sizeof xi;
107436079Swollman			/* XXX should avoid extra copy */
107536079Swollman			bcopy(inp, &xi.xi_inp, sizeof *inp);
107636079Swollman			if (inp->inp_socket)
107736079Swollman				sotoxsocket(inp->inp_socket, &xi.xi_socket);
1078178377Srwatson			INP_RUNLOCK(inp);
107936079Swollman			error = SYSCTL_OUT(req, &xi, sizeof xi);
1080160491Sups		} else
1081178377Srwatson			INP_RUNLOCK(inp);
108236079Swollman	}
1083205251Sbz	INP_INFO_WLOCK(&V_ripcbinfo);
1084205251Sbz	for (i = 0; i < n; i++) {
1085205251Sbz		inp = inp_list[i];
1086222488Srwatson		INP_RLOCK(inp);
1087222488Srwatson		if (!in_pcbrele_rlocked(inp))
1088222488Srwatson			INP_RUNLOCK(inp);
1089205251Sbz	}
1090205251Sbz	INP_INFO_WUNLOCK(&V_ripcbinfo);
1091205251Sbz
109236079Swollman	if (!error) {
109336079Swollman		/*
1094180305Srwatson		 * Give the user an updated idea of our state.  If the
1095180305Srwatson		 * generation differs from what we told her before, she knows
1096180305Srwatson		 * that something happened while we were processing this
1097180305Srwatson		 * request, and it might be necessary to retry.
109836079Swollman		 */
1099181803Sbz		INP_INFO_RLOCK(&V_ripcbinfo);
1100181803Sbz		xig.xig_gen = V_ripcbinfo.ipi_gencnt;
110136079Swollman		xig.xig_sogen = so_gencnt;
1102181803Sbz		xig.xig_count = V_ripcbinfo.ipi_count;
1103181803Sbz		INP_INFO_RUNLOCK(&V_ripcbinfo);
110436079Swollman		error = SYSCTL_OUT(req, &xig, sizeof xig);
110536079Swollman	}
110636079Swollman	free(inp_list, M_TEMP);
1107180305Srwatson	return (error);
110836079Swollman}
110936079Swollman
1110217554SmdfSYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
1111217554Smdf    CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
1112180305Srwatson    rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
111336079Swollman
1114220880Sbz#ifdef INET
111522900Swollmanstruct pr_usrreqs rip_usrreqs = {
1116137386Sphk	.pru_abort =		rip_abort,
1117137386Sphk	.pru_attach =		rip_attach,
1118137386Sphk	.pru_bind =		rip_bind,
1119137386Sphk	.pru_connect =		rip_connect,
1120137386Sphk	.pru_control =		in_control,
1121137386Sphk	.pru_detach =		rip_detach,
1122137386Sphk	.pru_disconnect =	rip_disconnect,
1123169462Srwatson	.pru_peeraddr =		in_getpeeraddr,
1124137386Sphk	.pru_send =		rip_send,
1125137386Sphk	.pru_shutdown =		rip_shutdown,
1126169462Srwatson	.pru_sockaddr =		in_getsockaddr,
1127160549Srwatson	.pru_sosetlabel =	in_pcbsosetlabel,
1128160549Srwatson	.pru_close =		rip_close,
112922900Swollman};
1130220880Sbz#endif /* INET */
1131