1147549Simp/*-
2147549Simp * Copyright (c) 1989 Stephen Deering
3147549Simp * Copyright (c) 1992, 1993
4147549Simp *      The Regents of the University of California.  All rights reserved.
5147549Simp *
6147549Simp * This code is derived from software contributed to Berkeley by
7147549Simp * Stephen Deering of Stanford University.
8147549Simp *
9147549Simp * Redistribution and use in source and binary forms, with or without
10147549Simp * modification, are permitted provided that the following conditions
11147549Simp * are met:
12147549Simp * 1. Redistributions of source code must retain the above copyright
13147549Simp *    notice, this list of conditions and the following disclaimer.
14147549Simp * 2. Redistributions in binary form must reproduce the above copyright
15147549Simp *    notice, this list of conditions and the following disclaimer in the
16147549Simp *    documentation and/or other materials provided with the distribution.
17147549Simp * 4. Neither the name of the University nor the names of its contributors
18147549Simp *    may be used to endorse or promote products derived from this software
19147549Simp *    without specific prior written permission.
20147549Simp *
21147549Simp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22147549Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23147549Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24147549Simp * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25147549Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26147549Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27147549Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28147549Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29147549Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30147549Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31147549Simp * SUCH DAMAGE.
32147549Simp *
33147549Simp *      @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
34147549Simp */
35147549Simp
361541Srgrimes/*
372531Swollman * IP multicast forwarding procedures
381541Srgrimes *
391541Srgrimes * Written by David Waitzman, BBN Labs, August 1988.
401541Srgrimes * Modified by Steve Deering, Stanford, February 1989.
412531Swollman * Modified by Mark J. Steiglitz, Stanford, May, 1991
422531Swollman * Modified by Van Jacobson, LBL, January 1993
432531Swollman * Modified by Ajit Thyagarajan, PARC, August 1993
449209Swollman * Modified by Bill Fenner, PARC, April 1995
45118622Shsu * Modified by Ahmed Helmy, SGI, June 1996
46118622Shsu * Modified by George Edmond Eddy (Rusty), ISI, February 1998
47118622Shsu * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
48118622Shsu * Modified by Hitoshi Asaeda, WIDE, August 2000
49118622Shsu * Modified by Pavlin Radoslavov, ICSI, October 2002
501541Srgrimes *
519209Swollman * MROUTING Revision: 3.5
52118622Shsu * and PIM-SMv2 and PIM-DM support, advanced API support,
53118622Shsu * bandwidth metering and signaling
541541Srgrimes */
551541Srgrimes
56190012Sbms/*
57190012Sbms * TODO: Prefix functions with ipmf_.
58190012Sbms * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol
59190012Sbms * domain attachment (if_afdata) so we can track consumers of that service.
60190012Sbms * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT,
61190012Sbms * move it to socket options.
62190012Sbms * TODO: Cleanup LSRR removal further.
63190012Sbms * TODO: Push RSVP stubs into raw_ip.c.
64190012Sbms * TODO: Use bitstring.h for vif set.
65190012Sbms * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded.
66190012Sbms * TODO: Sync ip6_mroute.c with this file.
67190012Sbms */
68190012Sbms
69172467Ssilby#include <sys/cdefs.h>
70172467Ssilby__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_mroute.c 314667 2017-03-04 13:03:31Z avg $");
71172467Ssilby
72166938Sbms#include "opt_inet.h"
7314328Speter#include "opt_mrouting.h"
741541Srgrimes
75118622Shsu#define _PIM_VT 1
76118622Shsu
771541Srgrimes#include <sys/param.h>
7895759Stanimura#include <sys/kernel.h>
79190148Sbms#include <sys/stddef.h>
8095759Stanimura#include <sys/lock.h>
81190054Sbms#include <sys/ktr.h>
8242777Sfenner#include <sys/malloc.h>
831541Srgrimes#include <sys/mbuf.h>
84129880Sphk#include <sys/module.h>
85164033Srwatson#include <sys/priv.h>
8695759Stanimura#include <sys/protosw.h>
8795759Stanimura#include <sys/signalvar.h>
881541Srgrimes#include <sys/socket.h>
891541Srgrimes#include <sys/socketvar.h>
9095759Stanimura#include <sys/sockio.h>
9195759Stanimura#include <sys/sx.h>
9280354Sfenner#include <sys/sysctl.h>
932531Swollman#include <sys/syslog.h>
9495759Stanimura#include <sys/systm.h>
9595759Stanimura#include <sys/time.h>
96253084Sae#include <sys/counter.h>
97190012Sbms
981541Srgrimes#include <net/if.h>
99111888Sjlemon#include <net/netisr.h>
1001541Srgrimes#include <net/route.h>
101196019Srwatson#include <net/vnet.h>
102190012Sbms
1031541Srgrimes#include <netinet/in.h>
10495759Stanimura#include <netinet/igmp.h>
1051541Srgrimes#include <netinet/in_systm.h>
10695759Stanimura#include <netinet/in_var.h>
1071541Srgrimes#include <netinet/ip.h>
10880354Sfenner#include <netinet/ip_encap.h>
1091541Srgrimes#include <netinet/ip_mroute.h>
11095759Stanimura#include <netinet/ip_var.h>
111152592Sandre#include <netinet/ip_options.h>
112118622Shsu#include <netinet/pim.h>
113118622Shsu#include <netinet/pim_var.h>
1149209Swollman#include <netinet/udp.h>
115185571Sbz
11660214Sken#include <machine/in_cksum.h>
1171541Srgrimes
118190054Sbms#ifndef KTR_IPMF
119191660Sbms#define KTR_IPMF KTR_INET
120190054Sbms#endif
1212531Swollman
122118622Shsu#define		VIFI_INVALID	((vifi_t) -1)
123190012Sbms#define		M_HASCL(m)	((m)->m_flags & M_EXT)
124118622Shsu
125215701Sdimstatic VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
126208744Szec#define	V_last_tv_sec	VNET(last_tv_sec)
127208744Szec
128190012Sbmsstatic MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
1299209Swollman
130119792Ssam/*
131119792Ssam * Locking.  We use two locks: one for the virtual interface table and
132119792Ssam * one for the forwarding table.  These locks may be nested in which case
133119792Ssam * the VIF lock must always be taken first.  Note that each lock is used
134119792Ssam * to cover not only the specific data structure but also related data
135190012Sbms * structures.
136119792Ssam */
137119792Ssam
138167116Sbmsstatic struct mtx mrouter_mtx;
139167116Sbms#define	MROUTER_LOCK()		mtx_lock(&mrouter_mtx)
140167116Sbms#define	MROUTER_UNLOCK()	mtx_unlock(&mrouter_mtx)
141171744Srwatson#define	MROUTER_LOCK_ASSERT()	mtx_assert(&mrouter_mtx, MA_OWNED)
142190012Sbms#define	MROUTER_LOCK_INIT()						\
143167116Sbms	mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
144167116Sbms#define	MROUTER_LOCK_DESTROY()	mtx_destroy(&mrouter_mtx)
145167116Sbms
146208744Szecstatic int ip_mrouter_cnt;	/* # of vnets with active mrouters */
147208744Szecstatic int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
148208744Szec
149253084Saestatic VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
150253084SaeVNET_PCPUSTAT_SYSINIT(mrtstat);
151253084SaeVNET_PCPUSTAT_SYSUNINIT(mrtstat);
152253084SaeSYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
153253084Sae    mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
154190012Sbms    "netinet/ip_mroute.h)");
155190012Sbms
156215701Sdimstatic VNET_DEFINE(u_long, mfchash);
157208744Szec#define	V_mfchash		VNET(mfchash)
158208744Szec#define	MFCHASH(a, g)							\
159190012Sbms	((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
160208744Szec	  ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash)
161208744Szec#define	MFCHASHSIZE	256
162190012Sbms
163208744Szecstatic u_long mfchashsize;			/* Hash size */
164215701Sdimstatic VNET_DEFINE(u_char *, nexpire);		/* 0..mfchashsize-1 */
165208744Szec#define	V_nexpire		VNET(nexpire)
166215701Sdimstatic VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
167208744Szec#define	V_mfchashtbl		VNET(mfchashtbl)
168190012Sbms
169119792Ssamstatic struct mtx mfc_mtx;
170190012Sbms#define	MFC_LOCK()		mtx_lock(&mfc_mtx)
171190012Sbms#define	MFC_UNLOCK()		mtx_unlock(&mfc_mtx)
172171744Srwatson#define	MFC_LOCK_ASSERT()	mtx_assert(&mfc_mtx, MA_OWNED)
173190012Sbms#define	MFC_LOCK_INIT()							\
174190012Sbms	mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
175119792Ssam#define	MFC_LOCK_DESTROY()	mtx_destroy(&mfc_mtx)
176119792Ssam
177215701Sdimstatic VNET_DEFINE(vifi_t, numvifs);
178208744Szec#define	V_numvifs		VNET(numvifs)
179215701Sdimstatic VNET_DEFINE(struct vif, viftable[MAXVIFS]);
180208744Szec#define	V_viftable		VNET(viftable)
181208744SzecSYSCTL_VNET_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD,
182208744Szec    &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
183190012Sbms    "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
1842531Swollman
185119792Ssamstatic struct mtx vif_mtx;
186190012Sbms#define	VIF_LOCK()		mtx_lock(&vif_mtx)
187190012Sbms#define	VIF_UNLOCK()		mtx_unlock(&vif_mtx)
188119792Ssam#define	VIF_LOCK_ASSERT()	mtx_assert(&vif_mtx, MA_OWNED)
189190012Sbms#define	VIF_LOCK_INIT()							\
190190012Sbms	mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF)
191119792Ssam#define	VIF_LOCK_DESTROY()	mtx_destroy(&vif_mtx)
192119792Ssam
193162719Sbmsstatic eventhandler_tag if_detach_event_tag = NULL;
194162719Sbms
195215701Sdimstatic VNET_DEFINE(struct callout, expire_upcalls_ch);
196208744Szec#define	V_expire_upcalls_ch	VNET(expire_upcalls_ch)
197208744Szec
1989209Swollman#define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
1999209Swollman#define		UPCALL_EXPIRE	6		/* number of timeouts	*/
2002531Swollman
2012531Swollman/*
202118622Shsu * Bandwidth meter variables and constants
203118622Shsu */
204118622Shsustatic MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
205118622Shsu/*
206118622Shsu * Pending timeouts are stored in a hash table, the key being the
207118622Shsu * expiration time. Periodically, the entries are analysed and processed.
208118622Shsu */
209208744Szec#define	BW_METER_BUCKETS	1024
210215701Sdimstatic VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
211208744Szec#define	V_bw_meter_timers	VNET(bw_meter_timers)
212215701Sdimstatic VNET_DEFINE(struct callout, bw_meter_ch);
213208744Szec#define	V_bw_meter_ch		VNET(bw_meter_ch)
214208744Szec#define	BW_METER_PERIOD (hz)		/* periodical handling of bw meters */
215118622Shsu
216118622Shsu/*
217118622Shsu * Pending upcalls are stored in a vector which is flushed when
218118622Shsu * full, or periodically
219118622Shsu */
220215701Sdimstatic VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
221208744Szec#define	V_bw_upcalls		VNET(bw_upcalls)
222215701Sdimstatic VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */
223208744Szec#define	V_bw_upcalls_n    	VNET(bw_upcalls_n)
224215701Sdimstatic VNET_DEFINE(struct callout, bw_upcalls_ch);
225208744Szec#define	V_bw_upcalls_ch		VNET(bw_upcalls_ch)
226208744Szec
227118622Shsu#define BW_UPCALLS_PERIOD (hz)		/* periodical flush of bw upcalls */
228118622Shsu
229253084Saestatic VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
230253084SaeVNET_PCPUSTAT_SYSINIT(pimstat);
231253084SaeVNET_PCPUSTAT_SYSUNINIT(pimstat);
232166622Sbms
233166622SbmsSYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
234253084SaeSYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat,
235253084Sae    pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)");
236118622Shsu
237166623Sbmsstatic u_long	pim_squelch_wholepkt = 0;
238166623SbmsSYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
239166623Sbms    &pim_squelch_wholepkt, 0,
240166623Sbms    "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
241166623Sbms
242166622Sbmsextern  struct domain inetdomain;
243190012Sbmsstatic const struct protosw in_pim_protosw = {
244166622Sbms	.pr_type =		SOCK_RAW,
245166622Sbms	.pr_domain =		&inetdomain,
246166622Sbms	.pr_protocol =		IPPROTO_PIM,
247166622Sbms	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
248166622Sbms	.pr_input =		pim_input,
249166622Sbms	.pr_output =		(pr_output_t*)rip_output,
250166622Sbms	.pr_ctloutput =		rip_ctloutput,
251166622Sbms	.pr_usrreqs =		&rip_usrreqs
252166622Sbms};
253166622Sbmsstatic const struct encaptab *pim_encap_cookie;
254166938Sbms
255166622Sbmsstatic int pim_encapcheck(const struct mbuf *, int, int, void *);
256166622Sbms
257118622Shsu/*
258118622Shsu * Note: the PIM Register encapsulation adds the following in front of a
259118622Shsu * data packet:
260118622Shsu *
261118622Shsu * struct pim_encap_hdr {
262118622Shsu *    struct ip ip;
263118622Shsu *    struct pim_encap_pimhdr  pim;
264118622Shsu * }
265118622Shsu *
266118622Shsu */
267118622Shsu
268118622Shsustruct pim_encap_pimhdr {
269118622Shsu	struct pim pim;
270118622Shsu	uint32_t   flags;
271118622Shsu};
272190012Sbms#define		PIM_ENCAP_TTL	64
273118622Shsu
274118622Shsustatic struct ip pim_encap_iphdr = {
275118622Shsu#if BYTE_ORDER == LITTLE_ENDIAN
276118622Shsu	sizeof(struct ip) >> 2,
277118622Shsu	IPVERSION,
278118622Shsu#else
279118622Shsu	IPVERSION,
280118622Shsu	sizeof(struct ip) >> 2,
281118622Shsu#endif
282118622Shsu	0,			/* tos */
283118622Shsu	sizeof(struct ip),	/* total length */
284118622Shsu	0,			/* id */
285133874Srwatson	0,			/* frag offset */
286190012Sbms	PIM_ENCAP_TTL,
287118622Shsu	IPPROTO_PIM,
288118622Shsu	0,			/* checksum */
289118622Shsu};
290118622Shsu
291118622Shsustatic struct pim_encap_pimhdr pim_encap_pimhdr = {
292118622Shsu    {
293118622Shsu	PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */
294118622Shsu	0,			/* reserved */
295118622Shsu	0,			/* checksum */
296118622Shsu    },
297118622Shsu    0				/* flags */
298118622Shsu};
299118622Shsu
300215701Sdimstatic VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID;
301208744Szec#define	V_reg_vif_num		VNET(reg_vif_num)
302215701Sdimstatic VNET_DEFINE(struct ifnet, multicast_register_if);
303208744Szec#define	V_multicast_register_if	VNET(multicast_register_if)
304118622Shsu
305118622Shsu/*
3061541Srgrimes * Private variables.
3071541Srgrimes */
3081541Srgrimes
309190012Sbmsstatic u_long	X_ip_mcast_src(int);
310190012Sbmsstatic int	X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
311190012Sbms		    struct ip_moptions *);
31292723Salfredstatic int	X_ip_mrouter_done(void);
313190012Sbmsstatic int	X_ip_mrouter_get(struct socket *, struct sockopt *);
314190012Sbmsstatic int	X_ip_mrouter_set(struct socket *, struct sockopt *);
315190012Sbmsstatic int	X_legal_vif_num(int);
316194581Srdivackystatic int	X_mrt_ioctl(u_long, caddr_t, int);
31712579Sbde
318190012Sbmsstatic int	add_bw_upcall(struct bw_upcall *);
319190012Sbmsstatic int	add_mfc(struct mfcctl2 *);
320190012Sbmsstatic int	add_vif(struct vifctl *);
321190012Sbmsstatic void	bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
322190012Sbmsstatic void	bw_meter_process(void);
323190012Sbmsstatic void	bw_meter_receive_packet(struct bw_meter *, int,
324190012Sbms		    struct timeval *);
325190012Sbmsstatic void	bw_upcalls_send(void);
326190012Sbmsstatic int	del_bw_upcall(struct bw_upcall *);
327190012Sbmsstatic int	del_mfc(struct mfcctl2 *);
328190012Sbmsstatic int	del_vif(vifi_t);
329190012Sbmsstatic int	del_vif_locked(vifi_t);
330190012Sbmsstatic void	expire_bw_meter_process(void *);
331190012Sbmsstatic void	expire_bw_upcalls_send(void *);
332190012Sbmsstatic void	expire_mfc(struct mfc *);
333190012Sbmsstatic void	expire_upcalls(void *);
334190012Sbmsstatic void	free_bw_list(struct bw_meter *);
335190012Sbmsstatic int	get_sg_cnt(struct sioc_sg_req *);
336190012Sbmsstatic int	get_vif_cnt(struct sioc_vif_req *);
337190012Sbmsstatic void	if_detached_event(void *, struct ifnet *);
338190012Sbmsstatic int	ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
339190012Sbmsstatic int	ip_mrouter_init(struct socket *, int);
340190012Sbmsstatic __inline struct mfc *
341190012Sbms		mfc_find(struct in_addr *, struct in_addr *);
342190012Sbmsstatic void	phyint_send(struct ip *, struct vif *, struct mbuf *);
343190012Sbmsstatic struct mbuf *
344190012Sbms		pim_register_prepare(struct ip *, struct mbuf *);
345190012Sbmsstatic int	pim_register_send(struct ip *, struct vif *,
346190012Sbms		    struct mbuf *, struct mfc *);
347190012Sbmsstatic int	pim_register_send_rp(struct ip *, struct vif *,
348190012Sbms		    struct mbuf *, struct mfc *);
349190012Sbmsstatic int	pim_register_send_upcall(struct ip *, struct vif *,
350190012Sbms		    struct mbuf *, struct mfc *);
351190012Sbmsstatic void	schedule_bw_meter(struct bw_meter *, struct timeval *);
352190012Sbmsstatic void	send_packet(struct vif *, struct mbuf *);
353190012Sbmsstatic int	set_api_config(uint32_t *);
354190012Sbmsstatic int	set_assert(int);
355190012Sbmsstatic int	socket_send(struct socket *, struct mbuf *,
356190012Sbms		    struct sockaddr_in *);
357190012Sbmsstatic void	unschedule_bw_meter(struct bw_meter *);
3582531Swollman
3592531Swollman/*
360190012Sbms * Kernel multicast forwarding API capabilities and setup.
361118622Shsu * If more API capabilities are added to the kernel, they should be
362118622Shsu * recorded in `mrt_api_support'.
363118622Shsu */
364190012Sbms#define MRT_API_VERSION		0x0305
365190012Sbms
366190012Sbmsstatic const int mrt_api_version = MRT_API_VERSION;
367118622Shsustatic const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
368118622Shsu					 MRT_MFC_FLAGS_BORDER_VIF |
369118622Shsu					 MRT_MFC_RP |
370118622Shsu					 MRT_MFC_BW_UPCALL);
371215701Sdimstatic VNET_DEFINE(uint32_t, mrt_api_config);
372208744Szec#define	V_mrt_api_config	VNET(mrt_api_config)
373215701Sdimstatic VNET_DEFINE(int, pim_assert_enabled);
374208744Szec#define	V_pim_assert_enabled	VNET(pim_assert_enabled)
375190012Sbmsstatic struct timeval pim_assert_interval = { 3, 0 };	/* Rate limit */
3762531Swollman
3772531Swollman/*
378190012Sbms * Find a route for a given origin IP address and multicast group address.
379190012Sbms * Statistics must be updated by the caller.
3802531Swollman */
381190012Sbmsstatic __inline struct mfc *
382190012Sbmsmfc_find(struct in_addr *o, struct in_addr *g)
383106968Sluigi{
384190012Sbms	struct mfc *rt;
3859209Swollman
386190012Sbms	MFC_LOCK_ASSERT();
387119792Ssam
388208744Szec	LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {
389190012Sbms		if (in_hosteq(rt->mfc_origin, *o) &&
390190012Sbms		    in_hosteq(rt->mfc_mcastgrp, *g) &&
391190012Sbms		    TAILQ_EMPTY(&rt->mfc_stall))
392190012Sbms			break;
393190012Sbms	}
3942531Swollman
395190012Sbms	return (rt);
3962531Swollman}
3972531Swollman
3982531Swollman/*
399190012Sbms * Handle MRT setsockopt commands to modify the multicast forwarding tables.
4001541Srgrimes */
40112296Sphkstatic int
402106968SluigiX_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
4031541Srgrimes{
404106968Sluigi    int	error, optval;
405106968Sluigi    vifi_t	vifi;
406106968Sluigi    struct	vifctl vifc;
407118622Shsu    struct	mfcctl2 mfc;
408118622Shsu    struct	bw_upcall bw_upcall;
409118622Shsu    uint32_t	i;
4101541Srgrimes
411181803Sbz    if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT)
412106968Sluigi	return EPERM;
41338482Swollman
414106968Sluigi    error = 0;
415106968Sluigi    switch (sopt->sopt_name) {
416106968Sluigi    case MRT_INIT:
417106968Sluigi	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
418106968Sluigi	if (error)
419106968Sluigi	    break;
420106968Sluigi	error = ip_mrouter_init(so, optval);
421106968Sluigi	break;
42238482Swollman
423106968Sluigi    case MRT_DONE:
424106968Sluigi	error = ip_mrouter_done();
425106968Sluigi	break;
42638482Swollman
427106968Sluigi    case MRT_ADD_VIF:
428106968Sluigi	error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
429106968Sluigi	if (error)
430106968Sluigi	    break;
431106968Sluigi	error = add_vif(&vifc);
432106968Sluigi	break;
43338482Swollman
434106968Sluigi    case MRT_DEL_VIF:
435106968Sluigi	error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
436106968Sluigi	if (error)
437106968Sluigi	    break;
438106968Sluigi	error = del_vif(vifi);
439106968Sluigi	break;
44038482Swollman
441106968Sluigi    case MRT_ADD_MFC:
442106968Sluigi    case MRT_DEL_MFC:
443118622Shsu	/*
444118622Shsu	 * select data size depending on API version.
445118622Shsu	 */
446118622Shsu	if (sopt->sopt_name == MRT_ADD_MFC &&
447208744Szec		V_mrt_api_config & MRT_API_FLAGS_ALL) {
448118622Shsu	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2),
449118622Shsu				sizeof(struct mfcctl2));
450118622Shsu	} else {
451118622Shsu	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl),
452118622Shsu				sizeof(struct mfcctl));
453118622Shsu	    bzero((caddr_t)&mfc + sizeof(struct mfcctl),
454118622Shsu			sizeof(mfc) - sizeof(struct mfcctl));
455118622Shsu	}
456106968Sluigi	if (error)
457106968Sluigi	    break;
458106968Sluigi	if (sopt->sopt_name == MRT_ADD_MFC)
459106968Sluigi	    error = add_mfc(&mfc);
460106968Sluigi	else
461106968Sluigi	    error = del_mfc(&mfc);
462106968Sluigi	break;
46338482Swollman
464106968Sluigi    case MRT_ASSERT:
465106968Sluigi	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
466106968Sluigi	if (error)
467106968Sluigi	    break;
468106968Sluigi	set_assert(optval);
469106968Sluigi	break;
47038482Swollman
471118622Shsu    case MRT_API_CONFIG:
472118622Shsu	error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
473118622Shsu	if (!error)
474118622Shsu	    error = set_api_config(&i);
475118622Shsu	if (!error)
476118622Shsu	    error = sooptcopyout(sopt, &i, sizeof i);
477118622Shsu	break;
478118622Shsu
479118622Shsu    case MRT_ADD_BW_UPCALL:
480118622Shsu    case MRT_DEL_BW_UPCALL:
481118622Shsu	error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall,
482118622Shsu				sizeof bw_upcall);
483118622Shsu	if (error)
484118622Shsu	    break;
485118622Shsu	if (sopt->sopt_name == MRT_ADD_BW_UPCALL)
486118622Shsu	    error = add_bw_upcall(&bw_upcall);
487118622Shsu	else
488118622Shsu	    error = del_bw_upcall(&bw_upcall);
489118622Shsu	break;
490118622Shsu
491106968Sluigi    default:
492106968Sluigi	error = EOPNOTSUPP;
493106968Sluigi	break;
494106968Sluigi    }
495106968Sluigi    return error;
4962531Swollman}
4971541Srgrimes
4982531Swollman/*
4999209Swollman * Handle MRT getsockopt commands
5009209Swollman */
50112296Sphkstatic int
502106968SluigiX_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
5039209Swollman{
504106968Sluigi    int error;
5059209Swollman
506106968Sluigi    switch (sopt->sopt_name) {
507106968Sluigi    case MRT_VERSION:
508190012Sbms	error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version);
509106968Sluigi	break;
5109209Swollman
511106968Sluigi    case MRT_ASSERT:
512208744Szec	error = sooptcopyout(sopt, &V_pim_assert_enabled,
513208744Szec	    sizeof V_pim_assert_enabled);
514106968Sluigi	break;
515106968Sluigi
516118622Shsu    case MRT_API_SUPPORT:
517118622Shsu	error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support);
518118622Shsu	break;
519118622Shsu
520118622Shsu    case MRT_API_CONFIG:
521208744Szec	error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config);
522118622Shsu	break;
523118622Shsu
524106968Sluigi    default:
525106968Sluigi	error = EOPNOTSUPP;
526106968Sluigi	break;
527106968Sluigi    }
528106968Sluigi    return error;
5299209Swollman}
5309209Swollman
5319209Swollman/*
5322531Swollman * Handle ioctl commands to obtain information from the cache
5332531Swollman */
53412296Sphkstatic int
535194581SrdivackyX_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
5362531Swollman{
5372531Swollman    int error = 0;
5381541Srgrimes
539134122Scsjp    /*
540134122Scsjp     * Currently the only function calling this ioctl routine is rtioctl().
541134122Scsjp     * Typically, only root can create the raw socket in order to execute
542134122Scsjp     * this ioctl method, however the request might be coming from a prison
543134122Scsjp     */
544164033Srwatson    error = priv_check(curthread, PRIV_NETINET_MROUTE);
545134122Scsjp    if (error)
546134122Scsjp	return (error);
5472531Swollman    switch (cmd) {
548106968Sluigi    case (SIOCGETVIFCNT):
549106968Sluigi	error = get_vif_cnt((struct sioc_vif_req *)data);
550106968Sluigi	break;
551106968Sluigi
552106968Sluigi    case (SIOCGETSGCNT):
553106968Sluigi	error = get_sg_cnt((struct sioc_sg_req *)data);
554106968Sluigi	break;
555106968Sluigi
556106968Sluigi    default:
557106968Sluigi	error = EINVAL;
558106968Sluigi	break;
5592531Swollman    }
5602531Swollman    return error;
5612531Swollman}
5621541Srgrimes
5632531Swollman/*
5649209Swollman * returns the packet, byte, rpf-failure count for the source group provided
5652531Swollman */
5669209Swollmanstatic int
567106968Sluigiget_sg_cnt(struct sioc_sg_req *req)
5682531Swollman{
569106968Sluigi    struct mfc *rt;
5701541Srgrimes
571119792Ssam    MFC_LOCK();
572190012Sbms    rt = mfc_find(&req->src, &req->grp);
573106968Sluigi    if (rt == NULL) {
574119792Ssam	MFC_UNLOCK();
5759209Swollman	req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
576106968Sluigi	return EADDRNOTAVAIL;
577106968Sluigi    }
578106968Sluigi    req->pktcnt = rt->mfc_pkt_cnt;
579106968Sluigi    req->bytecnt = rt->mfc_byte_cnt;
580106968Sluigi    req->wrong_if = rt->mfc_wrong_if;
581119792Ssam    MFC_UNLOCK();
5822531Swollman    return 0;
5832531Swollman}
5841541Srgrimes
5852531Swollman/*
5869209Swollman * returns the input and output packet and byte counts on the vif provided
5872531Swollman */
5889209Swollmanstatic int
589106968Sluigiget_vif_cnt(struct sioc_vif_req *req)
5902531Swollman{
591106968Sluigi    vifi_t vifi = req->vifi;
5921541Srgrimes
593119792Ssam    VIF_LOCK();
594208744Szec    if (vifi >= V_numvifs) {
595119792Ssam	VIF_UNLOCK();
596106968Sluigi	return EINVAL;
597119792Ssam    }
5989209Swollman
599208744Szec    req->icount = V_viftable[vifi].v_pkt_in;
600208744Szec    req->ocount = V_viftable[vifi].v_pkt_out;
601208744Szec    req->ibytes = V_viftable[vifi].v_bytes_in;
602208744Szec    req->obytes = V_viftable[vifi].v_bytes_out;
603119792Ssam    VIF_UNLOCK();
6041541Srgrimes
6052531Swollman    return 0;
6062531Swollman}
6072531Swollman
608121446Ssamstatic void
609162719Sbmsif_detached_event(void *arg __unused, struct ifnet *ifp)
610162719Sbms{
611162719Sbms    vifi_t vifi;
612255248Sjhb    u_long i;
613162719Sbms
614167116Sbms    MROUTER_LOCK();
615190012Sbms
616181803Sbz    if (V_ip_mrouter == NULL) {
617167116Sbms	MROUTER_UNLOCK();
618190012Sbms	return;
619162719Sbms    }
620162719Sbms
621190012Sbms    VIF_LOCK();
622190012Sbms    MFC_LOCK();
623190012Sbms
624162719Sbms    /*
625162719Sbms     * Tear down multicast forwarder state associated with this ifnet.
626162719Sbms     * 1. Walk the vif list, matching vifs against this ifnet.
627162719Sbms     * 2. Walk the multicast forwarding cache (mfc) looking for
628162719Sbms     *    inner matches with this vif's index.
629190012Sbms     * 3. Expire any matching multicast forwarding cache entries.
630190012Sbms     * 4. Free vif state. This should disable ALLMULTI on the interface.
631162719Sbms     */
632208744Szec    for (vifi = 0; vifi < V_numvifs; vifi++) {
633208744Szec	if (V_viftable[vifi].v_ifp != ifp)
634162719Sbms		continue;
635190012Sbms	for (i = 0; i < mfchashsize; i++) {
636190012Sbms		struct mfc *rt, *nrt;
637255249Sjhb
638255249Sjhb		LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
639190012Sbms			if (rt->mfc_parent == vifi) {
640190012Sbms				expire_mfc(rt);
641190012Sbms			}
642162719Sbms		}
643162719Sbms	}
644162719Sbms	del_vif_locked(vifi);
645162719Sbms    }
646190012Sbms
647162719Sbms    MFC_UNLOCK();
648162719Sbms    VIF_UNLOCK();
649162719Sbms
650167116Sbms    MROUTER_UNLOCK();
651162719Sbms}
652162719Sbms
6531541Srgrimes/*
654190012Sbms * Enable multicast forwarding.
6551541Srgrimes */
65612296Sphkstatic int
657106968Sluigiip_mrouter_init(struct socket *so, int version)
6581541Srgrimes{
659183550Szec
660190054Sbms    CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__,
661190054Sbms        so->so_type, so->so_proto->pr_protocol);
6629209Swollman
663106968Sluigi    if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP)
664106968Sluigi	return EOPNOTSUPP;
6651541Srgrimes
66638482Swollman    if (version != 1)
6679209Swollman	return ENOPROTOOPT;
6689209Swollman
669167116Sbms    MROUTER_LOCK();
670123690Ssam
671208744Szec    if (ip_mrouter_unloading) {
672167116Sbms	MROUTER_UNLOCK();
673208744Szec	return ENOPROTOOPT;
674123690Ssam    }
6751541Srgrimes
676208744Szec    if (V_ip_mrouter != NULL) {
677167116Sbms	MROUTER_UNLOCK();
678208744Szec	return EADDRINUSE;
679166972Sbms    }
680162719Sbms
681208744Szec    V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash,
682208744Szec	HASH_NOWAIT);
683190012Sbms
684208744Szec    callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
685208744Szec	curvnet);
686208744Szec    callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
687208744Szec	curvnet);
688208744Szec    callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
689208744Szec	curvnet);
6909209Swollman
691181803Sbz    V_ip_mrouter = so;
692208744Szec    ip_mrouter_cnt++;
693119792Ssam
694167116Sbms    MROUTER_UNLOCK();
695123690Ssam
696190054Sbms    CTR1(KTR_IPMF, "%s: done", __func__);
6972531Swollman
6982531Swollman    return 0;
6991541Srgrimes}
7001541Srgrimes
7011541Srgrimes/*
702190012Sbms * Disable multicast forwarding.
7031541Srgrimes */
70412296Sphkstatic int
705106968SluigiX_ip_mrouter_done(void)
7061541Srgrimes{
707255235Sae    struct ifnet *ifp;
708255248Sjhb    u_long i;
7092531Swollman    vifi_t vifi;
7101541Srgrimes
711167116Sbms    MROUTER_LOCK();
712123690Ssam
713181803Sbz    if (V_ip_mrouter == NULL) {
714167116Sbms	MROUTER_UNLOCK();
715123690Ssam	return EINVAL;
716123690Ssam    }
717123690Ssam
718119792Ssam    /*
719119792Ssam     * Detach/disable hooks to the reset of the system.
720119792Ssam     */
721181803Sbz    V_ip_mrouter = NULL;
722208744Szec    ip_mrouter_cnt--;
723208744Szec    V_mrt_api_config = 0;
7241541Srgrimes
725121700Ssam    VIF_LOCK();
726190012Sbms
7272531Swollman    /*
7282531Swollman     * For each phyint in use, disable promiscuous reception of all IP
7292531Swollman     * multicasts.
7302531Swollman     */
731208744Szec    for (vifi = 0; vifi < V_numvifs; vifi++) {
732208744Szec	if (!in_nullhost(V_viftable[vifi].v_lcl_addr) &&
733208744Szec		!(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
734208744Szec	    ifp = V_viftable[vifi].v_ifp;
73521666Swollman	    if_allmulti(ifp, 0);
7362531Swollman	}
7372531Swollman    }
738208744Szec    bzero((caddr_t)V_viftable, sizeof(V_viftable));
739208744Szec    V_numvifs = 0;
740208744Szec    V_pim_assert_enabled = 0;
741208744Szec
742123690Ssam    VIF_UNLOCK();
743190012Sbms
744208744Szec    callout_stop(&V_expire_upcalls_ch);
745208744Szec    callout_stop(&V_bw_upcalls_ch);
746208744Szec    callout_stop(&V_bw_meter_ch);
7472531Swollman
748121700Ssam    MFC_LOCK();
74942777Sfenner
750190012Sbms    /*
751190012Sbms     * Free all multicast forwarding cache entries.
752190012Sbms     * Do not use hashdestroy(), as we must perform other cleanup.
753190012Sbms     */
754190012Sbms    for (i = 0; i < mfchashsize; i++) {
755190012Sbms	struct mfc *rt, *nrt;
756255249Sjhb
757255249Sjhb	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
758190012Sbms		expire_mfc(rt);
7591541Srgrimes	}
7609209Swollman    }
761208744Szec    free(V_mfchashtbl, M_MRTABLE);
762208744Szec    V_mfchashtbl = NULL;
763190012Sbms
764208744Szec    bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
765190012Sbms
766208744Szec    V_bw_upcalls_n = 0;
767208744Szec    bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
768190012Sbms
769123690Ssam    MFC_UNLOCK();
770118622Shsu
771208744Szec    V_reg_vif_num = VIFI_INVALID;
772118622Shsu
773167116Sbms    MROUTER_UNLOCK();
774123690Ssam
775190054Sbms    CTR1(KTR_IPMF, "%s: done", __func__);
7762531Swollman
7772531Swollman    return 0;
7781541Srgrimes}
7791541Srgrimes
7801541Srgrimes/*
7819209Swollman * Set PIM assert processing global
7829209Swollman */
7839209Swollmanstatic int
784106968Sluigiset_assert(int i)
7859209Swollman{
78638482Swollman    if ((i != 1) && (i != 0))
7879209Swollman	return EINVAL;
7889209Swollman
789208744Szec    V_pim_assert_enabled = i;
7909209Swollman
7919209Swollman    return 0;
7929209Swollman}
7939209Swollman
7949209Swollman/*
795118622Shsu * Configure API capabilities
796118622Shsu */
797118622Shsuint
798118622Shsuset_api_config(uint32_t *apival)
799118622Shsu{
800255248Sjhb    u_long i;
801118622Shsu
802118622Shsu    /*
803118622Shsu     * We can set the API capabilities only if it is the first operation
804118622Shsu     * after MRT_INIT. I.e.:
805118622Shsu     *  - there are no vifs installed
806118622Shsu     *  - pim_assert is not enabled
807118622Shsu     *  - the MFC table is empty
808118622Shsu     */
809208744Szec    if (V_numvifs > 0) {
810118622Shsu	*apival = 0;
811118622Shsu	return EPERM;
812118622Shsu    }
813208744Szec    if (V_pim_assert_enabled) {
814118622Shsu	*apival = 0;
815118622Shsu	return EPERM;
816118622Shsu    }
817190012Sbms
818190012Sbms    MFC_LOCK();
819190012Sbms
820190012Sbms    for (i = 0; i < mfchashsize; i++) {
821208744Szec	if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) {
822249559Sdelphij	    MFC_UNLOCK();
823118622Shsu	    *apival = 0;
824118622Shsu	    return EPERM;
825118622Shsu	}
826118622Shsu    }
827118622Shsu
828190012Sbms    MFC_UNLOCK();
829190012Sbms
830208744Szec    V_mrt_api_config = *apival & mrt_api_support;
831208744Szec    *apival = V_mrt_api_config;
832118622Shsu
833118622Shsu    return 0;
834118622Shsu}
835118622Shsu
836118622Shsu/*
8371541Srgrimes * Add a vif to the vif table
8381541Srgrimes */
8391541Srgrimesstatic int
840106968Sluigiadd_vif(struct vifctl *vifcp)
8411541Srgrimes{
842208744Szec    struct vif *vifp = V_viftable + vifcp->vifc_vifi;
843106968Sluigi    struct sockaddr_in sin = {sizeof sin, AF_INET};
8442531Swollman    struct ifaddr *ifa;
8452531Swollman    struct ifnet *ifp;
846119792Ssam    int error;
8471541Srgrimes
848119792Ssam    VIF_LOCK();
849119792Ssam    if (vifcp->vifc_vifi >= MAXVIFS) {
850119792Ssam	VIF_UNLOCK();
851106968Sluigi	return EINVAL;
852119792Ssam    }
853166575Sbms    /* rate limiting is no longer supported by this code */
854166575Sbms    if (vifcp->vifc_rate_limit != 0) {
855166575Sbms	log(LOG_ERR, "rate limiting is no longer supported\n");
856166575Sbms	VIF_UNLOCK();
857166575Sbms	return EINVAL;
858166575Sbms    }
859190012Sbms    if (!in_nullhost(vifp->v_lcl_addr)) {
860119792Ssam	VIF_UNLOCK();
861106968Sluigi	return EADDRINUSE;
862119792Ssam    }
863190012Sbms    if (in_nullhost(vifcp->vifc_lcl_addr)) {
864119792Ssam	VIF_UNLOCK();
865106968Sluigi	return EADDRNOTAVAIL;
866119792Ssam    }
8671541Srgrimes
8682531Swollman    /* Find the interface with an address in AF_INET family */
869118622Shsu    if (vifcp->vifc_flags & VIFF_REGISTER) {
870118622Shsu	/*
871118622Shsu	 * XXX: Because VIFF_REGISTER does not really need a valid
872118622Shsu	 * local interface (e.g. it could be 127.0.0.2), we don't
873118622Shsu	 * check its address.
874118622Shsu	 */
875118622Shsu	ifp = NULL;
876166622Sbms    } else {
877118622Shsu	sin.sin_addr = vifcp->vifc_lcl_addr;
878118622Shsu	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
879119792Ssam	if (ifa == NULL) {
880119792Ssam	    VIF_UNLOCK();
881118622Shsu	    return EADDRNOTAVAIL;
882119792Ssam	}
883118622Shsu	ifp = ifa->ifa_ifp;
884194760Srwatson	ifa_free(ifa);
885118622Shsu    }
8861541Srgrimes
887166549Sbms    if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
888190054Sbms	CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__);
889166549Sbms	VIF_UNLOCK();
890166549Sbms	return EOPNOTSUPP;
891118622Shsu    } else if (vifcp->vifc_flags & VIFF_REGISTER) {
892208744Szec	ifp = &V_multicast_register_if;
893190054Sbms	CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp);
894208744Szec	if (V_reg_vif_num == VIFI_INVALID) {
895208744Szec	    if_initname(&V_multicast_register_if, "register_vif", 0);
896208744Szec	    V_multicast_register_if.if_flags = IFF_LOOPBACK;
897208744Szec	    V_reg_vif_num = vifcp->vifc_vifi;
898118622Shsu	}
899106968Sluigi    } else {		/* Make sure the interface supports multicast */
900119792Ssam	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
901119792Ssam	    VIF_UNLOCK();
9022531Swollman	    return EOPNOTSUPP;
903119792Ssam	}
9041541Srgrimes
9052531Swollman	/* Enable promiscuous reception of all IP multicasts from the if */
90622967Swollman	error = if_allmulti(ifp, 1);
907119792Ssam	if (error) {
908119792Ssam	    VIF_UNLOCK();
9092531Swollman	    return error;
910119792Ssam	}
9112531Swollman    }
9121541Srgrimes
9132531Swollman    vifp->v_flags     = vifcp->vifc_flags;
9142531Swollman    vifp->v_threshold = vifcp->vifc_threshold;
9152531Swollman    vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
9162531Swollman    vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
9172531Swollman    vifp->v_ifp       = ifp;
9182531Swollman    /* initialize per vif pkt counters */
9192531Swollman    vifp->v_pkt_in    = 0;
9202531Swollman    vifp->v_pkt_out   = 0;
9219209Swollman    vifp->v_bytes_in  = 0;
9229209Swollman    vifp->v_bytes_out = 0;
9232531Swollman
9242531Swollman    /* Adjust numvifs up if the vifi is higher than numvifs */
925208744Szec    if (V_numvifs <= vifcp->vifc_vifi)
926208744Szec	V_numvifs = vifcp->vifc_vifi + 1;
9272531Swollman
928119792Ssam    VIF_UNLOCK();
929119792Ssam
930190054Sbms    CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__,
931190148Sbms	(int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr),
932190054Sbms	(int)vifcp->vifc_threshold);
9332531Swollman
9342531Swollman    return 0;
9351541Srgrimes}
9361541Srgrimes
9371541Srgrimes/*
9381541Srgrimes * Delete a vif from the vif table
9391541Srgrimes */
9401541Srgrimesstatic int
941162719Sbmsdel_vif_locked(vifi_t vifi)
9421541Srgrimes{
943106968Sluigi    struct vif *vifp;
9441541Srgrimes
945162719Sbms    VIF_LOCK_ASSERT();
946119792Ssam
947208744Szec    if (vifi >= V_numvifs) {
948106968Sluigi	return EINVAL;
949119792Ssam    }
950208744Szec    vifp = &V_viftable[vifi];
951190012Sbms    if (in_nullhost(vifp->v_lcl_addr)) {
952106968Sluigi	return EADDRNOTAVAIL;
953119792Ssam    }
9541541Srgrimes
955118622Shsu    if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)))
956106968Sluigi	if_allmulti(vifp->v_ifp, 0);
9571541Srgrimes
958118622Shsu    if (vifp->v_flags & VIFF_REGISTER)
959208744Szec	V_reg_vif_num = VIFI_INVALID;
960118622Shsu
9612531Swollman    bzero((caddr_t)vifp, sizeof (*vifp));
9621541Srgrimes
963190054Sbms    CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi);
96438482Swollman
9652531Swollman    /* Adjust numvifs down */
966208744Szec    for (vifi = V_numvifs; vifi > 0; vifi--)
967208744Szec	if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr))
968106968Sluigi	    break;
969208744Szec    V_numvifs = vifi;
9702531Swollman
971162719Sbms    return 0;
972162719Sbms}
973162719Sbms
974162719Sbmsstatic int
975162719Sbmsdel_vif(vifi_t vifi)
976162719Sbms{
977162719Sbms    int cc;
978162719Sbms
979162719Sbms    VIF_LOCK();
980162719Sbms    cc = del_vif_locked(vifi);
981119792Ssam    VIF_UNLOCK();
9822531Swollman
983162719Sbms    return cc;
9841541Srgrimes}
9851541Srgrimes
9861541Srgrimes/*
987106968Sluigi * update an mfc entry without resetting counters and S,G addresses.
988106968Sluigi */
989106968Sluigistatic void
990118622Shsuupdate_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
991106968Sluigi{
992106968Sluigi    int i;
993106968Sluigi
994106968Sluigi    rt->mfc_parent = mfccp->mfcc_parent;
995208744Szec    for (i = 0; i < V_numvifs; i++) {
996106968Sluigi	rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
997208744Szec	rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config &
998118622Shsu	    MRT_MFC_FLAGS_ALL;
999118622Shsu    }
1000118622Shsu    /* set the RP address */
1001208744Szec    if (V_mrt_api_config & MRT_MFC_RP)
1002118622Shsu	rt->mfc_rp = mfccp->mfcc_rp;
1003118622Shsu    else
1004118622Shsu	rt->mfc_rp.s_addr = INADDR_ANY;
1005106968Sluigi}
1006106968Sluigi
1007106968Sluigi/*
1008106968Sluigi * fully initialize an mfc entry from the parameter.
1009106968Sluigi */
1010106968Sluigistatic void
1011118622Shsuinit_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
1012106968Sluigi{
1013106968Sluigi    rt->mfc_origin     = mfccp->mfcc_origin;
1014106968Sluigi    rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
1015106968Sluigi
1016106968Sluigi    update_mfc_params(rt, mfccp);
1017106968Sluigi
1018106968Sluigi    /* initialize pkt counters per src-grp */
1019106968Sluigi    rt->mfc_pkt_cnt    = 0;
1020106968Sluigi    rt->mfc_byte_cnt   = 0;
1021106968Sluigi    rt->mfc_wrong_if   = 0;
1022190012Sbms    timevalclear(&rt->mfc_last_assert);
1023106968Sluigi}
1024106968Sluigi
1025190012Sbmsstatic void
1026190012Sbmsexpire_mfc(struct mfc *rt)
1027190012Sbms{
1028190012Sbms	struct rtdetq *rte, *nrte;
1029106968Sluigi
1030197148Sbms	MFC_LOCK_ASSERT();
1031197148Sbms
1032190012Sbms	free_bw_list(rt->mfc_bw_meter);
1033190012Sbms
1034190012Sbms	TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
1035190012Sbms		m_freem(rte->m);
1036190012Sbms		TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
1037190012Sbms		free(rte, M_MRTABLE);
1038190012Sbms	}
1039190012Sbms
1040190012Sbms	LIST_REMOVE(rt, mfc_hash);
1041190012Sbms	free(rt, M_MRTABLE);
1042190012Sbms}
1043190012Sbms
1044106968Sluigi/*
10452531Swollman * Add an mfc entry
10461541Srgrimes */
10471541Srgrimesstatic int
1048118622Shsuadd_mfc(struct mfcctl2 *mfccp)
10491541Srgrimes{
10502531Swollman    struct mfc *rt;
1051190012Sbms    struct rtdetq *rte, *nrte;
1052190012Sbms    u_long hash = 0;
1053106968Sluigi    u_short nstl;
10541541Srgrimes
1055119792Ssam    VIF_LOCK();
1056119792Ssam    MFC_LOCK();
1057119792Ssam
1058190012Sbms    rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
10591541Srgrimes
10602531Swollman    /* If an entry already exists, just update the fields */
10612531Swollman    if (rt) {
1062190054Sbms	CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x",
1063190148Sbms	    __func__, inet_ntoa(mfccp->mfcc_origin),
1064190054Sbms	    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
1065190054Sbms	    mfccp->mfcc_parent);
1066106968Sluigi	update_mfc_params(rt, mfccp);
1067119792Ssam	MFC_UNLOCK();
1068119792Ssam	VIF_UNLOCK();
1069190012Sbms	return (0);
10702531Swollman    }
10711541Srgrimes
1072133874Srwatson    /*
10732531Swollman     * Find the entry for which the upcall was made and update
10742531Swollman     */
1075190012Sbms    nstl = 0;
1076190012Sbms    hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
1077208744Szec    LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
1078190012Sbms	if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
1079190012Sbms	    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
1080190012Sbms	    !TAILQ_EMPTY(&rt->mfc_stall)) {
1081190054Sbms		CTR5(KTR_IPMF,
1082190054Sbms		    "%s: add mfc orig %s group %lx parent %x qh %p",
1083190148Sbms		    __func__, inet_ntoa(mfccp->mfcc_origin),
1084190054Sbms		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
1085190054Sbms		    mfccp->mfcc_parent,
1086190054Sbms		    TAILQ_FIRST(&rt->mfc_stall));
1087190054Sbms		if (nstl++)
1088190054Sbms			CTR1(KTR_IPMF, "%s: multiple matches", __func__);
10891541Srgrimes
1090190012Sbms		init_mfc_params(rt, mfccp);
1091190012Sbms		rt->mfc_expire = 0;	/* Don't clean this guy up */
1092208744Szec		V_nexpire[hash]--;
10931541Srgrimes
1094190012Sbms		/* Free queued packets, but attempt to forward them first. */
1095190012Sbms		TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
1096190012Sbms			if (rte->ifp != NULL)
1097190012Sbms				ip_mdq(rte->m, rte->ifp, rt, -1);
1098190012Sbms			m_freem(rte->m);
1099190012Sbms			TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
1100190012Sbms			rt->mfc_nstall--;
1101190012Sbms			free(rte, M_MRTABLE);
1102190012Sbms		}
11031541Srgrimes	}
11042531Swollman    }
11051541Srgrimes
11062531Swollman    /*
11072531Swollman     * It is possible that an entry is being inserted without an upcall
11082531Swollman     */
11092531Swollman    if (nstl == 0) {
1110190054Sbms	CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__);
1111208744Szec	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
1112190012Sbms		if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
1113190012Sbms		    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {
1114190012Sbms			init_mfc_params(rt, mfccp);
1115190012Sbms			if (rt->mfc_expire)
1116208744Szec			    V_nexpire[hash]--;
1117190012Sbms			rt->mfc_expire = 0;
1118190012Sbms			break; /* XXX */
1119190012Sbms		}
11202531Swollman	}
1121190012Sbms
1122106968Sluigi	if (rt == NULL) {		/* no upcall, so make a new entry */
112342777Sfenner	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
112442777Sfenner	    if (rt == NULL) {
1125119792Ssam		MFC_UNLOCK();
1126119792Ssam		VIF_UNLOCK();
1127190012Sbms		return (ENOBUFS);
11282531Swollman	    }
1129133874Srwatson
1130106968Sluigi	    init_mfc_params(rt, mfccp);
1131190012Sbms	    TAILQ_INIT(&rt->mfc_stall);
1132190012Sbms	    rt->mfc_nstall = 0;
1133190012Sbms
11349209Swollman	    rt->mfc_expire     = 0;
1135190012Sbms	    rt->mfc_bw_meter = NULL;
1136133874Srwatson
1137106968Sluigi	    /* insert new entry at head of hash chain */
1138208744Szec	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
11392531Swollman	}
11402531Swollman    }
1141190012Sbms
1142119792Ssam    MFC_UNLOCK();
1143119792Ssam    VIF_UNLOCK();
1144190012Sbms
1145190012Sbms    return (0);
11461541Srgrimes}
11471541Srgrimes
11481541Srgrimes/*
11492531Swollman * Delete an mfc entry
11501541Srgrimes */
11511541Srgrimesstatic int
1152118622Shsudel_mfc(struct mfcctl2 *mfccp)
11531541Srgrimes{
1154133874Srwatson    struct in_addr	origin;
1155133874Srwatson    struct in_addr	mcastgrp;
1156133874Srwatson    struct mfc		*rt;
11571541Srgrimes
11582531Swollman    origin = mfccp->mfcc_origin;
11592531Swollman    mcastgrp = mfccp->mfcc_mcastgrp;
11601541Srgrimes
1161190054Sbms    CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__,
1162190054Sbms	inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr));
11631541Srgrimes
1164119792Ssam    MFC_LOCK();
11659209Swollman
1166190012Sbms    rt = mfc_find(&origin, &mcastgrp);
116742777Sfenner    if (rt == NULL) {
1168119792Ssam	MFC_UNLOCK();
11699209Swollman	return EADDRNOTAVAIL;
11702531Swollman    }
11711541Srgrimes
1172118622Shsu    /*
1173118622Shsu     * free the bw_meter entries
1174118622Shsu     */
1175190012Sbms    free_bw_list(rt->mfc_bw_meter);
1176118622Shsu    rt->mfc_bw_meter = NULL;
1177118622Shsu
1178190012Sbms    LIST_REMOVE(rt, mfc_hash);
117942777Sfenner    free(rt, M_MRTABLE);
11801541Srgrimes
1181119792Ssam    MFC_UNLOCK();
1182119792Ssam
1183190012Sbms    return (0);
11841541Srgrimes}
11851541Srgrimes
11861541Srgrimes/*
1187190012Sbms * Send a message to the routing daemon on the multicast routing socket.
11889209Swollman */
11899209Swollmanstatic int
1190106968Sluigisocket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
11919209Swollman{
1192106968Sluigi    if (s) {
1193131151Srwatson	SOCKBUF_LOCK(&s->so_rcv);
1194131151Srwatson	if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm,
1195131151Srwatson	    NULL) != 0) {
1196131151Srwatson	    sorwakeup_locked(s);
1197106968Sluigi	    return 0;
11989209Swollman	}
1199131151Srwatson	SOCKBUF_UNLOCK(&s->so_rcv);
1200106968Sluigi    }
1201106968Sluigi    m_freem(mm);
1202106968Sluigi    return -1;
12039209Swollman}
12049209Swollman
12059209Swollman/*
12062531Swollman * IP multicast forwarding function. This function assumes that the packet
12072531Swollman * pointed to by "ip" has arrived on (or is about to be sent to) the interface
12082531Swollman * pointed to by "ifp", and the packet is to be relayed to other networks
12092531Swollman * that have members of the packet's destination IP multicast group.
12102531Swollman *
12119209Swollman * The packet is returned unscathed to the caller, unless it is
12129209Swollman * erroneous, in which case a non-zero return value tells the caller to
12132531Swollman * discard it.
12141541Srgrimes */
12152531Swollman
12162531Swollman#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
12172531Swollman
121812296Sphkstatic int
1219118622ShsuX_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
1220118622Shsu    struct ip_moptions *imo)
12211541Srgrimes{
1222106968Sluigi    struct mfc *rt;
1223119792Ssam    int error;
12249209Swollman    vifi_t vifi;
12251541Srgrimes
1226190054Sbms    CTR3(KTR_IPMF, "ip_mforward: delete mfc orig %s group %lx ifp %p",
1227190148Sbms	inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp);
12281541Srgrimes
122980354Sfenner    if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 ||
1230106968Sluigi		((u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
12312531Swollman	/*
12329209Swollman	 * Packet arrived via a physical interface or
1233118622Shsu	 * an encapsulated tunnel or a register_vif.
12342531Swollman	 */
12352531Swollman    } else {
12362531Swollman	/*
12372531Swollman	 * Packet arrived through a source-route tunnel.
12389209Swollman	 * Source-route tunnels are no longer supported.
12392531Swollman	 */
1240190054Sbms	return (1);
12419209Swollman    }
12429209Swollman
1243119792Ssam    VIF_LOCK();
1244119792Ssam    MFC_LOCK();
1245208744Szec    if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) {
1246166629Sbms	if (ip->ip_ttl < MAXTTL)
1247106968Sluigi	    ip->ip_ttl++;	/* compensate for -1 in *_send routines */
1248119792Ssam	error = ip_mdq(m, ifp, NULL, vifi);
1249119792Ssam	MFC_UNLOCK();
1250119792Ssam	VIF_UNLOCK();
1251119792Ssam	return error;
12522531Swollman    }
12532531Swollman
12542531Swollman    /*
12552531Swollman     * Don't forward a packet with time-to-live of zero or one,
12562531Swollman     * or a packet destined to a local-only group.
12572531Swollman     */
1258167593Sbms    if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
1259119792Ssam	MFC_UNLOCK();
1260119792Ssam	VIF_UNLOCK();
12619209Swollman	return 0;
1262119792Ssam    }
12632531Swollman
12642531Swollman    /*
12652531Swollman     * Determine forwarding vifs from the forwarding cache table
12662531Swollman     */
1267190966Srwatson    MRTSTAT_INC(mrts_mfc_lookups);
1268190012Sbms    rt = mfc_find(&ip->ip_src, &ip->ip_dst);
12692531Swollman
12702531Swollman    /* Entry exists, so forward if necessary */
12712531Swollman    if (rt != NULL) {
1272119792Ssam	error = ip_mdq(m, ifp, rt, -1);
1273119792Ssam	MFC_UNLOCK();
1274119792Ssam	VIF_UNLOCK();
1275119792Ssam	return error;
12769209Swollman    } else {
12772531Swollman	/*
12782531Swollman	 * If we don't have a route for packet's origin,
1279106968Sluigi	 * Make a copy of the packet & send message to routing daemon
12802531Swollman	 */
12812531Swollman
1282106968Sluigi	struct mbuf *mb0;
1283106968Sluigi	struct rtdetq *rte;
1284106968Sluigi	u_long hash;
128514549Sfenner	int hlen = ip->ip_hl << 2;
12862531Swollman
1287190966Srwatson	MRTSTAT_INC(mrts_mfc_misses);
1288190966Srwatson	MRTSTAT_INC(mrts_no_route);
1289190054Sbms	CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)",
1290190054Sbms	    inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr));
12912531Swollman
12929209Swollman	/*
12939209Swollman	 * Allocate mbufs early so that we don't do extra work if we are
129414549Sfenner	 * just going to fail anyway.  Make sure to pullup the header so
129514549Sfenner	 * that other people can't step on it.
12969209Swollman	 */
1297190012Sbms	rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE,
1298190012Sbms	    M_NOWAIT|M_ZERO);
129942777Sfenner	if (rte == NULL) {
1300119792Ssam	    MFC_UNLOCK();
1301119792Ssam	    VIF_UNLOCK();
13029209Swollman	    return ENOBUFS;
13039209Swollman	}
1304190012Sbms
1305243882Sglebius	mb0 = m_copypacket(m, M_NOWAIT);
130614549Sfenner	if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
130714549Sfenner	    mb0 = m_pullup(mb0, hlen);
13089209Swollman	if (mb0 == NULL) {
130942777Sfenner	    free(rte, M_MRTABLE);
1310119792Ssam	    MFC_UNLOCK();
1311119792Ssam	    VIF_UNLOCK();
13129209Swollman	    return ENOBUFS;
13139209Swollman	}
13149209Swollman
1315106968Sluigi	/* is there an upcall waiting for this flow ? */
1316190012Sbms	hash = MFCHASH(ip->ip_src, ip->ip_dst);
1317208744Szec	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
1318190012Sbms		if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
1319190012Sbms		    in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
1320190012Sbms		    !TAILQ_EMPTY(&rt->mfc_stall))
1321190012Sbms			break;
13222531Swollman	}
13232531Swollman
132442777Sfenner	if (rt == NULL) {
13259209Swollman	    int i;
13269209Swollman	    struct igmpmsg *im;
1327106968Sluigi	    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
1328106968Sluigi	    struct mbuf *mm;
13299209Swollman
1330106968Sluigi	    /*
1331106968Sluigi	     * Locate the vifi for the incoming interface for this packet.
1332106968Sluigi	     * If none found, drop packet.
1333106968Sluigi	     */
1334208744Szec	    for (vifi = 0; vifi < V_numvifs &&
1335208744Szec		    V_viftable[vifi].v_ifp != ifp; vifi++)
1336106968Sluigi		;
1337208744Szec	    if (vifi >= V_numvifs)	/* vif not found, drop packet */
1338106968Sluigi		goto non_fatal;
1339106968Sluigi
13402531Swollman	    /* no upcall, so make a new entry */
134142777Sfenner	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1342106968Sluigi	    if (rt == NULL)
1343106968Sluigi		goto fail;
1344190054Sbms
13459209Swollman	    /* Make a copy of the header to send to the user level process */
134617137Sfenner	    mm = m_copy(mb0, 0, hlen);
1347106968Sluigi	    if (mm == NULL)
1348106968Sluigi		goto fail1;
13492531Swollman
1350133874Srwatson	    /*
1351133874Srwatson	     * Send message to routing daemon to install
13529209Swollman	     * a route into the kernel table
13539209Swollman	     */
1354133874Srwatson
13559209Swollman	    im = mtod(mm, struct igmpmsg *);
1356106968Sluigi	    im->im_msgtype = IGMPMSG_NOCACHE;
1357106968Sluigi	    im->im_mbz = 0;
1358106968Sluigi	    im->im_vif = vifi;
13599209Swollman
1360190966Srwatson	    MRTSTAT_INC(mrts_upcalls);
13619209Swollman
1362106968Sluigi	    k_igmpsrc.sin_addr = ip->ip_src;
1363181803Sbz	    if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
1364190054Sbms		CTR0(KTR_IPMF, "ip_mforward: socket queue full");
1365190966Srwatson		MRTSTAT_INC(mrts_upq_sockfull);
1366106968Sluigifail1:
1367106968Sluigi		free(rt, M_MRTABLE);
1368106968Sluigifail:
136942777Sfenner		free(rte, M_MRTABLE);
13709266Swollman		m_freem(mb0);
1371119792Ssam		MFC_UNLOCK();
1372119792Ssam		VIF_UNLOCK();
13739209Swollman		return ENOBUFS;
13749209Swollman	    }
13759209Swollman
13762531Swollman	    /* insert new entry at head of hash chain */
13772531Swollman	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
13782531Swollman	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
13799209Swollman	    rt->mfc_expire	      = UPCALL_EXPIRE;
1380208744Szec	    V_nexpire[hash]++;
1381208744Szec	    for (i = 0; i < V_numvifs; i++) {
13829209Swollman		rt->mfc_ttls[i] = 0;
1383118622Shsu		rt->mfc_flags[i] = 0;
1384118622Shsu	    }
13859209Swollman	    rt->mfc_parent = -1;
13862531Swollman
1387190012Sbms	    /* clear the RP address */
1388190012Sbms	    rt->mfc_rp.s_addr = INADDR_ANY;
1389118622Shsu	    rt->mfc_bw_meter = NULL;
1390118622Shsu
1391201254Ssyrinx	    /* initialize pkt counters per src-grp */
1392201254Ssyrinx	    rt->mfc_pkt_cnt = 0;
1393201254Ssyrinx	    rt->mfc_byte_cnt = 0;
1394201254Ssyrinx	    rt->mfc_wrong_if = 0;
1395201254Ssyrinx	    timevalclear(&rt->mfc_last_assert);
1396201254Ssyrinx
1397201254Ssyrinx	    TAILQ_INIT(&rt->mfc_stall);
1398201254Ssyrinx	    rt->mfc_nstall = 0;
1399201254Ssyrinx
14002531Swollman	    /* link into table */
1401208744Szec	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
1402190012Sbms	    TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link);
1403190012Sbms	    rt->mfc_nstall++;
14042531Swollman
14059209Swollman	} else {
1406190012Sbms	    /* determine if queue has overflowed */
1407190012Sbms	    if (rt->mfc_nstall > MAX_UPQ) {
1408190966Srwatson		MRTSTAT_INC(mrts_upq_ovflw);
1409106968Sluiginon_fatal:
141042777Sfenner		free(rte, M_MRTABLE);
14119266Swollman		m_freem(mb0);
1412119792Ssam		MFC_UNLOCK();
1413119792Ssam		VIF_UNLOCK();
1414190012Sbms		return (0);
14159209Swollman	    }
1416190012Sbms	    TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link);
1417190012Sbms	    rt->mfc_nstall++;
14182531Swollman	}
14192531Swollman
1420133874Srwatson	rte->m			= mb0;
1421133874Srwatson	rte->ifp		= ifp;
14222531Swollman
1423119792Ssam	MFC_UNLOCK();
1424119792Ssam	VIF_UNLOCK();
14252531Swollman
14262531Swollman	return 0;
1427133874Srwatson    }
14281541Srgrimes}
14291541Srgrimes
14301541Srgrimes/*
14312531Swollman * Clean up the cache entry if upcall is not serviced
14321541Srgrimes */
14332531Swollmanstatic void
1434208744Szecexpire_upcalls(void *arg)
14351541Srgrimes{
1436255248Sjhb    u_long i;
14371541Srgrimes
1438208744Szec    CURVNET_SET((struct vnet *) arg);
1439208744Szec
1440119792Ssam    MFC_LOCK();
1441190012Sbms
1442190012Sbms    for (i = 0; i < mfchashsize; i++) {
1443190012Sbms	struct mfc *rt, *nrt;
1444190012Sbms
1445208744Szec	if (V_nexpire[i] == 0)
14469209Swollman	    continue;
144742777Sfenner
1448255249Sjhb	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
1449190012Sbms		if (TAILQ_EMPTY(&rt->mfc_stall))
1450190012Sbms			continue;
1451190012Sbms
1452190012Sbms		if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
1453190012Sbms			continue;
1454190012Sbms
1455118622Shsu		/*
1456118622Shsu		 * free the bw_meter entries
1457118622Shsu		 */
1458190012Sbms		while (rt->mfc_bw_meter != NULL) {
1459190012Sbms		    struct bw_meter *x = rt->mfc_bw_meter;
1460118622Shsu
1461190012Sbms		    rt->mfc_bw_meter = x->bm_mfc_next;
1462118622Shsu		    free(x, M_BWMETER);
1463118622Shsu		}
1464118622Shsu
1465190966Srwatson		MRTSTAT_INC(mrts_cache_cleanups);
1466190054Sbms		CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__,
1467190054Sbms		    (u_long)ntohl(rt->mfc_origin.s_addr),
1468190054Sbms		    (u_long)ntohl(rt->mfc_mcastgrp.s_addr));
1469190012Sbms
1470190012Sbms		expire_mfc(rt);
14719209Swollman	    }
14722531Swollman    }
1473190012Sbms
1474119792Ssam    MFC_UNLOCK();
1475119792Ssam
1476208744Szec    callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
1477208744Szec	curvnet);
1478208744Szec
1479208744Szec    CURVNET_RESTORE();
14801541Srgrimes}
14811541Srgrimes
14821541Srgrimes/*
14832531Swollman * Packet forwarding routine once entry in the cache is made
14841541Srgrimes */
14851541Srgrimesstatic int
1486106968Sluigiip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
14871541Srgrimes{
1488106968Sluigi    struct ip  *ip = mtod(m, struct ip *);
1489106968Sluigi    vifi_t vifi;
1490241913Sglebius    int plen = ntohs(ip->ip_len);
14911541Srgrimes
1492119792Ssam    VIF_LOCK_ASSERT();
14939209Swollman
14942531Swollman    /*
14959209Swollman     * If xmt_vif is not -1, send on only the requested vif.
14969209Swollman     *
14979209Swollman     * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
14989209Swollman     */
1499208744Szec    if (xmt_vif < V_numvifs) {
1500208744Szec	if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER)
1501208744Szec		pim_register_send(ip, V_viftable + xmt_vif, m, rt);
1502133874Srwatson	else
1503208744Szec		phyint_send(ip, V_viftable + xmt_vif, m);
15049209Swollman	return 1;
15059209Swollman    }
15069209Swollman
15079209Swollman    /*
15082531Swollman     * Don't forward if it didn't arrive from the parent vif for its origin.
15092531Swollman     */
15102531Swollman    vifi = rt->mfc_parent;
1511208744Szec    if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) {
1512190054Sbms	CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)",
1513208744Szec	    __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp);
1514190966Srwatson	MRTSTAT_INC(mrts_wrong_if);
15159209Swollman	++rt->mfc_wrong_if;
15169209Swollman	/*
1517118622Shsu	 * If we are doing PIM assert processing, send a message
1518118622Shsu	 * to the routing daemon.
1519118622Shsu	 *
1520118622Shsu	 * XXX: A PIM-SM router needs the WRONGVIF detection so it
1521118622Shsu	 * can complete the SPT switch, regardless of the type
1522118622Shsu	 * of the iif (broadcast media, GRE tunnel, etc).
15239209Swollman	 */
1524208744Szec	if (V_pim_assert_enabled && (vifi < V_numvifs) &&
1525208744Szec	    V_viftable[vifi].v_ifp) {
15269209Swollman
1527208744Szec	    if (ifp == &V_multicast_register_if)
1528190967Srwatson		PIMSTAT_INC(pims_rcv_registers_wrongiif);
1529118622Shsu
1530118501Shsu	    /* Get vifi for the incoming packet */
1531208744Szec	    for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp;
1532208744Szec		vifi++)
1533118501Shsu		;
1534208744Szec	    if (vifi >= V_numvifs)
1535118622Shsu		return 0;	/* The iif is not found: ignore the packet. */
1536118501Shsu
1537118622Shsu	    if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF)
1538118622Shsu		return 0;	/* WRONGVIF disabled: ignore the packet */
1539118622Shsu
1540190012Sbms	    if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) {
1541106968Sluigi		struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
1542106968Sluigi		struct igmpmsg *im;
1543106968Sluigi		int hlen = ip->ip_hl << 2;
1544106968Sluigi		struct mbuf *mm = m_copy(m, 0, hlen);
1545106968Sluigi
15469209Swollman		if (mm && (M_HASCL(mm) || mm->m_len < hlen))
15479209Swollman		    mm = m_pullup(mm, hlen);
1548106968Sluigi		if (mm == NULL)
15499209Swollman		    return ENOBUFS;
15509209Swollman
15519209Swollman		im = mtod(mm, struct igmpmsg *);
15529209Swollman		im->im_msgtype	= IGMPMSG_WRONGVIF;
15539209Swollman		im->im_mbz		= 0;
15549209Swollman		im->im_vif		= vifi;
15559209Swollman
1556190966Srwatson		MRTSTAT_INC(mrts_upcalls);
1557118501Shsu
15589209Swollman		k_igmpsrc.sin_addr = im->im_src;
1559181803Sbz		if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
1560190054Sbms		    CTR1(KTR_IPMF, "%s: socket queue full", __func__);
1561190966Srwatson		    MRTSTAT_INC(mrts_upq_sockfull);
1562106968Sluigi		    return ENOBUFS;
1563106968Sluigi		}
15649209Swollman	    }
15659209Swollman	}
15669209Swollman	return 0;
15672531Swollman    }
15681541Srgrimes
1569190012Sbms
15709209Swollman    /* If I sourced this packet, it counts as output, else it was input. */
1571208744Szec    if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) {
1572208744Szec	V_viftable[vifi].v_pkt_out++;
1573208744Szec	V_viftable[vifi].v_bytes_out += plen;
15749209Swollman    } else {
1575208744Szec	V_viftable[vifi].v_pkt_in++;
1576208744Szec	V_viftable[vifi].v_bytes_in += plen;
15779209Swollman    }
15782531Swollman    rt->mfc_pkt_cnt++;
15799209Swollman    rt->mfc_byte_cnt += plen;
15801541Srgrimes
15812531Swollman    /*
15822531Swollman     * For each vif, decide if a copy of the packet should be forwarded.
15832531Swollman     * Forward if:
15842531Swollman     *		- the ttl exceeds the vif's threshold
15852531Swollman     *		- there are group members downstream on interface
15862531Swollman     */
1587208744Szec    for (vifi = 0; vifi < V_numvifs; vifi++)
1588106968Sluigi	if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1589208744Szec	    V_viftable[vifi].v_pkt_out++;
1590208744Szec	    V_viftable[vifi].v_bytes_out += plen;
1591208744Szec	    if (V_viftable[vifi].v_flags & VIFF_REGISTER)
1592208744Szec		pim_register_send(ip, V_viftable + vifi, m, rt);
1593118622Shsu	    else
1594208744Szec		phyint_send(ip, V_viftable + vifi, m);
15959209Swollman	}
15962531Swollman
1597118622Shsu    /*
1598118622Shsu     * Perform upcall-related bw measuring.
1599118622Shsu     */
1600118622Shsu    if (rt->mfc_bw_meter != NULL) {
1601118622Shsu	struct bw_meter *x;
1602118622Shsu	struct timeval now;
1603118622Shsu
1604190012Sbms	microtime(&now);
1605119792Ssam	MFC_LOCK_ASSERT();
1606118622Shsu	for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
1607118622Shsu	    bw_meter_receive_packet(x, plen, &now);
1608118622Shsu    }
1609118622Shsu
16102531Swollman    return 0;
16111541Srgrimes}
16121541Srgrimes
16139209Swollman/*
1614190012Sbms * Check if a vif number is legal/ok. This is used by in_mcast.c.
16151541Srgrimes */
161612296Sphkstatic int
1617106968SluigiX_legal_vif_num(int vif)
16189209Swollman{
1619190012Sbms	int ret;
1620190012Sbms
1621190012Sbms	ret = 0;
1622190012Sbms	if (vif < 0)
1623190012Sbms		return (ret);
1624190012Sbms
1625190012Sbms	VIF_LOCK();
1626208744Szec	if (vif < V_numvifs)
1627190012Sbms		ret = 1;
1628190012Sbms	VIF_UNLOCK();
1629190012Sbms
1630190012Sbms	return (ret);
16312531Swollman}
16322531Swollman
16339209Swollman/*
16349209Swollman * Return the local address used by this vif
16359209Swollman */
163612296Sphkstatic u_long
1637106968SluigiX_ip_mcast_src(int vifi)
16389209Swollman{
1639190012Sbms	in_addr_t addr;
1640190012Sbms
1641190012Sbms	addr = INADDR_ANY;
1642190012Sbms	if (vifi < 0)
1643190012Sbms		return (addr);
1644190012Sbms
1645190012Sbms	VIF_LOCK();
1646208744Szec	if (vifi < V_numvifs)
1647208744Szec		addr = V_viftable[vifi].v_lcl_addr.s_addr;
1648190012Sbms	VIF_UNLOCK();
1649190012Sbms
1650190012Sbms	return (addr);
16519209Swollman}
16529209Swollman
16532531Swollmanstatic void
1654106968Sluigiphyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
16551541Srgrimes{
1656106968Sluigi    struct mbuf *mb_copy;
1657106968Sluigi    int hlen = ip->ip_hl << 2;
16581541Srgrimes
1659119792Ssam    VIF_LOCK_ASSERT();
1660119792Ssam
16613571Swollman    /*
16629209Swollman     * Make a new reference to the packet; make sure that
16639209Swollman     * the IP header is actually copied, not just referenced,
16649209Swollman     * so that ip_output() only scribbles on the copy.
16653571Swollman     */
1666243882Sglebius    mb_copy = m_copypacket(m, M_NOWAIT);
16679209Swollman    if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
16689209Swollman	mb_copy = m_pullup(mb_copy, hlen);
16693571Swollman    if (mb_copy == NULL)
16709209Swollman	return;
16713571Swollman
1672166575Sbms    send_packet(vifp, mb_copy);
16732531Swollman}
16741541Srgrimes
16759209Swollmanstatic void
1676166575Sbmssend_packet(struct vif *vifp, struct mbuf *m)
16771541Srgrimes{
1678106968Sluigi	struct ip_moptions imo;
1679158729Sbms	struct in_multi *imm[2];
1680106968Sluigi	int error;
1681106968Sluigi
1682166575Sbms	VIF_LOCK_ASSERT();
1683166575Sbms
168410203Swollman	imo.imo_multicast_ifp  = vifp->v_ifp;
168510203Swollman	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
168610203Swollman	imo.imo_multicast_loop = 1;
168710203Swollman	imo.imo_multicast_vif  = -1;
1688158729Sbms	imo.imo_num_memberships = 0;
1689158729Sbms	imo.imo_max_memberships = 2;
1690158729Sbms	imo.imo_membership  = &imm[0];
169110203Swollman
169215292Swollman	/*
169315292Swollman	 * Re-entrancy should not be a problem here, because
169415292Swollman	 * the packets that we send out and are looped back at us
169515292Swollman	 * should get rejected because they appear to come from
169615292Swollman	 * the loopback interface, thus preventing looping.
169715292Swollman	 */
1698238016Sglebius	error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL);
1699190054Sbms	CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
1700208744Szec	    (ptrdiff_t)(vifp - V_viftable), error);
17019209Swollman}
17028876Srgrimes
1703190012Sbms/*
1704190012Sbms * Stubs for old RSVP socket shim implementation.
1705190012Sbms */
1706190012Sbms
1707106968Sluigistatic int
1708190012SbmsX_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused)
17099209Swollman{
17109209Swollman
1711190012Sbms	return (EOPNOTSUPP);
17129209Swollman}
17139209Swollman
1714106968Sluigistatic void
1715190012SbmsX_ip_rsvp_force_done(struct socket *so __unused)
17169209Swollman{
17179209Swollman
17189209Swollman}
17199209Swollman
1720106968Sluigistatic void
1721190012SbmsX_rsvp_input(struct mbuf *m, int off __unused)
17229209Swollman{
17239209Swollman
1724190012Sbms	if (!V_rsvp_on)
1725190012Sbms		m_freem(m);
17269209Swollman}
17279209Swollman
1728118622Shsu/*
1729118622Shsu * Code for bandwidth monitors
1730118622Shsu */
1731118622Shsu
/*
 * Single spelling for the timeval helpers used by the bw_meter code:
 * comparison with a caller-supplied operator, and the subtract/add
 * helpers that update their first argument.
 */
#define	BW_TIMEVALCMP(lhs, rhs, op)	timevalcmp((lhs), (rhs), op)
#define	BW_TIMEVALDECR(acc, dec)	timevalsub((acc), (dec))
#define	BW_TIMEVALADD(acc, inc)		timevaladd((acc), (inc))
1738118622Shsu
1739118622Shsustatic uint32_t
1740118622Shsucompute_bw_meter_flags(struct bw_upcall *req)
1741118622Shsu{
1742118622Shsu    uint32_t flags = 0;
1743118622Shsu
1744118622Shsu    if (req->bu_flags & BW_UPCALL_UNIT_PACKETS)
1745118622Shsu	flags |= BW_METER_UNIT_PACKETS;
1746118622Shsu    if (req->bu_flags & BW_UPCALL_UNIT_BYTES)
1747118622Shsu	flags |= BW_METER_UNIT_BYTES;
1748118622Shsu    if (req->bu_flags & BW_UPCALL_GEQ)
1749118622Shsu	flags |= BW_METER_GEQ;
1750118622Shsu    if (req->bu_flags & BW_UPCALL_LEQ)
1751118622Shsu	flags |= BW_METER_LEQ;
1752133874Srwatson
1753118622Shsu    return flags;
1754118622Shsu}
1755133874Srwatson
1756118622Shsu/*
1757118622Shsu * Add a bw_meter entry
1758118622Shsu */
17592763Swollmanstatic int
1760118622Shsuadd_bw_upcall(struct bw_upcall *req)
1761118622Shsu{
1762118622Shsu    struct mfc *mfc;
1763118622Shsu    struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
1764118622Shsu		BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
1765118622Shsu    struct timeval now;
1766118622Shsu    struct bw_meter *x;
1767118622Shsu    uint32_t flags;
1768133874Srwatson
1769208744Szec    if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
1770118622Shsu	return EOPNOTSUPP;
1771133874Srwatson
1772118622Shsu    /* Test if the flags are valid */
1773118622Shsu    if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES)))
1774118622Shsu	return EINVAL;
1775118622Shsu    if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)))
1776118622Shsu	return EINVAL;
1777118622Shsu    if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
1778118622Shsu	    == (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
1779118622Shsu	return EINVAL;
1780133874Srwatson
1781118622Shsu    /* Test if the threshold time interval is valid */
1782118622Shsu    if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <))
1783118622Shsu	return EINVAL;
1784133874Srwatson
1785118622Shsu    flags = compute_bw_meter_flags(req);
1786118622Shsu
1787118622Shsu    /*
1788118622Shsu     * Find if we have already same bw_meter entry
1789118622Shsu     */
1790119792Ssam    MFC_LOCK();
1791190012Sbms    mfc = mfc_find(&req->bu_src, &req->bu_dst);
1792118622Shsu    if (mfc == NULL) {
1793119792Ssam	MFC_UNLOCK();
1794118622Shsu	return EADDRNOTAVAIL;
1795118622Shsu    }
1796118622Shsu    for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) {
1797118622Shsu	if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
1798118622Shsu			   &req->bu_threshold.b_time, ==)) &&
1799118622Shsu	    (x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
1800118622Shsu	    (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
1801118622Shsu	    (x->bm_flags & BW_METER_USER_FLAGS) == flags)  {
1802119792Ssam	    MFC_UNLOCK();
1803118622Shsu	    return 0;		/* XXX Already installed */
1804118622Shsu	}
1805118622Shsu    }
1806133874Srwatson
1807118622Shsu    /* Allocate the new bw_meter entry */
1808118622Shsu    x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT);
1809119792Ssam    if (x == NULL) {
1810119792Ssam	MFC_UNLOCK();
1811118622Shsu	return ENOBUFS;
1812119792Ssam    }
1813133874Srwatson
1814118622Shsu    /* Set the new bw_meter entry */
1815118622Shsu    x->bm_threshold.b_time = req->bu_threshold.b_time;
1816190012Sbms    microtime(&now);
1817118622Shsu    x->bm_start_time = now;
1818118622Shsu    x->bm_threshold.b_packets = req->bu_threshold.b_packets;
1819118622Shsu    x->bm_threshold.b_bytes = req->bu_threshold.b_bytes;
1820118622Shsu    x->bm_measured.b_packets = 0;
1821118622Shsu    x->bm_measured.b_bytes = 0;
1822118622Shsu    x->bm_flags = flags;
1823118622Shsu    x->bm_time_next = NULL;
1824118622Shsu    x->bm_time_hash = BW_METER_BUCKETS;
1825133874Srwatson
1826118622Shsu    /* Add the new bw_meter entry to the front of entries for this MFC */
1827118622Shsu    x->bm_mfc = mfc;
1828118622Shsu    x->bm_mfc_next = mfc->mfc_bw_meter;
1829118622Shsu    mfc->mfc_bw_meter = x;
1830118622Shsu    schedule_bw_meter(x, &now);
1831119792Ssam    MFC_UNLOCK();
1832133874Srwatson
1833118622Shsu    return 0;
1834118622Shsu}
1835118622Shsu
1836118622Shsustatic void
1837118622Shsufree_bw_list(struct bw_meter *list)
1838118622Shsu{
1839118622Shsu    while (list != NULL) {
1840118622Shsu	struct bw_meter *x = list;
1841118622Shsu
1842118622Shsu	list = list->bm_mfc_next;
1843118622Shsu	unschedule_bw_meter(x);
1844118622Shsu	free(x, M_BWMETER);
1845118622Shsu    }
1846118622Shsu}
1847118622Shsu
1848118622Shsu/*
1849118622Shsu * Delete one or multiple bw_meter entries
1850118622Shsu */
1851118622Shsustatic int
1852118622Shsudel_bw_upcall(struct bw_upcall *req)
1853118622Shsu{
1854118622Shsu    struct mfc *mfc;
1855118622Shsu    struct bw_meter *x;
1856133874Srwatson
1857208744Szec    if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
1858118622Shsu	return EOPNOTSUPP;
1859133874Srwatson
1860119792Ssam    MFC_LOCK();
1861190012Sbms
1862118622Shsu    /* Find the corresponding MFC entry */
1863190012Sbms    mfc = mfc_find(&req->bu_src, &req->bu_dst);
1864118622Shsu    if (mfc == NULL) {
1865119792Ssam	MFC_UNLOCK();
1866118622Shsu	return EADDRNOTAVAIL;
1867118622Shsu    } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) {
1868118622Shsu	/*
1869118622Shsu	 * Delete all bw_meter entries for this mfc
1870118622Shsu	 */
1871118622Shsu	struct bw_meter *list;
1872133874Srwatson
1873118622Shsu	list = mfc->mfc_bw_meter;
1874118622Shsu	mfc->mfc_bw_meter = NULL;
1875118622Shsu	free_bw_list(list);
1876119792Ssam	MFC_UNLOCK();
1877118622Shsu	return 0;
1878118622Shsu    } else {			/* Delete a single bw_meter entry */
1879118622Shsu	struct bw_meter *prev;
1880118622Shsu	uint32_t flags = 0;
1881118622Shsu
1882118622Shsu	flags = compute_bw_meter_flags(req);
1883118622Shsu
1884118622Shsu	/* Find the bw_meter entry to delete */
1885118622Shsu	for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL;
1886133046Shsu	     prev = x, x = x->bm_mfc_next) {
1887118622Shsu	    if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
1888118622Shsu			       &req->bu_threshold.b_time, ==)) &&
1889118622Shsu		(x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
1890118622Shsu		(x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
1891118622Shsu		(x->bm_flags & BW_METER_USER_FLAGS) == flags)
1892118622Shsu		break;
1893118622Shsu	}
1894118622Shsu	if (x != NULL) { /* Delete entry from the list for this MFC */
1895118622Shsu	    if (prev != NULL)
1896118622Shsu		prev->bm_mfc_next = x->bm_mfc_next;	/* remove from middle*/
1897118622Shsu	    else
1898118622Shsu		x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */
1899118622Shsu
1900118622Shsu	    unschedule_bw_meter(x);
1901119792Ssam	    MFC_UNLOCK();
1902118622Shsu	    /* Free the bw_meter entry */
1903118622Shsu	    free(x, M_BWMETER);
1904118622Shsu	    return 0;
1905118622Shsu	} else {
1906119792Ssam	    MFC_UNLOCK();
1907118622Shsu	    return EINVAL;
1908118622Shsu	}
1909118622Shsu    }
1910118622Shsu    /* NOTREACHED */
1911118622Shsu}
1912118622Shsu
1913118622Shsu/*
1914118622Shsu * Perform bandwidth measurement processing that may result in an upcall
1915118622Shsu */
1916118622Shsustatic void
1917118622Shsubw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
1918118622Shsu{
1919118622Shsu    struct timeval delta;
1920133874Srwatson
1921119792Ssam    MFC_LOCK_ASSERT();
1922119792Ssam
1923118622Shsu    delta = *nowp;
1924118622Shsu    BW_TIMEVALDECR(&delta, &x->bm_start_time);
1925133874Srwatson
1926118622Shsu    if (x->bm_flags & BW_METER_GEQ) {
1927118622Shsu	/*
1928118622Shsu	 * Processing for ">=" type of bw_meter entry
1929118622Shsu	 */
1930118622Shsu	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
1931118622Shsu	    /* Reset the bw_meter entry */
1932118622Shsu	    x->bm_start_time = *nowp;
1933118622Shsu	    x->bm_measured.b_packets = 0;
1934118622Shsu	    x->bm_measured.b_bytes = 0;
1935118622Shsu	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
1936118622Shsu	}
1937133874Srwatson
1938118622Shsu	/* Record that a packet is received */
1939118622Shsu	x->bm_measured.b_packets++;
1940118622Shsu	x->bm_measured.b_bytes += plen;
1941133874Srwatson
1942118622Shsu	/*
1943118622Shsu	 * Test if we should deliver an upcall
1944118622Shsu	 */
1945133874Srwatson	if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) {
1946118622Shsu	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
1947118622Shsu		 (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) ||
1948118622Shsu		((x->bm_flags & BW_METER_UNIT_BYTES) &&
1949118622Shsu		 (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) {
1950118622Shsu		/* Prepare an upcall for delivery */
1951118622Shsu		bw_meter_prepare_upcall(x, nowp);
1952118622Shsu		x->bm_flags |= BW_METER_UPCALL_DELIVERED;
1953118622Shsu	    }
1954118622Shsu	}
1955118622Shsu    } else if (x->bm_flags & BW_METER_LEQ) {
1956118622Shsu	/*
1957118622Shsu	 * Processing for "<=" type of bw_meter entry
1958118622Shsu	 */
1959118622Shsu	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
1960118622Shsu	    /*
1961118622Shsu	     * We are behind time with the multicast forwarding table
1962118622Shsu	     * scanning for "<=" type of bw_meter entries, so test now
1963118622Shsu	     * if we should deliver an upcall.
1964118622Shsu	     */
1965118622Shsu	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
1966118622Shsu		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
1967118622Shsu		((x->bm_flags & BW_METER_UNIT_BYTES) &&
1968118622Shsu		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
1969118622Shsu		/* Prepare an upcall for delivery */
1970118622Shsu		bw_meter_prepare_upcall(x, nowp);
1971118622Shsu	    }
1972118622Shsu	    /* Reschedule the bw_meter entry */
1973118622Shsu	    unschedule_bw_meter(x);
1974118622Shsu	    schedule_bw_meter(x, nowp);
1975118622Shsu	}
1976133874Srwatson
1977118622Shsu	/* Record that a packet is received */
1978118622Shsu	x->bm_measured.b_packets++;
1979118622Shsu	x->bm_measured.b_bytes += plen;
1980133874Srwatson
1981118622Shsu	/*
1982118622Shsu	 * Test if we should restart the measuring interval
1983118622Shsu	 */
1984118622Shsu	if ((x->bm_flags & BW_METER_UNIT_PACKETS &&
1985118622Shsu	     x->bm_measured.b_packets <= x->bm_threshold.b_packets) ||
1986118622Shsu	    (x->bm_flags & BW_METER_UNIT_BYTES &&
1987118622Shsu	     x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) {
1988118622Shsu	    /* Don't restart the measuring interval */
1989118622Shsu	} else {
1990118622Shsu	    /* Do restart the measuring interval */
1991118622Shsu	    /*
1992118622Shsu	     * XXX: note that we don't unschedule and schedule, because this
1993118622Shsu	     * might be too much overhead per packet. Instead, when we process
1994118622Shsu	     * all entries for a given timer hash bin, we check whether it is
1995118622Shsu	     * really a timeout. If not, we reschedule at that time.
1996118622Shsu	     */
1997118622Shsu	    x->bm_start_time = *nowp;
1998118622Shsu	    x->bm_measured.b_packets = 0;
1999118622Shsu	    x->bm_measured.b_bytes = 0;
2000118622Shsu	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
2001118622Shsu	}
2002118622Shsu    }
2003118622Shsu}
2004118622Shsu
2005118622Shsu/*
2006118622Shsu * Prepare a bandwidth-related upcall
2007118622Shsu */
2008118622Shsustatic void
2009118622Shsubw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
2010118622Shsu{
2011118622Shsu    struct timeval delta;
2012118622Shsu    struct bw_upcall *u;
2013133874Srwatson
2014119792Ssam    MFC_LOCK_ASSERT();
2015133874Srwatson
2016118622Shsu    /*
2017133874Srwatson     * Compute the measured time interval
2018118622Shsu     */
2019118622Shsu    delta = *nowp;
2020118622Shsu    BW_TIMEVALDECR(&delta, &x->bm_start_time);
2021133874Srwatson
2022118622Shsu    /*
2023118622Shsu     * If there are too many pending upcalls, deliver them now
2024118622Shsu     */
2025208744Szec    if (V_bw_upcalls_n >= BW_UPCALLS_MAX)
2026118622Shsu	bw_upcalls_send();
2027133874Srwatson
2028118622Shsu    /*
2029118622Shsu     * Set the bw_upcall entry
2030118622Shsu     */
2031208744Szec    u = &V_bw_upcalls[V_bw_upcalls_n++];
2032118622Shsu    u->bu_src = x->bm_mfc->mfc_origin;
2033118622Shsu    u->bu_dst = x->bm_mfc->mfc_mcastgrp;
2034118622Shsu    u->bu_threshold.b_time = x->bm_threshold.b_time;
2035118622Shsu    u->bu_threshold.b_packets = x->bm_threshold.b_packets;
2036118622Shsu    u->bu_threshold.b_bytes = x->bm_threshold.b_bytes;
2037118622Shsu    u->bu_measured.b_time = delta;
2038118622Shsu    u->bu_measured.b_packets = x->bm_measured.b_packets;
2039118622Shsu    u->bu_measured.b_bytes = x->bm_measured.b_bytes;
2040118622Shsu    u->bu_flags = 0;
2041118622Shsu    if (x->bm_flags & BW_METER_UNIT_PACKETS)
2042118622Shsu	u->bu_flags |= BW_UPCALL_UNIT_PACKETS;
2043118622Shsu    if (x->bm_flags & BW_METER_UNIT_BYTES)
2044118622Shsu	u->bu_flags |= BW_UPCALL_UNIT_BYTES;
2045118622Shsu    if (x->bm_flags & BW_METER_GEQ)
2046118622Shsu	u->bu_flags |= BW_UPCALL_GEQ;
2047118622Shsu    if (x->bm_flags & BW_METER_LEQ)
2048118622Shsu	u->bu_flags |= BW_UPCALL_LEQ;
2049118622Shsu}
2050118622Shsu
2051118622Shsu/*
2052118622Shsu * Send the pending bandwidth-related upcalls
2053118622Shsu */
2054118622Shsustatic void
2055118622Shsubw_upcalls_send(void)
2056118622Shsu{
2057118622Shsu    struct mbuf *m;
2058208744Szec    int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]);
2059118622Shsu    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
2060118622Shsu    static struct igmpmsg igmpmsg = { 0,		/* unused1 */
2061118622Shsu				      0,		/* unused2 */
2062118622Shsu				      IGMPMSG_BW_UPCALL,/* im_msgtype */
2063118622Shsu				      0,		/* im_mbz  */
2064118622Shsu				      0,		/* im_vif  */
2065118622Shsu				      0,		/* unused3 */
2066118622Shsu				      { 0 },		/* im_src  */
2067118622Shsu				      { 0 } };		/* im_dst  */
2068133874Srwatson
2069119792Ssam    MFC_LOCK_ASSERT();
2070119792Ssam
2071208744Szec    if (V_bw_upcalls_n == 0)
2072118622Shsu	return;			/* No pending upcalls */
2073118622Shsu
2074208744Szec    V_bw_upcalls_n = 0;
2075133874Srwatson
2076118622Shsu    /*
2077118622Shsu     * Allocate a new mbuf, initialize it with the header and
2078118622Shsu     * the payload for the pending calls.
2079118622Shsu     */
2080248324Sglebius    m = m_gethdr(M_NOWAIT, MT_DATA);
2081118622Shsu    if (m == NULL) {
2082118622Shsu	log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
2083118622Shsu	return;
2084118622Shsu    }
2085133874Srwatson
2086118622Shsu    m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
2087208744Szec    m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
2088133874Srwatson
2089118622Shsu    /*
2090118622Shsu     * Send the upcalls
2091118622Shsu     * XXX do we need to set the address in k_igmpsrc ?
2092118622Shsu     */
2093190966Srwatson    MRTSTAT_INC(mrts_upcalls);
2094181803Sbz    if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) {
2095118622Shsu	log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n");
2096190966Srwatson	MRTSTAT_INC(mrts_upq_sockfull);
2097118622Shsu    }
2098118622Shsu}
2099118622Shsu
/*
 * Compute the timeout hash value for the bw_meter entries
 *
 * The hash is the second at which the meter's interval ends (start time
 * plus threshold interval, rounded up to a whole second) reduced modulo
 * BW_METER_BUCKETS.  The reduction is done on the time_t value before it
 * is stored in (hash): truncating a large tv_sec into an int first could
 * produce a negative remainder and thus an out-of-range bucket index.
 * A negative remainder is folded back into [0, BW_METER_BUCKETS).
 */
#define	BW_METER_TIMEHASH(bw_meter, hash)				\
    do {								\
	struct timeval next_timeval = (bw_meter)->bm_start_time;	\
									\
	BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
	if (next_timeval.tv_usec)					\
	    next_timeval.tv_sec++; /* XXX: make sure we don't timeout early */ \
	next_timeval.tv_sec %= BW_METER_BUCKETS;			\
	if (next_timeval.tv_sec < 0)					\
	    next_timeval.tv_sec += BW_METER_BUCKETS;			\
	(hash) = next_timeval.tv_sec;					\
    } while (0)
2113118622Shsu
2114118622Shsu/*
2115118622Shsu * Schedule a timer to process periodically bw_meter entry of type "<="
2116118622Shsu * by linking the entry in the proper hash bucket.
2117118622Shsu */
2118118622Shsustatic void
2119118622Shsuschedule_bw_meter(struct bw_meter *x, struct timeval *nowp)
2120118622Shsu{
2121119792Ssam    int time_hash;
2122133874Srwatson
2123119792Ssam    MFC_LOCK_ASSERT();
2124119792Ssam
2125118622Shsu    if (!(x->bm_flags & BW_METER_LEQ))
2126118622Shsu	return;		/* XXX: we schedule timers only for "<=" entries */
2127133874Srwatson
2128118622Shsu    /*
2129118622Shsu     * Reset the bw_meter entry
2130118622Shsu     */
2131118622Shsu    x->bm_start_time = *nowp;
2132118622Shsu    x->bm_measured.b_packets = 0;
2133118622Shsu    x->bm_measured.b_bytes = 0;
2134118622Shsu    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
2135133874Srwatson
2136118622Shsu    /*
2137118622Shsu     * Compute the timeout hash value and insert the entry
2138118622Shsu     */
2139118622Shsu    BW_METER_TIMEHASH(x, time_hash);
2140208744Szec    x->bm_time_next = V_bw_meter_timers[time_hash];
2141208744Szec    V_bw_meter_timers[time_hash] = x;
2142118622Shsu    x->bm_time_hash = time_hash;
2143118622Shsu}
2144118622Shsu
2145118622Shsu/*
2146118622Shsu * Unschedule the periodic timer that processes bw_meter entry of type "<="
2147118622Shsu * by removing the entry from the proper hash bucket.
2148118622Shsu */
2149118622Shsustatic void
2150118622Shsuunschedule_bw_meter(struct bw_meter *x)
2151118622Shsu{
2152118622Shsu    int time_hash;
2153118622Shsu    struct bw_meter *prev, *tmp;
2154133874Srwatson
2155119792Ssam    MFC_LOCK_ASSERT();
2156119792Ssam
2157118622Shsu    if (!(x->bm_flags & BW_METER_LEQ))
2158118622Shsu	return;		/* XXX: we schedule timers only for "<=" entries */
2159133874Srwatson
2160118622Shsu    /*
2161118622Shsu     * Compute the timeout hash value and delete the entry
2162118622Shsu     */
2163118622Shsu    time_hash = x->bm_time_hash;
2164118622Shsu    if (time_hash >= BW_METER_BUCKETS)
2165118622Shsu	return;		/* Entry was not scheduled */
2166133874Srwatson
2167208744Szec    for (prev = NULL, tmp = V_bw_meter_timers[time_hash];
2168118622Shsu	     tmp != NULL; prev = tmp, tmp = tmp->bm_time_next)
2169118622Shsu	if (tmp == x)
2170118622Shsu	    break;
2171133874Srwatson
2172118622Shsu    if (tmp == NULL)
2173118622Shsu	panic("unschedule_bw_meter: bw_meter entry not found");
2174133874Srwatson
2175118622Shsu    if (prev != NULL)
2176118622Shsu	prev->bm_time_next = x->bm_time_next;
2177118622Shsu    else
2178208744Szec	V_bw_meter_timers[time_hash] = x->bm_time_next;
2179133874Srwatson
2180118622Shsu    x->bm_time_next = NULL;
2181118622Shsu    x->bm_time_hash = BW_METER_BUCKETS;
2182118622Shsu}
2183118622Shsu
2184118622Shsu
2185118622Shsu/*
2186118622Shsu * Process all "<=" type of bw_meter that should be processed now,
2187118622Shsu * and for each entry prepare an upcall if necessary. Each processed
2188118622Shsu * entry is rescheduled again for the (periodic) processing.
2189118622Shsu *
2190118622Shsu * This is run periodically (once per second normally). On each round,
2191118622Shsu * all the potentially matching entries are in the hash slot that we are
2192118622Shsu * looking at.
2193118622Shsu */
2194118622Shsustatic void
2195118622Shsubw_meter_process()
2196118622Shsu{
2197118622Shsu    uint32_t loops;
2198119792Ssam    int i;
2199118622Shsu    struct timeval now, process_endtime;
2200133874Srwatson
2201190012Sbms    microtime(&now);
2202208744Szec    if (V_last_tv_sec == now.tv_sec)
2203118622Shsu	return;		/* nothing to do */
2204118622Shsu
2205208744Szec    loops = now.tv_sec - V_last_tv_sec;
2206208744Szec    V_last_tv_sec = now.tv_sec;
2207118622Shsu    if (loops > BW_METER_BUCKETS)
2208118622Shsu	loops = BW_METER_BUCKETS;
2209118622Shsu
2210119792Ssam    MFC_LOCK();
2211118622Shsu    /*
2212118622Shsu     * Process all bins of bw_meter entries from the one after the last
2213118622Shsu     * processed to the current one. On entry, i points to the last bucket
2214118622Shsu     * visited, so we need to increment i at the beginning of the loop.
2215118622Shsu     */
2216119134Shsu    for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) {
2217118622Shsu	struct bw_meter *x, *tmp_list;
2218133874Srwatson
2219118622Shsu	if (++i >= BW_METER_BUCKETS)
2220118622Shsu	    i = 0;
2221133874Srwatson
2222119134Shsu	/* Disconnect the list of bw_meter entries from the bin */
2223208744Szec	tmp_list = V_bw_meter_timers[i];
2224208744Szec	V_bw_meter_timers[i] = NULL;
2225133874Srwatson
2226119134Shsu	/* Process the list of bw_meter entries */
2227118622Shsu	while (tmp_list != NULL) {
2228118622Shsu	    x = tmp_list;
2229118622Shsu	    tmp_list = tmp_list->bm_time_next;
2230133874Srwatson
2231118622Shsu	    /* Test if the time interval is over */
2232118622Shsu	    process_endtime = x->bm_start_time;
2233118622Shsu	    BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time);
2234118622Shsu	    if (BW_TIMEVALCMP(&process_endtime, &now, >)) {
2235118622Shsu		/* Not yet: reschedule, but don't reset */
2236118622Shsu		int time_hash;
2237133874Srwatson
2238118622Shsu		BW_METER_TIMEHASH(x, time_hash);
2239119134Shsu		if (time_hash == i && process_endtime.tv_sec == now.tv_sec) {
2240119134Shsu		    /*
2241119134Shsu		     * XXX: somehow the bin processing is a bit ahead of time.
2242119134Shsu		     * Put the entry in the next bin.
2243119134Shsu		     */
2244119134Shsu		    if (++time_hash >= BW_METER_BUCKETS)
2245119134Shsu			time_hash = 0;
2246119134Shsu		}
2247208744Szec		x->bm_time_next = V_bw_meter_timers[time_hash];
2248208744Szec		V_bw_meter_timers[time_hash] = x;
2249118622Shsu		x->bm_time_hash = time_hash;
2250133874Srwatson
2251118622Shsu		continue;
2252118622Shsu	    }
2253133874Srwatson
2254118622Shsu	    /*
2255118622Shsu	     * Test if we should deliver an upcall
2256118622Shsu	     */
2257118622Shsu	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
2258118622Shsu		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
2259118622Shsu		((x->bm_flags & BW_METER_UNIT_BYTES) &&
2260118622Shsu		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
2261118622Shsu		/* Prepare an upcall for delivery */
2262118622Shsu		bw_meter_prepare_upcall(x, &now);
2263118622Shsu	    }
2264133874Srwatson
2265118622Shsu	    /*
2266118622Shsu	     * Reschedule for next processing
2267118622Shsu	     */
2268118622Shsu	    schedule_bw_meter(x, &now);
2269118622Shsu	}
2270118622Shsu    }
2271133874Srwatson
2272118622Shsu    /* Send all upcalls that are pending delivery */
2273118622Shsu    bw_upcalls_send();
2274119792Ssam
2275119792Ssam    MFC_UNLOCK();
2276118622Shsu}
2277118622Shsu
2278118622Shsu/*
2279118622Shsu * A periodic function for sending all upcalls that are pending delivery
2280118622Shsu */
2281118622Shsustatic void
2282208744Szecexpire_bw_upcalls_send(void *arg)
2283118622Shsu{
2284208744Szec    CURVNET_SET((struct vnet *) arg);
2285208744Szec
2286119792Ssam    MFC_LOCK();
2287118622Shsu    bw_upcalls_send();
2288119792Ssam    MFC_UNLOCK();
2289133874Srwatson
2290208744Szec    callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
2291208744Szec	curvnet);
2292208744Szec    CURVNET_RESTORE();
2293118622Shsu}
2294118622Shsu
2295118622Shsu/*
2296118622Shsu * A periodic function for periodic scanning of the multicast forwarding
2297118622Shsu * table for processing all "<=" bw_meter entries.
2298118622Shsu */
2299118622Shsustatic void
2300208744Szecexpire_bw_meter_process(void *arg)
2301118622Shsu{
2302208744Szec    CURVNET_SET((struct vnet *) arg);
2303208744Szec
2304208744Szec    if (V_mrt_api_config & MRT_MFC_BW_UPCALL)
2305118622Shsu	bw_meter_process();
2306133874Srwatson
2307208744Szec    callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
2308208744Szec	curvnet);
2309208744Szec    CURVNET_RESTORE();
2310118622Shsu}
2311118622Shsu
2312118622Shsu/*
2313118622Shsu * End of bandwidth monitoring code
2314118622Shsu */
2315118622Shsu
2316118622Shsu/*
2317118622Shsu * Send the packet up to the user daemon, or eventually do kernel encapsulation
2318118622Shsu *
2319118622Shsu */
2320118622Shsustatic int
2321169454Srwatsonpim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
2322169454Srwatson    struct mfc *rt)
2323118622Shsu{
2324118622Shsu    struct mbuf *mb_copy, *mm;
2325133874Srwatson
2326166623Sbms    /*
2327166623Sbms     * Do not send IGMP_WHOLEPKT notifications to userland, if the
2328166623Sbms     * rendezvous point was unspecified, and we were told not to.
2329166623Sbms     */
2330208744Szec    if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) &&
2331190012Sbms	in_nullhost(rt->mfc_rp))
2332166623Sbms	return 0;
2333166623Sbms
2334118622Shsu    mb_copy = pim_register_prepare(ip, m);
2335118622Shsu    if (mb_copy == NULL)
2336118622Shsu	return ENOBUFS;
2337133874Srwatson
2338118622Shsu    /*
2339118622Shsu     * Send all the fragments. Note that the mbuf for each fragment
2340118622Shsu     * is freed by the sending machinery.
2341118622Shsu     */
2342118622Shsu    for (mm = mb_copy; mm; mm = mb_copy) {
2343118622Shsu	mb_copy = mm->m_nextpkt;
2344118622Shsu	mm->m_nextpkt = 0;
2345118622Shsu	mm = m_pullup(mm, sizeof(struct ip));
2346118622Shsu	if (mm != NULL) {
2347118622Shsu	    ip = mtod(mm, struct ip *);
2348208744Szec	    if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) {
2349118622Shsu		pim_register_send_rp(ip, vifp, mm, rt);
2350118622Shsu	    } else {
2351118622Shsu		pim_register_send_upcall(ip, vifp, mm, rt);
2352118622Shsu	    }
2353118622Shsu	}
2354118622Shsu    }
2355133874Srwatson
2356118622Shsu    return 0;
2357118622Shsu}
2358118622Shsu
2359118622Shsu/*
2360118622Shsu * Return a copy of the data packet that is ready for PIM Register
2361118622Shsu * encapsulation.
2362118622Shsu * XXX: Note that in the returned copy the IP header is a valid one.
2363118622Shsu */
2364118622Shsustatic struct mbuf *
2365118622Shsupim_register_prepare(struct ip *ip, struct mbuf *m)
2366118622Shsu{
2367118622Shsu    struct mbuf *mb_copy = NULL;
2368118622Shsu    int mtu;
2369133874Srwatson
2370119134Shsu    /* Take care of delayed checksums */
2371118622Shsu    if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
2372118622Shsu	in_delayed_cksum(m);
2373118622Shsu	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
2374118622Shsu    }
2375119134Shsu
2376118622Shsu    /*
2377118622Shsu     * Copy the old packet & pullup its IP header into the
2378118622Shsu     * new mbuf so we can modify it.
2379118622Shsu     */
2380243882Sglebius    mb_copy = m_copypacket(m, M_NOWAIT);
2381118622Shsu    if (mb_copy == NULL)
2382118622Shsu	return NULL;
2383118622Shsu    mb_copy = m_pullup(mb_copy, ip->ip_hl << 2);
2384118622Shsu    if (mb_copy == NULL)
2385118622Shsu	return NULL;
2386133874Srwatson
2387118622Shsu    /* take care of the TTL */
2388118622Shsu    ip = mtod(mb_copy, struct ip *);
2389118622Shsu    --ip->ip_ttl;
2390133874Srwatson
2391118622Shsu    /* Compute the MTU after the PIM Register encapsulation */
2392118622Shsu    mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
2393133874Srwatson
2394241913Sglebius    if (ntohs(ip->ip_len) <= mtu) {
2395119134Shsu	/* Turn the IP header into a valid one */
2396119134Shsu	ip->ip_sum = 0;
2397119134Shsu	ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
2398119134Shsu    } else {
2399119134Shsu	/* Fragment the packet */
2400242161Sglebius	mb_copy->m_pkthdr.csum_flags |= CSUM_IP;
2401242161Sglebius	if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) {
2402119134Shsu	    m_freem(mb_copy);
2403119134Shsu	    return NULL;
2404119134Shsu	}
2405118622Shsu    }
2406118622Shsu    return mb_copy;
2407118622Shsu}
2408118622Shsu
2409118622Shsu/*
2410118622Shsu * Send an upcall with the data packet to the user-level process.
2411118622Shsu */
2412118622Shsustatic int
2413118622Shsupim_register_send_upcall(struct ip *ip, struct vif *vifp,
2414169454Srwatson    struct mbuf *mb_copy, struct mfc *rt)
2415118622Shsu{
2416118622Shsu    struct mbuf *mb_first;
2417118622Shsu    int len = ntohs(ip->ip_len);
2418118622Shsu    struct igmpmsg *im;
2419118622Shsu    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
2420133874Srwatson
2421119792Ssam    VIF_LOCK_ASSERT();
2422119792Ssam
2423118622Shsu    /*
2424118622Shsu     * Add a new mbuf with an upcall header
2425118622Shsu     */
2426248324Sglebius    mb_first = m_gethdr(M_NOWAIT, MT_DATA);
2427118622Shsu    if (mb_first == NULL) {
2428118622Shsu	m_freem(mb_copy);
2429118622Shsu	return ENOBUFS;
2430118622Shsu    }
2431118622Shsu    mb_first->m_data += max_linkhdr;
2432118622Shsu    mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg);
2433118622Shsu    mb_first->m_len = sizeof(struct igmpmsg);
2434118622Shsu    mb_first->m_next = mb_copy;
2435133874Srwatson
2436118622Shsu    /* Send message to routing daemon */
2437118622Shsu    im = mtod(mb_first, struct igmpmsg *);
2438118622Shsu    im->im_msgtype	= IGMPMSG_WHOLEPKT;
2439118622Shsu    im->im_mbz		= 0;
2440208744Szec    im->im_vif		= vifp - V_viftable;
2441118622Shsu    im->im_src		= ip->ip_src;
2442118622Shsu    im->im_dst		= ip->ip_dst;
2443133874Srwatson
2444118622Shsu    k_igmpsrc.sin_addr	= ip->ip_src;
2445133874Srwatson
2446190966Srwatson    MRTSTAT_INC(mrts_upcalls);
2447133874Srwatson
2448181803Sbz    if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) {
2449190054Sbms	CTR1(KTR_IPMF, "%s: socket queue full", __func__);
2450190966Srwatson	MRTSTAT_INC(mrts_upq_sockfull);
2451118622Shsu	return ENOBUFS;
2452118622Shsu    }
2453133874Srwatson
2454118622Shsu    /* Keep statistics */
2455190967Srwatson    PIMSTAT_INC(pims_snd_registers_msgs);
2456190967Srwatson    PIMSTAT_ADD(pims_snd_registers_bytes, len);
2457133874Srwatson
2458118622Shsu    return 0;
2459118622Shsu}
2460118622Shsu
2461118622Shsu/*
2462118622Shsu * Encapsulate the data packet in PIM Register message and send it to the RP.
2463118622Shsu */
2464118622Shsustatic int
2465169454Srwatsonpim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
2466169454Srwatson    struct mfc *rt)
2467118622Shsu{
2468118622Shsu    struct mbuf *mb_first;
2469118622Shsu    struct ip *ip_outer;
2470118622Shsu    struct pim_encap_pimhdr *pimhdr;
2471118622Shsu    int len = ntohs(ip->ip_len);
2472118622Shsu    vifi_t vifi = rt->mfc_parent;
2473133874Srwatson
2474119792Ssam    VIF_LOCK_ASSERT();
2475133874Srwatson
2476208744Szec    if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) {
2477118622Shsu	m_freem(mb_copy);
2478118622Shsu	return EADDRNOTAVAIL;		/* The iif vif is invalid */
2479118622Shsu    }
2480133874Srwatson
2481118622Shsu    /*
2482118622Shsu     * Add a new mbuf with the encapsulating header
2483118622Shsu     */
2484248324Sglebius    mb_first = m_gethdr(M_NOWAIT, MT_DATA);
2485118622Shsu    if (mb_first == NULL) {
2486118622Shsu	m_freem(mb_copy);
2487118622Shsu	return ENOBUFS;
2488118622Shsu    }
2489118622Shsu    mb_first->m_data += max_linkhdr;
2490118622Shsu    mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
2491118622Shsu    mb_first->m_next = mb_copy;
2492118622Shsu
2493118622Shsu    mb_first->m_pkthdr.len = len + mb_first->m_len;
2494133874Srwatson
2495118622Shsu    /*
2496118622Shsu     * Fill in the encapsulating IP and PIM header
2497118622Shsu     */
2498118622Shsu    ip_outer = mtod(mb_first, struct ip *);
2499118622Shsu    *ip_outer = pim_encap_iphdr;
2500133720Sdwmalone    ip_outer->ip_id = ip_newid();
2501241913Sglebius    ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
2502241913Sglebius	sizeof(pim_encap_pimhdr));
2503208744Szec    ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
2504118622Shsu    ip_outer->ip_dst = rt->mfc_rp;
2505118622Shsu    /*
2506118622Shsu     * Copy the inner header TOS to the outer header, and take care of the
2507118622Shsu     * IP_DF bit.
2508118622Shsu     */
2509118622Shsu    ip_outer->ip_tos = ip->ip_tos;
2510241913Sglebius    if (ip->ip_off & htons(IP_DF))
2511241913Sglebius	ip_outer->ip_off |= htons(IP_DF);
2512118622Shsu    pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
2513118622Shsu					 + sizeof(pim_encap_iphdr));
2514118622Shsu    *pimhdr = pim_encap_pimhdr;
2515118622Shsu    /* If the iif crosses a border, set the Border-bit */
2516208744Szec    if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config)
2517118622Shsu	pimhdr->flags |= htonl(PIM_BORDER_REGISTER);
2518133874Srwatson
2519118622Shsu    mb_first->m_data += sizeof(pim_encap_iphdr);
2520118622Shsu    pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
2521118622Shsu    mb_first->m_data -= sizeof(pim_encap_iphdr);
2522133874Srwatson
2523166575Sbms    send_packet(vifp, mb_first);
2524133874Srwatson
2525118622Shsu    /* Keep statistics */
2526190967Srwatson    PIMSTAT_INC(pims_snd_registers_msgs);
2527190967Srwatson    PIMSTAT_ADD(pims_snd_registers_bytes, len);
2528133874Srwatson
2529118622Shsu    return 0;
2530118622Shsu}
2531118622Shsu
2532118622Shsu/*
2533190012Sbms * pim_encapcheck() is called by the encap4_input() path at runtime to
2534166622Sbms * determine if a packet is for PIM; allowing PIM to be dynamically loaded
2535166622Sbms * into the kernel.
2536166622Sbms */
2537166622Sbmsstatic int
2538166622Sbmspim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
2539166622Sbms{
2540166622Sbms
2541166622Sbms#ifdef DIAGNOSTIC
2542166622Sbms    KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
2543166622Sbms#endif
2544166622Sbms    if (proto != IPPROTO_PIM)
2545166622Sbms	return 0;	/* not for us; reject the datagram. */
2546166622Sbms
2547166622Sbms    return 64;		/* claim the datagram. */
2548166622Sbms}
2549166622Sbms
2550166622Sbms/*
2551118622Shsu * PIM-SMv2 and PIM-DM messages processing.
2552118622Shsu * Receives and verifies the PIM control messages, and passes them
2553118622Shsu * up to the listening socket, using rip_input().
2554118622Shsu * The only message with special processing is the PIM_REGISTER message
2555118622Shsu * (used by PIM-SM): the PIM header is stripped off, and the inner packet
2556118622Shsu * is passed to if_simloop().
2557118622Shsu */
2558118622Shsuvoid
2559261208Sglebiuspim_input(struct mbuf *m, int iphlen)
2560118622Shsu{
2561118622Shsu    struct ip *ip = mtod(m, struct ip *);
2562118622Shsu    struct pim *pim;
2563118622Shsu    int minlen;
2564261208Sglebius    int datalen = ntohs(ip->ip_len) - iphlen;
2565118622Shsu    int ip_tos;
2566133874Srwatson
2567118622Shsu    /* Keep statistics */
2568190967Srwatson    PIMSTAT_INC(pims_rcv_total_msgs);
2569190967Srwatson    PIMSTAT_ADD(pims_rcv_total_bytes, datalen);
2570133874Srwatson
2571118622Shsu    /*
2572118622Shsu     * Validate lengths
2573118622Shsu     */
2574118622Shsu    if (datalen < PIM_MINLEN) {
2575190967Srwatson	PIMSTAT_INC(pims_rcv_tooshort);
2576190054Sbms	CTR3(KTR_IPMF, "%s: short packet (%d) from %s",
2577190148Sbms	    __func__, datalen, inet_ntoa(ip->ip_src));
2578118622Shsu	m_freem(m);
2579118622Shsu	return;
2580118622Shsu    }
2581133874Srwatson
2582118622Shsu    /*
2583118622Shsu     * If the packet is at least as big as a REGISTER, go agead
2584118622Shsu     * and grab the PIM REGISTER header size, to avoid another
2585118622Shsu     * possible m_pullup() later.
2586133874Srwatson     *
2587118622Shsu     * PIM_MINLEN       == pimhdr + u_int32_t == 4 + 4 = 8
2588118622Shsu     * PIM_REG_MINLEN   == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28
2589118622Shsu     */
2590118622Shsu    minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN);
2591118622Shsu    /*
2592118622Shsu     * Get the IP and PIM headers in contiguous memory, and
2593118622Shsu     * possibly the PIM REGISTER header.
2594118622Shsu     */
2595261208Sglebius    if (m->m_len < minlen && (m = m_pullup(m, minlen)) == 0) {
2596190054Sbms	CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
2597118622Shsu	return;
2598118622Shsu    }
2599190054Sbms
2600118622Shsu    /* m_pullup() may have given us a new mbuf so reset ip. */
2601118622Shsu    ip = mtod(m, struct ip *);
2602118622Shsu    ip_tos = ip->ip_tos;
2603133874Srwatson
2604118622Shsu    /* adjust mbuf to point to the PIM header */
2605118622Shsu    m->m_data += iphlen;
2606118622Shsu    m->m_len  -= iphlen;
2607118622Shsu    pim = mtod(m, struct pim *);
2608133874Srwatson
2609118622Shsu    /*
2610118622Shsu     * Validate checksum. If PIM REGISTER, exclude the data packet.
2611118622Shsu     *
2612118622Shsu     * XXX: some older PIMv2 implementations don't make this distinction,
2613118622Shsu     * so for compatibility reason perform the checksum over part of the
2614118622Shsu     * message, and if error, then over the whole message.
2615118622Shsu     */
2616118622Shsu    if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) {
2617118622Shsu	/* do nothing, checksum okay */
2618118622Shsu    } else if (in_cksum(m, datalen)) {
2619190967Srwatson	PIMSTAT_INC(pims_rcv_badsum);
2620190054Sbms	CTR1(KTR_IPMF, "%s: invalid checksum", __func__);
2621118622Shsu	m_freem(m);
2622118622Shsu	return;
2623118622Shsu    }
2624118622Shsu
2625118622Shsu    /* PIM version check */
2626118622Shsu    if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) {
2627190967Srwatson	PIMSTAT_INC(pims_rcv_badversion);
2628190054Sbms	CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__,
2629190054Sbms	    (int)PIM_VT_V(pim->pim_vt), PIM_VERSION);
2630118622Shsu	m_freem(m);
2631118622Shsu	return;
2632118622Shsu    }
2633133874Srwatson
2634118622Shsu    /* restore mbuf back to the outer IP */
2635118622Shsu    m->m_data -= iphlen;
2636118622Shsu    m->m_len  += iphlen;
2637133874Srwatson
2638118622Shsu    if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) {
2639118622Shsu	/*
2640118622Shsu	 * Since this is a REGISTER, we'll make a copy of the register
2641118622Shsu	 * headers ip + pim + u_int32 + encap_ip, to be passed up to the
2642118622Shsu	 * routing daemon.
2643118622Shsu	 */
2644118622Shsu	struct sockaddr_in dst = { sizeof(dst), AF_INET };
2645118622Shsu	struct mbuf *mcp;
2646118622Shsu	struct ip *encap_ip;
2647118622Shsu	u_int32_t *reghdr;
2648119792Ssam	struct ifnet *vifp;
2649133874Srwatson
2650119792Ssam	VIF_LOCK();
2651208744Szec	if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) {
2652119792Ssam	    VIF_UNLOCK();
2653190054Sbms	    CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__,
2654208744Szec		(int)V_reg_vif_num);
2655118622Shsu	    m_freem(m);
2656118622Shsu	    return;
2657118622Shsu	}
2658119792Ssam	/* XXX need refcnt? */
2659208744Szec	vifp = V_viftable[V_reg_vif_num].v_ifp;
2660119792Ssam	VIF_UNLOCK();
2661133874Srwatson
2662118622Shsu	/*
2663118622Shsu	 * Validate length
2664118622Shsu	 */
2665118622Shsu	if (datalen < PIM_REG_MINLEN) {
2666190967Srwatson	    PIMSTAT_INC(pims_rcv_tooshort);
2667190967Srwatson	    PIMSTAT_INC(pims_rcv_badregisters);
2668190054Sbms	    CTR1(KTR_IPMF, "%s: register packet size too small", __func__);
2669118622Shsu	    m_freem(m);
2670118622Shsu	    return;
2671118622Shsu	}
2672133874Srwatson
2673118622Shsu	reghdr = (u_int32_t *)(pim + 1);
2674118622Shsu	encap_ip = (struct ip *)(reghdr + 1);
2675133874Srwatson
2676190054Sbms	CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d",
2677190054Sbms	    __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len));
2678133874Srwatson
2679118622Shsu	/* verify the version number of the inner packet */
2680118622Shsu	if (encap_ip->ip_v != IPVERSION) {
2681190967Srwatson	    PIMSTAT_INC(pims_rcv_badregisters);
2682190054Sbms	    CTR1(KTR_IPMF, "%s: bad encap ip version", __func__);
2683118622Shsu	    m_freem(m);
2684118622Shsu	    return;
2685118622Shsu	}
2686133874Srwatson
2687118622Shsu	/* verify the inner packet is destined to a mcast group */
2688118622Shsu	if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) {
2689190967Srwatson	    PIMSTAT_INC(pims_rcv_badregisters);
2690190054Sbms	    CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__,
2691190054Sbms		inet_ntoa(encap_ip->ip_dst));
2692118622Shsu	    m_freem(m);
2693118622Shsu	    return;
2694118622Shsu	}
2695126741Shsu
2696126741Shsu	/* If a NULL_REGISTER, pass it to the daemon */
2697126741Shsu	if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
2698126741Shsu	    goto pim_input_to_daemon;
2699126741Shsu
2700118622Shsu	/*
2701118622Shsu	 * Copy the TOS from the outer IP header to the inner IP header.
2702118622Shsu	 */
2703118622Shsu	if (encap_ip->ip_tos != ip_tos) {
2704118622Shsu	    /* Outer TOS -> inner TOS */
2705118622Shsu	    encap_ip->ip_tos = ip_tos;
2706118622Shsu	    /* Recompute the inner header checksum. Sigh... */
2707133874Srwatson
2708118622Shsu	    /* adjust mbuf to point to the inner IP header */
2709118622Shsu	    m->m_data += (iphlen + PIM_MINLEN);
2710118622Shsu	    m->m_len  -= (iphlen + PIM_MINLEN);
2711133874Srwatson
2712118622Shsu	    encap_ip->ip_sum = 0;
2713118622Shsu	    encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2);
2714133874Srwatson
2715118622Shsu	    /* restore mbuf to point back to the outer IP header */
2716118622Shsu	    m->m_data -= (iphlen + PIM_MINLEN);
2717118622Shsu	    m->m_len  += (iphlen + PIM_MINLEN);
2718118622Shsu	}
2719126741Shsu
2720118622Shsu	/*
2721118622Shsu	 * Decapsulate the inner IP packet and loopback to forward it
2722133874Srwatson	 * as a normal multicast packet. Also, make a copy of the
2723118622Shsu	 *     outer_iphdr + pimhdr + reghdr + encap_iphdr
2724118622Shsu	 * to pass to the daemon later, so it can take the appropriate
2725118622Shsu	 * actions (e.g., send back PIM_REGISTER_STOP).
2726118622Shsu	 * XXX: here m->m_data points to the outer IP header.
2727118622Shsu	 */
2728118622Shsu	mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN);
2729118622Shsu	if (mcp == NULL) {
2730190054Sbms	    CTR1(KTR_IPMF, "%s: m_copy() failed", __func__);
2731118622Shsu	    m_freem(m);
2732118622Shsu	    return;
2733118622Shsu	}
2734133874Srwatson
2735118622Shsu	/* Keep statistics */
2736118622Shsu	/* XXX: registers_bytes include only the encap. mcast pkt */
2737190967Srwatson	PIMSTAT_INC(pims_rcv_registers_msgs);
2738190967Srwatson	PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len));
2739133874Srwatson
2740118622Shsu	/*
2741118622Shsu	 * forward the inner ip packet; point m_data at the inner ip.
2742118622Shsu	 */
2743118622Shsu	m_adj(m, iphlen + PIM_MINLEN);
2744133874Srwatson
2745190054Sbms	CTR4(KTR_IPMF,
2746190054Sbms	    "%s: forward decap'd REGISTER: src %lx dst %lx vif %d",
2747190054Sbms	    __func__,
2748190054Sbms	    (u_long)ntohl(encap_ip->ip_src.s_addr),
2749190054Sbms	    (u_long)ntohl(encap_ip->ip_dst.s_addr),
2750208744Szec	    (int)V_reg_vif_num);
2751190054Sbms
2752119792Ssam	/* NB: vifp was collected above; can it change on us? */
2753119792Ssam	if_simloop(vifp, m, dst.sin_family, 0);
2754133874Srwatson
2755118622Shsu	/* prepare the register head to send to the mrouting daemon */
2756118622Shsu	m = mcp;
2757118622Shsu    }
2758118622Shsu
2759133874Srwatsonpim_input_to_daemon:
2760118622Shsu    /*
2761118622Shsu     * Pass the PIM message up to the daemon; if it is a Register message,
2762118622Shsu     * pass the 'head' only up to the daemon. This includes the
2763118622Shsu     * outer IP header, PIM header, PIM-Register header and the
2764118622Shsu     * inner IP header.
2765118622Shsu     * XXX: the outer IP header pkt size of a Register is not adjust to
2766118622Shsu     * reflect the fact that the inner multicast data is truncated.
2767118622Shsu     */
2768118622Shsu    rip_input(m, iphlen);
2769118622Shsu
2770118622Shsu    return;
2771118622Shsu}
2772118622Shsu
2773118622Shsustatic int
2774190012Sbmssysctl_mfctable(SYSCTL_HANDLER_ARGS)
2775190012Sbms{
2776190012Sbms	struct mfc	*rt;
2777190012Sbms	int		 error, i;
2778190012Sbms
2779190012Sbms	if (req->newptr)
2780190012Sbms		return (EPERM);
2781208744Szec	if (V_mfchashtbl == NULL)	/* XXX unlocked */
2782190012Sbms		return (0);
2783190012Sbms	error = sysctl_wire_old_buffer(req, 0);
2784190012Sbms	if (error)
2785190012Sbms		return (error);
2786190012Sbms
2787190012Sbms	MFC_LOCK();
2788190012Sbms	for (i = 0; i < mfchashsize; i++) {
2789208744Szec		LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) {
2790190012Sbms			error = SYSCTL_OUT(req, rt, sizeof(struct mfc));
2791190012Sbms			if (error)
2792190012Sbms				goto out_locked;
2793190012Sbms		}
2794190012Sbms	}
2795190012Sbmsout_locked:
2796190012Sbms	MFC_UNLOCK();
2797190012Sbms	return (error);
2798190012Sbms}
2799190012Sbms
2800227309Sedstatic SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
2801227309Sed    sysctl_mfctable, "IPv4 Multicast Forwarding Table "
2802227309Sed    "(struct *mfc[mfchashsize], netinet/ip_mroute.h)");
2803190012Sbms
2804208744Szecstatic void
2805208744Szecvnet_mroute_init(const void *unused __unused)
2806208744Szec{
2807208744Szec
2808208744Szec	MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
2809208744Szec	bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
2810314667Savg	callout_init(&V_expire_upcalls_ch, 1);
2811314667Savg	callout_init(&V_bw_upcalls_ch, 1);
2812314667Savg	callout_init(&V_bw_meter_ch, 1);
2813208744Szec}
2814208744Szec
2815232517SzecVNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init,
2816208744Szec	NULL);
2817208744Szec
2818208744Szecstatic void
2819208744Szecvnet_mroute_uninit(const void *unused __unused)
2820208744Szec{
2821208744Szec
2822208744Szec	FREE(V_nexpire, M_MRTABLE);
2823208744Szec	V_nexpire = NULL;
2824208744Szec}
2825208744Szec
2826208744SzecVNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE,
2827208744Szec	vnet_mroute_uninit, NULL);
2828208744Szec
2829190012Sbmsstatic int
283080354Sfennerip_mroute_modevent(module_t mod, int type, void *unused)
28312763Swollman{
2832183550Szec
2833106968Sluigi    switch (type) {
2834106968Sluigi    case MOD_LOAD:
2835167116Sbms	MROUTER_LOCK_INIT();
2836208744Szec
2837208744Szec	if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
2838208744Szec	    if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
2839208744Szec	if (if_detach_event_tag == NULL) {
2840249562Sdelphij		printf("ip_mroute: unable to register "
2841249562Sdelphij		    "ifnet_departure_event handler\n");
2842208744Szec		MROUTER_LOCK_DESTROY();
2843208744Szec		return (EINVAL);
2844208744Szec	}
2845208744Szec
2846123690Ssam	MFC_LOCK_INIT();
2847123690Ssam	VIF_LOCK_INIT();
2848190012Sbms
2849190012Sbms	mfchashsize = MFCHASHSIZE;
2850190012Sbms	if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) &&
2851190012Sbms	    !powerof2(mfchashsize)) {
2852190012Sbms		printf("WARNING: %s not a power of 2; using default\n",
2853190012Sbms		    "net.inet.ip.mfchashsize");
2854190012Sbms		mfchashsize = MFCHASHSIZE;
2855190012Sbms	}
2856190012Sbms
2857190012Sbms	pim_squelch_wholepkt = 0;
2858166623Sbms	TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
2859166623Sbms	    &pim_squelch_wholepkt);
2860166938Sbms
2861166622Sbms	pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
2862166622Sbms	    pim_encapcheck, &in_pim_protosw, NULL);
2863166622Sbms	if (pim_encap_cookie == NULL) {
2864166622Sbms		printf("ip_mroute: unable to attach pim encap\n");
2865166622Sbms		VIF_LOCK_DESTROY();
2866166622Sbms		MFC_LOCK_DESTROY();
2867167116Sbms		MROUTER_LOCK_DESTROY();
2868166622Sbms		return (EINVAL);
2869166622Sbms	}
2870166938Sbms
2871106968Sluigi	ip_mcast_src = X_ip_mcast_src;
2872106968Sluigi	ip_mforward = X_ip_mforward;
2873106968Sluigi	ip_mrouter_done = X_ip_mrouter_done;
2874106968Sluigi	ip_mrouter_get = X_ip_mrouter_get;
2875106968Sluigi	ip_mrouter_set = X_ip_mrouter_set;
2876166938Sbms
2877106968Sluigi	ip_rsvp_force_done = X_ip_rsvp_force_done;
2878106968Sluigi	ip_rsvp_vif = X_ip_rsvp_vif;
2879166938Sbms
2880106968Sluigi	legal_vif_num = X_legal_vif_num;
2881106968Sluigi	mrt_ioctl = X_mrt_ioctl;
2882106968Sluigi	rsvp_input_p = X_rsvp_input;
2883106968Sluigi	break;
28842763Swollman
2885106968Sluigi    case MOD_UNLOAD:
2886121446Ssam	/*
2887121446Ssam	 * Typically module unload happens after the user-level
2888121446Ssam	 * process has shutdown the kernel services (the check
2889121446Ssam	 * below insures someone can't just yank the module out
2890121446Ssam	 * from under a running process).  But if the module is
2891121446Ssam	 * just loaded and then unloaded w/o starting up a user
2892121446Ssam	 * process we still need to cleanup.
2893121446Ssam	 */
2894208744Szec	MROUTER_LOCK();
2895208744Szec	if (ip_mrouter_cnt != 0) {
2896208744Szec	    MROUTER_UNLOCK();
2897190012Sbms	    return (EINVAL);
2898208744Szec	}
2899208744Szec	ip_mrouter_unloading = 1;
2900208744Szec	MROUTER_UNLOCK();
29012763Swollman
2902208744Szec	EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
2903208744Szec
2904166622Sbms	if (pim_encap_cookie) {
2905166622Sbms	    encap_detach(pim_encap_cookie);
2906166622Sbms	    pim_encap_cookie = NULL;
2907166622Sbms	}
2908190012Sbms
2909106968Sluigi	ip_mcast_src = NULL;
2910106968Sluigi	ip_mforward = NULL;
2911106968Sluigi	ip_mrouter_done = NULL;
2912106968Sluigi	ip_mrouter_get = NULL;
2913106968Sluigi	ip_mrouter_set = NULL;
2914166938Sbms
2915106968Sluigi	ip_rsvp_force_done = NULL;
2916106968Sluigi	ip_rsvp_vif = NULL;
2917166938Sbms
2918106968Sluigi	legal_vif_num = NULL;
2919106968Sluigi	mrt_ioctl = NULL;
2920106968Sluigi	rsvp_input_p = NULL;
2921166938Sbms
2922123690Ssam	VIF_LOCK_DESTROY();
2923123690Ssam	MFC_LOCK_DESTROY();
2924167116Sbms	MROUTER_LOCK_DESTROY();
2925106968Sluigi	break;
2926166938Sbms
2927132199Sphk    default:
2928132199Sphk	return EOPNOTSUPP;
2929106968Sluigi    }
2930106968Sluigi    return 0;
29312763Swollman}
29322763Swollman
293380354Sfennerstatic moduledata_t ip_mroutemod = {
2934106968Sluigi    "ip_mroute",
2935106968Sluigi    ip_mroute_modevent,
2936241394Skevlo    0
293780354Sfenner};
2938190012Sbms
2939232517SzecDECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
2940