1147549Simp/*- 2147549Simp * Copyright (c) 1989 Stephen Deering 3147549Simp * Copyright (c) 1992, 1993 4147549Simp * The Regents of the University of California. All rights reserved. 5147549Simp * 6147549Simp * This code is derived from software contributed to Berkeley by 7147549Simp * Stephen Deering of Stanford University. 8147549Simp * 9147549Simp * Redistribution and use in source and binary forms, with or without 10147549Simp * modification, are permitted provided that the following conditions 11147549Simp * are met: 12147549Simp * 1. Redistributions of source code must retain the above copyright 13147549Simp * notice, this list of conditions and the following disclaimer. 14147549Simp * 2. Redistributions in binary form must reproduce the above copyright 15147549Simp * notice, this list of conditions and the following disclaimer in the 16147549Simp * documentation and/or other materials provided with the distribution. 17147549Simp * 4. Neither the name of the University nor the names of its contributors 18147549Simp * may be used to endorse or promote products derived from this software 19147549Simp * without specific prior written permission. 20147549Simp * 21147549Simp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22147549Simp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23147549Simp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24147549Simp * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25147549Simp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26147549Simp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27147549Simp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28147549Simp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29147549Simp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30147549Simp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31147549Simp * SUCH DAMAGE. 32147549Simp * 33147549Simp * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93 34147549Simp */ 35147549Simp 361541Srgrimes/* 372531Swollman * IP multicast forwarding procedures 381541Srgrimes * 391541Srgrimes * Written by David Waitzman, BBN Labs, August 1988. 401541Srgrimes * Modified by Steve Deering, Stanford, February 1989. 412531Swollman * Modified by Mark J. Steiglitz, Stanford, May, 1991 422531Swollman * Modified by Van Jacobson, LBL, January 1993 432531Swollman * Modified by Ajit Thyagarajan, PARC, August 1993 449209Swollman * Modified by Bill Fenner, PARC, April 1995 45118622Shsu * Modified by Ahmed Helmy, SGI, June 1996 46118622Shsu * Modified by George Edmond Eddy (Rusty), ISI, February 1998 47118622Shsu * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000 48118622Shsu * Modified by Hitoshi Asaeda, WIDE, August 2000 49118622Shsu * Modified by Pavlin Radoslavov, ICSI, October 2002 501541Srgrimes * 519209Swollman * MROUTING Revision: 3.5 52118622Shsu * and PIM-SMv2 and PIM-DM support, advanced API support, 53118622Shsu * bandwidth metering and signaling 541541Srgrimes */ 551541Srgrimes 56190012Sbms/* 57190012Sbms * TODO: Prefix functions with ipmf_. 58190012Sbms * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol 59190012Sbms * domain attachment (if_afdata) so we can track consumers of that service. 60190012Sbms * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT, 61190012Sbms * move it to socket options. 62190012Sbms * TODO: Cleanup LSRR removal further. 63190012Sbms * TODO: Push RSVP stubs into raw_ip.c. 64190012Sbms * TODO: Use bitstring.h for vif set. 65190012Sbms * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded. 66190012Sbms * TODO: Sync ip6_mroute.c with this file. 67190012Sbms */ 68190012Sbms 69172467Ssilby#include <sys/cdefs.h> 70172467Ssilby__FBSDID("$FreeBSD: stable/10/sys/netinet/ip_mroute.c 314667 2017-03-04 13:03:31Z avg $"); 71172467Ssilby 72166938Sbms#include "opt_inet.h" 7314328Speter#include "opt_mrouting.h" 741541Srgrimes 75118622Shsu#define _PIM_VT 1 76118622Shsu 771541Srgrimes#include <sys/param.h> 7895759Stanimura#include <sys/kernel.h> 79190148Sbms#include <sys/stddef.h> 8095759Stanimura#include <sys/lock.h> 81190054Sbms#include <sys/ktr.h> 8242777Sfenner#include <sys/malloc.h> 831541Srgrimes#include <sys/mbuf.h> 84129880Sphk#include <sys/module.h> 85164033Srwatson#include <sys/priv.h> 8695759Stanimura#include <sys/protosw.h> 8795759Stanimura#include <sys/signalvar.h> 881541Srgrimes#include <sys/socket.h> 891541Srgrimes#include <sys/socketvar.h> 9095759Stanimura#include <sys/sockio.h> 9195759Stanimura#include <sys/sx.h> 9280354Sfenner#include <sys/sysctl.h> 932531Swollman#include <sys/syslog.h> 9495759Stanimura#include <sys/systm.h> 9595759Stanimura#include <sys/time.h> 96253084Sae#include <sys/counter.h> 97190012Sbms 981541Srgrimes#include <net/if.h> 99111888Sjlemon#include <net/netisr.h> 1001541Srgrimes#include <net/route.h> 101196019Srwatson#include <net/vnet.h> 102190012Sbms 1031541Srgrimes#include <netinet/in.h> 10495759Stanimura#include <netinet/igmp.h> 1051541Srgrimes#include <netinet/in_systm.h> 10695759Stanimura#include <netinet/in_var.h> 1071541Srgrimes#include <netinet/ip.h> 10880354Sfenner#include <netinet/ip_encap.h> 1091541Srgrimes#include <netinet/ip_mroute.h> 11095759Stanimura#include <netinet/ip_var.h> 111152592Sandre#include <netinet/ip_options.h> 112118622Shsu#include <netinet/pim.h> 113118622Shsu#include <netinet/pim_var.h> 1149209Swollman#include <netinet/udp.h> 115185571Sbz 11660214Sken#include <machine/in_cksum.h> 1171541Srgrimes 118190054Sbms#ifndef KTR_IPMF 119191660Sbms#define KTR_IPMF KTR_INET 120190054Sbms#endif 1212531Swollman 122118622Shsu#define VIFI_INVALID ((vifi_t) -1) 123190012Sbms#define M_HASCL(m) ((m)->m_flags & M_EXT) 124118622Shsu 125215701Sdimstatic VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */ 126208744Szec#define V_last_tv_sec VNET(last_tv_sec) 127208744Szec 128190012Sbmsstatic MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache"); 1299209Swollman 130119792Ssam/* 131119792Ssam * Locking. We use two locks: one for the virtual interface table and 132119792Ssam * one for the forwarding table. These locks may be nested in which case 133119792Ssam * the VIF lock must always be taken first. Note that each lock is used 134119792Ssam * to cover not only the specific data structure but also related data 135190012Sbms * structures. 136119792Ssam */ 137119792Ssam 138167116Sbmsstatic struct mtx mrouter_mtx; 139167116Sbms#define MROUTER_LOCK() mtx_lock(&mrouter_mtx) 140167116Sbms#define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx) 141171744Srwatson#define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED) 142190012Sbms#define MROUTER_LOCK_INIT() \ 143167116Sbms mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF) 144167116Sbms#define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx) 145167116Sbms 146208744Szecstatic int ip_mrouter_cnt; /* # of vnets with active mrouters */ 147208744Szecstatic int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */ 148208744Szec 149253084Saestatic VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat); 150253084SaeVNET_PCPUSTAT_SYSINIT(mrtstat); 151253084SaeVNET_PCPUSTAT_SYSUNINIT(mrtstat); 152253084SaeSYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat, 153253084Sae mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, " 154190012Sbms "netinet/ip_mroute.h)"); 155190012Sbms 156215701Sdimstatic VNET_DEFINE(u_long, mfchash); 157208744Szec#define V_mfchash VNET(mfchash) 158208744Szec#define MFCHASH(a, g) \ 159190012Sbms ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ 160208744Szec ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash) 161208744Szec#define MFCHASHSIZE 256 162190012Sbms 163208744Szecstatic u_long mfchashsize; /* Hash size */ 164215701Sdimstatic VNET_DEFINE(u_char *, nexpire); /* 0..mfchashsize-1 */ 165208744Szec#define V_nexpire VNET(nexpire) 166215701Sdimstatic VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl); 167208744Szec#define V_mfchashtbl VNET(mfchashtbl) 168190012Sbms 169119792Ssamstatic struct mtx mfc_mtx; 170190012Sbms#define MFC_LOCK() mtx_lock(&mfc_mtx) 171190012Sbms#define MFC_UNLOCK() mtx_unlock(&mfc_mtx) 172171744Srwatson#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) 173190012Sbms#define MFC_LOCK_INIT() \ 174190012Sbms mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF) 175119792Ssam#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) 176119792Ssam 177215701Sdimstatic VNET_DEFINE(vifi_t, numvifs); 178208744Szec#define V_numvifs VNET(numvifs) 179215701Sdimstatic VNET_DEFINE(struct vif, viftable[MAXVIFS]); 180208744Szec#define V_viftable VNET(viftable) 181208744SzecSYSCTL_VNET_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, 182208744Szec &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]", 183190012Sbms "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); 1842531Swollman 185119792Ssamstatic struct mtx vif_mtx; 186190012Sbms#define VIF_LOCK() mtx_lock(&vif_mtx) 187190012Sbms#define VIF_UNLOCK() mtx_unlock(&vif_mtx) 188119792Ssam#define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) 189190012Sbms#define VIF_LOCK_INIT() \ 190190012Sbms mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF) 191119792Ssam#define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) 192119792Ssam 193162719Sbmsstatic eventhandler_tag if_detach_event_tag = NULL; 194162719Sbms 195215701Sdimstatic VNET_DEFINE(struct callout, expire_upcalls_ch); 196208744Szec#define V_expire_upcalls_ch VNET(expire_upcalls_ch) 197208744Szec 1989209Swollman#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 1999209Swollman#define UPCALL_EXPIRE 6 /* number of timeouts */ 2002531Swollman 2012531Swollman/* 202118622Shsu * Bandwidth meter variables and constants 203118622Shsu */ 204118622Shsustatic MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters"); 205118622Shsu/* 206118622Shsu * Pending timeouts are stored in a hash table, the key being the 207118622Shsu * expiration time. Periodically, the entries are analysed and processed. 208118622Shsu */ 209208744Szec#define BW_METER_BUCKETS 1024 210215701Sdimstatic VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]); 211208744Szec#define V_bw_meter_timers VNET(bw_meter_timers) 212215701Sdimstatic VNET_DEFINE(struct callout, bw_meter_ch); 213208744Szec#define V_bw_meter_ch VNET(bw_meter_ch) 214208744Szec#define BW_METER_PERIOD (hz) /* periodical handling of bw meters */ 215118622Shsu 216118622Shsu/* 217118622Shsu * Pending upcalls are stored in a vector which is flushed when 218118622Shsu * full, or periodically 219118622Shsu */ 220215701Sdimstatic VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]); 221208744Szec#define V_bw_upcalls VNET(bw_upcalls) 222215701Sdimstatic VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */ 223208744Szec#define V_bw_upcalls_n VNET(bw_upcalls_n) 224215701Sdimstatic VNET_DEFINE(struct callout, bw_upcalls_ch); 225208744Szec#define V_bw_upcalls_ch VNET(bw_upcalls_ch) 226208744Szec 227118622Shsu#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */ 228118622Shsu 229253084Saestatic VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat); 230253084SaeVNET_PCPUSTAT_SYSINIT(pimstat); 231253084SaeVNET_PCPUSTAT_SYSUNINIT(pimstat); 232166622Sbms 233166622SbmsSYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM"); 234253084SaeSYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat, 235253084Sae pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)"); 236118622Shsu 237166623Sbmsstatic u_long pim_squelch_wholepkt = 0; 238166623SbmsSYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW, 239166623Sbms &pim_squelch_wholepkt, 0, 240166623Sbms "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified"); 241166623Sbms 242166622Sbmsextern struct domain inetdomain; 243190012Sbmsstatic const struct protosw in_pim_protosw = { 244166622Sbms .pr_type = SOCK_RAW, 245166622Sbms .pr_domain = &inetdomain, 246166622Sbms .pr_protocol = IPPROTO_PIM, 247166622Sbms .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, 248166622Sbms .pr_input = pim_input, 249166622Sbms .pr_output = (pr_output_t*)rip_output, 250166622Sbms .pr_ctloutput = rip_ctloutput, 251166622Sbms .pr_usrreqs = &rip_usrreqs 252166622Sbms}; 253166622Sbmsstatic const struct encaptab *pim_encap_cookie; 254166938Sbms 255166622Sbmsstatic int pim_encapcheck(const struct mbuf *, int, int, void *); 256166622Sbms 257118622Shsu/* 258118622Shsu * Note: the PIM Register encapsulation adds the following in front of a 259118622Shsu * data packet: 260118622Shsu * 261118622Shsu * struct pim_encap_hdr { 262118622Shsu * struct ip ip; 263118622Shsu * struct pim_encap_pimhdr pim; 264118622Shsu * } 265118622Shsu * 266118622Shsu */ 267118622Shsu 268118622Shsustruct pim_encap_pimhdr { 269118622Shsu struct pim pim; 270118622Shsu uint32_t flags; 271118622Shsu}; 272190012Sbms#define PIM_ENCAP_TTL 64 273118622Shsu 274118622Shsustatic struct ip pim_encap_iphdr = { 275118622Shsu#if BYTE_ORDER == LITTLE_ENDIAN 276118622Shsu sizeof(struct ip) >> 2, 277118622Shsu IPVERSION, 278118622Shsu#else 279118622Shsu IPVERSION, 280118622Shsu sizeof(struct ip) >> 2, 281118622Shsu#endif 282118622Shsu 0, /* tos */ 283118622Shsu sizeof(struct ip), /* total length */ 284118622Shsu 0, /* id */ 285133874Srwatson 0, /* frag offset */ 286190012Sbms PIM_ENCAP_TTL, 287118622Shsu IPPROTO_PIM, 288118622Shsu 0, /* checksum */ 289118622Shsu}; 290118622Shsu 291118622Shsustatic struct pim_encap_pimhdr pim_encap_pimhdr = { 292118622Shsu { 293118622Shsu PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */ 294118622Shsu 0, /* reserved */ 295118622Shsu 0, /* checksum */ 296118622Shsu }, 297118622Shsu 0 /* flags */ 298118622Shsu}; 299118622Shsu 300215701Sdimstatic VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID; 301208744Szec#define V_reg_vif_num VNET(reg_vif_num) 302215701Sdimstatic VNET_DEFINE(struct ifnet, multicast_register_if); 303208744Szec#define V_multicast_register_if VNET(multicast_register_if) 304118622Shsu 305118622Shsu/* 3061541Srgrimes * Private variables. 3071541Srgrimes */ 3081541Srgrimes 309190012Sbmsstatic u_long X_ip_mcast_src(int); 310190012Sbmsstatic int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *, 311190012Sbms struct ip_moptions *); 31292723Salfredstatic int X_ip_mrouter_done(void); 313190012Sbmsstatic int X_ip_mrouter_get(struct socket *, struct sockopt *); 314190012Sbmsstatic int X_ip_mrouter_set(struct socket *, struct sockopt *); 315190012Sbmsstatic int X_legal_vif_num(int); 316194581Srdivackystatic int X_mrt_ioctl(u_long, caddr_t, int); 31712579Sbde 318190012Sbmsstatic int add_bw_upcall(struct bw_upcall *); 319190012Sbmsstatic int add_mfc(struct mfcctl2 *); 320190012Sbmsstatic int add_vif(struct vifctl *); 321190012Sbmsstatic void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *); 322190012Sbmsstatic void bw_meter_process(void); 323190012Sbmsstatic void bw_meter_receive_packet(struct bw_meter *, int, 324190012Sbms struct timeval *); 325190012Sbmsstatic void bw_upcalls_send(void); 326190012Sbmsstatic int del_bw_upcall(struct bw_upcall *); 327190012Sbmsstatic int del_mfc(struct mfcctl2 *); 328190012Sbmsstatic int del_vif(vifi_t); 329190012Sbmsstatic int del_vif_locked(vifi_t); 330190012Sbmsstatic void expire_bw_meter_process(void *); 331190012Sbmsstatic void expire_bw_upcalls_send(void *); 332190012Sbmsstatic void expire_mfc(struct mfc *); 333190012Sbmsstatic void expire_upcalls(void *); 334190012Sbmsstatic void free_bw_list(struct bw_meter *); 335190012Sbmsstatic int get_sg_cnt(struct sioc_sg_req *); 336190012Sbmsstatic int get_vif_cnt(struct sioc_vif_req *); 337190012Sbmsstatic void if_detached_event(void *, struct ifnet *); 338190012Sbmsstatic int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); 339190012Sbmsstatic int ip_mrouter_init(struct socket *, int); 340190012Sbmsstatic __inline struct mfc * 341190012Sbms mfc_find(struct in_addr *, struct in_addr *); 342190012Sbmsstatic void phyint_send(struct ip *, struct vif *, struct mbuf *); 343190012Sbmsstatic struct mbuf * 344190012Sbms pim_register_prepare(struct ip *, struct mbuf *); 345190012Sbmsstatic int pim_register_send(struct ip *, struct vif *, 346190012Sbms struct mbuf *, struct mfc *); 347190012Sbmsstatic int pim_register_send_rp(struct ip *, struct vif *, 348190012Sbms struct mbuf *, struct mfc *); 349190012Sbmsstatic int pim_register_send_upcall(struct ip *, struct vif *, 350190012Sbms struct mbuf *, struct mfc *); 351190012Sbmsstatic void schedule_bw_meter(struct bw_meter *, struct timeval *); 352190012Sbmsstatic void send_packet(struct vif *, struct mbuf *); 353190012Sbmsstatic int set_api_config(uint32_t *); 354190012Sbmsstatic int set_assert(int); 355190012Sbmsstatic int socket_send(struct socket *, struct mbuf *, 356190012Sbms struct sockaddr_in *); 357190012Sbmsstatic void unschedule_bw_meter(struct bw_meter *); 3582531Swollman 3592531Swollman/* 360190012Sbms * Kernel multicast forwarding API capabilities and setup. 361118622Shsu * If more API capabilities are added to the kernel, they should be 362118622Shsu * recorded in `mrt_api_support'. 363118622Shsu */ 364190012Sbms#define MRT_API_VERSION 0x0305 365190012Sbms 366190012Sbmsstatic const int mrt_api_version = MRT_API_VERSION; 367118622Shsustatic const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | 368118622Shsu MRT_MFC_FLAGS_BORDER_VIF | 369118622Shsu MRT_MFC_RP | 370118622Shsu MRT_MFC_BW_UPCALL); 371215701Sdimstatic VNET_DEFINE(uint32_t, mrt_api_config); 372208744Szec#define V_mrt_api_config VNET(mrt_api_config) 373215701Sdimstatic VNET_DEFINE(int, pim_assert_enabled); 374208744Szec#define V_pim_assert_enabled VNET(pim_assert_enabled) 375190012Sbmsstatic struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */ 3762531Swollman 3772531Swollman/* 378190012Sbms * Find a route for a given origin IP address and multicast group address. 379190012Sbms * Statistics must be updated by the caller. 3802531Swollman */ 381190012Sbmsstatic __inline struct mfc * 382190012Sbmsmfc_find(struct in_addr *o, struct in_addr *g) 383106968Sluigi{ 384190012Sbms struct mfc *rt; 3859209Swollman 386190012Sbms MFC_LOCK_ASSERT(); 387119792Ssam 388208744Szec LIST_FOREACH(rt, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) { 389190012Sbms if (in_hosteq(rt->mfc_origin, *o) && 390190012Sbms in_hosteq(rt->mfc_mcastgrp, *g) && 391190012Sbms TAILQ_EMPTY(&rt->mfc_stall)) 392190012Sbms break; 393190012Sbms } 3942531Swollman 395190012Sbms return (rt); 3962531Swollman} 3972531Swollman 3982531Swollman/* 399190012Sbms * Handle MRT setsockopt commands to modify the multicast forwarding tables. 4001541Srgrimes */ 40112296Sphkstatic int 402106968SluigiX_ip_mrouter_set(struct socket *so, struct sockopt *sopt) 4031541Srgrimes{ 404106968Sluigi int error, optval; 405106968Sluigi vifi_t vifi; 406106968Sluigi struct vifctl vifc; 407118622Shsu struct mfcctl2 mfc; 408118622Shsu struct bw_upcall bw_upcall; 409118622Shsu uint32_t i; 4101541Srgrimes 411181803Sbz if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT) 412106968Sluigi return EPERM; 41338482Swollman 414106968Sluigi error = 0; 415106968Sluigi switch (sopt->sopt_name) { 416106968Sluigi case MRT_INIT: 417106968Sluigi error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 418106968Sluigi if (error) 419106968Sluigi break; 420106968Sluigi error = ip_mrouter_init(so, optval); 421106968Sluigi break; 42238482Swollman 423106968Sluigi case MRT_DONE: 424106968Sluigi error = ip_mrouter_done(); 425106968Sluigi break; 42638482Swollman 427106968Sluigi case MRT_ADD_VIF: 428106968Sluigi error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); 429106968Sluigi if (error) 430106968Sluigi break; 431106968Sluigi error = add_vif(&vifc); 432106968Sluigi break; 43338482Swollman 434106968Sluigi case MRT_DEL_VIF: 435106968Sluigi error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); 436106968Sluigi if (error) 437106968Sluigi break; 438106968Sluigi error = del_vif(vifi); 439106968Sluigi break; 44038482Swollman 441106968Sluigi case MRT_ADD_MFC: 442106968Sluigi case MRT_DEL_MFC: 443118622Shsu /* 444118622Shsu * select data size depending on API version. 445118622Shsu */ 446118622Shsu if (sopt->sopt_name == MRT_ADD_MFC && 447208744Szec V_mrt_api_config & MRT_API_FLAGS_ALL) { 448118622Shsu error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2), 449118622Shsu sizeof(struct mfcctl2)); 450118622Shsu } else { 451118622Shsu error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl), 452118622Shsu sizeof(struct mfcctl)); 453118622Shsu bzero((caddr_t)&mfc + sizeof(struct mfcctl), 454118622Shsu sizeof(mfc) - sizeof(struct mfcctl)); 455118622Shsu } 456106968Sluigi if (error) 457106968Sluigi break; 458106968Sluigi if (sopt->sopt_name == MRT_ADD_MFC) 459106968Sluigi error = add_mfc(&mfc); 460106968Sluigi else 461106968Sluigi error = del_mfc(&mfc); 462106968Sluigi break; 46338482Swollman 464106968Sluigi case MRT_ASSERT: 465106968Sluigi error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 466106968Sluigi if (error) 467106968Sluigi break; 468106968Sluigi set_assert(optval); 469106968Sluigi break; 47038482Swollman 471118622Shsu case MRT_API_CONFIG: 472118622Shsu error = sooptcopyin(sopt, &i, sizeof i, sizeof i); 473118622Shsu if (!error) 474118622Shsu error = set_api_config(&i); 475118622Shsu if (!error) 476118622Shsu error = sooptcopyout(sopt, &i, sizeof i); 477118622Shsu break; 478118622Shsu 479118622Shsu case MRT_ADD_BW_UPCALL: 480118622Shsu case MRT_DEL_BW_UPCALL: 481118622Shsu error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall, 482118622Shsu sizeof bw_upcall); 483118622Shsu if (error) 484118622Shsu break; 485118622Shsu if (sopt->sopt_name == MRT_ADD_BW_UPCALL) 486118622Shsu error = add_bw_upcall(&bw_upcall); 487118622Shsu else 488118622Shsu error = del_bw_upcall(&bw_upcall); 489118622Shsu break; 490118622Shsu 491106968Sluigi default: 492106968Sluigi error = EOPNOTSUPP; 493106968Sluigi break; 494106968Sluigi } 495106968Sluigi return error; 4962531Swollman} 4971541Srgrimes 4982531Swollman/* 4999209Swollman * Handle MRT getsockopt commands 5009209Swollman */ 50112296Sphkstatic int 502106968SluigiX_ip_mrouter_get(struct socket *so, struct sockopt *sopt) 5039209Swollman{ 504106968Sluigi int error; 5059209Swollman 506106968Sluigi switch (sopt->sopt_name) { 507106968Sluigi case MRT_VERSION: 508190012Sbms error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version); 509106968Sluigi break; 5109209Swollman 511106968Sluigi case MRT_ASSERT: 512208744Szec error = sooptcopyout(sopt, &V_pim_assert_enabled, 513208744Szec sizeof V_pim_assert_enabled); 514106968Sluigi break; 515106968Sluigi 516118622Shsu case MRT_API_SUPPORT: 517118622Shsu error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support); 518118622Shsu break; 519118622Shsu 520118622Shsu case MRT_API_CONFIG: 521208744Szec error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config); 522118622Shsu break; 523118622Shsu 524106968Sluigi default: 525106968Sluigi error = EOPNOTSUPP; 526106968Sluigi break; 527106968Sluigi } 528106968Sluigi return error; 5299209Swollman} 5309209Swollman 5319209Swollman/* 5322531Swollman * Handle ioctl commands to obtain information from the cache 5332531Swollman */ 53412296Sphkstatic int 535194581SrdivackyX_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused) 5362531Swollman{ 5372531Swollman int error = 0; 5381541Srgrimes 539134122Scsjp /* 540134122Scsjp * Currently the only function calling this ioctl routine is rtioctl(). 541134122Scsjp * Typically, only root can create the raw socket in order to execute 542134122Scsjp * this ioctl method, however the request might be coming from a prison 543134122Scsjp */ 544164033Srwatson error = priv_check(curthread, PRIV_NETINET_MROUTE); 545134122Scsjp if (error) 546134122Scsjp return (error); 5472531Swollman switch (cmd) { 548106968Sluigi case (SIOCGETVIFCNT): 549106968Sluigi error = get_vif_cnt((struct sioc_vif_req *)data); 550106968Sluigi break; 551106968Sluigi 552106968Sluigi case (SIOCGETSGCNT): 553106968Sluigi error = get_sg_cnt((struct sioc_sg_req *)data); 554106968Sluigi break; 555106968Sluigi 556106968Sluigi default: 557106968Sluigi error = EINVAL; 558106968Sluigi break; 5592531Swollman } 5602531Swollman return error; 5612531Swollman} 5621541Srgrimes 5632531Swollman/* 5649209Swollman * returns the packet, byte, rpf-failure count for the source group provided 5652531Swollman */ 5669209Swollmanstatic int 567106968Sluigiget_sg_cnt(struct sioc_sg_req *req) 5682531Swollman{ 569106968Sluigi struct mfc *rt; 5701541Srgrimes 571119792Ssam MFC_LOCK(); 572190012Sbms rt = mfc_find(&req->src, &req->grp); 573106968Sluigi if (rt == NULL) { 574119792Ssam MFC_UNLOCK(); 5759209Swollman req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 576106968Sluigi return EADDRNOTAVAIL; 577106968Sluigi } 578106968Sluigi req->pktcnt = rt->mfc_pkt_cnt; 579106968Sluigi req->bytecnt = rt->mfc_byte_cnt; 580106968Sluigi req->wrong_if = rt->mfc_wrong_if; 581119792Ssam MFC_UNLOCK(); 5822531Swollman return 0; 5832531Swollman} 5841541Srgrimes 5852531Swollman/* 5869209Swollman * returns the input and output packet and byte counts on the vif provided 5872531Swollman */ 5889209Swollmanstatic int 589106968Sluigiget_vif_cnt(struct sioc_vif_req *req) 5902531Swollman{ 591106968Sluigi vifi_t vifi = req->vifi; 5921541Srgrimes 593119792Ssam VIF_LOCK(); 594208744Szec if (vifi >= V_numvifs) { 595119792Ssam VIF_UNLOCK(); 596106968Sluigi return EINVAL; 597119792Ssam } 5989209Swollman 599208744Szec req->icount = V_viftable[vifi].v_pkt_in; 600208744Szec req->ocount = V_viftable[vifi].v_pkt_out; 601208744Szec req->ibytes = V_viftable[vifi].v_bytes_in; 602208744Szec req->obytes = V_viftable[vifi].v_bytes_out; 603119792Ssam VIF_UNLOCK(); 6041541Srgrimes 6052531Swollman return 0; 6062531Swollman} 6072531Swollman 608121446Ssamstatic void 609162719Sbmsif_detached_event(void *arg __unused, struct ifnet *ifp) 610162719Sbms{ 611162719Sbms vifi_t vifi; 612255248Sjhb u_long i; 613162719Sbms 614167116Sbms MROUTER_LOCK(); 615190012Sbms 616181803Sbz if (V_ip_mrouter == NULL) { 617167116Sbms MROUTER_UNLOCK(); 618190012Sbms return; 619162719Sbms } 620162719Sbms 621190012Sbms VIF_LOCK(); 622190012Sbms MFC_LOCK(); 623190012Sbms 624162719Sbms /* 625162719Sbms * Tear down multicast forwarder state associated with this ifnet. 626162719Sbms * 1. Walk the vif list, matching vifs against this ifnet. 627162719Sbms * 2. Walk the multicast forwarding cache (mfc) looking for 628162719Sbms * inner matches with this vif's index. 629190012Sbms * 3. Expire any matching multicast forwarding cache entries. 630190012Sbms * 4. Free vif state. This should disable ALLMULTI on the interface. 631162719Sbms */ 632208744Szec for (vifi = 0; vifi < V_numvifs; vifi++) { 633208744Szec if (V_viftable[vifi].v_ifp != ifp) 634162719Sbms continue; 635190012Sbms for (i = 0; i < mfchashsize; i++) { 636190012Sbms struct mfc *rt, *nrt; 637255249Sjhb 638255249Sjhb LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { 639190012Sbms if (rt->mfc_parent == vifi) { 640190012Sbms expire_mfc(rt); 641190012Sbms } 642162719Sbms } 643162719Sbms } 644162719Sbms del_vif_locked(vifi); 645162719Sbms } 646190012Sbms 647162719Sbms MFC_UNLOCK(); 648162719Sbms VIF_UNLOCK(); 649162719Sbms 650167116Sbms MROUTER_UNLOCK(); 651162719Sbms} 652162719Sbms 6531541Srgrimes/* 654190012Sbms * Enable multicast forwarding. 6551541Srgrimes */ 65612296Sphkstatic int 657106968Sluigiip_mrouter_init(struct socket *so, int version) 6581541Srgrimes{ 659183550Szec 660190054Sbms CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__, 661190054Sbms so->so_type, so->so_proto->pr_protocol); 6629209Swollman 663106968Sluigi if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) 664106968Sluigi return EOPNOTSUPP; 6651541Srgrimes 66638482Swollman if (version != 1) 6679209Swollman return ENOPROTOOPT; 6689209Swollman 669167116Sbms MROUTER_LOCK(); 670123690Ssam 671208744Szec if (ip_mrouter_unloading) { 672167116Sbms MROUTER_UNLOCK(); 673208744Szec return ENOPROTOOPT; 674123690Ssam } 6751541Srgrimes 676208744Szec if (V_ip_mrouter != NULL) { 677167116Sbms MROUTER_UNLOCK(); 678208744Szec return EADDRINUSE; 679166972Sbms } 680162719Sbms 681208744Szec V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash, 682208744Szec HASH_NOWAIT); 683190012Sbms 684208744Szec callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, 685208744Szec curvnet); 686208744Szec callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, 687208744Szec curvnet); 688208744Szec callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, 689208744Szec curvnet); 6909209Swollman 691181803Sbz V_ip_mrouter = so; 692208744Szec ip_mrouter_cnt++; 693119792Ssam 694167116Sbms MROUTER_UNLOCK(); 695123690Ssam 696190054Sbms CTR1(KTR_IPMF, "%s: done", __func__); 6972531Swollman 6982531Swollman return 0; 6991541Srgrimes} 7001541Srgrimes 7011541Srgrimes/* 702190012Sbms * Disable multicast forwarding. 7031541Srgrimes */ 70412296Sphkstatic int 705106968SluigiX_ip_mrouter_done(void) 7061541Srgrimes{ 707255235Sae struct ifnet *ifp; 708255248Sjhb u_long i; 7092531Swollman vifi_t vifi; 7101541Srgrimes 711167116Sbms MROUTER_LOCK(); 712123690Ssam 713181803Sbz if (V_ip_mrouter == NULL) { 714167116Sbms MROUTER_UNLOCK(); 715123690Ssam return EINVAL; 716123690Ssam } 717123690Ssam 718119792Ssam /* 719119792Ssam * Detach/disable hooks to the reset of the system. 720119792Ssam */ 721181803Sbz V_ip_mrouter = NULL; 722208744Szec ip_mrouter_cnt--; 723208744Szec V_mrt_api_config = 0; 7241541Srgrimes 725121700Ssam VIF_LOCK(); 726190012Sbms 7272531Swollman /* 7282531Swollman * For each phyint in use, disable promiscuous reception of all IP 7292531Swollman * multicasts. 7302531Swollman */ 731208744Szec for (vifi = 0; vifi < V_numvifs; vifi++) { 732208744Szec if (!in_nullhost(V_viftable[vifi].v_lcl_addr) && 733208744Szec !(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { 734208744Szec ifp = V_viftable[vifi].v_ifp; 73521666Swollman if_allmulti(ifp, 0); 7362531Swollman } 7372531Swollman } 738208744Szec bzero((caddr_t)V_viftable, sizeof(V_viftable)); 739208744Szec V_numvifs = 0; 740208744Szec V_pim_assert_enabled = 0; 741208744Szec 742123690Ssam VIF_UNLOCK(); 743190012Sbms 744208744Szec callout_stop(&V_expire_upcalls_ch); 745208744Szec callout_stop(&V_bw_upcalls_ch); 746208744Szec callout_stop(&V_bw_meter_ch); 7472531Swollman 748121700Ssam MFC_LOCK(); 74942777Sfenner 750190012Sbms /* 751190012Sbms * Free all multicast forwarding cache entries. 752190012Sbms * Do not use hashdestroy(), as we must perform other cleanup. 753190012Sbms */ 754190012Sbms for (i = 0; i < mfchashsize; i++) { 755190012Sbms struct mfc *rt, *nrt; 756255249Sjhb 757255249Sjhb LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { 758190012Sbms expire_mfc(rt); 7591541Srgrimes } 7609209Swollman } 761208744Szec free(V_mfchashtbl, M_MRTABLE); 762208744Szec V_mfchashtbl = NULL; 763190012Sbms 764208744Szec bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize); 765190012Sbms 766208744Szec V_bw_upcalls_n = 0; 767208744Szec bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); 768190012Sbms 769123690Ssam MFC_UNLOCK(); 770118622Shsu 771208744Szec V_reg_vif_num = VIFI_INVALID; 772118622Shsu 773167116Sbms MROUTER_UNLOCK(); 774123690Ssam 775190054Sbms CTR1(KTR_IPMF, "%s: done", __func__); 7762531Swollman 7772531Swollman return 0; 7781541Srgrimes} 7791541Srgrimes 7801541Srgrimes/* 7819209Swollman * Set PIM assert processing global 7829209Swollman */ 7839209Swollmanstatic int 784106968Sluigiset_assert(int i) 7859209Swollman{ 78638482Swollman if ((i != 1) && (i != 0)) 7879209Swollman return EINVAL; 7889209Swollman 789208744Szec V_pim_assert_enabled = i; 7909209Swollman 7919209Swollman return 0; 7929209Swollman} 7939209Swollman 7949209Swollman/* 795118622Shsu * Configure API capabilities 796118622Shsu */ 797118622Shsuint 798118622Shsuset_api_config(uint32_t *apival) 799118622Shsu{ 800255248Sjhb u_long i; 801118622Shsu 802118622Shsu /* 803118622Shsu * We can set the API capabilities only if it is the first operation 804118622Shsu * after MRT_INIT. I.e.: 805118622Shsu * - there are no vifs installed 806118622Shsu * - pim_assert is not enabled 807118622Shsu * - the MFC table is empty 808118622Shsu */ 809208744Szec if (V_numvifs > 0) { 810118622Shsu *apival = 0; 811118622Shsu return EPERM; 812118622Shsu } 813208744Szec if (V_pim_assert_enabled) { 814118622Shsu *apival = 0; 815118622Shsu return EPERM; 816118622Shsu } 817190012Sbms 818190012Sbms MFC_LOCK(); 819190012Sbms 820190012Sbms for (i = 0; i < mfchashsize; i++) { 821208744Szec if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) { 822249559Sdelphij MFC_UNLOCK(); 823118622Shsu *apival = 0; 824118622Shsu return EPERM; 825118622Shsu } 826118622Shsu } 827118622Shsu 828190012Sbms MFC_UNLOCK(); 829190012Sbms 830208744Szec V_mrt_api_config = *apival & mrt_api_support; 831208744Szec *apival = V_mrt_api_config; 832118622Shsu 833118622Shsu return 0; 834118622Shsu} 835118622Shsu 836118622Shsu/* 8371541Srgrimes * Add a vif to the vif table 8381541Srgrimes */ 8391541Srgrimesstatic int 840106968Sluigiadd_vif(struct vifctl *vifcp) 8411541Srgrimes{ 842208744Szec struct vif *vifp = V_viftable + vifcp->vifc_vifi; 843106968Sluigi struct sockaddr_in sin = {sizeof sin, AF_INET}; 8442531Swollman struct ifaddr *ifa; 8452531Swollman struct ifnet *ifp; 846119792Ssam int error; 8471541Srgrimes 848119792Ssam VIF_LOCK(); 849119792Ssam if (vifcp->vifc_vifi >= MAXVIFS) { 850119792Ssam VIF_UNLOCK(); 851106968Sluigi return EINVAL; 852119792Ssam } 853166575Sbms /* rate limiting is no longer supported by this code */ 854166575Sbms if (vifcp->vifc_rate_limit != 0) { 855166575Sbms log(LOG_ERR, "rate limiting is no longer supported\n"); 856166575Sbms VIF_UNLOCK(); 857166575Sbms return EINVAL; 858166575Sbms } 859190012Sbms if (!in_nullhost(vifp->v_lcl_addr)) { 860119792Ssam VIF_UNLOCK(); 861106968Sluigi return EADDRINUSE; 862119792Ssam } 863190012Sbms if (in_nullhost(vifcp->vifc_lcl_addr)) { 864119792Ssam VIF_UNLOCK(); 865106968Sluigi return EADDRNOTAVAIL; 866119792Ssam } 8671541Srgrimes 8682531Swollman /* Find the interface with an address in AF_INET family */ 869118622Shsu if (vifcp->vifc_flags & VIFF_REGISTER) { 870118622Shsu /* 871118622Shsu * XXX: Because VIFF_REGISTER does not really need a valid 872118622Shsu * local interface (e.g. it could be 127.0.0.2), we don't 873118622Shsu * check its address. 874118622Shsu */ 875118622Shsu ifp = NULL; 876166622Sbms } else { 877118622Shsu sin.sin_addr = vifcp->vifc_lcl_addr; 878118622Shsu ifa = ifa_ifwithaddr((struct sockaddr *)&sin); 879119792Ssam if (ifa == NULL) { 880119792Ssam VIF_UNLOCK(); 881118622Shsu return EADDRNOTAVAIL; 882119792Ssam } 883118622Shsu ifp = ifa->ifa_ifp; 884194760Srwatson ifa_free(ifa); 885118622Shsu } 8861541Srgrimes 887166549Sbms if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) { 888190054Sbms CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__); 889166549Sbms VIF_UNLOCK(); 890166549Sbms return EOPNOTSUPP; 891118622Shsu } else if (vifcp->vifc_flags & VIFF_REGISTER) { 892208744Szec ifp = &V_multicast_register_if; 893190054Sbms CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp); 894208744Szec if (V_reg_vif_num == VIFI_INVALID) { 895208744Szec if_initname(&V_multicast_register_if, "register_vif", 0); 896208744Szec V_multicast_register_if.if_flags = IFF_LOOPBACK; 897208744Szec V_reg_vif_num = vifcp->vifc_vifi; 898118622Shsu } 899106968Sluigi } else { /* Make sure the interface supports multicast */ 900119792Ssam if ((ifp->if_flags & IFF_MULTICAST) == 0) { 901119792Ssam VIF_UNLOCK(); 9022531Swollman return EOPNOTSUPP; 903119792Ssam } 9041541Srgrimes 9052531Swollman /* Enable promiscuous reception of all IP multicasts from the if */ 90622967Swollman error = if_allmulti(ifp, 1); 907119792Ssam if (error) { 908119792Ssam VIF_UNLOCK(); 9092531Swollman return error; 910119792Ssam } 9112531Swollman } 9121541Srgrimes 9132531Swollman vifp->v_flags = vifcp->vifc_flags; 9142531Swollman vifp->v_threshold = vifcp->vifc_threshold; 9152531Swollman vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 9162531Swollman vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 9172531Swollman vifp->v_ifp = ifp; 9182531Swollman /* initialize per vif pkt counters */ 9192531Swollman vifp->v_pkt_in = 0; 9202531Swollman vifp->v_pkt_out = 0; 9219209Swollman vifp->v_bytes_in = 0; 9229209Swollman vifp->v_bytes_out = 0; 9232531Swollman 9242531Swollman /* Adjust numvifs up if the vifi is higher than numvifs */ 925208744Szec if (V_numvifs <= vifcp->vifc_vifi) 926208744Szec V_numvifs = vifcp->vifc_vifi + 1; 9272531Swollman 928119792Ssam VIF_UNLOCK(); 929119792Ssam 930190054Sbms CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__, 931190148Sbms (int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr), 932190054Sbms (int)vifcp->vifc_threshold); 9332531Swollman 9342531Swollman return 0; 9351541Srgrimes} 9361541Srgrimes 9371541Srgrimes/* 9381541Srgrimes * Delete a vif from the vif table 9391541Srgrimes */ 9401541Srgrimesstatic int 941162719Sbmsdel_vif_locked(vifi_t vifi) 9421541Srgrimes{ 943106968Sluigi struct vif *vifp; 9441541Srgrimes 945162719Sbms VIF_LOCK_ASSERT(); 946119792Ssam 947208744Szec if (vifi >= V_numvifs) { 948106968Sluigi return EINVAL; 949119792Ssam } 950208744Szec vifp = &V_viftable[vifi]; 951190012Sbms if (in_nullhost(vifp->v_lcl_addr)) { 952106968Sluigi return EADDRNOTAVAIL; 953119792Ssam } 9541541Srgrimes 955118622Shsu if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) 956106968Sluigi if_allmulti(vifp->v_ifp, 0); 9571541Srgrimes 958118622Shsu if (vifp->v_flags & VIFF_REGISTER) 959208744Szec V_reg_vif_num = VIFI_INVALID; 960118622Shsu 9612531Swollman bzero((caddr_t)vifp, sizeof (*vifp)); 9621541Srgrimes 963190054Sbms CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi); 96438482Swollman 9652531Swollman /* Adjust numvifs down */ 966208744Szec for (vifi = V_numvifs; vifi > 0; vifi--) 967208744Szec if (!in_nullhost(V_viftable[vifi-1].v_lcl_addr)) 968106968Sluigi break; 969208744Szec V_numvifs = vifi; 9702531Swollman 971162719Sbms return 0; 972162719Sbms} 973162719Sbms 974162719Sbmsstatic int 975162719Sbmsdel_vif(vifi_t vifi) 976162719Sbms{ 977162719Sbms int cc; 978162719Sbms 979162719Sbms VIF_LOCK(); 980162719Sbms cc = del_vif_locked(vifi); 981119792Ssam VIF_UNLOCK(); 9822531Swollman 983162719Sbms return cc; 9841541Srgrimes} 9851541Srgrimes 9861541Srgrimes/* 987106968Sluigi * update an mfc entry without resetting counters and S,G addresses. 988106968Sluigi */ 989106968Sluigistatic void 990118622Shsuupdate_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 991106968Sluigi{ 992106968Sluigi int i; 993106968Sluigi 994106968Sluigi rt->mfc_parent = mfccp->mfcc_parent; 995208744Szec for (i = 0; i < V_numvifs; i++) { 996106968Sluigi rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; 997208744Szec rt->mfc_flags[i] = mfccp->mfcc_flags[i] & V_mrt_api_config & 998118622Shsu MRT_MFC_FLAGS_ALL; 999118622Shsu } 1000118622Shsu /* set the RP address */ 1001208744Szec if (V_mrt_api_config & MRT_MFC_RP) 1002118622Shsu rt->mfc_rp = mfccp->mfcc_rp; 1003118622Shsu else 1004118622Shsu rt->mfc_rp.s_addr = INADDR_ANY; 1005106968Sluigi} 1006106968Sluigi 1007106968Sluigi/* 1008106968Sluigi * fully initialize an mfc entry from the parameter. 1009106968Sluigi */ 1010106968Sluigistatic void 1011118622Shsuinit_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) 1012106968Sluigi{ 1013106968Sluigi rt->mfc_origin = mfccp->mfcc_origin; 1014106968Sluigi rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 1015106968Sluigi 1016106968Sluigi update_mfc_params(rt, mfccp); 1017106968Sluigi 1018106968Sluigi /* initialize pkt counters per src-grp */ 1019106968Sluigi rt->mfc_pkt_cnt = 0; 1020106968Sluigi rt->mfc_byte_cnt = 0; 1021106968Sluigi rt->mfc_wrong_if = 0; 1022190012Sbms timevalclear(&rt->mfc_last_assert); 1023106968Sluigi} 1024106968Sluigi 1025190012Sbmsstatic void 1026190012Sbmsexpire_mfc(struct mfc *rt) 1027190012Sbms{ 1028190012Sbms struct rtdetq *rte, *nrte; 1029106968Sluigi 1030197148Sbms MFC_LOCK_ASSERT(); 1031197148Sbms 1032190012Sbms free_bw_list(rt->mfc_bw_meter); 1033190012Sbms 1034190012Sbms TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { 1035190012Sbms m_freem(rte->m); 1036190012Sbms TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); 1037190012Sbms free(rte, M_MRTABLE); 1038190012Sbms } 1039190012Sbms 1040190012Sbms LIST_REMOVE(rt, mfc_hash); 1041190012Sbms free(rt, M_MRTABLE); 1042190012Sbms} 1043190012Sbms 1044106968Sluigi/* 10452531Swollman * Add an mfc entry 10461541Srgrimes */ 10471541Srgrimesstatic int 1048118622Shsuadd_mfc(struct mfcctl2 *mfccp) 10491541Srgrimes{ 10502531Swollman struct mfc *rt; 1051190012Sbms struct rtdetq *rte, *nrte; 1052190012Sbms u_long hash = 0; 1053106968Sluigi u_short nstl; 10541541Srgrimes 1055119792Ssam VIF_LOCK(); 1056119792Ssam MFC_LOCK(); 1057119792Ssam 1058190012Sbms rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); 10591541Srgrimes 10602531Swollman /* If an entry already exists, just update the fields */ 10612531Swollman if (rt) { 1062190054Sbms CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x", 1063190148Sbms __func__, inet_ntoa(mfccp->mfcc_origin), 1064190054Sbms (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1065190054Sbms mfccp->mfcc_parent); 1066106968Sluigi update_mfc_params(rt, mfccp); 1067119792Ssam MFC_UNLOCK(); 1068119792Ssam VIF_UNLOCK(); 1069190012Sbms return (0); 10702531Swollman } 10711541Srgrimes 1072133874Srwatson /* 10732531Swollman * Find the entry for which the upcall was made and update 10742531Swollman */ 1075190012Sbms nstl = 0; 1076190012Sbms hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); 1077208744Szec LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { 1078190012Sbms if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 1079190012Sbms in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && 1080190012Sbms !TAILQ_EMPTY(&rt->mfc_stall)) { 1081190054Sbms CTR5(KTR_IPMF, 1082190054Sbms "%s: add mfc orig %s group %lx parent %x qh %p", 1083190148Sbms __func__, inet_ntoa(mfccp->mfcc_origin), 1084190054Sbms (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), 1085190054Sbms mfccp->mfcc_parent, 1086190054Sbms TAILQ_FIRST(&rt->mfc_stall)); 1087190054Sbms if (nstl++) 1088190054Sbms CTR1(KTR_IPMF, "%s: multiple matches", __func__); 10891541Srgrimes 1090190012Sbms init_mfc_params(rt, mfccp); 1091190012Sbms rt->mfc_expire = 0; /* Don't clean this guy up */ 1092208744Szec V_nexpire[hash]--; 10931541Srgrimes 1094190012Sbms /* Free queued packets, but attempt to forward them first. */ 1095190012Sbms TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { 1096190012Sbms if (rte->ifp != NULL) 1097190012Sbms ip_mdq(rte->m, rte->ifp, rt, -1); 1098190012Sbms m_freem(rte->m); 1099190012Sbms TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); 1100190012Sbms rt->mfc_nstall--; 1101190012Sbms free(rte, M_MRTABLE); 1102190012Sbms } 11031541Srgrimes } 11042531Swollman } 11051541Srgrimes 11062531Swollman /* 11072531Swollman * It is possible that an entry is being inserted without an upcall 11082531Swollman */ 11092531Swollman if (nstl == 0) { 1110190054Sbms CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__); 1111208744Szec LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { 1112190012Sbms if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 1113190012Sbms in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { 1114190012Sbms init_mfc_params(rt, mfccp); 1115190012Sbms if (rt->mfc_expire) 1116208744Szec V_nexpire[hash]--; 1117190012Sbms rt->mfc_expire = 0; 1118190012Sbms break; /* XXX */ 1119190012Sbms } 11202531Swollman } 1121190012Sbms 1122106968Sluigi if (rt == NULL) { /* no upcall, so make a new entry */ 112342777Sfenner rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 112442777Sfenner if (rt == NULL) { 1125119792Ssam MFC_UNLOCK(); 1126119792Ssam VIF_UNLOCK(); 1127190012Sbms return (ENOBUFS); 11282531Swollman } 1129133874Srwatson 1130106968Sluigi init_mfc_params(rt, mfccp); 1131190012Sbms TAILQ_INIT(&rt->mfc_stall); 1132190012Sbms rt->mfc_nstall = 0; 1133190012Sbms 11349209Swollman rt->mfc_expire = 0; 1135190012Sbms rt->mfc_bw_meter = NULL; 1136133874Srwatson 1137106968Sluigi /* insert new entry at head of hash chain */ 1138208744Szec LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); 11392531Swollman } 11402531Swollman } 1141190012Sbms 1142119792Ssam MFC_UNLOCK(); 1143119792Ssam VIF_UNLOCK(); 1144190012Sbms 1145190012Sbms return (0); 11461541Srgrimes} 11471541Srgrimes 11481541Srgrimes/* 11492531Swollman * Delete an mfc entry 11501541Srgrimes */ 11511541Srgrimesstatic int 1152118622Shsudel_mfc(struct mfcctl2 *mfccp) 11531541Srgrimes{ 1154133874Srwatson struct in_addr origin; 1155133874Srwatson struct in_addr mcastgrp; 1156133874Srwatson struct mfc *rt; 11571541Srgrimes 11582531Swollman origin = mfccp->mfcc_origin; 11592531Swollman mcastgrp = mfccp->mfcc_mcastgrp; 11601541Srgrimes 1161190054Sbms CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__, 1162190054Sbms inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr)); 11631541Srgrimes 1164119792Ssam MFC_LOCK(); 11659209Swollman 1166190012Sbms rt = mfc_find(&origin, &mcastgrp); 116742777Sfenner if (rt == NULL) { 1168119792Ssam MFC_UNLOCK(); 11699209Swollman return EADDRNOTAVAIL; 11702531Swollman } 11711541Srgrimes 1172118622Shsu /* 1173118622Shsu * free the bw_meter entries 1174118622Shsu */ 1175190012Sbms free_bw_list(rt->mfc_bw_meter); 1176118622Shsu rt->mfc_bw_meter = NULL; 1177118622Shsu 1178190012Sbms LIST_REMOVE(rt, mfc_hash); 117942777Sfenner free(rt, M_MRTABLE); 11801541Srgrimes 1181119792Ssam MFC_UNLOCK(); 1182119792Ssam 1183190012Sbms return (0); 11841541Srgrimes} 11851541Srgrimes 11861541Srgrimes/* 1187190012Sbms * Send a message to the routing daemon on the multicast routing socket. 11889209Swollman */ 11899209Swollmanstatic int 1190106968Sluigisocket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) 11919209Swollman{ 1192106968Sluigi if (s) { 1193131151Srwatson SOCKBUF_LOCK(&s->so_rcv); 1194131151Srwatson if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm, 1195131151Srwatson NULL) != 0) { 1196131151Srwatson sorwakeup_locked(s); 1197106968Sluigi return 0; 11989209Swollman } 1199131151Srwatson SOCKBUF_UNLOCK(&s->so_rcv); 1200106968Sluigi } 1201106968Sluigi m_freem(mm); 1202106968Sluigi return -1; 12039209Swollman} 12049209Swollman 12059209Swollman/* 12062531Swollman * IP multicast forwarding function. This function assumes that the packet 12072531Swollman * pointed to by "ip" has arrived on (or is about to be sent to) the interface 12082531Swollman * pointed to by "ifp", and the packet is to be relayed to other networks 12092531Swollman * that have members of the packet's destination IP multicast group. 12102531Swollman * 12119209Swollman * The packet is returned unscathed to the caller, unless it is 12129209Swollman * erroneous, in which case a non-zero return value tells the caller to 12132531Swollman * discard it. 12141541Srgrimes */ 12152531Swollman 12162531Swollman#define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 12172531Swollman 121812296Sphkstatic int 1219118622ShsuX_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, 1220118622Shsu struct ip_moptions *imo) 12211541Srgrimes{ 1222106968Sluigi struct mfc *rt; 1223119792Ssam int error; 12249209Swollman vifi_t vifi; 12251541Srgrimes 1226190054Sbms CTR3(KTR_IPMF, "ip_mforward: delete mfc orig %s group %lx ifp %p", 1227190148Sbms inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp); 12281541Srgrimes 122980354Sfenner if (ip->ip_hl < (sizeof(struct ip) + TUNNEL_LEN) >> 2 || 1230106968Sluigi ((u_char *)(ip + 1))[1] != IPOPT_LSRR ) { 12312531Swollman /* 12329209Swollman * Packet arrived via a physical interface or 1233118622Shsu * an encapsulated tunnel or a register_vif. 12342531Swollman */ 12352531Swollman } else { 12362531Swollman /* 12372531Swollman * Packet arrived through a source-route tunnel. 12389209Swollman * Source-route tunnels are no longer supported. 12392531Swollman */ 1240190054Sbms return (1); 12419209Swollman } 12429209Swollman 1243119792Ssam VIF_LOCK(); 1244119792Ssam MFC_LOCK(); 1245208744Szec if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) { 1246166629Sbms if (ip->ip_ttl < MAXTTL) 1247106968Sluigi ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1248119792Ssam error = ip_mdq(m, ifp, NULL, vifi); 1249119792Ssam MFC_UNLOCK(); 1250119792Ssam VIF_UNLOCK(); 1251119792Ssam return error; 12522531Swollman } 12532531Swollman 12542531Swollman /* 12552531Swollman * Don't forward a packet with time-to-live of zero or one, 12562531Swollman * or a packet destined to a local-only group. 12572531Swollman */ 1258167593Sbms if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) { 1259119792Ssam MFC_UNLOCK(); 1260119792Ssam VIF_UNLOCK(); 12619209Swollman return 0; 1262119792Ssam } 12632531Swollman 12642531Swollman /* 12652531Swollman * Determine forwarding vifs from the forwarding cache table 12662531Swollman */ 1267190966Srwatson MRTSTAT_INC(mrts_mfc_lookups); 1268190012Sbms rt = mfc_find(&ip->ip_src, &ip->ip_dst); 12692531Swollman 12702531Swollman /* Entry exists, so forward if necessary */ 12712531Swollman if (rt != NULL) { 1272119792Ssam error = ip_mdq(m, ifp, rt, -1); 1273119792Ssam MFC_UNLOCK(); 1274119792Ssam VIF_UNLOCK(); 1275119792Ssam return error; 12769209Swollman } else { 12772531Swollman /* 12782531Swollman * If we don't have a route for packet's origin, 1279106968Sluigi * Make a copy of the packet & send message to routing daemon 12802531Swollman */ 12812531Swollman 1282106968Sluigi struct mbuf *mb0; 1283106968Sluigi struct rtdetq *rte; 1284106968Sluigi u_long hash; 128514549Sfenner int hlen = ip->ip_hl << 2; 12862531Swollman 1287190966Srwatson MRTSTAT_INC(mrts_mfc_misses); 1288190966Srwatson MRTSTAT_INC(mrts_no_route); 1289190054Sbms CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)", 1290190054Sbms inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr)); 12912531Swollman 12929209Swollman /* 12939209Swollman * Allocate mbufs early so that we don't do extra work if we are 129414549Sfenner * just going to fail anyway. Make sure to pullup the header so 129514549Sfenner * that other people can't step on it. 12969209Swollman */ 1297190012Sbms rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, 1298190012Sbms M_NOWAIT|M_ZERO); 129942777Sfenner if (rte == NULL) { 1300119792Ssam MFC_UNLOCK(); 1301119792Ssam VIF_UNLOCK(); 13029209Swollman return ENOBUFS; 13039209Swollman } 1304190012Sbms 1305243882Sglebius mb0 = m_copypacket(m, M_NOWAIT); 130614549Sfenner if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) 130714549Sfenner mb0 = m_pullup(mb0, hlen); 13089209Swollman if (mb0 == NULL) { 130942777Sfenner free(rte, M_MRTABLE); 1310119792Ssam MFC_UNLOCK(); 1311119792Ssam VIF_UNLOCK(); 13129209Swollman return ENOBUFS; 13139209Swollman } 13149209Swollman 1315106968Sluigi /* is there an upcall waiting for this flow ? */ 1316190012Sbms hash = MFCHASH(ip->ip_src, ip->ip_dst); 1317208744Szec LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) { 1318190012Sbms if (in_hosteq(ip->ip_src, rt->mfc_origin) && 1319190012Sbms in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && 1320190012Sbms !TAILQ_EMPTY(&rt->mfc_stall)) 1321190012Sbms break; 13222531Swollman } 13232531Swollman 132442777Sfenner if (rt == NULL) { 13259209Swollman int i; 13269209Swollman struct igmpmsg *im; 1327106968Sluigi struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1328106968Sluigi struct mbuf *mm; 13299209Swollman 1330106968Sluigi /* 1331106968Sluigi * Locate the vifi for the incoming interface for this packet. 1332106968Sluigi * If none found, drop packet. 1333106968Sluigi */ 1334208744Szec for (vifi = 0; vifi < V_numvifs && 1335208744Szec V_viftable[vifi].v_ifp != ifp; vifi++) 1336106968Sluigi ; 1337208744Szec if (vifi >= V_numvifs) /* vif not found, drop packet */ 1338106968Sluigi goto non_fatal; 1339106968Sluigi 13402531Swollman /* no upcall, so make a new entry */ 134142777Sfenner rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1342106968Sluigi if (rt == NULL) 1343106968Sluigi goto fail; 1344190054Sbms 13459209Swollman /* Make a copy of the header to send to the user level process */ 134617137Sfenner mm = m_copy(mb0, 0, hlen); 1347106968Sluigi if (mm == NULL) 1348106968Sluigi goto fail1; 13492531Swollman 1350133874Srwatson /* 1351133874Srwatson * Send message to routing daemon to install 13529209Swollman * a route into the kernel table 13539209Swollman */ 1354133874Srwatson 13559209Swollman im = mtod(mm, struct igmpmsg *); 1356106968Sluigi im->im_msgtype = IGMPMSG_NOCACHE; 1357106968Sluigi im->im_mbz = 0; 1358106968Sluigi im->im_vif = vifi; 13599209Swollman 1360190966Srwatson MRTSTAT_INC(mrts_upcalls); 13619209Swollman 1362106968Sluigi k_igmpsrc.sin_addr = ip->ip_src; 1363181803Sbz if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { 1364190054Sbms CTR0(KTR_IPMF, "ip_mforward: socket queue full"); 1365190966Srwatson MRTSTAT_INC(mrts_upq_sockfull); 1366106968Sluigifail1: 1367106968Sluigi free(rt, M_MRTABLE); 1368106968Sluigifail: 136942777Sfenner free(rte, M_MRTABLE); 13709266Swollman m_freem(mb0); 1371119792Ssam MFC_UNLOCK(); 1372119792Ssam VIF_UNLOCK(); 13739209Swollman return ENOBUFS; 13749209Swollman } 13759209Swollman 13762531Swollman /* insert new entry at head of hash chain */ 13772531Swollman rt->mfc_origin.s_addr = ip->ip_src.s_addr; 13782531Swollman rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; 13799209Swollman rt->mfc_expire = UPCALL_EXPIRE; 1380208744Szec V_nexpire[hash]++; 1381208744Szec for (i = 0; i < V_numvifs; i++) { 13829209Swollman rt->mfc_ttls[i] = 0; 1383118622Shsu rt->mfc_flags[i] = 0; 1384118622Shsu } 13859209Swollman rt->mfc_parent = -1; 13862531Swollman 1387190012Sbms /* clear the RP address */ 1388190012Sbms rt->mfc_rp.s_addr = INADDR_ANY; 1389118622Shsu rt->mfc_bw_meter = NULL; 1390118622Shsu 1391201254Ssyrinx /* initialize pkt counters per src-grp */ 1392201254Ssyrinx rt->mfc_pkt_cnt = 0; 1393201254Ssyrinx rt->mfc_byte_cnt = 0; 1394201254Ssyrinx rt->mfc_wrong_if = 0; 1395201254Ssyrinx timevalclear(&rt->mfc_last_assert); 1396201254Ssyrinx 1397201254Ssyrinx TAILQ_INIT(&rt->mfc_stall); 1398201254Ssyrinx rt->mfc_nstall = 0; 1399201254Ssyrinx 14002531Swollman /* link into table */ 1401208744Szec LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash); 1402190012Sbms TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link); 1403190012Sbms rt->mfc_nstall++; 14042531Swollman 14059209Swollman } else { 1406190012Sbms /* determine if queue has overflowed */ 1407190012Sbms if (rt->mfc_nstall > MAX_UPQ) { 1408190966Srwatson MRTSTAT_INC(mrts_upq_ovflw); 1409106968Sluiginon_fatal: 141042777Sfenner free(rte, M_MRTABLE); 14119266Swollman m_freem(mb0); 1412119792Ssam MFC_UNLOCK(); 1413119792Ssam VIF_UNLOCK(); 1414190012Sbms return (0); 14159209Swollman } 1416190012Sbms TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link); 1417190012Sbms rt->mfc_nstall++; 14182531Swollman } 14192531Swollman 1420133874Srwatson rte->m = mb0; 1421133874Srwatson rte->ifp = ifp; 14222531Swollman 1423119792Ssam MFC_UNLOCK(); 1424119792Ssam VIF_UNLOCK(); 14252531Swollman 14262531Swollman return 0; 1427133874Srwatson } 14281541Srgrimes} 14291541Srgrimes 14301541Srgrimes/* 14312531Swollman * Clean up the cache entry if upcall is not serviced 14321541Srgrimes */ 14332531Swollmanstatic void 1434208744Szecexpire_upcalls(void *arg) 14351541Srgrimes{ 1436255248Sjhb u_long i; 14371541Srgrimes 1438208744Szec CURVNET_SET((struct vnet *) arg); 1439208744Szec 1440119792Ssam MFC_LOCK(); 1441190012Sbms 1442190012Sbms for (i = 0; i < mfchashsize; i++) { 1443190012Sbms struct mfc *rt, *nrt; 1444190012Sbms 1445208744Szec if (V_nexpire[i] == 0) 14469209Swollman continue; 144742777Sfenner 1448255249Sjhb LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) { 1449190012Sbms if (TAILQ_EMPTY(&rt->mfc_stall)) 1450190012Sbms continue; 1451190012Sbms 1452190012Sbms if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) 1453190012Sbms continue; 1454190012Sbms 1455118622Shsu /* 1456118622Shsu * free the bw_meter entries 1457118622Shsu */ 1458190012Sbms while (rt->mfc_bw_meter != NULL) { 1459190012Sbms struct bw_meter *x = rt->mfc_bw_meter; 1460118622Shsu 1461190012Sbms rt->mfc_bw_meter = x->bm_mfc_next; 1462118622Shsu free(x, M_BWMETER); 1463118622Shsu } 1464118622Shsu 1465190966Srwatson MRTSTAT_INC(mrts_cache_cleanups); 1466190054Sbms CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__, 1467190054Sbms (u_long)ntohl(rt->mfc_origin.s_addr), 1468190054Sbms (u_long)ntohl(rt->mfc_mcastgrp.s_addr)); 1469190012Sbms 1470190012Sbms expire_mfc(rt); 14719209Swollman } 14722531Swollman } 1473190012Sbms 1474119792Ssam MFC_UNLOCK(); 1475119792Ssam 1476208744Szec callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, 1477208744Szec curvnet); 1478208744Szec 1479208744Szec CURVNET_RESTORE(); 14801541Srgrimes} 14811541Srgrimes 14821541Srgrimes/* 14832531Swollman * Packet forwarding routine once entry in the cache is made 14841541Srgrimes */ 14851541Srgrimesstatic int 1486106968Sluigiip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) 14871541Srgrimes{ 1488106968Sluigi struct ip *ip = mtod(m, struct ip *); 1489106968Sluigi vifi_t vifi; 1490241913Sglebius int plen = ntohs(ip->ip_len); 14911541Srgrimes 1492119792Ssam VIF_LOCK_ASSERT(); 14939209Swollman 14942531Swollman /* 14959209Swollman * If xmt_vif is not -1, send on only the requested vif. 14969209Swollman * 14979209Swollman * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) 14989209Swollman */ 1499208744Szec if (xmt_vif < V_numvifs) { 1500208744Szec if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER) 1501208744Szec pim_register_send(ip, V_viftable + xmt_vif, m, rt); 1502133874Srwatson else 1503208744Szec phyint_send(ip, V_viftable + xmt_vif, m); 15049209Swollman return 1; 15059209Swollman } 15069209Swollman 15079209Swollman /* 15082531Swollman * Don't forward if it didn't arrive from the parent vif for its origin. 15092531Swollman */ 15102531Swollman vifi = rt->mfc_parent; 1511208744Szec if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) { 1512190054Sbms CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)", 1513208744Szec __func__, ifp, (int)vifi, V_viftable[vifi].v_ifp); 1514190966Srwatson MRTSTAT_INC(mrts_wrong_if); 15159209Swollman ++rt->mfc_wrong_if; 15169209Swollman /* 1517118622Shsu * If we are doing PIM assert processing, send a message 1518118622Shsu * to the routing daemon. 1519118622Shsu * 1520118622Shsu * XXX: A PIM-SM router needs the WRONGVIF detection so it 1521118622Shsu * can complete the SPT switch, regardless of the type 1522118622Shsu * of the iif (broadcast media, GRE tunnel, etc). 15239209Swollman */ 1524208744Szec if (V_pim_assert_enabled && (vifi < V_numvifs) && 1525208744Szec V_viftable[vifi].v_ifp) { 15269209Swollman 1527208744Szec if (ifp == &V_multicast_register_if) 1528190967Srwatson PIMSTAT_INC(pims_rcv_registers_wrongiif); 1529118622Shsu 1530118501Shsu /* Get vifi for the incoming packet */ 1531208744Szec for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp; 1532208744Szec vifi++) 1533118501Shsu ; 1534208744Szec if (vifi >= V_numvifs) 1535118622Shsu return 0; /* The iif is not found: ignore the packet. */ 1536118501Shsu 1537118622Shsu if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) 1538118622Shsu return 0; /* WRONGVIF disabled: ignore the packet */ 1539118622Shsu 1540190012Sbms if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) { 1541106968Sluigi struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 1542106968Sluigi struct igmpmsg *im; 1543106968Sluigi int hlen = ip->ip_hl << 2; 1544106968Sluigi struct mbuf *mm = m_copy(m, 0, hlen); 1545106968Sluigi 15469209Swollman if (mm && (M_HASCL(mm) || mm->m_len < hlen)) 15479209Swollman mm = m_pullup(mm, hlen); 1548106968Sluigi if (mm == NULL) 15499209Swollman return ENOBUFS; 15509209Swollman 15519209Swollman im = mtod(mm, struct igmpmsg *); 15529209Swollman im->im_msgtype = IGMPMSG_WRONGVIF; 15539209Swollman im->im_mbz = 0; 15549209Swollman im->im_vif = vifi; 15559209Swollman 1556190966Srwatson MRTSTAT_INC(mrts_upcalls); 1557118501Shsu 15589209Swollman k_igmpsrc.sin_addr = im->im_src; 1559181803Sbz if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) { 1560190054Sbms CTR1(KTR_IPMF, "%s: socket queue full", __func__); 1561190966Srwatson MRTSTAT_INC(mrts_upq_sockfull); 1562106968Sluigi return ENOBUFS; 1563106968Sluigi } 15649209Swollman } 15659209Swollman } 15669209Swollman return 0; 15672531Swollman } 15681541Srgrimes 1569190012Sbms 15709209Swollman /* If I sourced this packet, it counts as output, else it was input. */ 1571208744Szec if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) { 1572208744Szec V_viftable[vifi].v_pkt_out++; 1573208744Szec V_viftable[vifi].v_bytes_out += plen; 15749209Swollman } else { 1575208744Szec V_viftable[vifi].v_pkt_in++; 1576208744Szec V_viftable[vifi].v_bytes_in += plen; 15779209Swollman } 15782531Swollman rt->mfc_pkt_cnt++; 15799209Swollman rt->mfc_byte_cnt += plen; 15801541Srgrimes 15812531Swollman /* 15822531Swollman * For each vif, decide if a copy of the packet should be forwarded. 15832531Swollman * Forward if: 15842531Swollman * - the ttl exceeds the vif's threshold 15852531Swollman * - there are group members downstream on interface 15862531Swollman */ 1587208744Szec for (vifi = 0; vifi < V_numvifs; vifi++) 1588106968Sluigi if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1589208744Szec V_viftable[vifi].v_pkt_out++; 1590208744Szec V_viftable[vifi].v_bytes_out += plen; 1591208744Szec if (V_viftable[vifi].v_flags & VIFF_REGISTER) 1592208744Szec pim_register_send(ip, V_viftable + vifi, m, rt); 1593118622Shsu else 1594208744Szec phyint_send(ip, V_viftable + vifi, m); 15959209Swollman } 15962531Swollman 1597118622Shsu /* 1598118622Shsu * Perform upcall-related bw measuring. 1599118622Shsu */ 1600118622Shsu if (rt->mfc_bw_meter != NULL) { 1601118622Shsu struct bw_meter *x; 1602118622Shsu struct timeval now; 1603118622Shsu 1604190012Sbms microtime(&now); 1605119792Ssam MFC_LOCK_ASSERT(); 1606118622Shsu for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) 1607118622Shsu bw_meter_receive_packet(x, plen, &now); 1608118622Shsu } 1609118622Shsu 16102531Swollman return 0; 16111541Srgrimes} 16121541Srgrimes 16139209Swollman/* 1614190012Sbms * Check if a vif number is legal/ok. This is used by in_mcast.c. 16151541Srgrimes */ 161612296Sphkstatic int 1617106968SluigiX_legal_vif_num(int vif) 16189209Swollman{ 1619190012Sbms int ret; 1620190012Sbms 1621190012Sbms ret = 0; 1622190012Sbms if (vif < 0) 1623190012Sbms return (ret); 1624190012Sbms 1625190012Sbms VIF_LOCK(); 1626208744Szec if (vif < V_numvifs) 1627190012Sbms ret = 1; 1628190012Sbms VIF_UNLOCK(); 1629190012Sbms 1630190012Sbms return (ret); 16312531Swollman} 16322531Swollman 16339209Swollman/* 16349209Swollman * Return the local address used by this vif 16359209Swollman */ 163612296Sphkstatic u_long 1637106968SluigiX_ip_mcast_src(int vifi) 16389209Swollman{ 1639190012Sbms in_addr_t addr; 1640190012Sbms 1641190012Sbms addr = INADDR_ANY; 1642190012Sbms if (vifi < 0) 1643190012Sbms return (addr); 1644190012Sbms 1645190012Sbms VIF_LOCK(); 1646208744Szec if (vifi < V_numvifs) 1647208744Szec addr = V_viftable[vifi].v_lcl_addr.s_addr; 1648190012Sbms VIF_UNLOCK(); 1649190012Sbms 1650190012Sbms return (addr); 16519209Swollman} 16529209Swollman 16532531Swollmanstatic void 1654106968Sluigiphyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) 16551541Srgrimes{ 1656106968Sluigi struct mbuf *mb_copy; 1657106968Sluigi int hlen = ip->ip_hl << 2; 16581541Srgrimes 1659119792Ssam VIF_LOCK_ASSERT(); 1660119792Ssam 16613571Swollman /* 16629209Swollman * Make a new reference to the packet; make sure that 16639209Swollman * the IP header is actually copied, not just referenced, 16649209Swollman * so that ip_output() only scribbles on the copy. 16653571Swollman */ 1666243882Sglebius mb_copy = m_copypacket(m, M_NOWAIT); 16679209Swollman if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) 16689209Swollman mb_copy = m_pullup(mb_copy, hlen); 16693571Swollman if (mb_copy == NULL) 16709209Swollman return; 16713571Swollman 1672166575Sbms send_packet(vifp, mb_copy); 16732531Swollman} 16741541Srgrimes 16759209Swollmanstatic void 1676166575Sbmssend_packet(struct vif *vifp, struct mbuf *m) 16771541Srgrimes{ 1678106968Sluigi struct ip_moptions imo; 1679158729Sbms struct in_multi *imm[2]; 1680106968Sluigi int error; 1681106968Sluigi 1682166575Sbms VIF_LOCK_ASSERT(); 1683166575Sbms 168410203Swollman imo.imo_multicast_ifp = vifp->v_ifp; 168510203Swollman imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 168610203Swollman imo.imo_multicast_loop = 1; 168710203Swollman imo.imo_multicast_vif = -1; 1688158729Sbms imo.imo_num_memberships = 0; 1689158729Sbms imo.imo_max_memberships = 2; 1690158729Sbms imo.imo_membership = &imm[0]; 169110203Swollman 169215292Swollman /* 169315292Swollman * Re-entrancy should not be a problem here, because 169415292Swollman * the packets that we send out and are looped back at us 169515292Swollman * should get rejected because they appear to come from 169615292Swollman * the loopback interface, thus preventing looping. 169715292Swollman */ 1698238016Sglebius error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL); 1699190054Sbms CTR3(KTR_IPMF, "%s: vif %td err %d", __func__, 1700208744Szec (ptrdiff_t)(vifp - V_viftable), error); 17019209Swollman} 17028876Srgrimes 1703190012Sbms/* 1704190012Sbms * Stubs for old RSVP socket shim implementation. 1705190012Sbms */ 1706190012Sbms 1707106968Sluigistatic int 1708190012SbmsX_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused) 17099209Swollman{ 17109209Swollman 1711190012Sbms return (EOPNOTSUPP); 17129209Swollman} 17139209Swollman 1714106968Sluigistatic void 1715190012SbmsX_ip_rsvp_force_done(struct socket *so __unused) 17169209Swollman{ 17179209Swollman 17189209Swollman} 17199209Swollman 1720106968Sluigistatic void 1721190012SbmsX_rsvp_input(struct mbuf *m, int off __unused) 17229209Swollman{ 17239209Swollman 1724190012Sbms if (!V_rsvp_on) 1725190012Sbms m_freem(m); 17269209Swollman} 17279209Swollman 1728118622Shsu/* 1729118622Shsu * Code for bandwidth monitors 1730118622Shsu */ 1731118622Shsu 1732118622Shsu/* 1733118622Shsu * Define common interface for timeval-related methods 1734118622Shsu */ 1735118622Shsu#define BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp) 1736118622Shsu#define BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp)) 1737118622Shsu#define BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp)) 1738118622Shsu 1739118622Shsustatic uint32_t 1740118622Shsucompute_bw_meter_flags(struct bw_upcall *req) 1741118622Shsu{ 1742118622Shsu uint32_t flags = 0; 1743118622Shsu 1744118622Shsu if (req->bu_flags & BW_UPCALL_UNIT_PACKETS) 1745118622Shsu flags |= BW_METER_UNIT_PACKETS; 1746118622Shsu if (req->bu_flags & BW_UPCALL_UNIT_BYTES) 1747118622Shsu flags |= BW_METER_UNIT_BYTES; 1748118622Shsu if (req->bu_flags & BW_UPCALL_GEQ) 1749118622Shsu flags |= BW_METER_GEQ; 1750118622Shsu if (req->bu_flags & BW_UPCALL_LEQ) 1751118622Shsu flags |= BW_METER_LEQ; 1752133874Srwatson 1753118622Shsu return flags; 1754118622Shsu} 1755133874Srwatson 1756118622Shsu/* 1757118622Shsu * Add a bw_meter entry 1758118622Shsu */ 17592763Swollmanstatic int 1760118622Shsuadd_bw_upcall(struct bw_upcall *req) 1761118622Shsu{ 1762118622Shsu struct mfc *mfc; 1763118622Shsu struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC, 1764118622Shsu BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC }; 1765118622Shsu struct timeval now; 1766118622Shsu struct bw_meter *x; 1767118622Shsu uint32_t flags; 1768133874Srwatson 1769208744Szec if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) 1770118622Shsu return EOPNOTSUPP; 1771133874Srwatson 1772118622Shsu /* Test if the flags are valid */ 1773118622Shsu if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES))) 1774118622Shsu return EINVAL; 1775118622Shsu if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))) 1776118622Shsu return EINVAL; 1777118622Shsu if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 1778118622Shsu == (BW_UPCALL_GEQ | BW_UPCALL_LEQ)) 1779118622Shsu return EINVAL; 1780133874Srwatson 1781118622Shsu /* Test if the threshold time interval is valid */ 1782118622Shsu if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <)) 1783118622Shsu return EINVAL; 1784133874Srwatson 1785118622Shsu flags = compute_bw_meter_flags(req); 1786118622Shsu 1787118622Shsu /* 1788118622Shsu * Find if we have already same bw_meter entry 1789118622Shsu */ 1790119792Ssam MFC_LOCK(); 1791190012Sbms mfc = mfc_find(&req->bu_src, &req->bu_dst); 1792118622Shsu if (mfc == NULL) { 1793119792Ssam MFC_UNLOCK(); 1794118622Shsu return EADDRNOTAVAIL; 1795118622Shsu } 1796118622Shsu for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) { 1797118622Shsu if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 1798118622Shsu &req->bu_threshold.b_time, ==)) && 1799118622Shsu (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 1800118622Shsu (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 1801118622Shsu (x->bm_flags & BW_METER_USER_FLAGS) == flags) { 1802119792Ssam MFC_UNLOCK(); 1803118622Shsu return 0; /* XXX Already installed */ 1804118622Shsu } 1805118622Shsu } 1806133874Srwatson 1807118622Shsu /* Allocate the new bw_meter entry */ 1808118622Shsu x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT); 1809119792Ssam if (x == NULL) { 1810119792Ssam MFC_UNLOCK(); 1811118622Shsu return ENOBUFS; 1812119792Ssam } 1813133874Srwatson 1814118622Shsu /* Set the new bw_meter entry */ 1815118622Shsu x->bm_threshold.b_time = req->bu_threshold.b_time; 1816190012Sbms microtime(&now); 1817118622Shsu x->bm_start_time = now; 1818118622Shsu x->bm_threshold.b_packets = req->bu_threshold.b_packets; 1819118622Shsu x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; 1820118622Shsu x->bm_measured.b_packets = 0; 1821118622Shsu x->bm_measured.b_bytes = 0; 1822118622Shsu x->bm_flags = flags; 1823118622Shsu x->bm_time_next = NULL; 1824118622Shsu x->bm_time_hash = BW_METER_BUCKETS; 1825133874Srwatson 1826118622Shsu /* Add the new bw_meter entry to the front of entries for this MFC */ 1827118622Shsu x->bm_mfc = mfc; 1828118622Shsu x->bm_mfc_next = mfc->mfc_bw_meter; 1829118622Shsu mfc->mfc_bw_meter = x; 1830118622Shsu schedule_bw_meter(x, &now); 1831119792Ssam MFC_UNLOCK(); 1832133874Srwatson 1833118622Shsu return 0; 1834118622Shsu} 1835118622Shsu 1836118622Shsustatic void 1837118622Shsufree_bw_list(struct bw_meter *list) 1838118622Shsu{ 1839118622Shsu while (list != NULL) { 1840118622Shsu struct bw_meter *x = list; 1841118622Shsu 1842118622Shsu list = list->bm_mfc_next; 1843118622Shsu unschedule_bw_meter(x); 1844118622Shsu free(x, M_BWMETER); 1845118622Shsu } 1846118622Shsu} 1847118622Shsu 1848118622Shsu/* 1849118622Shsu * Delete one or multiple bw_meter entries 1850118622Shsu */ 1851118622Shsustatic int 1852118622Shsudel_bw_upcall(struct bw_upcall *req) 1853118622Shsu{ 1854118622Shsu struct mfc *mfc; 1855118622Shsu struct bw_meter *x; 1856133874Srwatson 1857208744Szec if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL)) 1858118622Shsu return EOPNOTSUPP; 1859133874Srwatson 1860119792Ssam MFC_LOCK(); 1861190012Sbms 1862118622Shsu /* Find the corresponding MFC entry */ 1863190012Sbms mfc = mfc_find(&req->bu_src, &req->bu_dst); 1864118622Shsu if (mfc == NULL) { 1865119792Ssam MFC_UNLOCK(); 1866118622Shsu return EADDRNOTAVAIL; 1867118622Shsu } else if (req->bu_flags & BW_UPCALL_DELETE_ALL) { 1868118622Shsu /* 1869118622Shsu * Delete all bw_meter entries for this mfc 1870118622Shsu */ 1871118622Shsu struct bw_meter *list; 1872133874Srwatson 1873118622Shsu list = mfc->mfc_bw_meter; 1874118622Shsu mfc->mfc_bw_meter = NULL; 1875118622Shsu free_bw_list(list); 1876119792Ssam MFC_UNLOCK(); 1877118622Shsu return 0; 1878118622Shsu } else { /* Delete a single bw_meter entry */ 1879118622Shsu struct bw_meter *prev; 1880118622Shsu uint32_t flags = 0; 1881118622Shsu 1882118622Shsu flags = compute_bw_meter_flags(req); 1883118622Shsu 1884118622Shsu /* Find the bw_meter entry to delete */ 1885118622Shsu for (prev = NULL, x = mfc->mfc_bw_meter; x != NULL; 1886133046Shsu prev = x, x = x->bm_mfc_next) { 1887118622Shsu if ((BW_TIMEVALCMP(&x->bm_threshold.b_time, 1888118622Shsu &req->bu_threshold.b_time, ==)) && 1889118622Shsu (x->bm_threshold.b_packets == req->bu_threshold.b_packets) && 1890118622Shsu (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) && 1891118622Shsu (x->bm_flags & BW_METER_USER_FLAGS) == flags) 1892118622Shsu break; 1893118622Shsu } 1894118622Shsu if (x != NULL) { /* Delete entry from the list for this MFC */ 1895118622Shsu if (prev != NULL) 1896118622Shsu prev->bm_mfc_next = x->bm_mfc_next; /* remove from middle*/ 1897118622Shsu else 1898118622Shsu x->bm_mfc->mfc_bw_meter = x->bm_mfc_next;/* new head of list */ 1899118622Shsu 1900118622Shsu unschedule_bw_meter(x); 1901119792Ssam MFC_UNLOCK(); 1902118622Shsu /* Free the bw_meter entry */ 1903118622Shsu free(x, M_BWMETER); 1904118622Shsu return 0; 1905118622Shsu } else { 1906119792Ssam MFC_UNLOCK(); 1907118622Shsu return EINVAL; 1908118622Shsu } 1909118622Shsu } 1910118622Shsu /* NOTREACHED */ 1911118622Shsu} 1912118622Shsu 1913118622Shsu/* 1914118622Shsu * Perform bandwidth measurement processing that may result in an upcall 1915118622Shsu */ 1916118622Shsustatic void 1917118622Shsubw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp) 1918118622Shsu{ 1919118622Shsu struct timeval delta; 1920133874Srwatson 1921119792Ssam MFC_LOCK_ASSERT(); 1922119792Ssam 1923118622Shsu delta = *nowp; 1924118622Shsu BW_TIMEVALDECR(&delta, &x->bm_start_time); 1925133874Srwatson 1926118622Shsu if (x->bm_flags & BW_METER_GEQ) { 1927118622Shsu /* 1928118622Shsu * Processing for ">=" type of bw_meter entry 1929118622Shsu */ 1930118622Shsu if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 1931118622Shsu /* Reset the bw_meter entry */ 1932118622Shsu x->bm_start_time = *nowp; 1933118622Shsu x->bm_measured.b_packets = 0; 1934118622Shsu x->bm_measured.b_bytes = 0; 1935118622Shsu x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 1936118622Shsu } 1937133874Srwatson 1938118622Shsu /* Record that a packet is received */ 1939118622Shsu x->bm_measured.b_packets++; 1940118622Shsu x->bm_measured.b_bytes += plen; 1941133874Srwatson 1942118622Shsu /* 1943118622Shsu * Test if we should deliver an upcall 1944118622Shsu */ 1945133874Srwatson if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) { 1946118622Shsu if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 1947118622Shsu (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) || 1948118622Shsu ((x->bm_flags & BW_METER_UNIT_BYTES) && 1949118622Shsu (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) { 1950118622Shsu /* Prepare an upcall for delivery */ 1951118622Shsu bw_meter_prepare_upcall(x, nowp); 1952118622Shsu x->bm_flags |= BW_METER_UPCALL_DELIVERED; 1953118622Shsu } 1954118622Shsu } 1955118622Shsu } else if (x->bm_flags & BW_METER_LEQ) { 1956118622Shsu /* 1957118622Shsu * Processing for "<=" type of bw_meter entry 1958118622Shsu */ 1959118622Shsu if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) { 1960118622Shsu /* 1961118622Shsu * We are behind time with the multicast forwarding table 1962118622Shsu * scanning for "<=" type of bw_meter entries, so test now 1963118622Shsu * if we should deliver an upcall. 1964118622Shsu */ 1965118622Shsu if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 1966118622Shsu (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 1967118622Shsu ((x->bm_flags & BW_METER_UNIT_BYTES) && 1968118622Shsu (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 1969118622Shsu /* Prepare an upcall for delivery */ 1970118622Shsu bw_meter_prepare_upcall(x, nowp); 1971118622Shsu } 1972118622Shsu /* Reschedule the bw_meter entry */ 1973118622Shsu unschedule_bw_meter(x); 1974118622Shsu schedule_bw_meter(x, nowp); 1975118622Shsu } 1976133874Srwatson 1977118622Shsu /* Record that a packet is received */ 1978118622Shsu x->bm_measured.b_packets++; 1979118622Shsu x->bm_measured.b_bytes += plen; 1980133874Srwatson 1981118622Shsu /* 1982118622Shsu * Test if we should restart the measuring interval 1983118622Shsu */ 1984118622Shsu if ((x->bm_flags & BW_METER_UNIT_PACKETS && 1985118622Shsu x->bm_measured.b_packets <= x->bm_threshold.b_packets) || 1986118622Shsu (x->bm_flags & BW_METER_UNIT_BYTES && 1987118622Shsu x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) { 1988118622Shsu /* Don't restart the measuring interval */ 1989118622Shsu } else { 1990118622Shsu /* Do restart the measuring interval */ 1991118622Shsu /* 1992118622Shsu * XXX: note that we don't unschedule and schedule, because this 1993118622Shsu * might be too much overhead per packet. Instead, when we process 1994118622Shsu * all entries for a given timer hash bin, we check whether it is 1995118622Shsu * really a timeout. If not, we reschedule at that time. 1996118622Shsu */ 1997118622Shsu x->bm_start_time = *nowp; 1998118622Shsu x->bm_measured.b_packets = 0; 1999118622Shsu x->bm_measured.b_bytes = 0; 2000118622Shsu x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2001118622Shsu } 2002118622Shsu } 2003118622Shsu} 2004118622Shsu 2005118622Shsu/* 2006118622Shsu * Prepare a bandwidth-related upcall 2007118622Shsu */ 2008118622Shsustatic void 2009118622Shsubw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp) 2010118622Shsu{ 2011118622Shsu struct timeval delta; 2012118622Shsu struct bw_upcall *u; 2013133874Srwatson 2014119792Ssam MFC_LOCK_ASSERT(); 2015133874Srwatson 2016118622Shsu /* 2017133874Srwatson * Compute the measured time interval 2018118622Shsu */ 2019118622Shsu delta = *nowp; 2020118622Shsu BW_TIMEVALDECR(&delta, &x->bm_start_time); 2021133874Srwatson 2022118622Shsu /* 2023118622Shsu * If there are too many pending upcalls, deliver them now 2024118622Shsu */ 2025208744Szec if (V_bw_upcalls_n >= BW_UPCALLS_MAX) 2026118622Shsu bw_upcalls_send(); 2027133874Srwatson 2028118622Shsu /* 2029118622Shsu * Set the bw_upcall entry 2030118622Shsu */ 2031208744Szec u = &V_bw_upcalls[V_bw_upcalls_n++]; 2032118622Shsu u->bu_src = x->bm_mfc->mfc_origin; 2033118622Shsu u->bu_dst = x->bm_mfc->mfc_mcastgrp; 2034118622Shsu u->bu_threshold.b_time = x->bm_threshold.b_time; 2035118622Shsu u->bu_threshold.b_packets = x->bm_threshold.b_packets; 2036118622Shsu u->bu_threshold.b_bytes = x->bm_threshold.b_bytes; 2037118622Shsu u->bu_measured.b_time = delta; 2038118622Shsu u->bu_measured.b_packets = x->bm_measured.b_packets; 2039118622Shsu u->bu_measured.b_bytes = x->bm_measured.b_bytes; 2040118622Shsu u->bu_flags = 0; 2041118622Shsu if (x->bm_flags & BW_METER_UNIT_PACKETS) 2042118622Shsu u->bu_flags |= BW_UPCALL_UNIT_PACKETS; 2043118622Shsu if (x->bm_flags & BW_METER_UNIT_BYTES) 2044118622Shsu u->bu_flags |= BW_UPCALL_UNIT_BYTES; 2045118622Shsu if (x->bm_flags & BW_METER_GEQ) 2046118622Shsu u->bu_flags |= BW_UPCALL_GEQ; 2047118622Shsu if (x->bm_flags & BW_METER_LEQ) 2048118622Shsu u->bu_flags |= BW_UPCALL_LEQ; 2049118622Shsu} 2050118622Shsu 2051118622Shsu/* 2052118622Shsu * Send the pending bandwidth-related upcalls 2053118622Shsu */ 2054118622Shsustatic void 2055118622Shsubw_upcalls_send(void) 2056118622Shsu{ 2057118622Shsu struct mbuf *m; 2058208744Szec int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]); 2059118622Shsu struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 2060118622Shsu static struct igmpmsg igmpmsg = { 0, /* unused1 */ 2061118622Shsu 0, /* unused2 */ 2062118622Shsu IGMPMSG_BW_UPCALL,/* im_msgtype */ 2063118622Shsu 0, /* im_mbz */ 2064118622Shsu 0, /* im_vif */ 2065118622Shsu 0, /* unused3 */ 2066118622Shsu { 0 }, /* im_src */ 2067118622Shsu { 0 } }; /* im_dst */ 2068133874Srwatson 2069119792Ssam MFC_LOCK_ASSERT(); 2070119792Ssam 2071208744Szec if (V_bw_upcalls_n == 0) 2072118622Shsu return; /* No pending upcalls */ 2073118622Shsu 2074208744Szec V_bw_upcalls_n = 0; 2075133874Srwatson 2076118622Shsu /* 2077118622Shsu * Allocate a new mbuf, initialize it with the header and 2078118622Shsu * the payload for the pending calls. 2079118622Shsu */ 2080248324Sglebius m = m_gethdr(M_NOWAIT, MT_DATA); 2081118622Shsu if (m == NULL) { 2082118622Shsu log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n"); 2083118622Shsu return; 2084118622Shsu } 2085133874Srwatson 2086118622Shsu m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg); 2087208744Szec m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]); 2088133874Srwatson 2089118622Shsu /* 2090118622Shsu * Send the upcalls 2091118622Shsu * XXX do we need to set the address in k_igmpsrc ? 2092118622Shsu */ 2093190966Srwatson MRTSTAT_INC(mrts_upcalls); 2094181803Sbz if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) { 2095118622Shsu log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n"); 2096190966Srwatson MRTSTAT_INC(mrts_upq_sockfull); 2097118622Shsu } 2098118622Shsu} 2099118622Shsu 2100118622Shsu/* 2101118622Shsu * Compute the timeout hash value for the bw_meter entries 2102118622Shsu */ 2103118622Shsu#define BW_METER_TIMEHASH(bw_meter, hash) \ 2104118622Shsu do { \ 2105118622Shsu struct timeval next_timeval = (bw_meter)->bm_start_time; \ 2106118622Shsu \ 2107118622Shsu BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \ 2108118622Shsu (hash) = next_timeval.tv_sec; \ 2109118622Shsu if (next_timeval.tv_usec) \ 2110118622Shsu (hash)++; /* XXX: make sure we don't timeout early */ \ 2111118622Shsu (hash) %= BW_METER_BUCKETS; \ 2112118622Shsu } while (0) 2113118622Shsu 2114118622Shsu/* 2115118622Shsu * Schedule a timer to process periodically bw_meter entry of type "<=" 2116118622Shsu * by linking the entry in the proper hash bucket. 2117118622Shsu */ 2118118622Shsustatic void 2119118622Shsuschedule_bw_meter(struct bw_meter *x, struct timeval *nowp) 2120118622Shsu{ 2121119792Ssam int time_hash; 2122133874Srwatson 2123119792Ssam MFC_LOCK_ASSERT(); 2124119792Ssam 2125118622Shsu if (!(x->bm_flags & BW_METER_LEQ)) 2126118622Shsu return; /* XXX: we schedule timers only for "<=" entries */ 2127133874Srwatson 2128118622Shsu /* 2129118622Shsu * Reset the bw_meter entry 2130118622Shsu */ 2131118622Shsu x->bm_start_time = *nowp; 2132118622Shsu x->bm_measured.b_packets = 0; 2133118622Shsu x->bm_measured.b_bytes = 0; 2134118622Shsu x->bm_flags &= ~BW_METER_UPCALL_DELIVERED; 2135133874Srwatson 2136118622Shsu /* 2137118622Shsu * Compute the timeout hash value and insert the entry 2138118622Shsu */ 2139118622Shsu BW_METER_TIMEHASH(x, time_hash); 2140208744Szec x->bm_time_next = V_bw_meter_timers[time_hash]; 2141208744Szec V_bw_meter_timers[time_hash] = x; 2142118622Shsu x->bm_time_hash = time_hash; 2143118622Shsu} 2144118622Shsu 2145118622Shsu/* 2146118622Shsu * Unschedule the periodic timer that processes bw_meter entry of type "<=" 2147118622Shsu * by removing the entry from the proper hash bucket. 2148118622Shsu */ 2149118622Shsustatic void 2150118622Shsuunschedule_bw_meter(struct bw_meter *x) 2151118622Shsu{ 2152118622Shsu int time_hash; 2153118622Shsu struct bw_meter *prev, *tmp; 2154133874Srwatson 2155119792Ssam MFC_LOCK_ASSERT(); 2156119792Ssam 2157118622Shsu if (!(x->bm_flags & BW_METER_LEQ)) 2158118622Shsu return; /* XXX: we schedule timers only for "<=" entries */ 2159133874Srwatson 2160118622Shsu /* 2161118622Shsu * Compute the timeout hash value and delete the entry 2162118622Shsu */ 2163118622Shsu time_hash = x->bm_time_hash; 2164118622Shsu if (time_hash >= BW_METER_BUCKETS) 2165118622Shsu return; /* Entry was not scheduled */ 2166133874Srwatson 2167208744Szec for (prev = NULL, tmp = V_bw_meter_timers[time_hash]; 2168118622Shsu tmp != NULL; prev = tmp, tmp = tmp->bm_time_next) 2169118622Shsu if (tmp == x) 2170118622Shsu break; 2171133874Srwatson 2172118622Shsu if (tmp == NULL) 2173118622Shsu panic("unschedule_bw_meter: bw_meter entry not found"); 2174133874Srwatson 2175118622Shsu if (prev != NULL) 2176118622Shsu prev->bm_time_next = x->bm_time_next; 2177118622Shsu else 2178208744Szec V_bw_meter_timers[time_hash] = x->bm_time_next; 2179133874Srwatson 2180118622Shsu x->bm_time_next = NULL; 2181118622Shsu x->bm_time_hash = BW_METER_BUCKETS; 2182118622Shsu} 2183118622Shsu 2184118622Shsu 2185118622Shsu/* 2186118622Shsu * Process all "<=" type of bw_meter that should be processed now, 2187118622Shsu * and for each entry prepare an upcall if necessary. Each processed 2188118622Shsu * entry is rescheduled again for the (periodic) processing. 2189118622Shsu * 2190118622Shsu * This is run periodically (once per second normally). On each round, 2191118622Shsu * all the potentially matching entries are in the hash slot that we are 2192118622Shsu * looking at. 2193118622Shsu */ 2194118622Shsustatic void 2195118622Shsubw_meter_process() 2196118622Shsu{ 2197118622Shsu uint32_t loops; 2198119792Ssam int i; 2199118622Shsu struct timeval now, process_endtime; 2200133874Srwatson 2201190012Sbms microtime(&now); 2202208744Szec if (V_last_tv_sec == now.tv_sec) 2203118622Shsu return; /* nothing to do */ 2204118622Shsu 2205208744Szec loops = now.tv_sec - V_last_tv_sec; 2206208744Szec V_last_tv_sec = now.tv_sec; 2207118622Shsu if (loops > BW_METER_BUCKETS) 2208118622Shsu loops = BW_METER_BUCKETS; 2209118622Shsu 2210119792Ssam MFC_LOCK(); 2211118622Shsu /* 2212118622Shsu * Process all bins of bw_meter entries from the one after the last 2213118622Shsu * processed to the current one. On entry, i points to the last bucket 2214118622Shsu * visited, so we need to increment i at the beginning of the loop. 2215118622Shsu */ 2216119134Shsu for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) { 2217118622Shsu struct bw_meter *x, *tmp_list; 2218133874Srwatson 2219118622Shsu if (++i >= BW_METER_BUCKETS) 2220118622Shsu i = 0; 2221133874Srwatson 2222119134Shsu /* Disconnect the list of bw_meter entries from the bin */ 2223208744Szec tmp_list = V_bw_meter_timers[i]; 2224208744Szec V_bw_meter_timers[i] = NULL; 2225133874Srwatson 2226119134Shsu /* Process the list of bw_meter entries */ 2227118622Shsu while (tmp_list != NULL) { 2228118622Shsu x = tmp_list; 2229118622Shsu tmp_list = tmp_list->bm_time_next; 2230133874Srwatson 2231118622Shsu /* Test if the time interval is over */ 2232118622Shsu process_endtime = x->bm_start_time; 2233118622Shsu BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time); 2234118622Shsu if (BW_TIMEVALCMP(&process_endtime, &now, >)) { 2235118622Shsu /* Not yet: reschedule, but don't reset */ 2236118622Shsu int time_hash; 2237133874Srwatson 2238118622Shsu BW_METER_TIMEHASH(x, time_hash); 2239119134Shsu if (time_hash == i && process_endtime.tv_sec == now.tv_sec) { 2240119134Shsu /* 2241119134Shsu * XXX: somehow the bin processing is a bit ahead of time. 2242119134Shsu * Put the entry in the next bin. 2243119134Shsu */ 2244119134Shsu if (++time_hash >= BW_METER_BUCKETS) 2245119134Shsu time_hash = 0; 2246119134Shsu } 2247208744Szec x->bm_time_next = V_bw_meter_timers[time_hash]; 2248208744Szec V_bw_meter_timers[time_hash] = x; 2249118622Shsu x->bm_time_hash = time_hash; 2250133874Srwatson 2251118622Shsu continue; 2252118622Shsu } 2253133874Srwatson 2254118622Shsu /* 2255118622Shsu * Test if we should deliver an upcall 2256118622Shsu */ 2257118622Shsu if (((x->bm_flags & BW_METER_UNIT_PACKETS) && 2258118622Shsu (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) || 2259118622Shsu ((x->bm_flags & BW_METER_UNIT_BYTES) && 2260118622Shsu (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) { 2261118622Shsu /* Prepare an upcall for delivery */ 2262118622Shsu bw_meter_prepare_upcall(x, &now); 2263118622Shsu } 2264133874Srwatson 2265118622Shsu /* 2266118622Shsu * Reschedule for next processing 2267118622Shsu */ 2268118622Shsu schedule_bw_meter(x, &now); 2269118622Shsu } 2270118622Shsu } 2271133874Srwatson 2272118622Shsu /* Send all upcalls that are pending delivery */ 2273118622Shsu bw_upcalls_send(); 2274119792Ssam 2275119792Ssam MFC_UNLOCK(); 2276118622Shsu} 2277118622Shsu 2278118622Shsu/* 2279118622Shsu * A periodic function for sending all upcalls that are pending delivery 2280118622Shsu */ 2281118622Shsustatic void 2282208744Szecexpire_bw_upcalls_send(void *arg) 2283118622Shsu{ 2284208744Szec CURVNET_SET((struct vnet *) arg); 2285208744Szec 2286119792Ssam MFC_LOCK(); 2287118622Shsu bw_upcalls_send(); 2288119792Ssam MFC_UNLOCK(); 2289133874Srwatson 2290208744Szec callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send, 2291208744Szec curvnet); 2292208744Szec CURVNET_RESTORE(); 2293118622Shsu} 2294118622Shsu 2295118622Shsu/* 2296118622Shsu * A periodic function for periodic scanning of the multicast forwarding 2297118622Shsu * table for processing all "<=" bw_meter entries. 2298118622Shsu */ 2299118622Shsustatic void 2300208744Szecexpire_bw_meter_process(void *arg) 2301118622Shsu{ 2302208744Szec CURVNET_SET((struct vnet *) arg); 2303208744Szec 2304208744Szec if (V_mrt_api_config & MRT_MFC_BW_UPCALL) 2305118622Shsu bw_meter_process(); 2306133874Srwatson 2307208744Szec callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process, 2308208744Szec curvnet); 2309208744Szec CURVNET_RESTORE(); 2310118622Shsu} 2311118622Shsu 2312118622Shsu/* 2313118622Shsu * End of bandwidth monitoring code 2314118622Shsu */ 2315118622Shsu 2316118622Shsu/* 2317118622Shsu * Send the packet up to the user daemon, or eventually do kernel encapsulation 2318118622Shsu * 2319118622Shsu */ 2320118622Shsustatic int 2321169454Srwatsonpim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, 2322169454Srwatson struct mfc *rt) 2323118622Shsu{ 2324118622Shsu struct mbuf *mb_copy, *mm; 2325133874Srwatson 2326166623Sbms /* 2327166623Sbms * Do not send IGMP_WHOLEPKT notifications to userland, if the 2328166623Sbms * rendezvous point was unspecified, and we were told not to. 2329166623Sbms */ 2330208744Szec if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) && 2331190012Sbms in_nullhost(rt->mfc_rp)) 2332166623Sbms return 0; 2333166623Sbms 2334118622Shsu mb_copy = pim_register_prepare(ip, m); 2335118622Shsu if (mb_copy == NULL) 2336118622Shsu return ENOBUFS; 2337133874Srwatson 2338118622Shsu /* 2339118622Shsu * Send all the fragments. Note that the mbuf for each fragment 2340118622Shsu * is freed by the sending machinery. 2341118622Shsu */ 2342118622Shsu for (mm = mb_copy; mm; mm = mb_copy) { 2343118622Shsu mb_copy = mm->m_nextpkt; 2344118622Shsu mm->m_nextpkt = 0; 2345118622Shsu mm = m_pullup(mm, sizeof(struct ip)); 2346118622Shsu if (mm != NULL) { 2347118622Shsu ip = mtod(mm, struct ip *); 2348208744Szec if ((V_mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) { 2349118622Shsu pim_register_send_rp(ip, vifp, mm, rt); 2350118622Shsu } else { 2351118622Shsu pim_register_send_upcall(ip, vifp, mm, rt); 2352118622Shsu } 2353118622Shsu } 2354118622Shsu } 2355133874Srwatson 2356118622Shsu return 0; 2357118622Shsu} 2358118622Shsu 2359118622Shsu/* 2360118622Shsu * Return a copy of the data packet that is ready for PIM Register 2361118622Shsu * encapsulation. 2362118622Shsu * XXX: Note that in the returned copy the IP header is a valid one. 2363118622Shsu */ 2364118622Shsustatic struct mbuf * 2365118622Shsupim_register_prepare(struct ip *ip, struct mbuf *m) 2366118622Shsu{ 2367118622Shsu struct mbuf *mb_copy = NULL; 2368118622Shsu int mtu; 2369133874Srwatson 2370119134Shsu /* Take care of delayed checksums */ 2371118622Shsu if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2372118622Shsu in_delayed_cksum(m); 2373118622Shsu m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 2374118622Shsu } 2375119134Shsu 2376118622Shsu /* 2377118622Shsu * Copy the old packet & pullup its IP header into the 2378118622Shsu * new mbuf so we can modify it. 2379118622Shsu */ 2380243882Sglebius mb_copy = m_copypacket(m, M_NOWAIT); 2381118622Shsu if (mb_copy == NULL) 2382118622Shsu return NULL; 2383118622Shsu mb_copy = m_pullup(mb_copy, ip->ip_hl << 2); 2384118622Shsu if (mb_copy == NULL) 2385118622Shsu return NULL; 2386133874Srwatson 2387118622Shsu /* take care of the TTL */ 2388118622Shsu ip = mtod(mb_copy, struct ip *); 2389118622Shsu --ip->ip_ttl; 2390133874Srwatson 2391118622Shsu /* Compute the MTU after the PIM Register encapsulation */ 2392118622Shsu mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr); 2393133874Srwatson 2394241913Sglebius if (ntohs(ip->ip_len) <= mtu) { 2395119134Shsu /* Turn the IP header into a valid one */ 2396119134Shsu ip->ip_sum = 0; 2397119134Shsu ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 2398119134Shsu } else { 2399119134Shsu /* Fragment the packet */ 2400242161Sglebius mb_copy->m_pkthdr.csum_flags |= CSUM_IP; 2401242161Sglebius if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) { 2402119134Shsu m_freem(mb_copy); 2403119134Shsu return NULL; 2404119134Shsu } 2405118622Shsu } 2406118622Shsu return mb_copy; 2407118622Shsu} 2408118622Shsu 2409118622Shsu/* 2410118622Shsu * Send an upcall with the data packet to the user-level process. 2411118622Shsu */ 2412118622Shsustatic int 2413118622Shsupim_register_send_upcall(struct ip *ip, struct vif *vifp, 2414169454Srwatson struct mbuf *mb_copy, struct mfc *rt) 2415118622Shsu{ 2416118622Shsu struct mbuf *mb_first; 2417118622Shsu int len = ntohs(ip->ip_len); 2418118622Shsu struct igmpmsg *im; 2419118622Shsu struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; 2420133874Srwatson 2421119792Ssam VIF_LOCK_ASSERT(); 2422119792Ssam 2423118622Shsu /* 2424118622Shsu * Add a new mbuf with an upcall header 2425118622Shsu */ 2426248324Sglebius mb_first = m_gethdr(M_NOWAIT, MT_DATA); 2427118622Shsu if (mb_first == NULL) { 2428118622Shsu m_freem(mb_copy); 2429118622Shsu return ENOBUFS; 2430118622Shsu } 2431118622Shsu mb_first->m_data += max_linkhdr; 2432118622Shsu mb_first->m_pkthdr.len = len + sizeof(struct igmpmsg); 2433118622Shsu mb_first->m_len = sizeof(struct igmpmsg); 2434118622Shsu mb_first->m_next = mb_copy; 2435133874Srwatson 2436118622Shsu /* Send message to routing daemon */ 2437118622Shsu im = mtod(mb_first, struct igmpmsg *); 2438118622Shsu im->im_msgtype = IGMPMSG_WHOLEPKT; 2439118622Shsu im->im_mbz = 0; 2440208744Szec im->im_vif = vifp - V_viftable; 2441118622Shsu im->im_src = ip->ip_src; 2442118622Shsu im->im_dst = ip->ip_dst; 2443133874Srwatson 2444118622Shsu k_igmpsrc.sin_addr = ip->ip_src; 2445133874Srwatson 2446190966Srwatson MRTSTAT_INC(mrts_upcalls); 2447133874Srwatson 2448181803Sbz if (socket_send(V_ip_mrouter, mb_first, &k_igmpsrc) < 0) { 2449190054Sbms CTR1(KTR_IPMF, "%s: socket queue full", __func__); 2450190966Srwatson MRTSTAT_INC(mrts_upq_sockfull); 2451118622Shsu return ENOBUFS; 2452118622Shsu } 2453133874Srwatson 2454118622Shsu /* Keep statistics */ 2455190967Srwatson PIMSTAT_INC(pims_snd_registers_msgs); 2456190967Srwatson PIMSTAT_ADD(pims_snd_registers_bytes, len); 2457133874Srwatson 2458118622Shsu return 0; 2459118622Shsu} 2460118622Shsu 2461118622Shsu/* 2462118622Shsu * Encapsulate the data packet in PIM Register message and send it to the RP. 2463118622Shsu */ 2464118622Shsustatic int 2465169454Srwatsonpim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, 2466169454Srwatson struct mfc *rt) 2467118622Shsu{ 2468118622Shsu struct mbuf *mb_first; 2469118622Shsu struct ip *ip_outer; 2470118622Shsu struct pim_encap_pimhdr *pimhdr; 2471118622Shsu int len = ntohs(ip->ip_len); 2472118622Shsu vifi_t vifi = rt->mfc_parent; 2473133874Srwatson 2474119792Ssam VIF_LOCK_ASSERT(); 2475133874Srwatson 2476208744Szec if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) { 2477118622Shsu m_freem(mb_copy); 2478118622Shsu return EADDRNOTAVAIL; /* The iif vif is invalid */ 2479118622Shsu } 2480133874Srwatson 2481118622Shsu /* 2482118622Shsu * Add a new mbuf with the encapsulating header 2483118622Shsu */ 2484248324Sglebius mb_first = m_gethdr(M_NOWAIT, MT_DATA); 2485118622Shsu if (mb_first == NULL) { 2486118622Shsu m_freem(mb_copy); 2487118622Shsu return ENOBUFS; 2488118622Shsu } 2489118622Shsu mb_first->m_data += max_linkhdr; 2490118622Shsu mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr); 2491118622Shsu mb_first->m_next = mb_copy; 2492118622Shsu 2493118622Shsu mb_first->m_pkthdr.len = len + mb_first->m_len; 2494133874Srwatson 2495118622Shsu /* 2496118622Shsu * Fill in the encapsulating IP and PIM header 2497118622Shsu */ 2498118622Shsu ip_outer = mtod(mb_first, struct ip *); 2499118622Shsu *ip_outer = pim_encap_iphdr; 2500133720Sdwmalone ip_outer->ip_id = ip_newid(); 2501241913Sglebius ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) + 2502241913Sglebius sizeof(pim_encap_pimhdr)); 2503208744Szec ip_outer->ip_src = V_viftable[vifi].v_lcl_addr; 2504118622Shsu ip_outer->ip_dst = rt->mfc_rp; 2505118622Shsu /* 2506118622Shsu * Copy the inner header TOS to the outer header, and take care of the 2507118622Shsu * IP_DF bit. 2508118622Shsu */ 2509118622Shsu ip_outer->ip_tos = ip->ip_tos; 2510241913Sglebius if (ip->ip_off & htons(IP_DF)) 2511241913Sglebius ip_outer->ip_off |= htons(IP_DF); 2512118622Shsu pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer 2513118622Shsu + sizeof(pim_encap_iphdr)); 2514118622Shsu *pimhdr = pim_encap_pimhdr; 2515118622Shsu /* If the iif crosses a border, set the Border-bit */ 2516208744Szec if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config) 2517118622Shsu pimhdr->flags |= htonl(PIM_BORDER_REGISTER); 2518133874Srwatson 2519118622Shsu mb_first->m_data += sizeof(pim_encap_iphdr); 2520118622Shsu pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr)); 2521118622Shsu mb_first->m_data -= sizeof(pim_encap_iphdr); 2522133874Srwatson 2523166575Sbms send_packet(vifp, mb_first); 2524133874Srwatson 2525118622Shsu /* Keep statistics */ 2526190967Srwatson PIMSTAT_INC(pims_snd_registers_msgs); 2527190967Srwatson PIMSTAT_ADD(pims_snd_registers_bytes, len); 2528133874Srwatson 2529118622Shsu return 0; 2530118622Shsu} 2531118622Shsu 2532118622Shsu/* 2533190012Sbms * pim_encapcheck() is called by the encap4_input() path at runtime to 2534166622Sbms * determine if a packet is for PIM; allowing PIM to be dynamically loaded 2535166622Sbms * into the kernel. 2536166622Sbms */ 2537166622Sbmsstatic int 2538166622Sbmspim_encapcheck(const struct mbuf *m, int off, int proto, void *arg) 2539166622Sbms{ 2540166622Sbms 2541166622Sbms#ifdef DIAGNOSTIC 2542166622Sbms KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); 2543166622Sbms#endif 2544166622Sbms if (proto != IPPROTO_PIM) 2545166622Sbms return 0; /* not for us; reject the datagram. */ 2546166622Sbms 2547166622Sbms return 64; /* claim the datagram. */ 2548166622Sbms} 2549166622Sbms 2550166622Sbms/* 2551118622Shsu * PIM-SMv2 and PIM-DM messages processing. 2552118622Shsu * Receives and verifies the PIM control messages, and passes them 2553118622Shsu * up to the listening socket, using rip_input(). 2554118622Shsu * The only message with special processing is the PIM_REGISTER message 2555118622Shsu * (used by PIM-SM): the PIM header is stripped off, and the inner packet 2556118622Shsu * is passed to if_simloop(). 2557118622Shsu */ 2558118622Shsuvoid 2559261208Sglebiuspim_input(struct mbuf *m, int iphlen) 2560118622Shsu{ 2561118622Shsu struct ip *ip = mtod(m, struct ip *); 2562118622Shsu struct pim *pim; 2563118622Shsu int minlen; 2564261208Sglebius int datalen = ntohs(ip->ip_len) - iphlen; 2565118622Shsu int ip_tos; 2566133874Srwatson 2567118622Shsu /* Keep statistics */ 2568190967Srwatson PIMSTAT_INC(pims_rcv_total_msgs); 2569190967Srwatson PIMSTAT_ADD(pims_rcv_total_bytes, datalen); 2570133874Srwatson 2571118622Shsu /* 2572118622Shsu * Validate lengths 2573118622Shsu */ 2574118622Shsu if (datalen < PIM_MINLEN) { 2575190967Srwatson PIMSTAT_INC(pims_rcv_tooshort); 2576190054Sbms CTR3(KTR_IPMF, "%s: short packet (%d) from %s", 2577190148Sbms __func__, datalen, inet_ntoa(ip->ip_src)); 2578118622Shsu m_freem(m); 2579118622Shsu return; 2580118622Shsu } 2581133874Srwatson 2582118622Shsu /* 2583118622Shsu * If the packet is at least as big as a REGISTER, go agead 2584118622Shsu * and grab the PIM REGISTER header size, to avoid another 2585118622Shsu * possible m_pullup() later. 2586133874Srwatson * 2587118622Shsu * PIM_MINLEN == pimhdr + u_int32_t == 4 + 4 = 8 2588118622Shsu * PIM_REG_MINLEN == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28 2589118622Shsu */ 2590118622Shsu minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN); 2591118622Shsu /* 2592118622Shsu * Get the IP and PIM headers in contiguous memory, and 2593118622Shsu * possibly the PIM REGISTER header. 2594118622Shsu */ 2595261208Sglebius if (m->m_len < minlen && (m = m_pullup(m, minlen)) == 0) { 2596190054Sbms CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__); 2597118622Shsu return; 2598118622Shsu } 2599190054Sbms 2600118622Shsu /* m_pullup() may have given us a new mbuf so reset ip. */ 2601118622Shsu ip = mtod(m, struct ip *); 2602118622Shsu ip_tos = ip->ip_tos; 2603133874Srwatson 2604118622Shsu /* adjust mbuf to point to the PIM header */ 2605118622Shsu m->m_data += iphlen; 2606118622Shsu m->m_len -= iphlen; 2607118622Shsu pim = mtod(m, struct pim *); 2608133874Srwatson 2609118622Shsu /* 2610118622Shsu * Validate checksum. If PIM REGISTER, exclude the data packet. 2611118622Shsu * 2612118622Shsu * XXX: some older PIMv2 implementations don't make this distinction, 2613118622Shsu * so for compatibility reason perform the checksum over part of the 2614118622Shsu * message, and if error, then over the whole message. 2615118622Shsu */ 2616118622Shsu if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) { 2617118622Shsu /* do nothing, checksum okay */ 2618118622Shsu } else if (in_cksum(m, datalen)) { 2619190967Srwatson PIMSTAT_INC(pims_rcv_badsum); 2620190054Sbms CTR1(KTR_IPMF, "%s: invalid checksum", __func__); 2621118622Shsu m_freem(m); 2622118622Shsu return; 2623118622Shsu } 2624118622Shsu 2625118622Shsu /* PIM version check */ 2626118622Shsu if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) { 2627190967Srwatson PIMSTAT_INC(pims_rcv_badversion); 2628190054Sbms CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__, 2629190054Sbms (int)PIM_VT_V(pim->pim_vt), PIM_VERSION); 2630118622Shsu m_freem(m); 2631118622Shsu return; 2632118622Shsu } 2633133874Srwatson 2634118622Shsu /* restore mbuf back to the outer IP */ 2635118622Shsu m->m_data -= iphlen; 2636118622Shsu m->m_len += iphlen; 2637133874Srwatson 2638118622Shsu if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) { 2639118622Shsu /* 2640118622Shsu * Since this is a REGISTER, we'll make a copy of the register 2641118622Shsu * headers ip + pim + u_int32 + encap_ip, to be passed up to the 2642118622Shsu * routing daemon. 2643118622Shsu */ 2644118622Shsu struct sockaddr_in dst = { sizeof(dst), AF_INET }; 2645118622Shsu struct mbuf *mcp; 2646118622Shsu struct ip *encap_ip; 2647118622Shsu u_int32_t *reghdr; 2648119792Ssam struct ifnet *vifp; 2649133874Srwatson 2650119792Ssam VIF_LOCK(); 2651208744Szec if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) { 2652119792Ssam VIF_UNLOCK(); 2653190054Sbms CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__, 2654208744Szec (int)V_reg_vif_num); 2655118622Shsu m_freem(m); 2656118622Shsu return; 2657118622Shsu } 2658119792Ssam /* XXX need refcnt? */ 2659208744Szec vifp = V_viftable[V_reg_vif_num].v_ifp; 2660119792Ssam VIF_UNLOCK(); 2661133874Srwatson 2662118622Shsu /* 2663118622Shsu * Validate length 2664118622Shsu */ 2665118622Shsu if (datalen < PIM_REG_MINLEN) { 2666190967Srwatson PIMSTAT_INC(pims_rcv_tooshort); 2667190967Srwatson PIMSTAT_INC(pims_rcv_badregisters); 2668190054Sbms CTR1(KTR_IPMF, "%s: register packet size too small", __func__); 2669118622Shsu m_freem(m); 2670118622Shsu return; 2671118622Shsu } 2672133874Srwatson 2673118622Shsu reghdr = (u_int32_t *)(pim + 1); 2674118622Shsu encap_ip = (struct ip *)(reghdr + 1); 2675133874Srwatson 2676190054Sbms CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d", 2677190054Sbms __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len)); 2678133874Srwatson 2679118622Shsu /* verify the version number of the inner packet */ 2680118622Shsu if (encap_ip->ip_v != IPVERSION) { 2681190967Srwatson PIMSTAT_INC(pims_rcv_badregisters); 2682190054Sbms CTR1(KTR_IPMF, "%s: bad encap ip version", __func__); 2683118622Shsu m_freem(m); 2684118622Shsu return; 2685118622Shsu } 2686133874Srwatson 2687118622Shsu /* verify the inner packet is destined to a mcast group */ 2688118622Shsu if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) { 2689190967Srwatson PIMSTAT_INC(pims_rcv_badregisters); 2690190054Sbms CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__, 2691190054Sbms inet_ntoa(encap_ip->ip_dst)); 2692118622Shsu m_freem(m); 2693118622Shsu return; 2694118622Shsu } 2695126741Shsu 2696126741Shsu /* If a NULL_REGISTER, pass it to the daemon */ 2697126741Shsu if ((ntohl(*reghdr) & PIM_NULL_REGISTER)) 2698126741Shsu goto pim_input_to_daemon; 2699126741Shsu 2700118622Shsu /* 2701118622Shsu * Copy the TOS from the outer IP header to the inner IP header. 2702118622Shsu */ 2703118622Shsu if (encap_ip->ip_tos != ip_tos) { 2704118622Shsu /* Outer TOS -> inner TOS */ 2705118622Shsu encap_ip->ip_tos = ip_tos; 2706118622Shsu /* Recompute the inner header checksum. Sigh... */ 2707133874Srwatson 2708118622Shsu /* adjust mbuf to point to the inner IP header */ 2709118622Shsu m->m_data += (iphlen + PIM_MINLEN); 2710118622Shsu m->m_len -= (iphlen + PIM_MINLEN); 2711133874Srwatson 2712118622Shsu encap_ip->ip_sum = 0; 2713118622Shsu encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2); 2714133874Srwatson 2715118622Shsu /* restore mbuf to point back to the outer IP header */ 2716118622Shsu m->m_data -= (iphlen + PIM_MINLEN); 2717118622Shsu m->m_len += (iphlen + PIM_MINLEN); 2718118622Shsu } 2719126741Shsu 2720118622Shsu /* 2721118622Shsu * Decapsulate the inner IP packet and loopback to forward it 2722133874Srwatson * as a normal multicast packet. Also, make a copy of the 2723118622Shsu * outer_iphdr + pimhdr + reghdr + encap_iphdr 2724118622Shsu * to pass to the daemon later, so it can take the appropriate 2725118622Shsu * actions (e.g., send back PIM_REGISTER_STOP). 2726118622Shsu * XXX: here m->m_data points to the outer IP header. 2727118622Shsu */ 2728118622Shsu mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN); 2729118622Shsu if (mcp == NULL) { 2730190054Sbms CTR1(KTR_IPMF, "%s: m_copy() failed", __func__); 2731118622Shsu m_freem(m); 2732118622Shsu return; 2733118622Shsu } 2734133874Srwatson 2735118622Shsu /* Keep statistics */ 2736118622Shsu /* XXX: registers_bytes include only the encap. mcast pkt */ 2737190967Srwatson PIMSTAT_INC(pims_rcv_registers_msgs); 2738190967Srwatson PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len)); 2739133874Srwatson 2740118622Shsu /* 2741118622Shsu * forward the inner ip packet; point m_data at the inner ip. 2742118622Shsu */ 2743118622Shsu m_adj(m, iphlen + PIM_MINLEN); 2744133874Srwatson 2745190054Sbms CTR4(KTR_IPMF, 2746190054Sbms "%s: forward decap'd REGISTER: src %lx dst %lx vif %d", 2747190054Sbms __func__, 2748190054Sbms (u_long)ntohl(encap_ip->ip_src.s_addr), 2749190054Sbms (u_long)ntohl(encap_ip->ip_dst.s_addr), 2750208744Szec (int)V_reg_vif_num); 2751190054Sbms 2752119792Ssam /* NB: vifp was collected above; can it change on us? */ 2753119792Ssam if_simloop(vifp, m, dst.sin_family, 0); 2754133874Srwatson 2755118622Shsu /* prepare the register head to send to the mrouting daemon */ 2756118622Shsu m = mcp; 2757118622Shsu } 2758118622Shsu 2759133874Srwatsonpim_input_to_daemon: 2760118622Shsu /* 2761118622Shsu * Pass the PIM message up to the daemon; if it is a Register message, 2762118622Shsu * pass the 'head' only up to the daemon. This includes the 2763118622Shsu * outer IP header, PIM header, PIM-Register header and the 2764118622Shsu * inner IP header. 2765118622Shsu * XXX: the outer IP header pkt size of a Register is not adjust to 2766118622Shsu * reflect the fact that the inner multicast data is truncated. 2767118622Shsu */ 2768118622Shsu rip_input(m, iphlen); 2769118622Shsu 2770118622Shsu return; 2771118622Shsu} 2772118622Shsu 2773118622Shsustatic int 2774190012Sbmssysctl_mfctable(SYSCTL_HANDLER_ARGS) 2775190012Sbms{ 2776190012Sbms struct mfc *rt; 2777190012Sbms int error, i; 2778190012Sbms 2779190012Sbms if (req->newptr) 2780190012Sbms return (EPERM); 2781208744Szec if (V_mfchashtbl == NULL) /* XXX unlocked */ 2782190012Sbms return (0); 2783190012Sbms error = sysctl_wire_old_buffer(req, 0); 2784190012Sbms if (error) 2785190012Sbms return (error); 2786190012Sbms 2787190012Sbms MFC_LOCK(); 2788190012Sbms for (i = 0; i < mfchashsize; i++) { 2789208744Szec LIST_FOREACH(rt, &V_mfchashtbl[i], mfc_hash) { 2790190012Sbms error = SYSCTL_OUT(req, rt, sizeof(struct mfc)); 2791190012Sbms if (error) 2792190012Sbms goto out_locked; 2793190012Sbms } 2794190012Sbms } 2795190012Sbmsout_locked: 2796190012Sbms MFC_UNLOCK(); 2797190012Sbms return (error); 2798190012Sbms} 2799190012Sbms 2800227309Sedstatic SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, 2801227309Sed sysctl_mfctable, "IPv4 Multicast Forwarding Table " 2802227309Sed "(struct *mfc[mfchashsize], netinet/ip_mroute.h)"); 2803190012Sbms 2804208744Szecstatic void 2805208744Szecvnet_mroute_init(const void *unused __unused) 2806208744Szec{ 2807208744Szec 2808208744Szec MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO); 2809208744Szec bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers)); 2810314667Savg callout_init(&V_expire_upcalls_ch, 1); 2811314667Savg callout_init(&V_bw_upcalls_ch, 1); 2812314667Savg callout_init(&V_bw_meter_ch, 1); 2813208744Szec} 2814208744Szec 2815232517SzecVNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init, 2816208744Szec NULL); 2817208744Szec 2818208744Szecstatic void 2819208744Szecvnet_mroute_uninit(const void *unused __unused) 2820208744Szec{ 2821208744Szec 2822208744Szec FREE(V_nexpire, M_MRTABLE); 2823208744Szec V_nexpire = NULL; 2824208744Szec} 2825208744Szec 2826208744SzecVNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, 2827208744Szec vnet_mroute_uninit, NULL); 2828208744Szec 2829190012Sbmsstatic int 283080354Sfennerip_mroute_modevent(module_t mod, int type, void *unused) 28312763Swollman{ 2832183550Szec 2833106968Sluigi switch (type) { 2834106968Sluigi case MOD_LOAD: 2835167116Sbms MROUTER_LOCK_INIT(); 2836208744Szec 2837208744Szec if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, 2838208744Szec if_detached_event, NULL, EVENTHANDLER_PRI_ANY); 2839208744Szec if (if_detach_event_tag == NULL) { 2840249562Sdelphij printf("ip_mroute: unable to register " 2841249562Sdelphij "ifnet_departure_event handler\n"); 2842208744Szec MROUTER_LOCK_DESTROY(); 2843208744Szec return (EINVAL); 2844208744Szec } 2845208744Szec 2846123690Ssam MFC_LOCK_INIT(); 2847123690Ssam VIF_LOCK_INIT(); 2848190012Sbms 2849190012Sbms mfchashsize = MFCHASHSIZE; 2850190012Sbms if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) && 2851190012Sbms !powerof2(mfchashsize)) { 2852190012Sbms printf("WARNING: %s not a power of 2; using default\n", 2853190012Sbms "net.inet.ip.mfchashsize"); 2854190012Sbms mfchashsize = MFCHASHSIZE; 2855190012Sbms } 2856190012Sbms 2857190012Sbms pim_squelch_wholepkt = 0; 2858166623Sbms TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt", 2859166623Sbms &pim_squelch_wholepkt); 2860166938Sbms 2861166622Sbms pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM, 2862166622Sbms pim_encapcheck, &in_pim_protosw, NULL); 2863166622Sbms if (pim_encap_cookie == NULL) { 2864166622Sbms printf("ip_mroute: unable to attach pim encap\n"); 2865166622Sbms VIF_LOCK_DESTROY(); 2866166622Sbms MFC_LOCK_DESTROY(); 2867167116Sbms MROUTER_LOCK_DESTROY(); 2868166622Sbms return (EINVAL); 2869166622Sbms } 2870166938Sbms 2871106968Sluigi ip_mcast_src = X_ip_mcast_src; 2872106968Sluigi ip_mforward = X_ip_mforward; 2873106968Sluigi ip_mrouter_done = X_ip_mrouter_done; 2874106968Sluigi ip_mrouter_get = X_ip_mrouter_get; 2875106968Sluigi ip_mrouter_set = X_ip_mrouter_set; 2876166938Sbms 2877106968Sluigi ip_rsvp_force_done = X_ip_rsvp_force_done; 2878106968Sluigi ip_rsvp_vif = X_ip_rsvp_vif; 2879166938Sbms 2880106968Sluigi legal_vif_num = X_legal_vif_num; 2881106968Sluigi mrt_ioctl = X_mrt_ioctl; 2882106968Sluigi rsvp_input_p = X_rsvp_input; 2883106968Sluigi break; 28842763Swollman 2885106968Sluigi case MOD_UNLOAD: 2886121446Ssam /* 2887121446Ssam * Typically module unload happens after the user-level 2888121446Ssam * process has shutdown the kernel services (the check 2889121446Ssam * below insures someone can't just yank the module out 2890121446Ssam * from under a running process). But if the module is 2891121446Ssam * just loaded and then unloaded w/o starting up a user 2892121446Ssam * process we still need to cleanup. 2893121446Ssam */ 2894208744Szec MROUTER_LOCK(); 2895208744Szec if (ip_mrouter_cnt != 0) { 2896208744Szec MROUTER_UNLOCK(); 2897190012Sbms return (EINVAL); 2898208744Szec } 2899208744Szec ip_mrouter_unloading = 1; 2900208744Szec MROUTER_UNLOCK(); 29012763Swollman 2902208744Szec EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); 2903208744Szec 2904166622Sbms if (pim_encap_cookie) { 2905166622Sbms encap_detach(pim_encap_cookie); 2906166622Sbms pim_encap_cookie = NULL; 2907166622Sbms } 2908190012Sbms 2909106968Sluigi ip_mcast_src = NULL; 2910106968Sluigi ip_mforward = NULL; 2911106968Sluigi ip_mrouter_done = NULL; 2912106968Sluigi ip_mrouter_get = NULL; 2913106968Sluigi ip_mrouter_set = NULL; 2914166938Sbms 2915106968Sluigi ip_rsvp_force_done = NULL; 2916106968Sluigi ip_rsvp_vif = NULL; 2917166938Sbms 2918106968Sluigi legal_vif_num = NULL; 2919106968Sluigi mrt_ioctl = NULL; 2920106968Sluigi rsvp_input_p = NULL; 2921166938Sbms 2922123690Ssam VIF_LOCK_DESTROY(); 2923123690Ssam MFC_LOCK_DESTROY(); 2924167116Sbms MROUTER_LOCK_DESTROY(); 2925106968Sluigi break; 2926166938Sbms 2927132199Sphk default: 2928132199Sphk return EOPNOTSUPP; 2929106968Sluigi } 2930106968Sluigi return 0; 29312763Swollman} 29322763Swollman 293380354Sfennerstatic moduledata_t ip_mroutemod = { 2934106968Sluigi "ip_mroute", 2935106968Sluigi ip_mroute_modevent, 2936241394Skevlo 0 293780354Sfenner}; 2938190012Sbms 2939232517SzecDECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); 2940