1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 2007-2009 Bruce Simpson.
5 * Copyright (c) 1988 Stephen Deering.
6 * Copyright (c) 1992, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Stephen Deering of Stanford University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37/*
38 * Internet Group Management Protocol (IGMP) routines.
39 * [RFC1112, RFC2236, RFC3376]
40 *
41 * Written by Steve Deering, Stanford, May 1988.
42 * Modified by Rosen Sharma, Stanford, Aug 1994.
43 * Modified by Bill Fenner, Xerox PARC, Feb 1995.
44 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995.
45 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson.
46 *
47 * MULTICAST Revision: 3.5.1.4
48 */
49
50#include <sys/cdefs.h>
51#include "opt_ddb.h"
52
53#include <sys/param.h>
54#include <sys/systm.h>
55#include <sys/module.h>
56#include <sys/malloc.h>
57#include <sys/mbuf.h>
58#include <sys/socket.h>
59#include <sys/kernel.h>
60#include <sys/lock.h>
61#include <sys/sysctl.h>
62#include <sys/ktr.h>
63#include <sys/condvar.h>
64
65#ifdef DDB
66#include <ddb/ddb.h>
67#endif
68
69#include <net/if.h>
70#include <net/if_var.h>
71#include <net/if_private.h>
72#include <net/netisr.h>
73#include <net/vnet.h>
74
75#include <netinet/in.h>
76#include <netinet/in_var.h>
77#include <netinet/in_systm.h>
78#include <netinet/ip.h>
79#include <netinet/ip_var.h>
80#include <netinet/ip_options.h>
81#include <netinet/igmp.h>
82#include <netinet/igmp_var.h>
83
84#include <machine/in_cksum.h>
85
86#include <security/mac/mac_framework.h>
87
88#ifndef KTR_IGMPV3
89#define KTR_IGMPV3 KTR_INET
90#endif
91
92#define	IGMP_SLOWHZ	2	/* 2 slow timeouts per second */
93#define	IGMP_FASTHZ	5	/* 5 fast timeouts per second */
94#define	IGMP_RESPONSE_BURST_INTERVAL	(IGMP_FASTHZ / 2)
95
96static struct igmp_ifsoftc *
97		igi_alloc_locked(struct ifnet *);
98static void	igi_delete_locked(const struct ifnet *);
99static void	igmp_dispatch_queue(struct mbufq *, int, const int);
100static void	igmp_fasttimo_vnet(void);
101static void	igmp_final_leave(struct in_multi *, struct igmp_ifsoftc *);
102static int	igmp_handle_state_change(struct in_multi *,
103		    struct igmp_ifsoftc *);
104static int	igmp_initial_join(struct in_multi *, struct igmp_ifsoftc *);
105static int	igmp_input_v1_query(struct ifnet *, const struct ip *,
106		    const struct igmp *);
107static int	igmp_input_v2_query(struct ifnet *, const struct ip *,
108		    const struct igmp *);
109static int	igmp_input_v3_query(struct ifnet *, const struct ip *,
110		    /*const*/ struct igmpv3 *);
111static int	igmp_input_v3_group_query(struct in_multi *,
112		    struct igmp_ifsoftc *, int, /*const*/ struct igmpv3 *);
113static int	igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *,
114		    /*const*/ struct igmp *);
115static int	igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *,
116		    /*const*/ struct igmp *);
117static void	igmp_intr(struct mbuf *);
118static int	igmp_isgroupreported(const struct in_addr);
119static struct mbuf *
120		igmp_ra_alloc(void);
121#ifdef KTR
122static char *	igmp_rec_type_to_str(const int);
123#endif
124static void	igmp_set_version(struct igmp_ifsoftc *, const int);
125static void	igmp_slowtimo_vnet(void);
126static int	igmp_v1v2_queue_report(struct in_multi *, const int);
127static void	igmp_v1v2_process_group_timer(struct in_multi *, const int);
128static void	igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *);
129static void	igmp_v2_update_group(struct in_multi *, const int);
130static void	igmp_v3_cancel_link_timers(struct igmp_ifsoftc *);
131static void	igmp_v3_dispatch_general_query(struct igmp_ifsoftc *);
132static struct mbuf *
133		igmp_v3_encap_report(struct ifnet *, struct mbuf *);
134static int	igmp_v3_enqueue_group_record(struct mbufq *,
135		    struct in_multi *, const int, const int, const int);
136static int	igmp_v3_enqueue_filter_change(struct mbufq *,
137		    struct in_multi *);
138static void	igmp_v3_process_group_timers(struct in_multi_head *,
139		    struct mbufq *, struct mbufq *, struct in_multi *,
140		    const int);
141static int	igmp_v3_merge_state_changes(struct in_multi *,
142		    struct mbufq *);
143static void	igmp_v3_suppress_group_record(struct in_multi *);
144static int	sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS);
145static int	sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS);
146static int	sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS);
147static int	sysctl_igmp_stat(SYSCTL_HANDLER_ARGS);
148
149static const struct netisr_handler igmp_nh = {
150	.nh_name = "igmp",
151	.nh_handler = igmp_intr,
152	.nh_proto = NETISR_IGMP,
153	.nh_policy = NETISR_POLICY_SOURCE,
154};
155
156/*
157 * System-wide globals.
158 *
159 * Unlocked access to these is OK, except for the global IGMP output
160 * queue. The IGMP subsystem lock ends up being system-wide for the moment,
161 * because all VIMAGEs have to share a global output queue, as netisrs
162 * themselves are not virtualized.
163 *
164 * Locking:
165 *  * The permitted lock order is: IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
166 *    Any may be taken independently; if any are held at the same
167 *    time, the above lock order must be followed.
168 *  * All output is delegated to the netisr.
169 *  * IN_MULTI_LIST_LOCK covers in_multi.
170 *  * IGMP_LOCK covers igmp_ifsoftc and any global variables in this file,
171 *    including the output queue.
172 *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
173 *    per-link state iterators.
174 *  * igmp_ifsoftc is valid as long as PF_INET is attached to the interface,
175 *    therefore it is not refcounted.
176 *    We allow unlocked reads of igmp_ifsoftc when accessed via in_multi.
177 *
178 * Reference counting
179 *  * IGMP acquires its own reference every time an in_multi is passed to
180 *    it and the group is being joined for the first time.
181 *  * IGMP releases its reference(s) on in_multi in a deferred way,
182 *    because the operations which process the release run as part of
183 *    a loop whose control variables are directly affected by the release
184 *    (that, and not recursing on the IF_ADDR_LOCK).
185 *
186 * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds
187 * to a vnet in ifp->if_vnet.
188 *
189 * SMPng: XXX We may potentially race operations on ifma_protospec.
190 * The problem is that we currently lack a clean way of taking the
191 * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing,
192 * as anything which modifies ifma needs to be covered by that lock.
193 * So check for ifma_protospec being NULL before proceeding.
194 */
195struct mtx		 igmp_mtx;
196
197struct mbuf		*m_raopt;		 /* Router Alert option */
198static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
199
200/*
201 * VIMAGE-wide globals.
202 *
203 * The IGMPv3 timers themselves need to run per-image, however, for
204 * historical reasons, timers run globally.  This needs to be improved.
205 * An ifnet can only be in one vimage at a time, and the loopback
206 * ifnet, loif, is itself virtualized.
207 * It would otherwise be possible to seriously hose IGMP state,
208 * and create inconsistencies in upstream multicast routing, if you have
209 * multiple VIMAGEs running on the same link joining different multicast
210 * groups, UNLESS the "primary IP address" is different. This is because
211 * IGMP for IPv4 does not force link-local addresses to be used for each
212 * node, unlike MLD for IPv6.
213 * Obviously the IGMPv3 per-interface state has per-vimage granularity
214 * also as a result.
215 *
216 * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection
217 * policy to control the address used by IGMP on the link.
218 */
219VNET_DEFINE_STATIC(int, interface_timers_running);	/* IGMPv3 general
220							 * query response */
221VNET_DEFINE_STATIC(int, state_change_timers_running);	/* IGMPv3 state-change
222							 * retransmit */
223VNET_DEFINE_STATIC(int, current_state_timers_running);	/* IGMPv1/v2 host
224							 * report; IGMPv3 g/sg
225							 * query response */
226
227#define	V_interface_timers_running	VNET(interface_timers_running)
228#define	V_state_change_timers_running	VNET(state_change_timers_running)
229#define	V_current_state_timers_running	VNET(current_state_timers_running)
230
231VNET_PCPUSTAT_DEFINE(struct igmpstat, igmpstat);
232VNET_PCPUSTAT_SYSINIT(igmpstat);
233VNET_PCPUSTAT_SYSUNINIT(igmpstat);
234
235VNET_DEFINE_STATIC(LIST_HEAD(, igmp_ifsoftc), igi_head) =
236    LIST_HEAD_INITIALIZER(igi_head);
237VNET_DEFINE_STATIC(struct timeval, igmp_gsrdelay) = {10, 0};
238
239#define	V_igi_head			VNET(igi_head)
240#define	V_igmp_gsrdelay			VNET(igmp_gsrdelay)
241
242VNET_DEFINE_STATIC(int, igmp_recvifkludge) = 1;
243VNET_DEFINE_STATIC(int, igmp_sendra) = 1;
244VNET_DEFINE_STATIC(int, igmp_sendlocal) = 1;
245VNET_DEFINE_STATIC(int, igmp_v1enable) = 1;
246VNET_DEFINE_STATIC(int, igmp_v2enable) = 1;
247VNET_DEFINE_STATIC(int, igmp_legacysupp);
248VNET_DEFINE_STATIC(int, igmp_default_version) = IGMP_VERSION_3;
249
250#define	V_igmp_recvifkludge		VNET(igmp_recvifkludge)
251#define	V_igmp_sendra			VNET(igmp_sendra)
252#define	V_igmp_sendlocal		VNET(igmp_sendlocal)
253#define	V_igmp_v1enable			VNET(igmp_v1enable)
254#define	V_igmp_v2enable			VNET(igmp_v2enable)
255#define	V_igmp_legacysupp		VNET(igmp_legacysupp)
256#define	V_igmp_default_version		VNET(igmp_default_version)
257
258/*
259 * Virtualized sysctls.
260 */
261SYSCTL_PROC(_net_inet_igmp, IGMPCTL_STATS, stats,
262    CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE,
263    &VNET_NAME(igmpstat), 0, sysctl_igmp_stat, "S,igmpstat",
264    "IGMP statistics (struct igmpstat, netinet/igmp_var.h)");
265SYSCTL_INT(_net_inet_igmp, OID_AUTO, recvifkludge, CTLFLAG_VNET | CTLFLAG_RW,
266    &VNET_NAME(igmp_recvifkludge), 0,
267    "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address");
268SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendra, CTLFLAG_VNET | CTLFLAG_RW,
269    &VNET_NAME(igmp_sendra), 0,
270    "Send IP Router Alert option in IGMPv2/v3 messages");
271SYSCTL_INT(_net_inet_igmp, OID_AUTO, sendlocal, CTLFLAG_VNET | CTLFLAG_RW,
272    &VNET_NAME(igmp_sendlocal), 0,
273    "Send IGMP membership reports for 224.0.0.0/24 groups");
274SYSCTL_INT(_net_inet_igmp, OID_AUTO, v1enable, CTLFLAG_VNET | CTLFLAG_RW,
275    &VNET_NAME(igmp_v1enable), 0,
276    "Enable backwards compatibility with IGMPv1");
277SYSCTL_INT(_net_inet_igmp, OID_AUTO, v2enable, CTLFLAG_VNET | CTLFLAG_RW,
278    &VNET_NAME(igmp_v2enable), 0,
279    "Enable backwards compatibility with IGMPv2");
280SYSCTL_INT(_net_inet_igmp, OID_AUTO, legacysupp, CTLFLAG_VNET | CTLFLAG_RW,
281    &VNET_NAME(igmp_legacysupp), 0,
282    "Allow v1/v2 reports to suppress v3 group responses");
283SYSCTL_PROC(_net_inet_igmp, OID_AUTO, default_version,
284    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
285    &VNET_NAME(igmp_default_version), 0, sysctl_igmp_default_version, "I",
286    "Default version of IGMP to run on each interface");
287SYSCTL_PROC(_net_inet_igmp, OID_AUTO, gsrdelay,
288    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
289    &VNET_NAME(igmp_gsrdelay.tv_sec), 0, sysctl_igmp_gsr, "I",
290    "Rate limit for IGMPv3 Group-and-Source queries in seconds");
291
292/*
293 * Non-virtualized sysctls.
294 */
295static SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo,
296    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_igmp_ifinfo,
297    "Per-interface IGMPv3 state");
298
299static __inline void
300igmp_save_context(struct mbuf *m, struct ifnet *ifp)
301{
302
303#ifdef VIMAGE
304	m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
305#endif /* VIMAGE */
306	m->m_pkthdr.rcvif = ifp;
307	m->m_pkthdr.flowid = ifp->if_index;
308}
309
310static __inline void
311igmp_scrub_context(struct mbuf *m)
312{
313
314	m->m_pkthdr.PH_loc.ptr = NULL;
315	m->m_pkthdr.flowid = 0;
316}
317
318/*
319 * Restore context from a queued IGMP output chain.
320 * Return saved ifindex.
321 *
322 * VIMAGE: The assertion is there to make sure that we
323 * actually called CURVNET_SET() with what's in the mbuf chain.
324 */
325static __inline uint32_t
326igmp_restore_context(struct mbuf *m)
327{
328
329#ifdef notyet
330#if defined(VIMAGE) && defined(INVARIANTS)
331	KASSERT(curvnet == (m->m_pkthdr.PH_loc.ptr),
332	    ("%s: called when curvnet was not restored", __func__));
333#endif
334#endif
335	return (m->m_pkthdr.flowid);
336}
337
338/*
339 * IGMP statistics.
340 */
341static int
342sysctl_igmp_stat(SYSCTL_HANDLER_ARGS)
343{
344	struct igmpstat igps0;
345	int error;
346	char *p;
347
348	error = sysctl_wire_old_buffer(req, sizeof(struct igmpstat));
349	if (error)
350		return (error);
351
352	if (req->oldptr != NULL) {
353		if (req->oldlen < sizeof(struct igmpstat))
354			error = ENOMEM;
355		else {
356			/*
357			 * Copy the counters, and explicitly set the struct's
358			 * version and length fields.
359			 */
360			COUNTER_ARRAY_COPY(VNET(igmpstat), &igps0,
361			    sizeof(struct igmpstat) / sizeof(uint64_t));
362			igps0.igps_version = IGPS_VERSION_3;
363			igps0.igps_len = IGPS_VERSION3_LEN;
364			error = SYSCTL_OUT(req, &igps0,
365			    sizeof(struct igmpstat));
366		}
367	} else
368		req->validlen = sizeof(struct igmpstat);
369	if (error)
370		goto out;
371	if (req->newptr != NULL) {
372		if (req->newlen < sizeof(struct igmpstat))
373			error = ENOMEM;
374		else
375			error = SYSCTL_IN(req, &igps0,
376			    sizeof(igps0));
377		if (error)
378			goto out;
379		/*
380		 * igps0 must be "all zero".
381		 */
382		p = (char *)&igps0;
383		while (p < (char *)&igps0 + sizeof(igps0) && *p == '\0')
384			p++;
385		if (p != (char *)&igps0 + sizeof(igps0)) {
386			error = EINVAL;
387			goto out;
388		}
389		COUNTER_ARRAY_ZERO(VNET(igmpstat),
390		    sizeof(struct igmpstat) / sizeof(uint64_t));
391	}
392out:
393	return (error);
394}
395
396/*
397 * Retrieve or set default IGMP version.
398 *
399 * VIMAGE: Assume curvnet set by caller.
400 * SMPng: NOTE: Serialized by IGMP lock.
401 */
402static int
403sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS)
404{
405	int	 error;
406	int	 new;
407
408	error = sysctl_wire_old_buffer(req, sizeof(int));
409	if (error)
410		return (error);
411
412	IGMP_LOCK();
413
414	new = V_igmp_default_version;
415
416	error = sysctl_handle_int(oidp, &new, 0, req);
417	if (error || !req->newptr)
418		goto out_locked;
419
420	if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) {
421		error = EINVAL;
422		goto out_locked;
423	}
424
425	CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d",
426	     V_igmp_default_version, new);
427
428	V_igmp_default_version = new;
429
430out_locked:
431	IGMP_UNLOCK();
432	return (error);
433}
434
435/*
436 * Retrieve or set threshold between group-source queries in seconds.
437 *
438 * VIMAGE: Assume curvnet set by caller.
439 * SMPng: NOTE: Serialized by IGMP lock.
440 */
441static int
442sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS)
443{
444	int error;
445	int i;
446
447	error = sysctl_wire_old_buffer(req, sizeof(int));
448	if (error)
449		return (error);
450
451	IGMP_LOCK();
452
453	i = V_igmp_gsrdelay.tv_sec;
454
455	error = sysctl_handle_int(oidp, &i, 0, req);
456	if (error || !req->newptr)
457		goto out_locked;
458
459	if (i < -1 || i >= 60) {
460		error = EINVAL;
461		goto out_locked;
462	}
463
464	CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d",
465	     V_igmp_gsrdelay.tv_sec, i);
466	V_igmp_gsrdelay.tv_sec = i;
467
468out_locked:
469	IGMP_UNLOCK();
470	return (error);
471}
472
473/*
474 * Expose struct igmp_ifsoftc to userland, keyed by ifindex.
475 * For use by ifmcstat(8).
476 *
477 * SMPng: NOTE: Does an unlocked ifindex space read.
478 * VIMAGE: Assume curvnet set by caller. The node handler itself
479 * is not directly virtualized.
480 */
481static int
482sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS)
483{
484	struct epoch_tracker	 et;
485	int			*name;
486	int			 error;
487	u_int			 namelen;
488	struct ifnet		*ifp;
489	struct igmp_ifsoftc	*igi;
490
491	name = (int *)arg1;
492	namelen = arg2;
493
494	if (req->newptr != NULL)
495		return (EPERM);
496
497	if (namelen != 1)
498		return (EINVAL);
499
500	error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo));
501	if (error)
502		return (error);
503
504	IN_MULTI_LIST_LOCK();
505	IGMP_LOCK();
506
507	error = ENOENT;
508
509	NET_EPOCH_ENTER(et);
510	ifp = ifnet_byindex(name[0]);
511	NET_EPOCH_EXIT(et);
512	if (ifp == NULL)
513		goto out_locked;
514
515	LIST_FOREACH(igi, &V_igi_head, igi_link) {
516		if (ifp == igi->igi_ifp) {
517			struct igmp_ifinfo info;
518
519			info.igi_version = igi->igi_version;
520			info.igi_v1_timer = igi->igi_v1_timer;
521			info.igi_v2_timer = igi->igi_v2_timer;
522			info.igi_v3_timer = igi->igi_v3_timer;
523			info.igi_flags = igi->igi_flags;
524			info.igi_rv = igi->igi_rv;
525			info.igi_qi = igi->igi_qi;
526			info.igi_qri = igi->igi_qri;
527			info.igi_uri = igi->igi_uri;
528			error = SYSCTL_OUT(req, &info, sizeof(info));
529			break;
530		}
531	}
532
533out_locked:
534	IGMP_UNLOCK();
535	IN_MULTI_LIST_UNLOCK();
536	return (error);
537}
538
539/*
540 * Dispatch an entire queue of pending packet chains
541 * using the netisr.
542 * VIMAGE: Assumes the vnet pointer has been set.
543 */
544static void
545igmp_dispatch_queue(struct mbufq *mq, int limit, const int loop)
546{
547	struct epoch_tracker et;
548	struct mbuf *m;
549
550	NET_EPOCH_ENTER(et);
551	while ((m = mbufq_dequeue(mq)) != NULL) {
552		CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, mq, m);
553		if (loop)
554			m->m_flags |= M_IGMP_LOOP;
555		netisr_dispatch(NETISR_IGMP, m);
556		if (--limit == 0)
557			break;
558	}
559	NET_EPOCH_EXIT(et);
560}
561
562/*
563 * Filter outgoing IGMP report state by group.
564 *
565 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1).
566 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are
567 * disabled for all groups in the 224.0.0.0/24 link-local scope. However,
568 * this may break certain IGMP snooping switches which rely on the old
569 * report behaviour.
570 *
571 * Return zero if the given group is one for which IGMP reports
572 * should be suppressed, or non-zero if reports should be issued.
573 */
574static __inline int
575igmp_isgroupreported(const struct in_addr addr)
576{
577
578	if (in_allhosts(addr) ||
579	    ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr)))))
580		return (0);
581
582	return (1);
583}
584
585/*
586 * Construct a Router Alert option to use in outgoing packets.
587 */
588static struct mbuf *
589igmp_ra_alloc(void)
590{
591	struct mbuf	*m;
592	struct ipoption	*p;
593
594	m = m_get(M_WAITOK, MT_DATA);
595	p = mtod(m, struct ipoption *);
596	p->ipopt_dst.s_addr = INADDR_ANY;
597	p->ipopt_list[0] = (char)IPOPT_RA;	/* Router Alert Option */
598	p->ipopt_list[1] = 0x04;		/* 4 bytes long */
599	p->ipopt_list[2] = IPOPT_EOL;		/* End of IP option list */
600	p->ipopt_list[3] = 0x00;		/* pad byte */
601	m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1];
602
603	return (m);
604}
605
606/*
607 * Attach IGMP when PF_INET is attached to an interface.
608 */
609struct igmp_ifsoftc *
610igmp_domifattach(struct ifnet *ifp)
611{
612	struct igmp_ifsoftc *igi;
613
614	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
615	    __func__, ifp, ifp->if_xname);
616
617	IGMP_LOCK();
618
619	igi = igi_alloc_locked(ifp);
620	if (!(ifp->if_flags & IFF_MULTICAST))
621		igi->igi_flags |= IGIF_SILENT;
622
623	IGMP_UNLOCK();
624
625	return (igi);
626}
627
628/*
629 * VIMAGE: assume curvnet set by caller.
630 */
631static struct igmp_ifsoftc *
632igi_alloc_locked(/*const*/ struct ifnet *ifp)
633{
634	struct igmp_ifsoftc *igi;
635
636	IGMP_LOCK_ASSERT();
637
638	igi = malloc(sizeof(struct igmp_ifsoftc), M_IGMP, M_NOWAIT|M_ZERO);
639	if (igi == NULL)
640		goto out;
641
642	igi->igi_ifp = ifp;
643	igi->igi_version = V_igmp_default_version;
644	igi->igi_flags = 0;
645	igi->igi_rv = IGMP_RV_INIT;
646	igi->igi_qi = IGMP_QI_INIT;
647	igi->igi_qri = IGMP_QRI_INIT;
648	igi->igi_uri = IGMP_URI_INIT;
649	mbufq_init(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS);
650
651	LIST_INSERT_HEAD(&V_igi_head, igi, igi_link);
652
653	CTR2(KTR_IGMPV3, "allocate igmp_ifsoftc for ifp %p(%s)",
654	     ifp, ifp->if_xname);
655
656out:
657	return (igi);
658}
659
660/*
661 * Hook for ifdetach.
662 *
663 * NOTE: Some finalization tasks need to run before the protocol domain
664 * is detached, but also before the link layer does its cleanup.
665 *
666 * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK().
667 * XXX This is also bitten by unlocked ifma_protospec access.
668 */
669void
670igmp_ifdetach(struct ifnet *ifp)
671{
672	struct epoch_tracker	 et;
673	struct igmp_ifsoftc	*igi;
674	struct ifmultiaddr	*ifma;
675	struct in_multi		*inm;
676	struct in_multi_head inm_free_tmp;
677	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp,
678	    ifp->if_xname);
679
680	SLIST_INIT(&inm_free_tmp);
681	IGMP_LOCK();
682
683	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
684	if (igi->igi_version == IGMP_VERSION_3) {
685		IF_ADDR_WLOCK(ifp);
686		NET_EPOCH_ENTER(et);
687		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
688			inm = inm_ifmultiaddr_get_inm(ifma);
689			if (inm == NULL)
690				continue;
691			if (inm->inm_state == IGMP_LEAVING_MEMBER)
692				inm_rele_locked(&inm_free_tmp, inm);
693			inm_clear_recorded(inm);
694		}
695		NET_EPOCH_EXIT(et);
696		IF_ADDR_WUNLOCK(ifp);
697		inm_release_list_deferred(&inm_free_tmp);
698	}
699	IGMP_UNLOCK();
700
701}
702
703/*
704 * Hook for domifdetach.
705 */
706void
707igmp_domifdetach(struct ifnet *ifp)
708{
709
710	CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)",
711	    __func__, ifp, ifp->if_xname);
712
713	IGMP_LOCK();
714	igi_delete_locked(ifp);
715	IGMP_UNLOCK();
716}
717
718static void
719igi_delete_locked(const struct ifnet *ifp)
720{
721	struct igmp_ifsoftc *igi, *tigi;
722
723	CTR3(KTR_IGMPV3, "%s: freeing igmp_ifsoftc for ifp %p(%s)",
724	    __func__, ifp, ifp->if_xname);
725
726	IGMP_LOCK_ASSERT();
727
728	LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) {
729		if (igi->igi_ifp == ifp) {
730			/*
731			 * Free deferred General Query responses.
732			 */
733			mbufq_drain(&igi->igi_gq);
734
735			LIST_REMOVE(igi, igi_link);
736			free(igi, M_IGMP);
737			return;
738		}
739	}
740}
741
742/*
743 * Process a received IGMPv1 query.
744 * Return non-zero if the message should be dropped.
745 *
746 * VIMAGE: The curvnet pointer is derived from the input ifp.
747 */
748static int
749igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip,
750    const struct igmp *igmp)
751{
752	struct ifmultiaddr	*ifma;
753	struct igmp_ifsoftc	*igi;
754	struct in_multi		*inm;
755
756	NET_EPOCH_ASSERT();
757
758	/*
759	 * IGMPv1 Host Mmembership Queries SHOULD always be addressed to
760	 * 224.0.0.1. They are always treated as General Queries.
761	 * igmp_group is always ignored. Do not drop it as a userland
762	 * daemon may wish to see it.
763	 * XXX SMPng: unlocked increments in igmpstat assumed atomic.
764	 */
765	if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) {
766		IGMPSTAT_INC(igps_rcv_badqueries);
767		return (0);
768	}
769	IGMPSTAT_INC(igps_rcv_gen_queries);
770
771	IN_MULTI_LIST_LOCK();
772	IGMP_LOCK();
773
774	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
775	KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
776
777	if (igi->igi_flags & IGIF_LOOPBACK) {
778		CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)",
779		    ifp, ifp->if_xname);
780		goto out_locked;
781	}
782
783	/*
784	 * Switch to IGMPv1 host compatibility mode.
785	 */
786	igmp_set_version(igi, IGMP_VERSION_1);
787
788	CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname);
789
790	/*
791	 * Start the timers in all of our group records
792	 * for the interface on which the query arrived,
793	 * except those which are already running.
794	 */
795	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
796		inm = inm_ifmultiaddr_get_inm(ifma);
797		if (inm == NULL)
798			continue;
799		if (inm->inm_timer != 0)
800			continue;
801		switch (inm->inm_state) {
802		case IGMP_NOT_MEMBER:
803		case IGMP_SILENT_MEMBER:
804			break;
805		case IGMP_G_QUERY_PENDING_MEMBER:
806		case IGMP_SG_QUERY_PENDING_MEMBER:
807		case IGMP_REPORTING_MEMBER:
808		case IGMP_IDLE_MEMBER:
809		case IGMP_LAZY_MEMBER:
810		case IGMP_SLEEPING_MEMBER:
811		case IGMP_AWAKENING_MEMBER:
812			inm->inm_state = IGMP_REPORTING_MEMBER;
813			inm->inm_timer = IGMP_RANDOM_DELAY(
814			    IGMP_V1V2_MAX_RI * IGMP_FASTHZ);
815			V_current_state_timers_running = 1;
816			break;
817		case IGMP_LEAVING_MEMBER:
818			break;
819		}
820	}
821
822out_locked:
823	IGMP_UNLOCK();
824	IN_MULTI_LIST_UNLOCK();
825
826	return (0);
827}
828
829/*
830 * Process a received IGMPv2 general or group-specific query.
831 */
832static int
833igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip,
834    const struct igmp *igmp)
835{
836	struct ifmultiaddr	*ifma;
837	struct igmp_ifsoftc	*igi;
838	struct in_multi		*inm;
839	int			 is_general_query;
840	uint16_t		 timer;
841
842	NET_EPOCH_ASSERT();
843
844	is_general_query = 0;
845
846	/*
847	 * Validate address fields upfront.
848	 * XXX SMPng: unlocked increments in igmpstat assumed atomic.
849	 */
850	if (in_nullhost(igmp->igmp_group)) {
851		/*
852		 * IGMPv2 General Query.
853		 * If this was not sent to the all-hosts group, ignore it.
854		 */
855		if (!in_allhosts(ip->ip_dst))
856			return (0);
857		IGMPSTAT_INC(igps_rcv_gen_queries);
858		is_general_query = 1;
859	} else {
860		/* IGMPv2 Group-Specific Query. */
861		IGMPSTAT_INC(igps_rcv_group_queries);
862	}
863
864	IN_MULTI_LIST_LOCK();
865	IGMP_LOCK();
866
867	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
868	KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
869
870	if (igi->igi_flags & IGIF_LOOPBACK) {
871		CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)",
872		    ifp, ifp->if_xname);
873		goto out_locked;
874	}
875
876	/*
877	 * Ignore v2 query if in v1 Compatibility Mode.
878	 */
879	if (igi->igi_version == IGMP_VERSION_1)
880		goto out_locked;
881
882	igmp_set_version(igi, IGMP_VERSION_2);
883
884	timer = igmp->igmp_code * IGMP_FASTHZ / IGMP_TIMER_SCALE;
885	if (timer == 0)
886		timer = 1;
887
888	if (is_general_query) {
889		/*
890		 * For each reporting group joined on this
891		 * interface, kick the report timer.
892		 */
893		CTR2(KTR_IGMPV3, "process v2 general query on ifp %p(%s)",
894		    ifp, ifp->if_xname);
895		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
896			inm = inm_ifmultiaddr_get_inm(ifma);
897			if (inm == NULL)
898				continue;
899			igmp_v2_update_group(inm, timer);
900		}
901	} else {
902		/*
903		 * Group-specific IGMPv2 query, we need only
904		 * look up the single group to process it.
905		 */
906		inm = inm_lookup(ifp, igmp->igmp_group);
907		if (inm != NULL) {
908			CTR3(KTR_IGMPV3,
909			    "process v2 query 0x%08x on ifp %p(%s)",
910			    ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
911			igmp_v2_update_group(inm, timer);
912		}
913	}
914
915out_locked:
916	IGMP_UNLOCK();
917	IN_MULTI_LIST_UNLOCK();
918
919	return (0);
920}
921
922/*
923 * Update the report timer on a group in response to an IGMPv2 query.
924 *
925 * If we are becoming the reporting member for this group, start the timer.
926 * If we already are the reporting member for this group, and timer is
927 * below the threshold, reset it.
928 *
929 * We may be updating the group for the first time since we switched
930 * to IGMPv3. If we are, then we must clear any recorded source lists,
931 * and transition to REPORTING state; the group timer is overloaded
932 * for group and group-source query responses.
933 *
934 * Unlike IGMPv3, the delay per group should be jittered
935 * to avoid bursts of IGMPv2 reports.
936 */
937static void
938igmp_v2_update_group(struct in_multi *inm, const int timer)
939{
940
941	CTR4(KTR_IGMPV3, "0x%08x: %s/%s timer=%d", __func__,
942	    ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname, timer);
943
944	IN_MULTI_LIST_LOCK_ASSERT();
945
946	switch (inm->inm_state) {
947	case IGMP_NOT_MEMBER:
948	case IGMP_SILENT_MEMBER:
949		break;
950	case IGMP_REPORTING_MEMBER:
951		if (inm->inm_timer != 0 &&
952		    inm->inm_timer <= timer) {
953			CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, "
954			    "skipping.", __func__);
955			break;
956		}
957		/* FALLTHROUGH */
958	case IGMP_SG_QUERY_PENDING_MEMBER:
959	case IGMP_G_QUERY_PENDING_MEMBER:
960	case IGMP_IDLE_MEMBER:
961	case IGMP_LAZY_MEMBER:
962	case IGMP_AWAKENING_MEMBER:
963		CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__);
964		inm->inm_state = IGMP_REPORTING_MEMBER;
965		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
966		V_current_state_timers_running = 1;
967		break;
968	case IGMP_SLEEPING_MEMBER:
969		CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__);
970		inm->inm_state = IGMP_AWAKENING_MEMBER;
971		break;
972	case IGMP_LEAVING_MEMBER:
973		break;
974	}
975}
976
977/*
978 * Process a received IGMPv3 general, group-specific or
979 * group-and-source-specific query.
980 * Assumes m has already been pulled up to the full IGMP message length.
981 * Return 0 if successful, otherwise an appropriate error code is returned.
982 */
983static int
984igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip,
985    /*const*/ struct igmpv3 *igmpv3)
986{
987	struct igmp_ifsoftc	*igi;
988	struct in_multi		*inm;
989	int			 is_general_query;
990	uint32_t		 maxresp, nsrc, qqi;
991	uint16_t		 timer;
992	uint8_t			 qrv;
993
994	is_general_query = 0;
995
996	CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname);
997
998	maxresp = igmpv3->igmp_code;	/* in 1/10ths of a second */
999	if (maxresp >= 128) {
1000		maxresp = IGMP_MANT(igmpv3->igmp_code) <<
1001			  (IGMP_EXP(igmpv3->igmp_code) + 3);
1002	}
1003
1004	/*
1005	 * Robustness must never be less than 2 for on-wire IGMPv3.
1006	 * FUTURE: Check if ifp has IGIF_LOOPBACK set, as we will make
1007	 * an exception for interfaces whose IGMPv3 state changes
1008	 * are redirected to loopback (e.g. MANET).
1009	 */
1010	qrv = IGMP_QRV(igmpv3->igmp_misc);
1011	if (qrv < 2) {
1012		CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__,
1013		    qrv, IGMP_RV_INIT);
1014		qrv = IGMP_RV_INIT;
1015	}
1016
1017	qqi = igmpv3->igmp_qqi;
1018	if (qqi >= 128) {
1019		qqi = IGMP_MANT(igmpv3->igmp_qqi) <<
1020		     (IGMP_EXP(igmpv3->igmp_qqi) + 3);
1021	}
1022
1023	timer = maxresp * IGMP_FASTHZ / IGMP_TIMER_SCALE;
1024	if (timer == 0)
1025		timer = 1;
1026
1027	nsrc = ntohs(igmpv3->igmp_numsrc);
1028
1029	/*
1030	 * Validate address fields and versions upfront before
1031	 * accepting v3 query.
1032	 * XXX SMPng: Unlocked access to igmpstat counters here.
1033	 */
1034	if (in_nullhost(igmpv3->igmp_group)) {
1035		/*
1036		 * IGMPv3 General Query.
1037		 *
1038		 * General Queries SHOULD be directed to 224.0.0.1.
1039		 * A general query with a source list has undefined
1040		 * behaviour; discard it.
1041		 */
1042		IGMPSTAT_INC(igps_rcv_gen_queries);
1043		if (!in_allhosts(ip->ip_dst) || nsrc > 0) {
1044			IGMPSTAT_INC(igps_rcv_badqueries);
1045			return (0);
1046		}
1047		is_general_query = 1;
1048	} else {
1049		/* Group or group-source specific query. */
1050		if (nsrc == 0)
1051			IGMPSTAT_INC(igps_rcv_group_queries);
1052		else
1053			IGMPSTAT_INC(igps_rcv_gsr_queries);
1054	}
1055
1056	IN_MULTI_LIST_LOCK();
1057	IGMP_LOCK();
1058
1059	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
1060	KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
1061
1062	if (igi->igi_flags & IGIF_LOOPBACK) {
1063		CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)",
1064		    ifp, ifp->if_xname);
1065		goto out_locked;
1066	}
1067
1068	/*
1069	 * Discard the v3 query if we're in Compatibility Mode.
1070	 * The RFC is not obviously worded that hosts need to stay in
1071	 * compatibility mode until the Old Version Querier Present
1072	 * timer expires.
1073	 */
1074	if (igi->igi_version != IGMP_VERSION_3) {
1075		CTR3(KTR_IGMPV3, "ignore v3 query in v%d mode on ifp %p(%s)",
1076		    igi->igi_version, ifp, ifp->if_xname);
1077		goto out_locked;
1078	}
1079
1080	igmp_set_version(igi, IGMP_VERSION_3);
1081	igi->igi_rv = qrv;
1082	igi->igi_qi = qqi;
1083	igi->igi_qri = maxresp;
1084
1085	CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi,
1086	    maxresp);
1087
1088	if (is_general_query) {
1089		/*
1090		 * Schedule a current-state report on this ifp for
1091		 * all groups, possibly containing source lists.
1092		 * If there is a pending General Query response
1093		 * scheduled earlier than the selected delay, do
1094		 * not schedule any other reports.
1095		 * Otherwise, reset the interface timer.
1096		 */
1097		CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)",
1098		    ifp, ifp->if_xname);
1099		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) {
1100			igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer);
1101			V_interface_timers_running = 1;
1102		}
1103	} else {
1104		/*
1105		 * Group-source-specific queries are throttled on
1106		 * a per-group basis to defeat denial-of-service attempts.
1107		 * Queries for groups we are not a member of on this
1108		 * link are simply ignored.
1109		 */
1110		inm = inm_lookup(ifp, igmpv3->igmp_group);
1111		if (inm == NULL)
1112			goto out_locked;
1113		if (nsrc > 0) {
1114			if (!ratecheck(&inm->inm_lastgsrtv,
1115			    &V_igmp_gsrdelay)) {
1116				CTR1(KTR_IGMPV3, "%s: GS query throttled.",
1117				    __func__);
1118				IGMPSTAT_INC(igps_drop_gsr_queries);
1119				goto out_locked;
1120			}
1121		}
1122		CTR3(KTR_IGMPV3, "process v3 0x%08x query on ifp %p(%s)",
1123		     ntohl(igmpv3->igmp_group.s_addr), ifp, ifp->if_xname);
1124		/*
1125		 * If there is a pending General Query response
1126		 * scheduled sooner than the selected delay, no
1127		 * further report need be scheduled.
1128		 * Otherwise, prepare to respond to the
1129		 * group-specific or group-and-source query.
1130		 */
1131		if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer)
1132			igmp_input_v3_group_query(inm, igi, timer, igmpv3);
1133	}
1134
1135out_locked:
1136	IGMP_UNLOCK();
1137	IN_MULTI_LIST_UNLOCK();
1138
1139	return (0);
1140}
1141
1142/*
1143 * Process a received IGMPv3 group-specific or group-and-source-specific
1144 * query.
1145 * Return <0 if any error occurred. Currently this is ignored.
1146 */
1147static int
1148igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifsoftc *igi,
1149    int timer, /*const*/ struct igmpv3 *igmpv3)
1150{
1151	int			 retval;
1152	uint16_t		 nsrc;
1153
1154	IN_MULTI_LIST_LOCK_ASSERT();
1155	IGMP_LOCK_ASSERT();
1156
1157	retval = 0;
1158
1159	switch (inm->inm_state) {
1160	case IGMP_NOT_MEMBER:
1161	case IGMP_SILENT_MEMBER:
1162	case IGMP_SLEEPING_MEMBER:
1163	case IGMP_LAZY_MEMBER:
1164	case IGMP_AWAKENING_MEMBER:
1165	case IGMP_IDLE_MEMBER:
1166	case IGMP_LEAVING_MEMBER:
1167		return (retval);
1168		break;
1169	case IGMP_REPORTING_MEMBER:
1170	case IGMP_G_QUERY_PENDING_MEMBER:
1171	case IGMP_SG_QUERY_PENDING_MEMBER:
1172		break;
1173	}
1174
1175	nsrc = ntohs(igmpv3->igmp_numsrc);
1176
1177	/*
1178	 * Deal with group-specific queries upfront.
1179	 * If any group query is already pending, purge any recorded
1180	 * source-list state if it exists, and schedule a query response
1181	 * for this group-specific query.
1182	 */
1183	if (nsrc == 0) {
1184		if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
1185		    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) {
1186			inm_clear_recorded(inm);
1187			timer = min(inm->inm_timer, timer);
1188		}
1189		inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER;
1190		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1191		V_current_state_timers_running = 1;
1192		return (retval);
1193	}
1194
1195	/*
1196	 * Deal with the case where a group-and-source-specific query has
1197	 * been received but a group-specific query is already pending.
1198	 */
1199	if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) {
1200		timer = min(inm->inm_timer, timer);
1201		inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1202		V_current_state_timers_running = 1;
1203		return (retval);
1204	}
1205
1206	/*
1207	 * Finally, deal with the case where a group-and-source-specific
1208	 * query has been received, where a response to a previous g-s-r
1209	 * query exists, or none exists.
1210	 * In this case, we need to parse the source-list which the Querier
1211	 * has provided us with and check if we have any source list filter
1212	 * entries at T1 for these sources. If we do not, there is no need
1213	 * schedule a report and the query may be dropped.
1214	 * If we do, we must record them and schedule a current-state
1215	 * report for those sources.
1216	 * FIXME: Handling source lists larger than 1 mbuf requires that
1217	 * we pass the mbuf chain pointer down to this function, and use
1218	 * m_getptr() to walk the chain.
1219	 */
1220	if (inm->inm_nsrc > 0) {
1221		const struct in_addr	*ap;
1222		int			 i, nrecorded;
1223
1224		ap = (const struct in_addr *)(igmpv3 + 1);
1225		nrecorded = 0;
1226		for (i = 0; i < nsrc; i++, ap++) {
1227			retval = inm_record_source(inm, ap->s_addr);
1228			if (retval < 0)
1229				break;
1230			nrecorded += retval;
1231		}
1232		if (nrecorded > 0) {
1233			CTR1(KTR_IGMPV3,
1234			    "%s: schedule response to SG query", __func__);
1235			inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER;
1236			inm->inm_timer = IGMP_RANDOM_DELAY(timer);
1237			V_current_state_timers_running = 1;
1238		}
1239	}
1240
1241	return (retval);
1242}
1243
1244/*
1245 * Process a received IGMPv1 host membership report.
1246 *
1247 * NOTE: 0.0.0.0 workaround breaks const correctness.
1248 */
1249static int
1250igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip,
1251    /*const*/ struct igmp *igmp)
1252{
1253	struct in_ifaddr *ia;
1254	struct in_multi *inm;
1255
1256	IGMPSTAT_INC(igps_rcv_reports);
1257
1258	if (ifp->if_flags & IFF_LOOPBACK)
1259		return (0);
1260
1261	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
1262	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
1263		IGMPSTAT_INC(igps_rcv_badreports);
1264		return (EINVAL);
1265	}
1266
1267	/*
1268	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1269	 * Booting clients may use the source address 0.0.0.0. Some
1270	 * IGMP daemons may not know how to use IP_RECVIF to determine
1271	 * the interface upon which this message was received.
1272	 * Replace 0.0.0.0 with the subnet address if told to do so.
1273	 */
1274	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1275		IFP_TO_IA(ifp, ia);
1276		if (ia != NULL)
1277			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1278	}
1279
1280	CTR3(KTR_IGMPV3, "process v1 report 0x%08x on ifp %p(%s)",
1281	     ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
1282
1283	/*
1284	 * IGMPv1 report suppression.
1285	 * If we are a member of this group, and our membership should be
1286	 * reported, stop our group timer and transition to the 'lazy' state.
1287	 */
1288	IN_MULTI_LIST_LOCK();
1289	inm = inm_lookup(ifp, igmp->igmp_group);
1290	if (inm != NULL) {
1291		struct igmp_ifsoftc *igi;
1292
1293		igi = inm->inm_igi;
1294		if (igi == NULL) {
1295			KASSERT(igi != NULL,
1296			    ("%s: no igi for ifp %p", __func__, ifp));
1297			goto out_locked;
1298		}
1299
1300		IGMPSTAT_INC(igps_rcv_ourreports);
1301
1302		/*
1303		 * If we are in IGMPv3 host mode, do not allow the
1304		 * other host's IGMPv1 report to suppress our reports
1305		 * unless explicitly configured to do so.
1306		 */
1307		if (igi->igi_version == IGMP_VERSION_3) {
1308			if (V_igmp_legacysupp)
1309				igmp_v3_suppress_group_record(inm);
1310			goto out_locked;
1311		}
1312
1313		inm->inm_timer = 0;
1314
1315		switch (inm->inm_state) {
1316		case IGMP_NOT_MEMBER:
1317		case IGMP_SILENT_MEMBER:
1318			break;
1319		case IGMP_IDLE_MEMBER:
1320		case IGMP_LAZY_MEMBER:
1321		case IGMP_AWAKENING_MEMBER:
1322			CTR3(KTR_IGMPV3,
1323			    "report suppressed for 0x%08x on ifp %p(%s)",
1324			    ntohl(igmp->igmp_group.s_addr), ifp,
1325			    ifp->if_xname);
1326		case IGMP_SLEEPING_MEMBER:
1327			inm->inm_state = IGMP_SLEEPING_MEMBER;
1328			break;
1329		case IGMP_REPORTING_MEMBER:
1330			CTR3(KTR_IGMPV3,
1331			    "report suppressed for 0x%08x on ifp %p(%s)",
1332			    ntohl(igmp->igmp_group.s_addr), ifp,
1333			    ifp->if_xname);
1334			if (igi->igi_version == IGMP_VERSION_1)
1335				inm->inm_state = IGMP_LAZY_MEMBER;
1336			else if (igi->igi_version == IGMP_VERSION_2)
1337				inm->inm_state = IGMP_SLEEPING_MEMBER;
1338			break;
1339		case IGMP_G_QUERY_PENDING_MEMBER:
1340		case IGMP_SG_QUERY_PENDING_MEMBER:
1341		case IGMP_LEAVING_MEMBER:
1342			break;
1343		}
1344	}
1345
1346out_locked:
1347	IN_MULTI_LIST_UNLOCK();
1348
1349	return (0);
1350}
1351
1352/*
1353 * Process a received IGMPv2 host membership report.
1354 *
1355 * NOTE: 0.0.0.0 workaround breaks const correctness.
1356 */
1357static int
1358igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip,
1359    /*const*/ struct igmp *igmp)
1360{
1361	struct in_ifaddr *ia;
1362	struct in_multi *inm;
1363
1364	/*
1365	 * Make sure we don't hear our own membership report.  Fast
1366	 * leave requires knowing that we are the only member of a
1367	 * group.
1368	 */
1369	IFP_TO_IA(ifp, ia);
1370	if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) {
1371		return (0);
1372	}
1373
1374	IGMPSTAT_INC(igps_rcv_reports);
1375
1376	if (ifp->if_flags & IFF_LOOPBACK) {
1377		return (0);
1378	}
1379
1380	if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) ||
1381	    !in_hosteq(igmp->igmp_group, ip->ip_dst)) {
1382		IGMPSTAT_INC(igps_rcv_badreports);
1383		return (EINVAL);
1384	}
1385
1386	/*
1387	 * RFC 3376, Section 4.2.13, 9.2, 9.3:
1388	 * Booting clients may use the source address 0.0.0.0. Some
1389	 * IGMP daemons may not know how to use IP_RECVIF to determine
1390	 * the interface upon which this message was received.
1391	 * Replace 0.0.0.0 with the subnet address if told to do so.
1392	 */
1393	if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) {
1394		if (ia != NULL)
1395			ip->ip_src.s_addr = htonl(ia->ia_subnet);
1396	}
1397
1398	CTR3(KTR_IGMPV3, "process v2 report 0x%08x on ifp %p(%s)",
1399	     ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
1400
1401	/*
1402	 * IGMPv2 report suppression.
1403	 * If we are a member of this group, and our membership should be
1404	 * reported, and our group timer is pending or about to be reset,
1405	 * stop our group timer by transitioning to the 'lazy' state.
1406	 */
1407	IN_MULTI_LIST_LOCK();
1408	inm = inm_lookup(ifp, igmp->igmp_group);
1409	if (inm != NULL) {
1410		struct igmp_ifsoftc *igi;
1411
1412		igi = inm->inm_igi;
1413		KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp));
1414
1415		IGMPSTAT_INC(igps_rcv_ourreports);
1416
1417		/*
1418		 * If we are in IGMPv3 host mode, do not allow the
1419		 * other host's IGMPv1 report to suppress our reports
1420		 * unless explicitly configured to do so.
1421		 */
1422		if (igi->igi_version == IGMP_VERSION_3) {
1423			if (V_igmp_legacysupp)
1424				igmp_v3_suppress_group_record(inm);
1425			goto out_locked;
1426		}
1427
1428		inm->inm_timer = 0;
1429
1430		switch (inm->inm_state) {
1431		case IGMP_NOT_MEMBER:
1432		case IGMP_SILENT_MEMBER:
1433		case IGMP_SLEEPING_MEMBER:
1434			break;
1435		case IGMP_REPORTING_MEMBER:
1436		case IGMP_IDLE_MEMBER:
1437		case IGMP_AWAKENING_MEMBER:
1438			CTR3(KTR_IGMPV3,
1439			    "report suppressed for 0x%08x on ifp %p(%s)",
1440			    ntohl(igmp->igmp_group.s_addr), ifp, ifp->if_xname);
1441		case IGMP_LAZY_MEMBER:
1442			inm->inm_state = IGMP_LAZY_MEMBER;
1443			break;
1444		case IGMP_G_QUERY_PENDING_MEMBER:
1445		case IGMP_SG_QUERY_PENDING_MEMBER:
1446		case IGMP_LEAVING_MEMBER:
1447			break;
1448		}
1449	}
1450
1451out_locked:
1452	IN_MULTI_LIST_UNLOCK();
1453
1454	return (0);
1455}
1456
1457int
1458igmp_input(struct mbuf **mp, int *offp, int proto)
1459{
1460	int iphlen;
1461	struct ifnet *ifp;
1462	struct igmp *igmp;
1463	struct ip *ip;
1464	struct mbuf *m;
1465	int igmplen;
1466	int minlen;
1467	int queryver;
1468
1469	CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, *mp, *offp);
1470
1471	m = *mp;
1472	ifp = m->m_pkthdr.rcvif;
1473	*mp = NULL;
1474
1475	IGMPSTAT_INC(igps_rcv_total);
1476
1477	ip = mtod(m, struct ip *);
1478	iphlen = *offp;
1479	igmplen = ntohs(ip->ip_len) - iphlen;
1480
1481	/*
1482	 * Validate lengths.
1483	 */
1484	if (igmplen < IGMP_MINLEN) {
1485		IGMPSTAT_INC(igps_rcv_tooshort);
1486		m_freem(m);
1487		return (IPPROTO_DONE);
1488	}
1489
1490	/*
1491	 * Always pullup to the minimum size for v1/v2 or v3
1492	 * to amortize calls to m_pullup().
1493	 */
1494	minlen = iphlen;
1495	if (igmplen >= IGMP_V3_QUERY_MINLEN)
1496		minlen += IGMP_V3_QUERY_MINLEN;
1497	else
1498		minlen += IGMP_MINLEN;
1499	if ((!M_WRITABLE(m) || m->m_len < minlen) &&
1500	    (m = m_pullup(m, minlen)) == NULL) {
1501		IGMPSTAT_INC(igps_rcv_tooshort);
1502		return (IPPROTO_DONE);
1503	}
1504	ip = mtod(m, struct ip *);
1505
1506	/*
1507	 * Validate checksum.
1508	 */
1509	m->m_data += iphlen;
1510	m->m_len -= iphlen;
1511	igmp = mtod(m, struct igmp *);
1512	if (in_cksum(m, igmplen)) {
1513		IGMPSTAT_INC(igps_rcv_badsum);
1514		m_freem(m);
1515		return (IPPROTO_DONE);
1516	}
1517	m->m_data -= iphlen;
1518	m->m_len += iphlen;
1519
1520	/*
1521	 * IGMP control traffic is link-scope, and must have a TTL of 1.
1522	 * DVMRP traffic (e.g. mrinfo, mtrace) is an exception;
1523	 * probe packets may come from beyond the LAN.
1524	 */
1525	if (igmp->igmp_type != IGMP_DVMRP && ip->ip_ttl != 1) {
1526		IGMPSTAT_INC(igps_rcv_badttl);
1527		m_freem(m);
1528		return (IPPROTO_DONE);
1529	}
1530
1531	switch (igmp->igmp_type) {
1532	case IGMP_HOST_MEMBERSHIP_QUERY:
1533		if (igmplen == IGMP_MINLEN) {
1534			if (igmp->igmp_code == 0)
1535				queryver = IGMP_VERSION_1;
1536			else
1537				queryver = IGMP_VERSION_2;
1538		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
1539			queryver = IGMP_VERSION_3;
1540		} else {
1541			IGMPSTAT_INC(igps_rcv_tooshort);
1542			m_freem(m);
1543			return (IPPROTO_DONE);
1544		}
1545
1546		switch (queryver) {
1547		case IGMP_VERSION_1:
1548			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1549			if (!V_igmp_v1enable)
1550				break;
1551			if (igmp_input_v1_query(ifp, ip, igmp) != 0) {
1552				m_freem(m);
1553				return (IPPROTO_DONE);
1554			}
1555			break;
1556
1557		case IGMP_VERSION_2:
1558			IGMPSTAT_INC(igps_rcv_v1v2_queries);
1559			if (!V_igmp_v2enable)
1560				break;
1561			if (igmp_input_v2_query(ifp, ip, igmp) != 0) {
1562				m_freem(m);
1563				return (IPPROTO_DONE);
1564			}
1565			break;
1566
1567		case IGMP_VERSION_3: {
1568				struct igmpv3 *igmpv3;
1569				uint16_t igmpv3len;
1570				uint16_t nsrc;
1571
1572				IGMPSTAT_INC(igps_rcv_v3_queries);
1573				igmpv3 = (struct igmpv3 *)igmp;
1574				/*
1575				 * Validate length based on source count.
1576				 */
1577				nsrc = ntohs(igmpv3->igmp_numsrc);
1578				if (nsrc * sizeof(in_addr_t) >
1579				    UINT16_MAX - iphlen - IGMP_V3_QUERY_MINLEN) {
1580					IGMPSTAT_INC(igps_rcv_tooshort);
1581					m_freem(m);
1582					return (IPPROTO_DONE);
1583				}
1584				/*
1585				 * m_pullup() may modify m, so pullup in
1586				 * this scope.
1587				 */
1588				igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN +
1589				   sizeof(struct in_addr) * nsrc;
1590				if ((!M_WRITABLE(m) ||
1591				     m->m_len < igmpv3len) &&
1592				    (m = m_pullup(m, igmpv3len)) == NULL) {
1593					IGMPSTAT_INC(igps_rcv_tooshort);
1594					return (IPPROTO_DONE);
1595				}
1596				igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *)
1597				    + iphlen);
1598				if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) {
1599					m_freem(m);
1600					return (IPPROTO_DONE);
1601				}
1602			}
1603			break;
1604		}
1605		break;
1606
1607	case IGMP_v1_HOST_MEMBERSHIP_REPORT:
1608		if (!V_igmp_v1enable)
1609			break;
1610		if (igmp_input_v1_report(ifp, ip, igmp) != 0) {
1611			m_freem(m);
1612			return (IPPROTO_DONE);
1613		}
1614		break;
1615
1616	case IGMP_v2_HOST_MEMBERSHIP_REPORT:
1617		if (!V_igmp_v2enable)
1618			break;
1619		if (!ip_checkrouteralert(m))
1620			IGMPSTAT_INC(igps_rcv_nora);
1621		if (igmp_input_v2_report(ifp, ip, igmp) != 0) {
1622			m_freem(m);
1623			return (IPPROTO_DONE);
1624		}
1625		break;
1626
1627	case IGMP_v3_HOST_MEMBERSHIP_REPORT:
1628		/*
1629		 * Hosts do not need to process IGMPv3 membership reports,
1630		 * as report suppression is no longer required.
1631		 */
1632		if (!ip_checkrouteralert(m))
1633			IGMPSTAT_INC(igps_rcv_nora);
1634		break;
1635
1636	default:
1637		break;
1638	}
1639
1640	/*
1641	 * Pass all valid IGMP packets up to any process(es) listening on a
1642	 * raw IGMP socket.
1643	 */
1644	*mp = m;
1645	return (rip_input(mp, offp, proto));
1646}
1647
1648/*
1649 * Fast timeout handler (global).
1650 * VIMAGE: Timeout handlers are expected to service all vimages.
1651 */
1652static struct callout igmpfast_callout;
1653static void
1654igmp_fasttimo(void *arg __unused)
1655{
1656	struct epoch_tracker et;
1657	VNET_ITERATOR_DECL(vnet_iter);
1658
1659	NET_EPOCH_ENTER(et);
1660	VNET_LIST_RLOCK_NOSLEEP();
1661	VNET_FOREACH(vnet_iter) {
1662		CURVNET_SET(vnet_iter);
1663		igmp_fasttimo_vnet();
1664		CURVNET_RESTORE();
1665	}
1666	VNET_LIST_RUNLOCK_NOSLEEP();
1667	NET_EPOCH_EXIT(et);
1668
1669	callout_reset(&igmpfast_callout, hz / IGMP_FASTHZ, igmp_fasttimo, NULL);
1670}
1671
1672/*
1673 * Fast timeout handler (per-vnet).
1674 *
1675 * VIMAGE: Assume caller has set up our curvnet.
1676 */
1677static void
1678igmp_fasttimo_vnet(void)
1679{
1680	struct mbufq		 scq;	/* State-change packets */
1681	struct mbufq		 qrq;	/* Query response packets */
1682	struct ifnet		*ifp;
1683	struct igmp_ifsoftc	*igi;
1684	struct ifmultiaddr	*ifma;
1685	struct in_multi		*inm;
1686	struct in_multi_head inm_free_tmp;
1687	int			 loop, uri_fasthz;
1688
1689	loop = 0;
1690	uri_fasthz = 0;
1691
1692	/*
1693	 * Quick check to see if any work needs to be done, in order to
1694	 * minimize the overhead of fasttimo processing.
1695	 * SMPng: XXX Unlocked reads.
1696	 */
1697	if (!V_current_state_timers_running &&
1698	    !V_interface_timers_running &&
1699	    !V_state_change_timers_running)
1700		return;
1701
1702	SLIST_INIT(&inm_free_tmp);
1703	IN_MULTI_LIST_LOCK();
1704	IGMP_LOCK();
1705
1706	/*
1707	 * IGMPv3 General Query response timer processing.
1708	 */
1709	if (V_interface_timers_running) {
1710		CTR1(KTR_IGMPV3, "%s: interface timers running", __func__);
1711
1712		V_interface_timers_running = 0;
1713		LIST_FOREACH(igi, &V_igi_head, igi_link) {
1714			if (igi->igi_v3_timer == 0) {
1715				/* Do nothing. */
1716			} else if (--igi->igi_v3_timer == 0) {
1717				igmp_v3_dispatch_general_query(igi);
1718			} else {
1719				V_interface_timers_running = 1;
1720			}
1721		}
1722	}
1723
1724	if (!V_current_state_timers_running &&
1725	    !V_state_change_timers_running)
1726		goto out_locked;
1727
1728	V_current_state_timers_running = 0;
1729	V_state_change_timers_running = 0;
1730
1731	CTR1(KTR_IGMPV3, "%s: state change timers running", __func__);
1732
1733	/*
1734	 * IGMPv1/v2/v3 host report and state-change timer processing.
1735	 * Note: Processing a v3 group timer may remove a node.
1736	 */
1737	LIST_FOREACH(igi, &V_igi_head, igi_link) {
1738		ifp = igi->igi_ifp;
1739
1740		if (igi->igi_version == IGMP_VERSION_3) {
1741			loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
1742			uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri *
1743			    IGMP_FASTHZ);
1744			mbufq_init(&qrq, IGMP_MAX_G_GS_PACKETS);
1745			mbufq_init(&scq, IGMP_MAX_STATE_CHANGE_PACKETS);
1746		}
1747
1748		IF_ADDR_WLOCK(ifp);
1749		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1750			inm = inm_ifmultiaddr_get_inm(ifma);
1751			if (inm == NULL)
1752				continue;
1753			switch (igi->igi_version) {
1754			case IGMP_VERSION_1:
1755			case IGMP_VERSION_2:
1756				igmp_v1v2_process_group_timer(inm,
1757				    igi->igi_version);
1758				break;
1759			case IGMP_VERSION_3:
1760				igmp_v3_process_group_timers(&inm_free_tmp, &qrq,
1761				    &scq, inm, uri_fasthz);
1762				break;
1763			}
1764		}
1765		IF_ADDR_WUNLOCK(ifp);
1766
1767		if (igi->igi_version == IGMP_VERSION_3) {
1768			igmp_dispatch_queue(&qrq, 0, loop);
1769			igmp_dispatch_queue(&scq, 0, loop);
1770
1771			/*
1772			 * Free the in_multi reference(s) for this
1773			 * IGMP lifecycle.
1774			 */
1775			inm_release_list_deferred(&inm_free_tmp);
1776		}
1777	}
1778
1779out_locked:
1780	IGMP_UNLOCK();
1781	IN_MULTI_LIST_UNLOCK();
1782}
1783
1784/*
1785 * Update host report group timer for IGMPv1/v2.
1786 * Will update the global pending timer flags.
1787 */
1788static void
1789igmp_v1v2_process_group_timer(struct in_multi *inm, const int version)
1790{
1791	int report_timer_expired;
1792
1793	IN_MULTI_LIST_LOCK_ASSERT();
1794	IGMP_LOCK_ASSERT();
1795
1796	if (inm->inm_timer == 0) {
1797		report_timer_expired = 0;
1798	} else if (--inm->inm_timer == 0) {
1799		report_timer_expired = 1;
1800	} else {
1801		V_current_state_timers_running = 1;
1802		return;
1803	}
1804
1805	switch (inm->inm_state) {
1806	case IGMP_NOT_MEMBER:
1807	case IGMP_SILENT_MEMBER:
1808	case IGMP_IDLE_MEMBER:
1809	case IGMP_LAZY_MEMBER:
1810	case IGMP_SLEEPING_MEMBER:
1811	case IGMP_AWAKENING_MEMBER:
1812		break;
1813	case IGMP_REPORTING_MEMBER:
1814		if (report_timer_expired) {
1815			inm->inm_state = IGMP_IDLE_MEMBER;
1816			(void)igmp_v1v2_queue_report(inm,
1817			    (version == IGMP_VERSION_2) ?
1818			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
1819			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
1820		}
1821		break;
1822	case IGMP_G_QUERY_PENDING_MEMBER:
1823	case IGMP_SG_QUERY_PENDING_MEMBER:
1824	case IGMP_LEAVING_MEMBER:
1825		break;
1826	}
1827}
1828
1829/*
1830 * Update a group's timers for IGMPv3.
1831 * Will update the global pending timer flags.
1832 * Note: Unlocked read from igi.
1833 */
1834static void
1835igmp_v3_process_group_timers(struct in_multi_head *inmh,
1836    struct mbufq *qrq, struct mbufq *scq,
1837    struct in_multi *inm, const int uri_fasthz)
1838{
1839	int query_response_timer_expired;
1840	int state_change_retransmit_timer_expired;
1841
1842	IN_MULTI_LIST_LOCK_ASSERT();
1843	IGMP_LOCK_ASSERT();
1844
1845	query_response_timer_expired = 0;
1846	state_change_retransmit_timer_expired = 0;
1847
1848	/*
1849	 * During a transition from v1/v2 compatibility mode back to v3,
1850	 * a group record in REPORTING state may still have its group
1851	 * timer active. This is a no-op in this function; it is easier
1852	 * to deal with it here than to complicate the slow-timeout path.
1853	 */
1854	if (inm->inm_timer == 0) {
1855		query_response_timer_expired = 0;
1856	} else if (--inm->inm_timer == 0) {
1857		query_response_timer_expired = 1;
1858	} else {
1859		V_current_state_timers_running = 1;
1860	}
1861
1862	if (inm->inm_sctimer == 0) {
1863		state_change_retransmit_timer_expired = 0;
1864	} else if (--inm->inm_sctimer == 0) {
1865		state_change_retransmit_timer_expired = 1;
1866	} else {
1867		V_state_change_timers_running = 1;
1868	}
1869
1870	/* We are in fasttimo, so be quick about it. */
1871	if (!state_change_retransmit_timer_expired &&
1872	    !query_response_timer_expired)
1873		return;
1874
1875	switch (inm->inm_state) {
1876	case IGMP_NOT_MEMBER:
1877	case IGMP_SILENT_MEMBER:
1878	case IGMP_SLEEPING_MEMBER:
1879	case IGMP_LAZY_MEMBER:
1880	case IGMP_AWAKENING_MEMBER:
1881	case IGMP_IDLE_MEMBER:
1882		break;
1883	case IGMP_G_QUERY_PENDING_MEMBER:
1884	case IGMP_SG_QUERY_PENDING_MEMBER:
1885		/*
1886		 * Respond to a previously pending Group-Specific
1887		 * or Group-and-Source-Specific query by enqueueing
1888		 * the appropriate Current-State report for
1889		 * immediate transmission.
1890		 */
1891		if (query_response_timer_expired) {
1892			int retval __unused;
1893
1894			retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1,
1895			    (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER));
1896			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
1897			    __func__, retval);
1898			inm->inm_state = IGMP_REPORTING_MEMBER;
1899			/* XXX Clear recorded sources for next time. */
1900			inm_clear_recorded(inm);
1901		}
1902		/* FALLTHROUGH */
1903	case IGMP_REPORTING_MEMBER:
1904	case IGMP_LEAVING_MEMBER:
1905		if (state_change_retransmit_timer_expired) {
1906			/*
1907			 * State-change retransmission timer fired.
1908			 * If there are any further pending retransmissions,
1909			 * set the global pending state-change flag, and
1910			 * reset the timer.
1911			 */
1912			if (--inm->inm_scrv > 0) {
1913				inm->inm_sctimer = uri_fasthz;
1914				V_state_change_timers_running = 1;
1915			}
1916			/*
1917			 * Retransmit the previously computed state-change
1918			 * report. If there are no further pending
1919			 * retransmissions, the mbuf queue will be consumed.
1920			 * Update T0 state to T1 as we have now sent
1921			 * a state-change.
1922			 */
1923			(void)igmp_v3_merge_state_changes(inm, scq);
1924
1925			inm_commit(inm);
1926			CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
1927			    ntohl(inm->inm_addr.s_addr),
1928			    inm->inm_ifp->if_xname);
1929
1930			/*
1931			 * If we are leaving the group for good, make sure
1932			 * we release IGMP's reference to it.
1933			 * This release must be deferred using a SLIST,
1934			 * as we are called from a loop which traverses
1935			 * the in_ifmultiaddr TAILQ.
1936			 */
1937			if (inm->inm_state == IGMP_LEAVING_MEMBER &&
1938			    inm->inm_scrv == 0) {
1939				inm->inm_state = IGMP_NOT_MEMBER;
1940				inm_rele_locked(inmh, inm);
1941			}
1942		}
1943		break;
1944	}
1945}
1946
1947/*
1948 * Suppress a group's pending response to a group or source/group query.
1949 *
1950 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency.
1951 * Do NOT update ST1/ST0 as this operation merely suppresses
1952 * the currently pending group record.
1953 * Do NOT suppress the response to a general query. It is possible but
1954 * it would require adding another state or flag.
1955 */
1956static void
1957igmp_v3_suppress_group_record(struct in_multi *inm)
1958{
1959
1960	IN_MULTI_LIST_LOCK_ASSERT();
1961
1962	KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3,
1963		("%s: not IGMPv3 mode on link", __func__));
1964
1965	if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER ||
1966	    inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER)
1967		return;
1968
1969	if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
1970		inm_clear_recorded(inm);
1971
1972	inm->inm_timer = 0;
1973	inm->inm_state = IGMP_REPORTING_MEMBER;
1974}
1975
1976/*
1977 * Switch to a different IGMP version on the given interface,
1978 * as per Section 7.2.1.
1979 */
1980static void
1981igmp_set_version(struct igmp_ifsoftc *igi, const int version)
1982{
1983	int old_version_timer;
1984
1985	IGMP_LOCK_ASSERT();
1986
1987	CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__,
1988	    version, igi->igi_ifp, igi->igi_ifp->if_xname);
1989
1990	if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) {
1991		/*
1992		 * Compute the "Older Version Querier Present" timer as per
1993		 * Section 8.12.
1994		 */
1995		old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri;
1996		old_version_timer *= IGMP_SLOWHZ;
1997
1998		if (version == IGMP_VERSION_1) {
1999			igi->igi_v1_timer = old_version_timer;
2000			igi->igi_v2_timer = 0;
2001		} else if (version == IGMP_VERSION_2) {
2002			igi->igi_v1_timer = 0;
2003			igi->igi_v2_timer = old_version_timer;
2004		}
2005	}
2006
2007	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2008		if (igi->igi_version != IGMP_VERSION_2) {
2009			igi->igi_version = IGMP_VERSION_2;
2010			igmp_v3_cancel_link_timers(igi);
2011		}
2012	} else if (igi->igi_v1_timer > 0) {
2013		if (igi->igi_version != IGMP_VERSION_1) {
2014			igi->igi_version = IGMP_VERSION_1;
2015			igmp_v3_cancel_link_timers(igi);
2016		}
2017	}
2018}
2019
2020/*
2021 * Cancel pending IGMPv3 timers for the given link and all groups
2022 * joined on it; state-change, general-query, and group-query timers.
2023 *
2024 * Only ever called on a transition from v3 to Compatibility mode. Kill
2025 * the timers stone dead (this may be expensive for large N groups), they
2026 * will be restarted if Compatibility Mode deems that they must be due to
2027 * query processing.
2028 */
2029static void
2030igmp_v3_cancel_link_timers(struct igmp_ifsoftc *igi)
2031{
2032	struct ifmultiaddr	*ifma;
2033	struct ifnet		*ifp;
2034	struct in_multi		*inm;
2035	struct in_multi_head inm_free_tmp;
2036
2037	CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__,
2038	    igi->igi_ifp, igi->igi_ifp->if_xname);
2039
2040	IN_MULTI_LIST_LOCK_ASSERT();
2041	IGMP_LOCK_ASSERT();
2042	NET_EPOCH_ASSERT();
2043
2044	SLIST_INIT(&inm_free_tmp);
2045
2046	/*
2047	 * Stop the v3 General Query Response on this link stone dead.
2048	 * If fasttimo is woken up due to V_interface_timers_running,
2049	 * the flag will be cleared if there are no pending link timers.
2050	 */
2051	igi->igi_v3_timer = 0;
2052
2053	/*
2054	 * Now clear the current-state and state-change report timers
2055	 * for all memberships scoped to this link.
2056	 */
2057	ifp = igi->igi_ifp;
2058	IF_ADDR_WLOCK(ifp);
2059	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2060		inm = inm_ifmultiaddr_get_inm(ifma);
2061		if (inm == NULL)
2062			continue;
2063		switch (inm->inm_state) {
2064		case IGMP_NOT_MEMBER:
2065		case IGMP_SILENT_MEMBER:
2066		case IGMP_IDLE_MEMBER:
2067		case IGMP_LAZY_MEMBER:
2068		case IGMP_SLEEPING_MEMBER:
2069		case IGMP_AWAKENING_MEMBER:
2070			/*
2071			 * These states are either not relevant in v3 mode,
2072			 * or are unreported. Do nothing.
2073			 */
2074			break;
2075		case IGMP_LEAVING_MEMBER:
2076			/*
2077			 * If we are leaving the group and switching to
2078			 * compatibility mode, we need to release the final
2079			 * reference held for issuing the INCLUDE {}, and
2080			 * transition to REPORTING to ensure the host leave
2081			 * message is sent upstream to the old querier --
2082			 * transition to NOT would lose the leave and race.
2083			 */
2084			inm_rele_locked(&inm_free_tmp, inm);
2085			/* FALLTHROUGH */
2086		case IGMP_G_QUERY_PENDING_MEMBER:
2087		case IGMP_SG_QUERY_PENDING_MEMBER:
2088			inm_clear_recorded(inm);
2089			/* FALLTHROUGH */
2090		case IGMP_REPORTING_MEMBER:
2091			inm->inm_state = IGMP_REPORTING_MEMBER;
2092			break;
2093		}
2094		/*
2095		 * Always clear state-change and group report timers.
2096		 * Free any pending IGMPv3 state-change records.
2097		 */
2098		inm->inm_sctimer = 0;
2099		inm->inm_timer = 0;
2100		mbufq_drain(&inm->inm_scq);
2101	}
2102	IF_ADDR_WUNLOCK(ifp);
2103
2104	inm_release_list_deferred(&inm_free_tmp);
2105}
2106
2107/*
2108 * Update the Older Version Querier Present timers for a link.
2109 * See Section 7.2.1 of RFC 3376.
2110 */
2111static void
2112igmp_v1v2_process_querier_timers(struct igmp_ifsoftc *igi)
2113{
2114
2115	IGMP_LOCK_ASSERT();
2116
2117	if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) {
2118		/*
2119		 * IGMPv1 and IGMPv2 Querier Present timers expired.
2120		 *
2121		 * Revert to IGMPv3.
2122		 */
2123		if (V_igmp_default_version == IGMP_VERSION_3 &&
2124		    igi->igi_version != IGMP_VERSION_3) {
2125			CTR5(KTR_IGMPV3,
2126			    "%s: transition from v%d -> v%d on %p(%s)",
2127			    __func__, igi->igi_version, IGMP_VERSION_3,
2128			    igi->igi_ifp, igi->igi_ifp->if_xname);
2129			igi->igi_version = IGMP_VERSION_3;
2130		}
2131	} else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) {
2132		/*
2133		 * IGMPv1 Querier Present timer expired,
2134		 * IGMPv2 Querier Present timer running.
2135		 * If IGMPv2 was disabled since last timeout,
2136		 * revert to IGMPv3.
2137		 * If IGMPv2 is enabled, revert to IGMPv2.
2138		 */
2139		if (V_igmp_default_version == IGMP_VERSION_3 &&
2140		    !V_igmp_v2enable) {
2141			CTR5(KTR_IGMPV3,
2142			    "%s: transition from v%d -> v%d on %p(%s)",
2143			    __func__, igi->igi_version, IGMP_VERSION_3,
2144			    igi->igi_ifp, igi->igi_ifp->if_xname);
2145			igi->igi_v2_timer = 0;
2146			igi->igi_version = IGMP_VERSION_3;
2147		} else {
2148			--igi->igi_v2_timer;
2149			if (V_igmp_default_version == IGMP_VERSION_2 &&
2150			    igi->igi_version != IGMP_VERSION_2) {
2151				CTR5(KTR_IGMPV3,
2152				    "%s: transition from v%d -> v%d on %p(%s)",
2153				    __func__, igi->igi_version, IGMP_VERSION_2,
2154				    igi->igi_ifp, igi->igi_ifp->if_xname);
2155				igi->igi_version = IGMP_VERSION_2;
2156				igmp_v3_cancel_link_timers(igi);
2157			}
2158		}
2159	} else if (igi->igi_v1_timer > 0) {
2160		/*
2161		 * IGMPv1 Querier Present timer running.
2162		 * Stop IGMPv2 timer if running.
2163		 *
2164		 * If IGMPv1 was disabled since last timeout,
2165		 * revert to IGMPv3.
2166		 * If IGMPv1 is enabled, reset IGMPv2 timer if running.
2167		 */
2168		if (V_igmp_default_version == IGMP_VERSION_3 &&
2169		    !V_igmp_v1enable) {
2170			CTR5(KTR_IGMPV3,
2171			    "%s: transition from v%d -> v%d on %p(%s)",
2172			    __func__, igi->igi_version, IGMP_VERSION_3,
2173			    igi->igi_ifp, igi->igi_ifp->if_xname);
2174			igi->igi_v1_timer = 0;
2175			igi->igi_version = IGMP_VERSION_3;
2176		} else {
2177			--igi->igi_v1_timer;
2178		}
2179		if (igi->igi_v2_timer > 0) {
2180			CTR3(KTR_IGMPV3,
2181			    "%s: cancel v2 timer on %p(%s)",
2182			    __func__, igi->igi_ifp, igi->igi_ifp->if_xname);
2183			igi->igi_v2_timer = 0;
2184		}
2185	}
2186}
2187
2188/*
2189 * Global slowtimo handler.
2190 * VIMAGE: Timeout handlers are expected to service all vimages.
2191 */
2192static struct callout igmpslow_callout;
2193static void
2194igmp_slowtimo(void *arg __unused)
2195{
2196	struct epoch_tracker et;
2197	VNET_ITERATOR_DECL(vnet_iter);
2198
2199	NET_EPOCH_ENTER(et);
2200	VNET_LIST_RLOCK_NOSLEEP();
2201	VNET_FOREACH(vnet_iter) {
2202		CURVNET_SET(vnet_iter);
2203		igmp_slowtimo_vnet();
2204		CURVNET_RESTORE();
2205	}
2206	VNET_LIST_RUNLOCK_NOSLEEP();
2207	NET_EPOCH_EXIT(et);
2208
2209	callout_reset(&igmpslow_callout, hz / IGMP_SLOWHZ, igmp_slowtimo, NULL);
2210}
2211
2212/*
2213 * Per-vnet slowtimo handler.
2214 */
2215static void
2216igmp_slowtimo_vnet(void)
2217{
2218	struct igmp_ifsoftc *igi;
2219
2220	IGMP_LOCK();
2221
2222	LIST_FOREACH(igi, &V_igi_head, igi_link) {
2223		igmp_v1v2_process_querier_timers(igi);
2224	}
2225
2226	IGMP_UNLOCK();
2227}
2228
2229/*
2230 * Dispatch an IGMPv1/v2 host report or leave message.
2231 * These are always small enough to fit inside a single mbuf.
2232 */
2233static int
2234igmp_v1v2_queue_report(struct in_multi *inm, const int type)
2235{
2236	struct epoch_tracker 	et;
2237	struct ifnet		*ifp;
2238	struct igmp		*igmp;
2239	struct ip		*ip;
2240	struct mbuf		*m;
2241
2242	IN_MULTI_LIST_LOCK_ASSERT();
2243	IGMP_LOCK_ASSERT();
2244
2245	ifp = inm->inm_ifp;
2246
2247	m = m_gethdr(M_NOWAIT, MT_DATA);
2248	if (m == NULL)
2249		return (ENOMEM);
2250	M_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp));
2251
2252	m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp);
2253
2254	m->m_data += sizeof(struct ip);
2255	m->m_len = sizeof(struct igmp);
2256
2257	igmp = mtod(m, struct igmp *);
2258	igmp->igmp_type = type;
2259	igmp->igmp_code = 0;
2260	igmp->igmp_group = inm->inm_addr;
2261	igmp->igmp_cksum = 0;
2262	igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp));
2263
2264	m->m_data -= sizeof(struct ip);
2265	m->m_len += sizeof(struct ip);
2266
2267	ip = mtod(m, struct ip *);
2268	ip->ip_tos = 0;
2269	ip->ip_len = htons(sizeof(struct ip) + sizeof(struct igmp));
2270	ip->ip_off = 0;
2271	ip->ip_p = IPPROTO_IGMP;
2272	ip->ip_src.s_addr = INADDR_ANY;
2273
2274	if (type == IGMP_HOST_LEAVE_MESSAGE)
2275		ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP);
2276	else
2277		ip->ip_dst = inm->inm_addr;
2278
2279	igmp_save_context(m, ifp);
2280
2281	m->m_flags |= M_IGMPV2;
2282	if (inm->inm_igi->igi_flags & IGIF_LOOPBACK)
2283		m->m_flags |= M_IGMP_LOOP;
2284
2285	CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m);
2286	NET_EPOCH_ENTER(et);
2287	netisr_dispatch(NETISR_IGMP, m);
2288	NET_EPOCH_EXIT(et);
2289
2290	return (0);
2291}
2292
2293/*
2294 * Process a state change from the upper layer for the given IPv4 group.
2295 *
2296 * Each socket holds a reference on the in_multi in its own ip_moptions.
2297 * The socket layer will have made the necessary updates to.the group
2298 * state, it is now up to IGMP to issue a state change report if there
2299 * has been any change between T0 (when the last state-change was issued)
2300 * and T1 (now).
2301 *
2302 * We use the IGMPv3 state machine at group level. The IGMP module
2303 * however makes the decision as to which IGMP protocol version to speak.
2304 * A state change *from* INCLUDE {} always means an initial join.
2305 * A state change *to* INCLUDE {} always means a final leave.
2306 *
2307 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can
2308 * save ourselves a bunch of work; any exclusive mode groups need not
2309 * compute source filter lists.
2310 *
2311 * VIMAGE: curvnet should have been set by caller, as this routine
2312 * is called from the socket option handlers.
2313 */
2314int
2315igmp_change_state(struct in_multi *inm)
2316{
2317	struct igmp_ifsoftc *igi;
2318	struct ifnet *ifp;
2319	int error;
2320
2321	error = 0;
2322	IN_MULTI_LOCK_ASSERT();
2323	/*
2324	 * Try to detect if the upper layer just asked us to change state
2325	 * for an interface which has now gone away.
2326	 */
2327	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
2328	ifp = inm->inm_ifma->ifma_ifp;
2329	if (ifp == NULL)
2330		return (0);
2331	/*
2332	 * Sanity check that netinet's notion of ifp is the
2333	 * same as net's.
2334	 */
2335	KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
2336
2337	IGMP_LOCK();
2338
2339	igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp;
2340	KASSERT(igi != NULL, ("%s: no igmp_ifsoftc for ifp %p", __func__, ifp));
2341
2342	/*
2343	 * If we detect a state transition to or from MCAST_UNDEFINED
2344	 * for this group, then we are starting or finishing an IGMP
2345	 * life cycle for this group.
2346	 */
2347	if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) {
2348		CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__,
2349		    inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode);
2350		if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) {
2351			CTR1(KTR_IGMPV3, "%s: initial join", __func__);
2352			error = igmp_initial_join(inm, igi);
2353			goto out_locked;
2354		} else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) {
2355			CTR1(KTR_IGMPV3, "%s: final leave", __func__);
2356			igmp_final_leave(inm, igi);
2357			goto out_locked;
2358		}
2359	} else {
2360		CTR1(KTR_IGMPV3, "%s: filter set change", __func__);
2361	}
2362
2363	error = igmp_handle_state_change(inm, igi);
2364
2365out_locked:
2366	IGMP_UNLOCK();
2367	return (error);
2368}
2369
2370/*
2371 * Perform the initial join for an IGMP group.
2372 *
2373 * When joining a group:
2374 *  If the group should have its IGMP traffic suppressed, do nothing.
2375 *  IGMPv1 starts sending IGMPv1 host membership reports.
2376 *  IGMPv2 starts sending IGMPv2 host membership reports.
2377 *  IGMPv3 will schedule an IGMPv3 state-change report containing the
2378 *  initial state of the membership.
2379 */
2380static int
2381igmp_initial_join(struct in_multi *inm, struct igmp_ifsoftc *igi)
2382{
2383	struct ifnet		*ifp;
2384	struct mbufq		*mq;
2385	int			 error, retval, syncstates;
2386
2387	CTR4(KTR_IGMPV3, "%s: initial join 0x%08x on ifp %p(%s)", __func__,
2388	    ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname);
2389
2390	error = 0;
2391	syncstates = 1;
2392
2393	ifp = inm->inm_ifp;
2394
2395	IN_MULTI_LOCK_ASSERT();
2396	IGMP_LOCK_ASSERT();
2397
2398	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
2399
2400	/*
2401	 * Groups joined on loopback or marked as 'not reported',
2402	 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and
2403	 * are never reported in any IGMP protocol exchanges.
2404	 * All other groups enter the appropriate IGMP state machine
2405	 * for the version in use on this link.
2406	 * A link marked as IGIF_SILENT causes IGMP to be completely
2407	 * disabled for the link.
2408	 */
2409	if ((ifp->if_flags & IFF_LOOPBACK) ||
2410	    (igi->igi_flags & IGIF_SILENT) ||
2411	    !igmp_isgroupreported(inm->inm_addr)) {
2412		CTR1(KTR_IGMPV3,
2413"%s: not kicking state machine for silent group", __func__);
2414		inm->inm_state = IGMP_SILENT_MEMBER;
2415		inm->inm_timer = 0;
2416	} else {
2417		/*
2418		 * Deal with overlapping in_multi lifecycle.
2419		 * If this group was LEAVING, then make sure
2420		 * we drop the reference we picked up to keep the
2421		 * group around for the final INCLUDE {} enqueue.
2422		 */
2423		if (igi->igi_version == IGMP_VERSION_3 &&
2424		    inm->inm_state == IGMP_LEAVING_MEMBER) {
2425			MPASS(inm->inm_refcount > 1);
2426			inm_rele_locked(NULL, inm);
2427		}
2428		inm->inm_state = IGMP_REPORTING_MEMBER;
2429
2430		switch (igi->igi_version) {
2431		case IGMP_VERSION_1:
2432		case IGMP_VERSION_2:
2433			inm->inm_state = IGMP_IDLE_MEMBER;
2434			error = igmp_v1v2_queue_report(inm,
2435			    (igi->igi_version == IGMP_VERSION_2) ?
2436			     IGMP_v2_HOST_MEMBERSHIP_REPORT :
2437			     IGMP_v1_HOST_MEMBERSHIP_REPORT);
2438			if (error == 0) {
2439				inm->inm_timer = IGMP_RANDOM_DELAY(
2440				    IGMP_V1V2_MAX_RI * IGMP_FASTHZ);
2441				V_current_state_timers_running = 1;
2442			}
2443			break;
2444
2445		case IGMP_VERSION_3:
2446			/*
2447			 * Defer update of T0 to T1, until the first copy
2448			 * of the state change has been transmitted.
2449			 */
2450			syncstates = 0;
2451
2452			/*
2453			 * Immediately enqueue a State-Change Report for
2454			 * this interface, freeing any previous reports.
2455			 * Don't kick the timers if there is nothing to do,
2456			 * or if an error occurred.
2457			 */
2458			mq = &inm->inm_scq;
2459			mbufq_drain(mq);
2460			retval = igmp_v3_enqueue_group_record(mq, inm, 1,
2461			    0, 0);
2462			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
2463			    __func__, retval);
2464			if (retval <= 0) {
2465				error = retval * -1;
2466				break;
2467			}
2468
2469			/*
2470			 * Schedule transmission of pending state-change
2471			 * report up to RV times for this link. The timer
2472			 * will fire at the next igmp_fasttimo (~200ms),
2473			 * giving us an opportunity to merge the reports.
2474			 */
2475			if (igi->igi_flags & IGIF_LOOPBACK) {
2476				inm->inm_scrv = 1;
2477			} else {
2478				KASSERT(igi->igi_rv > 1,
2479				   ("%s: invalid robustness %d", __func__,
2480				    igi->igi_rv));
2481				inm->inm_scrv = igi->igi_rv;
2482			}
2483			inm->inm_sctimer = 1;
2484			V_state_change_timers_running = 1;
2485
2486			error = 0;
2487			break;
2488		}
2489	}
2490
2491	/*
2492	 * Only update the T0 state if state change is atomic,
2493	 * i.e. we don't need to wait for a timer to fire before we
2494	 * can consider the state change to have been communicated.
2495	 */
2496	if (syncstates) {
2497		inm_commit(inm);
2498		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
2499		    ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
2500	}
2501
2502	return (error);
2503}
2504
2505/*
2506 * Issue an intermediate state change during the IGMP life-cycle.
2507 */
2508static int
2509igmp_handle_state_change(struct in_multi *inm, struct igmp_ifsoftc *igi)
2510{
2511	struct ifnet		*ifp;
2512	int			 retval;
2513
2514	CTR4(KTR_IGMPV3, "%s: state change for 0x%08x on ifp %p(%s)", __func__,
2515	    ntohl(inm->inm_addr.s_addr), inm->inm_ifp, inm->inm_ifp->if_xname);
2516
2517	ifp = inm->inm_ifp;
2518
2519	IN_MULTI_LIST_LOCK_ASSERT();
2520	IGMP_LOCK_ASSERT();
2521
2522	KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__));
2523
2524	if ((ifp->if_flags & IFF_LOOPBACK) ||
2525	    (igi->igi_flags & IGIF_SILENT) ||
2526	    !igmp_isgroupreported(inm->inm_addr) ||
2527	    (igi->igi_version != IGMP_VERSION_3)) {
2528		if (!igmp_isgroupreported(inm->inm_addr)) {
2529			CTR1(KTR_IGMPV3,
2530"%s: not kicking state machine for silent group", __func__);
2531		}
2532		CTR1(KTR_IGMPV3, "%s: nothing to do", __func__);
2533		inm_commit(inm);
2534		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
2535		    ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
2536		return (0);
2537	}
2538
2539	mbufq_drain(&inm->inm_scq);
2540
2541	retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0);
2542	CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval);
2543	if (retval <= 0)
2544		return (-retval);
2545
2546	/*
2547	 * If record(s) were enqueued, start the state-change
2548	 * report timer for this group.
2549	 */
2550	inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv);
2551	inm->inm_sctimer = 1;
2552	V_state_change_timers_running = 1;
2553
2554	return (0);
2555}
2556
2557/*
2558 * Perform the final leave for an IGMP group.
2559 *
2560 * When leaving a group:
2561 *  IGMPv1 does nothing.
2562 *  IGMPv2 sends a host leave message, if and only if we are the reporter.
2563 *  IGMPv3 enqueues a state-change report containing a transition
2564 *  to INCLUDE {} for immediate transmission.
2565 */
2566static void
2567igmp_final_leave(struct in_multi *inm, struct igmp_ifsoftc *igi)
2568{
2569	int syncstates;
2570
2571	syncstates = 1;
2572
2573	CTR4(KTR_IGMPV3, "%s: final leave 0x%08x on ifp %p(%s)",
2574	    __func__, ntohl(inm->inm_addr.s_addr), inm->inm_ifp,
2575	    inm->inm_ifp->if_xname);
2576
2577	IN_MULTI_LIST_LOCK_ASSERT();
2578	IGMP_LOCK_ASSERT();
2579
2580	switch (inm->inm_state) {
2581	case IGMP_NOT_MEMBER:
2582	case IGMP_SILENT_MEMBER:
2583	case IGMP_LEAVING_MEMBER:
2584		/* Already leaving or left; do nothing. */
2585		CTR1(KTR_IGMPV3,
2586"%s: not kicking state machine for silent group", __func__);
2587		break;
2588	case IGMP_REPORTING_MEMBER:
2589	case IGMP_IDLE_MEMBER:
2590	case IGMP_G_QUERY_PENDING_MEMBER:
2591	case IGMP_SG_QUERY_PENDING_MEMBER:
2592		if (igi->igi_version == IGMP_VERSION_2) {
2593#ifdef INVARIANTS
2594			if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER ||
2595			    inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)
2596			panic("%s: IGMPv3 state reached, not IGMPv3 mode",
2597			     __func__);
2598#endif
2599			igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE);
2600			inm->inm_state = IGMP_NOT_MEMBER;
2601		} else if (igi->igi_version == IGMP_VERSION_3) {
2602			/*
2603			 * Stop group timer and all pending reports.
2604			 * Immediately enqueue a state-change report
2605			 * TO_IN {} to be sent on the next fast timeout,
2606			 * giving us an opportunity to merge reports.
2607			 */
2608			mbufq_drain(&inm->inm_scq);
2609			inm->inm_timer = 0;
2610			if (igi->igi_flags & IGIF_LOOPBACK) {
2611				inm->inm_scrv = 1;
2612			} else {
2613				inm->inm_scrv = igi->igi_rv;
2614			}
2615			CTR4(KTR_IGMPV3, "%s: Leaving 0x%08x/%s with %d "
2616			    "pending retransmissions.", __func__,
2617			    ntohl(inm->inm_addr.s_addr),
2618			    inm->inm_ifp->if_xname, inm->inm_scrv);
2619			if (inm->inm_scrv == 0) {
2620				inm->inm_state = IGMP_NOT_MEMBER;
2621				inm->inm_sctimer = 0;
2622			} else {
2623				int retval __unused;
2624
2625				inm_acquire_locked(inm);
2626
2627				retval = igmp_v3_enqueue_group_record(
2628				    &inm->inm_scq, inm, 1, 0, 0);
2629				KASSERT(retval != 0,
2630				    ("%s: enqueue record = %d", __func__,
2631				     retval));
2632
2633				inm->inm_state = IGMP_LEAVING_MEMBER;
2634				inm->inm_sctimer = 1;
2635				V_state_change_timers_running = 1;
2636				syncstates = 0;
2637			}
2638			break;
2639		}
2640		break;
2641	case IGMP_LAZY_MEMBER:
2642	case IGMP_SLEEPING_MEMBER:
2643	case IGMP_AWAKENING_MEMBER:
2644		/* Our reports are suppressed; do nothing. */
2645		break;
2646	}
2647
2648	if (syncstates) {
2649		inm_commit(inm);
2650		CTR3(KTR_IGMPV3, "%s: T1 -> T0 for 0x%08x/%s", __func__,
2651		    ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
2652		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
2653		CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for 0x%08x/%s",
2654		    __func__, ntohl(inm->inm_addr.s_addr),
2655		    inm->inm_ifp->if_xname);
2656	}
2657}
2658
2659/*
2660 * Enqueue an IGMPv3 group record to the given output queue.
2661 *
2662 * XXX This function could do with having the allocation code
2663 * split out, and the multiple-tree-walks coalesced into a single
2664 * routine as has been done in igmp_v3_enqueue_filter_change().
2665 *
2666 * If is_state_change is zero, a current-state record is appended.
2667 * If is_state_change is non-zero, a state-change report is appended.
2668 *
2669 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2670 * If is_group_query is zero, and if there is a packet with free space
2671 * at the tail of the queue, it will be appended to providing there
2672 * is enough free space.
2673 * Otherwise a new mbuf packet chain is allocated.
2674 *
2675 * If is_source_query is non-zero, each source is checked to see if
2676 * it was recorded for a Group-Source query, and will be omitted if
2677 * it is not both in-mode and recorded.
2678 *
2679 * The function will attempt to allocate leading space in the packet
2680 * for the IP/IGMP header to be prepended without fragmenting the chain.
2681 *
2682 * If successful the size of all data appended to the queue is returned,
2683 * otherwise an error code less than zero is returned, or zero if
2684 * no record(s) were appended.
2685 */
2686static int
2687igmp_v3_enqueue_group_record(struct mbufq *mq, struct in_multi *inm,
2688    const int is_state_change, const int is_group_query,
2689    const int is_source_query)
2690{
2691	struct igmp_grouprec	 ig;
2692	struct igmp_grouprec	*pig;
2693	struct ifnet		*ifp;
2694	struct ip_msource	*ims, *nims;
2695	struct mbuf		*m0, *m, *md;
2696	int			 is_filter_list_change;
2697	int			 minrec0len, m0srcs, msrcs, nbytes, off;
2698	int			 record_has_sources;
2699	int			 now;
2700	int			 type;
2701	in_addr_t		 naddr;
2702	uint8_t			 mode;
2703
2704	IN_MULTI_LIST_LOCK_ASSERT();
2705
2706	ifp = inm->inm_ifp;
2707	is_filter_list_change = 0;
2708	m = NULL;
2709	m0 = NULL;
2710	m0srcs = 0;
2711	msrcs = 0;
2712	nbytes = 0;
2713	nims = NULL;
2714	record_has_sources = 1;
2715	pig = NULL;
2716	type = IGMP_DO_NOTHING;
2717	mode = inm->inm_st[1].iss_fmode;
2718
2719	/*
2720	 * If we did not transition out of ASM mode during t0->t1,
2721	 * and there are no source nodes to process, we can skip
2722	 * the generation of source records.
2723	 */
2724	if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 &&
2725	    inm->inm_nsrc == 0)
2726		record_has_sources = 0;
2727
2728	if (is_state_change) {
2729		/*
2730		 * Queue a state change record.
2731		 * If the mode did not change, and there are non-ASM
2732		 * listeners or source filters present,
2733		 * we potentially need to issue two records for the group.
2734		 * If we are transitioning to MCAST_UNDEFINED, we need
2735		 * not send any sources.
2736		 * If there are ASM listeners, and there was no filter
2737		 * mode transition of any kind, do nothing.
2738		 */
2739		if (mode != inm->inm_st[0].iss_fmode) {
2740			if (mode == MCAST_EXCLUDE) {
2741				CTR1(KTR_IGMPV3, "%s: change to EXCLUDE",
2742				    __func__);
2743				type = IGMP_CHANGE_TO_EXCLUDE_MODE;
2744			} else {
2745				CTR1(KTR_IGMPV3, "%s: change to INCLUDE",
2746				    __func__);
2747				type = IGMP_CHANGE_TO_INCLUDE_MODE;
2748				if (mode == MCAST_UNDEFINED)
2749					record_has_sources = 0;
2750			}
2751		} else {
2752			if (record_has_sources) {
2753				is_filter_list_change = 1;
2754			} else {
2755				type = IGMP_DO_NOTHING;
2756			}
2757		}
2758	} else {
2759		/*
2760		 * Queue a current state record.
2761		 */
2762		if (mode == MCAST_EXCLUDE) {
2763			type = IGMP_MODE_IS_EXCLUDE;
2764		} else if (mode == MCAST_INCLUDE) {
2765			type = IGMP_MODE_IS_INCLUDE;
2766			KASSERT(inm->inm_st[1].iss_asm == 0,
2767			    ("%s: inm %p is INCLUDE but ASM count is %d",
2768			     __func__, inm, inm->inm_st[1].iss_asm));
2769		}
2770	}
2771
2772	/*
2773	 * Generate the filter list changes using a separate function.
2774	 */
2775	if (is_filter_list_change)
2776		return (igmp_v3_enqueue_filter_change(mq, inm));
2777
2778	if (type == IGMP_DO_NOTHING) {
2779		CTR3(KTR_IGMPV3, "%s: nothing to do for 0x%08x/%s", __func__,
2780		    ntohl(inm->inm_addr.s_addr), inm->inm_ifp->if_xname);
2781		return (0);
2782	}
2783
2784	/*
2785	 * If any sources are present, we must be able to fit at least
2786	 * one in the trailing space of the tail packet's mbuf,
2787	 * ideally more.
2788	 */
2789	minrec0len = sizeof(struct igmp_grouprec);
2790	if (record_has_sources)
2791		minrec0len += sizeof(in_addr_t);
2792
2793	CTR4(KTR_IGMPV3, "%s: queueing %s for 0x%08x/%s", __func__,
2794	    igmp_rec_type_to_str(type), ntohl(inm->inm_addr.s_addr),
2795	    inm->inm_ifp->if_xname);
2796
2797	/*
2798	 * Check if we have a packet in the tail of the queue for this
2799	 * group into which the first group record for this group will fit.
2800	 * Otherwise allocate a new packet.
2801	 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT.
2802	 * Note: Group records for G/GSR query responses MUST be sent
2803	 * in their own packet.
2804	 */
2805	m0 = mbufq_last(mq);
2806	if (!is_group_query &&
2807	    m0 != NULL &&
2808	    (m0->m_pkthdr.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) &&
2809	    (m0->m_pkthdr.len + minrec0len) <
2810	     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
2811		m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2812			    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
2813		m = m0;
2814		CTR1(KTR_IGMPV3, "%s: use existing packet", __func__);
2815	} else {
2816		if (mbufq_full(mq)) {
2817			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
2818			return (-ENOMEM);
2819		}
2820		m = NULL;
2821		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
2822		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
2823		if (!is_state_change && !is_group_query) {
2824			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
2825			if (m)
2826				m->m_data += IGMP_LEADINGSPACE;
2827		}
2828		if (m == NULL) {
2829			m = m_gethdr(M_NOWAIT, MT_DATA);
2830			if (m)
2831				M_ALIGN(m, IGMP_LEADINGSPACE);
2832		}
2833		if (m == NULL)
2834			return (-ENOMEM);
2835
2836		igmp_save_context(m, ifp);
2837
2838		CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__);
2839	}
2840
2841	/*
2842	 * Append group record.
2843	 * If we have sources, we don't know how many yet.
2844	 */
2845	ig.ig_type = type;
2846	ig.ig_datalen = 0;
2847	ig.ig_numsrc = 0;
2848	ig.ig_group = inm->inm_addr;
2849	if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
2850		if (m != m0)
2851			m_freem(m);
2852		CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
2853		return (-ENOMEM);
2854	}
2855	nbytes += sizeof(struct igmp_grouprec);
2856
2857	/*
2858	 * Append as many sources as will fit in the first packet.
2859	 * If we are appending to a new packet, the chain allocation
2860	 * may potentially use clusters; use m_getptr() in this case.
2861	 * If we are appending to an existing packet, we need to obtain
2862	 * a pointer to the group record after m_append(), in case a new
2863	 * mbuf was allocated.
2864	 * Only append sources which are in-mode at t1. If we are
2865	 * transitioning to MCAST_UNDEFINED state on the group, do not
2866	 * include source entries.
2867	 * Only report recorded sources in our filter set when responding
2868	 * to a group-source query.
2869	 */
2870	if (record_has_sources) {
2871		if (m == m0) {
2872			md = m_last(m);
2873			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
2874			    md->m_len - nbytes);
2875		} else {
2876			md = m_getptr(m, 0, &off);
2877			pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) +
2878			    off);
2879		}
2880		msrcs = 0;
2881		RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) {
2882			CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
2883			    ims->ims_haddr);
2884			now = ims_get_mode(inm, ims, 1);
2885			CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now);
2886			if ((now != mode) ||
2887			    (now == mode && mode == MCAST_UNDEFINED)) {
2888				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
2889				continue;
2890			}
2891			if (is_source_query && ims->ims_stp == 0) {
2892				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
2893				    __func__);
2894				continue;
2895			}
2896			CTR1(KTR_IGMPV3, "%s: append node", __func__);
2897			naddr = htonl(ims->ims_haddr);
2898			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
2899				if (m != m0)
2900					m_freem(m);
2901				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
2902				    __func__);
2903				return (-ENOMEM);
2904			}
2905			nbytes += sizeof(in_addr_t);
2906			++msrcs;
2907			if (msrcs == m0srcs)
2908				break;
2909		}
2910		CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__,
2911		    msrcs);
2912		pig->ig_numsrc = htons(msrcs);
2913		nbytes += (msrcs * sizeof(in_addr_t));
2914	}
2915
2916	if (is_source_query && msrcs == 0) {
2917		CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__);
2918		if (m != m0)
2919			m_freem(m);
2920		return (0);
2921	}
2922
2923	/*
2924	 * We are good to go with first packet.
2925	 */
2926	if (m != m0) {
2927		CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__);
2928		m->m_pkthdr.vt_nrecs = 1;
2929		mbufq_enqueue(mq, m);
2930	} else
2931		m->m_pkthdr.vt_nrecs++;
2932
2933	/*
2934	 * No further work needed if no source list in packet(s).
2935	 */
2936	if (!record_has_sources)
2937		return (nbytes);
2938
2939	/*
2940	 * Whilst sources remain to be announced, we need to allocate
2941	 * a new packet and fill out as many sources as will fit.
2942	 * Always try for a cluster first.
2943	 */
2944	while (nims != NULL) {
2945		if (mbufq_full(mq)) {
2946			CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__);
2947			return (-ENOMEM);
2948		}
2949		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
2950		if (m)
2951			m->m_data += IGMP_LEADINGSPACE;
2952		if (m == NULL) {
2953			m = m_gethdr(M_NOWAIT, MT_DATA);
2954			if (m)
2955				M_ALIGN(m, IGMP_LEADINGSPACE);
2956		}
2957		if (m == NULL)
2958			return (-ENOMEM);
2959		igmp_save_context(m, ifp);
2960		md = m_getptr(m, 0, &off);
2961		pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off);
2962		CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__);
2963
2964		if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) {
2965			if (m != m0)
2966				m_freem(m);
2967			CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__);
2968			return (-ENOMEM);
2969		}
2970		m->m_pkthdr.vt_nrecs = 1;
2971		nbytes += sizeof(struct igmp_grouprec);
2972
2973		m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
2974		    sizeof(struct igmp_grouprec)) / sizeof(in_addr_t);
2975
2976		msrcs = 0;
2977		RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
2978			CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
2979			    ims->ims_haddr);
2980			now = ims_get_mode(inm, ims, 1);
2981			if ((now != mode) ||
2982			    (now == mode && mode == MCAST_UNDEFINED)) {
2983				CTR1(KTR_IGMPV3, "%s: skip node", __func__);
2984				continue;
2985			}
2986			if (is_source_query && ims->ims_stp == 0) {
2987				CTR1(KTR_IGMPV3, "%s: skip unrecorded node",
2988				    __func__);
2989				continue;
2990			}
2991			CTR1(KTR_IGMPV3, "%s: append node", __func__);
2992			naddr = htonl(ims->ims_haddr);
2993			if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) {
2994				if (m != m0)
2995					m_freem(m);
2996				CTR1(KTR_IGMPV3, "%s: m_append() failed.",
2997				    __func__);
2998				return (-ENOMEM);
2999			}
3000			++msrcs;
3001			if (msrcs == m0srcs)
3002				break;
3003		}
3004		pig->ig_numsrc = htons(msrcs);
3005		nbytes += (msrcs * sizeof(in_addr_t));
3006
3007		CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__);
3008		mbufq_enqueue(mq, m);
3009	}
3010
3011	return (nbytes);
3012}
3013
3014/*
3015 * Type used to mark record pass completion.
3016 * We exploit the fact we can cast to this easily from the
3017 * current filter modes on each ip_msource node.
3018 */
3019typedef enum {
3020	REC_NONE = 0x00,	/* MCAST_UNDEFINED */
3021	REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
3022	REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
3023	REC_FULL = REC_ALLOW | REC_BLOCK
3024} rectype_t;
3025
3026/*
3027 * Enqueue an IGMPv3 filter list change to the given output queue.
3028 *
3029 * Source list filter state is held in an RB-tree. When the filter list
3030 * for a group is changed without changing its mode, we need to compute
3031 * the deltas between T0 and T1 for each source in the filter set,
3032 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3033 *
3034 * As we may potentially queue two record types, and the entire R-B tree
3035 * needs to be walked at once, we break this out into its own function
3036 * so we can generate a tightly packed queue of packets.
3037 *
3038 * XXX This could be written to only use one tree walk, although that makes
3039 * serializing into the mbuf chains a bit harder. For now we do two walks
3040 * which makes things easier on us, and it may or may not be harder on
3041 * the L2 cache.
3042 *
3043 * If successful the size of all data appended to the queue is returned,
3044 * otherwise an error code less than zero is returned, or zero if
3045 * no record(s) were appended.
3046 */
3047static int
3048igmp_v3_enqueue_filter_change(struct mbufq *mq, struct in_multi *inm)
3049{
3050	static const int MINRECLEN =
3051	    sizeof(struct igmp_grouprec) + sizeof(in_addr_t);
3052	struct ifnet		*ifp;
3053	struct igmp_grouprec	 ig;
3054	struct igmp_grouprec	*pig;
3055	struct ip_msource	*ims, *nims;
3056	struct mbuf		*m, *m0, *md;
3057	in_addr_t		 naddr;
3058	int			 m0srcs, nbytes, npbytes, off, rsrcs, schanged;
3059#ifdef KTR
3060	int			 nallow, nblock;
3061#endif
3062	uint8_t			 mode, now, then;
3063	rectype_t		 crt, drt, nrt;
3064
3065	IN_MULTI_LIST_LOCK_ASSERT();
3066
3067	if (inm->inm_nsrc == 0 ||
3068	    (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0))
3069		return (0);
3070
3071	ifp = inm->inm_ifp;			/* interface */
3072	mode = inm->inm_st[1].iss_fmode;	/* filter mode at t1 */
3073	crt = REC_NONE;	/* current group record type */
3074	drt = REC_NONE;	/* mask of completed group record types */
3075	nrt = REC_NONE;	/* record type for current node */
3076	m0srcs = 0;	/* # source which will fit in current mbuf chain */
3077	nbytes = 0;	/* # of bytes appended to group's state-change queue */
3078	npbytes = 0;	/* # of bytes appended this packet */
3079	rsrcs = 0;	/* # sources encoded in current record */
3080	schanged = 0;	/* # nodes encoded in overall filter change */
3081#ifdef KTR
3082	nallow = 0;	/* # of source entries in ALLOW_NEW */
3083	nblock = 0;	/* # of source entries in BLOCK_OLD */
3084#endif
3085	nims = NULL;	/* next tree node pointer */
3086
3087	/*
3088	 * For each possible filter record mode.
3089	 * The first kind of source we encounter tells us which
3090	 * is the first kind of record we start appending.
3091	 * If a node transitioned to UNDEFINED at t1, its mode is treated
3092	 * as the inverse of the group's filter mode.
3093	 */
3094	while (drt != REC_FULL) {
3095		do {
3096			m0 = mbufq_last(mq);
3097			if (m0 != NULL &&
3098			    (m0->m_pkthdr.vt_nrecs + 1 <=
3099			     IGMP_V3_REPORT_MAXRECS) &&
3100			    (m0->m_pkthdr.len + MINRECLEN) <
3101			     (ifp->if_mtu - IGMP_LEADINGSPACE)) {
3102				m = m0;
3103				m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3104					    sizeof(struct igmp_grouprec)) /
3105				    sizeof(in_addr_t);
3106				CTR1(KTR_IGMPV3,
3107				    "%s: use previous packet", __func__);
3108			} else {
3109				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
3110				if (m)
3111					m->m_data += IGMP_LEADINGSPACE;
3112				if (m == NULL) {
3113					m = m_gethdr(M_NOWAIT, MT_DATA);
3114					if (m)
3115						M_ALIGN(m, IGMP_LEADINGSPACE);
3116				}
3117				if (m == NULL) {
3118					CTR1(KTR_IGMPV3,
3119					    "%s: m_get*() failed", __func__);
3120					return (-ENOMEM);
3121				}
3122				m->m_pkthdr.vt_nrecs = 0;
3123				igmp_save_context(m, ifp);
3124				m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE -
3125				    sizeof(struct igmp_grouprec)) /
3126				    sizeof(in_addr_t);
3127				npbytes = 0;
3128				CTR1(KTR_IGMPV3,
3129				    "%s: allocated new packet", __func__);
3130			}
3131			/*
3132			 * Append the IGMP group record header to the
3133			 * current packet's data area.
3134			 * Recalculate pointer to free space for next
3135			 * group record, in case m_append() allocated
3136			 * a new mbuf or cluster.
3137			 */
3138			memset(&ig, 0, sizeof(ig));
3139			ig.ig_group = inm->inm_addr;
3140			if (!m_append(m, sizeof(ig), (void *)&ig)) {
3141				if (m != m0)
3142					m_freem(m);
3143				CTR1(KTR_IGMPV3,
3144				    "%s: m_append() failed", __func__);
3145				return (-ENOMEM);
3146			}
3147			npbytes += sizeof(struct igmp_grouprec);
3148			if (m != m0) {
3149				/* new packet; offset in c hain */
3150				md = m_getptr(m, npbytes -
3151				    sizeof(struct igmp_grouprec), &off);
3152				pig = (struct igmp_grouprec *)(mtod(md,
3153				    uint8_t *) + off);
3154			} else {
3155				/* current packet; offset from last append */
3156				md = m_last(m);
3157				pig = (struct igmp_grouprec *)(mtod(md,
3158				    uint8_t *) + md->m_len -
3159				    sizeof(struct igmp_grouprec));
3160			}
3161			/*
3162			 * Begin walking the tree for this record type
3163			 * pass, or continue from where we left off
3164			 * previously if we had to allocate a new packet.
3165			 * Only report deltas in-mode at t1.
3166			 * We need not report included sources as allowed
3167			 * if we are in inclusive mode on the group,
3168			 * however the converse is not true.
3169			 */
3170			rsrcs = 0;
3171			if (nims == NULL)
3172				nims = RB_MIN(ip_msource_tree, &inm->inm_srcs);
3173			RB_FOREACH_FROM(ims, ip_msource_tree, nims) {
3174				CTR2(KTR_IGMPV3, "%s: visit node 0x%08x",
3175				    __func__, ims->ims_haddr);
3176				now = ims_get_mode(inm, ims, 1);
3177				then = ims_get_mode(inm, ims, 0);
3178				CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d",
3179				    __func__, then, now);
3180				if (now == then) {
3181					CTR1(KTR_IGMPV3,
3182					    "%s: skip unchanged", __func__);
3183					continue;
3184				}
3185				if (mode == MCAST_EXCLUDE &&
3186				    now == MCAST_INCLUDE) {
3187					CTR1(KTR_IGMPV3,
3188					    "%s: skip IN src on EX group",
3189					    __func__);
3190					continue;
3191				}
3192				nrt = (rectype_t)now;
3193				if (nrt == REC_NONE)
3194					nrt = (rectype_t)(~mode & REC_FULL);
3195				if (schanged++ == 0) {
3196					crt = nrt;
3197				} else if (crt != nrt)
3198					continue;
3199				naddr = htonl(ims->ims_haddr);
3200				if (!m_append(m, sizeof(in_addr_t),
3201				    (void *)&naddr)) {
3202					if (m != m0)
3203						m_freem(m);
3204					CTR1(KTR_IGMPV3,
3205					    "%s: m_append() failed", __func__);
3206					return (-ENOMEM);
3207				}
3208#ifdef KTR
3209				nallow += !!(crt == REC_ALLOW);
3210				nblock += !!(crt == REC_BLOCK);
3211#endif
3212				if (++rsrcs == m0srcs)
3213					break;
3214			}
3215			/*
3216			 * If we did not append any tree nodes on this
3217			 * pass, back out of allocations.
3218			 */
3219			if (rsrcs == 0) {
3220				npbytes -= sizeof(struct igmp_grouprec);
3221				if (m != m0) {
3222					CTR1(KTR_IGMPV3,
3223					    "%s: m_free(m)", __func__);
3224					m_freem(m);
3225				} else {
3226					CTR1(KTR_IGMPV3,
3227					    "%s: m_adj(m, -ig)", __func__);
3228					m_adj(m, -((int)sizeof(
3229					    struct igmp_grouprec)));
3230				}
3231				continue;
3232			}
3233			npbytes += (rsrcs * sizeof(in_addr_t));
3234			if (crt == REC_ALLOW)
3235				pig->ig_type = IGMP_ALLOW_NEW_SOURCES;
3236			else if (crt == REC_BLOCK)
3237				pig->ig_type = IGMP_BLOCK_OLD_SOURCES;
3238			pig->ig_numsrc = htons(rsrcs);
3239			/*
3240			 * Count the new group record, and enqueue this
3241			 * packet if it wasn't already queued.
3242			 */
3243			m->m_pkthdr.vt_nrecs++;
3244			if (m != m0)
3245				mbufq_enqueue(mq, m);
3246			nbytes += npbytes;
3247		} while (nims != NULL);
3248		drt |= crt;
3249		crt = (~crt & REC_FULL);
3250	}
3251
3252	CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
3253	    nallow, nblock);
3254
3255	return (nbytes);
3256}
3257
3258static int
3259igmp_v3_merge_state_changes(struct in_multi *inm, struct mbufq *scq)
3260{
3261	struct mbufq	*gq;
3262	struct mbuf	*m;		/* pending state-change */
3263	struct mbuf	*m0;		/* copy of pending state-change */
3264	struct mbuf	*mt;		/* last state-change in packet */
3265	int		 docopy, domerge;
3266	u_int		 recslen;
3267
3268	docopy = 0;
3269	domerge = 0;
3270	recslen = 0;
3271
3272	IN_MULTI_LIST_LOCK_ASSERT();
3273	IGMP_LOCK_ASSERT();
3274
3275	/*
3276	 * If there are further pending retransmissions, make a writable
3277	 * copy of each queued state-change message before merging.
3278	 */
3279	if (inm->inm_scrv > 0)
3280		docopy = 1;
3281
3282	gq = &inm->inm_scq;
3283#ifdef KTR
3284	if (mbufq_first(gq) == NULL) {
3285		CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty",
3286		    __func__, inm);
3287	}
3288#endif
3289
3290	m = mbufq_first(gq);
3291	while (m != NULL) {
3292		/*
3293		 * Only merge the report into the current packet if
3294		 * there is sufficient space to do so; an IGMPv3 report
3295		 * packet may only contain 65,535 group records.
3296		 * Always use a simple mbuf chain concatentation to do this,
3297		 * as large state changes for single groups may have
3298		 * allocated clusters.
3299		 */
3300		domerge = 0;
3301		mt = mbufq_last(scq);
3302		if (mt != NULL) {
3303			recslen = m_length(m, NULL);
3304
3305			if ((mt->m_pkthdr.vt_nrecs +
3306			    m->m_pkthdr.vt_nrecs <=
3307			    IGMP_V3_REPORT_MAXRECS) &&
3308			    (mt->m_pkthdr.len + recslen <=
3309			    (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE)))
3310				domerge = 1;
3311		}
3312
3313		if (!domerge && mbufq_full(gq)) {
3314			CTR2(KTR_IGMPV3,
3315			    "%s: outbound queue full, skipping whole packet %p",
3316			    __func__, m);
3317			mt = m->m_nextpkt;
3318			if (!docopy)
3319				m_freem(m);
3320			m = mt;
3321			continue;
3322		}
3323
3324		if (!docopy) {
3325			CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m);
3326			m0 = mbufq_dequeue(gq);
3327			m = m0->m_nextpkt;
3328		} else {
3329			CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m);
3330			m0 = m_dup(m, M_NOWAIT);
3331			if (m0 == NULL)
3332				return (ENOMEM);
3333			m0->m_nextpkt = NULL;
3334			m = m->m_nextpkt;
3335		}
3336
3337		if (!domerge) {
3338			CTR3(KTR_IGMPV3, "%s: queueing %p to scq %p)",
3339			    __func__, m0, scq);
3340			mbufq_enqueue(scq, m0);
3341		} else {
3342			struct mbuf *mtl;	/* last mbuf of packet mt */
3343
3344			CTR3(KTR_IGMPV3, "%s: merging %p with scq tail %p)",
3345			    __func__, m0, mt);
3346
3347			mtl = m_last(mt);
3348			m0->m_flags &= ~M_PKTHDR;
3349			mt->m_pkthdr.len += recslen;
3350			mt->m_pkthdr.vt_nrecs +=
3351			    m0->m_pkthdr.vt_nrecs;
3352
3353			mtl->m_next = m0;
3354		}
3355	}
3356
3357	return (0);
3358}
3359
3360/*
3361 * Respond to a pending IGMPv3 General Query.
3362 */
3363static void
3364igmp_v3_dispatch_general_query(struct igmp_ifsoftc *igi)
3365{
3366	struct ifmultiaddr	*ifma;
3367	struct ifnet		*ifp;
3368	struct in_multi		*inm;
3369	int			 retval __unused, loop;
3370
3371	IN_MULTI_LIST_LOCK_ASSERT();
3372	IGMP_LOCK_ASSERT();
3373	NET_EPOCH_ASSERT();
3374
3375	KASSERT(igi->igi_version == IGMP_VERSION_3,
3376	    ("%s: called when version %d", __func__, igi->igi_version));
3377
3378	/*
3379	 * Check that there are some packets queued. If so, send them first.
3380	 * For large number of groups the reply to general query can take
3381	 * many packets, we should finish sending them before starting of
3382	 * queuing the new reply.
3383	 */
3384	if (!mbufq_empty(&igi->igi_gq))
3385		goto send;
3386
3387	ifp = igi->igi_ifp;
3388
3389	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3390		inm = inm_ifmultiaddr_get_inm(ifma);
3391		if (inm == NULL)
3392			continue;
3393		KASSERT(ifp == inm->inm_ifp,
3394		    ("%s: inconsistent ifp", __func__));
3395
3396		switch (inm->inm_state) {
3397		case IGMP_NOT_MEMBER:
3398		case IGMP_SILENT_MEMBER:
3399			break;
3400		case IGMP_REPORTING_MEMBER:
3401		case IGMP_IDLE_MEMBER:
3402		case IGMP_LAZY_MEMBER:
3403		case IGMP_SLEEPING_MEMBER:
3404		case IGMP_AWAKENING_MEMBER:
3405			inm->inm_state = IGMP_REPORTING_MEMBER;
3406			retval = igmp_v3_enqueue_group_record(&igi->igi_gq,
3407			    inm, 0, 0, 0);
3408			CTR2(KTR_IGMPV3, "%s: enqueue record = %d",
3409			    __func__, retval);
3410			break;
3411		case IGMP_G_QUERY_PENDING_MEMBER:
3412		case IGMP_SG_QUERY_PENDING_MEMBER:
3413		case IGMP_LEAVING_MEMBER:
3414			break;
3415		}
3416	}
3417
3418send:
3419	loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0;
3420	igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop);
3421
3422	/*
3423	 * Slew transmission of bursts over 500ms intervals.
3424	 */
3425	if (mbufq_first(&igi->igi_gq) != NULL) {
3426		igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY(
3427		    IGMP_RESPONSE_BURST_INTERVAL);
3428		V_interface_timers_running = 1;
3429	}
3430}
3431
3432/*
3433 * Transmit the next pending IGMP message in the output queue.
3434 *
3435 * We get called from netisr_processqueue(). A mutex private to igmpoq
3436 * will be acquired and released around this routine.
3437 *
3438 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
3439 * MRT: Nothing needs to be done, as IGMP traffic is always local to
3440 * a link and uses a link-scope multicast address.
3441 */
3442static void
3443igmp_intr(struct mbuf *m)
3444{
3445	struct ip_moptions	 imo;
3446	struct ifnet		*ifp;
3447	struct mbuf		*ipopts, *m0;
3448	int			 error;
3449	uint32_t		 ifindex;
3450
3451	CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m);
3452
3453	/*
3454	 * Set VNET image pointer from enqueued mbuf chain
3455	 * before doing anything else. Whilst we use interface
3456	 * indexes to guard against interface detach, they are
3457	 * unique to each VIMAGE and must be retrieved.
3458	 */
3459	CURVNET_SET((struct vnet *)(m->m_pkthdr.PH_loc.ptr));
3460	ifindex = igmp_restore_context(m);
3461
3462	/*
3463	 * Check if the ifnet still exists. This limits the scope of
3464	 * any race in the absence of a global ifp lock for low cost
3465	 * (an array lookup).
3466	 */
3467	ifp = ifnet_byindex(ifindex);
3468	if (ifp == NULL) {
3469		CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.",
3470		    __func__, m, ifindex);
3471		m_freem(m);
3472		IPSTAT_INC(ips_noroute);
3473		goto out;
3474	}
3475
3476	ipopts = V_igmp_sendra ? m_raopt : NULL;
3477
3478	imo.imo_multicast_ttl  = 1;
3479	imo.imo_multicast_vif  = -1;
3480	imo.imo_multicast_loop = (V_ip_mrouter != NULL);
3481
3482	/*
3483	 * If the user requested that IGMP traffic be explicitly
3484	 * redirected to the loopback interface (e.g. they are running a
3485	 * MANET interface and the routing protocol needs to see the
3486	 * updates), handle this now.
3487	 */
3488	if (m->m_flags & M_IGMP_LOOP)
3489		imo.imo_multicast_ifp = V_loif;
3490	else
3491		imo.imo_multicast_ifp = ifp;
3492
3493	if (m->m_flags & M_IGMPV2) {
3494		m0 = m;
3495	} else {
3496		m0 = igmp_v3_encap_report(ifp, m);
3497		if (m0 == NULL) {
3498			CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m);
3499			m_freem(m);
3500			IPSTAT_INC(ips_odropped);
3501			goto out;
3502		}
3503	}
3504
3505	igmp_scrub_context(m0);
3506	m_clrprotoflags(m);
3507	m0->m_pkthdr.rcvif = V_loif;
3508#ifdef MAC
3509	mac_netinet_igmp_send(ifp, m0);
3510#endif
3511	error = ip_output(m0, ipopts, NULL, 0, &imo, NULL);
3512	if (error) {
3513		CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error);
3514		goto out;
3515	}
3516
3517	IGMPSTAT_INC(igps_snd_reports);
3518
3519out:
3520	/*
3521	 * We must restore the existing vnet pointer before
3522	 * continuing as we are run from netisr context.
3523	 */
3524	CURVNET_RESTORE();
3525}
3526
3527/*
3528 * Encapsulate an IGMPv3 report.
3529 *
3530 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf
3531 * chain has already had its IP/IGMPv3 header prepended. In this case
3532 * the function will not attempt to prepend; the lengths and checksums
3533 * will however be re-computed.
3534 *
3535 * Returns a pointer to the new mbuf chain head, or NULL if the
3536 * allocation failed.
3537 */
3538static struct mbuf *
3539igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m)
3540{
3541	struct igmp_report	*igmp;
3542	struct ip		*ip;
3543	int			 hdrlen, igmpreclen;
3544
3545	KASSERT((m->m_flags & M_PKTHDR),
3546	    ("%s: mbuf chain %p is !M_PKTHDR", __func__, m));
3547
3548	igmpreclen = m_length(m, NULL);
3549	hdrlen = sizeof(struct ip) + sizeof(struct igmp_report);
3550
3551	if (m->m_flags & M_IGMPV3_HDR) {
3552		igmpreclen -= hdrlen;
3553	} else {
3554		M_PREPEND(m, hdrlen, M_NOWAIT);
3555		if (m == NULL)
3556			return (NULL);
3557		m->m_flags |= M_IGMPV3_HDR;
3558	}
3559
3560	CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen);
3561
3562	m->m_data += sizeof(struct ip);
3563	m->m_len -= sizeof(struct ip);
3564
3565	igmp = mtod(m, struct igmp_report *);
3566	igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT;
3567	igmp->ir_rsv1 = 0;
3568	igmp->ir_rsv2 = 0;
3569	igmp->ir_numgrps = htons(m->m_pkthdr.vt_nrecs);
3570	igmp->ir_cksum = 0;
3571	igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen);
3572	m->m_pkthdr.vt_nrecs = 0;
3573
3574	m->m_data -= sizeof(struct ip);
3575	m->m_len += sizeof(struct ip);
3576
3577	ip = mtod(m, struct ip *);
3578	ip->ip_tos = IPTOS_PREC_INTERNETCONTROL;
3579	ip->ip_len = htons(hdrlen + igmpreclen);
3580	ip->ip_off = htons(IP_DF);
3581	ip->ip_p = IPPROTO_IGMP;
3582	ip->ip_sum = 0;
3583
3584	ip->ip_src.s_addr = INADDR_ANY;
3585
3586	if (m->m_flags & M_IGMP_LOOP) {
3587		struct in_ifaddr *ia;
3588
3589		IFP_TO_IA(ifp, ia);
3590		if (ia != NULL)
3591			ip->ip_src = ia->ia_addr.sin_addr;
3592	}
3593
3594	ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP);
3595
3596	return (m);
3597}
3598
3599#ifdef KTR
3600static char *
3601igmp_rec_type_to_str(const int type)
3602{
3603
3604	switch (type) {
3605		case IGMP_CHANGE_TO_EXCLUDE_MODE:
3606			return "TO_EX";
3607			break;
3608		case IGMP_CHANGE_TO_INCLUDE_MODE:
3609			return "TO_IN";
3610			break;
3611		case IGMP_MODE_IS_EXCLUDE:
3612			return "MODE_EX";
3613			break;
3614		case IGMP_MODE_IS_INCLUDE:
3615			return "MODE_IN";
3616			break;
3617		case IGMP_ALLOW_NEW_SOURCES:
3618			return "ALLOW_NEW";
3619			break;
3620		case IGMP_BLOCK_OLD_SOURCES:
3621			return "BLOCK_OLD";
3622			break;
3623		default:
3624			break;
3625	}
3626	return "unknown";
3627}
3628#endif
3629
3630#ifdef VIMAGE
3631static void
3632vnet_igmp_init(const void *unused __unused)
3633{
3634
3635	netisr_register_vnet(&igmp_nh);
3636}
3637VNET_SYSINIT(vnet_igmp_init, SI_SUB_PROTO_MC, SI_ORDER_ANY,
3638    vnet_igmp_init, NULL);
3639
3640static void
3641vnet_igmp_uninit(const void *unused __unused)
3642{
3643
3644	/* This can happen when we shutdown the entire network stack. */
3645	CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
3646
3647	netisr_unregister_vnet(&igmp_nh);
3648}
3649VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY,
3650    vnet_igmp_uninit, NULL);
3651#endif
3652
3653#ifdef DDB
3654DB_SHOW_COMMAND(igi_list, db_show_igi_list)
3655{
3656	struct igmp_ifsoftc *igi, *tigi;
3657	LIST_HEAD(_igi_list, igmp_ifsoftc) *igi_head;
3658
3659	if (!have_addr) {
3660		db_printf("usage: show igi_list <addr>\n");
3661		return;
3662	}
3663	igi_head = (struct _igi_list *)addr;
3664
3665	LIST_FOREACH_SAFE(igi, igi_head, igi_link, tigi) {
3666		db_printf("igmp_ifsoftc %p:\n", igi);
3667		db_printf("    ifp %p\n", igi->igi_ifp);
3668		db_printf("    version %u\n", igi->igi_version);
3669		db_printf("    v1_timer %u\n", igi->igi_v1_timer);
3670		db_printf("    v2_timer %u\n", igi->igi_v2_timer);
3671		db_printf("    v3_timer %u\n", igi->igi_v3_timer);
3672		db_printf("    flags %#x\n", igi->igi_flags);
3673		db_printf("    rv %u\n", igi->igi_rv);
3674		db_printf("    qi %u\n", igi->igi_qi);
3675		db_printf("    qri %u\n", igi->igi_qri);
3676		db_printf("    uri %u\n", igi->igi_uri);
3677		/* struct mbufq    igi_gq; */
3678		db_printf("\n");
3679	}
3680}
3681#endif
3682
3683static int
3684igmp_modevent(module_t mod, int type, void *unused __unused)
3685{
3686
3687	switch (type) {
3688	case MOD_LOAD:
3689		CTR1(KTR_IGMPV3, "%s: initializing", __func__);
3690		IGMP_LOCK_INIT();
3691		m_raopt = igmp_ra_alloc();
3692		netisr_register(&igmp_nh);
3693		callout_init(&igmpslow_callout, 1);
3694		callout_reset(&igmpslow_callout, hz / IGMP_SLOWHZ,
3695		    igmp_slowtimo, NULL);
3696		callout_init(&igmpfast_callout, 1);
3697		callout_reset(&igmpfast_callout, hz / IGMP_FASTHZ,
3698		    igmp_fasttimo, NULL);
3699		break;
3700	case MOD_UNLOAD:
3701		CTR1(KTR_IGMPV3, "%s: tearing down", __func__);
3702		netisr_unregister(&igmp_nh);
3703		m_free(m_raopt);
3704		m_raopt = NULL;
3705		IGMP_LOCK_DESTROY();
3706		break;
3707	default:
3708		return (EOPNOTSUPP);
3709	}
3710	return (0);
3711}
3712
3713static moduledata_t igmp_mod = {
3714    "igmp",
3715    igmp_modevent,
3716    0
3717};
3718DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE);
3719