in_mcast.c revision 190753
1/*-
2 * Copyright (c) 2007-2009 Bruce Simpson.
3 * Copyright (c) 2005 Robert N. M. Watson.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote
15 *    products derived from this software without specific prior written
16 *    permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * IPv4 multicast socket, group, and socket option processing module.
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD: head/sys/netinet/in_mcast.c 190753 2009-04-05 23:25:06Z kan $");
37
38#include "opt_route.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/mbuf.h>
45#include <sys/protosw.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48#include <sys/protosw.h>
49#include <sys/sysctl.h>
50#include <sys/vimage.h>
51#include <sys/ktr.h>
52#include <sys/tree.h>
53
54#include <net/if.h>
55#include <net/if_dl.h>
56#include <net/route.h>
57#include <net/vnet.h>
58
59#include <netinet/in.h>
60#include <netinet/in_systm.h>
61#include <netinet/in_pcb.h>
62#include <netinet/in_var.h>
63#include <netinet/ip_var.h>
64#include <netinet/igmp_var.h>
65#include <netinet/vinet.h>
66
/*
 * Trace the events in this file under KTR_SUBSYS unless the build
 * already provides a dedicated KTR_IGMPV3 trace class.
 */
#ifndef KTR_IGMPV3
#define KTR_IGMPV3 KTR_SUBSYS
#endif
70
/*
 * Overlay of several socket address forms, so that one object can be
 * viewed through whichever member matches its address family.
 * Guarded by __SOCKUNION_DECLARED so that it is declared only once.
 */
#ifndef __SOCKUNION_DECLARED
union sockunion {
	struct sockaddr_storage	ss;	/* storage-sized view */
	struct sockaddr		sa;	/* generic view */
	struct sockaddr_dl	sdl;	/* link-layer view */
	struct sockaddr_in	sin;	/* IPv4 view */
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */
81
/*
 * malloc(9) types used by the allocations made in this file.
 */
/* Socket-layer source nodes and the per-socket imo_mfilters array. */
static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
    "IPv4 multicast PCB-layer source filter");
/* struct in_multi group records. */
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
/* The per-socket imo_membership array. */
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
/* Source nodes hanging off a struct in_multi. */
static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
    "IPv4 multicast IGMP-layer source filter");

#ifdef VIMAGE_GLOBALS
struct in_multihead in_multihead;	/* XXX now unused; retain for ABI */
#endif
92
/*
 * Locking:
 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier; however,
 *   it can also be taken by code in net/if.c.
 * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
 *
 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
 * any need for in_multi itself to be virtualized -- it is bound to an ifp
 * anyway no matter what happens.
 *
 * in_multi_mtx is set up through MTX_SYSINIT as an MTX_DEF mutex.
 * NOTE(review): presumably this is the mutex behind IN_MULTI_LOCK --
 * confirm against the macro definitions in in_var.h.
 */
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
106
/*
 * Functions with non-static linkage defined in this file should be
 * declared in in_var.h:
 *  imo_multi_filter()
 *  in_addmulti()
 *  in_delmulti()
 *  in_joingroup()
 *  in_joingroup_locked()
 *  in_leavegroup()
 *  in_leavegroup_locked()
 * and ip_var.h:
 *  inp_freemoptions()
 *  inp_getmoptions()
 *  inp_setmoptions()
 *
 * This file also defines inm_clear_recorded(), inm_commit(),
 * inm_record_source() and inm_release_locked() with non-static linkage.
 * NOTE(review): they are not listed above; confirm which header
 * declares them.
 *
 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
 * and in_delmulti().
 *
 * Forward declarations of the file-local helpers follow.
 */
static void	imf_commit(struct in_mfilter *);
static int	imf_get_source(struct in_mfilter *imf,
		    const struct sockaddr_in *psin,
		    struct in_msource **);
static struct in_msource *
		imf_graft(struct in_mfilter *, const uint8_t,
		    const struct sockaddr_in *);
static void	imf_leave(struct in_mfilter *);
static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
static void	imf_purge(struct in_mfilter *);
static void	imf_rollback(struct in_mfilter *);
static void	imf_reap(struct in_mfilter *);
static int	imo_grow(struct ip_moptions *);
static size_t	imo_match_group(const struct ip_moptions *,
		    const struct ifnet *, const struct sockaddr *);
static struct in_msource *
		imo_match_source(const struct ip_moptions *, const size_t,
		    const struct sockaddr *);
static void	ims_merge(struct ip_msource *ims,
		    const struct in_msource *lims, const int rollback);
static int	in_getmulti(struct ifnet *, const struct in_addr *,
		    struct in_multi **);
static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
		    const int noalloc, struct ip_msource **pims);
static int	inm_is_ifp_detached(const struct in_multi *);
static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
static void	inm_purge(struct in_multi *);
static void	inm_reap(struct in_multi *);
static struct ip_moptions *
		inp_findmoptions(struct inpcb *);
static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
static int	inp_join_group(struct inpcb *, struct sockopt *);
static int	inp_leave_group(struct inpcb *, struct sockopt *);
static struct ifnet *
		inp_lookup_mcast_ifp(const struct inpcb *,
		    const struct sockaddr_in *, const struct in_addr);
static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
165
/* Root of the net.inet.ip.mcast sysctl subtree. */
SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");

/*
 * Limit on source entries per group record; compared against inm_nsrc
 * before a new source node is allocated.  Writable at run time and
 * also registered as a tunable.
 */
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
    "Max source filters per group");
TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);

/*
 * Limit on source entries per socket-layer filter; compared against
 * imf_nsrc before a new source node is allocated.  Writable at run
 * time and also registered as a tunable.
 */
static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
    "Max source filters per socket");
TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);

/* Default multicast loopback setting; non-static, so visible outside this file. */
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);

/* Read-only node whose contents are produced by sysctl_ip_mcast_filters(). */
SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
    "Per-interface stack-wide source filters");
188
189/*
190 * Inline function which wraps assertions for a valid ifp.
191 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
192 * is detached.
193 */
194static int __inline
195inm_is_ifp_detached(const struct in_multi *inm)
196{
197	struct ifnet *ifp;
198
199	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
200	ifp = inm->inm_ifma->ifma_ifp;
201	if (ifp != NULL) {
202		/*
203		 * Sanity check that netinet's notion of ifp is the
204		 * same as net's.
205		 */
206		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
207	}
208
209	return (ifp == NULL);
210}
211
212/*
213 * Initialize an in_mfilter structure to a known state at t0, t1
214 * with an empty source filter list.
215 */
216static __inline void
217imf_init(struct in_mfilter *imf, const int st0, const int st1)
218{
219	memset(imf, 0, sizeof(struct in_mfilter));
220	RB_INIT(&imf->imf_sources);
221	imf->imf_st[0] = st0;
222	imf->imf_st[1] = st1;
223}
224
225/*
226 * Resize the ip_moptions vector to the next power-of-two minus 1.
227 * May be called with locks held; do not sleep.
228 */
229static int
230imo_grow(struct ip_moptions *imo)
231{
232	struct in_multi		**nmships;
233	struct in_multi		**omships;
234	struct in_mfilter	 *nmfilters;
235	struct in_mfilter	 *omfilters;
236	size_t			  idx;
237	size_t			  newmax;
238	size_t			  oldmax;
239
240	nmships = NULL;
241	nmfilters = NULL;
242	omships = imo->imo_membership;
243	omfilters = imo->imo_mfilters;
244	oldmax = imo->imo_max_memberships;
245	newmax = ((oldmax + 1) * 2) - 1;
246
247	if (newmax <= IP_MAX_MEMBERSHIPS) {
248		nmships = (struct in_multi **)realloc(omships,
249		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
250		nmfilters = (struct in_mfilter *)realloc(omfilters,
251		    sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
252		if (nmships != NULL && nmfilters != NULL) {
253			/* Initialize newly allocated source filter heads. */
254			for (idx = oldmax; idx < newmax; idx++) {
255				imf_init(&nmfilters[idx], MCAST_UNDEFINED,
256				    MCAST_EXCLUDE);
257			}
258			imo->imo_max_memberships = newmax;
259			imo->imo_membership = nmships;
260			imo->imo_mfilters = nmfilters;
261		}
262	}
263
264	if (nmships == NULL || nmfilters == NULL) {
265		if (nmships != NULL)
266			free(nmships, M_IPMOPTS);
267		if (nmfilters != NULL)
268			free(nmfilters, M_INMFILTER);
269		return (ETOOMANYREFS);
270	}
271
272	return (0);
273}
274
275/*
276 * Find an IPv4 multicast group entry for this ip_moptions instance
277 * which matches the specified group, and optionally an interface.
278 * Return its index into the array, or -1 if not found.
279 */
280static size_t
281imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
282    const struct sockaddr *group)
283{
284	const struct sockaddr_in *gsin;
285	struct in_multi	**pinm;
286	int		  idx;
287	int		  nmships;
288
289	gsin = (const struct sockaddr_in *)group;
290
291	/* The imo_membership array may be lazy allocated. */
292	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
293		return (-1);
294
295	nmships = imo->imo_num_memberships;
296	pinm = &imo->imo_membership[0];
297	for (idx = 0; idx < nmships; idx++, pinm++) {
298		if (*pinm == NULL)
299			continue;
300		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
301		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
302			break;
303		}
304	}
305	if (idx >= nmships)
306		idx = -1;
307
308	return (idx);
309}
310
311/*
312 * Find an IPv4 multicast source entry for this imo which matches
313 * the given group index for this socket, and source address.
314 *
315 * NOTE: This does not check if the entry is in-mode, merely if
316 * it exists, which may not be the desired behaviour.
317 */
318static struct in_msource *
319imo_match_source(const struct ip_moptions *imo, const size_t gidx,
320    const struct sockaddr *src)
321{
322	struct ip_msource	 find;
323	struct in_mfilter	*imf;
324	struct ip_msource	*ims;
325	const sockunion_t	*psa;
326
327	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
328	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
329	    ("%s: invalid index %d\n", __func__, (int)gidx));
330
331	/* The imo_mfilters array may be lazy allocated. */
332	if (imo->imo_mfilters == NULL)
333		return (NULL);
334	imf = &imo->imo_mfilters[gidx];
335
336	/* Source trees are keyed in host byte order. */
337	psa = (const sockunion_t *)src;
338	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
339	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
340
341	return ((struct in_msource *)ims);
342}
343
344/*
345 * Perform filtering for multicast datagrams on a socket by group and source.
346 *
347 * Returns 0 if a datagram should be allowed through, or various error codes
348 * if the socket was not a member of the group, or the source was muted, etc.
349 */
350int
351imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
352    const struct sockaddr *group, const struct sockaddr *src)
353{
354	size_t gidx;
355	struct in_msource *ims;
356	int mode;
357
358	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
359
360	gidx = imo_match_group(imo, ifp, group);
361	if (gidx == -1)
362		return (MCAST_NOTGMEMBER);
363
364	/*
365	 * Check if the source was included in an (S,G) join.
366	 * Allow reception on exclusive memberships by default,
367	 * reject reception on inclusive memberships by default.
368	 * Exclude source only if an in-mode exclude filter exists.
369	 * Include source only if an in-mode include filter exists.
370	 * NOTE: We are comparing group state here at IGMP t1 (now)
371	 * with socket-layer t0 (since last downcall).
372	 */
373	mode = imo->imo_mfilters[gidx].imf_st[1];
374	ims = imo_match_source(imo, gidx, src);
375
376	if ((ims == NULL && mode == MCAST_INCLUDE) ||
377	    (ims != NULL && ims->imsl_st[0] != mode))
378		return (MCAST_NOTSMEMBER);
379
380	return (MCAST_PASS);
381}
382
383/*
384 * Find and return a reference to an in_multi record for (ifp, group),
385 * and bump its reference count.
386 * If one does not exist, try to allocate it, and update link-layer multicast
387 * filters on ifp to listen for group.
388 * Assumes the IN_MULTI lock is held across the call.
389 * Return 0 if successful, otherwise return an appropriate error code.
390 */
391static int
392in_getmulti(struct ifnet *ifp, const struct in_addr *group,
393    struct in_multi **pinm)
394{
395	INIT_VNET_INET(ifp->if_vnet);
396	struct sockaddr_in	 gsin;
397	struct ifmultiaddr	*ifma;
398	struct in_ifinfo	*ii;
399	struct in_multi		*inm;
400	int error;
401
402#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
403	IFF_ASSERTGIANT(ifp);
404#endif
405	IN_MULTI_LOCK_ASSERT();
406
407	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
408
409	inm = inm_lookup(ifp, *group);
410	if (inm != NULL) {
411		/*
412		 * If we already joined this group, just bump the
413		 * refcount and return it.
414		 */
415		KASSERT(inm->inm_refcount >= 1,
416		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
417		++inm->inm_refcount;
418		*pinm = inm;
419		return (0);
420	}
421
422	memset(&gsin, 0, sizeof(gsin));
423	gsin.sin_family = AF_INET;
424	gsin.sin_len = sizeof(struct sockaddr_in);
425	gsin.sin_addr = *group;
426
427	/*
428	 * Check if a link-layer group is already associated
429	 * with this network-layer group on the given ifnet.
430	 */
431	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
432	if (error != 0)
433		return (error);
434
435	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
436	IF_ADDR_LOCK(ifp);
437
438	/*
439	 * If something other than netinet is occupying the link-layer
440	 * group, print a meaningful error message and back out of
441	 * the allocation.
442	 * Otherwise, bump the refcount on the existing network-layer
443	 * group association and return it.
444	 */
445	if (ifma->ifma_protospec != NULL) {
446		inm = (struct in_multi *)ifma->ifma_protospec;
447#ifdef INVARIANTS
448		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
449		    __func__));
450		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
451		    ("%s: ifma not AF_INET", __func__));
452		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
453		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
454		    !in_hosteq(inm->inm_addr, *group))
455			panic("%s: ifma %p is inconsistent with %p (%s)",
456			    __func__, ifma, inm, inet_ntoa(*group));
457#endif
458		++inm->inm_refcount;
459		*pinm = inm;
460		IF_ADDR_UNLOCK(ifp);
461		return (0);
462	}
463
464	IF_ADDR_LOCK_ASSERT(ifp);
465
466	/*
467	 * A new in_multi record is needed; allocate and initialize it.
468	 * We DO NOT perform an IGMP join as the in_ layer may need to
469	 * push an initial source list down to IGMP to support SSM.
470	 *
471	 * The initial source filter state is INCLUDE, {} as per the RFC.
472	 */
473	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
474	if (inm == NULL) {
475		if_delmulti_ifma(ifma);
476		IF_ADDR_UNLOCK(ifp);
477		return (ENOMEM);
478	}
479	inm->inm_addr = *group;
480	inm->inm_ifp = ifp;
481	inm->inm_igi = ii->ii_igmp;
482	inm->inm_ifma = ifma;
483	inm->inm_refcount = 1;
484	inm->inm_state = IGMP_NOT_MEMBER;
485
486	/*
487	 * Pending state-changes per group are subject to a bounds check.
488	 */
489	IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
490
491	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
492	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
493	RB_INIT(&inm->inm_srcs);
494
495	ifma->ifma_protospec = inm;
496
497	*pinm = inm;
498
499	IF_ADDR_UNLOCK(ifp);
500	return (0);
501}
502
503/*
504 * Drop a reference to an in_multi record.
505 *
506 * If the refcount drops to 0, free the in_multi record and
507 * delete the underlying link-layer membership.
508 */
509void
510inm_release_locked(struct in_multi *inm)
511{
512	struct ifmultiaddr *ifma;
513
514#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
515	if (!inm_is_ifp_detached(inm))
516		IFF_ASSERTGIANT(ifp);
517#endif
518
519	IN_MULTI_LOCK_ASSERT();
520
521	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
522
523	if (--inm->inm_refcount > 0) {
524		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
525		    inm->inm_refcount);
526		return;
527	}
528
529	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
530
531	ifma = inm->inm_ifma;
532
533	/* XXX this access is not covered by IF_ADDR_LOCK */
534	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
535	KASSERT(ifma->ifma_protospec == inm,
536	    ("%s: ifma_protospec != inm", __func__));
537	ifma->ifma_protospec = NULL;
538
539	inm_purge(inm);
540
541	free(inm, M_IPMADDR);
542
543	if_delmulti_ifma(ifma);
544}
545
546/*
547 * Clear recorded source entries for a group.
548 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
549 * FIXME: Should reap.
550 */
551void
552inm_clear_recorded(struct in_multi *inm)
553{
554	struct ip_msource	*ims;
555
556	IN_MULTI_LOCK_ASSERT();
557
558	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
559		if (ims->ims_stp) {
560			ims->ims_stp = 0;
561			--inm->inm_st[1].iss_rec;
562		}
563	}
564	KASSERT(inm->inm_st[1].iss_rec == 0,
565	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
566}
567
568/*
569 * Record a source as pending for a Source-Group IGMPv3 query.
570 * This lives here as it modifies the shared tree.
571 *
572 * inm is the group descriptor.
573 * naddr is the address of the source to record in network-byte order.
574 *
575 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
576 * lazy-allocate a source node in response to an SG query.
577 * Otherwise, no allocation is performed. This saves some memory
578 * with the trade-off that the source will not be reported to the
579 * router if joined in the window between the query response and
580 * the group actually being joined on the local host.
581 *
582 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
583 * This turns off the allocation of a recorded source entry if
584 * the group has not been joined.
585 *
586 * Return 0 if the source didn't exist or was already marked as recorded.
587 * Return 1 if the source was marked as recorded by this function.
588 * Return <0 if any error occured (negated errno code).
589 */
590int
591inm_record_source(struct in_multi *inm, const in_addr_t naddr)
592{
593	struct ip_msource	 find;
594	struct ip_msource	*ims, *nims;
595
596	IN_MULTI_LOCK_ASSERT();
597
598	find.ims_haddr = ntohl(naddr);
599	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
600	if (ims && ims->ims_stp)
601		return (0);
602	if (ims == NULL) {
603		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
604			return (-ENOSPC);
605		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
606		    M_NOWAIT | M_ZERO);
607		if (nims == NULL)
608			return (-ENOMEM);
609		nims->ims_haddr = find.ims_haddr;
610		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
611		++inm->inm_nsrc;
612		ims = nims;
613	}
614
615	/*
616	 * Mark the source as recorded and update the recorded
617	 * source count.
618	 */
619	++ims->ims_stp;
620	++inm->inm_st[1].iss_rec;
621
622	return (1);
623}
624
625/*
626 * Return a pointer to an in_msource owned by an in_mfilter,
627 * given its source address.
628 * Lazy-allocate if needed. If this is a new entry its filter state is
629 * undefined at t0.
630 *
631 * imf is the filter set being modified.
632 * haddr is the source address in *host* byte-order.
633 *
634 * SMPng: May be called with locks held; malloc must not block.
635 */
636static int
637imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
638    struct in_msource **plims)
639{
640	struct ip_msource	 find;
641	struct ip_msource	*ims, *nims;
642	struct in_msource	*lims;
643	int			 error;
644
645	error = 0;
646	ims = NULL;
647	lims = NULL;
648
649	/* key is host byte order */
650	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
651	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
652	lims = (struct in_msource *)ims;
653	if (lims == NULL) {
654		if (imf->imf_nsrc == in_mcast_maxsocksrc)
655			return (ENOSPC);
656		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
657		    M_NOWAIT | M_ZERO);
658		if (nims == NULL)
659			return (ENOMEM);
660		lims = (struct in_msource *)nims;
661		lims->ims_haddr = find.ims_haddr;
662		lims->imsl_st[0] = MCAST_UNDEFINED;
663		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
664		++imf->imf_nsrc;
665	}
666
667	*plims = lims;
668
669	return (error);
670}
671
672/*
673 * Graft a source entry into an existing socket-layer filter set,
674 * maintaining any required invariants and checking allocations.
675 *
676 * The source is marked as being in the new filter mode at t1.
677 *
678 * Return the pointer to the new node, otherwise return NULL.
679 */
680static struct in_msource *
681imf_graft(struct in_mfilter *imf, const uint8_t st1,
682    const struct sockaddr_in *psin)
683{
684	struct ip_msource	*nims;
685	struct in_msource	*lims;
686
687	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
688	    M_NOWAIT | M_ZERO);
689	if (nims == NULL)
690		return (NULL);
691	lims = (struct in_msource *)nims;
692	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
693	lims->imsl_st[0] = MCAST_UNDEFINED;
694	lims->imsl_st[1] = st1;
695	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
696	++imf->imf_nsrc;
697
698	return (lims);
699}
700
701/*
702 * Prune a source entry from an existing socket-layer filter set,
703 * maintaining any required invariants and checking allocations.
704 *
705 * The source is marked as being left at t1, it is not freed.
706 *
707 * Return 0 if no error occurred, otherwise return an errno value.
708 */
709static int
710imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
711{
712	struct ip_msource	 find;
713	struct ip_msource	*ims;
714	struct in_msource	*lims;
715
716	/* key is host byte order */
717	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
718	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
719	if (ims == NULL)
720		return (ENOENT);
721	lims = (struct in_msource *)ims;
722	lims->imsl_st[1] = MCAST_UNDEFINED;
723	return (0);
724}
725
726/*
727 * Revert socket-layer filter set deltas at t1 to t0 state.
728 */
729static void
730imf_rollback(struct in_mfilter *imf)
731{
732	struct ip_msource	*ims, *tims;
733	struct in_msource	*lims;
734
735	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
736		lims = (struct in_msource *)ims;
737		if (lims->imsl_st[0] == lims->imsl_st[1]) {
738			/* no change at t1 */
739			continue;
740		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
741			/* revert change to existing source at t1 */
742			lims->imsl_st[1] = lims->imsl_st[0];
743		} else {
744			/* revert source added t1 */
745			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
746			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
747			free(ims, M_INMFILTER);
748			imf->imf_nsrc--;
749		}
750	}
751	imf->imf_st[1] = imf->imf_st[0];
752}
753
754/*
755 * Mark socket-layer filter set as INCLUDE {} at t1.
756 */
757static void
758imf_leave(struct in_mfilter *imf)
759{
760	struct ip_msource	*ims;
761	struct in_msource	*lims;
762
763	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
764		lims = (struct in_msource *)ims;
765		lims->imsl_st[1] = MCAST_UNDEFINED;
766	}
767	imf->imf_st[1] = MCAST_INCLUDE;
768}
769
770/*
771 * Mark socket-layer filter set deltas as committed.
772 */
773static void
774imf_commit(struct in_mfilter *imf)
775{
776	struct ip_msource	*ims;
777	struct in_msource	*lims;
778
779	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
780		lims = (struct in_msource *)ims;
781		lims->imsl_st[0] = lims->imsl_st[1];
782	}
783	imf->imf_st[0] = imf->imf_st[1];
784}
785
786/*
787 * Reap unreferenced sources from socket-layer filter set.
788 */
789static void
790imf_reap(struct in_mfilter *imf)
791{
792	struct ip_msource	*ims, *tims;
793	struct in_msource	*lims;
794
795	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
796		lims = (struct in_msource *)ims;
797		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
798		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
799			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
800			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
801			free(ims, M_INMFILTER);
802			imf->imf_nsrc--;
803		}
804	}
805}
806
807/*
808 * Purge socket-layer filter set.
809 */
810static void
811imf_purge(struct in_mfilter *imf)
812{
813	struct ip_msource	*ims, *tims;
814
815	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
816		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
817		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
818		free(ims, M_INMFILTER);
819		imf->imf_nsrc--;
820	}
821	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
822	KASSERT(RB_EMPTY(&imf->imf_sources),
823	    ("%s: imf_sources not empty", __func__));
824}
825
826/*
827 * Look up a source filter entry for a multicast group.
828 *
829 * inm is the group descriptor to work with.
830 * haddr is the host-byte-order IPv4 address to look up.
831 * noalloc may be non-zero to suppress allocation of sources.
832 * *pims will be set to the address of the retrieved or allocated source.
833 *
834 * SMPng: NOTE: may be called with locks held.
835 * Return 0 if successful, otherwise return a non-zero error code.
836 */
837static int
838inm_get_source(struct in_multi *inm, const in_addr_t haddr,
839    const int noalloc, struct ip_msource **pims)
840{
841	struct ip_msource	 find;
842	struct ip_msource	*ims, *nims;
843#ifdef KTR
844	struct in_addr ia;
845#endif
846
847	find.ims_haddr = haddr;
848	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
849	if (ims == NULL && !noalloc) {
850		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
851			return (ENOSPC);
852		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
853		    M_NOWAIT | M_ZERO);
854		if (nims == NULL)
855			return (ENOMEM);
856		nims->ims_haddr = haddr;
857		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
858		++inm->inm_nsrc;
859		ims = nims;
860#ifdef KTR
861		ia.s_addr = htonl(haddr);
862		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
863		    inet_ntoa(ia), ims);
864#endif
865	}
866
867	*pims = ims;
868	return (0);
869}
870
871/*
872 * Merge socket-layer source into IGMP-layer source.
873 * If rollback is non-zero, perform the inverse of the merge.
874 */
875static void
876ims_merge(struct ip_msource *ims, const struct in_msource *lims,
877    const int rollback)
878{
879	int n = rollback ? -1 : 1;
880#ifdef KTR
881	struct in_addr ia;
882
883	ia.s_addr = htonl(ims->ims_haddr);
884#endif
885
886	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
887		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
888		    __func__, n, inet_ntoa(ia));
889		ims->ims_st[1].ex -= n;
890	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
891		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
892		    __func__, n, inet_ntoa(ia));
893		ims->ims_st[1].in -= n;
894	}
895
896	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
897		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
898		    __func__, n, inet_ntoa(ia));
899		ims->ims_st[1].ex += n;
900	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
901		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
902		    __func__, n, inet_ntoa(ia));
903		ims->ims_st[1].in += n;
904	}
905}
906
907/*
908 * Atomically update the global in_multi state, when a membership's
909 * filter list is being updated in any way.
910 *
911 * imf is the per-inpcb-membership group filter pointer.
912 * A fake imf may be passed for in-kernel consumers.
913 *
914 * XXX This is a candidate for a set-symmetric-difference style loop
915 * which would eliminate the repeated lookup from root of ims nodes,
916 * as they share the same key space.
917 *
918 * If any error occurred this function will back out of refcounts
919 * and return a non-zero value.
920 */
921static int
922inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
923{
924	struct ip_msource	*ims, *nims;
925	struct in_msource	*lims;
926	int			 schanged, error;
927	int			 nsrc0, nsrc1;
928
929	schanged = 0;
930	error = 0;
931	nsrc1 = nsrc0 = 0;
932
933	/*
934	 * Update the source filters first, as this may fail.
935	 * Maintain count of in-mode filters at t0, t1. These are
936	 * used to work out if we transition into ASM mode or not.
937	 * Maintain a count of source filters whose state was
938	 * actually modified by this operation.
939	 */
940	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
941		lims = (struct in_msource *)ims;
942		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
943		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
944		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
945		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
946		++schanged;
947		if (error)
948			break;
949		ims_merge(nims, lims, 0);
950	}
951	if (error) {
952		struct ip_msource *bims;
953
954		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
955			lims = (struct in_msource *)ims;
956			if (lims->imsl_st[0] == lims->imsl_st[1])
957				continue;
958			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
959			if (bims == NULL)
960				continue;
961			ims_merge(bims, lims, 1);
962		}
963		goto out_reap;
964	}
965
966	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
967	    __func__, nsrc0, nsrc1);
968
969	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
970	if (imf->imf_st[0] == imf->imf_st[1] &&
971	    imf->imf_st[1] == MCAST_INCLUDE) {
972		if (nsrc1 == 0) {
973			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
974			--inm->inm_st[1].iss_in;
975		}
976	}
977
978	/* Handle filter mode transition on socket. */
979	if (imf->imf_st[0] != imf->imf_st[1]) {
980		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
981		    __func__, imf->imf_st[0], imf->imf_st[1]);
982
983		if (imf->imf_st[0] == MCAST_EXCLUDE) {
984			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
985			--inm->inm_st[1].iss_ex;
986		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
987			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
988			--inm->inm_st[1].iss_in;
989		}
990
991		if (imf->imf_st[1] == MCAST_EXCLUDE) {
992			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
993			inm->inm_st[1].iss_ex++;
994		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
995			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
996			inm->inm_st[1].iss_in++;
997		}
998	}
999
1000	/*
1001	 * Track inm filter state in terms of listener counts.
1002	 * If there are any exclusive listeners, stack-wide
1003	 * membership is exclusive.
1004	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
1005	 * If no listeners remain, state is undefined at t1,
1006	 * and the IGMP lifecycle for this group should finish.
1007	 */
1008	if (inm->inm_st[1].iss_ex > 0) {
1009		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
1010		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1011	} else if (inm->inm_st[1].iss_in > 0) {
1012		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1013		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1014	} else {
1015		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1016		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1017	}
1018
1019	/* Decrement ASM listener count on transition out of ASM mode. */
1020	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1021		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1022		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
1023			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1024			--inm->inm_st[1].iss_asm;
1025	}
1026
1027	/* Increment ASM listener count on transition to ASM mode. */
1028	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1029		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1030		inm->inm_st[1].iss_asm++;
1031	}
1032
1033	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1034	inm_print(inm);
1035
1036out_reap:
1037	if (schanged > 0) {
1038		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1039		inm_reap(inm);
1040	}
1041	return (error);
1042}
1043
1044/*
1045 * Mark an in_multi's filter set deltas as committed.
1046 * Called by IGMP after a state change has been enqueued.
1047 */
1048void
1049inm_commit(struct in_multi *inm)
1050{
1051	struct ip_msource	*ims;
1052
1053	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1054	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1055	inm_print(inm);
1056
1057	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1058		ims->ims_st[0] = ims->ims_st[1];
1059	}
1060	inm->inm_st[0] = inm->inm_st[1];
1061}
1062
1063/*
1064 * Reap unreferenced nodes from an in_multi's filter set.
1065 */
1066static void
1067inm_reap(struct in_multi *inm)
1068{
1069	struct ip_msource	*ims, *tims;
1070
1071	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1072		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1073		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1074		    ims->ims_stp != 0)
1075			continue;
1076		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1077		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1078		free(ims, M_IPMSOURCE);
1079		inm->inm_nsrc--;
1080	}
1081}
1082
1083/*
1084 * Purge all source nodes from an in_multi's filter set.
1085 */
1086static void
1087inm_purge(struct in_multi *inm)
1088{
1089	struct ip_msource	*ims, *tims;
1090
1091	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1092		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1093		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1094		free(ims, M_IPMSOURCE);
1095		inm->inm_nsrc--;
1096	}
1097}
1098
/*
 * Join a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock, delegates to in_joingroup_locked() and
 * returns its result after dropping the lock again.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rc;

	IN_MULTI_LOCK();
	rc = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();

	return (rc);
}
1118
1119/*
1120 * Join a multicast group; real entry point.
1121 *
1122 * Only preserves atomicity at inm level.
1123 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1124 *
1125 * If the IGMP downcall fails, the group is not joined, and an error
1126 * code is returned.
1127 */
1128int
1129in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1130    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1131{
1132	struct in_mfilter	 timf;
1133	struct in_multi		*inm;
1134	int			 error;
1135
1136	IN_MULTI_LOCK_ASSERT();
1137
1138	CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
1139	    inet_ntoa(*gina), ifp, ifp->if_xname);
1140
1141	error = 0;
1142	inm = NULL;
1143
1144	/*
1145	 * If no imf was specified (i.e. kernel consumer),
1146	 * fake one up and assume it is an ASM join.
1147	 */
1148	if (imf == NULL) {
1149		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1150		imf = &timf;
1151	}
1152
1153	error = in_getmulti(ifp, gina, &inm);
1154	if (error) {
1155		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1156		return (error);
1157	}
1158
1159	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1160	error = inm_merge(inm, imf);
1161	if (error) {
1162		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1163		goto out_inm_release;
1164	}
1165
1166	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1167	error = igmp_change_state(inm);
1168	if (error) {
1169		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1170		goto out_inm_release;
1171	}
1172
1173out_inm_release:
1174	if (error) {
1175		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1176		inm_release_locked(inm);
1177	} else {
1178		*pinm = inm;
1179	}
1180
1181	return (error);
1182}
1183
/*
 * Leave a multicast group; unlocked entry point.
 *
 * Takes the IN_MULTI lock, delegates to in_leavegroup_locked() and
 * returns its result after dropping the lock again.
 * The inm is not dereferenced here before the lock is held.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int error;

	IN_MULTI_LOCK();
	error = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (error);
}
1201
1202/*
1203 * Leave a multicast group; real entry point.
1204 * All source filters will be expunged.
1205 *
1206 * Only preserves atomicity at inm level.
1207 *
1208 * Holding the write lock for the INP which contains imf
1209 * is highly advisable. We can't assert for it as imf does not
1210 * contain a back-pointer to the owning inp.
1211 *
1212 * Note: This is not the same as inm_release(*) as this function also
1213 * makes a state change downcall into IGMP.
1214 */
1215int
1216in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1217{
1218	struct in_mfilter	 timf;
1219	int			 error;
1220
1221	error = 0;
1222
1223#if defined(INVARIANTS) && defined(IFF_ASSERTGIANT)
1224	if (!inm_is_ifp_detached(inm))
1225		IFF_ASSERTGIANT(inm->inm_ifp);
1226#endif
1227
1228	IN_MULTI_LOCK_ASSERT();
1229
1230	CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
1231	    inm, inet_ntoa(inm->inm_addr),
1232	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
1233	    imf);
1234
1235	/*
1236	 * If no imf was specified (i.e. kernel consumer),
1237	 * fake one up and assume it is an ASM join.
1238	 */
1239	if (imf == NULL) {
1240		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1241		imf = &timf;
1242	}
1243
1244	/*
1245	 * Begin state merge transaction at IGMP layer.
1246	 *
1247	 * As this particular invocation should not cause any memory
1248	 * to be allocated, and there is no opportunity to roll back
1249	 * the transaction, it MUST NOT fail.
1250	 */
1251	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1252	error = inm_merge(inm, imf);
1253	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1254
1255	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1256	error = igmp_change_state(inm);
1257	if (error)
1258		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1259
1260	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1261	inm_release_locked(inm);
1262
1263	return (error);
1264}
1265
1266/*#ifndef BURN_BRIDGES*/
1267/*
1268 * Join an IPv4 multicast group in (*,G) exclusive mode.
1269 * The group must be a 224.0.0.0/24 link-scope group.
1270 * This KPI is for legacy kernel consumers only.
1271 */
1272struct in_multi *
1273in_addmulti(struct in_addr *ap, struct ifnet *ifp)
1274{
1275	struct in_multi *pinm;
1276	int error;
1277
1278	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
1279	    ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));
1280
1281	error = in_joingroup(ifp, ap, NULL, &pinm);
1282	if (error != 0)
1283		pinm = NULL;
1284
1285	return (pinm);
1286}
1287
1288/*
1289 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
1290 * This KPI is for legacy kernel consumers only.
1291 */
1292void
1293in_delmulti(struct in_multi *inm)
1294{
1295
1296	(void)in_leavegroup(inm, NULL);
1297}
1298/*#endif*/
1299
1300/*
1301 * Block or unblock an ASM multicast source on an inpcb.
1302 * This implements the delta-based API described in RFC 3678.
1303 *
1304 * The delta-based API applies only to exclusive-mode memberships.
1305 * An IGMP downcall will be performed.
1306 *
1307 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1308 *
1309 * Return 0 if successful, otherwise return an appropriate error code.
1310 */
1311static int
1312inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1313{
1314	INIT_VNET_NET(curvnet);
1315	INIT_VNET_INET(curvnet);
1316	struct group_source_req		 gsr;
1317	sockunion_t			*gsa, *ssa;
1318	struct ifnet			*ifp;
1319	struct in_mfilter		*imf;
1320	struct ip_moptions		*imo;
1321	struct in_msource		*ims;
1322	struct in_multi			*inm;
1323	size_t				 idx;
1324	uint16_t			 fmode;
1325	int				 error, doblock;
1326
1327	ifp = NULL;
1328	error = 0;
1329	doblock = 0;
1330
1331	memset(&gsr, 0, sizeof(struct group_source_req));
1332	gsa = (sockunion_t *)&gsr.gsr_group;
1333	ssa = (sockunion_t *)&gsr.gsr_source;
1334
1335	switch (sopt->sopt_name) {
1336	case IP_BLOCK_SOURCE:
1337	case IP_UNBLOCK_SOURCE: {
1338		struct ip_mreq_source	 mreqs;
1339
1340		error = sooptcopyin(sopt, &mreqs,
1341		    sizeof(struct ip_mreq_source),
1342		    sizeof(struct ip_mreq_source));
1343		if (error)
1344			return (error);
1345
1346		gsa->sin.sin_family = AF_INET;
1347		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1348		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1349
1350		ssa->sin.sin_family = AF_INET;
1351		ssa->sin.sin_len = sizeof(struct sockaddr_in);
1352		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1353
1354		if (!in_nullhost(mreqs.imr_interface))
1355			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1356
1357		if (sopt->sopt_name == IP_BLOCK_SOURCE)
1358			doblock = 1;
1359
1360		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1361		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1362		break;
1363	    }
1364
1365	case MCAST_BLOCK_SOURCE:
1366	case MCAST_UNBLOCK_SOURCE:
1367		error = sooptcopyin(sopt, &gsr,
1368		    sizeof(struct group_source_req),
1369		    sizeof(struct group_source_req));
1370		if (error)
1371			return (error);
1372
1373		if (gsa->sin.sin_family != AF_INET ||
1374		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1375			return (EINVAL);
1376
1377		if (ssa->sin.sin_family != AF_INET ||
1378		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1379			return (EINVAL);
1380
1381		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1382			return (EADDRNOTAVAIL);
1383
1384		ifp = ifnet_byindex(gsr.gsr_interface);
1385
1386		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1387			doblock = 1;
1388		break;
1389
1390	default:
1391		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1392		    __func__, sopt->sopt_name);
1393		return (EOPNOTSUPP);
1394		break;
1395	}
1396
1397	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1398		return (EINVAL);
1399
1400	/*
1401	 * Check if we are actually a member of this group.
1402	 */
1403	imo = inp_findmoptions(inp);
1404	idx = imo_match_group(imo, ifp, &gsa->sa);
1405	if (idx == -1 || imo->imo_mfilters == NULL) {
1406		error = EADDRNOTAVAIL;
1407		goto out_inp_locked;
1408	}
1409
1410	KASSERT(imo->imo_mfilters != NULL,
1411	    ("%s: imo_mfilters not allocated", __func__));
1412	imf = &imo->imo_mfilters[idx];
1413	inm = imo->imo_membership[idx];
1414
1415	/*
1416	 * Attempting to use the delta-based API on an
1417	 * non exclusive-mode membership is an error.
1418	 */
1419	fmode = imf->imf_st[0];
1420	if (fmode != MCAST_EXCLUDE) {
1421		error = EINVAL;
1422		goto out_inp_locked;
1423	}
1424
1425	/*
1426	 * Deal with error cases up-front:
1427	 *  Asked to block, but already blocked; or
1428	 *  Asked to unblock, but nothing to unblock.
1429	 * If adding a new block entry, allocate it.
1430	 */
1431	ims = imo_match_source(imo, idx, &ssa->sa);
1432	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1433		CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
1434		    inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ");
1435		error = EADDRNOTAVAIL;
1436		goto out_inp_locked;
1437	}
1438
1439	INP_WLOCK_ASSERT(inp);
1440
1441	/*
1442	 * Begin state merge transaction at socket layer.
1443	 */
1444	if (doblock) {
1445		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
1446		ims = imf_graft(imf, fmode, &ssa->sin);
1447		if (ims == NULL)
1448			error = ENOMEM;
1449	} else {
1450		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
1451		error = imf_prune(imf, &ssa->sin);
1452	}
1453
1454	if (error) {
1455		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
1456		goto out_imf_rollback;
1457	}
1458
1459	/*
1460	 * Begin state merge transaction at IGMP layer.
1461	 */
1462	IN_MULTI_LOCK();
1463
1464	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1465	error = inm_merge(inm, imf);
1466	if (error) {
1467		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1468		goto out_imf_rollback;
1469	}
1470
1471	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1472	error = igmp_change_state(inm);
1473	if (error)
1474		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1475
1476	IN_MULTI_UNLOCK();
1477
1478out_imf_rollback:
1479	if (error)
1480		imf_rollback(imf);
1481	else
1482		imf_commit(imf);
1483
1484	imf_reap(imf);
1485
1486out_inp_locked:
1487	INP_WUNLOCK(inp);
1488	return (error);
1489}
1490
1491/*
1492 * Given an inpcb, return its multicast options structure pointer.  Accepts
1493 * an unlocked inpcb pointer, but will return it locked.  May sleep.
1494 *
1495 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1496 * SMPng: NOTE: Returns with the INP write lock held.
1497 */
1498static struct ip_moptions *
1499inp_findmoptions(struct inpcb *inp)
1500{
1501	struct ip_moptions	 *imo;
1502	struct in_multi		**immp;
1503	struct in_mfilter	 *imfp;
1504	size_t			  idx;
1505
1506	INP_WLOCK(inp);
1507	if (inp->inp_moptions != NULL)
1508		return (inp->inp_moptions);
1509
1510	INP_WUNLOCK(inp);
1511
1512	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1513	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
1514	    M_WAITOK | M_ZERO);
1515	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
1516	    M_INMFILTER, M_WAITOK);
1517
1518	imo->imo_multicast_ifp = NULL;
1519	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1520	imo->imo_multicast_vif = -1;
1521	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1522	imo->imo_multicast_loop = in_mcast_loop;
1523	imo->imo_num_memberships = 0;
1524	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1525	imo->imo_membership = immp;
1526
1527	/* Initialize per-group source filters. */
1528	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
1529		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
1530	imo->imo_mfilters = imfp;
1531
1532	INP_WLOCK(inp);
1533	if (inp->inp_moptions != NULL) {
1534		free(imfp, M_INMFILTER);
1535		free(immp, M_IPMOPTS);
1536		free(imo, M_IPMOPTS);
1537		return (inp->inp_moptions);
1538	}
1539	inp->inp_moptions = imo;
1540	return (imo);
1541}
1542
1543/*
1544 * Discard the IP multicast options (and source filters).
1545 *
1546 * SMPng: NOTE: assumes INP write lock is held.
1547 */
1548void
1549inp_freemoptions(struct ip_moptions *imo)
1550{
1551	struct in_mfilter	*imf;
1552	size_t			 idx, nmships;
1553
1554	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
1555
1556	nmships = imo->imo_num_memberships;
1557	for (idx = 0; idx < nmships; ++idx) {
1558		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
1559		if (imf)
1560			imf_leave(imf);
1561		(void)in_leavegroup(imo->imo_membership[idx], imf);
1562		if (imf)
1563			imf_purge(imf);
1564	}
1565
1566	if (imo->imo_mfilters)
1567		free(imo->imo_mfilters, M_INMFILTER);
1568	free(imo->imo_membership, M_IPMOPTS);
1569	free(imo, M_IPMOPTS);
1570}
1571
1572/*
1573 * Atomically get source filters on a socket for an IPv4 multicast group.
1574 * Called with INP lock held; returns with lock released.
1575 */
1576static int
1577inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1578{
1579	INIT_VNET_NET(curvnet);
1580	struct __msfilterreq	 msfr;
1581	sockunion_t		*gsa;
1582	struct ifnet		*ifp;
1583	struct ip_moptions	*imo;
1584	struct in_mfilter	*imf;
1585	struct ip_msource	*ims;
1586	struct in_msource	*lims;
1587	struct sockaddr_in	*psin;
1588	struct sockaddr_storage	*ptss;
1589	struct sockaddr_storage	*tss;
1590	int			 error;
1591	size_t			 idx, nsrcs, ncsrcs;
1592
1593	INP_WLOCK_ASSERT(inp);
1594
1595	imo = inp->inp_moptions;
1596	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
1597
1598	INP_WUNLOCK(inp);
1599
1600	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1601	    sizeof(struct __msfilterreq));
1602	if (error)
1603		return (error);
1604
1605	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1606		return (EINVAL);
1607
1608	ifp = ifnet_byindex(msfr.msfr_ifindex);
1609	if (ifp == NULL)
1610		return (EINVAL);
1611
1612	INP_WLOCK(inp);
1613
1614	/*
1615	 * Lookup group on the socket.
1616	 */
1617	gsa = (sockunion_t *)&msfr.msfr_group;
1618	idx = imo_match_group(imo, ifp, &gsa->sa);
1619	if (idx == -1 || imo->imo_mfilters == NULL) {
1620		INP_WUNLOCK(inp);
1621		return (EADDRNOTAVAIL);
1622	}
1623	imf = &imo->imo_mfilters[idx];
1624
1625	/*
1626	 * Ignore memberships which are in limbo.
1627	 */
1628	if (imf->imf_st[1] == MCAST_UNDEFINED) {
1629		INP_WUNLOCK(inp);
1630		return (EAGAIN);
1631	}
1632	msfr.msfr_fmode = imf->imf_st[1];
1633
1634	/*
1635	 * If the user specified a buffer, copy out the source filter
1636	 * entries to userland gracefully.
1637	 * We only copy out the number of entries which userland
1638	 * has asked for, but we always tell userland how big the
1639	 * buffer really needs to be.
1640	 */
1641	tss = NULL;
1642	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1643		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1644		    M_TEMP, M_NOWAIT | M_ZERO);
1645		if (tss == NULL) {
1646			INP_WUNLOCK(inp);
1647			return (ENOBUFS);
1648		}
1649	}
1650
1651	/*
1652	 * Count number of sources in-mode at t0.
1653	 * If buffer space exists and remains, copy out source entries.
1654	 */
1655	nsrcs = msfr.msfr_nsrcs;
1656	ncsrcs = 0;
1657	ptss = tss;
1658	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1659		lims = (struct in_msource *)ims;
1660		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
1661		    lims->imsl_st[0] != imf->imf_st[0])
1662			continue;
1663		++ncsrcs;
1664		if (tss != NULL && nsrcs-- > 0) {
1665			psin = (struct sockaddr_in *)ptss++;
1666			psin->sin_family = AF_INET;
1667			psin->sin_len = sizeof(struct sockaddr_in);
1668			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
1669		}
1670	}
1671
1672	INP_WUNLOCK(inp);
1673
1674	if (tss != NULL) {
1675		error = copyout(tss, msfr.msfr_srcs,
1676		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1677		free(tss, M_TEMP);
1678		if (error)
1679			return (error);
1680	}
1681
1682	msfr.msfr_nsrcs = ncsrcs;
1683	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1684
1685	return (error);
1686}
1687
1688/*
1689 * Return the IP multicast options in response to user getsockopt().
1690 */
1691int
1692inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1693{
1694	INIT_VNET_INET(curvnet);
1695	struct ip_mreqn		 mreqn;
1696	struct ip_moptions	*imo;
1697	struct ifnet		*ifp;
1698	struct in_ifaddr	*ia;
1699	int			 error, optval;
1700	u_char			 coptval;
1701
1702	INP_WLOCK(inp);
1703	imo = inp->inp_moptions;
1704	/*
1705	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1706	 * or is a divert socket, reject it.
1707	 */
1708	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1709	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1710	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1711		INP_WUNLOCK(inp);
1712		return (EOPNOTSUPP);
1713	}
1714
1715	error = 0;
1716	switch (sopt->sopt_name) {
1717	case IP_MULTICAST_VIF:
1718		if (imo != NULL)
1719			optval = imo->imo_multicast_vif;
1720		else
1721			optval = -1;
1722		INP_WUNLOCK(inp);
1723		error = sooptcopyout(sopt, &optval, sizeof(int));
1724		break;
1725
1726	case IP_MULTICAST_IF:
1727		memset(&mreqn, 0, sizeof(struct ip_mreqn));
1728		if (imo != NULL) {
1729			ifp = imo->imo_multicast_ifp;
1730			if (!in_nullhost(imo->imo_multicast_addr)) {
1731				mreqn.imr_address = imo->imo_multicast_addr;
1732			} else if (ifp != NULL) {
1733				mreqn.imr_ifindex = ifp->if_index;
1734				IFP_TO_IA(ifp, ia);
1735				if (ia != NULL) {
1736					mreqn.imr_address =
1737					    IA_SIN(ia)->sin_addr;
1738				}
1739			}
1740		}
1741		INP_WUNLOCK(inp);
1742		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1743			error = sooptcopyout(sopt, &mreqn,
1744			    sizeof(struct ip_mreqn));
1745		} else {
1746			error = sooptcopyout(sopt, &mreqn.imr_address,
1747			    sizeof(struct in_addr));
1748		}
1749		break;
1750
1751	case IP_MULTICAST_TTL:
1752		if (imo == 0)
1753			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1754		else
1755			optval = coptval = imo->imo_multicast_ttl;
1756		INP_WUNLOCK(inp);
1757		if (sopt->sopt_valsize == sizeof(u_char))
1758			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1759		else
1760			error = sooptcopyout(sopt, &optval, sizeof(int));
1761		break;
1762
1763	case IP_MULTICAST_LOOP:
1764		if (imo == 0)
1765			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1766		else
1767			optval = coptval = imo->imo_multicast_loop;
1768		INP_WUNLOCK(inp);
1769		if (sopt->sopt_valsize == sizeof(u_char))
1770			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1771		else
1772			error = sooptcopyout(sopt, &optval, sizeof(int));
1773		break;
1774
1775	case IP_MSFILTER:
1776		if (imo == NULL) {
1777			error = EADDRNOTAVAIL;
1778			INP_WUNLOCK(inp);
1779		} else {
1780			error = inp_get_source_filters(inp, sopt);
1781		}
1782		break;
1783
1784	default:
1785		INP_WUNLOCK(inp);
1786		error = ENOPROTOOPT;
1787		break;
1788	}
1789
1790	INP_UNLOCK_ASSERT(inp);
1791
1792	return (error);
1793}
1794
1795/*
1796 * Look up the ifnet to use for a multicast group membership,
1797 * given the IPv4 address of an interface, and the IPv4 group address.
1798 *
1799 * This routine exists to support legacy multicast applications
1800 * which do not understand that multicast memberships are scoped to
1801 * specific physical links in the networking stack, or which need
1802 * to join link-scope groups before IPv4 addresses are configured.
1803 *
1804 * If inp is non-NULL, use this socket's current FIB number for any
1805 * required FIB lookup.
1806 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
1807 * and use its ifp; usually, this points to the default next-hop.
1808 *
1809 * If the FIB lookup fails, attempt to use the first non-loopback
1810 * interface with multicast capability in the system as a
1811 * last resort. The legacy IPv4 ASM API requires that we do
1812 * this in order to allow groups to be joined when the routing
1813 * table has not yet been populated during boot.
1814 *
1815 * Returns NULL if no ifp could be found.
1816 *
1817 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
1818 * FUTURE: Implement IPv4 source-address selection.
1819 */
1820static struct ifnet *
1821inp_lookup_mcast_ifp(const struct inpcb *inp,
1822    const struct sockaddr_in *gsin, const struct in_addr ina)
1823{
1824	struct ifnet *ifp;
1825
1826	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
1827	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
1828	    ("%s: not multicast", __func__));
1829
1830	ifp = NULL;
1831	if (!in_nullhost(ina)) {
1832		INADDR_TO_IFP(ina, ifp);
1833	} else {
1834		struct route ro;
1835
1836		ro.ro_rt = NULL;
1837		memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
1838		in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
1839		if (ro.ro_rt != NULL) {
1840			ifp = ro.ro_rt->rt_ifp;
1841			KASSERT(ifp != NULL, ("%s: null ifp", __func__));
1842			RTFREE(ro.ro_rt);
1843		} else {
1844			struct in_ifaddr *ia;
1845			struct ifnet *mifp;
1846
1847			mifp = NULL;
1848			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1849				mifp = ia->ia_ifp;
1850				if (!(mifp->if_flags & IFF_LOOPBACK) &&
1851				     (mifp->if_flags & IFF_MULTICAST)) {
1852					ifp = mifp;
1853					break;
1854				}
1855			}
1856		}
1857	}
1858
1859	return (ifp);
1860}
1861
1862/*
1863 * Join an IPv4 multicast group, possibly with a source.
1864 */
1865static int
1866inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1867{
1868	INIT_VNET_NET(curvnet);
1869	INIT_VNET_INET(curvnet);
1870	struct group_source_req		 gsr;
1871	sockunion_t			*gsa, *ssa;
1872	struct ifnet			*ifp;
1873	struct in_mfilter		*imf;
1874	struct ip_moptions		*imo;
1875	struct in_multi			*inm;
1876	struct in_msource		*lims;
1877	size_t				 idx;
1878	int				 error, is_new;
1879
1880	ifp = NULL;
1881	imf = NULL;
1882	error = 0;
1883	is_new = 0;
1884
1885	memset(&gsr, 0, sizeof(struct group_source_req));
1886	gsa = (sockunion_t *)&gsr.gsr_group;
1887	gsa->ss.ss_family = AF_UNSPEC;
1888	ssa = (sockunion_t *)&gsr.gsr_source;
1889	ssa->ss.ss_family = AF_UNSPEC;
1890
1891	switch (sopt->sopt_name) {
1892	case IP_ADD_MEMBERSHIP:
1893	case IP_ADD_SOURCE_MEMBERSHIP: {
1894		struct ip_mreq_source	 mreqs;
1895
1896		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
1897			error = sooptcopyin(sopt, &mreqs,
1898			    sizeof(struct ip_mreq),
1899			    sizeof(struct ip_mreq));
1900			/*
1901			 * Do argument switcharoo from ip_mreq into
1902			 * ip_mreq_source to avoid using two instances.
1903			 */
1904			mreqs.imr_interface = mreqs.imr_sourceaddr;
1905			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1906		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1907			error = sooptcopyin(sopt, &mreqs,
1908			    sizeof(struct ip_mreq_source),
1909			    sizeof(struct ip_mreq_source));
1910		}
1911		if (error)
1912			return (error);
1913
1914		gsa->sin.sin_family = AF_INET;
1915		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1916		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1917
1918		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1919			ssa->sin.sin_family = AF_INET;
1920			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1921			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1922		}
1923
1924		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
1925		    mreqs.imr_interface);
1926		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
1927		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1928		break;
1929	}
1930
1931	case MCAST_JOIN_GROUP:
1932	case MCAST_JOIN_SOURCE_GROUP:
1933		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1934			error = sooptcopyin(sopt, &gsr,
1935			    sizeof(struct group_req),
1936			    sizeof(struct group_req));
1937		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1938			error = sooptcopyin(sopt, &gsr,
1939			    sizeof(struct group_source_req),
1940			    sizeof(struct group_source_req));
1941		}
1942		if (error)
1943			return (error);
1944
1945		if (gsa->sin.sin_family != AF_INET ||
1946		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1947			return (EINVAL);
1948
1949		/*
1950		 * Overwrite the port field if present, as the sockaddr
1951		 * being copied in may be matched with a binary comparison.
1952		 */
1953		gsa->sin.sin_port = 0;
1954		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1955			if (ssa->sin.sin_family != AF_INET ||
1956			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1957				return (EINVAL);
1958			ssa->sin.sin_port = 0;
1959		}
1960
1961		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1962			return (EADDRNOTAVAIL);
1963		ifp = ifnet_byindex(gsr.gsr_interface);
1964		break;
1965
1966	default:
1967		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1968		    __func__, sopt->sopt_name);
1969		return (EOPNOTSUPP);
1970		break;
1971	}
1972
1973	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1974		return (EINVAL);
1975
1976	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1977		return (EADDRNOTAVAIL);
1978
1979	/*
1980	 * MCAST_JOIN_SOURCE on an exclusive membership is an error.
1981	 * On an existing inclusive membership, it just adds the
1982	 * source to the filter list.
1983	 */
1984	imo = inp_findmoptions(inp);
1985	idx = imo_match_group(imo, ifp, &gsa->sa);
1986	if (idx == -1) {
1987		is_new = 1;
1988	} else {
1989		inm = imo->imo_membership[idx];
1990		imf = &imo->imo_mfilters[idx];
1991		if (ssa->ss.ss_family != AF_UNSPEC &&
1992		    imf->imf_st[1] != MCAST_INCLUDE) {
1993			error = EINVAL;
1994			goto out_inp_locked;
1995		}
1996		lims = imo_match_source(imo, idx, &ssa->sa);
1997		if (lims != NULL) {
1998			error = EADDRNOTAVAIL;
1999			goto out_inp_locked;
2000		}
2001	}
2002
2003	/*
2004	 * Begin state merge transaction at socket layer.
2005	 */
2006	INP_WLOCK_ASSERT(inp);
2007
2008	if (is_new) {
2009		if (imo->imo_num_memberships == imo->imo_max_memberships) {
2010			error = imo_grow(imo);
2011			if (error)
2012				goto out_inp_locked;
2013		}
2014		/*
2015		 * Allocate the new slot upfront so we can deal with
2016		 * grafting the new source filter in same code path
2017		 * as for join-source on existing membership.
2018		 */
2019		idx = imo->imo_num_memberships;
2020		imo->imo_membership[idx] = NULL;
2021		imo->imo_num_memberships++;
2022		KASSERT(imo->imo_mfilters != NULL,
2023		    ("%s: imf_mfilters vector was not allocated", __func__));
2024		imf = &imo->imo_mfilters[idx];
2025		KASSERT(RB_EMPTY(&imf->imf_sources),
2026		    ("%s: imf_sources not empty", __func__));
2027	}
2028
2029	/*
2030	 * Graft new source into filter list for this inpcb's
2031	 * membership of the group. The in_multi may not have
2032	 * been allocated yet if this is a new membership.
2033	 */
2034	if (ssa->ss.ss_family != AF_UNSPEC) {
2035		/* Membership starts in IN mode */
2036		if (is_new) {
2037			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2038			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
2039		} else {
2040			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2041		}
2042		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2043		if (lims == NULL) {
2044			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2045			    __func__);
2046			error = ENOMEM;
2047			goto out_imo_free;
2048		}
2049	}
2050
2051	/*
2052	 * Begin state merge transaction at IGMP layer.
2053	 */
2054	IN_MULTI_LOCK();
2055
2056	if (is_new) {
2057		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2058		    &inm);
2059		if (error)
2060			goto out_imo_free;
2061		imo->imo_membership[idx] = inm;
2062	} else {
2063		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2064		error = inm_merge(inm, imf);
2065		if (error) {
2066			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2067			    __func__);
2068			goto out_imf_rollback;
2069		}
2070		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2071		error = igmp_change_state(inm);
2072		if (error) {
2073			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2074			    __func__);
2075			goto out_imf_rollback;
2076		}
2077	}
2078
2079	IN_MULTI_UNLOCK();
2080
2081out_imf_rollback:
2082	INP_WLOCK_ASSERT(inp);
2083	if (error) {
2084		imf_rollback(imf);
2085		if (is_new)
2086			imf_purge(imf);
2087		else
2088			imf_reap(imf);
2089	} else {
2090		imf_commit(imf);
2091	}
2092
2093out_imo_free:
2094	if (error && is_new) {
2095		imo->imo_membership[idx] = NULL;
2096		--imo->imo_num_memberships;
2097	}
2098
2099out_inp_locked:
2100	INP_WUNLOCK(inp);
2101	return (error);
2102}
2103
2104/*
2105 * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2106 */
2107static int
2108inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2109{
2110	INIT_VNET_NET(curvnet);
2111	INIT_VNET_INET(curvnet);
2112	struct group_source_req		 gsr;
2113	struct ip_mreq_source		 mreqs;
2114	sockunion_t			*gsa, *ssa;
2115	struct ifnet			*ifp;
2116	struct in_mfilter		*imf;
2117	struct ip_moptions		*imo;
2118	struct in_msource		*ims;
2119	struct in_multi			*inm;
2120	size_t				 idx;
2121	int				 error, is_final;
2122
2123	ifp = NULL;
2124	error = 0;
2125	is_final = 1;
2126
2127	memset(&gsr, 0, sizeof(struct group_source_req));
2128	gsa = (sockunion_t *)&gsr.gsr_group;
2129	gsa->ss.ss_family = AF_UNSPEC;
2130	ssa = (sockunion_t *)&gsr.gsr_source;
2131	ssa->ss.ss_family = AF_UNSPEC;
2132
2133	switch (sopt->sopt_name) {
2134	case IP_DROP_MEMBERSHIP:
2135	case IP_DROP_SOURCE_MEMBERSHIP:
2136		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2137			error = sooptcopyin(sopt, &mreqs,
2138			    sizeof(struct ip_mreq),
2139			    sizeof(struct ip_mreq));
2140			/*
2141			 * Swap interface and sourceaddr arguments,
2142			 * as ip_mreq and ip_mreq_source are laid
2143			 * out differently.
2144			 */
2145			mreqs.imr_interface = mreqs.imr_sourceaddr;
2146			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2147		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2148			error = sooptcopyin(sopt, &mreqs,
2149			    sizeof(struct ip_mreq_source),
2150			    sizeof(struct ip_mreq_source));
2151		}
2152		if (error)
2153			return (error);
2154
2155		gsa->sin.sin_family = AF_INET;
2156		gsa->sin.sin_len = sizeof(struct sockaddr_in);
2157		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2158
2159		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2160			ssa->sin.sin_family = AF_INET;
2161			ssa->sin.sin_len = sizeof(struct sockaddr_in);
2162			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2163		}
2164
2165		if (!in_nullhost(gsa->sin.sin_addr))
2166			INADDR_TO_IFP(mreqs.imr_interface, ifp);
2167
2168		CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p",
2169		    __func__, inet_ntoa(mreqs.imr_interface), ifp);
2170
2171		break;
2172
2173	case MCAST_LEAVE_GROUP:
2174	case MCAST_LEAVE_SOURCE_GROUP:
2175		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2176			error = sooptcopyin(sopt, &gsr,
2177			    sizeof(struct group_req),
2178			    sizeof(struct group_req));
2179		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2180			error = sooptcopyin(sopt, &gsr,
2181			    sizeof(struct group_source_req),
2182			    sizeof(struct group_source_req));
2183		}
2184		if (error)
2185			return (error);
2186
2187		if (gsa->sin.sin_family != AF_INET ||
2188		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2189			return (EINVAL);
2190
2191		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2192			if (ssa->sin.sin_family != AF_INET ||
2193			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2194				return (EINVAL);
2195		}
2196
2197		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2198			return (EADDRNOTAVAIL);
2199
2200		ifp = ifnet_byindex(gsr.gsr_interface);
2201		break;
2202
2203	default:
2204		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2205		    __func__, sopt->sopt_name);
2206		return (EOPNOTSUPP);
2207		break;
2208	}
2209
2210	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2211		return (EINVAL);
2212
2213	/*
2214	 * Find the membership in the membership array.
2215	 */
2216	imo = inp_findmoptions(inp);
2217	idx = imo_match_group(imo, ifp, &gsa->sa);
2218	if (idx == -1) {
2219		error = EADDRNOTAVAIL;
2220		goto out_inp_locked;
2221	}
2222	inm = imo->imo_membership[idx];
2223	imf = &imo->imo_mfilters[idx];
2224
2225	if (ssa->ss.ss_family != AF_UNSPEC)
2226		is_final = 0;
2227
2228	/*
2229	 * Begin state merge transaction at socket layer.
2230	 */
2231	INP_WLOCK_ASSERT(inp);
2232
2233	/*
2234	 * If we were instructed only to leave a given source, do so.
2235	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2236	 */
2237	if (is_final) {
2238		imf_leave(imf);
2239	} else {
2240		if (imf->imf_st[0] == MCAST_EXCLUDE) {
2241			error = EADDRNOTAVAIL;
2242			goto out_inp_locked;
2243		}
2244		ims = imo_match_source(imo, idx, &ssa->sa);
2245		if (ims == NULL) {
2246			CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__,
2247			    inet_ntoa(ssa->sin.sin_addr), "not ");
2248			error = EADDRNOTAVAIL;
2249			goto out_inp_locked;
2250		}
2251		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2252		error = imf_prune(imf, &ssa->sin);
2253		if (error) {
2254			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2255			    __func__);
2256			goto out_inp_locked;
2257		}
2258	}
2259
2260	/*
2261	 * Begin state merge transaction at IGMP layer.
2262	 */
2263	IN_MULTI_LOCK();
2264
2265	if (is_final) {
2266		/*
2267		 * Give up the multicast address record to which
2268		 * the membership points.
2269		 */
2270		(void)in_leavegroup_locked(inm, imf);
2271	} else {
2272		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2273		error = inm_merge(inm, imf);
2274		if (error) {
2275			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2276			    __func__);
2277			goto out_imf_rollback;
2278		}
2279
2280		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2281		error = igmp_change_state(inm);
2282		if (error) {
2283			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2284			    __func__);
2285		}
2286	}
2287
2288	IN_MULTI_UNLOCK();
2289
2290out_imf_rollback:
2291	if (error)
2292		imf_rollback(imf);
2293	else
2294		imf_commit(imf);
2295
2296	imf_reap(imf);
2297
2298	if (is_final) {
2299		/* Remove the gap in the membership array. */
2300		for (++idx; idx < imo->imo_num_memberships; ++idx)
2301			imo->imo_membership[idx-1] = imo->imo_membership[idx];
2302		imo->imo_num_memberships--;
2303	}
2304
2305out_inp_locked:
2306	INP_WUNLOCK(inp);
2307	return (error);
2308}
2309
2310/*
2311 * Select the interface for transmitting IPv4 multicast datagrams.
2312 *
2313 * Either an instance of struct in_addr or an instance of struct ip_mreqn
2314 * may be passed to this socket option. An address of INADDR_ANY or an
2315 * interface index of 0 is used to remove a previous selection.
2316 * When no interface is selected, one is chosen for every send.
2317 */
2318static int
2319inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2320{
2321	INIT_VNET_NET(curvnet);
2322	struct in_addr		 addr;
2323	struct ip_mreqn		 mreqn;
2324	struct ifnet		*ifp;
2325	struct ip_moptions	*imo;
2326	int			 error;
2327
2328	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2329		/*
2330		 * An interface index was specified using the
2331		 * Linux-derived ip_mreqn structure.
2332		 */
2333		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2334		    sizeof(struct ip_mreqn));
2335		if (error)
2336			return (error);
2337
2338		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
2339			return (EINVAL);
2340
2341		if (mreqn.imr_ifindex == 0) {
2342			ifp = NULL;
2343		} else {
2344			ifp = ifnet_byindex(mreqn.imr_ifindex);
2345			if (ifp == NULL)
2346				return (EADDRNOTAVAIL);
2347		}
2348	} else {
2349		/*
2350		 * An interface was specified by IPv4 address.
2351		 * This is the traditional BSD usage.
2352		 */
2353		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2354		    sizeof(struct in_addr));
2355		if (error)
2356			return (error);
2357		if (in_nullhost(addr)) {
2358			ifp = NULL;
2359		} else {
2360			INADDR_TO_IFP(addr, ifp);
2361			if (ifp == NULL)
2362				return (EADDRNOTAVAIL);
2363		}
2364		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp,
2365		    inet_ntoa(addr));
2366	}
2367
2368	/* Reject interfaces which do not support multicast. */
2369	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2370		return (EOPNOTSUPP);
2371
2372	imo = inp_findmoptions(inp);
2373	imo->imo_multicast_ifp = ifp;
2374	imo->imo_multicast_addr.s_addr = INADDR_ANY;
2375	INP_WUNLOCK(inp);
2376
2377	return (0);
2378}
2379
2380/*
2381 * Atomically set source filters on a socket for an IPv4 multicast group.
2382 *
2383 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2384 */
2385static int
2386inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2387{
2388	INIT_VNET_NET(curvnet);
2389	struct __msfilterreq	 msfr;
2390	sockunion_t		*gsa;
2391	struct ifnet		*ifp;
2392	struct in_mfilter	*imf;
2393	struct ip_moptions	*imo;
2394	struct in_multi		*inm;
2395	size_t			 idx;
2396	int			 error;
2397
2398	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2399	    sizeof(struct __msfilterreq));
2400	if (error)
2401		return (error);
2402
2403	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc ||
2404	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
2405	     msfr.msfr_fmode != MCAST_INCLUDE))
2406		return (EINVAL);
2407
2408	if (msfr.msfr_group.ss_family != AF_INET ||
2409	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2410		return (EINVAL);
2411
2412	gsa = (sockunion_t *)&msfr.msfr_group;
2413	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2414		return (EINVAL);
2415
2416	gsa->sin.sin_port = 0;	/* ignore port */
2417
2418	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2419		return (EADDRNOTAVAIL);
2420
2421	ifp = ifnet_byindex(msfr.msfr_ifindex);
2422	if (ifp == NULL)
2423		return (EADDRNOTAVAIL);
2424
2425	/*
2426	 * Take the INP write lock.
2427	 * Check if this socket is a member of this group.
2428	 */
2429	imo = inp_findmoptions(inp);
2430	idx = imo_match_group(imo, ifp, &gsa->sa);
2431	if (idx == -1 || imo->imo_mfilters == NULL) {
2432		error = EADDRNOTAVAIL;
2433		goto out_inp_locked;
2434	}
2435	inm = imo->imo_membership[idx];
2436	imf = &imo->imo_mfilters[idx];
2437
2438	/*
2439	 * Begin state merge transaction at socket layer.
2440	 */
2441	INP_WLOCK_ASSERT(inp);
2442
2443	imf->imf_st[1] = msfr.msfr_fmode;
2444
2445	/*
2446	 * Apply any new source filters, if present.
2447	 * Make a copy of the user-space source vector so
2448	 * that we may copy them with a single copyin. This
2449	 * allows us to deal with page faults up-front.
2450	 */
2451	if (msfr.msfr_nsrcs > 0) {
2452		struct in_msource	*lims;
2453		struct sockaddr_in	*psin;
2454		struct sockaddr_storage	*kss, *pkss;
2455		int			 i;
2456
2457		INP_WUNLOCK(inp);
2458
2459		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2460		    __func__, (unsigned long)msfr.msfr_nsrcs);
2461		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2462		    M_TEMP, M_WAITOK);
2463		error = copyin(msfr.msfr_srcs, kss,
2464		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2465		if (error) {
2466			free(kss, M_TEMP);
2467			return (error);
2468		}
2469
2470		INP_WLOCK(inp);
2471
2472		/*
2473		 * Mark all source filters as UNDEFINED at t1.
2474		 * Restore new group filter mode, as imf_leave()
2475		 * will set it to INCLUDE.
2476		 */
2477		imf_leave(imf);
2478		imf->imf_st[1] = msfr.msfr_fmode;
2479
2480		/*
2481		 * Update socket layer filters at t1, lazy-allocating
2482		 * new entries. This saves a bunch of memory at the
2483		 * cost of one RB_FIND() per source entry; duplicate
2484		 * entries in the msfr_nsrcs vector are ignored.
2485		 * If we encounter an error, rollback transaction.
2486		 *
2487		 * XXX This too could be replaced with a set-symmetric
2488		 * difference like loop to avoid walking from root
2489		 * every time, as the key space is common.
2490		 */
2491		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2492			psin = (struct sockaddr_in *)pkss;
2493			if (psin->sin_family != AF_INET) {
2494				error = EAFNOSUPPORT;
2495				break;
2496			}
2497			if (psin->sin_len != sizeof(struct sockaddr_in)) {
2498				error = EINVAL;
2499				break;
2500			}
2501			error = imf_get_source(imf, psin, &lims);
2502			if (error)
2503				break;
2504			lims->imsl_st[1] = imf->imf_st[1];
2505		}
2506		free(kss, M_TEMP);
2507	}
2508
2509	if (error)
2510		goto out_imf_rollback;
2511
2512	INP_WLOCK_ASSERT(inp);
2513	IN_MULTI_LOCK();
2514
2515	/*
2516	 * Begin state merge transaction at IGMP layer.
2517	 */
2518	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2519	error = inm_merge(inm, imf);
2520	if (error) {
2521		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2522		goto out_imf_rollback;
2523	}
2524
2525	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2526	error = igmp_change_state(inm);
2527	if (error)
2528		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2529
2530	IN_MULTI_UNLOCK();
2531
2532out_imf_rollback:
2533	if (error)
2534		imf_rollback(imf);
2535	else
2536		imf_commit(imf);
2537
2538	imf_reap(imf);
2539
2540out_inp_locked:
2541	INP_WUNLOCK(inp);
2542	return (error);
2543}
2544
2545/*
2546 * Set the IP multicast options in response to user setsockopt().
2547 *
2548 * Many of the socket options handled in this function duplicate the
2549 * functionality of socket options in the regular unicast API. However,
2550 * it is not possible to merge the duplicate code, because the idempotence
2551 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2552 * the effects of these options must be treated as separate and distinct.
2553 *
2554 * SMPng: XXX: Unlocked read of inp_socket believed OK.
2555 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2556 * is refactored to no longer use vifs.
2557 */
2558int
2559inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2560{
2561	struct ip_moptions	*imo;
2562	int			 error;
2563
2564	error = 0;
2565
2566	/*
2567	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2568	 * or is a divert socket, reject it.
2569	 */
2570	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2571	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2572	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2573		return (EOPNOTSUPP);
2574
2575	switch (sopt->sopt_name) {
2576	case IP_MULTICAST_VIF: {
2577		int vifi;
2578		/*
2579		 * Select a multicast VIF for transmission.
2580		 * Only useful if multicast forwarding is active.
2581		 */
2582		if (legal_vif_num == NULL) {
2583			error = EOPNOTSUPP;
2584			break;
2585		}
2586		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2587		if (error)
2588			break;
2589		if (!legal_vif_num(vifi) && (vifi != -1)) {
2590			error = EINVAL;
2591			break;
2592		}
2593		imo = inp_findmoptions(inp);
2594		imo->imo_multicast_vif = vifi;
2595		INP_WUNLOCK(inp);
2596		break;
2597	}
2598
2599	case IP_MULTICAST_IF:
2600		error = inp_set_multicast_if(inp, sopt);
2601		break;
2602
2603	case IP_MULTICAST_TTL: {
2604		u_char ttl;
2605
2606		/*
2607		 * Set the IP time-to-live for outgoing multicast packets.
2608		 * The original multicast API required a char argument,
2609		 * which is inconsistent with the rest of the socket API.
2610		 * We allow either a char or an int.
2611		 */
2612		if (sopt->sopt_valsize == sizeof(u_char)) {
2613			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2614			    sizeof(u_char));
2615			if (error)
2616				break;
2617		} else {
2618			u_int ittl;
2619
2620			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2621			    sizeof(u_int));
2622			if (error)
2623				break;
2624			if (ittl > 255) {
2625				error = EINVAL;
2626				break;
2627			}
2628			ttl = (u_char)ittl;
2629		}
2630		imo = inp_findmoptions(inp);
2631		imo->imo_multicast_ttl = ttl;
2632		INP_WUNLOCK(inp);
2633		break;
2634	}
2635
2636	case IP_MULTICAST_LOOP: {
2637		u_char loop;
2638
2639		/*
2640		 * Set the loopback flag for outgoing multicast packets.
2641		 * Must be zero or one.  The original multicast API required a
2642		 * char argument, which is inconsistent with the rest
2643		 * of the socket API.  We allow either a char or an int.
2644		 */
2645		if (sopt->sopt_valsize == sizeof(u_char)) {
2646			error = sooptcopyin(sopt, &loop, sizeof(u_char),
2647			    sizeof(u_char));
2648			if (error)
2649				break;
2650		} else {
2651			u_int iloop;
2652
2653			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2654					    sizeof(u_int));
2655			if (error)
2656				break;
2657			loop = (u_char)iloop;
2658		}
2659		imo = inp_findmoptions(inp);
2660		imo->imo_multicast_loop = !!loop;
2661		INP_WUNLOCK(inp);
2662		break;
2663	}
2664
2665	case IP_ADD_MEMBERSHIP:
2666	case IP_ADD_SOURCE_MEMBERSHIP:
2667	case MCAST_JOIN_GROUP:
2668	case MCAST_JOIN_SOURCE_GROUP:
2669		error = inp_join_group(inp, sopt);
2670		break;
2671
2672	case IP_DROP_MEMBERSHIP:
2673	case IP_DROP_SOURCE_MEMBERSHIP:
2674	case MCAST_LEAVE_GROUP:
2675	case MCAST_LEAVE_SOURCE_GROUP:
2676		error = inp_leave_group(inp, sopt);
2677		break;
2678
2679	case IP_BLOCK_SOURCE:
2680	case IP_UNBLOCK_SOURCE:
2681	case MCAST_BLOCK_SOURCE:
2682	case MCAST_UNBLOCK_SOURCE:
2683		error = inp_block_unblock_source(inp, sopt);
2684		break;
2685
2686	case IP_MSFILTER:
2687		error = inp_set_source_filters(inp, sopt);
2688		break;
2689
2690	default:
2691		error = EOPNOTSUPP;
2692		break;
2693	}
2694
2695	INP_UNLOCK_ASSERT(inp);
2696
2697	return (error);
2698}
2699
2700/*
2701 * Expose IGMP's multicast filter mode and source list(s) to userland,
2702 * keyed by (ifindex, group).
2703 * The filter mode is written out as a uint32_t, followed by
2704 * 0..n of struct in_addr.
2705 * For use by ifmcstat(8).
2706 * SMPng: NOTE: unlocked read of ifindex space.
2707 */
2708static int
2709sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2710{
2711	INIT_VNET_NET(curvnet);
2712	struct in_addr			 src, group;
2713	struct ifnet			*ifp;
2714	struct ifmultiaddr		*ifma;
2715	struct in_multi			*inm;
2716	struct ip_msource		*ims;
2717	int				*name;
2718	int				 retval;
2719	u_int				 namelen;
2720	uint32_t			 fmode, ifindex;
2721
2722	name = (int *)arg1;
2723	namelen = arg2;
2724
2725	if (req->newptr != NULL)
2726		return (EPERM);
2727
2728	if (namelen != 2)
2729		return (EINVAL);
2730
2731	ifindex = name[0];
2732	if (ifindex <= 0 || ifindex > V_if_index) {
2733		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
2734		    __func__, ifindex);
2735		return (ENOENT);
2736	}
2737
2738	group.s_addr = name[1];
2739	if (!IN_MULTICAST(ntohl(group.s_addr))) {
2740		CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
2741		    __func__, inet_ntoa(group));
2742		return (EINVAL);
2743	}
2744
2745	ifp = ifnet_byindex(ifindex);
2746	if (ifp == NULL) {
2747		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2748		    __func__, ifindex);
2749		return (ENOENT);
2750	}
2751
2752	retval = sysctl_wire_old_buffer(req,
2753	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2754	if (retval)
2755		return (retval);
2756
2757	IN_MULTI_LOCK();
2758
2759	IF_ADDR_LOCK(ifp);
2760	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2761		if (ifma->ifma_addr->sa_family != AF_INET ||
2762		    ifma->ifma_protospec == NULL)
2763			continue;
2764		inm = (struct in_multi *)ifma->ifma_protospec;
2765		if (!in_hosteq(inm->inm_addr, group))
2766			continue;
2767		fmode = inm->inm_st[1].iss_fmode;
2768		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2769		if (retval != 0)
2770			break;
2771		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2772#ifdef KTR
2773			struct in_addr ina;
2774			ina.s_addr = htonl(ims->ims_haddr);
2775			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
2776			    inet_ntoa(ina));
2777#endif
2778			/*
2779			 * Only copy-out sources which are in-mode.
2780			 */
2781			if (fmode != ims_get_mode(inm, ims, 1)) {
2782				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2783				    __func__);
2784				continue;
2785			}
2786			src.s_addr = htonl(ims->ims_haddr);
2787			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2788			if (retval != 0)
2789				break;
2790		}
2791	}
2792	IF_ADDR_UNLOCK(ifp);
2793
2794	IN_MULTI_UNLOCK();
2795
2796	return (retval);
2797}
2798
2799#ifdef KTR
2800
2801static const char *inm_modestrs[] = { "un", "in", "ex" };
2802
2803static const char *
2804inm_mode_str(const int mode)
2805{
2806
2807	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2808		return (inm_modestrs[mode]);
2809	return ("??");
2810}
2811
2812static const char *inm_statestrs[] = {
2813	"not-member",
2814	"silent",
2815	"idle",
2816	"lazy",
2817	"sleeping",
2818	"awakening",
2819	"query-pending",
2820	"sg-query-pending",
2821	"leaving"
2822};
2823
2824static const char *
2825inm_state_str(const int state)
2826{
2827
2828	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
2829		return (inm_statestrs[state]);
2830	return ("??");
2831}
2832
2833/*
2834 * Dump an in_multi structure to the console.
2835 */
2836void
2837inm_print(const struct in_multi *inm)
2838{
2839	int t;
2840
2841	if ((ktr_mask & KTR_IGMPV3) == 0)
2842		return;
2843
2844	printf("%s: --- begin inm %p ---\n", __func__, inm);
2845	printf("addr %s ifp %p(%s) ifma %p\n",
2846	    inet_ntoa(inm->inm_addr),
2847	    inm->inm_ifp,
2848	    inm->inm_ifp->if_xname,
2849	    inm->inm_ifma);
2850	printf("timer %u state %s refcount %u scq.len %u\n",
2851	    inm->inm_timer,
2852	    inm_state_str(inm->inm_state),
2853	    inm->inm_refcount,
2854	    inm->inm_scq.ifq_len);
2855	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
2856	    inm->inm_igi,
2857	    inm->inm_nsrc,
2858	    inm->inm_sctimer,
2859	    inm->inm_scrv);
2860	for (t = 0; t < 2; t++) {
2861		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2862		    inm_mode_str(inm->inm_st[t].iss_fmode),
2863		    inm->inm_st[t].iss_asm,
2864		    inm->inm_st[t].iss_ex,
2865		    inm->inm_st[t].iss_in,
2866		    inm->inm_st[t].iss_rec);
2867	}
2868	printf("%s: --- end inm %p ---\n", __func__, inm);
2869}
2870
2871#else /* !KTR */
2872
/*
 * Stub used when the kernel is built without KTR: does nothing.
 */
void
inm_print(const struct in_multi *inm)
{

}
2878
2879#endif /* KTR */
2880
/*
 * Generate the red-black tree functions for ip_msource_tree: nodes are
 * struct ip_msource, linked through ims_link and ordered by
 * ip_msource_cmp().
 */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
2882